List of usage examples for org.apache.hadoop.fs Path getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
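Before the examples from real projects, here is a minimal sketch of the basic call pattern (not taken from the source files below; the path name and configuration are illustrative assumptions):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();    // loads core-site.xml etc. from the classpath, if present
        Path path = new Path("/tmp/example.txt");    // hypothetical path; its scheme determines the FileSystem
        FileSystem fs = path.getFileSystem(conf);    // e.g. LocalFileSystem or DistributedFileSystem
        if (fs.exists(path)) {
            try (FSDataInputStream in = fs.open(path)) {
                System.out.println("First byte: " + in.read());
            }
        }
    }
}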
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License:Apache License
@Override
public void commitJob(JobContext context) throws IOException {
    Configuration configuration = context.getConfiguration();
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    Partitioning partitioning = outputDataset.getPartitioning();

    Set<PartitionKey> partitionsToAdd = new HashSet<>();
    Set<String> relativePaths = new HashSet<>();
    // Go over all files in the temporary directory and keep track of partitions to add for them
    FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context);
    for (FileStatus committedTaskPath : allCommittedTaskPaths) {
        FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration);
        RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true);
        while (fileIter.hasNext()) {
            Path path = fileIter.next().getPath();
            String relativePath = getRelative(committedTaskPath.getPath(), path);

            int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR);
            if (lastPathSepIdx == -1) {
                // this shouldn't happen because each relative path should consist of at least one partition key
                // and the output file name
                LOG.warn("Skipping path '{}'. Its relative path '{}' has fewer than two parts", path, relativePath);
                continue;
            }
            // relativePath = "../key1/key2/part-m-00000"
            // relativeDir  = "../key1/key2"
            // fileName     = "part-m-00000"
            String relativeDir = relativePath.substring(0, lastPathSepIdx);
            String fileName = relativePath.substring(lastPathSepIdx + 1);

            Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir);
            Path finalPath = new Path(finalDir, fileName);
            if (fs.exists(finalPath)) {
                throw new FileAlreadyExistsException("Final output path " + finalPath + " already exists");
            }
            PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir);
            partitionsToAdd.add(partitionKey);
            relativePaths.add(relativeDir);
        }
    }

    // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a _temporary_jobId
    // suffix to the original outputDir.
    Path finalOutput = FileOutputFormat.getOutputPath(context);
    FileSystem fs = finalOutput.getFileSystem(configuration);
    for (FileStatus stat : getAllCommittedTaskPaths(context)) {
        mergePaths(fs, stat, finalOutput);
    }

    // compute the metadata to be written to every output partition
    Map<String, String> metadata = ConfigurationUtil.getNamedConfigurations(this.taskContext.getConfiguration(),
            PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX);

    // create all the necessary partitions
    for (PartitionKey partitionKey : partitionsToAdd) {
        PartitionOutput partitionOutput = outputDataset.getPartitionOutput(partitionKey);
        partitionOutput.setMetadata(metadata);
        partitionOutput.addPartition();
    }

    // close the TaskContext, which flushes dataset operations
    try {
        taskContext.flushOperations();
    } catch (Exception e) {
        Throwables.propagateIfPossible(e, IOException.class);
        throw new IOException(e);
    }

    // delete the job-specific _temporary folder and create a _done file in the o/p folder
    cleanupJob(context);

    // mark all the final output paths with a _SUCCESS file, if configured to do so (default = true)
    if (configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
        for (String relativePath : relativePaths) {
            Path pathToMark = new Path(finalOutput, relativePath);
            Path markerPath = new Path(pathToMark, SUCCEEDED_FILE_NAME);
            fs.createNewFile(markerPath);
        }
    }
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License:Apache License
/**
 * Get a list of all paths where output from committed tasks is stored.
 *
 * @param context the context of the current job
 * @return the list of these Paths/FileStatuses.
 * @throws IOException
 */
private FileStatus[] getAllCommittedTaskPaths(JobContext context) throws IOException {
    Path jobAttemptPath = getJobAttemptPath(context);
    FileSystem fs = jobAttemptPath.getFileSystem(context.getConfiguration());
    return fs.listStatus(jobAttemptPath, new CommittedTaskFilter());
}
From source file:co.nubetech.hiho.dedup.DelimitedLineRecordReader.java
License:Apache License
/**
 * Initializes the reader for the given split, reading the delimiter and
 * key column from the job configuration.
 *
 * @param genericSplit the file split to read
 * @param context the task attempt context
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.delimiter = job.get(DelimitedTextInputFormat.DELIMITER_CONF);
    this.column = job.getInt(DelimitedTextInputFormat.COLUMN_CONF, 0);
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) {
        // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
From source file:co.nubetech.hiho.job.ExportToOracleDb.java
License:Apache License
public static String getAlterTableDML(Path inputPath, Configuration conf) throws IOException, HIHOException {
    // after running the job, we need to alter the external table to take
    // care of the files we have added
    FileStatus[] contents = inputPath.getFileSystem(conf).listStatus(inputPath);
    if (contents != null) {
        StringBuilder dml = new StringBuilder();
        dml.append(" ALTER TABLE ");
        dml.append(getTableName(conf.get(HIHOConf.EXTERNAL_TABLE_DML)));
        dml.append(" LOCATION (");
        int i = 0;
        for (FileStatus content : contents) {
            String fileName = content.getPath().getName();
            dml.append("\'");
            dml.append(fileName);
            dml.append("\'");
            // append a comma after every file name except the last one
            if (i < contents.length - 1) {
                dml.append(",");
            }
            i++;
        }
        // close the LOCATION clause; the caller executes this DML to alter the table location
        dml.append(")");
        return dml.toString();
    }
    return null;
}
From source file:co.nubetech.hiho.job.TestExportToOracleDb.java
License:Apache License
@Test
public void testAlterTableDMl() throws HIHOException, IOException {
    Configuration conf = mock(Configuration.class);
    Path path = mock(Path.class);
    FileStatus status1 = mock(FileStatus.class);
    Path path1 = mock(Path.class);
    when(path1.getName()).thenReturn("part-xxxxx");
    when(status1.getPath()).thenReturn(path1);
    FileStatus status2 = mock(FileStatus.class);
    Path path2 = mock(Path.class);
    when(path2.getName()).thenReturn("part-yyyyy");
    when(status2.getPath()).thenReturn(path2);
    FileSystem fs = mock(FileSystem.class);
    when(fs.listStatus(path)).thenReturn(new FileStatus[] { status1, status2 });
    when(path.getFileSystem(conf)).thenReturn(fs);
    when(conf.get(HIHOConf.EXTERNAL_TABLE_DML)).thenReturn(
            "create table age( i Number, n Varchar(20), a Number)organization external ( type oracle_loader default directory ext_dir access parameters (records delimited by newlinefields terminated by ','missing field values are null )location (/home/nube/:file.txt) reject' limit unlimited;");
    String dml = ExportToOracleDb.getAlterTableDML(path, conf);
    assertEquals(" ALTER TABLE age LOCATION ('part-xxxxx','part-yyyyy')", dml);
}
From source file:co.nubetech.hiho.mapred.input.FileStreamRecordReader.java
License:Apache License
@Override
public FSDataInputStream createValue() {
    logger.debug("Creating value");
    FSDataInputStream stream = null;
    Path file = split.getPath();
    logger.debug("Path is " + file);
    fileName = file.getName();
    try {
        FileSystem fs = file.getFileSystem(configuration);
        stream = new FSDataInputStream(fs.open(file));
    } catch (IOException e) {
        e.printStackTrace();
    }
    logger.debug("Opened stream");
    return stream;
}
From source file:co.nubetech.hiho.mapreduce.lib.input.FileStreamRecordReader.java
License:Apache License
@Override
public boolean nextKeyValue() throws IOException {
    logger.debug("Inside nextKeyValue");
    if (!isRead) {
        Path file = split.getPath();
        logger.debug("Path is " + file);
        fileName = file.getName();
        FileSystem fs = file.getFileSystem(context.getConfiguration());
        stream = fs.open(file);
        logger.debug("Opened stream");
        isRead = true;
        return true;
    }
    return false;
}
From source file:co.nubetech.hiho.mapreduce.lib.output.AppendSequenceFileOutputFormat.java
License:Apache License
@Override
public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
    Path p1;
    isAppend = context.getConfiguration().get(HIHOConf.IS_APPEND, "false");
    if (isAppend.equalsIgnoreCase("false")) {
        p1 = super.getDefaultWorkFile(context, extension);
    } else {
        FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context);
        Path p = committer.getWorkPath();
        fileCount = p.getFileSystem(context.getConfiguration()).getContentSummary(getOutputPath(context))
                .getFileCount();
        if (fileCount > 1) {
            fileCount = fileCount - 1;
        }
        p1 = new Path(committer.getWorkPath(), getUniqueFile(context, "part", extension));
    }
    return p1;
}
From source file:co.nubetech.hiho.mapreduce.lib.output.AppendTextOutputFormat.java
License:Apache License
@Override
public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
    Path p1;
    isAppend = context.getConfiguration().get(HIHOConf.IS_APPEND);
    if (isAppend.equalsIgnoreCase("false")) {
        p1 = super.getDefaultWorkFile(context, extension);
    } else {
        FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context);
        Path p = committer.getWorkPath();
        fileCount = p.getFileSystem(context.getConfiguration()).getContentSummary(getOutputPath(context))
                .getFileCount();
        if (fileCount > 1) {
            fileCount = fileCount - 1;
        }
        p1 = new Path(committer.getWorkPath(), getUniqueFile(context, "part", extension));
    }
    return p1;
}
From source file:co.nubetech.hiho.mapreduce.lib.output.FTPTextOutputFormat.java
License:Apache License
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    String ip = conf.get(HIHOConf.FTP_ADDRESS);
    String portno = conf.get(HIHOConf.FTP_PORT);
    String usr = conf.get(HIHOConf.FTP_USER);
    String pwd = conf.get(HIHOConf.FTP_PASSWORD);
    String dir = getOutputPath(job).toString();
    System.out.println("\n\ninside ftpoutputformat" + ip + " " + portno + " " + usr + " " + pwd + " " + dir);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    FTPClient f = new FTPClient();
    f.connect(ip, Integer.parseInt(portno));
    f.login(usr, pwd);
    f.changeWorkingDirectory(dir);
    f.setFileType(FTP.BINARY_FILE_TYPE);

    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    String filename = file.getName();
    if (!isCompressed) {
        // FSDataOutputStream fileOut = fs.create(file, false);
        OutputStream os = f.appendFileStream(filename);
        DataOutputStream fileOut = new DataOutputStream(os);
        return new FTPLineRecordWriter<K, V>(fileOut, new String(keyValueSeparator), f);
    } else {
        // FSDataOutputStream fileOut = fs.create(file, false);
        OutputStream os = f.appendFileStream(filename);
        DataOutputStream fileOut = new DataOutputStream(os);
        return new FTPLineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator, f);
    }
}