Example usage for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.getFileStatus.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Document

Return a file status object that represents the path.
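
For orientation, here is a minimal sketch of the call and a few of the fields available on the returned FileStatus. The configuration and path below are hypothetical, and the call throws FileNotFoundException when the path does not exist:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical path; adjust to a file that exists on your cluster.
        Path path = new Path("/tmp/example.txt");
        FileStatus status = fs.getFileStatus(path);

        System.out.println("length (bytes): " + status.getLen());
        System.out.println("is directory:   " + status.isDirectory());
        System.out.println("modified (ms):  " + status.getModificationTime());
        System.out.println("owner:          " + status.getOwner());
    }
}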

Usage

From source file:com.twitter.elephanttwin.util.HdfsUtils.java

License:Apache License

/**
 * Get the last modification date of an HDFS file.
 *
 * @param fs The file system.
 * @param fileUri URI of the file.
 * @return The last modification date of the file, in msecs since epoch, or -1 if unknown.
 * @throws IOException
 */
public static long getModificationTime(FileSystem fs, String fileUri) throws IOException {
    Preconditions.checkNotNull(fs);
    Preconditions.checkNotNull(fileUri);
    FileStatus fileStatus = fs.getFileStatus(new Path(fileUri));
    return fileStatus == null ? -1 : fileStatus.getModificationTime();
}

From source file:com.twitter.elephanttwin.util.HdfsUtils.java

License:Apache License

/**
 * @param fs The file system.
 * @param fileUri URI of the file.
 * @return True if the file is an HDFS directory.
 * @throws IOException
 */
public static boolean isDirectory(FileSystem fs, String fileUri) throws IOException {
    Preconditions.checkNotNull(fs);
    Preconditions.checkNotNull(fileUri);
    FileStatus fileStatus = fs.getFileStatus(new Path(fileUri));
    return fileStatus != null && fileStatus.isDir();
}
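
A caveat worth noting: on current Hadoop releases getFileStatus throws FileNotFoundException for a missing path rather than returning null, so the null checks in the two helpers above are defensive only. Below is a minimal sketch of a variant that checks existence explicitly with FileSystem.exists; the helper name is hypothetical and not part of the original class:

public static boolean isExistingHdfsDirectory(FileSystem fs, String fileUri) throws IOException {
    Preconditions.checkNotNull(fs);
    Preconditions.checkNotNull(fileUri);
    Path path = new Path(fileUri);
    // exists() returns false for a missing path instead of throwing.
    if (!fs.exists(path)) {
        return false;
    }
    // isDirectory() is the non-deprecated replacement for isDir().
    return fs.getFileStatus(path).isDirectory();
}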

From source file:com.twitter.hraven.etl.JobFileModifiedRangePathFilter.java

License:Apache License

@Override
public boolean accept(Path path) {
    if (!super.accept(path)) {
        return false;
    }

    JobFile jobFile = new JobFile(path.getName());
    if (jobFile.isJobConfFile() || jobFile.isJobHistoryFile()) {
        try {
            FileSystem fs = path.getFileSystem(myConf);
            FileStatus fileStatus = fs.getFileStatus(path);
            long fileModificationTimeMillis = fileStatus.getModificationTime();
            return accept(fileModificationTimeMillis);
        } catch (IOException e) {
            throw new ImportException("Cannot determine file modification time of " + path.getName(), e);
        }
    } else {
        // Reject anything that does not match a job conf filename.
        LOG.info(" Not a valid job conf / job history file " + path.getName());
        return false;
    }
}

From source file:com.twitter.hraven.etl.JobFileModifiedRangeSubstringPathFilter.java

License:Apache License

@Override
public boolean accept(Path path) {
    if (!super.accept(path)) {
        return false;
    }

    JobFile jobFile = new JobFile(path.getName());
    if (jobFile.isJobConfFile() || jobFile.isJobHistoryFile()) {
        if (jobFile.isJobHistoryFile()) {
            if (!includesPathSubstrings(path) || !excludesPathSubstrings(path)) {
                return false;
            }
        }
        try {
            FileSystem fs = path.getFileSystem(myConf);
            FileStatus fileStatus = fs.getFileStatus(path);
            long fileModificationTimeMillis = fileStatus.getModificationTime();
            return accept(fileModificationTimeMillis);
        } catch (IOException e) {
            throw new ImportException("Cannot determine file modification time of " + path.getName(), e);
        }
    } else {
        // Reject anything that does not match a job conf filename.
        LOG.info(" Not a valid job conf / job history file " + path.getName());
        return false;
    }
}

From source file:com.twitter.hraven.etl.JobFilePreprocessor.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    // When we started processing. This is also the upper limit of files we
    // accept, next run will pick up the new incoming files.
    long processingStartMillis = System.currentTimeMillis();

    Configuration hbaseConf = HBaseConfiguration.create(getConf());

    // Grab input args and allow for -Dxyz style arguments
    String[] otherArgs = new GenericOptionsParser(hbaseConf, args).getRemainingArgs();

    // Grab the arguments we're looking for.
    CommandLine commandLine = parseArgs(otherArgs);

    // Output should be an hdfs path.
    FileSystem hdfs = FileSystem.get(hbaseConf);

    // Grab the input path argument
    String output = commandLine.getOptionValue("o");
    LOG.info(" output=" + output);
    Path outputPath = new Path(output);
    FileStatus outputFileStatus = hdfs.getFileStatus(outputPath);

    if (!outputFileStatus.isDir()) {
        throw new IOException("Output is not a directory" + outputFileStatus.getPath().getName());
    }

    // Grab the input path argument
    String input;
    if (commandLine.hasOption("i")) {
        input = commandLine.getOptionValue("i");
    } else {
        input = hbaseConf.get("mapred.job.tracker.history.completed.location");
    }
    LOG.info("input=" + input);

    // Grab the batch-size argument
    int batchSize;
    if (commandLine.hasOption("b")) {
        try {
            batchSize = Integer.parseInt(commandLine.getOptionValue("b"));
        } catch (NumberFormatException nfe) {
            throw new IllegalArgumentException(
                    "batch size option -b is is not a valid number: " + commandLine.getOptionValue("b"), nfe);
        }
        // Additional check
        if (batchSize < 1) {
            throw new IllegalArgumentException(
                    "Cannot process files in batches smaller than 1. Specified batch size option -b is: "
                            + commandLine.getOptionValue("b"));
        }
    } else {
        batchSize = DEFAULT_BATCH_SIZE;
    }

    boolean forceAllFiles = commandLine.hasOption("f");
    LOG.info("forceAllFiles: " + forceAllFiles);

    Path inputPath = new Path(input);
    FileStatus inputFileStatus = hdfs.getFileStatus(inputPath);

    if (!inputFileStatus.isDir()) {
        throw new IOException("Input is not a directory" + inputFileStatus.getPath().getName());
    }

    // Grab the cluster argument
    String cluster = commandLine.getOptionValue("c");
    LOG.info("cluster=" + cluster);

    /**
     * Grab the size of huge files to be moved argument
     * hbase cell can't store files bigger than
     * maxFileSize, hence no need to consider them for rawloading
     * Reference:
     * {@link https://github.com/twitter/hraven/issues/59}
     */
    String maxFileSizeStr = commandLine.getOptionValue("s");
    LOG.info("maxFileSize=" + maxFileSizeStr);
    long maxFileSize = DEFAULT_RAW_FILE_SIZE_LIMIT;
    try {
        maxFileSize = Long.parseLong(maxFileSizeStr);
    } catch (NumberFormatException nfe) {
        throw new ProcessingException(
                "Caught NumberFormatException during conversion " + " of maxFileSize to long", nfe);
    }

    ProcessRecordService processRecordService = new ProcessRecordService(hbaseConf);

    boolean success = true;
    try {

        // Figure out where we last left off (if anywhere at all)
        ProcessRecord lastProcessRecord = null;

        if (!forceAllFiles) {
            lastProcessRecord = processRecordService.getLastSuccessfulProcessRecord(cluster);
        }

        long minModificationTimeMillis = 0;
        if (lastProcessRecord != null) {
            // Start of this time period is the end of the last period.
            minModificationTimeMillis = lastProcessRecord.getMaxModificationTimeMillis();
        }

        // Do a sanity check. The end time of the last scan better not be later
        // than when we started processing.
        if (minModificationTimeMillis > processingStartMillis) {
            throw new RuntimeException("The last processing record has maxModificationMillis later than now: "
                    + lastProcessRecord);
        }

        // Accept only jobFiles and only those that fall in the desired range of
        // modification time.
        JobFileModifiedRangePathFilter jobFileModifiedRangePathFilter = new JobFileModifiedRangePathFilter(
                hbaseConf, minModificationTimeMillis);

        String timestamp = Constants.TIMESTAMP_FORMAT.format(new Date(minModificationTimeMillis));

        ContentSummary contentSummary = hdfs.getContentSummary(inputPath);
        LOG.info("Listing / filtering (" + contentSummary.getFileCount() + ") files in: " + inputPath
                + " that are modified since " + timestamp);

        // get the files in the done folder,
        // need to traverse dirs under done recursively for versions
        // that include MAPREDUCE-323: on/after hadoop 0.20.203.0
        // on/after cdh3u5
        FileStatus[] jobFileStatusses = FileLister.getListFilesToProcess(maxFileSize, true, hdfs, inputPath,
                jobFileModifiedRangePathFilter);

        LOG.info("Sorting " + jobFileStatusses.length + " job files.");

        Arrays.sort(jobFileStatusses, new FileStatusModificationComparator());

        // Process these files in batches at a time.
        int batchCount = BatchUtil.getBatchCount(jobFileStatusses.length, batchSize);
        LOG.info("Batch count: " + batchCount);
        for (int b = 0; b < batchCount; b++) {
            processBatch(jobFileStatusses, b, batchSize, processRecordService, cluster, outputPath);
        }

    } finally {
        processRecordService.close();
    }

    Statistics statistics = FileSystem.getStatistics(inputPath.toUri().getScheme(), hdfs.getClass());
    if (statistics != null) {
        LOG.info("HDFS bytes read: " + statistics.getBytesRead());
        LOG.info("HDFS bytes written: " + statistics.getBytesWritten());
        LOG.info("HDFS read ops: " + statistics.getReadOps());
        LOG.info("HDFS large read ops: " + statistics.getLargeReadOps());
        LOG.info("HDFS write ops: " + statistics.getWriteOps());
    }

    // Return the status
    return success ? 0 : 1;
}

From source file:com.twitter.hraven.etl.TestFileLister.java

License:Apache License

@Test
public void testPruneFileListBySize() throws IOException {

    long maxFileSize = 20L;
    FileStatus[] origList = new FileStatus[2];
    FileSystem hdfs = FileSystem.get(UTIL.getConfiguration());
    Path inputPath = new Path("/inputdir_filesize");
    boolean os = hdfs.mkdirs(inputPath);
    assertTrue(os);
    assertTrue(hdfs.exists(inputPath));

    final String JOB_HISTORY_FILE_NAME = "src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist";
    File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME);
    Path srcPath = new Path(jobHistoryfile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    Path expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[0] = hdfs.getFileStatus(expPath);

    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    File jobConfFile = new File(JOB_CONF_FILE_NAME);
    srcPath = new Path(jobConfFile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[1] = hdfs.getFileStatus(expPath);

    FileStatus[] prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 0);

    Path emptyFile = new Path(
            inputPath.toUri() + "/" + "job_1329341111111_0101-1329111113227-user2-Sleep.jhist");
    os = hdfs.createNewFile(emptyFile);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyFile));
    origList[0] = hdfs.getFileStatus(emptyFile);

    Path emptyConfFile = new Path(inputPath.toUri() + "/" + "job_1329341111111_0101_conf.xml");
    os = hdfs.createNewFile(emptyConfFile);

    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile));
    origList[1] = hdfs.getFileStatus(emptyConfFile);

    prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 2);

}

From source file:com.twitter.hraven.etl.TestFileLister.java

License:Apache License

/**
 * removes conf file which has already been put in prunedList
 *
 * @throws IOException
 */
@Test
public void testPruneFileListRemovingConfFromPruneList() throws IOException {

    long maxFileSize = 20L;
    FileStatus[] origList = new FileStatus[2];
    FileSystem hdfs = FileSystem.get(UTIL.getConfiguration());
    Path inputPath = new Path("/inputdir_filesize_pruneList");
    boolean os = hdfs.mkdirs(inputPath);
    assertTrue(os);
    assertTrue(hdfs.exists(inputPath));

    Path relocationPath = new Path("/relocation_filesize_pruneList");
    os = hdfs.mkdirs(relocationPath);
    assertTrue(os);
    assertTrue(hdfs.exists(relocationPath));

    Path emptyConfFile = new Path(inputPath.toUri() + "/" + "job_1329348432655_0001_conf.xml");
    os = hdfs.createNewFile(emptyConfFile);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile));
    origList[0] = hdfs.getFileStatus(emptyConfFile);

    final String JOB_HISTORY_FILE_NAME = "src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist";
    File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME);
    Path srcPath = new Path(jobHistoryfile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    Path expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[1] = hdfs.getFileStatus(expPath);

    FileStatus[] prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 0);
}

From source file:com.twitter.hraven.etl.TestFileLister.java

License:Apache License

/**
 * tests the case when several files are spread out in the dir and need to be removed
 *
 * @throws IOException
 */
@Test
public void testPruneFileListMultipleFilesAlreadyMovedCases() throws IOException {

    long maxFileSize = 20L;
    FileStatus[] origList = new FileStatus[12];
    FileSystem hdfs = FileSystem.get(UTIL.getConfiguration());
    Path inputPath = new Path("/inputdir_filesize_multiple");
    boolean os = hdfs.mkdirs(inputPath);
    assertTrue(os);
    assertTrue(hdfs.exists(inputPath));

    Path relocationPath = new Path("/relocation_filesize_multiple");
    os = hdfs.mkdirs(relocationPath);
    assertTrue(os);
    assertTrue(hdfs.exists(relocationPath));

    Path emptyFile = new Path(
            inputPath.toUri() + "/" + "job_1329341111111_0101-1329111113227-user2-Sleep.jhist");
    os = hdfs.createNewFile(emptyFile);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyFile));
    origList[0] = hdfs.getFileStatus(emptyFile);

    Path emptyConfFile = new Path(inputPath.toUri() + "/" + "job_1329341111111_0101_conf.xml");
    os = hdfs.createNewFile(emptyConfFile);

    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile));
    origList[1] = hdfs.getFileStatus(emptyConfFile);

    final String JOB_HISTORY_FILE_NAME = "src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist";
    File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME);
    Path srcPath = new Path(jobHistoryfile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    Path expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[2] = hdfs.getFileStatus(expPath);

    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    File jobConfFile = new File(JOB_CONF_FILE_NAME);
    srcPath = new Path(jobConfFile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[3] = hdfs.getFileStatus(expPath);

    Path inputPath2 = new Path(inputPath.toUri() + "/"
            + "job_1311222222255_0221-1311111143227-user10101-WordCount-1-SUCCEEDED-default.jhist");
    hdfs.copyFromLocalFile(srcPath, inputPath2);
    assertTrue(hdfs.exists(inputPath2));
    origList[4] = hdfs.getFileStatus(inputPath2);

    Path inputPath3 = new Path(inputPath.toUri() + "/"
            + "job_1399999999155_0991-1311111143227-user3321-TeraGen-1-SUCCEEDED-default.jhist");
    hdfs.copyFromLocalFile(srcPath, inputPath3);
    assertTrue(hdfs.exists(inputPath3));
    origList[5] = hdfs.getFileStatus(inputPath3);

    Path inputPath4 = new Path(inputPath.toUri() + "/"
            + "job_1399977777177_0771-1311111143227-user3321-TeraSort-1-SUCCEEDED-default.jhist");
    hdfs.copyFromLocalFile(srcPath, inputPath4);
    assertTrue(hdfs.exists(inputPath4));
    origList[6] = hdfs.getFileStatus(inputPath4);

    Path emptyFile2 = new Path(
            inputPath.toUri() + "/" + "job_1329343333333_5551-1329111113227-user2-SomethingElse.jhist");
    os = hdfs.createNewFile(emptyFile2);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyFile2));
    origList[7] = hdfs.getFileStatus(emptyFile2);

    Path emptyConfFile2 = new Path(inputPath.toUri() + "/" + "job_1329343333333_5551_conf.xml");
    os = hdfs.createNewFile(emptyConfFile2);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile2));
    origList[8] = hdfs.getFileStatus(emptyConfFile2);

    // this is an empty file which tests the toBeRemovedFileList
    // at the end of function pruneFileListBySize
    Path emptyConfFile3 = new Path(inputPath.toUri() + "/" + "job_1399999999155_0991_conf.xml");
    os = hdfs.createNewFile(emptyConfFile3);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile3));
    origList[9] = hdfs.getFileStatus(emptyConfFile3);

    Path inputConfPath2 = new Path(inputPath.toUri() + "/" + "job_1311222222255_0221_conf.xml");
    srcPath = new Path(jobConfFile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputConfPath2);
    assertTrue(hdfs.exists(inputConfPath2));
    origList[10] = hdfs.getFileStatus(inputConfPath2);

    // this is an empty file which tests the toBeRemovedFileList
    // at the end of function pruneFileListBySize
    Path emptyConfFile4 = new Path(inputPath.toUri() + "/" + "job_1399977777177_0771_conf.xml");
    os = hdfs.createNewFile(emptyConfFile4);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile4));
    origList[11] = hdfs.getFileStatus(emptyConfFile4);

    FileStatus[] prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 4);
}

From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezJobControlCompiler.java

License:Apache License

public DAG createDAG(TezOperPlan tezPlan, FileSystem remoteFs, TezConfiguration conf, ApplicationId appId,
        Path remoteStagingDir) throws IOException, YarnException {

    DAG dag = new DAG("MRRSleepJob");
    /*
          String jarPath = ClassUtil.findContainingJar(getClass());
          Path remoteJarPath = remoteFs.makeQualified(
    new Path(remoteStagingDir, "dag_job.jar"));
          remoteFs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
          FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);
    */
    Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();

    if (!pigContext.inIllustrator && pigContext.getExecType() != ExecType.TEZ_LOCAL) {

        // Setup the DistributedCache for this job
        for (URL extraJar : pigContext.extraJars) {
            //log.debug("Adding jar to DistributedCache: " + extraJar.toString());
            TezJobControlCompiler.putJarOnClassPathThroughDistributedCache(pigContext, conf, extraJar);
        }

        //Create the jar of all functions and classes required
        File submitJarFile = File.createTempFile("Job", ".jar");
        //log.info("creating jar file "+submitJarFile.getName());
        // ensure the job jar is deleted on exit
        submitJarFile.deleteOnExit();
        FileOutputStream fos = new FileOutputStream(submitJarFile);
        try {
            JarManager.createJar(fos, new HashSet<String>(), pigContext);
        } catch (ClassNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "dag_job.jar"));
        remoteFs.copyFromLocalFile(new Path(submitJarFile.getAbsolutePath()), remoteJarPath);
        FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);

        LocalResource dagJarLocalRsrc = LocalResource.newInstance(
                ConverterUtils.getYarnUrlFromPath(remoteJarPath), LocalResourceType.FILE,
                LocalResourceVisibility.APPLICATION, jarFileStatus.getLen(),
                jarFileStatus.getModificationTime());
        commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

        Path remoteTezJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "pig-tez.jar"));
        remoteFs.copyFromLocalFile(new Path("pig-tez.jar"), remoteTezJarPath);
        FileStatus tezJarFileStatus = remoteFs.getFileStatus(remoteTezJarPath);

        LocalResource tezJarLocalRsrc = LocalResource.newInstance(
                ConverterUtils.getYarnUrlFromPath(remoteTezJarPath), LocalResourceType.FILE,
                LocalResourceVisibility.APPLICATION, tezJarFileStatus.getLen(),
                tezJarFileStatus.getModificationTime());
        commonLocalResources.put("pig-tez.jar", tezJarLocalRsrc);

        //log.info("jar file "+submitJarFile.getName()+" created");
        //Start setting the JobConf properties
        conf.set("mapred.jar", submitJarFile.getPath());
    }

    /*
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(
    ConverterUtils.getYarnUrlFromPath(remoteJarPath),
    LocalResourceType.FILE,
    LocalResourceVisibility.APPLICATION,
    jarFileStatus.getLen(),
    jarFileStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);
    */

    Hashtable<TezOperator, Pair<Vertex, Configuration>> vertexMap = new Hashtable<TezOperator, Pair<Vertex, Configuration>>();

    List<TezOperator> operators = tezPlan.getRoots();

    // add settings for pig statistics
    String setScriptProp = conf.get(ScriptState.INSERT_ENABLED, "true");
    ScriptState ss = null;

    if (setScriptProp.equalsIgnoreCase("true")) {
        ss = ScriptState.get();
    }

    while (operators != null && operators.size() != 0) {

        List<TezOperator> successors = new ArrayList<TezOperator>();

        for (TezOperator oper : operators) {

            Configuration operConf = oper.configure(pigContext, conf);
            /*
            if (ss != null){
               ss.addSettingsToConf(oper, conf);
            }
            */
            List<TezOperator> predecessors = plan.getPredecessors(oper);

            if (predecessors != null && predecessors.size() != 0) {
                MultiStageMRConfToTezTranslator.translateVertexConfToTez(operConf,
                        vertexMap.get(predecessors.get(0)).second);
            } else {
                MultiStageMRConfToTezTranslator.translateVertexConfToTez(operConf, null);
            }

            List<TezOperator> operSuccessors = tezPlan.getSuccessors(oper);
            if (operSuccessors != null) {
                successors.addAll(operSuccessors);
            }

            MRHelpers.doJobClientMagic(operConf);

            //mapStageConf.setInt(MRJobConfig.NUM_MAPS, numMapper);

            Vertex operVertex = new Vertex(oper.name(),
                    new ProcessorDescriptor(oper.getProcessor(), MRHelpers.createUserPayloadFromConf(operConf)),
                    oper.getParallelism(), MRHelpers.getMapResource(operConf));

            oper.configureVertex(operVertex, operConf, commonLocalResources, remoteStagingDir);

            dag.addVertex(operVertex);
            if (predecessors != null) {

                for (TezOperator predecessor : predecessors) {
                    dag.addEdge(new Edge(vertexMap.get(predecessor).first, operVertex,
                            tezPlan.getEdgeProperty(predecessor, oper)));
                }

            }

            vertexMap.put(oper, new Pair<Vertex, Configuration>(operVertex, operConf));
        }

        operators = successors;
    }
    return dag;
}

From source file:com.uber.hoodie.common.table.log.HoodieLogFileReader.java

License:Apache License

HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize,
        boolean readBlockLazily, boolean reverseReader) throws IOException {
    FSDataInputStream fsDataInputStream = fs.open(logFile.getPath(), bufferSize);
    if (fsDataInputStream.getWrappedStream() instanceof FSInputStream) {
        this.inputStream = new FSDataInputStream(
                new BufferedFSInputStream((FSInputStream) fsDataInputStream.getWrappedStream(), bufferSize));
    } else {
        // fsDataInputStream.getWrappedStream() may be a BufferedFSInputStream;
        // need to wrap in another BufferedFSInputStream to make bufferSize work?
        this.inputStream = fsDataInputStream;
    }

    this.logFile = logFile;
    this.readerSchema = readerSchema;
    this.readBlockLazily = readBlockLazily;
    this.reverseReader = reverseReader;
    if (this.reverseReader) {
        this.reverseLogFilePosition = this.lastReverseLogFilePosition = fs.getFileStatus(logFile.getPath())
                .getLen();
    }
    addShutDownHook();
}