Example usage for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.getFileStatus.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Document

Return a file status object that represents the path.
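
For orientation, here is a minimal sketch of the call and a few of the fields available on the returned FileStatus. The configuration and path below are hypothetical, and the call throws FileNotFoundException when the path does not exist:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical path; adjust to a file that exists on your cluster.
        Path path = new Path("/tmp/example.txt");
        FileStatus status = fs.getFileStatus(path);

        System.out.println("length (bytes): " + status.getLen());
        System.out.println("is directory:   " + status.isDirectory());
        System.out.println("modified (ms):  " + status.getModificationTime());
        System.out.println("owner:          " + status.getOwner());
    }
}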

Usage

From source file:com.twitter.elephanttwin.util.HdfsUtils.java

License:Apache License

/**
 * Get the last modification date of an HDFS file.
 *
 * @param fs The file system.
 * @param fileUri URI of the file.
 * @return The last modification date of the file, in msecs since epoch, or -1 if unknown.
 * @throws IOException
 */
public static long getModificationTime(FileSystem fs, String fileUri) throws IOException {
    Preconditions.checkNotNull(fs);
    Preconditions.checkNotNull(fileUri);
    FileStatus fileStatus = fs.getFileStatus(new Path(fileUri));
    return fileStatus == null ? -1 : fileStatus.getModificationTime();
}

From source file:com.twitter.elephanttwin.util.HdfsUtils.java

License:Apache License

/**
 * @param fs The file system.
 * @param fileUri URI of the file.
 * @return True if the file is an HDFS directory.
 * @throws IOException
 */
public static boolean isDirectory(FileSystem fs, String fileUri) throws IOException {
    Preconditions.checkNotNull(fs);
    Preconditions.checkNotNull(fileUri);
    FileStatus fileStatus = fs.getFileStatus(new Path(fileUri));
    return fileStatus != null && fileStatus.isDir();
}
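
A caveat worth noting: on current Hadoop releases getFileStatus throws FileNotFoundException for a missing path rather than returning null, so the null checks in the two helpers above are defensive only. Below is a minimal sketch of a variant that checks existence explicitly with FileSystem.exists; the helper name is hypothetical and not part of the original class:

public static boolean isExistingHdfsDirectory(FileSystem fs, String fileUri) throws IOException {
    Preconditions.checkNotNull(fs);
    Preconditions.checkNotNull(fileUri);
    Path path = new Path(fileUri);
    // exists() returns false for a missing path instead of throwing.
    if (!fs.exists(path)) {
        return false;
    }
    // isDirectory() is the non-deprecated replacement for isDir().
    return fs.getFileStatus(path).isDirectory();
}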

From source file:com.twitter.hraven.etl.JobFileModifiedRangePathFilter.java

License:Apache License

@Override
public boolean accept(Path path) {
    if (!super.accept(path)) {
        return false;
    }

    JobFile jobFile = new JobFile(path.getName());
    if (jobFile.isJobConfFile() || jobFile.isJobHistoryFile()) {
        try {
            FileSystem fs = path.getFileSystem(myConf);
            FileStatus fileStatus = fs.getFileStatus(path);
            long fileModificationTimeMillis = fileStatus.getModificationTime();
            return accept(fileModificationTimeMillis);
        } catch (IOException e) {
            throw new ImportException("Cannot determine file modification time of " + path.getName(), e);
        }
    } else {
        // Reject anything that does not match a job conf filename.
        LOG.info(" Not a valid job conf / job history file " + path.getName());
        return false;
    }
}

From source file:com.twitter.hraven.etl.JobFileModifiedRangeSubstringPathFilter.java

License:Apache License

@Override
public boolean accept(Path path) {
    if (!super.accept(path)) {
        return false;
    }

    JobFile jobFile = new JobFile(path.getName());
    if (jobFile.isJobConfFile() || jobFile.isJobHistoryFile()) {
        if (jobFile.isJobHistoryFile()) {
            if (!includesPathSubstrings(path) || !excludesPathSubstrings(path)) {
                return false;
            }
        }
        try {
            FileSystem fs = path.getFileSystem(myConf);
            FileStatus fileStatus = fs.getFileStatus(path);
            long fileModificationTimeMillis = fileStatus.getModificationTime();
            return accept(fileModificationTimeMillis);
        } catch (IOException e) {
            throw new ImportException("Cannot determine file modification time of " + path.getName(), e);
        }
    } else {
        // Reject anything that does not match a job conf filename.
        LOG.info(" Not a valid job conf / job history file " + path.getName());
        return false;
    }
}

From source file:com.twitter.hraven.etl.JobFilePreprocessor.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    // When we started processing. This is also the upper limit of files we
    // accept, next run will pick up the new incoming files.
    long processingStartMillis = System.currentTimeMillis();

    Configuration hbaseConf = HBaseConfiguration.create(getConf());

    // Grab input args and allow for -Dxyz style arguments
    String[] otherArgs = new GenericOptionsParser(hbaseConf, args).getRemainingArgs();

    // Grab the arguments we're looking for.
    CommandLine commandLine = parseArgs(otherArgs);

    // Output should be an hdfs path.
    FileSystem hdfs = FileSystem.get(hbaseConf);

    // Grab the input path argument
    String output = commandLine.getOptionValue("o");
    LOG.info(" output=" + output);
    Path outputPath = new Path(output);
    FileStatus outputFileStatus = hdfs.getFileStatus(outputPath);

    if (!outputFileStatus.isDir()) {
        throw new IOException("Output is not a directory" + outputFileStatus.getPath().getName());
    }

    // Grab the input path argument
    String input;
    if (commandLine.hasOption("i")) {
        input = commandLine.getOptionValue("i");
    } else {
        input = hbaseConf.get("mapred.job.tracker.history.completed.location");
    }
    LOG.info("input=" + input);

    // Grab the batch-size argument
    int batchSize;
    if (commandLine.hasOption("b")) {
        try {
            batchSize = Integer.parseInt(commandLine.getOptionValue("b"));
        } catch (NumberFormatException nfe) {
            throw new IllegalArgumentException(
                    "batch size option -b is is not a valid number: " + commandLine.getOptionValue("b"), nfe);
        }
        // Additional check
        if (batchSize < 1) {
            throw new IllegalArgumentException(
                    "Cannot process files in batches smaller than 1. Specified batch size option -b is: "
                            + commandLine.getOptionValue("b"));
        }
    } else {
        batchSize = DEFAULT_BATCH_SIZE;
    }

    boolean forceAllFiles = commandLine.hasOption("f");
    LOG.info("forceAllFiles: " + forceAllFiles);

    Path inputPath = new Path(input);
    FileStatus inputFileStatus = hdfs.getFileStatus(inputPath);

    if (!inputFileStatus.isDir()) {
        throw new IOException("Input is not a directory" + inputFileStatus.getPath().getName());
    }

    // Grab the cluster argument
    String cluster = commandLine.getOptionValue("c");
    LOG.info("cluster=" + cluster);

    /**
     * Grab the size of huge files to be moved argument
     * hbase cell can't store files bigger than
     * maxFileSize, hence no need to consider them for rawloading
     * Reference:
     * {@link https://github.com/twitter/hraven/issues/59}
     */
    String maxFileSizeStr = commandLine.getOptionValue("s");
    LOG.info("maxFileSize=" + maxFileSizeStr);
    long maxFileSize = DEFAULT_RAW_FILE_SIZE_LIMIT;
    try {
        maxFileSize = Long.parseLong(maxFileSizeStr);
    } catch (NumberFormatException nfe) {
        throw new ProcessingException(
                "Caught NumberFormatException during conversion " + " of maxFileSize to long", nfe);
    }

    ProcessRecordService processRecordService = new ProcessRecordService(hbaseConf);

    boolean success = true;
    try {

        // Figure out where we last left off (if anywhere at all)
        ProcessRecord lastProcessRecord = null;

        if (!forceAllFiles) {
            lastProcessRecord = processRecordService.getLastSuccessfulProcessRecord(cluster);
        }

        long minModificationTimeMillis = 0;
        if (lastProcessRecord != null) {
            // Start of this time period is the end of the last period.
            minModificationTimeMillis = lastProcessRecord.getMaxModificationTimeMillis();
        }

        // Do a sanity check. The end time of the last scan better not be later
        // than when we started processing.
        if (minModificationTimeMillis > processingStartMillis) {
            throw new RuntimeException("The last processing record has maxModificationMillis later than now: "
                    + lastProcessRecord);
        }

        // Accept only jobFiles and only those that fall in the desired range of
        // modification time.
        JobFileModifiedRangePathFilter jobFileModifiedRangePathFilter = new JobFileModifiedRangePathFilter(
                hbaseConf, minModificationTimeMillis);

        String timestamp = Constants.TIMESTAMP_FORMAT.format(new Date(minModificationTimeMillis));

        ContentSummary contentSummary = hdfs.getContentSummary(inputPath);
        LOG.info("Listing / filtering (" + contentSummary.getFileCount() + ") files in: " + inputPath
                + " that are modified since " + timestamp);

        // get the files in the done folder,
        // need to traverse dirs under done recursively for versions
        // that include MAPREDUCE-323: on/after hadoop 0.20.203.0
        // on/after cdh3u5
        FileStatus[] jobFileStatusses = FileLister.getListFilesToProcess(maxFileSize, true, hdfs, inputPath,
                jobFileModifiedRangePathFilter);

        LOG.info("Sorting " + jobFileStatusses.length + " job files.");

        Arrays.sort(jobFileStatusses, new FileStatusModificationComparator());

        // Process these files in batches at a time.
        int batchCount = BatchUtil.getBatchCount(jobFileStatusses.length, batchSize);
        LOG.info("Batch count: " + batchCount);
        for (int b = 0; b < batchCount; b++) {
            processBatch(jobFileStatusses, b, batchSize, processRecordService, cluster, outputPath);
        }

    } finally {
        processRecordService.close();
    }

    Statistics statistics = FileSystem.getStatistics(inputPath.toUri().getScheme(), hdfs.getClass());
    if (statistics != null) {
        LOG.info("HDFS bytes read: " + statistics.getBytesRead());
        LOG.info("HDFS bytes written: " + statistics.getBytesWritten());
        LOG.info("HDFS read ops: " + statistics.getReadOps());
        LOG.info("HDFS large read ops: " + statistics.getLargeReadOps());
        LOG.info("HDFS write ops: " + statistics.getWriteOps());
    }

    // Return the status
    return success ? 0 : 1;
}

From source file:com.twitter.hraven.etl.TestFileLister.java

License:Apache License

@Test
public void testPruneFileListBySize() throws IOException {

    long maxFileSize = 20L;
    FileStatus[] origList = new FileStatus[2];
    FileSystem hdfs = FileSystem.get(UTIL.getConfiguration());
    Path inputPath = new Path("/inputdir_filesize");
    boolean os = hdfs.mkdirs(inputPath);
    assertTrue(os);
    assertTrue(hdfs.exists(inputPath));

    final String JOB_HISTORY_FILE_NAME = "src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist";
    File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME);
    Path srcPath = new Path(jobHistoryfile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    Path expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[0] = hdfs.getFileStatus(expPath);

    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    File jobConfFile = new File(JOB_CONF_FILE_NAME);
    srcPath = new Path(jobConfFile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[1] = hdfs.getFileStatus(expPath);

    FileStatus[] prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 0);

    Path emptyFile = new Path(
            inputPath.toUri() + "/" + "job_1329341111111_0101-1329111113227-user2-Sleep.jhist");
    os = hdfs.createNewFile(emptyFile);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyFile));
    origList[0] = hdfs.getFileStatus(emptyFile);

    Path emptyConfFile = new Path(inputPath.toUri() + "/" + "job_1329341111111_0101_conf.xml");
    os = hdfs.createNewFile(emptyConfFile);

    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile));
    origList[1] = hdfs.getFileStatus(emptyConfFile);

    prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 2);

}

From source file:com.twitter.hraven.etl.TestFileLister.java

License:Apache License

/**
 * removes conf file which has already been put in prunedList
 *
 * @throws IOException
 */
@Test
public void testPruneFileListRemovingConfFromPruneList() throws IOException {

    long maxFileSize = 20L;
    FileStatus[] origList = new FileStatus[2];
    FileSystem hdfs = FileSystem.get(UTIL.getConfiguration());
    Path inputPath = new Path("/inputdir_filesize_pruneList");
    boolean os = hdfs.mkdirs(inputPath);
    assertTrue(os);
    assertTrue(hdfs.exists(inputPath));

    Path relocationPath = new Path("/relocation_filesize_pruneList");
    os = hdfs.mkdirs(relocationPath);
    assertTrue(os);
    assertTrue(hdfs.exists(relocationPath));

    Path emptyConfFile = new Path(inputPath.toUri() + "/" + "job_1329348432655_0001_conf.xml");
    os = hdfs.createNewFile(emptyConfFile);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile));
    origList[0] = hdfs.getFileStatus(emptyConfFile);

    final String JOB_HISTORY_FILE_NAME = "src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist";
    File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME);
    Path srcPath = new Path(jobHistoryfile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    Path expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[1] = hdfs.getFileStatus(expPath);

    FileStatus[] prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 0);
}

From source file:com.twitter.hraven.etl.TestFileLister.java

License:Apache License

/**
 * tests the case when several files are spread out in the dir and need to be removed
 *
 * @throws IOException
 */
@Test
public void testPruneFileListMultipleFilesAlreadyMovedCases() throws IOException {

    long maxFileSize = 20L;
    FileStatus[] origList = new FileStatus[12];
    FileSystem hdfs = FileSystem.get(UTIL.getConfiguration());
    Path inputPath = new Path("/inputdir_filesize_multiple");
    boolean os = hdfs.mkdirs(inputPath);
    assertTrue(os);
    assertTrue(hdfs.exists(inputPath));

    Path relocationPath = new Path("/relocation_filesize_multiple");
    os = hdfs.mkdirs(relocationPath);
    assertTrue(os);
    assertTrue(hdfs.exists(relocationPath));

    Path emptyFile = new Path(
            inputPath.toUri() + "/" + "job_1329341111111_0101-1329111113227-user2-Sleep.jhist");
    os = hdfs.createNewFile(emptyFile);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyFile));
    origList[0] = hdfs.getFileStatus(emptyFile);

    Path emptyConfFile = new Path(inputPath.toUri() + "/" + "job_1329341111111_0101_conf.xml");
    os = hdfs.createNewFile(emptyConfFile);

    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile));
    origList[1] = hdfs.getFileStatus(emptyConfFile);

    final String JOB_HISTORY_FILE_NAME = "src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist";
    File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME);
    Path srcPath = new Path(jobHistoryfile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    Path expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[2] = hdfs.getFileStatus(expPath);

    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    File jobConfFile = new File(JOB_CONF_FILE_NAME);
    srcPath = new Path(jobConfFile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[3] = hdfs.getFileStatus(expPath);

    Path inputPath2 = new Path(inputPath.toUri() + "/"
            + "job_1311222222255_0221-1311111143227-user10101-WordCount-1-SUCCEEDED-default.jhist");
    hdfs.copyFromLocalFile(srcPath, inputPath2);
    assertTrue(hdfs.exists(inputPath2));
    origList[4] = hdfs.getFileStatus(inputPath2);

    Path inputPath3 = new Path(inputPath.toUri() + "/"
            + "job_1399999999155_0991-1311111143227-user3321-TeraGen-1-SUCCEEDED-default.jhist");
    hdfs.copyFromLocalFile(srcPath, inputPath3);
    assertTrue(hdfs.exists(inputPath3));
    origList[5] = hdfs.getFileStatus(inputPath3);

    Path inputPath4 = new Path(inputPath.toUri() + "/"
            + "job_1399977777177_0771-1311111143227-user3321-TeraSort-1-SUCCEEDED-default.jhist");
    hdfs.copyFromLocalFile(srcPath, inputPath4);
    assertTrue(hdfs.exists(inputPath4));
    origList[6] = hdfs.getFileStatus(inputPath4);

    Path emptyFile2 = new Path(
            inputPath.toUri() + "/" + "job_1329343333333_5551-1329111113227-user2-SomethingElse.jhist");
    os = hdfs.createNewFile(emptyFile2);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyFile2));
    origList[7] = hdfs.getFileStatus(emptyFile2);

    Path emptyConfFile2 = new Path(inputPath.toUri() + "/" + "job_1329343333333_5551_conf.xml");
    os = hdfs.createNewFile(emptyConfFile2);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile2));
    origList[8] = hdfs.getFileStatus(emptyConfFile2);

    // this is an empty file which tests the toBeRemovedFileList
    // at the end of function pruneFileListBySize
    Path emptyConfFile3 = new Path(inputPath.toUri() + "/" + "job_1399999999155_0991_conf.xml");
    os = hdfs.createNewFile(emptyConfFile3);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile3));
    origList[9] = hdfs.getFileStatus(emptyConfFile3);

    Path inputConfPath2 = new Path(inputPath.toUri() + "/" + "job_1311222222255_0221_conf.xml");
    srcPath = new Path(jobConfFile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputConfPath2);
    assertTrue(hdfs.exists(inputConfPath2));
    origList[10] = hdfs.getFileStatus(inputConfPath2);

    // this is an empty file which tests the toBeRemovedFileList
    // at the end of function pruneFileListBySize
    Path emptyConfFile4 = new Path(inputPath.toUri() + "/" + "job_1399977777177_0771_conf.xml");
    os = hdfs.createNewFile(emptyConfFile4);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile4));
    origList[11] = hdfs.getFileStatus(emptyConfFile4);

    FileStatus[] prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 4);
}

From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezJobControlCompiler.java

License:Apache License

public DAG createDAG(TezOperPlan tezPlan, FileSystem remoteFs, TezConfiguration conf, ApplicationId appId,
        Path remoteStagingDir) throws IOException, YarnException {

    DAG dag = new DAG("MRRSleepJob");
    /*
          String jarPath = ClassUtil.findContainingJar(getClass());
          Path remoteJarPath = remoteFs.makeQualified(
    new Path(remoteStagingDir, "dag_job.jar"));
          remoteFs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
          FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);
    */
    Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();

    if (!pigContext.inIllustrator && pigContext.getExecType() != ExecType.TEZ_LOCAL) {

        // Setup the DistributedCache for this job
        for (URL extraJar : pigContext.extraJars) {
            //log.debug("Adding jar to DistributedCache: " + extraJar.toString());
            TezJobControlCompiler.putJarOnClassPathThroughDistributedCache(pigContext, conf, extraJar);
        }

        //Create the jar of all functions and classes required
        File submitJarFile = File.createTempFile("Job", ".jar");
        //log.info("creating jar file "+submitJarFile.getName());
        // ensure the job jar is deleted on exit
        submitJarFile.deleteOnExit();
        FileOutputStream fos = new FileOutputStream(submitJarFile);
        try {
            JarManager.createJar(fos, new HashSet<String>(), pigContext);
        } catch (ClassNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "dag_job.jar"));
        remoteFs.copyFromLocalFile(new Path(submitJarFile.getAbsolutePath()), remoteJarPath);
        FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);

        LocalResource dagJarLocalRsrc = LocalResource.newInstance(
                ConverterUtils.getYarnUrlFromPath(remoteJarPath), LocalResourceType.FILE,
                LocalResourceVisibility.APPLICATION, jarFileStatus.getLen(),
                jarFileStatus.getModificationTime());
        commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

        Path remoteTezJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "pig-tez.jar"));
        remoteFs.copyFromLocalFile(new Path("pig-tez.jar"), remoteTezJarPath);
        FileStatus tezJarFileStatus = remoteFs.getFileStatus(remoteTezJarPath);

        LocalResource tezJarLocalRsrc = LocalResource.newInstance(
                ConverterUtils.getYarnUrlFromPath(remoteTezJarPath), LocalResourceType.FILE,
                LocalResourceVisibility.APPLICATION, tezJarFileStatus.getLen(),
                tezJarFileStatus.getModificationTime());
        commonLocalResources.put("pig-tez.jar", tezJarLocalRsrc);

        //log.info("jar file "+submitJarFile.getName()+" created");
        //Start setting the JobConf properties
        conf.set("mapred.jar", submitJarFile.getPath());
    }

    /*
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(
    ConverterUtils.getYarnUrlFromPath(remoteJarPath),
    LocalResourceType.FILE,
    LocalResourceVisibility.APPLICATION,
    jarFileStatus.getLen(),
    jarFileStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);
    */

    Hashtable<TezOperator, Pair<Vertex, Configuration>> vertexMap = new Hashtable<TezOperator, Pair<Vertex, Configuration>>();

    List<TezOperator> operators = tezPlan.getRoots();

    // add settings for pig statistics
    String setScriptProp = conf.get(ScriptState.INSERT_ENABLED, "true");
    ScriptState ss = null;

    if (setScriptProp.equalsIgnoreCase("true")) {
        ss = ScriptState.get();
    }

    while (operators != null && operators.size() != 0) {

        List<TezOperator> successors = new ArrayList<TezOperator>();

        for (TezOperator oper : operators) {

            Configuration operConf = oper.configure(pigContext, conf);
            /*
            if (ss != null){
               ss.addSettingsToConf(oper, conf);
            }
            */
            List<TezOperator> predecessors = plan.getPredecessors(oper);

            if (predecessors != null && predecessors.size() != 0) {
                MultiStageMRConfToTezTranslator.translateVertexConfToTez(operConf,
                        vertexMap.get(predecessors.get(0)).second);
            } else {
                MultiStageMRConfToTezTranslator.translateVertexConfToTez(operConf, null);
            }

            List<TezOperator> operSuccessors = tezPlan.getSuccessors(oper);
            if (operSuccessors != null) {
                successors.addAll(operSuccessors);
            }

            MRHelpers.doJobClientMagic(operConf);

            //mapStageConf.setInt(MRJobConfig.NUM_MAPS, numMapper);

            Vertex operVertex = new Vertex(oper.name(),
                    new ProcessorDescriptor(oper.getProcessor(), MRHelpers.createUserPayloadFromConf(operConf)),
                    oper.getParallelism(), MRHelpers.getMapResource(operConf));

            oper.configureVertex(operVertex, operConf, commonLocalResources, remoteStagingDir);

            dag.addVertex(operVertex);
            if (predecessors != null) {

                for (TezOperator predecessor : predecessors) {
                    dag.addEdge(new Edge(vertexMap.get(predecessor).first, operVertex,
                            tezPlan.getEdgeProperty(predecessor, oper)));
                }

            }

            vertexMap.put(oper, new Pair<Vertex, Configuration>(operVertex, operConf));
        }

        operators = successors;
    }
    return dag;
}

From source file:com.uber.hoodie.common.table.log.HoodieLogFileReader.java

License:Apache License

HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize,
        boolean readBlockLazily, boolean reverseReader) throws IOException {
    FSDataInputStream fsDataInputStream = fs.open(logFile.getPath(), bufferSize);
    if (fsDataInputStream.getWrappedStream() instanceof FSInputStream) {
        this.inputStream = new FSDataInputStream(
                new BufferedFSInputStream((FSInputStream) fsDataInputStream.getWrappedStream(), bufferSize));
    } else {
        // fsDataInputStream.getWrappedStream() may be a BufferedFSInputStream;
        // need to wrap in another BufferedFSInputStream to make bufferSize work?
        this.inputStream = fsDataInputStream;
    }

    this.logFile = logFile;
    this.readerSchema = readerSchema;
    this.readBlockLazily = readBlockLazily;
    this.reverseReader = reverseReader;
    if (this.reverseReader) {
        this.reverseLogFilePosition = this.lastReverseLogFilePosition = fs.getFileStatus(logFile.getPath())
                .getLen();
    }
    addShutDownHook();
}