Example usage for org.apache.hadoop.fs FileStatus isFile

List of usage examples for org.apache.hadoop.fs FileStatus isFile

Introduction

On this page you can find usage examples for org.apache.hadoop.fs FileStatus isFile.

Prototype

public boolean isFile() 

Source Link

Document

Is this a file?

Usage

From source file:org.apache.tajo.parser.sql.TestSQLAnalyzer.java

License:Apache License

/**
 * Lists the regular files directly under {@code queries/TestSQLAnalyzer/<subdir>}, resolved
 * through the system class loader. Entries whose name contains {@code "add_partition"} are
 * skipped (see TAJO-1891).
 *
 * @param subdir sub-directory of the query resource root to scan
 * @return the matching entries converted to {@link File}s
 * @throws URISyntaxException if the resource URL cannot be converted to a URI
 * @throws IOException if the directory does not exist or cannot be listed
 */
public Collection<File> getResourceFiles(String subdir) throws URISyntaxException, IOException {
    URL uri = ClassLoader.getSystemResource("queries/TestSQLAnalyzer");
    Path positiveQueryDir = StorageUtil.concatPath(new Path(uri.toURI()), subdir);
    FileSystem fs = positiveQueryDir.getFileSystem(new TajoConf());

    if (!fs.exists(positiveQueryDir)) {
        throw new IOException("Cannot find " + positiveQueryDir);
    }

    // get only files
    Collection<FileStatus> files = filter(Lists.newArrayList(fs.listStatus(positiveQueryDir)),
            new Predicate<FileStatus>() {
                @Override
                public boolean apply(@Nullable FileStatus input) {
                    // TODO: This should be removed at TAJO-1891
                    if (input.getPath().getName().indexOf("add_partition") > -1) {
                        return false;
                    } else {
                        return input.isFile();
                    }
                }
            });

    // transform FileStatus into File
    return transform(files, new Function<FileStatus, File>() {
        @Override
        public File apply(@Nullable FileStatus fileStatus) {
            // Use the Path's own URI instead of re-parsing Path.toString(): the string form is
            // not escaped, so URI.create() rejects names containing e.g. spaces.
            return new File(fileStatus.getPath().toUri());
        }
    });
}

From source file:org.apache.tajo.plan.LogicalPlanner.java

License:Apache License

/**
 * Refreshes the byte-size statistic of a table from the content summary of its path.
 * Tables without a path or with the SYSTEM store type are left untouched, and any failure is
 * only logged as a warning.
 *
 * @param desc table description whose stats may be updated in place
 */
private void updatePhysicalInfo(TableDesc desc) {
    if (desc.getPath() == null || desc.getMeta().getStoreType() == StoreType.SYSTEM) {
        return;
    }

    try {
        Path tablePath = new Path(desc.getPath());
        FileSystem fileSystem = tablePath.getFileSystem(new Configuration());
        FileStatus tableStatus = fileSystem.getFileStatus(tablePath);

        // nothing to update without a stats object or for something that is neither dir nor file
        if (desc.getStats() == null || !(tableStatus.isDirectory() || tableStatus.isFile())) {
            return;
        }

        ContentSummary summary = fileSystem.getContentSummary(tablePath);
        if (summary != null) {
            desc.getStats().setNumBytes(summary.getLength());
        }
    } catch (Throwable t) {
        LOG.warn(t, t);
    }
}

From source file:org.apache.tajo.storage.FileStorageManager.java

License:Apache License

/**
 * Collects non-empty data files found at or below {@code path} into {@code result}, descending
 * into sub-directories recursively.
 *
 * @param fs File system used to inspect and list {@code path}
 * @param path The table path
 * @param result The final result files to be used
 * @param startFileIndex Qualifying files counted before this index are skipped, not collected
 * @param numResultFiles Directory scanning stops once {@code result} holds this many files
 * @param currentFileIndex Running count of qualifying files seen so far, shared by recursive calls
 * @param partitioned A flag to indicate if this table is partitioned
 * @param currentDepth Current visiting depth of partition directories
 * @param maxDepth The partition depth of this table
 * @throws IOException
 */
private void getNonZeroLengthDataFiles(FileSystem fs, Path path, List<FileStatus> result, int startFileIndex,
        int numResultFiles, AtomicInteger currentFileIndex, boolean partitioned, int currentDepth, int maxDepth)
        throws IOException {
    // Not a directory: the path itself is the only candidate.
    if (!fs.isDirectory(path)) {
        FileStatus single = fs.getFileStatus(path);
        if (single == null || single.getLen() <= 0) {
            return;
        }
        if (currentFileIndex.get() >= startFileIndex) {
            result.add(single);
        }
        currentFileIndex.incrementAndGet();
        return;
    }

    // Intermediate directory
    FileStatus[] children = fs.listStatus(path, StorageManager.hiddenFileFilter);
    if (children == null || children.length == 0) {
        return;
    }

    for (FileStatus child : children) {
        // stop as soon as enough files have been found
        if (result.size() >= numResultFiles) {
            return;
        }

        if (child.isDirectory()) {
            // descend one level deeper
            getNonZeroLengthDataFiles(fs, child.getPath(), result, startFileIndex, numResultFiles,
                    currentFileIndex, partitioned, currentDepth + 1, maxDepth);
            continue;
        }

        // For a partitioned table, files located in an intermediate directory are ignored;
        // a file is known to be in a leaf directory when currentDepth == maxDepth.
        boolean atAcceptedDepth = !partitioned || currentDepth == maxDepth;
        if (child.isFile() && child.getLen() > 0 && atAcceptedDepth) {
            if (currentFileIndex.get() >= startFileIndex) {
                result.add(child);
            }
            currentFileIndex.incrementAndGet();
        }
    }
}

From source file:org.apache.tajo.storage.FileTablespace.java

License:Apache License

/**
 * Finalizes result data. Tajo stores result data in the staging directory; this method moves it
 * from the staging directory to the final output directory when an output table URI is set.
 *
 * <p>Three cases are handled: INSERT OVERWRITE (the existing table data is backed up under the
 * staging directory and restored if the move fails), INSERT INTO an existing table, and
 * CREATE TABLE AS SELECT. After a successful move the parent of the staging directory is deleted.
 *
 * @param queryContext The query property
 * @param changeFileSeq If true change result file name with max sequence.
 * @return Saved path: the output table path, or the result directory inside the staging
 *         directory when no output table URI is set
 * @throws java.io.IOException if moving the result fails; the original failure is kept as cause
 */
protected Path commitOutputData(OverridableConf queryContext, boolean changeFileSeq) throws IOException {
    Path stagingDir = new Path(queryContext.get(QueryVars.STAGING_DIR));
    Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);
    Path finalOutputDir;
    if (!queryContext.get(QueryVars.OUTPUT_TABLE_URI, "").isEmpty()) {
        finalOutputDir = new Path(queryContext.get(QueryVars.OUTPUT_TABLE_URI));
        try {
            FileSystem fs = stagingResultDir.getFileSystem(conf);

            if (queryContext.getBool(QueryVars.OUTPUT_OVERWRITE, false)) { // INSERT OVERWRITE INTO

                // It moves the original table into the temporary location.
                // Then it moves the new result table into the original table location.
                // Upon failed, it recovers the original table if possible.
                boolean movedToOldTable = false;
                boolean committed = false;
                Path oldTableDir = new Path(stagingDir, TajoConstants.INSERT_OVERWIRTE_OLD_TABLE_NAME);
                ContentSummary summary = fs.getContentSummary(stagingResultDir);

                // When inserting empty data into a partitioned table, this flag decides whether the
                // existing data has to be removed or kept.
                boolean overwriteEnabled = queryContext
                        .getBool(SessionVars.PARTITION_NO_RESULT_OVERWRITE_ENABLED);

                // If existing data doesn't need to keep, check if there are some files.
                if ((!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty())
                        && (!overwriteEnabled || summary.getFileCount() > 0L)) {
                    // This is a map for existing non-leaf directory to rename. A key is current directory and a value is
                    // renaming directory.
                    Map<Path, Path> renameDirs = TUtil.newHashMap();
                    // This is a map for recovering existing partition directory. A key is current directory and a value is
                    // temporary directory to back up.
                    Map<Path, Path> recoveryDirs = TUtil.newHashMap();

                    try {
                        if (!fs.exists(finalOutputDir)) {
                            fs.mkdirs(finalOutputDir);
                        }

                        visitPartitionedDirectory(fs, stagingResultDir, finalOutputDir,
                                stagingResultDir.toString(), renameDirs, oldTableDir);

                        // Rename target partition directories
                        for (Map.Entry<Path, Path> entry : renameDirs.entrySet()) {
                            // Backup existing data files for recovering
                            if (fs.exists(entry.getValue())) {
                                // Literal replacement: replaceAll() would interpret the path as a
                                // regular expression and the replacement as a regex template.
                                String recoveryPathString = entry.getValue().toString()
                                        .replace(finalOutputDir.toString(), oldTableDir.toString());
                                Path recoveryPath = new Path(recoveryPathString);
                                fs.rename(entry.getValue(), recoveryPath);
                                recoveryDirs.put(entry.getValue(), recoveryPath);
                            }
                            // Delete existing directory
                            fs.delete(entry.getValue(), true);
                            // Rename staging directory to final output directory
                            fs.rename(entry.getKey(), entry.getValue());
                        }

                    } catch (IOException ioe) {
                        // Remove created dirs
                        for (Map.Entry<Path, Path> entry : renameDirs.entrySet()) {
                            fs.delete(entry.getValue(), true);
                        }

                        // Recovery renamed dirs: clear the original location (key) and move the
                        // backup (value) back. The backup itself must not be deleted here,
                        // otherwise there is nothing left to restore.
                        for (Map.Entry<Path, Path> entry : recoveryDirs.entrySet()) {
                            fs.delete(entry.getKey(), true);
                            fs.rename(entry.getValue(), entry.getKey());
                        }

                        throw new IOException(ioe.getMessage(), ioe);
                    }
                } else { // no partition
                    try {

                        // if the final output dir exists, move all contents to the temporary table dir.
                        // Otherwise, just make the final output dir. As a result, the final output dir will be empty.
                        if (fs.exists(finalOutputDir)) {
                            fs.mkdirs(oldTableDir);

                            for (FileStatus status : fs.listStatus(finalOutputDir, hiddenFileFilter)) {
                                fs.rename(status.getPath(), oldTableDir);
                            }

                            movedToOldTable = fs.exists(oldTableDir);
                        } else { // if the parent does not exist, make its parent directory.
                            fs.mkdirs(finalOutputDir);
                        }

                        // Move the results to the final output dir.
                        for (FileStatus status : fs.listStatus(stagingResultDir)) {
                            fs.rename(status.getPath(), finalOutputDir);
                        }

                        // Check the final output dir
                        committed = fs.exists(finalOutputDir);

                    } catch (IOException ioe) {
                        // recover the old table
                        if (movedToOldTable && !committed) {

                            // if commit is failed, recover the old data
                            for (FileStatus status : fs.listStatus(finalOutputDir, hiddenFileFilter)) {
                                fs.delete(status.getPath(), true);
                            }

                            for (FileStatus status : fs.listStatus(oldTableDir)) {
                                fs.rename(status.getPath(), finalOutputDir);
                            }
                        }

                        throw new IOException(ioe.getMessage(), ioe);
                    }
                }
            } else {
                String queryType = queryContext.get(QueryVars.COMMAND_TYPE);

                if (queryType != null && queryType.equals(NodeType.INSERT.name())) { // INSERT INTO an existing table

                    NumberFormat fmt = NumberFormat.getInstance();
                    fmt.setGroupingUsed(false);
                    fmt.setMinimumIntegerDigits(3);

                    if (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty()) {
                        for (FileStatus eachFile : fs.listStatus(stagingResultDir)) {
                            if (eachFile.isFile()) {
                                LOG.warn("Partition table can't have file in a staging dir: "
                                        + eachFile.getPath());
                                continue;
                            }
                            moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputDir, fmt, -1,
                                    changeFileSeq);
                        }
                    } else {
                        int maxSeq = StorageUtil.getMaxFileSequence(fs, finalOutputDir, false) + 1;
                        for (FileStatus eachFile : fs.listStatus(stagingResultDir)) {
                            if (eachFile.getPath().getName().startsWith("_")) {
                                continue;
                            }
                            moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputDir, fmt,
                                    maxSeq++, changeFileSeq);
                        }
                    }
                    // checking all file moved and remove empty dir
                    verifyAllFileMoved(fs, stagingResultDir);
                    FileStatus[] files = fs.listStatus(stagingResultDir);
                    if (files != null && files.length != 0) {
                        for (FileStatus eachFile : files) {
                            LOG.error("There are some unmoved files in staging dir:" + eachFile.getPath());
                        }
                    }
                } else { // CREATE TABLE AS SELECT (CTAS)
                    if (fs.exists(finalOutputDir)) {
                        for (FileStatus status : fs.listStatus(stagingResultDir)) {
                            fs.rename(status.getPath(), finalOutputDir);
                        }
                    } else {
                        fs.rename(stagingResultDir, finalOutputDir);
                    }
                    LOG.info("Moved from the staging dir to the output directory '" + finalOutputDir);
                }
            }

            // remove the staging directory if the final output dir is given.
            Path stagingDirRoot = stagingDir.getParent();
            fs.delete(stagingDirRoot, true);
        } catch (Throwable t) {
            // pass the throwable as well so that the stack trace is logged
            LOG.error(t.getMessage(), t);
            throw new IOException(t);
        }
    } else {
        finalOutputDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);
    }

    return finalOutputDir;
}

From source file:org.apache.tajo.storage.FileTablespace.java

License:Apache License

/**
 * Make sure all files are moved./*from   w  ww .j a va 2s.c o m*/
 * @param fs FileSystem
 * @param stagingPath The stagind directory
 * @return
 * @throws java.io.IOException
 */
private boolean verifyAllFileMoved(FileSystem fs, Path stagingPath) throws IOException {
    FileStatus[] files = fs.listStatus(stagingPath);
    if (files != null && files.length != 0) {
        for (FileStatus eachFile : files) {
            if (eachFile.isFile()) {
                LOG.error("There are some unmoved files in staging dir:" + eachFile.getPath());
                return false;
            } else {
                if (verifyAllFileMoved(fs, eachFile.getPath())) {
                    fs.delete(eachFile.getPath(), false);
                } else {
                    return false;
                }
            }
        }
    }

    return true;
}

From source file:org.apache.tajo.storage.StorageManager.java

License:Apache License

/**
 * Finalizes result data. Tajo stores result data in the staging directory.
 * If the query fails, clean up the staging directory.
 * Otherwise the query is successful, move to the final directory from the staging directory.
 *
 * @param queryContext The query property
 * @param finalEbId The final execution block id
 * @param plan The query plan/*from   w  w w  . j  a  va2  s  .com*/
 * @param schema The final output schema
 * @param tableDesc The description of the target table
 * @param changeFileSeq If true change result file name with max sequence.
 * @return Saved path
 * @throws java.io.IOException
 */
protected Path commitOutputData(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan,
        Schema schema, TableDesc tableDesc, boolean changeFileSeq) throws IOException {
    Path stagingDir = new Path(queryContext.get(QueryVars.STAGING_DIR));
    Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);
    Path finalOutputDir;
    if (!queryContext.get(QueryVars.OUTPUT_TABLE_PATH, "").isEmpty()) {
        finalOutputDir = new Path(queryContext.get(QueryVars.OUTPUT_TABLE_PATH));
        try {
            FileSystem fs = stagingResultDir.getFileSystem(conf);

            if (queryContext.getBool(QueryVars.OUTPUT_OVERWRITE, false)) { // INSERT OVERWRITE INTO

                // It moves the original table into the temporary location.
                // Then it moves the new result table into the original table location.
                // Upon failed, it recovers the original table if possible.
                boolean movedToOldTable = false;
                boolean committed = false;
                Path oldTableDir = new Path(stagingDir, TajoConstants.INSERT_OVERWIRTE_OLD_TABLE_NAME);
                ContentSummary summary = fs.getContentSummary(stagingResultDir);

                if (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty()
                        && summary.getFileCount() > 0L) {
                    // This is a map for existing non-leaf directory to rename. A key is current directory and a value is
                    // renaming directory.
                    Map<Path, Path> renameDirs = TUtil.newHashMap();
                    // This is a map for recovering existing partition directory. A key is current directory and a value is
                    // temporary directory to back up.
                    Map<Path, Path> recoveryDirs = TUtil.newHashMap();

                    try {
                        if (!fs.exists(finalOutputDir)) {
                            fs.mkdirs(finalOutputDir);
                        }

                        visitPartitionedDirectory(fs, stagingResultDir, finalOutputDir,
                                stagingResultDir.toString(), renameDirs, oldTableDir);

                        // Rename target partition directories
                        for (Map.Entry<Path, Path> entry : renameDirs.entrySet()) {
                            // Backup existing data files for recovering
                            if (fs.exists(entry.getValue())) {
                                String recoveryPathString = entry.getValue().toString()
                                        .replaceAll(finalOutputDir.toString(), oldTableDir.toString());
                                Path recoveryPath = new Path(recoveryPathString);
                                fs.rename(entry.getValue(), recoveryPath);
                                fs.exists(recoveryPath);
                                recoveryDirs.put(entry.getValue(), recoveryPath);
                            }
                            // Delete existing directory
                            fs.delete(entry.getValue(), true);
                            // Rename staging directory to final output directory
                            fs.rename(entry.getKey(), entry.getValue());
                        }

                    } catch (IOException ioe) {
                        // Remove created dirs
                        for (Map.Entry<Path, Path> entry : renameDirs.entrySet()) {
                            fs.delete(entry.getValue(), true);
                        }

                        // Recovery renamed dirs
                        for (Map.Entry<Path, Path> entry : recoveryDirs.entrySet()) {
                            fs.delete(entry.getValue(), true);
                            fs.rename(entry.getValue(), entry.getKey());
                        }

                        throw new IOException(ioe.getMessage());
                    }
                } else { // no partition
                    try {

                        // if the final output dir exists, move all contents to the temporary table dir.
                        // Otherwise, just make the final output dir. As a result, the final output dir will be empty.
                        if (fs.exists(finalOutputDir)) {
                            fs.mkdirs(oldTableDir);

                            for (FileStatus status : fs.listStatus(finalOutputDir,
                                    StorageManager.hiddenFileFilter)) {
                                fs.rename(status.getPath(), oldTableDir);
                            }

                            movedToOldTable = fs.exists(oldTableDir);
                        } else { // if the parent does not exist, make its parent directory.
                            fs.mkdirs(finalOutputDir);
                        }

                        // Move the results to the final output dir.
                        for (FileStatus status : fs.listStatus(stagingResultDir)) {
                            fs.rename(status.getPath(), finalOutputDir);
                        }

                        // Check the final output dir
                        committed = fs.exists(finalOutputDir);

                    } catch (IOException ioe) {
                        // recover the old table
                        if (movedToOldTable && !committed) {

                            // if commit is failed, recover the old data
                            for (FileStatus status : fs.listStatus(finalOutputDir,
                                    StorageManager.hiddenFileFilter)) {
                                fs.delete(status.getPath(), true);
                            }

                            for (FileStatus status : fs.listStatus(oldTableDir)) {
                                fs.rename(status.getPath(), finalOutputDir);
                            }
                        }

                        throw new IOException(ioe.getMessage());
                    }
                }
            } else {
                String queryType = queryContext.get(QueryVars.COMMAND_TYPE);

                if (queryType != null && queryType.equals(NodeType.INSERT.name())) { // INSERT INTO an existing table

                    NumberFormat fmt = NumberFormat.getInstance();
                    fmt.setGroupingUsed(false);
                    fmt.setMinimumIntegerDigits(3);

                    if (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty()) {
                        for (FileStatus eachFile : fs.listStatus(stagingResultDir)) {
                            if (eachFile.isFile()) {
                                LOG.warn("Partition table can't have file in a staging dir: "
                                        + eachFile.getPath());
                                continue;
                            }
                            moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputDir, fmt, -1,
                                    changeFileSeq);
                        }
                    } else {
                        int maxSeq = StorageUtil.getMaxFileSequence(fs, finalOutputDir, false) + 1;
                        for (FileStatus eachFile : fs.listStatus(stagingResultDir)) {
                            if (eachFile.getPath().getName().startsWith("_")) {
                                continue;
                            }
                            moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputDir, fmt,
                                    maxSeq++, changeFileSeq);
                        }
                    }
                    // checking all file moved and remove empty dir
                    verifyAllFileMoved(fs, stagingResultDir);
                    FileStatus[] files = fs.listStatus(stagingResultDir);
                    if (files != null && files.length != 0) {
                        for (FileStatus eachFile : files) {
                            LOG.error("There are some unmoved files in staging dir:" + eachFile.getPath());
                        }
                    }
                } else { // CREATE TABLE AS SELECT (CTAS)
                    if (fs.exists(finalOutputDir)) {
                        for (FileStatus status : fs.listStatus(stagingResultDir)) {
                            fs.rename(status.getPath(), finalOutputDir);
                        }
                    } else {
                        fs.rename(stagingResultDir, finalOutputDir);
                    }
                    LOG.info("Moved from the staging dir to the output directory '" + finalOutputDir);
                }
            }

            // remove the staging directory if the final output dir is given.
            Path stagingDirRoot = stagingDir.getParent();
            fs.delete(stagingDirRoot, true);
        } catch (Throwable t) {
            LOG.error(t);
            throw new IOException(t);
        }
    } else {
        finalOutputDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);
    }

    return finalOutputDir;
}

From source file:org.apache.tajo.util.history.HistoryReader.java

License:Apache License

/**
 * Reads the query history files below the history parent path and returns the recorded queries.
 * Each history file is read as a sequence of records: an int length followed by that many bytes
 * of UTF-8 encoded JSON.
 *
 * @param keyword if non-null, only queries whose SQL contains this text are returned
 * @return the matching queries ordered by query id string, descending; an empty list if the
 *         history path does not exist or its existence cannot be checked
 * @throws IOException if a history directory cannot be listed
 */
public List<QueryInfo> getQueries(String keyword) throws IOException {
    List<QueryInfo> found = new ArrayList<QueryInfo>();

    FileSystem fs = HistoryWriter.getNonCrcFileSystem(historyParentPath, tajoConf);
    boolean historyExists;
    try {
        historyExists = fs.exists(historyParentPath);
    } catch (Throwable e) {
        return found;
    }
    if (!historyExists) {
        return found;
    }

    FileStatus[] dateDirs = fs.listStatus(historyParentPath);
    if (dateDirs == null || dateDirs.length == 0) {
        return found;
    }

    for (FileStatus dateDir : dateDirs) {
        Path queryListPath = new Path(dateDir.getPath(), HistoryWriter.QUERY_LIST);
        if (dateDir.isFile() || !fs.exists(queryListPath)) {
            continue;
        }

        FileStatus[] historyFiles = fs.listStatus(queryListPath);
        if (historyFiles == null || historyFiles.length == 0) {
            continue;
        }

        for (FileStatus historyFile : historyFiles) {
            Path path = historyFile.getPath();
            boolean isHistoryFile = !historyFile.isDirectory()
                    && path.getName().endsWith(HistoryWriter.HISTORY_FILE_POSTFIX);
            if (!isHistoryFile) {
                continue;
            }

            FSDataInputStream in = null;
            try {
                in = fs.open(path);

                // reusable record buffer, grown on demand
                byte[] buf = new byte[100 * 1024];
                for (;;) {
                    int length = in.readInt();
                    if (length > buf.length) {
                        buf = new byte[length];
                    }
                    in.readFully(buf, 0, length);
                    String json = new String(buf, 0, length, Bytes.UTF8_CHARSET);
                    QueryInfo queryInfo = QueryInfo.fromJson(json);
                    if (keyword == null || queryInfo.getSql().indexOf(keyword) >= 0) {
                        found.add(queryInfo);
                    }
                }
            } catch (EOFException e) {
                // end of file: this is how the read loop above terminates
            } catch (Throwable e) {
                LOG.warn("Reading error:" + path + ", " + e.getMessage());
            } finally {
                IOUtils.cleanup(LOG, in);
            }
        }
    }

    Collections.sort(found, new Comparator<QueryInfo>() {
        @Override
        public int compare(QueryInfo query1, QueryInfo query2) {
            String firstId = query1.getQueryIdStr().toString();
            String secondId = query2.getQueryIdStr().toString();
            // reversed operands: larger id strings come first
            return secondId.compareTo(firstId);
        }
    });

    return found;
}

From source file:org.apache.tez.test.TestTezJobs.java

License:Apache License

/**
 * Asserts that {@code outputDir} contains a {@code _SUCCESS} file and exactly one {@code part-}
 * file, then verifies that part file as ordered word count output.
 *
 * @param outputDir directory holding the job output
 * @param fs file system used to list {@code outputDir}
 * @throws IOException if the directory cannot be listed
 */
private void verifyOutput(Path outputDir, FileSystem fs) throws IOException {
    FileStatus[] entries = fs.listStatus(outputDir);
    Path resultFile = null;
    boolean foundResult = false;
    boolean foundSuccessFile = false;

    for (FileStatus entry : entries) {
        if (!entry.isFile()) {
            continue;
        }

        String name = entry.getPath().getName();
        if (name.equals("_SUCCESS")) {
            foundSuccessFile = true;
            continue;
        }
        if (!name.startsWith("part-")) {
            continue;
        }

        // a second part file is a test failure
        if (foundResult) {
            fail("Found 2 part files instead of 1" + ", paths=" + resultFile + "," + entry.getPath());
        }
        foundResult = true;
        resultFile = entry.getPath();
        LOG.info("Found output at " + resultFile);
    }

    assertTrue(foundResult);
    assertTrue(resultFile != null);
    assertTrue(foundSuccessFile);
    verifyOrderedWordCountOutput(resultFile, fs);
}

From source file:org.bgi.flexlab.gaea.data.mapreduce.input.header.SamHdfsFileHeader.java

License:Open Source License

/**
 * Builds one merged SAM header for {@code input}. If {@code input} is a regular file its header
 * is read directly; otherwise every entry accepted by {@link HeaderPathFilter} below it is
 * visited, recursing into entries that are not regular files.
 *
 * <p>As a side effect the class-level {@code headerMergerSortOrder} is set to {@code SORT_ORDER}
 * when all headers match it (or sorting is unsorted/assumed), and to unsorted otherwise.
 *
 * @param input file or directory to read headers from
 * @param fs file system that holds {@code input}
 * @param conf passed through to recursive calls; not otherwise used here
 * @param cram if {@code true} headers are read as CRAM, otherwise as SAM
 * @return the header merged from all distinct headers that were found
 * @throws FileNotExistException if the status of {@code input} cannot be obtained
 * @throws RuntimeException if listing a directory or reading a header fails; the underlying
 *         {@link IOException} is attached as cause
 */
public static SAMFileHeader traversal(Path input, FileSystem fs, Configuration conf, boolean cram) {
    ArrayList<SAMFileHeader> mergeHeaders = new ArrayList<SAMFileHeader>();
    SAMFileHeader mergedHeader = null;
    boolean matchedSortOrders = true;

    FileStatus status = null;
    try {
        status = fs.getFileStatus(input);
    } catch (IOException e2) {
        throw new FileNotExistException(input.getName());
    }

    if (status.isFile()) {
        SAMFileHeader header = readHeaderOrFail(fs, input, cram);
        matchedSortOrders = matchedSortOrders && header.getSortOrder() == SORT_ORDER;
        if (!contains(header, mergeHeaders)) {
            mergeHeaders.add(header);
        }
    } else {
        FileStatus[] stats = null;
        try {
            stats = fs.listStatus(input, new HeaderPathFilter());
        } catch (IOException e) {
            // keep the original exception as cause so the stack trace is not lost
            throw new RuntimeException(e.toString(), e);
        }

        for (FileStatus file : stats) {
            Path filePath = file.getPath();
            SAMFileHeader header;
            if (file.isFile()) {
                header = readHeaderOrFail(fs, filePath, cram);
            } else {
                header = traversal(filePath, fs, conf, cram);
            }
            matchedSortOrders = matchedSortOrders && header.getSortOrder() == SORT_ORDER;
            if (!contains(header, mergeHeaders)) {
                mergeHeaders.add(header);
            }
        }
    }
    if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED) {
        headerMergerSortOrder = SORT_ORDER;
    } else {
        headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
    }
    mergedHeader = new SamFileHeaderMerger(headerMergerSortOrder, mergeHeaders, MERGE_SEQUENCE_DICTIONARIES)
            .getMergedHeader();
    return mergedHeader;
}

/** Reads the SAM or CRAM header of a single file, rethrowing I/O failures unchecked with cause. */
private static SAMFileHeader readHeaderOrFail(FileSystem fs, Path file, boolean cram) {
    try {
        if (cram) {
            return getCramHeader(fs, file);
        }
        return getSAMHeader(fs, file);
    } catch (IOException e) {
        throw new RuntimeException(e.toString(), e);
    }
}

From source file:org.bgi.flexlab.gaea.data.mapreduce.util.HdfsFilesReader.java

License:Open Source License

/**
 * Collects the files at or below {@code path} into {@code files} and, if any file is known
 * afterwards, opens a line reader on the first one.
 *
 * <p>Names for which {@code filter(name)} returns {@code true} are skipped. Directories are
 * listed with {@code pathFilter} and traversed recursively.
 *
 * @param path file or directory to start from
 * @param pathFilter filter applied when listing directories
 * @throws FileNotExistException if the status of {@code path} cannot be obtained
 * @throws RuntimeException if a directory cannot be listed or the first file cannot be opened;
 *         the underlying {@link IOException} is attached as cause
 */
public void traversal(String path, PathFilter pathFilter) {
    Path p = new Path(path);

    fs = HdfsFileManager.getFileSystem(p, conf);
    FileStatus status = null;
    try {
        status = fs.getFileStatus(p);
    } catch (IOException e2) {
        throw new FileNotExistException(p.getName());
    }

    if (status.isFile()) {
        if (!filter(p.getName())) {
            files.add(p);
        }
    } else {
        collectFilesRecursively(p, pathFilter);
    }

    if (size() == 0) {
        return;
    }
    // Open the reader once, after the whole tree has been collected, so that nested directories
    // do not each open (and abandon) a stream of their own.
    FSDataInputStream currInput;
    try {
        currInput = fs.open(files.get(0));
        lineReader = new LineReader(currInput, conf);
    } catch (IOException e) {
        throw new RuntimeException(e.toString(), e);
    }
}

/** Adds every non-filtered file below {@code dir} to {@code files}, descending into sub-directories. */
private void collectFilesRecursively(Path dir, PathFilter pathFilter) {
    FileStatus[] stats;
    try {
        stats = fs.listStatus(dir, pathFilter);
    } catch (IOException e) {
        throw new RuntimeException(e.toString(), e);
    }

    for (FileStatus file : stats) {
        if (!file.isFile()) {
            // recurse on the entry's path; FileStatus.toString() is a debug string, not a path
            collectFilesRecursively(file.getPath(), pathFilter);
        } else if (!filter(file.getPath().getName())) {
            files.add(file.getPath());
        }
    }
}