Example usage for org.apache.hadoop.fs FileSystem isFile

List of usage examples for org.apache.hadoop.fs FileSystem isFile

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem isFile.

Prototype

@Deprecated
public boolean isFile(Path f) throws IOException 

Document

True iff the named path is a regular file. The method is deprecated; the replacement is to call getFileStatus(Path) and check FileStatus#isFile() on the result.
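
Below is a minimal, self-contained sketch of both forms, the deprecated call and its replacement. The path, configuration, and class name are placeholders, not taken from any project quoted on this page.

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsFileExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path p = new Path("/tmp/example.txt"); // placeholder path
        FileSystem fs = p.getFileSystem(conf);

        // Deprecated convenience call: returns false for directories and for missing paths.
        boolean isFile = fs.isFile(p);

        // Non-deprecated equivalent: a single getFileStatus() round trip, which also
        // distinguishes a missing path (FileNotFoundException) from a non-regular file.
        try {
            FileStatus status = fs.getFileStatus(p);
            isFile = status.isFile();
        } catch (FileNotFoundException e) {
            isFile = false;
        }

        System.out.println(p + " is a regular file: " + isFile);
    }
}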

Usage

From source file: org.apache.tajo.storage.AbstractStorageManager.java

License: Apache License

/**
 * Generate the list of files and make them into FileSplits.
 *
 * @throws IOException
 */
public List<FileFragment> getSplits(String tableName, TableMeta meta, Schema schema, Path... inputs)
        throws IOException {
    // generate splits

    List<FileFragment> splits = Lists.newArrayList();
    List<FileFragment> volumeSplits = Lists.newArrayList();
    List<BlockLocation> blockLocations = Lists.newArrayList();

    for (Path p : inputs) {
        FileSystem fs = p.getFileSystem(conf);
        ArrayList<FileStatus> files = Lists.newArrayList();
        if (fs.isFile(p)) {
            files.addAll(Lists.newArrayList(fs.getFileStatus(p)));
        } else {
            files.addAll(listStatus(p));
        }

        int previousSplitSize = splits.size();
        for (FileStatus file : files) {
            Path path = file.getPath();
            long length = file.getLen();
            if (length > 0) {
                // Get locations of blocks of file
                BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
                boolean splittable = isSplittable(meta, schema, path, file);
                if (blocksMetadataEnabled && fs instanceof DistributedFileSystem) {

                    if (splittable) {
                        for (BlockLocation blockLocation : blkLocations) {
                            volumeSplits.add(makeSplit(tableName, path, blockLocation));
                        }
                        blockLocations.addAll(Arrays.asList(blkLocations));

                    } else { // Non splittable
                        long blockSize = blkLocations[0].getLength();
                        if (blockSize >= length) {
                            blockLocations.addAll(Arrays.asList(blkLocations));
                            for (BlockLocation blockLocation : blkLocations) {
                                volumeSplits.add(makeSplit(tableName, path, blockLocation));
                            }
                        } else {
                            splits.add(makeNonSplit(tableName, path, 0, length, blkLocations));
                        }
                    }

                } else {
                    if (splittable) {

                        long minSize = Math.max(getMinSplitSize(), 1);

                        long blockSize = file.getBlockSize(); // the s3n REST API reports a block size, but blockLocations has only one entry
                        long splitSize = Math.max(minSize, blockSize);
                        long bytesRemaining = length;

                        // for s3
                        while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                            int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                            splits.add(makeSplit(tableName, path, length - bytesRemaining, splitSize,
                                    blkLocations[blkIndex].getHosts()));
                            bytesRemaining -= splitSize;
                        }
                        if (bytesRemaining > 0) {
                            int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                            splits.add(makeSplit(tableName, path, length - bytesRemaining, bytesRemaining,
                                    blkLocations[blkIndex].getHosts()));
                        }
                    } else { // Non splittable
                        splits.add(makeNonSplit(tableName, path, 0, length, blkLocations));
                    }
                }
            } else {
                //for zero length files
                splits.add(makeSplit(tableName, path, 0, length));
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("# of splits per partition: " + (splits.size() - previousSplitSize));
        }
    }

    // Combine original fileFragments with new VolumeId information
    setVolumeMeta(volumeSplits, blockLocations);
    splits.addAll(volumeSplits);
    LOG.info("Total # of splits: " + splits.size());
    return splits;
}

From source file: org.apache.tajo.storage.FileStorageManager.java

License: Apache License

/**
 * Generate the list of files and make them into FileSplits.
 *
 * @throws IOException
 */
public List<Fragment> getSplits(String tableName, TableMeta meta, Schema schema, Path... inputs)
        throws IOException {
    // generate splits

    List<Fragment> splits = Lists.newArrayList();
    List<Fragment> volumeSplits = Lists.newArrayList();
    List<BlockLocation> blockLocations = Lists.newArrayList();

    for (Path p : inputs) {
        FileSystem fs = p.getFileSystem(conf);

        ArrayList<FileStatus> files = Lists.newArrayList();
        if (fs.isFile(p)) {
            files.addAll(Lists.newArrayList(fs.getFileStatus(p)));
        } else {
            files.addAll(listStatus(p));
        }

        int previousSplitSize = splits.size();
        for (FileStatus file : files) {
            Path path = file.getPath();
            long length = file.getLen();
            if (length > 0) {
                // Get locations of blocks of file
                BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
                boolean splittable = isSplittable(meta, schema, path, file);
                if (blocksMetadataEnabled && fs instanceof DistributedFileSystem) {

                    if (splittable) {
                        for (BlockLocation blockLocation : blkLocations) {
                            volumeSplits.add(makeSplit(tableName, path, blockLocation));
                        }
                        blockLocations.addAll(Arrays.asList(blkLocations));

                    } else { // Non splittable
                        long blockSize = blkLocations[0].getLength();
                        if (blockSize >= length) {
                            blockLocations.addAll(Arrays.asList(blkLocations));
                            for (BlockLocation blockLocation : blkLocations) {
                                volumeSplits.add(makeSplit(tableName, path, blockLocation));
                            }
                        } else {
                            splits.add(makeNonSplit(tableName, path, 0, length, blkLocations));
                        }
                    }

                } else {
                    if (splittable) {

                        long minSize = Math.max(getMinSplitSize(), 1);

                        long blockSize = file.getBlockSize(); // the s3n REST API reports a block size, but blockLocations has only one entry
                        long splitSize = Math.max(minSize, blockSize);
                        long bytesRemaining = length;

                        // for s3
                        while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                            int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                            splits.add(makeSplit(tableName, path, length - bytesRemaining, splitSize,
                                    blkLocations[blkIndex].getHosts()));
                            bytesRemaining -= splitSize;
                        }
                        if (bytesRemaining > 0) {
                            int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                            splits.add(makeSplit(tableName, path, length - bytesRemaining, bytesRemaining,
                                    blkLocations[blkIndex].getHosts()));
                        }
                    } else { // Non splittable
                        splits.add(makeNonSplit(tableName, path, 0, length, blkLocations));
                    }
                }
            } else {
                //for zero length files
                splits.add(makeSplit(tableName, path, 0, length));
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("# of splits per partition: " + (splits.size() - previousSplitSize));
        }
    }

    // Combine original fileFragments with new VolumeId information
    setVolumeMeta(volumeSplits, blockLocations);
    splits.addAll(volumeSplits);
    LOG.info("Total # of splits: " + splits.size());
    return splits;
}

From source file: org.apache.tinkerpop.gremlin.hadoop.structure.hdfs.HDFSTools.java

License: Apache License

public static List<Path> getAllFilePaths(final FileSystem fs, Path path, final PathFilter filter)
        throws IOException {
    if (null == path)
        path = fs.getHomeDirectory();
    if (path.toString().equals(FORWARD_SLASH))
        path = new Path("");

    final List<Path> paths = new ArrayList<Path>();
    if (fs.isFile(path))
        paths.add(path);
    else {
        for (final FileStatus status : fs.globStatus(new Path(path + FORWARD_ASTERISK), filter)) {
            final Path next = status.getPath();
            paths.addAll(getAllFilePaths(fs, next, filter));
        }
    }
    return paths;
}
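
For comparison, here is a hedged sketch (not part of the TinkerPop source) of the same recursive listing built on FileSystem.listFiles(path, true), which walks the tree in a single call and needs no isFile check because the iterator yields regular files only. It assumes the imports used in the snippet above plus org.apache.hadoop.fs.RemoteIterator and LocatedFileStatus, omits the FORWARD_SLASH special case, and, unlike the original, applies the filter to leaf files only rather than at every directory level.

public static List<Path> getAllFilePathsAlternative(final FileSystem fs, Path path, final PathFilter filter)
        throws IOException {
    if (null == path)
        path = fs.getHomeDirectory();

    final List<Path> paths = new ArrayList<>();
    // listFiles(path, true) recurses and returns only regular files.
    final RemoteIterator<LocatedFileStatus> it = fs.listFiles(path, true);
    while (it.hasNext()) {
        final Path next = it.next().getPath();
        // listFiles takes no PathFilter, so the filter is applied here instead.
        if (null == filter || filter.accept(next))
            paths.add(next);
    }
    return paths;
}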

From source file: org.apache.tinkerpop.gremlin.hadoop.structure.hdfs.HDFSTools.java

License: Apache License

public static void decompressPath(final FileSystem fs, final String in, final String out,
        final String compressedFileSuffix, final boolean deletePrevious) throws IOException {
    final Path inPath = new Path(in);

    if (fs.isFile(inPath))
        HDFSTools.decompressFile(fs, in, out, deletePrevious);
    else {
        final Path outPath = new Path(out);
        if (!fs.exists(outPath))
            fs.mkdirs(outPath);
        for (final Path path : FileUtil.stat2Paths(fs.globStatus(new Path(in + FORWARD_ASTERISK)))) {
            if (path.getName().endsWith(compressedFileSuffix))
                HDFSTools.decompressFile(fs, path.toString(),
                        outPath.toString() + FORWARD_SLASH + path.getName().split("\\.")[0], deletePrevious);
        }
    }
}

From source file: org.apache.tinkerpop.gremlin.hadoop.structure.io.FileSystemStorage.java

License: Apache License

private static List<Path> getAllFilePaths(final FileSystem fs, Path path, final PathFilter filter)
        throws IOException {
    if (null == path)
        path = fs.getHomeDirectory();
    if (path.toString().equals(FORWARD_SLASH))
        path = new Path("");

    final List<Path> paths = new ArrayList<Path>();
    if (fs.isFile(path))
        paths.add(path);
    else {
        for (final FileStatus status : fs.globStatus(new Path(path + FORWARD_ASTERISK), filter)) {
            final Path next = status.getPath();
            paths.addAll(getAllFilePaths(fs, next, filter));
        }
    }
    return paths;
}

From source file: org.apache.vxquery.metadata.VXQueryCollectionOperatorDescriptor.java

License: Apache License

@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions)
        throws HyracksDataException {
    final FrameTupleAccessor fta = new FrameTupleAccessor(ctx.getFrameSize(),
            recordDescProvider.getInputRecordDescriptor(getActivityId(), 0));
    final int fieldOutputCount = recordDescProvider.getOutputRecordDescriptor(getActivityId(), 0)
            .getFieldCount();
    final ByteBuffer frame = ctx.allocateFrame();
    final FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize(), fieldOutputCount);
    final short partitionId = (short) ctx.getTaskAttemptId().getTaskId().getPartition();
    final ITreeNodeIdProvider nodeIdProvider = new TreeNodeIdProvider(partitionId, dataSourceId,
            totalDataSources);
    final String nodeId = ctx.getJobletContext().getApplicationContext().getNodeId();
    final DynamicContext dCtx = (DynamicContext) ctx.getJobletContext().getGlobalJobData();

    final String collectionName = collectionPartitions[partition % collectionPartitions.length];
    final XMLParser parser = new XMLParser(false, nodeIdProvider, nodeId, frame, appender, childSeq,
            dCtx.getStaticContext());

    return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
        @Override
        public void open() throws HyracksDataException {
            appender.reset(frame, true);
            writer.open();
            hdfs = new HDFSFunctions();
        }

        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            fta.reset(buffer);
            String collectionModifiedName = collectionName.replace("${nodeId}", nodeId);
            if (!collectionModifiedName.contains("hdfs:/")) {
                File collectionDirectory = new File(collectionModifiedName);
                //check if directory is in the local file system
                if (collectionDirectory.exists()) {
                    // Go through each tuple.
                    if (collectionDirectory.isDirectory()) {
                        for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) {
                            Iterator<File> it = FileUtils.iterateFiles(collectionDirectory,
                                    new VXQueryIOFileFilter(), TrueFileFilter.INSTANCE);
                            while (it.hasNext()) {
                                File xmlDocument = it.next();
                                if (LOGGER.isLoggable(Level.FINE)) {
                                    LOGGER.fine(
                                            "Starting to read XML document: " + xmlDocument.getAbsolutePath());
                                }
                                parser.parseElements(xmlDocument, writer, fta, tupleIndex);
                            }
                        }
                    } else {
                        throw new HyracksDataException("Invalid directory parameter (" + nodeId + ":"
                                + collectionDirectory.getAbsolutePath() + ") passed to collection.");
                    }
                }
            } else {
                // Else check in HDFS file system
                // Get instance of the HDFS filesystem
                FileSystem fs = hdfs.getFileSystem();
                if (fs != null) {
                    collectionModifiedName = collectionModifiedName.replaceAll("hdfs:/", "");
                    Path directory = new Path(collectionModifiedName);
                    Path xmlDocument;
                    if (tag != null) {
                        hdfs.setJob(directory.getName(), tag);
                        tag = "<" + tag + ">";
                        Job job = hdfs.getJob();
                        InputFormat inputFormat = hdfs.getinputFormat();
                        try {
                            hdfs.scheduleSplits();
                            ArrayList<Integer> schedule = hdfs
                                    .getScheduleForNode(InetAddress.getLocalHost().getHostName());
                            List<InputSplit> splits = hdfs.getSplits();
                            List<FileSplit> fileSplits = new ArrayList<FileSplit>();
                            for (int i : schedule) {
                                fileSplits.add((FileSplit) splits.get(i));
                            }
                            FileSplitsFactory splitsFactory = new FileSplitsFactory(fileSplits);
                            List<FileSplit> inputSplits = splitsFactory.getSplits();
                            ContextFactory ctxFactory = new ContextFactory();
                            int size = inputSplits.size();
                            InputStream stream;
                            String value;
                            RecordReader reader;
                            TaskAttemptContext context;
                            for (int i = 0; i < size; i++) {
                                //read split
                                context = ctxFactory.createContext(job.getConfiguration(), i);
                                try {
                                    reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                                    reader.initialize(inputSplits.get(i), context);
                                    while (reader.nextKeyValue()) {
                                        value = reader.getCurrentValue().toString();
                                        //Split value if it contains more than one item with the tag
                                        if (StringUtils.countMatches(value, tag) > 1) {
                                            String items[] = value.split(tag);
                                            for (String item : items) {
                                                if (item.length() > 0) {
                                                    item = START_TAG + tag + item;
                                                    stream = new ByteArrayInputStream(
                                                            item.getBytes(StandardCharsets.UTF_8));
                                                    parser.parseHDFSElements(stream, writer, fta, i);
                                                }
                                            }
                                        } else {
                                            value = START_TAG + value;
                                            //create an input stream to the file currently reading and send it to parser
                                            stream = new ByteArrayInputStream(
                                                    value.getBytes(StandardCharsets.UTF_8));
                                            parser.parseHDFSElements(stream, writer, fta, i);
                                        }
                                    }

                                } catch (InterruptedException e) {
                                    if (LOGGER.isLoggable(Level.SEVERE)) {
                                        LOGGER.severe(e.getMessage());
                                    }
                                }
                            }

                        } catch (IOException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (ParserConfigurationException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (SAXException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        }
                    } else {
                        try {
                            //check if the path exists and is a directory
                            if (fs.exists(directory) && fs.isDirectory(directory)) {
                                for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) {
                                    //read every file in the directory
                                    RemoteIterator<LocatedFileStatus> it = fs.listFiles(directory, true);
                                    while (it.hasNext()) {
                                        xmlDocument = it.next().getPath();
                                        if (fs.isFile(xmlDocument)) {
                                            if (LOGGER.isLoggable(Level.FINE)) {
                                                LOGGER.fine("Starting to read XML document: "
                                                        + xmlDocument.getName());
                                            }
                                            //create an input stream to the file currently reading and send it to parser
                                            InputStream in = fs.open(xmlDocument).getWrappedStream();
                                            parser.parseHDFSElements(in, writer, fta, tupleIndex);
                                        }
                                    }
                                }
                            } else {
                                throw new HyracksDataException("Invalid HDFS directory parameter (" + nodeId
                                        + ":" + directory + ") passed to collection.");
                            }
                        } catch (FileNotFoundException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (IOException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        }
                    }
                    try {
                        fs.close();
                    } catch (IOException e) {
                        if (LOGGER.isLoggable(Level.SEVERE)) {
                            LOGGER.severe(e.getMessage());
                        }
                    }
                }
            }
        }

        @Override
        public void fail() throws HyracksDataException {
            writer.fail();
        }

        @Override
        public void close() throws HyracksDataException {
            // Check if needed?
            fta.reset(frame);
            if (fta.getTupleCount() > 0) {
                FrameUtils.flushFrame(frame, writer);
            }
            writer.close();
        }
    };
}

From source file: org.bgi.flexlab.gaea.data.structure.header.MultipleVCFHeader.java

License: Open Source License

public void mergeHeader(Path inputPath, String output, Job job, boolean distributeCacheHeader) {
    Configuration conf = job.getConfiguration();
    try {
        FileSystem fs = inputPath.getFileSystem(conf);
        if (!fs.exists(inputPath)) {
            System.out.println("Input File Path is not exist! Please check input var.");
            System.exit(-1);
        }
        if (fs.isFile(inputPath)) {
            if (validPath(inputPath, fs)) {
                readVcfHeader(inputPath, conf);
            }
        } else {
            FileStatus stats[] = fs.listStatus(inputPath);

            for (FileStatus file : stats) {
                Path filePath = file.getPath();
                mergeHeader(filePath, output, job, distributeCacheHeader);
            }
        }
        fs.close();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    if (distributeCacheHeader) {
        distributeCacheVcfHeader(output, job, conf);
    } else {
        writeHeaderToHDFS(output, conf);
    }
}

From source file: org.bgi.flexlab.gaea.data.structure.header.SingleVCFHeader.java

License: Open Source License

public void readSingleHeader(Path vcfPath, Configuration conf) throws IOException {
    FileSystem fs = vcfPath.getFileSystem(conf);
    if (!fs.exists(vcfPath))
        throw new RuntimeException(vcfPath.toString() + " does not exist.");
    if (!fs.isFile(vcfPath)) {
        throw new RuntimeException(
                vcfPath.toString() + " is not a file. GaeaSingleVcfHeader parser only support one vcf file.");
    }
    FSDataInputStream in = fs.open(vcfPath);
    AsciiLineReaderIterator it = new AsciiLineReaderIterator(new AsciiLineReader(in));
    VCFCodec codec = new VCFCodec();
    Object header = codec.readHeader(it);
    vcfHeader = (VCFHeader) (((FeatureCodecHeader) header).getHeaderValue());
    sampleNames.addAll(vcfHeader.getGenotypeSamples());
    buildHeaderInfo();
    it.close();
}

From source file: org.bgi.flexlab.gaea.tools.annotator.config.Config.java

License: Open Source License

/**
 * Load properties from configuration file
 * @return true if success
 */
boolean loadProperties(String configFileName) {
    properties = new Properties();
    try {
        Path confFilePath = new Path(configFileName);
        FileSystem fs = confFilePath.getFileSystem(conf);
        if (!fs.exists(confFilePath)) {
            throw new RuntimeException(confFilePath.toString() + " does not exist.");
        }
        if (!fs.isFile(confFilePath)) {
            throw new RuntimeException(confFilePath.toString() + " is not a file.");
        }
        properties.load(fs.open(confFilePath));

        if (!properties.isEmpty()) {
            return true;
        }
    } catch (Exception e) {
        properties = null;
        throw new RuntimeException(e);
    }

    return false;
}
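
The exists()/isFile() pairs in the examples above cost two NameNode round trips each; below is a hedged sketch that folds the same validation into a single getFileStatus() call. The helper name and messages are illustrative, not taken from the Gaea source.

// Illustrative helper: one getFileStatus() call replaces the exists() + isFile() pair.
private static void requireRegularFile(FileSystem fs, Path path) throws IOException {
    try {
        if (!fs.getFileStatus(path).isFile()) {
            throw new RuntimeException(path + " is not a file.");
        }
    } catch (FileNotFoundException e) {
        throw new RuntimeException(path + " does not exist.", e);
    }
}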

From source file: org.bgi.flexlab.gaea.tools.mapreduce.vcf.sort.VCFSortOptions.java

License: Open Source License

private void traversalInputPath(Path path) {
    Configuration conf = new Configuration();
    FileSystem fs = HdfsFileManager.getFileSystem(path, conf);
    try {
        if (!fs.exists(path)) {
            System.err.println("Input File Path is not exist! Please check -i var.");
            System.exit(-1);
        }
        if (fs.isFile(path)) {
            inputList.add(path);
        } else {
            FileStatus stats[] = fs.listStatus(path);

            for (FileStatus file : stats) {
                Path filePath = file.getPath();

                if (!fs.isFile(filePath)) {
                    traversalInputPath(filePath);
                } else {
                    inputList.add(filePath);
                }
            }
        }
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
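
Finally, a hedged sketch of the same traversal that drops the per-child isFile() call: the FileStatus objects returned by listStatus() already carry the file/directory flag. The method name is hypothetical; inputList is the field from the original class.

// Illustrative variant: reuse the FileStatus from listStatus() instead of asking
// the FileSystem again for every child path.
private void traversalInputPathByStatus(FileSystem fs, Path path) throws IOException {
    FileStatus status = fs.getFileStatus(path); // throws FileNotFoundException if the path is absent
    if (status.isFile()) {
        inputList.add(path);
    } else {
        for (FileStatus child : fs.listStatus(path)) {
            if (child.isFile()) {
                inputList.add(child.getPath());
            } else {
                traversalInputPathByStatus(fs, child.getPath());
            }
        }
    }
}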