List of usage examples for org.apache.hadoop.fs FileSystem isFile
@Deprecated public boolean isFile(Path f) throws IOException
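isFile(Path) is deprecated in recent Hadoop releases in favour of getFileStatus(Path).isFile(). Before the collected examples, a minimal usage sketch of both styles (the class name, the args[0] path, and the plain Configuration setup are illustrative assumptions, not taken from any source file below):

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsFileExample { // hypothetical driver class, for illustration only
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path(args[0]);
        FileSystem fs = path.getFileSystem(conf);

        // Deprecated call: returns false both for directories and for missing paths.
        boolean viaIsFile = fs.isFile(path);

        // Replacement suggested by the deprecation: getFileStatus() lets the caller
        // distinguish "not a regular file" from "does not exist".
        boolean viaStatus;
        try {
            viaStatus = fs.getFileStatus(path).isFile();
        } catch (FileNotFoundException e) {
            viaStatus = false; // path does not exist
        }

        System.out.println(path + " is a regular file: " + viaIsFile + " / " + viaStatus);
    }
}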
From source file:org.apache.tajo.storage.AbstractStorageManager.java
License:Apache License
/**
 * Generate the list of files and make them into FileSplits.
 *
 * @throws IOException
 */
public List<FileFragment> getSplits(String tableName, TableMeta meta, Schema schema, Path... inputs)
        throws IOException {
    // generate splits
    List<FileFragment> splits = Lists.newArrayList();
    List<FileFragment> volumeSplits = Lists.newArrayList();
    List<BlockLocation> blockLocations = Lists.newArrayList();

    for (Path p : inputs) {
        FileSystem fs = p.getFileSystem(conf);
        ArrayList<FileStatus> files = Lists.newArrayList();
        if (fs.isFile(p)) {
            files.addAll(Lists.newArrayList(fs.getFileStatus(p)));
        } else {
            files.addAll(listStatus(p));
        }

        int previousSplitSize = splits.size();
        for (FileStatus file : files) {
            Path path = file.getPath();
            long length = file.getLen();
            if (length > 0) {
                // Get the locations of the blocks of the file
                BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
                boolean splittable = isSplittable(meta, schema, path, file);
                if (blocksMetadataEnabled && fs instanceof DistributedFileSystem) {
                    if (splittable) {
                        for (BlockLocation blockLocation : blkLocations) {
                            volumeSplits.add(makeSplit(tableName, path, blockLocation));
                        }
                        blockLocations.addAll(Arrays.asList(blkLocations));
                    } else { // Non-splittable
                        long blockSize = blkLocations[0].getLength();
                        if (blockSize >= length) {
                            blockLocations.addAll(Arrays.asList(blkLocations));
                            for (BlockLocation blockLocation : blkLocations) {
                                volumeSplits.add(makeSplit(tableName, path, blockLocation));
                            }
                        } else {
                            splits.add(makeNonSplit(tableName, path, 0, length, blkLocations));
                        }
                    }
                } else {
                    if (splittable) {
                        long minSize = Math.max(getMinSplitSize(), 1);
                        // The s3n REST API reports a block size, but blockLocations has a single entry
                        long blockSize = file.getBlockSize();
                        long splitSize = Math.max(minSize, blockSize);
                        long bytesRemaining = length;

                        // for s3
                        while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                            int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                            splits.add(makeSplit(tableName, path, length - bytesRemaining, splitSize,
                                    blkLocations[blkIndex].getHosts()));
                            bytesRemaining -= splitSize;
                        }
                        if (bytesRemaining > 0) {
                            int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                            splits.add(makeSplit(tableName, path, length - bytesRemaining, bytesRemaining,
                                    blkLocations[blkIndex].getHosts()));
                        }
                    } else { // Non-splittable
                        splits.add(makeNonSplit(tableName, path, 0, length, blkLocations));
                    }
                }
            } else {
                // for zero-length files
                splits.add(makeSplit(tableName, path, 0, length));
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("# of splits per partition: " + (splits.size() - previousSplitSize));
        }
    }

    // Combine the original fileFragments with the new VolumeId information
    setVolumeMeta(volumeSplits, blockLocations);
    splits.addAll(volumeSplits);
    LOG.info("Total # of splits: " + splits.size());
    return splits;
}
From source file:org.apache.tajo.storage.FileStorageManager.java
License:Apache License
/**
 * Generate the list of files and make them into FileSplits.
 *
 * @throws IOException
 */
public List<Fragment> getSplits(String tableName, TableMeta meta, Schema schema, Path... inputs)
        throws IOException {
    // generate splits
    List<Fragment> splits = Lists.newArrayList();
    List<Fragment> volumeSplits = Lists.newArrayList();
    List<BlockLocation> blockLocations = Lists.newArrayList();

    for (Path p : inputs) {
        FileSystem fs = p.getFileSystem(conf);
        ArrayList<FileStatus> files = Lists.newArrayList();
        if (fs.isFile(p)) {
            files.addAll(Lists.newArrayList(fs.getFileStatus(p)));
        } else {
            files.addAll(listStatus(p));
        }

        int previousSplitSize = splits.size();
        for (FileStatus file : files) {
            Path path = file.getPath();
            long length = file.getLen();
            if (length > 0) {
                // Get the locations of the blocks of the file
                BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
                boolean splittable = isSplittable(meta, schema, path, file);
                if (blocksMetadataEnabled && fs instanceof DistributedFileSystem) {
                    if (splittable) {
                        for (BlockLocation blockLocation : blkLocations) {
                            volumeSplits.add(makeSplit(tableName, path, blockLocation));
                        }
                        blockLocations.addAll(Arrays.asList(blkLocations));
                    } else { // Non-splittable
                        long blockSize = blkLocations[0].getLength();
                        if (blockSize >= length) {
                            blockLocations.addAll(Arrays.asList(blkLocations));
                            for (BlockLocation blockLocation : blkLocations) {
                                volumeSplits.add(makeSplit(tableName, path, blockLocation));
                            }
                        } else {
                            splits.add(makeNonSplit(tableName, path, 0, length, blkLocations));
                        }
                    }
                } else {
                    if (splittable) {
                        long minSize = Math.max(getMinSplitSize(), 1);
                        // The s3n REST API reports a block size, but blockLocations has a single entry
                        long blockSize = file.getBlockSize();
                        long splitSize = Math.max(minSize, blockSize);
                        long bytesRemaining = length;

                        // for s3
                        while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                            int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                            splits.add(makeSplit(tableName, path, length - bytesRemaining, splitSize,
                                    blkLocations[blkIndex].getHosts()));
                            bytesRemaining -= splitSize;
                        }
                        if (bytesRemaining > 0) {
                            int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                            splits.add(makeSplit(tableName, path, length - bytesRemaining, bytesRemaining,
                                    blkLocations[blkIndex].getHosts()));
                        }
                    } else { // Non-splittable
                        splits.add(makeNonSplit(tableName, path, 0, length, blkLocations));
                    }
                }
            } else {
                // for zero-length files
                splits.add(makeSplit(tableName, path, 0, length));
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("# of splits per partition: " + (splits.size() - previousSplitSize));
        }
    }

    // Combine the original fileFragments with the new VolumeId information
    setVolumeMeta(volumeSplits, blockLocations);
    splits.addAll(volumeSplits);
    LOG.info("Total # of splits: " + splits.size());
    return splits;
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.hdfs.HDFSTools.java
License:Apache License
public static List<Path> getAllFilePaths(final FileSystem fs, Path path, final PathFilter filter)
        throws IOException {
    if (null == path)
        path = fs.getHomeDirectory();
    if (path.toString().equals(FORWARD_SLASH))
        path = new Path("");

    final List<Path> paths = new ArrayList<Path>();
    if (fs.isFile(path))
        paths.add(path);
    else {
        for (final FileStatus status : fs.globStatus(new Path(path + FORWARD_ASTERISK), filter)) {
            final Path next = status.getPath();
            paths.addAll(getAllFilePaths(fs, next, filter));
        }
    }
    return paths;
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.hdfs.HDFSTools.java
License:Apache License
public static void decompressPath(final FileSystem fs, final String in, final String out,
        final String compressedFileSuffix, final boolean deletePrevious) throws IOException {
    final Path inPath = new Path(in);
    if (fs.isFile(inPath))
        HDFSTools.decompressFile(fs, in, out, deletePrevious);
    else {
        final Path outPath = new Path(out);
        if (!fs.exists(outPath))
            fs.mkdirs(outPath);
        for (final Path path : FileUtil.stat2Paths(fs.globStatus(new Path(in + FORWARD_ASTERISK)))) {
            if (path.getName().endsWith(compressedFileSuffix))
                HDFSTools.decompressFile(fs, path.toString(),
                        outPath.toString() + FORWARD_SLASH + path.getName().split("\\.")[0], deletePrevious);
        }
    }
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.FileSystemStorage.java
License:Apache License
private static List<Path> getAllFilePaths(final FileSystem fs, Path path, final PathFilter filter)
        throws IOException {
    if (null == path)
        path = fs.getHomeDirectory();
    if (path.toString().equals(FORWARD_SLASH))
        path = new Path("");

    final List<Path> paths = new ArrayList<Path>();
    if (fs.isFile(path))
        paths.add(path);
    else {
        for (final FileStatus status : fs.globStatus(new Path(path + FORWARD_ASTERISK), filter)) {
            final Path next = status.getPath();
            paths.addAll(getAllFilePaths(fs, next, filter));
        }
    }
    return paths;
}
From source file:org.apache.vxquery.metadata.VXQueryCollectionOperatorDescriptor.java
License:Apache License
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions)
        throws HyracksDataException {
    final FrameTupleAccessor fta = new FrameTupleAccessor(ctx.getFrameSize(),
            recordDescProvider.getInputRecordDescriptor(getActivityId(), 0));
    final int fieldOutputCount = recordDescProvider.getOutputRecordDescriptor(getActivityId(), 0)
            .getFieldCount();
    final ByteBuffer frame = ctx.allocateFrame();
    final FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize(), fieldOutputCount);
    final short partitionId = (short) ctx.getTaskAttemptId().getTaskId().getPartition();
    final ITreeNodeIdProvider nodeIdProvider = new TreeNodeIdProvider(partitionId, dataSourceId,
            totalDataSources);
    final String nodeId = ctx.getJobletContext().getApplicationContext().getNodeId();
    final DynamicContext dCtx = (DynamicContext) ctx.getJobletContext().getGlobalJobData();
    final String collectionName = collectionPartitions[partition % collectionPartitions.length];
    final XMLParser parser = new XMLParser(false, nodeIdProvider, nodeId, frame, appender, childSeq,
            dCtx.getStaticContext());

    return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
        @Override
        public void open() throws HyracksDataException {
            appender.reset(frame, true);
            writer.open();
            hdfs = new HDFSFunctions();
        }

        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            fta.reset(buffer);
            String collectionModifiedName = collectionName.replace("${nodeId}", nodeId);
            if (!collectionModifiedName.contains("hdfs:/")) {
                File collectionDirectory = new File(collectionModifiedName);
                // Check if the directory is in the local file system
                if (collectionDirectory.exists()) {
                    // Go through each tuple.
                    if (collectionDirectory.isDirectory()) {
                        for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) {
                            Iterator<File> it = FileUtils.iterateFiles(collectionDirectory,
                                    new VXQueryIOFileFilter(), TrueFileFilter.INSTANCE);
                            while (it.hasNext()) {
                                File xmlDocument = it.next();
                                if (LOGGER.isLoggable(Level.FINE)) {
                                    LOGGER.fine("Starting to read XML document: "
                                            + xmlDocument.getAbsolutePath());
                                }
                                parser.parseElements(xmlDocument, writer, fta, tupleIndex);
                            }
                        }
                    } else {
                        throw new HyracksDataException("Invalid directory parameter (" + nodeId + ":"
                                + collectionDirectory.getAbsolutePath() + ") passed to collection.");
                    }
                }
            } else {
                // Else check in the HDFS file system
                // Get an instance of the HDFS filesystem
                FileSystem fs = hdfs.getFileSystem();
                if (fs != null) {
                    collectionModifiedName = collectionModifiedName.replaceAll("hdfs:/", "");
                    Path directory = new Path(collectionModifiedName);
                    Path xmlDocument;
                    if (tag != null) {
                        hdfs.setJob(directory.getName(), tag);
                        tag = "<" + tag + ">";
                        Job job = hdfs.getJob();
                        InputFormat inputFormat = hdfs.getinputFormat();
                        try {
                            hdfs.scheduleSplits();
                            ArrayList<Integer> schedule = hdfs
                                    .getScheduleForNode(InetAddress.getLocalHost().getHostName());
                            List<InputSplit> splits = hdfs.getSplits();
                            List<FileSplit> fileSplits = new ArrayList<FileSplit>();
                            for (int i : schedule) {
                                fileSplits.add((FileSplit) splits.get(i));
                            }
                            FileSplitsFactory splitsFactory = new FileSplitsFactory(fileSplits);
                            List<FileSplit> inputSplits = splitsFactory.getSplits();
                            ContextFactory ctxFactory = new ContextFactory();
                            int size = inputSplits.size();
                            InputStream stream;
                            String value;
                            RecordReader reader;
                            TaskAttemptContext context;
                            for (int i = 0; i < size; i++) {
                                // Read the split
                                context = ctxFactory.createContext(job.getConfiguration(), i);
                                try {
                                    reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                                    reader.initialize(inputSplits.get(i), context);
                                    while (reader.nextKeyValue()) {
                                        value = reader.getCurrentValue().toString();
                                        // Split the value if it contains more than one item with the tag
                                        if (StringUtils.countMatches(value, tag) > 1) {
                                            String[] items = value.split(tag);
                                            for (String item : items) {
                                                if (item.length() > 0) {
                                                    item = START_TAG + tag + item;
                                                    stream = new ByteArrayInputStream(
                                                            item.getBytes(StandardCharsets.UTF_8));
                                                    parser.parseHDFSElements(stream, writer, fta, i);
                                                }
                                            }
                                        } else {
                                            value = START_TAG + value;
                                            // Create an input stream over the current value and send it to the parser
                                            stream = new ByteArrayInputStream(
                                                    value.getBytes(StandardCharsets.UTF_8));
                                            parser.parseHDFSElements(stream, writer, fta, i);
                                        }
                                    }
                                } catch (InterruptedException e) {
                                    if (LOGGER.isLoggable(Level.SEVERE)) {
                                        LOGGER.severe(e.getMessage());
                                    }
                                }
                            }
                        } catch (IOException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (ParserConfigurationException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (SAXException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        }
                    } else {
                        try {
                            // Check that the path exists and is a directory
                            if (fs.exists(directory) && fs.isDirectory(directory)) {
                                for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) {
                                    // Read every file in the directory
                                    RemoteIterator<LocatedFileStatus> it = fs.listFiles(directory, true);
                                    while (it.hasNext()) {
                                        xmlDocument = it.next().getPath();
                                        if (fs.isFile(xmlDocument)) {
                                            if (LOGGER.isLoggable(Level.FINE)) {
                                                LOGGER.fine("Starting to read XML document: "
                                                        + xmlDocument.getName());
                                            }
                                            // Create an input stream over the current file and send it to the parser
                                            InputStream in = fs.open(xmlDocument).getWrappedStream();
                                            parser.parseHDFSElements(in, writer, fta, tupleIndex);
                                        }
                                    }
                                }
                            } else {
                                throw new HyracksDataException("Invalid HDFS directory parameter (" + nodeId
                                        + ":" + directory + ") passed to collection.");
                            }
                        } catch (FileNotFoundException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (IOException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        }
                    }
                    try {
                        fs.close();
                    } catch (IOException e) {
                        if (LOGGER.isLoggable(Level.SEVERE)) {
                            LOGGER.severe(e.getMessage());
                        }
                    }
                }
            }
        }

        @Override
        public void fail() throws HyracksDataException {
            writer.fail();
        }

        @Override
        public void close() throws HyracksDataException {
            // Check if needed?
            fta.reset(frame);
            if (fta.getTupleCount() > 0) {
                FrameUtils.flushFrame(frame, writer);
            }
            writer.close();
        }
    };
}
From source file:org.bgi.flexlab.gaea.data.structure.header.MultipleVCFHeader.java
License:Open Source License
public void mergeHeader(Path inputPath, String output, Job job, boolean distributeCacheHeader) {
    Configuration conf = job.getConfiguration();
    try {
        FileSystem fs = inputPath.getFileSystem(conf);
        if (!fs.exists(inputPath)) {
            System.out.println("Input file path does not exist! Please check the input var.");
            System.exit(-1);
        }
        if (fs.isFile(inputPath)) {
            if (validPath(inputPath, fs)) {
                readVcfHeader(inputPath, conf);
            }
        } else {
            FileStatus[] stats = fs.listStatus(inputPath);
            for (FileStatus file : stats) {
                Path filePath = file.getPath();
                mergeHeader(filePath, output, job, distributeCacheHeader);
            }
        }
        fs.close();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    if (distributeCacheHeader) {
        distributeCacheVcfHeader(output, job, conf);
    } else {
        writeHeaderToHDFS(output, conf);
    }
}
From source file:org.bgi.flexlab.gaea.data.structure.header.SingleVCFHeader.java
License:Open Source License
public void readSingleHeader(Path vcfPath, Configuration conf) throws IOException {
    FileSystem fs = vcfPath.getFileSystem(conf);
    if (!fs.exists(vcfPath))
        throw new RuntimeException(vcfPath.toString() + " doesn't exist.");
    if (!fs.isFile(vcfPath)) {
        throw new RuntimeException(vcfPath.toString()
                + " is not a file. The GaeaSingleVcfHeader parser only supports a single vcf file.");
    }

    FSDataInputStream in = fs.open(vcfPath);
    AsciiLineReaderIterator it = new AsciiLineReaderIterator(new AsciiLineReader(in));
    VCFCodec codec = new VCFCodec();
    Object header = codec.readHeader(it);
    vcfHeader = (VCFHeader) (((FeatureCodecHeader) header).getHeaderValue());
    sampleNames.addAll(vcfHeader.getGenotypeSamples());
    buildHeaderInfo();
    it.close();
}
From source file:org.bgi.flexlab.gaea.tools.annotator.config.Config.java
License:Open Source License
/**
 * Load properties from the configuration file.
 * @return true on success
 */
boolean loadProperties(String configFileName) {
    properties = new Properties();
    try {
        Path confFilePath = new Path(configFileName);
        FileSystem fs = confFilePath.getFileSystem(conf);
        if (!fs.exists(confFilePath)) {
            throw new RuntimeException(confFilePath.toString() + " doesn't exist.");
        }
        if (!fs.isFile(confFilePath)) {
            throw new RuntimeException(confFilePath.toString() + " is not a file.");
        }
        properties.load(fs.open(confFilePath));
        if (!properties.isEmpty()) {
            return true;
        }
    } catch (Exception e) {
        properties = null;
        throw new RuntimeException(e);
    }
    return false;
}
From source file:org.bgi.flexlab.gaea.tools.mapreduce.vcf.sort.VCFSortOptions.java
License:Open Source License
private void traversalInputPath(Path path) {
    Configuration conf = new Configuration();
    FileSystem fs = HdfsFileManager.getFileSystem(path, conf);
    try {
        if (!fs.exists(path)) {
            System.err.println("Input file path does not exist! Please check the -i var.");
            System.exit(-1);
        }
        if (fs.isFile(path)) {
            inputList.add(path);
        } else {
            FileStatus[] stats = fs.listStatus(path);
            for (FileStatus file : stats) {
                Path filePath = file.getPath();
                if (!fs.isFile(filePath)) {
                    traversalInputPath(filePath);
                } else {
                    inputList.add(filePath);
                }
            }
        }
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}