List of usage examples for org.apache.hadoop.fs FileSystem isDirectory
@Deprecated public boolean isDirectory(Path f) throws IOException
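This overload is deprecated in current Hadoop releases in favor of inspecting the path's FileStatus. Below is a minimal sketch, not taken from any of the source files listed here, showing the deprecated call next to the FileStatus-based replacement; the class name and path are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws Exception {
        // Placeholder path; replace with a real HDFS or local path.
        Path path = new Path("/tmp/example");
        FileSystem fs = FileSystem.get(new Configuration());

        // Deprecated call, still used in many of the examples below.
        boolean viaDeprecated = fs.isDirectory(path);

        // Recommended replacement: check the FileStatus instead.
        // getFileStatus() throws FileNotFoundException for a missing path,
        // so guard with exists() when the path may not be there.
        boolean viaStatus = fs.exists(path) && fs.getFileStatus(path).isDirectory();

        System.out.println("isDirectory (deprecated): " + viaDeprecated);
        System.out.println("isDirectory (FileStatus): " + viaStatus);
    }
}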
From source file:org.apache.vxquery.metadata.VXQueryCollectionOperatorDescriptor.java
License:Apache License
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
    final FrameTupleAccessor fta = new FrameTupleAccessor(ctx.getFrameSize(),
            recordDescProvider.getInputRecordDescriptor(getActivityId(), 0));
    final int fieldOutputCount = recordDescProvider.getOutputRecordDescriptor(getActivityId(), 0).getFieldCount();
    final ByteBuffer frame = ctx.allocateFrame();
    final FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize(), fieldOutputCount);
    final short partitionId = (short) ctx.getTaskAttemptId().getTaskId().getPartition();
    final ITreeNodeIdProvider nodeIdProvider = new TreeNodeIdProvider(partitionId, dataSourceId, totalDataSources);
    final String nodeId = ctx.getJobletContext().getApplicationContext().getNodeId();
    final DynamicContext dCtx = (DynamicContext) ctx.getJobletContext().getGlobalJobData();
    final String collectionName = collectionPartitions[partition % collectionPartitions.length];
    final XMLParser parser = new XMLParser(false, nodeIdProvider, nodeId, frame, appender, childSeq,
            dCtx.getStaticContext());

    return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
        @Override
        public void open() throws HyracksDataException {
            appender.reset(frame, true);
            writer.open();
            hdfs = new HDFSFunctions();
        }

        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            fta.reset(buffer);
            String collectionModifiedName = collectionName.replace("${nodeId}", nodeId);
            if (!collectionModifiedName.contains("hdfs:/")) {
                File collectionDirectory = new File(collectionModifiedName);
                // check if directory is in the local file system
                if (collectionDirectory.exists()) {
                    // Go through each tuple.
                    if (collectionDirectory.isDirectory()) {
                        for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) {
                            Iterator<File> it = FileUtils.iterateFiles(collectionDirectory,
                                    new VXQueryIOFileFilter(), TrueFileFilter.INSTANCE);
                            while (it.hasNext()) {
                                File xmlDocument = it.next();
                                if (LOGGER.isLoggable(Level.FINE)) {
                                    LOGGER.fine("Starting to read XML document: " + xmlDocument.getAbsolutePath());
                                }
                                parser.parseElements(xmlDocument, writer, fta, tupleIndex);
                            }
                        }
                    } else {
                        throw new HyracksDataException("Invalid directory parameter (" + nodeId + ":"
                                + collectionDirectory.getAbsolutePath() + ") passed to collection.");
                    }
                }
            } else {
                // Else check in HDFS file system
                // Get instance of the HDFS filesystem
                FileSystem fs = hdfs.getFileSystem();
                if (fs != null) {
                    collectionModifiedName = collectionModifiedName.replaceAll("hdfs:/", "");
                    Path directory = new Path(collectionModifiedName);
                    Path xmlDocument;
                    if (tag != null) {
                        hdfs.setJob(directory.getName(), tag);
                        tag = "<" + tag + ">";
                        Job job = hdfs.getJob();
                        InputFormat inputFormat = hdfs.getinputFormat();
                        try {
                            hdfs.scheduleSplits();
                            ArrayList<Integer> schedule = hdfs
                                    .getScheduleForNode(InetAddress.getLocalHost().getHostName());
                            List<InputSplit> splits = hdfs.getSplits();
                            List<FileSplit> fileSplits = new ArrayList<FileSplit>();
                            for (int i : schedule) {
                                fileSplits.add((FileSplit) splits.get(i));
                            }
                            FileSplitsFactory splitsFactory = new FileSplitsFactory(fileSplits);
                            List<FileSplit> inputSplits = splitsFactory.getSplits();
                            ContextFactory ctxFactory = new ContextFactory();
                            int size = inputSplits.size();
                            InputStream stream;
                            String value;
                            RecordReader reader;
                            TaskAttemptContext context;
                            for (int i = 0; i < size; i++) {
                                // read split
                                context = ctxFactory.createContext(job.getConfiguration(), i);
                                try {
                                    reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                                    reader.initialize(inputSplits.get(i), context);
                                    while (reader.nextKeyValue()) {
                                        value = reader.getCurrentValue().toString();
                                        // Split value if it contains more than one item with the tag
                                        if (StringUtils.countMatches(value, tag) > 1) {
                                            String items[] = value.split(tag);
                                            for (String item : items) {
                                                if (item.length() > 0) {
                                                    item = START_TAG + tag + item;
                                                    stream = new ByteArrayInputStream(
                                                            item.getBytes(StandardCharsets.UTF_8));
                                                    parser.parseHDFSElements(stream, writer, fta, i);
                                                }
                                            }
                                        } else {
                                            value = START_TAG + value;
                                            // create an input stream to the file currently reading and send it to parser
                                            stream = new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8));
                                            parser.parseHDFSElements(stream, writer, fta, i);
                                        }
                                    }
                                } catch (InterruptedException e) {
                                    if (LOGGER.isLoggable(Level.SEVERE)) {
                                        LOGGER.severe(e.getMessage());
                                    }
                                }
                            }
                        } catch (IOException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (ParserConfigurationException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (SAXException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        }
                    } else {
                        try {
                            // check if the path exists and is a directory
                            if (fs.exists(directory) && fs.isDirectory(directory)) {
                                for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) {
                                    // read every file in the directory
                                    RemoteIterator<LocatedFileStatus> it = fs.listFiles(directory, true);
                                    while (it.hasNext()) {
                                        xmlDocument = it.next().getPath();
                                        if (fs.isFile(xmlDocument)) {
                                            if (LOGGER.isLoggable(Level.FINE)) {
                                                LOGGER.fine("Starting to read XML document: " + xmlDocument.getName());
                                            }
                                            // create an input stream to the file currently reading and send it to parser
                                            InputStream in = fs.open(xmlDocument).getWrappedStream();
                                            parser.parseHDFSElements(in, writer, fta, tupleIndex);
                                        }
                                    }
                                }
                            } else {
                                throw new HyracksDataException("Invalid HDFS directory parameter (" + nodeId + ":"
                                        + directory + ") passed to collection.");
                            }
                        } catch (FileNotFoundException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (IOException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        }
                    }
                    try {
                        fs.close();
                    } catch (IOException e) {
                        if (LOGGER.isLoggable(Level.SEVERE)) {
                            LOGGER.severe(e.getMessage());
                        }
                    }
                }
            }
        }

        @Override
        public void fail() throws HyracksDataException {
            writer.fail();
        }

        @Override
        public void close() throws HyracksDataException {
            // Check if needed?
            fta.reset(frame);
            if (fta.getTupleCount() > 0) {
                FrameUtils.flushFrame(frame, writer);
            }
            writer.close();
        }
    };
}
From source file:org.commoncrawl.util.SequenceFileUtils.java
License:Open Source License
@SuppressWarnings({ "unchecked", "deprecation" }) public static Class sniffValueTypeFromSequenceFile(FileSystem fs, Configuration conf, Path path) throws IOException { if (fs.isDirectory(path)) { path = new Path(path, "part-00000"); }/* ww w.ja v a2 s . c o m*/ SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); try { return reader.getValueClass(); } finally { reader.close(); } }
From source file:org.datacleaner.spark.utils.HdfsHelper.java
License:Open Source License
public boolean isDirectory(final URI path) {
    final Resource resource = getResourceToUse(path);
    if (!resource.isExists()) {
        return false;
    }
    if (resource instanceof FileResource) {
        return ((FileResource) resource).getFile().isDirectory();
    }
    if (resource instanceof HdfsResource) {
        final FileSystem fileSystem = ((HdfsResource) resource).getHadoopFileSystem();
        final Path hadoopPath = ((HdfsResource) resource).getHadoopPath();
        try {
            return fileSystem.isDirectory(hadoopPath);
        } catch (final IOException e) {
            throw new IllegalStateException(e);
        }
    }
    // actually we don't know, but most likely it's not a directory
    return false;
}
From source file:org.deeplearning4j.hadoop.datasetiterator.BaseHdfsDataSetIterator.java
License:Apache License
/**
 * Not sure whether the file system needs to be closed here.
 *
 * @param path the path to open
 * @return the input stream for the path
 * @throws Exception if one occurs
 */
public InputStream openInputStream(Path path) throws Exception {
    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(path))
        throw new FileNotFoundException("File does not exist");
    if (fs.isDirectory(path))
        throw new IllegalArgumentException("Not a file");
    InputStream is = fs.open(path);
    return is;
}
From source file:org.godhuli.rhipe.FileUtils.java
License:Apache License
private void delete(Path src, FileSystem srcFs, boolean recursive) throws IOException {
    if (srcFs.isDirectory(src) && !recursive) {
        throw new IOException("Cannot remove directory \"" + src + "\", use -rmr instead");
    }
    Trash trashTmp = new Trash(srcFs, getConf());
    if (trashTmp.moveToTrash(src)) {
        System.out.println("Moved to trash: " + src);
        return;
    }
    if (srcFs.delete(src, true)) {
        System.out.println("Deleted " + src);
    } else {
        if (!srcFs.exists(src)) {
            throw new FileNotFoundException("cannot remove " + src + ": No such file or directory.");
        }
        throw new IOException("Delete failed " + src);
    }
}
From source file:org.icgc.dcc.release.core.hadoop.FileGlobInputStream.java
License:Open Source License
private static Collection<Path> getPaths(FileSystem fileSystem, Path pathPattern)
        throws IOException, FileNotFoundException {
    FileStatus[] matches = fileSystem.globStatus(pathPattern, HIDDEN_PATH_FILTER);
    val paths = Lists.<Path>newArrayList();
    for (val match : matches) {
        if (fileSystem.isDirectory(match.getPath())) {
            FileStatus[] partFiles = fileSystem.listStatus(match.getPath(), HIDDEN_PATH_FILTER);
            for (val partFile : partFiles) {
                paths.add(partFile.getPath());
            }
        } else {
            paths.add(match.getPath());
        }
    }
    return paths;
}
From source file:org.kepler.ddp.actor.pattern.DDPDataSink.java
License:Open Source License
/**
 * Merge a directory containing multiple output files into a single file.
 * This method deletes the directory when finished.
 *
 * TODO move to parent class?
 */
private void _mergeMultiPartOutputs(String pathStr) throws IllegalActionException {
    Configuration configuration = new Configuration();
    Path srcPath = new Path(pathStr);
    try {
        FileSystem srcPathFileSystem = srcPath.getFileSystem(configuration);
        // only merge if the output is a directory.
        if (srcPathFileSystem.isDirectory(srcPath)) {
            Path destPath = new Path(pathStr + "-TMP1234");
            try {
                // TODO if there is only one part-r-nnnnnn file, copyMerge() will still
                // copy it instead of simply renaming it.
                if (!FileUtil.copyMerge(srcPath.getFileSystem(configuration), srcPath,
                        destPath.getFileSystem(configuration), destPath, true, configuration, "")) {
                    throw new IllegalActionException(this, "Unable to merge output files in " + srcPath + "/.");
                }
            } catch (IOException e) {
                throw new IllegalActionException(this, e,
                        "Error merging multi-part output files in " + srcPath + "/.");
            }
            try {
                if (!destPath.getFileSystem(configuration).rename(destPath, srcPath)) {
                    throw new IllegalActionException(this, "Unable to rename " + destPath + " to " + srcPath);
                }
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
    } catch (IOException e) {
        throw new IllegalActionException(this, e, "Error accessing output file " + srcPath);
    }
}
From source file:org.kitesdk.data.spi.filesystem.FileSystemPartitionIterator.java
License:Apache License
@SuppressWarnings("deprecation") FileSystemPartitionIterator(FileSystem fs, Path root, PartitionStrategy strategy, Schema schema, final Predicate<StorageKey> predicate) throws IOException { Preconditions.checkArgument(fs.isDirectory(root)); this.fs = fs; this.rootDirectory = root; this.iterator = Iterators.filter(Iterators.transform( new FileSystemIterator(Accessor.getDefault().getFieldPartitioners(strategy).size()), new MakeKey(strategy, schema)), predicate); }
From source file:org.kitesdk.data.spi.filesystem.TestFileSystemView.java
License:Apache License
@SuppressWarnings("deprecation") public static void assertDirectoriesExist(FileSystem fs, Path... dirs) throws IOException { for (Path path : dirs) { assertTrue("Directory should exist: " + path, fs.exists(path) && fs.isDirectory(path)); }//from w w w. ja v a 2 s .c o m }
From source file:org.lab41.mapreduce.BlueprintsGraphDriver.java
License:Apache License
private Job configureJob2(Configuration baseConfiguration, FaunusGraph faunusGraph, FileSystem fs)
        throws IOException {
    Configuration job2Config = new Configuration(baseConfiguration);

    /** Job 2 Configuration **/
    Job job2 = new Job(job2Config);
    job2.setInputFormatClass(SequenceFileInputFormat.class);
    job2.setOutputFormatClass(faunusGraph.getGraphOutputFormat());
    job2.setJobName("BluePrintsGraphDriver Job2: " + faunusGraph.getInputLocation());
    job2.setJarByClass(BlueprintsGraphDriver.class);
    job2.setMapperClass(BlueprintsGraphOutputMapReduce.EdgeMap.class);
    job2.setMapOutputKeyClass(NullWritable.class);
    job2.setMapOutputValueClass(FaunusVertex.class);

    Path inputPath = faunusGraph.getInputLocation();
    FileInputFormat.setInputPaths(job2, inputPath);
    job2.setNumReduceTasks(0);

    String strJob2OutputPath = faunusGraph.getOutputLocation().toString();
    Path job2Path = new Path(strJob2OutputPath + "/job2");
    if (fs.isDirectory(job2Path)) {
        logger.info(strJob2OutputPath + " exists -- deleting");
        fs.delete(job2Path, true);
    }
    FileOutputFormat.setOutputPath(job2, job2Path);

    // TODO -- I don't think this actually does anything
    // reduce the size of the splits:
    long splitSize = (long) job2.getConfiguration().getLong("mapred.max.split.size", 67108864);
    job2.getConfiguration().setLong("mapred.max.split.size", splitSize / 2);
    return job2;
}