List of usage examples for org.apache.hadoop.fs.RemoteIterator#next()
E next() throws IOException;
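All the examples below follow the same basic pattern: obtain a RemoteIterator (typically from FileSystem.listFiles or listLocatedStatus) and drain it with hasNext()/next(), propagating the IOException that both methods may throw. As a quick orientation, here is a minimal self-contained sketch of that pattern; the directory /tmp/data and the default Configuration are placeholder assumptions.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class RemoteIteratorNextExample {
    public static void main(String[] args) throws IOException {
        // Placeholder path; replace with a directory that exists on your cluster.
        Path dir = new Path("/tmp/data");
        FileSystem fs = FileSystem.get(new Configuration());

        // listFiles returns a RemoteIterator; hasNext() and next() may throw
        // IOException, so main is declared to propagate it.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true /* recursive */);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}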
From source file:backup.namenode.NameNodeBackupBlockCheckProcessor.java
License:Apache License
private void addExtendedBlocksFromNameNode(BackupReportWriter writer,
        ExternalExtendedBlockSort<Addresses> nameNodeBlocks, Path path, Set<Path> pathSetToIgnore)
        throws IOException {
    RemoteIterator<LocatedFileStatus> iterator = fileSystem.listFiles(path, true);
    DFSClient client = fileSystem.getClient();
    long st = System.nanoTime();
    while (iterator.hasNext()) {
        FileStatus fs = iterator.next();
        if (st + TimeUnit.SECONDS.toNanos(10) < System.nanoTime()) {
            writer.statusBlockMetaDataFetchFromNameNode(fs.getPath().toString());
            st = System.nanoTime();
        }
        addExtendedBlocksFromNameNode(writer, nameNodeBlocks, client, fs, pathSetToIgnore);
    }
}
From source file:br.com.lassal.nqueens.grid.job.GenerateSolutions.java
/**
 * NQueens working folder structure: /nqueens/board-{x}/partial/solution_X-4
 *
 * @param queensSize size of the board
 * @param job the job whose input and output paths are configured
 * @throws IOException
 */
private void setWorkingFolder(int queensSize, Job job) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    if (fs.isDirectory(new Path("/nqueens/board-" + queensSize + "/final"))) {
        System.exit(0); // already processed in a previous run; do not process again
    }
    String lastSolution = null;
    Path partialSolDir = new Path("/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);
        while (dirsFound.hasNext()) {
            LocatedFileStatus path = dirsFound.next();
            if (lastSolution == null) {
                lastSolution = path.getPath().getName();
                inputPath = path.getPath();
            } else {
                String currentDir = path.getPath().getName();
                if (lastSolution.compareToIgnoreCase(currentDir) < 0) {
                    lastSolution = currentDir;
                    inputPath = path.getPath();
                }
            }
        }
    }
    int currentSolutionSet = 0;

    if (inputPath == null) {
        inputPath = new Path("/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            seedFile.writeBytes(queensSize + "#");
            seedFile.close();
        }
    }
    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (lastSolution != null) {
        String[] solution = lastSolution.split("-");
        if (solution[0].equalsIgnoreCase("solution_" + queensSize)) {
            currentSolutionSet = Integer.parseInt(solution[1]) + 4;

            if (currentSolutionSet >= queensSize) {
                outputPath = new Path("/nqueens/board-" + queensSize + "/final");
            } else {
                outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-"
                        + currentSolutionSet);
            }
        }
    } else {
        outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-4");
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);
}
From source file:br.com.lassal.nqueens.grid.job.NQueenCounter.java
private Path setWorkingFolder(int queensSize, String workingFolder, boolean isFinal, Job job)
        throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path returnPath = null;

    if (workingFolder == null) {
        workingFolder = "";
    }

    Path partialSolDir = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;
    String nextRunPath = "run_1";

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);
        String lastRunPath = null;
        Path lastPath = null;

        while (dirsFound.hasNext()) {
            LocatedFileStatus dir = dirsFound.next();

            if (dir.isDirectory()) {
                if (lastRunPath == null || dir.getPath().getName().compareTo(lastRunPath) > 0) {
                    lastPath = dir.getPath();
                    lastRunPath = lastPath.getName();
                }
            }
        }
        if (lastRunPath != null) {
            String[] runParts = lastRunPath.split("_");
            int lastRun = Integer.parseInt(runParts[1]);
            nextRunPath = runParts[0] + "_" + (++lastRun);
            inputPath = lastPath;
        }
    }
    if (inputPath == null) {
        inputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            seedFile.writeBytes(queensSize + ":");
            seedFile.close();
        }
    } else {
        returnPath = inputPath;
    }
    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (isFinal) {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/final");
    } else {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/" + nextRunPath);
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);

    return returnPath;
}
From source file:ch.cern.db.hdfs.DistributedFileSystemMetadata.java
License:GNU General Public License
public LinkedList<BlockLocation> getBlockLocations(Path path) throws IOException {
    LOG.info("Collecting block locations...");

    LinkedList<BlockLocation> blockLocations = new LinkedList<BlockLocation>();
    RemoteIterator<LocatedFileStatus> statuses = listFiles(path, true);
    int hasNextCode = hasNextCode(statuses);
    while (hasNextCode > 0) {
        if (hasNextCode > 1) {
            hasNextCode = hasNextCode(statuses);
            continue;
        }

        LocatedFileStatus fileStatus = statuses.next();

        if (fileStatus.isFile()) {
            BlockLocation[] blockLocations_tmp = getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
            blockLocations.addAll(Arrays.asList(blockLocations_tmp));
        }

        int size = blockLocations.size();
        if (size > 0 && size % 5000 == 0)
            LOG.info("Collected " + size + " locations. Still in progress...");

        if (size >= MAX_NUMBER_OF_LOCATIONS) {
            LOG.info("Reached max number of locations to collect. The amount will be representative enough.");
            break;
        }
        hasNextCode = hasNextCode(statuses);
    }
    LOG.info("Collected " + blockLocations.size() + " locations.");

    if (isHdfsBlocksMetadataEnabled()) {
        BlockStorageLocation[] blockStorageLocations = getFileBlockStorageLocations(blockLocations);
        blockLocations.clear();
        blockLocations.addAll(Arrays.asList(blockStorageLocations));
    } else {
        LOG.error("VolumeId/DiskId can not be collected since "
                + "dfs.datanode.hdfs-blocks-metadata.enabled is not enabled.");
    }

    return blockLocations;
}
From source file:co.cask.cdap.common.io.Locations.java
License:Apache License
/**
 * Do some processing on the locations contained in the {@code startLocation}, using the {@code processor}. If this
 * location is a directory, all the locations contained in it will also be processed. If the {@code recursive} tag
 * is set to true, those locations that are directories will also be processed recursively. If the
 * {@code startLocation} is not a directory, this method will return the result of the processing of that location.
 *
 * @param startLocation location to start the processing from
 * @param recursive {@code true} if this method should be called on the directory {@link Location}s found from
 *                  {@code startLocation}. If the {@code startLocation} is a directory, all the locations under it
 *                  will be processed, regardless of the value of {@code recursive}
 * @param processor used to process locations. If the {@link Processor#process} method returns false on any
 *                  {@link Location} object processed, this method will return the current result of the processor.
 * @param <R> Type of the return value
 * @throws IOException if the locations could not be read
 */
public static <R> R processLocations(Location startLocation, boolean recursive,
                                     Processor<LocationStatus, R> processor) throws IOException {
    boolean topLevel = true;
    LocationFactory lf = startLocation.getLocationFactory();
    LinkedList<LocationStatus> statusStack = new LinkedList<>();
    statusStack.push(getLocationStatus(startLocation));
    while (!statusStack.isEmpty()) {
        LocationStatus status = statusStack.poll();
        if (!processor.process(status)) {
            return processor.getResult();
        }
        if (status.isDir() && (topLevel || recursive)) {
            topLevel = false;
            RemoteIterator<LocationStatus> itor = listLocationStatus(lf.create(status.getUri()));
            while (itor.hasNext()) {
                statusStack.add(0, itor.next());
            }
        }
    }
    return processor.getResult();
}
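As a hypothetical usage sketch (not taken from the CDAP sources), a Processor that counts non-directory locations could be passed to processLocations as follows. The CountingProcessor class and the root location are illustrative assumptions, and the Processor interface shape (process() returning boolean, getResult() returning the result) is inferred from the calls in the method above.

// Illustrative only: a Processor counting all non-directory locations it sees.
class CountingProcessor implements Processor<LocationStatus, Integer> {
    private int count = 0;

    @Override
    public boolean process(LocationStatus status) {
        if (!status.isDir()) {
            count++;
        }
        return true; // keep iterating over the remaining locations
    }

    @Override
    public Integer getResult() {
        return count;
    }
}

// With a Location 'root' assumed to exist:
// int fileCount = Locations.processLocations(root, true, new CountingProcessor());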
From source file:co.cask.cdap.common.io.Locations.java
License:Apache License
/**
 * Transform a {@link RemoteIterator} using a {@link FunctionWithException}.
 */
private static <F, T> RemoteIterator<T> transform(final RemoteIterator<F> itor,
                                                  final FunctionWithException<F, T, IOException> transform) {
    return new RemoteIterator<T>() {
        @Override
        public boolean hasNext() throws IOException {
            return itor.hasNext();
        }

        @Override
        public T next() throws IOException {
            return transform.apply(itor.next());
        }
    };
}
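A sketch of how this private helper could be invoked from within the same class, turning a RemoteIterator of LocatedFileStatus into a RemoteIterator of Path objects. The fs and dir variables and the exact FunctionWithException signature are assumptions inferred from the code above, not taken from the CDAP sources.

// Illustrative only: wrap a listing so that callers see just the Path of each entry.
// 'fs' (a FileSystem) and 'dir' (a Path) are assumed to be in scope.
RemoteIterator<LocatedFileStatus> statuses = fs.listFiles(dir, true);
RemoteIterator<Path> paths = transform(statuses,
        new FunctionWithException<LocatedFileStatus, Path, IOException>() {
            @Override
            public Path apply(LocatedFileStatus status) {
                return status.getPath();
            }
        });
while (paths.hasNext()) {
    System.out.println(paths.next());
}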
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License:Apache License
@Override
public void commitJob(JobContext context) throws IOException {
    Configuration configuration = context.getConfiguration();
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    Partitioning partitioning = outputDataset.getPartitioning();

    Set<PartitionKey> partitionsToAdd = new HashSet<>();
    Set<String> relativePaths = new HashSet<>();
    // Go over all files in the temporary directory and keep track of partitions to add for them
    FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context);
    for (FileStatus committedTaskPath : allCommittedTaskPaths) {
        FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration);
        RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true);
        while (fileIter.hasNext()) {
            Path path = fileIter.next().getPath();
            String relativePath = getRelative(committedTaskPath.getPath(), path);

            int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR);
            if (lastPathSepIdx == -1) {
                // this shouldn't happen because each relative path should consist of at least one partition key and
                // the output file name
                LOG.warn("Skipping path '{}'. Its relative path '{}' has fewer than two parts", path,
                        relativePath);
                continue;
            }
            // relativePath = "../key1/key2/part-m-00000"
            // relativeDir = "../key1/key2"
            // fileName = "part-m-00000"
            String relativeDir = relativePath.substring(0, lastPathSepIdx);
            String fileName = relativePath.substring(lastPathSepIdx + 1);

            Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir);
            Path finalPath = new Path(finalDir, fileName);
            if (fs.exists(finalPath)) {
                throw new FileAlreadyExistsException("Final output path " + finalPath + " already exists");
            }
            PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir);
            partitionsToAdd.add(partitionKey);
            relativePaths.add(relativeDir);
        }
    }

    // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a _temporary_jobId suffix
    // to the original outputDir.
    Path finalOutput = FileOutputFormat.getOutputPath(context);
    FileSystem fs = finalOutput.getFileSystem(configuration);
    for (FileStatus stat : getAllCommittedTaskPaths(context)) {
        mergePaths(fs, stat, finalOutput);
    }

    // compute the metadata to be written to every output partition
    Map<String, String> metadata = ConfigurationUtil.getNamedConfigurations(this.taskContext.getConfiguration(),
            PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX);

    // create all the necessary partitions
    for (PartitionKey partitionKey : partitionsToAdd) {
        PartitionOutput partitionOutput = outputDataset.getPartitionOutput(partitionKey);
        partitionOutput.setMetadata(metadata);
        partitionOutput.addPartition();
    }

    // close the TaskContext, which flushes dataset operations
    try {
        taskContext.flushOperations();
    } catch (Exception e) {
        Throwables.propagateIfPossible(e, IOException.class);
        throw new IOException(e);
    }

    // delete the job-specific _temporary folder and create a _done file in the o/p folder
    cleanupJob(context);

    // mark all the final output paths with a _SUCCESS file, if configured to do so (default = true)
    if (configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
        for (String relativePath : relativePaths) {
            Path pathToMark = new Path(finalOutput, relativePath);
            Path markerPath = new Path(pathToMark, SUCCEEDED_FILE_NAME);
            fs.createNewFile(markerPath);
        }
    }
}
From source file:com.alibaba.jstorm.hdfs.common.HdfsUtils.java
License:Apache License
/**
 * List files sorted by modification time that have not been modified since 'olderThan'. If
 * 'olderThan' is <= 0 then the filtering is disabled.
 */
public static ArrayList<Path> listFilesByModificationTime(FileSystem fs, Path directory, long olderThan)
        throws IOException {
    ArrayList<LocatedFileStatus> fstats = new ArrayList<>();
    RemoteIterator<LocatedFileStatus> itr = fs.listFiles(directory, false);
    while (itr.hasNext()) {
        LocatedFileStatus fileStatus = itr.next();
        if (olderThan > 0) {
            if (fileStatus.getModificationTime() <= olderThan)
                fstats.add(fileStatus);
        } else {
            fstats.add(fileStatus);
        }
    }
    Collections.sort(fstats, new ModifTimeComparator());

    ArrayList<Path> result = new ArrayList<>(fstats.size());
    for (LocatedFileStatus fstat : fstats) {
        result.add(fstat.getPath());
    }
    return result;
}
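A hypothetical call site (not from the jstorm sources) collecting the paths under /data/incoming that have not been modified in the last hour, sorted by modification time via ModifTimeComparator; the filesystem handle and the directory are assumptions for illustration.

// Illustrative only: 'fs' (a FileSystem) is assumed to be in scope.
long oneHourAgo = System.currentTimeMillis() - TimeUnit.HOURS.toMillis(1);
ArrayList<Path> oldFiles = HdfsUtils.listFilesByModificationTime(fs, new Path("/data/incoming"), oneHourAgo);
for (Path p : oldFiles) {
    System.out.println(p);
}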
From source file:com.alibaba.jstorm.hdfs.HdfsCache.java
License:Apache License
public Collection<String> listFile(String dstPath, boolean recursive) throws IOException {
    Collection<String> files = new HashSet<String>();
    Path path = new Path(dstPath);
    if (fs.exists(path)) {
        RemoteIterator<LocatedFileStatus> itr = fs.listFiles(path, recursive);
        while (itr.hasNext()) {
            LocatedFileStatus status = itr.next();
            files.add(status.getPath().getName());
        }
    }
    return files;
}
From source file:com.awcoleman.StandaloneJava.AvroCombinerByBlock.java
License:Apache License
public AvroCombinerByBlock(String inDirStr, String outDirStr, String handleExisting) throws IOException {

    // handle both an output directory and an output filename (ending with .avro)
    String outputFilename = DEFAULTOUTPUTFILENAME;
    if (outDirStr.endsWith(".avro")) {
        isOutputNameSpecifiedAndAFile = true;
        //String[] outputParts = outDirStr.split(":?\\\\");
        String[] outputParts = outDirStr.split("/");
        outputFilename = outputParts[outputParts.length - 1];

        // remove outputFilename from outDirStr to get new outDirStr which is just the directory (and trailing /)
        outDirStr = outDirStr.replaceAll(Pattern.quote(outputFilename), "");
        outDirStr = outDirStr.substring(0, outDirStr.length() - (outDirStr.endsWith("/") ? 1 : 0));
    }

    //Get block size - not needed
    //long hdfsBlockSize = getBlockSize();
    //System.out.println("HDFS FS block size: " + hdfsBlockSize);

    // Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    // Check that input dir exists and is a directory, and that output dir is usable
    Path inDir = new Path(inDirStr);
    Path outDir = new Path(outDirStr);
    if (!(hdfs.exists(inDir) && hdfs.isDirectory(inDir))) {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");
        System.exit(1);
    }

    if (!(hdfs.exists(outDir) && hdfs.isDirectory(outDir))) {
        if (hdfs.exists(outDir)) {
            // outDir exists and is a symlink or file, must die
            System.out.println("Requested output directory name ( " + outDirStr
                    + " ) exists but is not a directory. Exiting.");
            System.exit(1);
        } else {
            hdfs.mkdirs(outDir);
        }
    }

    RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inDir, true);
    while (fileStatusListIterator.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusListIterator.next();

        if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
            inputFileList.add((FileStatus) fileStatus);
        }
    }

    if (inputFileList.size() <= 1 && !isOutputNameSpecifiedAndAFile) {
        // If an output file is specified assume we just want a rename.
        System.out.println("Only one or zero files found in input directory ( " + inDirStr + " ). Exiting.");
        System.exit(1);
    }

    // Get Schema and Compression Codec from seed file since we need it for the writer
    Path firstFile = inputFileList.get(0).getPath();
    FsInput fsin = new FsInput(firstFile, conf);
    DataFileReader<Object> dfrFirstFile = new DataFileReader<Object>(fsin, new GenericDatumReader<Object>());
    Schema fileSchema = dfrFirstFile.getSchema();
    String compCodecName = dfrFirstFile.getMetaString("avro.codec");
    // compCodecName should be null, deflate, snappy, or bzip2
    if (compCodecName == null) {
        compCodecName = "deflate"; // set to deflate even though original is no compression
    }
    dfrFirstFile.close();

    // Create empty HDFS file in output dir
    String seedFileStr = outDirStr + "/" + outputFilename;
    Path seedFile = new Path(seedFileStr);
    FSDataOutputStream hdfsdos = null;
    try {
        hdfsdos = hdfs.create(seedFile, false);
    } catch (org.apache.hadoop.fs.FileAlreadyExistsException faee) {
        if (handleExisting.equals("overwrite")) {
            hdfs.delete(seedFile, false);
            hdfsdos = hdfs.create(seedFile, false);
        } else if (handleExisting.equals("append")) {
            hdfsdos = hdfs.append(seedFile);
        } else {
            System.out.println("File " + seedFileStr + " exists and will not overwrite. handleExisting is set to "
                    + handleExisting + ". Exiting.");
            System.exit(1);
        }
    }
    if (hdfsdos == null) {
        System.out.println("Unable to create or write to output file ( " + seedFileStr
                + " ). handleExisting is set to " + handleExisting + ". Exiting.");
        System.exit(1);
    }

    // Append other files
    GenericDatumWriter gdw = new GenericDatumWriter(fileSchema);
    DataFileWriter dfwBase = new DataFileWriter(gdw);
    // Set compression to that found in the first file
    dfwBase.setCodec(CodecFactory.fromString(compCodecName));
    DataFileWriter dfw = dfwBase.create(fileSchema, hdfsdos);

    for (FileStatus thisFileStatus : inputFileList) {
        // _SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }
        FsInput fsin1 = new FsInput(thisFileStatus.getPath(), conf);
        DataFileReader dfr = new DataFileReader<Object>(fsin1, new GenericDatumReader<Object>());
        dfw.appendAllFrom(dfr, false);
        dfr.close();
    }

    dfw.close();
    dfwBase.close();
}