Example usage for org.apache.hadoop.fs RemoteIterator hasNext

List of usage examples for org.apache.hadoop.fs RemoteIterator hasNext

Introduction

On this page you can find example usages of the hasNext method of org.apache.hadoop.fs.RemoteIterator.

Prototype

boolean hasNext() throws IOException;

Source Link

Document

Returns true if the iteration has more elements.

Usage

From source file:backup.namenode.NameNodeBackupBlockCheckProcessor.java

License:Apache License

/**
 * Lists the files under {@code path} (with the recursive flag set) and passes each one to the
 * per-file overload of {@code addExtendedBlocksFromNameNode}.
 *
 * <p>Whenever more than ten seconds have elapsed since the last progress report, the path of
 * the file currently being visited is reported through {@code writer}.
 *
 * @param writer          receives the periodic progress reports
 * @param nameNodeBlocks  forwarded unchanged to the per-file overload
 * @param path            root of the listing
 * @param pathSetToIgnore forwarded unchanged to the per-file overload
 * @throws IOException if listing or per-file processing fails
 */
private void addExtendedBlocksFromNameNode(BackupReportWriter writer,
        ExternalExtendedBlockSort<Addresses> nameNodeBlocks, Path path, Set<Path> pathSetToIgnore)
        throws IOException {
    RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(path, true);
    DFSClient dfsClient = fileSystem.getClient();
    final long reportIntervalNanos = TimeUnit.SECONDS.toNanos(10);
    long lastReportNanos = System.nanoTime();
    while (files.hasNext()) {
        FileStatus current = files.next();
        boolean reportDue = lastReportNanos + reportIntervalNanos < System.nanoTime();
        if (reportDue) {
            writer.statusBlockMetaDataFetchFromNameNode(current.getPath().toString());
            lastReportNanos = System.nanoTime();
        }
        addExtendedBlocksFromNameNode(writer, nameNodeBlocks, dfsClient, current, pathSetToIgnore);
    }
}

From source file:br.com.lassal.nqueens.grid.job.GenerateSolutions.java

/**
 * NQueens working folder structure /nqueens/board-{x}/partial/solution_X-4
 *
 * @param queensSize/* ww  w  .j  av  a2s  .co m*/
 * @throws IOException
 */
private void setWorkingFolder(int queensSize, Job job) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    if (fs.isDirectory(new Path("/nqueens/board-" + queensSize + "/final"))) {
        System.exit(0); // ja foi processado anteriormente nao processa de novo
    }

    String lastSolution = null;
    Path partialSolDir = new Path("/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);

        while (dirsFound.hasNext()) {
            LocatedFileStatus path = dirsFound.next();
            if (lastSolution == null) {
                lastSolution = path.getPath().getName();
                inputPath = path.getPath();
            } else {
                String currentDir = path.getPath().getName();
                if (lastSolution.compareToIgnoreCase(currentDir) < 0) {
                    lastSolution = currentDir;
                    inputPath = path.getPath();
                }
            }
        }
    }
    int currentSolutionSet = 0;
    if (inputPath == null) {
        inputPath = new Path("/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            seedFile.writeBytes(queensSize + "#");
            seedFile.close();
        }
    }
    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (lastSolution != null) {
        String[] solution = lastSolution.split("-");
        if (solution[0].equalsIgnoreCase("solution_" + queensSize)) {
            currentSolutionSet = Integer.parseInt(solution[1]) + 4;

            if (currentSolutionSet >= queensSize) {
                outputPath = new Path("/nqueens/board-" + queensSize + "/final");
            } else {
                outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-"
                        + currentSolutionSet);
            }
        }
    } else {
        outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-4");
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);

}

From source file:br.com.lassal.nqueens.grid.job.NQueenCounter.java

/**
 * Configures the input and output paths of {@code job} for the next counting run.
 *
 * <p>Run directories live under {@code <workingFolder>/nqueens/board-N/partial/} and are named
 * {@code <prefix>_<number>} (for example {@code run_3}). The directory with the highest run
 * number becomes the job input, and the next output is {@code <prefix>_<number + 1>}. Run
 * numbers are compared numerically, so {@code run_10} correctly follows {@code run_9}.
 * Entries that are not directories, or whose names do not have a numeric second part, are
 * ignored. When no run directory exists, a {@code seed} file containing {@code "N:"} is
 * created (if missing) and used as input, and the output is {@code run_1}.
 *
 * @param queensSize    board size used to build every path
 * @param workingFolder path prefix; {@code null} is treated as the empty string
 * @param isFinal       when {@code true} the output goes to {@code final} instead of the next
 *                      run directory
 * @param job           job whose input/output paths and formats are set
 * @return the previous run directory used as input, or {@code null} when the seed file was used
 * @throws IOException if the file system cannot be accessed
 */
private Path setWorkingFolder(int queensSize, String workingFolder, boolean isFinal, Job job)
        throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path returnPath = null;

    if (workingFolder == null) {
        workingFolder = "";
    }

    Path partialSolDir = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath;
    String nextRunPath = "run_1";

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);
        Path lastPath = null;
        String lastPrefix = null;
        int lastRun = 0;

        while (dirsFound.hasNext()) {
            LocatedFileStatus dir = dirsFound.next();
            if (!dir.isDirectory()) {
                continue;
            }
            String dirName = dir.getPath().getName();
            String[] runParts = dirName.split("_");
            if (runParts.length < 2) {
                continue; // not a <prefix>_<number> run directory
            }
            int run;
            try {
                run = Integer.parseInt(runParts[1]);
            } catch (NumberFormatException ignored) {
                continue; // second part is not a run number, so this is not a run directory
            }
            // Compare numerically; a plain string comparison ranks "run_9" above "run_10".
            // Equal run numbers fall back to the name so the choice does not depend on
            // listing order.
            boolean newer = lastPath == null || run > lastRun
                    || (run == lastRun && dirName.compareTo(lastPath.getName()) > 0);
            if (newer) {
                lastPath = dir.getPath();
                lastPrefix = runParts[0];
                lastRun = run;
            }
        }
        if (lastPath != null) {
            nextRunPath = lastPrefix + "_" + (lastRun + 1);
            inputPath = lastPath;
        }

    }
    if (inputPath == null) {
        inputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            try {
                seedFile.writeBytes(queensSize + ":");
            } finally {
                // Close even when the write fails so the stream is not leaked.
                seedFile.close();
            }
        }
    } else {
        returnPath = inputPath;
    }
    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (isFinal) {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/final");
    } else {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/" + nextRunPath);
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);

    return returnPath;
}

From source file:ch.cern.db.hdfs.DistributedFileSystemMetadata.java

License:GNU General Public License

/**
 * Calls {@code statuses.hasNext()} and maps the outcome to an integer code.
 *
 * @param statuses iterator to probe
 * @return {@code 1} if the iterator has more elements, {@code 0} if it has none, and
 *         {@code 2} if {@code hasNext()} threw an {@code AccessControlException} (the failure
 *         is logged as a warning and otherwise ignored)
 * @throws IOException if {@code hasNext()} fails for any other I/O reason
 */
private int hasNextCode(RemoteIterator<LocatedFileStatus> statuses) throws IOException {
    try {
        return statuses.hasNext() ? 1 : 0;
    } catch (AccessControlException e) {
        // Log only the first line of the message. The message may be null or may contain no
        // line break; an unconditional substring(0, indexOf("\n")) would throw in both cases.
        String reason = e.getMessage();
        if (reason != null) {
            int lineEnd = reason.indexOf('\n');
            if (lineEnd >= 0) {
                reason = reason.substring(0, lineEnd);
            }
        }

        LOG.warn("Skipped file or directory because: " + reason);

        return 2;
    }
}

From source file:co.cask.cdap.common.io.Locations.java

License:Apache License

/**
 * Feeds {@code startLocation}, and the locations found beneath it, to {@code processor}.
 *
 * <p>The start location is processed first. If it is a directory, its children are always
 * listed and processed. Directories found among those children are only descended into when
 * {@code recursive} is {@code true}. Processing stops as soon as
 * {@link Processor#process} returns {@code false}.
 *
 * @param startLocation location to start the processing from
 * @param recursive {@code true} to also descend into directories found below
 *                  {@code startLocation}; the contents of {@code startLocation} itself are
 *                  processed regardless of this flag
 * @param processor receives every visited location; its current result is returned when it
 *                  asks to stop or when there is nothing left to visit
 * @param <R> Type of the return value
 * @return the value of {@code processor.getResult()} at the time processing ended
 * @throws IOException if the locations could not be read
 */
public static <R> R processLocations(Location startLocation, boolean recursive,
        Processor<LocationStatus, R> processor) throws IOException {
    LocationFactory factory = startLocation.getLocationFactory();
    LinkedList<LocationStatus> pending = new LinkedList<>();
    pending.addFirst(getLocationStatus(startLocation));

    // True until the first directory has been expanded; that one is expanded unconditionally.
    boolean firstExpansionPending = true;

    while (!pending.isEmpty()) {
        LocationStatus current = pending.removeFirst();
        if (!processor.process(current)) {
            break;
        }
        boolean expand = current.isDir() && (firstExpansionPending || recursive);
        if (!expand) {
            continue;
        }
        firstExpansionPending = false;
        RemoteIterator<LocationStatus> children = listLocationStatus(factory.create(current.getUri()));
        while (children.hasNext()) {
            // Children go to the head of the list, so the most recently listed one is visited next.
            pending.addFirst(children.next());
        }
    }
    return processor.getResult();
}

From source file:co.cask.cdap.common.io.Locations.java

License:Apache License

/**
 * Wraps {@code itor} in a {@link RemoteIterator} whose elements are the results of applying
 * {@code transform} to the elements of {@code itor}. The function is applied on each call to
 * {@code next()}; {@code hasNext()} is delegated unchanged.
 */
private static <F, T> RemoteIterator<T> transform(final RemoteIterator<F> itor,
        final FunctionWithException<F, T, IOException> transform) {
    final class MappingIterator implements RemoteIterator<T> {
        @Override
        public T next() throws IOException {
            F source = itor.next();
            return transform.apply(source);
        }

        @Override
        public boolean hasNext() throws IOException {
            return itor.hasNext();
        }
    }
    return new MappingIterator();
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java

License:Apache License

/**
 * Commits the job output by moving every committed task's files into the final output
 * directory and registering one partition per output directory on the output dataset.
 *
 * <p>Steps, in order: collect the partition keys and relative directories of all committed
 * files (failing if a destination file already exists), merge the committed task paths into
 * the final output, add the partitions with the configured metadata, flush the dataset
 * operations, clean up the job, and finally (if configured) write a success marker file
 * into each output directory.
 *
 * @param context the job context supplying the configuration and output path
 * @throws IOException if a destination file already exists, or if any file system or dataset
 *                     operation fails
 */
@Override
public void commitJob(JobContext context) throws IOException {
    Configuration configuration = context.getConfiguration();
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext);

    // Resolve the output dataset (named in the job configuration) and its partitioning.
    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    Partitioning partitioning = outputDataset.getPartitioning();

    Set<PartitionKey> partitionsToAdd = new HashSet<>();
    Set<String> relativePaths = new HashSet<>();
    // Go over all files in the temporary directory and keep track of partitions to add for them
    FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context);
    for (FileStatus committedTaskPath : allCommittedTaskPaths) {
        FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration);
        RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true);
        while (fileIter.hasNext()) {
            Path path = fileIter.next().getPath();
            String relativePath = getRelative(committedTaskPath.getPath(), path);

            int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR);
            if (lastPathSepIdx == -1) {
                // this shouldn't happen because each relative path should consist of at least one partition key and
                // the output file name
                LOG.warn("Skipping path '{}'. It's relative path '{}' has fewer than two parts", path,
                        relativePath);
                continue;
            }
            // relativePath = "../key1/key2/part-m-00000"
            // relativeDir = "../key1/key2"
            // fileName = "part-m-00000"
            String relativeDir = relativePath.substring(0, lastPathSepIdx);
            String fileName = relativePath.substring(lastPathSepIdx + 1);

            // Refuse to overwrite: fail before anything is moved if the destination file exists.
            Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir);
            Path finalPath = new Path(finalDir, fileName);
            if (fs.exists(finalPath)) {
                throw new FileAlreadyExistsException("Final output path " + finalPath + " already exists");
            }
            // Sets: many files in the same directory yield a single partition and marker location.
            PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir);
            partitionsToAdd.add(partitionKey);
            relativePaths.add(relativeDir);
        }
    }

    // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a _temporary_jobId suffix to
    // the original outputDir.
    Path finalOutput = FileOutputFormat.getOutputPath(context);
    FileSystem fs = finalOutput.getFileSystem(configuration);
    for (FileStatus stat : getAllCommittedTaskPaths(context)) {
        mergePaths(fs, stat, finalOutput);
    }

    // compute the metadata to be written to every output partition
    Map<String, String> metadata = ConfigurationUtil.getNamedConfigurations(this.taskContext.getConfiguration(),
            PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX);

    // create all the necessary partitions
    for (PartitionKey partitionKey : partitionsToAdd) {
        PartitionOutput partitionOutput = outputDataset.getPartitionOutput(partitionKey);
        partitionOutput.setMetadata(metadata);
        partitionOutput.addPartition();
    }

    // flush the dataset operations of the TaskContext; an IOException is rethrown as-is and any
    // other checked exception is wrapped in an IOException
    try {
        taskContext.flushOperations();
    } catch (Exception e) {
        Throwables.propagateIfPossible(e, IOException.class);
        throw new IOException(e);
    }

    // delete the job-specific _temporary folder and create a _done file in the o/p folder
    cleanupJob(context);

    // mark all the final output paths with a _SUCCESS file, if configured to do so (default = true)
    if (configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
        for (String relativePath : relativePaths) {
            Path pathToMark = new Path(finalOutput, relativePath);
            Path markerPath = new Path(pathToMark, SUCCEEDED_FILE_NAME);
            fs.createNewFile(markerPath);
        }
    }
}

From source file:com.alibaba.jstorm.hdfs.common.HdfsUtils.java

License:Apache License

/**
 * Lists the files directly inside {@code directory} (non-recursive), ordered by
 * {@code ModifTimeComparator}.
 *
 * <p>When {@code olderThan} is greater than zero, only files whose modification time is less
 * than or equal to {@code olderThan} are returned. When {@code olderThan} is zero or negative
 * the filter is disabled and every file is returned.
 */
public static ArrayList<Path> listFilesByModificationTime(FileSystem fs, Path directory, long olderThan)
        throws IOException {
    final boolean filterEnabled = olderThan > 0;
    ArrayList<LocatedFileStatus> selected = new ArrayList<>();

    for (RemoteIterator<LocatedFileStatus> files = fs.listFiles(directory, false); files.hasNext();) {
        LocatedFileStatus candidate = files.next();
        if (!filterEnabled || candidate.getModificationTime() <= olderThan) {
            selected.add(candidate);
        }
    }
    Collections.sort(selected, new ModifTimeComparator());

    ArrayList<Path> paths = new ArrayList<>(selected.size());
    for (LocatedFileStatus status : selected) {
        paths.add(status.getPath());
    }
    return paths;
}

From source file:com.alibaba.jstorm.hdfs.HdfsCache.java

License:Apache License

/**
 * Collects the names (last path component only) of the files found under {@code dstPath}.
 * The names are gathered in a set, so files that share a name in different directories
 * appear once.
 *
 * @param dstPath   path to list
 * @param recursive passed through to {@code listFiles}
 * @return the file names found; empty if {@code dstPath} does not exist
 * @throws IOException if the file system cannot be read
 */
public Collection<String> listFile(String dstPath, boolean recursive) throws IOException {
    Collection<String> names = new HashSet<String>();
    Path root = new Path(dstPath);
    if (!fs.exists(root)) {
        return names;
    }
    for (RemoteIterator<LocatedFileStatus> entries = fs.listFiles(root, recursive); entries.hasNext();) {
        names.add(entries.next().getPath().getName());
    }
    return names;
}

From source file:com.awcoleman.StandaloneJava.AvroCombinerByBlock.java

License:Apache License

/**
 * Combines the Avro files found (recursively) under {@code inDirStr} into a single Avro file.
 *
 * <p>{@code outDirStr} is either an output directory, in which case the default output file
 * name is used, or a path ending in {@code .avro}, in which case its last component is the
 * output file name and the remainder is the output directory. The schema and compression
 * codec are taken from the first input file; when that file declares no codec,
 * {@code deflate} is used. Files named {@code _SUCCESS} and zero-length files are skipped.
 *
 * <p>On any precondition failure (file system unreachable, input directory missing, output
 * path not a directory, no usable input files, output file exists and may not be replaced)
 * a message is printed and the JVM exits with status 1.
 *
 * @param inDirStr       input directory
 * @param outDirStr      output directory, or output file path ending in {@code .avro}
 * @param handleExisting {@code "overwrite"} to replace an existing output file,
 *                       {@code "append"} to append to it; any other value (including
 *                       {@code null}) makes an existing output file an error
 * @throws IOException if reading the inputs or writing the output fails
 */
public AvroCombinerByBlock(String inDirStr, String outDirStr, String handleExisting) throws IOException {

    //handle both an output directory and an output filename (ending with .avro)
    String outputFilename = DEFAULTOUTPUTFILENAME;
    if (outDirStr.endsWith(".avro")) {
        isOutputNameSpecifiedAndAFile = true;

        // Split at the last '/' only. Removing the file name with a global replace would also
        // strip it from any parent directory that happens to contain the same text.
        int lastSlash = outDirStr.lastIndexOf('/');
        outputFilename = outDirStr.substring(lastSlash + 1);
        outDirStr = lastSlash < 0 ? "" : outDirStr.substring(0, lastSlash);
    }

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    //Check if input and output dirs exist
    Path inDir = new Path(inDirStr);
    Path outDir = new Path(outDirStr);
    // The input must exist AND be a directory. The previous "exists || isDirectory" test let
    // an existing non-directory through.
    if (!hdfs.isDirectory(inDir)) {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");
        System.exit(1);
    }

    if (!hdfs.isDirectory(outDir)) {
        if (hdfs.exists(outDir)) { //outDir exists and is a symlink or file, must die
            System.out.println("Requested output directory name ( " + outDirStr
                    + " ) exists but is not a directory. Exiting.");
            System.exit(1);
        } else {
            hdfs.mkdirs(outDir);
        }
    }

    RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inDir, true);
    while (fileStatusListIterator.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusListIterator.next();

        if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
            inputFileList.add(fileStatus);
        }
    }

    if (inputFileList.size() <= 1 && !isOutputNameSpecifiedAndAFile) { //If an output file is specified assume we just want a rename.
        System.out.println("Only one or zero files found in input directory ( " + inDirStr + " ). Exiting.");
        System.exit(1);
    }
    // Even in "rename" mode at least one file is needed to read the schema from.
    if (inputFileList.isEmpty()) {
        System.out.println("No input files found in input directory ( " + inDirStr + " ). Exiting.");
        System.exit(1);
    }

    //Get Schema and Compression Codec from seed file since we need it for the writer
    Path firstFile = inputFileList.get(0).getPath();
    Schema fileSchema;
    String compCodecName;
    DataFileReader<Object> dfrFirstFile = openAvroReader(firstFile, conf);
    try {
        fileSchema = dfrFirstFile.getSchema();
        compCodecName = dfrFirstFile.getMetaString("avro.codec");
    } finally {
        dfrFirstFile.close();
    }
    //compCodecName should be null, deflate, snappy, or bzip2
    if (compCodecName == null) {
        compCodecName = "deflate"; //set to deflate even though original is no compression
    }

    //Create Empty HDFS file in output dir
    String seedFileStr = outDirStr + "/" + outputFilename;
    Path seedFile = new Path(seedFileStr);
    FSDataOutputStream hdfsdos = null;
    try {
        hdfsdos = hdfs.create(seedFile, false);
    } catch (org.apache.hadoop.fs.FileAlreadyExistsException faee) {
        // Constant-first equals so a null handleExisting falls through to the error branch.
        if ("overwrite".equals(handleExisting)) {
            hdfs.delete(seedFile, false);
            hdfsdos = hdfs.create(seedFile, false);
        } else if ("append".equals(handleExisting)) {
            // NOTE(review): the writer below is opened with create(), which presumably writes
            // a fresh Avro header after the existing content; DataFileWriter.appendTo may be
            // what is needed here - confirm before relying on append mode.
            hdfsdos = hdfs.append(seedFile);
        } else {
            System.out
                    .println("File " + seedFileStr + " exists and will not overwrite. handleExisting is set to "
                            + handleExisting + ". Exiting.");
            System.exit(1);
        }
    }
    if (hdfsdos == null) {
        System.out.println("Unable to create or write to output file ( " + seedFileStr
                + " ). handleExisting is set to " + handleExisting + ". Exiting.");
        System.exit(1);
    }

    //Append other files
    DataFileWriter<Object> dfw = new DataFileWriter<Object>(new GenericDatumWriter<Object>(fileSchema));
    boolean writerOpened = false;
    try {
        //Set compression to that found in the first file
        dfw.setCodec(CodecFactory.fromString(compCodecName));
        dfw.create(fileSchema, hdfsdos);
        writerOpened = true;

        for (FileStatus thisFileStatus : inputFileList) {

            //_SUCCESS files are 0 bytes
            if (thisFileStatus.getLen() == 0) {
                continue;
            }

            DataFileReader<Object> dfr = openAvroReader(thisFileStatus.getPath(), conf);
            try {
                dfw.appendAllFrom(dfr, false);
            } finally {
                dfr.close();
            }
        }
    } finally {
        // Close the writer once it has been opened on the stream; otherwise close the raw
        // stream so that exactly one close reaches it on every path.
        if (writerOpened) {
            dfw.close();
        } else {
            hdfsdos.close();
        }
    }

}

/** Opens an Avro reader on {@code file}, closing the underlying input if the reader cannot be created. */
private static DataFileReader<Object> openAvroReader(Path file, Configuration conf) throws IOException {
    FsInput input = new FsInput(file, conf);
    boolean opened = false;
    try {
        DataFileReader<Object> reader = new DataFileReader<Object>(input, new GenericDatumReader<Object>());
        opened = true;
        return reader;
    } finally {
        if (!opened) {
            input.close();
        }
    }
}