Example usage for org.apache.hadoop.fs FileSystem isFile

List of usage examples for org.apache.hadoop.fs FileSystem isFile

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem isFile.

Prototype

@Deprecated
public boolean isFile(Path f) throws IOException 

Source Link

Document

True iff the named path is a regular file.
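
Before the real-world snippets below, here is a minimal, self-contained sketch of a typical call, assuming a reasonably recent Hadoop release. The path /tmp/example.txt is purely illustrative, and because isFile(Path) is deprecated, the sketch also shows the common replacement of inspecting the FileStatus returned by getFileStatus(Path).

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsFileExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.txt"); // illustrative path only

        // resolve the FileSystem that owns this path (local, HDFS, ...)
        FileSystem fs = path.getFileSystem(conf);

        // deprecated, but still widely used: true only for regular files
        if (fs.isFile(path)) {
            System.out.println(path + " is a regular file");
        }

        // non-deprecated equivalent: inspect the FileStatus instead
        if (fs.exists(path) && fs.getFileStatus(path).isFile()) {
            System.out.println(path + " is a regular file (via FileStatus)");
        }
    }
}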

Usage

From source file:org.apache.mahout.classifier.rbm.test.TestRBMClassifierJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOption("model", "m", "The path to the model built during training", true);
    addOption("labelcount", "lc", "total count of labels existent in the training set", true);
    addOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION, "max",
            "least number of stable iterations in classification layer when classifying", "10");
    addOption(new DefaultOptionBuilder().withLongName(DefaultOptionCreator.MAPREDUCE_METHOD).withRequired(false)
            .withDescription("Run tests with map/reduce").withShortName("mr").create());

    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    int labelcount = Integer.parseInt(getOption("labelcount"));
    iterations = Integer.parseInt(getOption("maxIter"));

    //check that the model exists
    Path model = new Path(parsedArgs.get("--model"));
    if (!model.getFileSystem(getConf()).exists(model)) {
        log.error("Model file does not exist!");
        return -1;
    }

    //create the list of all labels
    List<String> labels = new ArrayList<String>();
    for (int i = 0; i < labelcount; i++)
        labels.add(String.valueOf(i));

    FileSystem fs = getInputPath().getFileSystem(getConf());
    ResultAnalyzer analyzer = new ResultAnalyzer(labels, "-1");
    //collect the test batches: a single input file is the only batch, a directory contributes one batch per file
    Path[] batches;
    if (fs.isFile(getInputPath()))
        batches = new Path[] { getInputPath() };
    else {
        FileStatus[] stati = fs.listStatus(getInputPath());
        batches = new Path[stati.length];
        for (int i = 0; i < stati.length; i++) {
            batches[i] = stati[i].getPath();
        }
    }

    if (hasOption("mapreduce"))
        HadoopUtil.delete(getConf(), getTempPath("testresults"));

    for (Path input : batches) {
        if (hasOption("mapreduce")) {
            HadoopUtil.cacheFiles(model, getConf());
            //the output key is the expected label, the output value holds the scores for all the labels
            Job testJob = prepareJob(input, getTempPath("testresults"), SequenceFileInputFormat.class,
                    TestRBMClassifierMapper.class, IntWritable.class, VectorWritable.class,
                    SequenceFileOutputFormat.class);
            testJob.getConfiguration().set("maxIter", String.valueOf(iterations));
            testJob.waitForCompletion(true);

            //loop over the results and create the confusion matrix
            SequenceFileDirIterable<IntWritable, VectorWritable> dirIterable = new SequenceFileDirIterable<IntWritable, VectorWritable>(
                    getTempPath("testresults"), PathType.LIST, PathFilters.partFilter(), getConf());

            analyzeResults(dirIterable, analyzer);

        } else {
            //test job locally
            runTestsLocally(model, analyzer, input);
        }
    }

    //output the result of the tests
    log.info("RBMClassifier Results: {}", analyzer);

    //stop all running threads
    if (executor != null)
        executor.shutdownNow();
    return 0;
}

From source file:org.apache.mahout.classifier.rbm.training.RBMClassifierTrainingJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption("epochs", "e", "number of training epochs through the trainingset", true);
    addOption("structure", "s", "comma-separated list of layer sizes", false);
    addOption("labelcount", "lc", "total count of labels existent in the training set", true);
    addOption("learningrate", "lr", "learning rate at the beginning of training", "0.005");
    addOption("momentum", "m", "momentum of learning at the beginning", "0.5");
    addOption("rbmnr", "nr", "rbm to train, < 0 means train all", "-1");
    addOption("nrgibbs", "gn", "number of gibbs sampling used in contrastive divergence", "5");
    addOption(new DefaultOptionBuilder().withLongName(DefaultOptionCreator.MAPREDUCE_METHOD).withRequired(false)
            .withDescription("Run training with map/reduce").withShortName("mr").create());
    addOption(new DefaultOptionBuilder().withLongName("nogreedy").withRequired(false)
            .withDescription("Don't run greedy pre training").withShortName("ng").create());
    addOption(new DefaultOptionBuilder().withLongName("nofinetuning").withRequired(false)
            .withDescription("Don't run fine tuning at the end").withShortName("nf").create());
    addOption(new DefaultOptionBuilder().withLongName("nobiases").withRequired(false)
            .withDescription("Don't initialize biases").withShortName("nb").create());
    addOption(new DefaultOptionBuilder().withLongName("monitor").withRequired(false)
            .withDescription("If present, errors can be monitored in cosole").withShortName("mon").create());
    addOption(DefaultOptionCreator.overwriteOption().create());

    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    Path input = getInputPath();
    Path output = getOutputPath();
    FileSystem fs = FileSystem.get(output.toUri(), getConf());
    labelcount = Integer.parseInt(getOption("labelcount"));

    boolean local = !hasOption("mapreduce");
    monitor = hasOption("monitor");
    initbiases = !hasOption("nobiases");
    finetuning = !hasOption("nofinetuning");
    greedy = !hasOption("nogreedy");

    //collect the training batches: a single input file is the only batch, a directory contributes one batch per file
    if (fs.isFile(input))
        batches = new Path[] { input };
    else {
        FileStatus[] stati = fs.listStatus(input);
        batches = new Path[stati.length];
        for (int i = 0; i < stati.length; i++) {
            batches[i] = stati[i].getPath();
        }
    }

    epochs = Integer.valueOf(getOption("epochs"));
    learningrate = Double.parseDouble(getOption("learningrate"));
    momentum = Double.parseDouble(getOption("momentum"));
    rbmNrtoTrain = Integer.parseInt(getOption("rbmnr"));
    nrGibbsSampling = Integer.parseInt(getOption("nrgibbs"));

    boolean initialize = hasOption(DefaultOptionCreator.OVERWRITE_OPTION) || !fs.exists(output)
            || fs.listStatus(output).length <= 0;

    if (initialize) {
        String structure = getOption("structure");
        if (structure == null || structure.isEmpty())
            return -1;

        String[] layers = structure.split(",");
        if (layers.length < 2) {
            return -1;
        }

        int[] actualLayerSizes = new int[layers.length];
        for (int i = 0; i < layers.length; i++) {
            actualLayerSizes[i] = Integer.parseInt(layers[i]);
        }

        rbmCl = new RBMClassifier(labelcount, actualLayerSizes);
        logger.info("New model initialized!");
    } else {
        rbmCl = RBMClassifier.materialize(output, getConf());
        logger.info("Model found and materialized!");
    }

    HadoopUtil.setSerializations(getConf());
    lastUpdate = new Matrix[rbmCl.getDbm().getRbmCount()];

    if (initbiases) {
        //init biases!
        Vector biases = null;
        int counter = 0;
        for (Path batch : batches) {
            for (Pair<IntWritable, VectorWritable> record : new SequenceFileIterable<IntWritable, VectorWritable>(
                    batch, getConf())) {
                if (biases == null)
                    biases = record.getSecond().get().clone();
                else
                    biases = biases.plus(record.getSecond().get()); //plus() returns a new vector, so reassign
                counter++;
            }
        }
        if (biases == null) {
            logger.info("No training data found!");
            return -1;
        }

        rbmCl.getDbm().getLayer(0).setBiases(biases.divide(counter));
        logger.info("Biases initialized");
    }

    //greedy pre-training with gradually decreasing learning rates
    if (greedy) {
        if (!local)
            rbmCl.serialize(output, getConf());

        double tempLearningrate = learningrate;
        if (rbmNrtoTrain < 0)
            //train all rbms
            for (int rbmNr = 0; rbmNr < rbmCl.getDbm().getRbmCount(); rbmNr++) {
                tempLearningrate = learningrate;

                //double the weights if the dbm was materialized, because they were halved after greedy pretraining
                if (!initialize && rbmNr > 0 && rbmNr < rbmCl.getDbm().getRbmCount() - 1) {
                    ((SimpleRBM) rbmCl.getDbm().getRBM(rbmNr)).setWeightMatrix(
                            ((SimpleRBM) rbmCl.getDbm().getRBM(rbmNr)).getWeightMatrix().times(2));
                }

                for (int j = 0; j < epochs; j++) {
                    logger.info("Greedy training, epoch " + (j + 1) + "\nCurrent learningrate: "
                            + tempLearningrate);
                    for (int b = 0; b < batches.length; b++) {
                        tempLearningrate -= learningrate / (epochs * batches.length + epochs);
                        if (local) {
                            if (!trainGreedySeq(rbmNr, batches[b], j, tempLearningrate))
                                return -1;
                        } else if (!trainGreedyMR(rbmNr, batches[b], j, tempLearningrate))
                            return -1;
                        if (monitor && (batches.length > 19) && (b + 1) % (batches.length / 20) == 0)
                            logger.info(rbmNr + "-RBM: " + Math.round(((double) b + 1) / batches.length * 100.0)
                                    + "% in epoch done!");
                    }
                    logger.info(Math.round(((double) j + 1) / epochs * 100) + "% of training on rbm number "
                            + rbmNr + " is done!");

                    if (monitor) {
                        double error = rbmError(batches[0], rbmNr);
                        logger.info(
                                "Average reconstruction error on batch " + batches[0].getName() + ": " + error);
                    }

                    rbmCl.serialize(output, getConf());
                }

                //weight normalization to avoid double counting
                if (rbmNr > 0 && rbmNr < rbmCl.getDbm().getRbmCount() - 1) {
                    ((SimpleRBM) rbmCl.getDbm().getRBM(rbmNr)).setWeightMatrix(
                            ((SimpleRBM) rbmCl.getDbm().getRBM(rbmNr)).getWeightMatrix().times(0.5));
                }
            }
        else {
            //double weights if dbm was materialized, because it was halved after greedy pretraining
            if (!initialize && rbmNrtoTrain > 0 && rbmNrtoTrain < rbmCl.getDbm().getRbmCount() - 1) {
                ((SimpleRBM) rbmCl.getDbm().getRBM(rbmNrtoTrain)).setWeightMatrix(
                        ((SimpleRBM) rbmCl.getDbm().getRBM(rbmNrtoTrain)).getWeightMatrix().times(2));
            }
            //train just wanted rbm
            for (int j = 0; j < epochs; j++) {
                logger.info(
                        "Greedy training, epoch " + (j + 1) + "\nCurrent learningrate: " + tempLearningrate);
                for (int b = 0; b < batches.length; b++) {
                    tempLearningrate -= learningrate / (epochs * batches.length + epochs);
                    if (local) {
                        if (!trainGreedySeq(rbmNrtoTrain, batches[b], j, tempLearningrate))
                            return -1;
                    } else if (!trainGreedyMR(rbmNrtoTrain, batches[b], j, tempLearningrate))
                        return -1;
                    if (monitor && (batches.length > 19) && (b + 1) % (batches.length / 20) == 0)
                        logger.info(rbmNrtoTrain + "-RBM: "
                                + Math.round(((double) b + 1) / batches.length * 100.0) + "% in epoch done!");
                }
                logger.info(Math.round(((double) j + 1) / epochs * 100) + "% of training is done!");

                if (monitor) {
                    double error = rbmError(batches[0], rbmNrtoTrain);
                    logger.info("Average reconstruction error on batch " + batches[0].getName() + ": " + error);
                }
            }

            //weight normalization to avoid double counting
            if (rbmNrtoTrain > 0 && rbmNrtoTrain < rbmCl.getDbm().getRbmCount() - 1) {
                ((SimpleRBM) rbmCl.getDbm().getRBM(rbmNrtoTrain)).setWeightMatrix(
                        ((SimpleRBM) rbmCl.getDbm().getRBM(rbmNrtoTrain)).getWeightMatrix().times(0.5));
            }
        }

        rbmCl.serialize(output, getConf());
        logger.info("Pretraining done and model written to output");
    }

    if (finetuning) {
        DeepBoltzmannMachine multiLayerDbm = null;

        double tempLearningrate = learningrate;
        //finetuning job
        for (int j = 0; j < epochs; j++) {
            for (int b = 0; b < batches.length; b++) {
                multiLayerDbm = rbmCl.initializeMultiLayerNN();
                logger.info("Finetuning on batch " + batches[b].getName() + "\nCurrent learningrate: "
                        + tempLearningrate);
                tempLearningrate -= learningrate / (epochs * batches.length + epochs);
                if (local) {
                    if (!finetuneSeq(batches[b], j, multiLayerDbm, tempLearningrate))
                        return -1;
                } else if (!fintuneMR(batches[b], j, tempLearningrate))
                    return -1;
                logger.info("Finetuning: " + Math.round(((double) b + 1) / batches.length * 100.0)
                        + "% in epoch done!");
            }
            logger.info(Math.round(((double) j + 1) / epochs * 100) + "% of training is done!");

            if (monitor) {
                double error = feedForwardError(multiLayerDbm, batches[0]);
                logger.info("Average discriminative error on batch " + batches[0].getName() + ": " + error);
            }
        }
        //final serialization
        rbmCl.serialize(output, getConf());
        logger.info("RBM finetuning done and model written to output");
    }

    if (executor != null)
        executor.shutdownNow();

    return 0;
}

From source file:org.apache.mahout.classifier.svm.datastore.HDFSReader.java

License:Apache License

public List<String> readADirectory(Path filePath) {
    List<String> lines = new ArrayList<String>();
    try {
        FileSystem fs = FileSystem.get(this.conf);
        if (!fs.isFile(filePath)) {
            FileStatus[] fsList = fs.listStatus(filePath);
            for (FileStatus file : fsList) {
                if (!file.isDir()) {
                    lines.addAll(readAllLines(new Path(filePath.toString() + "/" + file.getPath().getName())));
                }
            }
        }
    } catch (Exception e) {
        log.error("Exception: " + e.getMessage());
    }
    return lines;
}

From source file:org.apache.mahout.classifier.svm.datastore.HDFSReader.java

License:Apache License

public boolean isDir(Path filePath) {
    try {
        FileSystem fs = FileSystem.get(this.conf);
        return !fs.isFile(filePath);
    } catch (Exception e) {
        log.error("Exception: " + e.getMessage());
        return false;
    }
}

From source file:org.apache.mahout.clustering.lda.cvb.InMemoryCollapsedVariationalBayes0.java

License:Apache License

private static Matrix loadVectors(String vectorPathString, Configuration conf) throws IOException {
    Path vectorPath = new Path(vectorPathString);
    FileSystem fs = vectorPath.getFileSystem(conf);
    List<Path> subPaths = Lists.newArrayList();
    if (fs.isFile(vectorPath)) {
        subPaths.add(vectorPath);
    } else {
        for (FileStatus fileStatus : fs.listStatus(vectorPath, PathFilters.logsCRCFilter())) {
            subPaths.add(fileStatus.getPath());
        }
    }
    List<Pair<Integer, Vector>> rowList = Lists.newArrayList();
    int numRows = Integer.MIN_VALUE;
    int numCols = -1;
    boolean sequentialAccess = false;
    for (Path subPath : subPaths) {
        for (Pair<IntWritable, VectorWritable> record : new SequenceFileIterable<IntWritable, VectorWritable>(
                subPath, true, conf)) {
            int id = record.getFirst().get();
            Vector vector = record.getSecond().get();
            if (vector instanceof NamedVector) {
                vector = ((NamedVector) vector).getDelegate();
            }
            if (numCols < 0) {
                numCols = vector.size();
                sequentialAccess = vector.isSequentialAccess();
            }
            rowList.add(Pair.of(id, vector));
            numRows = Math.max(numRows, id);
        }
    }
    numRows++;
    Vector[] rowVectors = new Vector[numRows];
    for (Pair<Integer, Vector> pair : rowList) {
        rowVectors[pair.getFirst()] = pair.getSecond();
    }
    return new SparseRowMatrix(numRows, numCols, rowVectors, true, !sequentialAccess);

}

From source file:org.apache.mahout.text.WholeFileRecordReader.java

License:Apache License

@Override
public boolean nextKeyValue() throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(this.configuration);

        if (!fs.isFile(file)) {
            return false;
        }

        FileStatus[] fileStatuses;
        if (pathFilter != null) {
            fileStatuses = fs.listStatus(file, pathFilter);
        } else {
            fileStatuses = fs.listStatus(file);
        }

        FSDataInputStream in = null;
        if (fileStatuses.length == 1) {
            try {
                in = fs.open(fileStatuses[0].getPath());
                IOUtils.readFully(in, contents, 0, contents.length);
                value.setCapacity(contents.length);
                value.set(contents, 0, contents.length);
            } finally {
                Closeables.close(in, false);
            }
            processed = true;
            return true;
        }
    }
    return false;
}

From source file:org.apache.metamodel.util.HdfsResource.java

License:Apache License

@Override
public long getSize() {
    final FileSystem fs = getHadoopFileSystem();
    try {
        if (fs.isFile(getHadoopPath())) {
            return fs.getFileStatus(getHadoopPath()).getLen();
        } else {
            return fs.getContentSummary(getHadoopPath()).getLength();
        }
    } catch (Exception e) {
        throw wrapException(e);
    } finally {
        FileHelper.safeClose(fs);
    }
}

From source file:org.apache.metamodel.util.HdfsResource.java

License:Apache License

@Override
public InputStream read() throws ResourceException {
    final FileSystem fs = getHadoopFileSystem();
    final InputStream in;
    try {
        final Path hadoopPath = getHadoopPath();
        // return a wrapper InputStream which manages the 'fs' closeable
        if (fs.isFile(hadoopPath)) {
            in = fs.open(hadoopPath);
            return new HdfsFileInputStream(in, fs);
        } else {
            return new HdfsDirectoryInputStream(hadoopPath, fs);
        }
    } catch (Exception e) {
        // we can close 'fs' in case of an exception
        FileHelper.safeClose(fs);
        throw wrapException(e);
    }
}

From source file:org.apache.oozie.action.hadoop.FsActionExecutor.java

License:Apache License

@SuppressWarnings("unchecked")
void doOperations(Context context, Element element) throws ActionExecutorException {
    try {
        FileSystem fs = context.getAppFileSystem();
        boolean recovery = fs.exists(getRecoveryPath(context));
        if (!recovery) {
            fs.mkdirs(getRecoveryPath(context));
        }

        Path nameNodePath = null;
        Element nameNodeElement = element.getChild("name-node", element.getNamespace());
        if (nameNodeElement != null) {
            String nameNode = nameNodeElement.getTextTrim();
            if (nameNode != null) {
                nameNodePath = new Path(nameNode);
                // Verify the name node now
                validatePath(nameNodePath, true);
            }
        }

        XConfiguration fsConf = new XConfiguration();
        Path appPath = new Path(context.getWorkflow().getAppPath());
        // app path could be a file
        if (fs.isFile(appPath)) {
            appPath = appPath.getParent();
        }
        JavaActionExecutor.parseJobXmlAndConfiguration(context, element, appPath, fsConf);

        for (Element commandElement : (List<Element>) element.getChildren()) {
            String command = commandElement.getName();
            if (command.equals("mkdir")) {
                Path path = getPath(commandElement, "path");
                mkdir(context, fsConf, nameNodePath, path);
            } else {
                if (command.equals("delete")) {
                    Path path = getPath(commandElement, "path");
                    boolean skipTrash = true;
                    if (commandElement.getAttributeValue("skip-trash") != null
                            && commandElement.getAttributeValue("skip-trash").equals("false")) {
                        skipTrash = false;
                    }
                    delete(context, fsConf, nameNodePath, path, skipTrash);
                } else {
                    if (command.equals("move")) {
                        Path source = getPath(commandElement, "source");
                        Path target = getPath(commandElement, "target");
                        move(context, fsConf, nameNodePath, source, target, recovery);
                    } else {
                        if (command.equals("chmod")) {
                            Path path = getPath(commandElement, "path");
                            boolean recursive = commandElement.getChild("recursive",
                                    commandElement.getNamespace()) != null;
                            String str = commandElement.getAttributeValue("dir-files");
                            boolean dirFiles = (str == null) || Boolean.parseBoolean(str);
                            String permissionsMask = commandElement.getAttributeValue("permissions").trim();
                            chmod(context, fsConf, nameNodePath, path, permissionsMask, dirFiles, recursive);
                        } else {
                            if (command.equals("touchz")) {
                                Path path = getPath(commandElement, "path");
                                touchz(context, fsConf, nameNodePath, path);
                            } else {
                                if (command.equals("chgrp")) {
                                    Path path = getPath(commandElement, "path");
                                    boolean recursive = commandElement.getChild("recursive",
                                            commandElement.getNamespace()) != null;
                                    String group = commandElement.getAttributeValue("group");
                                    String str = commandElement.getAttributeValue("dir-files");
                                    boolean dirFiles = (str == null) || Boolean.parseBoolean(str);
                                    chgrp(context, fsConf, nameNodePath, path, context.getWorkflow().getUser(),
                                            group, dirFiles, recursive);
                                }
                            }
                        }
                    }
                }
            }
        }
    } catch (Exception ex) {
        throw convertException(ex);
    }
}

From source file:org.apache.oozie.action.hadoop.FsActionExecutor.java

License:Apache License

/**
 * Move source to target
 *
 * @param context
 * @param fsConf
 * @param nameNodePath
 * @param source
 * @param target
 * @param recovery
 * @throws ActionExecutorException
 */
public void move(Context context, XConfiguration fsConf, Path nameNodePath, Path source, Path target,
        boolean recovery) throws ActionExecutorException {
    try {
        source = resolveToFullPath(nameNodePath, source, true);
        validateSameNN(source, target);
        FileSystem fs = getFileSystemFor(source, context, fsConf);
        Path[] pathArr = FileUtil.stat2Paths(fs.globStatus(source));
        if ((pathArr == null || pathArr.length == 0)) {
            if (!recovery) {
                throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "FS006",
                        "move, source path [{0}] does not exist", source);
            } else {
                return;
            }
        }
        if (pathArr.length > 1 && (!fs.exists(target) || fs.isFile(target))) {
            if (!recovery) {
                throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "FS012",
                        "move, could not rename multiple sources to the same target name");
            } else {
                return;
            }
        }
        checkGlobMax(pathArr);
        for (Path p : pathArr) {
            if (!fs.rename(p, target) && !recovery) {
                throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "FS008",
                        "move, could not move [{0}] to [{1}]", p, target);
            }
        }
    } catch (Exception ex) {
        throw convertException(ex);
    }
}