Example usage for the org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable constructor

List of usage examples for the SequenceFileDirIterable(Path, PathType, PathFilter, Configuration) constructor

Introduction

On this page you can find example usages of the org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable constructor.

Prototype

public SequenceFileDirIterable(Path path, PathType pathType, PathFilter filter, Configuration conf) 
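
The iterable walks the SequenceFile(s) found under the given path (PathType.LIST for the files in a single directory, PathType.GLOB for a glob pattern), applies the optional PathFilter to decide which files are read, and yields each record as a Pair of key and value Writables. Below is a minimal, self-contained sketch of typical usage; the class name, directory argument, and key/value types (Text keys and IntWritable values, e.g. word-count output) are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;

public class SequenceFileDump {

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        Path dir = new Path(args[0]); // e.g. the output directory of a MapReduce job

        // Read only the part-* files in the directory and print every (key, value) record
        for (Pair<Text, IntWritable> record : new SequenceFileDirIterable<Text, IntWritable>(
                dir, PathType.LIST, PathFilters.partFilter(), conf)) {
            System.out.println(record.getFirst() + "\t" + record.getSecond().get());
        }
    }
}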

Usage

From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java

License:Open Source License

public static BasicDBList getBsonFromSequenceFile(CustomMapReduceJobPojo cmr, int nLimit, String fields)
        throws SAXException, IOException, ParserConfigurationException {

    BasicDBList dbl = new BasicDBList();

    PropertiesManager props = new PropertiesManager();
    Configuration conf = getConfiguration(props);

    Path pathDir = HadoopUtils.getPathForJob(cmr, conf, false);

    @SuppressWarnings({ "unchecked", "rawtypes" })
    SequenceFileDirIterable<? extends Writable, ? extends Writable> seqFileDir = new SequenceFileDirIterable(
            pathDir, PathType.LIST, PathFilters.logsCRCFilter(), conf);

    // Very basic, only allow top level, 1 level of nesting, and field removal
    HashSet<String> fieldLookup = null;
    if (null != fields) {
        fieldLookup = new HashSet<String>();
        String[] fieldArray = fields.split(",");
        for (String field : fieldArray) {
            String[] fieldDecomp = field.split(":");
            fieldLookup.add(fieldDecomp[0]);
        }
    } //TOTEST

    int nRecords = 0;
    for (Pair<? extends Writable, ? extends Writable> record : seqFileDir) {
        BasicDBObject element = new BasicDBObject();

        // KEY

        Writable key = record.getFirst();
        if (key instanceof org.apache.hadoop.io.Text) {
            org.apache.hadoop.io.Text writable = (org.apache.hadoop.io.Text) key;
            element.put("key", writable.toString());
        } else if (key instanceof org.apache.hadoop.io.DoubleWritable) {
            org.apache.hadoop.io.DoubleWritable writable = (org.apache.hadoop.io.DoubleWritable) key;
            element.put("key", Double.toString(writable.get()));
        } else if (key instanceof org.apache.hadoop.io.IntWritable) {
            org.apache.hadoop.io.IntWritable writable = (org.apache.hadoop.io.IntWritable) key;
            element.put("key", Integer.toString(writable.get()));
        } else if (key instanceof org.apache.hadoop.io.LongWritable) {
            org.apache.hadoop.io.LongWritable writable = (org.apache.hadoop.io.LongWritable) key;
            element.put("key", Long.toString(writable.get()));
        } else if (key instanceof BSONWritable) {
            element.put("key", MongoDbUtil.convert((BSONWritable) key));
        }

        // VALUE

        Writable value = record.getSecond();
        if (value instanceof org.apache.hadoop.io.Text) {
            org.apache.hadoop.io.Text writable = (org.apache.hadoop.io.Text) value;
            element.put("value", writable.toString());
        } else if (value instanceof org.apache.hadoop.io.DoubleWritable) {
            org.apache.hadoop.io.DoubleWritable writable = (org.apache.hadoop.io.DoubleWritable) value;
            element.put("value", Double.toString(writable.get()));
        } else if (value instanceof org.apache.hadoop.io.IntWritable) {
            org.apache.hadoop.io.IntWritable writable = (org.apache.hadoop.io.IntWritable) value;
            element.put("value", Integer.toString(writable.get()));
        } else if (value instanceof org.apache.hadoop.io.LongWritable) {
            org.apache.hadoop.io.LongWritable writable = (org.apache.hadoop.io.LongWritable) value;
            element.put("value", Long.toString(writable.get()));
        } else if (value instanceof BSONWritable) {
            element.put("value", MongoDbUtil.convert((BSONWritable) value));
        } else if (value instanceof org.apache.mahout.math.VectorWritable) {
            Vector vec = ((org.apache.mahout.math.VectorWritable) value).get();
            BasicDBList dbl2 = listFromMahoutVector(vec, "value", element);
            element.put("value", dbl2);
        } else if (value instanceof org.apache.mahout.clustering.classify.WeightedVectorWritable) {
            org.apache.mahout.clustering.classify.WeightedVectorWritable vecW = (org.apache.mahout.clustering.classify.WeightedVectorWritable) value;
            element.put("valueWeight", vecW.getWeight());
            BasicDBList dbl2 = listFromMahoutVector(vecW.getVector(), "value", element);
            element.put("value", dbl2);
        } else if (value instanceof org.apache.mahout.clustering.iterator.ClusterWritable) {
            Cluster cluster = ((org.apache.mahout.clustering.iterator.ClusterWritable) value).getValue();
            BasicDBObject clusterVal = new BasicDBObject();
            clusterVal.put("center", listFromMahoutVector(cluster.getCenter(), "center", clusterVal));
            clusterVal.put("radius", listFromMahoutVector(cluster.getRadius(), "radius", clusterVal));
            element.put("value", clusterVal);
        } else {
            element.put("unknownValue", value.getClass().toString());
        }

        // Check the fields settings:
        // Only handle a few...
        if (null != fieldLookup) {
            for (String fieldToRemove : fieldLookup) {
                if (fieldToRemove.startsWith("value.")) {
                    fieldToRemove = fieldToRemove.substring(6);
                    // look up the nested "value" object written above (not "value.")
                    Object nested = element.get("value");
                    if (nested instanceof BasicDBObject) {
                        ((BasicDBObject) nested).remove(fieldToRemove);
                    }
                } else {
                    element.remove(fieldToRemove);
                }
            } //TOTEST
        }

        dbl.add(element);
        nRecords++;
        if ((nLimit > 0) && (nRecords >= nLimit)) {
            break;
        }
    }

    return dbl;
}

From source file:com.luca.filipponi.tweetAnalysis.SentimentClassifier.CustomTestNaiveBayesDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(addOption(DefaultOptionCreator.overwriteOption().create()));
    addOption("model", "m", "The path to the model built during training", true);
    addOption(
            buildOption("testComplementary", "c", "test complementary?", false, false, String.valueOf(false)));
    addOption(buildOption("runSequential", "seq", "run sequential?", false, false, String.valueOf(false)));
    addOption("labelIndex", "l", "The path to the location of the label index", true);
    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), getOutputPath());
    }

    boolean complementary = hasOption("testComplementary");
    boolean sequential = hasOption("runSequential");
    if (sequential) {
        FileSystem fs = FileSystem.get(getConf());
        NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf());
        AbstractNaiveBayesClassifier classifier;
        if (complementary) {
            classifier = new ComplementaryNaiveBayesClassifier(model);
        } else {
            classifier = new StandardNaiveBayesClassifier(model);
        }
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(), getOutputPath(), Text.class,
                VectorWritable.class);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, getInputPath(), getConf());
        Text key = new Text();
        VectorWritable vw = new VectorWritable();
        while (reader.next(key, vw)) {
            writer.append(new Text(SLASH.split(key.toString())[1]),
                    new VectorWritable(classifier.classifyFull(vw.get())));
        }
        writer.close();
        reader.close();
    } else {
        boolean succeeded = runMapReduce(parsedArgs);
        if (!succeeded) {
            return -1;
        }
    }

    //load the labels
    Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(getOption("labelIndex")));

    //loop over the results and create the confusion matrix
    SequenceFileDirIterable<Text, VectorWritable> dirIterable = new SequenceFileDirIterable<Text, VectorWritable>(
            getOutputPath(), PathType.LIST, PathFilters.partFilter(), getConf());
    ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT");
    analyzeResults(labelMap, dirIterable, analyzer);

    log.info("{} Results: {}", complementary ? "Complementary" : "Standard NB", analyzer);
    return 0;
}

From source file:com.missionsky.scp.dataanalysis.mahout.TestNaiveBayesDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(addOption(DefaultOptionCreator.overwriteOption().create()));
    addOption("model", "m", "The path to the model built during training", true);
    addOption(
            buildOption("testComplementary", "c", "test complementary?", false, false, String.valueOf(false)));
    addOption(buildOption("runSequential", "seq", "run sequential?", false, false, String.valueOf(false)));
    addOption("labelIndex", "l", "The path to the location of the label index", true);
    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), getOutputPath());
    }

    boolean complementary = hasOption("testComplementary");
    boolean sequential = hasOption("runSequential");
    if (sequential) {
        FileSystem fs = FileSystem.get(getConf());
        NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf());
        AbstractNaiveBayesClassifier classifier;
        if (complementary) {
            classifier = new ComplementaryNaiveBayesClassifier(model);
        } else {
            classifier = new StandardNaiveBayesClassifier(model);
        }
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(), getOutputPath(), Text.class,
                VectorWritable.class);
        Reader reader = new Reader(fs, getInputPath(), getConf());
        Text key = new Text();
        VectorWritable vw = new VectorWritable();
        while (reader.next(key, vw)) {
            writer.append(new Text(SLASH.split(key.toString())[1]),
                    new VectorWritable(classifier.classifyFull(vw.get())));
        }
        writer.close();
        reader.close();
    } else {
        boolean succeeded = runMapReduce(parsedArgs);
        if (!succeeded) {
            return -1;
        }
    }

    //load the labels
    Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(getOption("labelIndex")));

    //loop over the results and create the confusion matrix
    SequenceFileDirIterable<Text, VectorWritable> dirIterable = new SequenceFileDirIterable<Text, VectorWritable>(
            getOutputPath(), PathType.LIST, PathFilters.partFilter(), getConf());
    ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT");
    analyzeResults(labelMap, dirIterable, analyzer);

    log.info("{} Results: {}", complementary ? "Complementary" : "Standard NB", analyzer);
    return 0;
}

From source file:com.netease.news.classifier.naivebayes.TrainNaiveBayesJob.java

License:Apache License

private long createLabelIndex(Path labPath) throws IOException {
    long labelSize = 0;
    if (hasOption(LABELS)) {
        Iterable<String> labels = Splitter.on(",").split(getOption(LABELS));
        labelSize = BayesUtils.writeLabelIndex(getConf(), labels, labPath);
    } else if (hasOption(EXTRACT_LABELS)) {
        Iterable<Pair<Text, IntWritable>> iterable = new SequenceFileDirIterable<Text, IntWritable>(
                getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(), getConf());
        labelSize = BayesUtils.writeLabelIndex(getConf(), labPath, iterable);
    }
    return labelSize;
}

From source file:de.tuberlin.dima.recsys.ssnmm.ratingprediction.Evaluate.java

License:Apache License

public static void main(String[] args) throws IOException {

    int numUsers = 1823179;
    int numItems = 136736;
    double mu = 3.157255412010664;

    String distributedSimilarityMatrixPath = "/home/ssc/Desktop/yahoo/similarityMatrix/";
    String itemBiasesFilePath = "/home/ssc/Desktop/yahoo/itemBiases.tsv";
    String userBiasesFilePath = "/home/ssc/Desktop/yahoo/userBiases.tsv";
    String trainingSetPath = "/home/ssc/Entwicklung/datasets/yahoo-songs/songs.tsv";
    String holdoutSetPath = "/home/ssc/Entwicklung/datasets/yahoo-songs/holdout.tsv";

    Matrix similarities = new SparseRowMatrix(numItems, numItems);

    System.out.println("Reading similarities...");
    int similaritiesRead = 0;
    Configuration conf = new Configuration();
    for (Pair<IntWritable, VectorWritable> pair : new SequenceFileDirIterable<IntWritable, VectorWritable>(
            new Path(distributedSimilarityMatrixPath), PathType.LIST, PathFilters.partFilter(), conf)) {

        int item = pair.getFirst().get();
        Iterator<Vector.Element> elements = pair.getSecond().get().iterateNonZero();

        while (elements.hasNext()) {
            Vector.Element elem = elements.next();
            similarities.setQuick(item, elem.index(), elem.get());
            similaritiesRead++;
        }
    }
    System.out.println("Found " + similaritiesRead + " similarities");

    Pattern sep = Pattern.compile("\t");

    double[] itemBiases = new double[numItems];
    double[] userBiases = new double[numUsers];

    System.out.println("Reading item biases");
    for (String line : new FileLineIterable(new File(itemBiasesFilePath))) {
        String[] parts = sep.split(line);
        itemBiases[Integer.parseInt(parts[0])] = Double.parseDouble(parts[1]);
    }

    System.out.println("Reading user biases");
    for (String line : new FileLineIterable(new File(userBiasesFilePath))) {
        String[] parts = sep.split(line);
        userBiases[Integer.parseInt(parts[0])] = Double.parseDouble(parts[1]);
    }

    Iterator<Rating> trainRatings = new RatingsIterable(new File(trainingSetPath)).iterator();
    Iterator<Rating> heldOutRatings = new RatingsIterable(new File(holdoutSetPath)).iterator();

    int currentUser = 0;
    OpenIntDoubleHashMap prefs = new OpenIntDoubleHashMap();

    int usersProcessed = 0;
    RunningAverage rmse = new FullRunningAverage();
    RunningAverage mae = new FullRunningAverage();

    RunningAverage rmseBase = new FullRunningAverage();
    RunningAverage maeBase = new FullRunningAverage();

    while (trainRatings.hasNext()) {
        Rating rating = trainRatings.next();
        if (rating.user() != currentUser) {
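            // all of currentUser's training ratings are now collected in prefs;
            // evaluate that user's ten held-out ratings before moving to the next user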

            for (int n = 0; n < 10; n++) {
                Rating heldOutRating = heldOutRatings.next();
                Preconditions.checkState(heldOutRating.user() == currentUser);

                double preference = 0.0;
                double totalSimilarity = 0.0;
                int count = 0;

                Iterator<Vector.Element> similarItems = similarities.viewRow(heldOutRating.item())
                        .iterateNonZero();
                while (similarItems.hasNext()) {
                    Vector.Element similarity = similarItems.next();
                    int similarItem = similarity.index();
                    if (prefs.containsKey(similarItem)) {
                        preference += similarity.get() * (prefs.get(similarItem)
                                - (mu + userBiases[currentUser] + itemBiases[similarItem]));
                        totalSimilarity += Math.abs(similarity.get());
                        count++;

                    }
                }

                double baselineEstimate = mu + userBiases[currentUser] + itemBiases[heldOutRating.item()];
                double estimate = baselineEstimate;

                if (count > 1) {
                    estimate += preference / totalSimilarity;
                }

                double baseError = Math.abs(heldOutRating.rating() - baselineEstimate);
                maeBase.addDatum(baseError);
                rmseBase.addDatum(baseError * baseError);

                double error = Math.abs(heldOutRating.rating() - estimate);
                mae.addDatum(error);
                rmse.addDatum(error * error);

            }

            if (++usersProcessed % 10000 == 0) {
                System.out.println(usersProcessed + " users processed, MAE " + mae.getAverage() + ", RMSE "
                        + Math.sqrt(rmse.getAverage()) + " | baseline MAE " + maeBase.getAverage()
                        + ", baseline RMSE " + Math.sqrt(rmseBase.getAverage()));
            }

            currentUser = rating.user();
            prefs.clear();

        }
        prefs.put(rating.item(), rating.rating());

    }

    System.out.println(usersProcessed + " users processed, MAE " + mae.getAverage() + ", RMSE "
            + Math.sqrt(rmse.getAverage()) + " | baseline MAE " + maeBase.getAverage() + ", baseline RMSE "
            + Math.sqrt(rmseBase.getAverage()));
}

From source file:mlbench.bayes.BayesUtils.java

License:Apache License

public static NaiveBayesModel readModelFromDir(Path base, Configuration conf) {

    float alphaI = conf.getFloat(ThetaMapper.ALPHA_I, 1.0f);

    // read feature sums and label sums
    Vector scoresPerLabel = null;
    Vector scoresPerFeature = null;
    for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>(
            new Path(base, TrainNaiveBayesJob.WEIGHTS), PathType.LIST, PathFilters.partFilter(), conf)) {
        String key = record.getFirst().toString();
        VectorWritable value = record.getSecond();
        if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_FEATURE)) {
            scoresPerFeature = value.get();
        } else if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_LABEL)) {
            scoresPerLabel = value.get();
        }
    }

    // Preconditions.checkNotNull(scoresPerFeature);
    // Preconditions.checkNotNull(scoresPerLabel);

    Matrix scoresPerLabelAndFeature = new SparseMatrix(scoresPerLabel.size(), scoresPerFeature.size());
    for (Pair<IntWritable, VectorWritable> entry : new SequenceFileDirIterable<IntWritable, VectorWritable>(
            new Path(base, TrainNaiveBayesJob.SUMMED_OBSERVATIONS), PathType.LIST, PathFilters.partFilter(),
            conf)) {
        scoresPerLabelAndFeature.assignRow(entry.getFirst().get(), entry.getSecond().get());
    }

    Vector perlabelThetaNormalizer = scoresPerLabel.like();
    /*
     * for (Pair<Text,VectorWritable> entry : new
     * SequenceFileDirIterable<Text,VectorWritable>( new Path(base,
     * TrainNaiveBayesJob.THETAS), PathType.LIST, PathFilters.partFilter(),
     * conf)) { if (entry.getFirst().toString().equals(TrainNaiveBayesJob.
     * LABEL_THETA_NORMALIZER)) { perlabelThetaNormalizer =
     * entry.getSecond().get(); } }
     * 
     * Preconditions.checkNotNull(perlabelThetaNormalizer);
     */
    return new NaiveBayesModel(scoresPerLabelAndFeature, scoresPerFeature, scoresPerLabel,
            perlabelThetaNormalizer, alphaI, false);
}

From source file:mlbench.bayes.test.BayesTest.java

License:Apache License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException {
    parseArgs(args);
    HashMap<String, String> conf = new HashMap<String, String>();
    initConf(conf);
    MPI_D.Init(args, MPI_D.Mode.Common, conf);

    if (MPI_D.COMM_BIPARTITE_O != null) {
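        // "map"-like tasks: read the test vectors from the local input splits, classify each
        // with the Naive Bayes model, and send (label, score vector) pairs to the other side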
        rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
        size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
        NaiveBayesModel model = NaiveBayesModel.materialize(modelPath, config);
        classifier = new StandardNaiveBayesClassifier(model);

        MPI_D.COMM_BIPARTITE_O.Barrier();
        FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                (JobConf) config, inDir, rank);

        for (int i = 0; i < inputs.length; i++) {
            FileSplit fsplit = inputs[i];
            SequenceFileRecordReader<Text, VectorWritable> kvrr = new SequenceFileRecordReader<>(config,
                    fsplit);
            Text key = kvrr.createKey();
            VectorWritable value = kvrr.createValue();
            while (kvrr.next(key, value)) {
                Vector result = classifier.classifyFull(value.get());
                MPI_D.Send(new Text(SLASH.split(key.toString())[1]), new VectorWritable(result));
            }
        }
    } else if (MPI_D.COMM_BIPARTITE_A != null) {
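        // "reduce"-like tasks: receive the classified vectors, write them to a SequenceFile,
        // and (on rank 0) build the confusion matrix from the written results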
        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
        config.set(MAPRED_OUTPUT_DIR, outDir);
        config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString());
        ((JobConf) config).setOutputKeyClass(Text.class);
        ((JobConf) config).setOutputValueClass(VectorWritable.class);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(config,
                DataMPIUtil.getHadoopTaskAttemptID());
        SequenceFileOutputFormat<Text, VectorWritable> outfile = new SequenceFileOutputFormat<>();
        FileSystem fs = FileSystem.get(config);

        Path output = new Path(config.get(MAPRED_OUTPUT_DIR));
        FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext);
        RecordWriter<Text, VectorWritable> outrw = null;
        try {
            fcommitter.setupJob(taskContext);
            outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("ERROR: Please set the HDFS configuration properly\n");
            System.exit(-1);
        }
        Text key = null;
        VectorWritable point = null;
        Vector vector = null;
        Object[] vals = MPI_D.Recv();
        while (vals != null) {
            key = (Text) vals[0];
            point = (VectorWritable) vals[1];
            if (key != null && point != null) {
                vector = point.get();
                outrw.write(key, new VectorWritable(vector));
            }
            vals = MPI_D.Recv();
        }
        outrw.close(null);
        if (fcommitter.needsTaskCommit(taskContext)) {
            fcommitter.commitTask(taskContext);
        }

        MPI_D.COMM_BIPARTITE_A.Barrier();
        if (rank == 0) {
            // load the labels
            Map<Integer, String> labelMap = BayesUtils.readLabelIndex(config, labPath);
            // loop over the results and create the confusion matrix
            SequenceFileDirIterable<Text, VectorWritable> dirIterable = new SequenceFileDirIterable<Text, VectorWritable>(
                    output, PathType.LIST, PathFilters.partFilter(), config);
            ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT");
            analyzeResults(labelMap, dirIterable, analyzer);
        }
    }
    MPI_D.Finalize();
}

From source file:mlbench.bayes.train.IndexInstances.java

License:Apache License

private static long createLabelIndex(Path labPath) throws IOException {
    long labelSize = 0;
    Iterable<Pair<Text, IntWritable>> iterable = new SequenceFileDirIterable<Text, IntWritable>(inPath,
            PathType.LIST, PathFilters.logsCRCFilter(), config);
    labelSize = BayesUtils.writeLabelIndex(config, labPath, iterable);
    return labelSize;
}

From source file:org.gpfvic.mahout.cf.taste.hadoop.als.ALS.java

License:Apache License

public static OpenIntObjectHashMap<Vector> readMatrixByRows(Path dir, Configuration conf) {
    OpenIntObjectHashMap<Vector> matrix = new OpenIntObjectHashMap<>();
    for (Pair<IntWritable, VectorWritable> pair : new SequenceFileDirIterable<IntWritable, VectorWritable>(dir,
            PathType.LIST, PathFilters.partFilter(), conf)) {
        int rowIndex = pair.getFirst().get();
        Vector row = pair.getSecond().get();
        matrix.put(rowIndex, row);
    }
    return matrix;
}

From source file:org.gpfvic.mahout.cf.taste.hadoop.als.FactorizationEvaluator.java

License:Apache License

double computeRmse(Path errors) {
    RunningAverage average = new FullRunningAverage();
    for (Pair<DoubleWritable, NullWritable> entry : new SequenceFileDirIterable<DoubleWritable, NullWritable>(
            errors, PathType.LIST, PathFilters.logsCRCFilter(), getConf())) {
        DoubleWritable error = entry.getFirst();
        average.addDatum(error.get() * error.get());
    }

    return Math.sqrt(average.getAverage());
}