Example usage for org.apache.mahout.common HadoopUtil getSingleCachedFile

List of usage examples for org.apache.mahout.common HadoopUtil getSingleCachedFile

Introduction

On this page you can find example usages of org.apache.mahout.common HadoopUtil getSingleCachedFile.

Prototype

public static Path getSingleCachedFile(Configuration conf) throws IOException 

Source Link

Document

Return the first cached file in the list, else null if there are no cached files.

Usage

From source file:com.netease.news.classifier.naivebayes.BayesTestMapper.java

License:Apache License

/**
 * Prepares the classifier for this task from the model stored in the single cached file.
 *
 * <p>The model location is obtained from {@code HadoopUtil.getSingleCachedFile(conf)} and
 * materialized into a {@code NaiveBayesModel}. The configuration entry named by
 * {@code TestNaiveBayesDriver.COMPLEMENTARY} then selects the classifier: a
 * {@code ComplementaryNaiveBayesClassifier} when the entry parses as {@code true}, otherwise a
 * {@code StandardNaiveBayesClassifier} (a missing entry parses as {@code false}).
 *
 * @param context task context supplying the job configuration
 * @throws IOException if no cached model file is available, or if one of the calls made here
 *         raises it
 * @throws InterruptedException if one of the calls made here raises it
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();

    // getSingleCachedFile is documented to return null when there are no cached files;
    // fail with an explicit message instead of passing null on to materialize().
    Path modelPath = HadoopUtil.getSingleCachedFile(conf);
    if (modelPath == null) {
        throw new IOException("No cached model file found in the job configuration");
    }

    NaiveBayesModel model = NaiveBayesModel.materialize(modelPath, conf);
    boolean compl = Boolean.parseBoolean(conf.get(TestNaiveBayesDriver.COMPLEMENTARY));
    if (compl) {
        classifier = new ComplementaryNaiveBayesClassifier(model);
    } else {
        classifier = new StandardNaiveBayesClassifier(model);
    }
}

From source file:edu.rosehulman.TFPartialVectorReducer.java

License:Apache License

/**
 * Reads this task's vectorization settings from the job configuration and loads the
 * word-to-id dictionary from the single cached file.
 *
 * <p>{@code dimension}, {@code sequentialAccess}, {@code namedVector} and {@code maxNGramSize}
 * are read from the configuration keys declared on {@code PartialVectorMerger} and
 * {@code DictionaryVectorizer}. {@code maxNGramSize} keeps its current value when the key is
 * absent. Every record of the cached sequence file is then copied into {@code dictionary}.
 *
 * @param context task context supplying the job configuration
 * @throws IOException if no cached dictionary file is available, or if one of the calls made
 *         here raises it
 * @throws InterruptedException if one of the calls made here raises it
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();

    dimension = conf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
    sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
    namedVector = conf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);
    maxNGramSize = conf.getInt(DictionaryVectorizer.MAX_NGRAMS, maxNGramSize);

    // MAHOUT-1247
    // getSingleCachedFile is documented to return null when there are no cached files;
    // fail with an explicit message instead of handing null to the sequence-file reader.
    Path dictionaryFile = HadoopUtil.getSingleCachedFile(conf);
    if (dictionaryFile == null) {
        throw new IOException("No cached dictionary file found in the job configuration");
    }

    // Each record's key is the word and its value is the word's id.
    for (Pair<Writable, IntWritable> record : new SequenceFileIterable<Writable, IntWritable>(dictionaryFile,
            true, conf)) {
        dictionary.put(record.getFirst().toString(), record.getSecond().get());
    }
}

From source file:mlbench.bayes.BayesUtils.java

License:Apache License

/**
 * Builds a string-to-int index from the single cached sequence file.
 *
 * <p>Each record's key is converted with {@code toString()} and stored together with the
 * record's {@code IntWritable} value.
 *
 * @param conf job configuration used to locate and read the cached file
 * @return a new map holding one entry per record read from the cached file
 * @throws IOException if no cached file is available, or if one of the calls made here
 *         raises it
 */
public static OpenObjectIntHashMap<String> readIndexFromCache(Configuration conf) throws IOException {
    // getSingleCachedFile is documented to return null when there are no cached files;
    // fail with an explicit message instead of handing null to the sequence-file reader.
    Path indexFile = HadoopUtil.getSingleCachedFile(conf);
    if (indexFile == null) {
        throw new IOException("No cached index file found in the job configuration");
    }

    OpenObjectIntHashMap<String> index = new OpenObjectIntHashMap<String>();
    for (Pair<Writable, IntWritable> entry : new SequenceFileIterable<Writable, IntWritable>(indexFile, conf)) {
        index.put(entry.getFirst().toString(), entry.getSecond().get());
    }
    return index;
}