Example usage for org.apache.mahout.common HadoopUtil getCachedFiles

List of usage examples for org.apache.mahout.common HadoopUtil getCachedFiles

Introduction

In this page you can find the example usage for org.apache.mahout.common HadoopUtil getCachedFiles.

Prototype

public static Path[] getCachedFiles(Configuration conf) throws IOException 

Source Link

Document

Retrieves paths to cached files.

Usage

From source file: com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java

License: Apache License

/**
 * Generates the fList from the serialized string representation
 * /*from  w  ww  . j a v  a 2  s .c  o  m*/
 * @return Deserialized Feature Frequency List
 */
public static List<Pair<String, Long>> readFList(Configuration conf) throws IOException {
    List<Pair<String, Long>> list = Lists.newArrayList();

    Path[] files = HadoopUtil.getCachedFiles(conf);
    if (files.length != 1) {
        throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ')');
    }

    for (Pair<Text, LongWritable> record : new SequenceFileIterable<Text, LongWritable>(files[0], true, conf)) {
        list.add(new Pair<String, Long>(record.getFirst().toString(), record.getSecond().get()));
    }
    return list;
}

From source file: com.cg.mapreduce.myfpgrowth.ParallelFPGrowthMapper.java

License: Apache License

/**
 * Deserializes the feature frequency list (fList) from the distributed cache.
 *
 * <p>Expects exactly one cached file, a sequence file of
 * {@code Text -> LongWritable} records.</p>
 *
 * @param conf job configuration used to locate the cached file
 * @return list of (feature, frequency) pairs read from the cache
 * @throws IOException if the cache does not hold exactly one file
 */
public List<Pair<String, Long>> readFList(Configuration conf) throws IOException {
    Path[] cachedFiles = HadoopUtil.getCachedFiles(conf);
    if (cachedFiles.length != 1) {
        throw new IOException("Cannot read Frequency list from Distributed Cache (" + cachedFiles.length + ')');
    }

    List<Pair<String, Long>> result = Lists.newArrayList();
    SequenceFileIterable<Text, LongWritable> records =
            new SequenceFileIterable<Text, LongWritable>(cachedFiles[0], true, conf);
    for (Pair<Text, LongWritable> record : records) {
        String feature = record.getFirst().toString();
        long frequency = record.getSecond().get();
        result.add(new Pair<String, Long>(feature, frequency));
    }
    return result;
}

From source file: org.gpfvic.mahout.cf.taste.hadoop.als.ALS.java

License: Apache License

/**
 * Reads matrix rows from every file in the distributed cache and assembles
 * them into an in-memory map of row index to row vector.
 *
 * <p>Each cached file is a sequence file of {@code IntWritable -> VectorWritable}
 * records, read from the local file system of the task node.</p>
 *
 * @param numEntities expected row count; used to presize the map when positive
 * @param conf job configuration used to locate the cached files
 * @return map from row index to feature vector; guaranteed non-empty
 * @throws IOException on failure reading a cached sequence file
 */
public static OpenIntObjectHashMap<Vector> readMatrixByRowsFromDistributedCache(int numEntities,
        Configuration conf) throws IOException {

    OpenIntObjectHashMap<Vector> matrix = numEntities > 0
            ? new OpenIntObjectHashMap<Vector>(numEntities)
            : new OpenIntObjectHashMap<Vector>();

    LocalFileSystem localFs = FileSystem.getLocal(conf);
    IntWritable index = new IntWritable();
    VectorWritable vector = new VectorWritable();

    for (Path file : HadoopUtil.getCachedFiles(conf)) {
        try (SequenceFile.Reader reader = new SequenceFile.Reader(localFs, file, conf)) {
            while (reader.next(index, vector)) {
                matrix.put(index.get(), vector.get());
            }
        }
    }

    // An empty matrix means the cache was missing or unreadable — fail loudly.
    Preconditions.checkState(!matrix.isEmpty(), "Feature matrix is empty");
    return matrix;
}