Usage examples for org.apache.mahout.common.HadoopUtil#getCachedFiles
public static Path[] getCachedFiles(Configuration conf) throws IOException
From source file: com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java
License: Apache License
/** * Generates the fList from the serialized string representation * /*from w ww . j a v a 2 s .c o m*/ * @return Deserialized Feature Frequency List */ public static List<Pair<String, Long>> readFList(Configuration conf) throws IOException { List<Pair<String, Long>> list = Lists.newArrayList(); Path[] files = HadoopUtil.getCachedFiles(conf); if (files.length != 1) { throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ')'); } for (Pair<Text, LongWritable> record : new SequenceFileIterable<Text, LongWritable>(files[0], true, conf)) { list.add(new Pair<String, Long>(record.getFirst().toString(), record.getSecond().get())); } return list; }
From source file: com.cg.mapreduce.myfpgrowth.ParallelFPGrowthMapper.java
License: Apache License
/**
 * Loads the serialized feature frequency list (fList) from the distributed
 * cache, which is expected to contain it as a single sequence file.
 *
 * @param conf job configuration whose distributed cache holds the serialized fList
 * @return the deserialized feature frequency list as (feature, count) pairs
 * @throws IOException if the distributed cache does not contain exactly one file
 */
public List<Pair<String, Long>> readFList(Configuration conf) throws IOException {
    Path[] cacheFiles = HadoopUtil.getCachedFiles(conf);
    // Exactly one cached file is expected; anything else means the job was mis-wired.
    if (cacheFiles.length != 1) {
        throw new IOException("Cannot read Frequency list from Distributed Cache (" + cacheFiles.length + ')');
    }
    List<Pair<String, Long>> result = Lists.newArrayList();
    for (Pair<Text, LongWritable> kv : new SequenceFileIterable<Text, LongWritable>(cacheFiles[0], true, conf)) {
        result.add(new Pair<String, Long>(kv.getFirst().toString(), kv.getSecond().get()));
    }
    return result;
}
From source file: org.gpfvic.mahout.cf.taste.hadoop.als.ALS.java
License: Apache License
public static OpenIntObjectHashMap<Vector> readMatrixByRowsFromDistributedCache(int numEntities, Configuration conf) throws IOException { IntWritable rowIndex = new IntWritable(); VectorWritable row = new VectorWritable(); OpenIntObjectHashMap<Vector> featureMatrix = numEntities > 0 ? new OpenIntObjectHashMap<Vector>(numEntities) : new OpenIntObjectHashMap<Vector>(); Path[] cachedFiles = HadoopUtil.getCachedFiles(conf); LocalFileSystem localFs = FileSystem.getLocal(conf); for (Path cachedFile : cachedFiles) { try (SequenceFile.Reader reader = new SequenceFile.Reader(localFs, cachedFile, conf)) { while (reader.next(rowIndex, row)) { featureMatrix.put(rowIndex.get(), row.get()); }//from www. j a v a2s . c om } } Preconditions.checkState(!featureMatrix.isEmpty(), "Feature matrix is empty"); return featureMatrix; }