List of usage examples for org.apache.hadoop.io LongWritable get
public long get()
From source file:org.apache.mahout.feature.mrmr.MRMRMapper.java
License:Apache License
/** * @param index it's the column index of the original dataset * @param vector values of the column identified by index param *//* ww w. j a v a 2 s . c o m*/ public void map(LongWritable index, Text record, Context context) throws IOException, InterruptedException { if (index.get() == 0) { return; } ArrayList<String> Sindex = new ArrayList<String>(); String[] StringSetS = listSetS.toArray(new String[0]); for (int i = 0; i < StringSetS.length; i++) { String[] feature = StringSetS[i].split("\t"); Sindex.add(feature[1]); // feature[1] is the index } String[] values = record.toString().split(","); for (int i = 0; i < columnNumber; i++) { // i is the index of the candidate feature if (Sindex.contains("" + i) || i == targetIndex) continue; keyOut.set(i); textOut.set(values[i] + "," + values[targetIndex] + ",t"); context.write(keyOut, textOut); //System.out.println("-- "+keyOut.toString()+", "+textOut.toString()); for (int j = 0; j < columnNumber; j++) { // j is the index of the already selected feature if (!Sindex.contains("" + j)) continue; keyOut.set(i); textOut.set(values[i] + "," + values[j] + ",f," + j); context.write(keyOut, textOut); //System.out.println("-- "+keyOut.toString()+", "+textOut.toString()); } } }
From source file:org.apache.mahout.fpm.pfpgrowth.ParallelFPGrowthReducer.java
License:Apache License
@Override protected void reduce(LongWritable key, Iterable<TransactionTree> values, Context context) throws IOException { TransactionTree cTree = new TransactionTree(); int nodes = 0; for (TransactionTree tr : values) { Iterator<Pair<List<Integer>, Long>> it = tr.getIterator(); while (it.hasNext()) { Pair<List<Integer>, Long> p = it.next(); nodes += cTree.addPattern(p.getFirst(), p.getSecond()); }// w w w. ja v a 2 s . c o m } List<Pair<Integer, Long>> localFList = new ArrayList<Pair<Integer, Long>>(); for (Entry<Integer, MutableLong> fItem : cTree.generateFList().entrySet()) { localFList.add(new Pair<Integer, Long>(fItem.getKey(), fItem.getValue().toLong())); } Collections.sort(localFList, new Comparator<Pair<Integer, Long>>() { @Override public int compare(Pair<Integer, Long> o1, Pair<Integer, Long> o2) { int ret = o2.getSecond().compareTo(o1.getSecond()); if (ret != 0) { return ret; } return o1.getFirst().compareTo(o2.getFirst()); } }); FPGrowth<Integer> fpGrowth = new FPGrowth<Integer>(); fpGrowth.generateTopKFrequentPatterns(cTree.getIterator(), localFList, minSupport, maxHeapSize, new HashSet<Integer>(groupFeatures.get(key.get()).toList()), new IntegerStringOutputConverter( new ContextWriteOutputCollector<LongWritable, TransactionTree, Text, TopKStringPatterns>( context), featureReverseMap), new ContextStatusUpdater<LongWritable, TransactionTree, Text, TopKStringPatterns>(context)); }
From source file:org.apache.mahout.fpm.pfpgrowth.PFPGrowth.java
License:Apache License
/** * read the feature frequency List which is built at the end of the Parallel counting job * /*from ww w . j a va 2s . co m*/ * @param params * @return Feature Frequency List * @throws IOException */ public static List<Pair<String, Long>> readFList(Parameters params) throws IOException { Writable key = new Text(); LongWritable value = new LongWritable(); int minSupport = Integer.valueOf(params.get("minSupport", "3")); Configuration conf = new Configuration(); Path parallelCountingPath = new Path(params.get("output"), "parallelcounting"); FileSystem fs = FileSystem.get(parallelCountingPath.toUri(), conf); FileStatus[] outputFiles = fs.globStatus(new Path(parallelCountingPath, "part-*")); PriorityQueue<Pair<String, Long>> queue = new PriorityQueue<Pair<String, Long>>(11, new Comparator<Pair<String, Long>>() { @Override public int compare(Pair<String, Long> o1, Pair<String, Long> o2) { int ret = o2.getSecond().compareTo(o1.getSecond()); if (ret != 0) { return ret; } return o1.getFirst().compareTo(o2.getFirst()); } }); for (FileStatus fileStatus : outputFiles) { Path path = fileStatus.getPath(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); // key is feature value is count while (reader.next(key, value)) { if (value.get() >= minSupport) { queue.add(new Pair<String, Long>(key.toString(), value.get())); } } } List<Pair<String, Long>> fList = new ArrayList<Pair<String, Long>>(); while (!queue.isEmpty()) { fList.add(queue.poll()); } return fList; }
From source file:org.apache.mahout.ga.watchmaker.cd.hadoop.CDMapperTest.java
License:Apache License
public void testMap() throws Exception { EasyMock.replay(rule);// w w w . j a va2s. c o m EasyMock.replay(dl); // create and configure the mapper CDMapper mapper = new CDMapper(); List<Rule> rules = Arrays.asList(rule, rule, rule, rule); mapper.configure(rules, 1); // test the mapper DummyOutputCollector<LongWritable, CDFitness> collector = new DummyOutputCollector<LongWritable, CDFitness>(); for (int index1 = 0; index1 < mapper.rules.size(); index1++) { CDFitness eval1 = CDMapper.evaluate(mapper.target, mapper.rules.get(index1).classify(dl), dl.getLabel()); collector.collect(new LongWritable(index1), eval1); } // check the evaluations Set<LongWritable> keys = collector.getKeys(); assertEquals("Number of evaluations", rules.size(), keys.size()); CDFitness[] expected = { TP, FP, TN, FN }; for (LongWritable key : keys) { int index = (int) key.get(); assertEquals("Values for key " + key, 1, collector.getValue(key).size()); CDFitness eval = collector.getValue(key).get(0); assertEquals("Evaluation of the rule " + key, expected[index], eval); } EasyMock.verify(rule); EasyMock.verify(dl); }
From source file:org.apache.mahout.ga.watchmaker.cd.tool.ToolCombiner.java
License:Apache License
/**
 * Writes, under the unchanged key, the text produced by {@code createDescription} for the
 * key's value (narrowed to int) and the incoming values.
 *
 * @param key     input key; its long value is cast to int before being passed on
 * @param values  values handed to {@code createDescription} as an iterator
 * @param context output sink
 */
@Override
protected void reduce(LongWritable key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  int keyAsInt = (int) key.get();
  Text description = new Text(createDescription(keyAsInt, values.iterator()));
  context.write(key, description);
}
From source file:org.apache.mahout.ga.watchmaker.cd.tool.ToolReducer.java
License:Apache License
/**
 * Writes, under the unchanged key, the text produced by {@code combineDescriptions} for the
 * key's value (narrowed to int) and the incoming values.
 *
 * @param key     input key; its long value is cast to int before being passed on
 * @param values  values handed to {@code combineDescriptions} as an iterator
 * @param context output sink
 */
@Override
protected void reduce(LongWritable key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  int keyAsInt = (int) key.get();
  Text combined = new Text(combineDescriptions(keyAsInt, values.iterator()));
  context.write(key, combined);
}
From source file:org.apache.mahout.ga.watchmaker.EvalMapperTest.java
License:Apache License
public void testMap() throws Exception { // population to evaluate int populationSize = 100; List<DummyCandidate> population = DummyCandidate.generatePopulation(populationSize); // fitness evaluator DummyEvaluator.clearEvaluations();/*from w ww . j a va 2 s . c o m*/ FitnessEvaluator<DummyCandidate> evaluator = new DummyEvaluator(); // Mapper EvalMapper mapper = new EvalMapper(); Configuration conf = new Configuration(); conf.set(EvalMapper.MAHOUT_GA_EVALUATOR, StringUtils.toString(evaluator)); DummyRecordWriter<LongWritable, DoubleWritable> output = new DummyRecordWriter<LongWritable, DoubleWritable>(); Mapper<LongWritable, Text, LongWritable, DoubleWritable>.Context context = DummyRecordWriter.build(mapper, conf, output); mapper.setup(context); // evaluate the population using the mapper for (int index = 0; index < population.size(); index++) { DummyCandidate candidate = population.get(index); mapper.map(new LongWritable(index), new Text(StringUtils.toString(candidate)), context); } // check that the evaluations are correct Set<LongWritable> keys = output.getKeys(); assertEquals("Number of evaluations", populationSize, keys.size()); for (LongWritable key : keys) { DummyCandidate candidate = population.get((int) key.get()); assertEquals("Values for key " + key, 1, output.getValue(key).size()); double fitness = output.getValue(key).get(0).get(); assertEquals("Evaluation of the candidate " + key, DummyEvaluator.getFitness(candidate.getIndex()), fitness); } }
From source file:org.apache.mahout.regression.extractor.FeatureExtractorMapper.java
License:Apache License
@Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { if (key.get() != 0L) { String[] numberString = value.toString().trim().split(pattern); try {// w w w . j a va 2 s . c o m int size = 1; if (interactionPairList != null) { size += interactionPairList.size(); } if (independentID != null) { size += independentID.length; } Vector result = (Vector) constructor.newInstance(size); result.set(0, Double.valueOf(numberString[dependentID])); int index = 1; if (independentID != null) { for (int i = 0; i < independentID.length; ++i) { result.set(index++, Double.valueOf(numberString[independentID[i]])); } } if (interactionPairList != null) { for (int i = 0; i < interactionPairList.size(); ++i) { Pair<Integer, Integer> pair = interactionPairList.get(i); result.set(index++, Double.valueOf(numberString[pair.getFirst()]) * Double.valueOf(numberString[pair.getSecond()])); } } VectorWritable vectorWritable = new VectorWritable(result); context.write(new Text(String.valueOf(index)), vectorWritable); } catch (InstantiationException e) { throw new IllegalStateException(e); } catch (IllegalAccessException e) { throw new IllegalStateException(e); } catch (InvocationTargetException e) { throw new IllegalStateException(e); } } }
From source file:org.apache.mahout.utils.vectors.text.term.TermCountReducer.java
License:Apache License
@Override protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException { long sum = 0; for (LongWritable value : values) { sum += value.get(); }//from ww w . java 2s .co m if (sum >= minSupport) { context.write(key, new LongWritable(sum)); } }
From source file:org.apache.mahout.utils.vectors.text.term.TermDocumentCountReducer.java
License:Apache License
/**
 * Sums the counts received for a key and writes the total under the same key.
 *
 * @param key     key the counts belong to; written unchanged
 * @param values  partial counts to add up
 * @param context output sink
 */
@Override
protected void reduce(IntWritable key, Iterable<LongWritable> values, Context context)
    throws IOException, InterruptedException {
  long total = 0;
  for (LongWritable count : values) {
    total = total + count.get();
  }

  LongWritable result = new LongWritable(total);
  context.write(key, result);
}