List of usage examples for org.apache.hadoop.io IntWritable get
public int get()
From source file:org.apache.mahout.feature.mrmr.MRMRReducer.java
License:Apache License
/**
 * Scores one candidate (identified by {@code index}) from its comma-separated records and
 * emits the score.
 *
 * <p>Each incoming record is split on {@code ","} and read as
 * {@code candidateValue,value,type[,featureName]}:
 * <ul>
 *   <li>type {@code "t"}: the pair (candidateValue, value) is stored in the target matrix;</li>
 *   <li>type {@code "f"}: the pair is stored in the matrix whose name equals the fourth
 *       field, creating that matrix on first sight;</li>
 *   <li>any other type is ignored.</li>
 * </ul>
 *
 * <p>The emitted score is {@code computeResult(target) - coefficient * sum(computeResult(f))},
 * where {@code coefficient} is {@code 1 / features.size()} when more than one feature matrix
 * exists and {@code 1.0} otherwise. (Presumably {@code computeResult} is a mutual-information
 * estimate - confirm against {@code MutualInformation}.)
 *
 * <p>The output key is always {@code LongWritable(0)}; the output value is
 * {@code "<index>,<score with 5 decimals>"}.
 *
 * @param index   key of the candidate being scored
 * @param items   records for this candidate in the format described above
 * @param context Hadoop context the result is written to
 * @throws IOException           if writing to the context fails
 * @throws InterruptedException  if writing to the context is interrupted
 * @throws NumberFormatException if a numeric field cannot be parsed
 */
public void reduce(IntWritable index, Iterable<Text> items, Context context)
    throws IOException, InterruptedException {
  MatrixList target = new MatrixList();
  ArrayList<MatrixList> features = new ArrayList<MatrixList>();

  for (Text item : items) {
    String[] values = item.toString().split(",");
    int candidateValue = Integer.parseInt(values[0]);
    String type = values[2];

    if (type.equals("t")) {
      int targetValue = Integer.parseInt(values[1]);
      target.store(candidateValue, targetValue);
    } else if (type.equals("f")) {
      int featureValue = Integer.parseInt(values[1]);
      String featureName = values[3];

      // Look for an already-known feature matrix with this name.
      boolean isNew = true;
      for (MatrixList matrix : features) {
        if (matrix.getName().equals(featureName)) {
          isNew = false;
          matrix.store(candidateValue, featureValue);
          break;
        }
      }
      if (isNew) {
        MatrixList matrix = new MatrixList();
        matrix.setName(featureName);
        matrix.store(candidateValue, featureValue);
        features.add(matrix);
      }
    }
  }

  MutualInformation mi = new MutualInformation();
  double sumFeatures = 0.0;
  for (MatrixList f : features) {
    sumFeatures = sumFeatures + mi.computeResult(f);
  }
  double sumTarget = mi.computeResult(target);

  double coefficient = 1.0;
  if (features.size() > 1) {
    coefficient = (1.0 / ((double) features.size()));
  }
  double correlation = sumTarget - (coefficient * sumFeatures);

  // The value is a comma-separated record, so the decimal separator must never be a comma.
  // Formatting with the default locale would emit e.g. "3,0,12345" on comma-decimal locales.
  String formatted = String.format(java.util.Locale.ROOT, "%.5f", correlation);
  context.write(new LongWritable(0), new Text(index.get() + "," + formatted));
}
From source file:org.apache.mahout.fpm.bigfim.AprioriPhaseReducer.java
License:Apache License
/**
 * Adds up the partial supports received for {@code key} and writes the total, as decimal
 * text, only when it reaches {@code minSup}.
 *
 * @param key     the key whose partial supports are being combined
 * @param values  partial support counts for {@code key}
 * @param context Hadoop context the (key, total) pair is written to
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException {
  int totalSupport = 0;
  for (IntWritable partial : values) {
    totalSupport = totalSupport + partial.get();
  }

  boolean frequent = totalSupport >= minSup;
  if (frequent) {
    context.write(key, new Text(String.valueOf(totalSupport)));
  }
}
From source file:org.apache.mahout.freqtermsets.ParallelFPGrowthReducer.java
License:Apache License
@Override protected void reduce(IntWritable key, Iterable<TransactionTree> values, Context context) throws IOException { TransactionTree cTree = new TransactionTree(); for (TransactionTree tr : values) { for (Pair<IntArrayList, Long> p : tr) { cTree.addPattern(p.getFirst(), p.getSecond()); }/*from ww w . j a va2s. c o m*/ } List<Pair<Integer, Long>> localFList = Lists.newArrayList(); for (Entry<Integer, MutableLong> fItem : cTree.generateFList().entrySet()) { localFList.add(new Pair<Integer, Long>(fItem.getKey(), fItem.getValue().toLong())); } Collections.sort(localFList, new CountDescendingPairComparator<Integer, Long>()); // if (useFP2) { // org.apache.mahout.freqtermsets.fpgrowth2.FPGrowthIds fpGrowth = // new org.apache.mahout.freqtermsets.fpgrowth2.FPGrowthIds(featureReverseMap); // fpGrowth.generateTopKFrequentPatterns( // cTree.iterator(), // freqList, // minSupport, // maxHeapSize, // PFPGrowth.getGroupMembers(key.get(), maxPerGroup, numFeatures), // new IntegerStringOutputConverter( // new ContextWriteOutputCollector<IntWritable,TransactionTree,Text,TopKStringPatterns>(context), // featureReverseMap,minWordsForLangDetection/*, superiorityRatio*/), // new ContextStatusUpdater<IntWritable,TransactionTree,Text,TopKStringPatterns>(context)); // } else { FPGrowth<Integer> fpGrowth = new FPGrowth<Integer>(); fpGrowth.generateTopKFrequentPatterns( // new IteratorAdapter(cTree.iterator()), cTree, localFList, minSupport, maxHeapSize, new HashSet<Integer>(PFPGrowth.getGroupMembers(key.get(), maxPerGroup, numFeatures).toList()), new IntegerStringOutputConverter( new ContextWriteOutputCollector<IntWritable, TransactionTree, Text, TopKStringPatterns>( context), featureReverseMap, minWordsForLangDetection/*, superiorityRatio*/, repeatHashTag), new ContextStatusUpdater<IntWritable, TransactionTree, Text, TopKStringPatterns>(context), -1, -1); //those will not be used as long as there is something in the returnable features // } }
From source file:org.apache.mahout.freqtermsets.ParallelFPStreamReducer.java
License:Apache License
@Override protected void reduce(IntWritable key, Iterable<TransactionTree> values, Context context) throws IOException { TransactionTree cTree = new TransactionTree(); int numPatterns = 0; for (TransactionTree tr : values) { for (Pair<IntArrayList, Long> p : tr) { cTree.addPattern(p.getFirst(), p.getSecond()); ++numPatterns;//w ww. j a va 2 s .co m } } // if (fisIxMultiReader != null) { if (fisIxReader != null) { BooleanQuery.setMaxClauseCount(numPatterns); BooleanQuery allPatternsQuery = new BooleanQuery(); Iterator<Pair<IntArrayList, Long>> cTreeIter = cTree.iterator(true); while (cTreeIter.hasNext()) { IntArrayList newPatternIds = cTreeIter.next().getFirst(); if (newPatternIds.size() == 1) { // This is already carried over by loading the older flists continue; } StringBuilder newPattenStr = new StringBuilder(); for (int i = 0; i < newPatternIds.size(); ++i) { int id = newPatternIds.getQuick(i); String str = idStringMap.get(id); newPattenStr.append(str).append(" "); } try { allPatternsQuery.add(fisQparser.parse(newPattenStr.toString()), Occur.SHOULD); // fisSearcher.search(fisQparser.parse(newPattenStr.toString()),oldPatternsCollector); } catch (ParseException e) { context.setStatus("Parallel FPGrowth: caught a parse exception: " + e.getMessage()); continue; } } fisSearcher.search(allPatternsQuery, new OldPatternsCollector(context, cTree)); } List<Pair<Integer, Long>> localFList = Lists.newArrayList(); for (Entry<Integer, MutableLong> fItem : cTree.generateFList().entrySet()) { localFList.add(new Pair<Integer, Long>(fItem.getKey(), fItem.getValue().toLong())); } Collections.sort(localFList, new CountDescendingPairComparator<Integer, Long>()); FPGrowth<Integer> fpGrowth = new FPGrowth<Integer>(); fpGrowth.generateTopKFrequentPatterns( //new IteratorAdapter(cTree.iterator()), cTree, localFList, minSupport, maxHeapSize, null, new IntegerStringOutputConverter( new ContextWriteOutputCollector<IntWritable, TransactionTree, Text, TopKStringPatterns>( context), 
idStringMap, minWordsForLangDetection, repeatHashTag), new ContextStatusUpdater<IntWritable, TransactionTree, Text, TopKStringPatterns>(context), key.get(), numGroups); }
From source file:org.apache.mahout.math.hadoop.decomposer.HdfsBackedLanczosState.java
License:Apache License
/**
 * Scans the sequence file at {@code p} for the first record whose integer key equals
 * {@code keyIndex} and returns that record's vector.
 *
 * @param p        path of the sequence file to scan
 * @param keyIndex key to look for
 * @return the vector stored under {@code keyIndex}, or {@code null} if the file does not
 *         exist or contains no record with that key
 * @throws IOException if the file cannot be opened, read or closed
 */
protected Vector fetchVector(Path p, int keyIndex) throws IOException {
  if (!fs.exists(p)) {
    return null;
  }
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf);
  try {
    IntWritable key = new IntWritable();
    VectorWritable vw = new VectorWritable();
    while (reader.next(key, vw)) {
      if (key.get() == keyIndex) {
        return vw.get();
      }
    }
    return null;
  } finally {
    // Always release the underlying file handle, on a hit, a miss or a read failure.
    reader.close();
  }
}
From source file:org.apache.mahout.math.hadoop.MathHelper.java
License:Apache License
/**
 * Reads a {@link Matrix} from a {@code SequenceFile<IntWritable,VectorWritable>}.
 *
 * <p>Each record's key is used as the row index, and every non-zero element of the record's
 * vector is copied into a {@link DenseMatrix} of the requested size.
 *
 * @param conf    configuration used to open the file
 * @param path    location of the sequence file
 * @param rows    number of rows of the resulting matrix
 * @param columns number of columns of the resulting matrix
 * @return the populated dense matrix
 * @throws IllegalStateException if the file yields no record at all
 */
public static Matrix readMatrix(Configuration conf, Path path, int rows, int columns) {
  Matrix result = new DenseMatrix(rows, columns);
  boolean sawAnyRow = false;

  SequenceFileIterable<IntWritable, VectorWritable> records =
      new SequenceFileIterable<IntWritable, VectorWritable>(path, true, conf);
  for (Pair<IntWritable, VectorWritable> entry : records) {
    sawAnyRow = true;
    int rowIndex = entry.getFirst().get();
    VectorWritable rowVector = entry.getSecond();
    for (Element cell : rowVector.get().nonZeroes()) {
      result.set(rowIndex, cell.index(), cell.get());
    }
  }

  if (sawAnyRow) {
    return result;
  }
  throw new IllegalStateException("Not a single row read!");
}
From source file:org.apache.mahout.math.hadoop.MathHelper.java
License:Apache License
/**
 * Reads the rows of a {@code SequenceFile<IntWritable,VectorWritable>} into a map from
 * record key to the record's vector.
 *
 * @param conf configuration used to open the file
 * @param path location of the sequence file
 * @return map holding one vector per record key
 * @throws IllegalStateException if the file yields no record at all
 */
public static OpenIntObjectHashMap<Vector> readMatrixRows(Configuration conf, Path path) {
  OpenIntObjectHashMap<Vector> rowsByIndex = new OpenIntObjectHashMap<Vector>();
  boolean sawAnyRow = false;

  SequenceFileIterable<IntWritable, VectorWritable> records =
      new SequenceFileIterable<IntWritable, VectorWritable>(path, true, conf);
  for (Pair<IntWritable, VectorWritable> entry : records) {
    IntWritable rowKey = entry.getFirst();
    sawAnyRow = true;
    rowsByIndex.put(rowKey.get(), entry.getSecond().get());
  }

  if (sawAnyRow) {
    return rowsByIndex;
  }
  throw new IllegalStateException("Not a single row read!");
}
From source file:org.apache.mahout.math.hadoop.stats.StandardDeviationCalculatorMapper.java
License:Apache License
@Override protected void map(IntWritable key, Writable value, Context context) throws IOException, InterruptedException { if (key.get() == -1) { return;/* w w w . j a va 2 s . c om*/ } //Kind of ugly, but such is life double df = Double.NaN; if (value instanceof LongWritable) { df = ((LongWritable) value).get(); } else if (value instanceof DoubleWritable) { df = ((DoubleWritable) value).get(); } if (!Double.isNaN(df)) { // For calculating the sum of squares context.write(SUM_OF_SQUARES, new DoubleWritable(df * df)); context.write(SUM, new DoubleWritable(df)); // For calculating the total number of entries context.write(TOTAL_COUNT, new DoubleWritable(1)); } }
From source file:org.apache.mahout.text.SequenceFilesFromDirectoryMapper.java
License:Apache License
/**
 * Writes the content of one input file as a (file name, bytes) pair.
 *
 * <p>{@code key} is the index of the file inside the current {@link CombineFileSplit}. The
 * output key is the file's relative path (as computed by
 * {@code HadoopUtil.calcRelativeFilePath}) preceded by {@code keyPrefix} and a path
 * separator, or by the path separator alone when {@code keyPrefix} is empty.
 *
 * @param key     index of the file within the combined input split
 * @param value   raw content of the file
 * @param context Hadoop context the pair is written to
 * @throws IOException          if the path cannot be resolved or the write fails
 * @throws InterruptedException if the write is interrupted
 */
public void map(IntWritable key, BytesWritable value, Context context)
    throws IOException, InterruptedException {
  Configuration configuration = context.getConfiguration();
  Path filePath = ((CombineFileSplit) context.getInputSplit()).getPath(key.get());
  String relativeFilePath = HadoopUtil.calcRelativeFilePath(configuration, filePath);

  String filename = this.keyPrefix.length() > 0
      ? this.keyPrefix + Path.SEPARATOR + relativeFilePath
      : Path.SEPARATOR + relativeFilePath;

  // getBytes() exposes the backing buffer, which may be longer than the valid data;
  // getLength() is the number of bytes that actually belong to the value.
  fileValue.set(value.getBytes(), 0, value.getLength());
  context.write(new Text(filename), fileValue);
}
From source file:org.apache.mahout.text.SequenceFilesFromMailArchivesMapper.java
License:Apache License
/**
 * Parses one mail archive file from the current {@link CombineFileSplit}.
 *
 * <p>{@code key} is the index of the file inside the split. The file's relative path (as
 * computed by {@code HadoopUtil.calcRelativeFilePath}) and a stream over its content are
 * handed to {@code parseMailboxLineByLine}.
 *
 * @param key     index of the file within the combined input split
 * @param value   raw content of the mail archive
 * @param context Hadoop context passed on to the mailbox parser
 * @throws IOException          if the path cannot be resolved or parsing fails
 * @throws InterruptedException if the parser is interrupted
 */
public void map(IntWritable key, BytesWritable value, Context context)
    throws IOException, InterruptedException {
  Configuration configuration = context.getConfiguration();
  Path filePath = ((CombineFileSplit) context.getInputSplit()).getPath(key.get());
  String relativeFilePath = HadoopUtil.calcRelativeFilePath(configuration, filePath);

  // getBytes() exposes the backing buffer, which may be longer than the valid data;
  // bound the stream to getLength() so the parser never sees padding bytes.
  ByteArrayInputStream is = new ByteArrayInputStream(value.getBytes(), 0, value.getLength());
  parseMailboxLineByLine(relativeFilePath, is, context);
}