Example usage for org.apache.hadoop.io IntWritable get

Introduction

This page collects example usages of org.apache.hadoop.io.IntWritable.get().

Prototype

public int get() 

Document

Return the value of this IntWritable.
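
Before the full MapReduce examples below, here is a minimal standalone sketch of the call itself (the class name IntWritableGetExample and the literal values are illustrative, not taken from any of the sources listed):

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetExample {
    public static void main(String[] args) {
        // Wrap a primitive int in a mutable Hadoop writable.
        IntWritable writable = new IntWritable(42);

        // get() returns the wrapped primitive value.
        int value = writable.get();
        System.out.println("wrapped value = " + value); // 42

        // The wrapper is reusable: set() replaces the value in place,
        // which is why Hadoop can hand the same instance to successive records.
        writable.set(7);
        System.out.println("after set(7): " + writable.get()); // 7
    }
}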

Usage

From source file: org.apache.mahout.feature.mrmr.MRMRReducer.java

License: Apache License

public void reduce(IntWritable index, Iterable<Text> items, Context context)
        throws IOException, InterruptedException {

    MatrixList target = new MatrixList();
    ArrayList<MatrixList> features = new ArrayList<MatrixList>();

    for (Text item : items) {
        String[] values = item.toString().split(",");

        int candidateValue = Integer.parseInt(values[0]);
        String type = values[2];

        if (type.equals("t")) {

            int targetValue = Integer.parseInt(values[1]);
            target.store(candidateValue, targetValue);

        } else if (type.equals("f")) {

            int featureValue = Integer.parseInt(values[1]);
            String featureName = values[3];

            boolean isNew = true;
            for (MatrixList matrix : features) {
                if (matrix.getName().equals(featureName)) {
                    isNew = false;
                    matrix.store(candidateValue, featureValue);
                    break;
                }
            }
            if (isNew) {
                MatrixList matrix = new MatrixList();
                matrix.setName(featureName);
                matrix.store(candidateValue, featureValue);
                features.add(matrix);
            }

        }
    }

    MutualInformation mi = new MutualInformation();

    double sum_features = 0.0;
    for (MatrixList f : features) {
        sum_features = sum_features + mi.computeResult(f);
    }

    double sum_target = mi.computeResult(target);

    double coefficient = 1.0;
    if (features.size() > 1)
        coefficient = (1.0 / ((double) features.size()));
    double correlation = sum_target - (coefficient * sum_features);

    context.write(new LongWritable(0), new Text(index.get() + "," + String.format("%.5f", correlation)));
}

From source file: org.apache.mahout.fpm.bigfim.AprioriPhaseReducer.java

License: Apache License

@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int sup = 0;
    for (IntWritable localSup : values) {
        sup += localSup.get();
    }

    if (sup >= minSup) {
        context.write(key, new Text(sup + ""));
    }
}

From source file: org.apache.mahout.freqtermsets.ParallelFPGrowthReducer.java

License: Apache License

@Override
protected void reduce(IntWritable key, Iterable<TransactionTree> values, Context context) throws IOException {
    TransactionTree cTree = new TransactionTree();
    for (TransactionTree tr : values) {
        for (Pair<IntArrayList, Long> p : tr) {
            cTree.addPattern(p.getFirst(), p.getSecond());
        }
    }

    List<Pair<Integer, Long>> localFList = Lists.newArrayList();
    for (Entry<Integer, MutableLong> fItem : cTree.generateFList().entrySet()) {
        localFList.add(new Pair<Integer, Long>(fItem.getKey(), fItem.getValue().toLong()));
    }

    Collections.sort(localFList, new CountDescendingPairComparator<Integer, Long>());

    //    if (useFP2) {
    //      org.apache.mahout.freqtermsets.fpgrowth2.FPGrowthIds fpGrowth = 
    //        new org.apache.mahout.freqtermsets.fpgrowth2.FPGrowthIds(featureReverseMap);
    //      fpGrowth.generateTopKFrequentPatterns(
    //          cTree.iterator(),
    //          freqList,
    //          minSupport,
    //          maxHeapSize,
    //          PFPGrowth.getGroupMembers(key.get(), maxPerGroup, numFeatures),
    //          new IntegerStringOutputConverter(
    //              new ContextWriteOutputCollector<IntWritable,TransactionTree,Text,TopKStringPatterns>(context),
    //              featureReverseMap,minWordsForLangDetection/*, superiorityRatio*/),
    //          new ContextStatusUpdater<IntWritable,TransactionTree,Text,TopKStringPatterns>(context));
    //    } else {
    FPGrowth<Integer> fpGrowth = new FPGrowth<Integer>();
    fpGrowth.generateTopKFrequentPatterns(
            //          new IteratorAdapter(cTree.iterator()),
            cTree, localFList, minSupport, maxHeapSize,
            new HashSet<Integer>(PFPGrowth.getGroupMembers(key.get(), maxPerGroup, numFeatures).toList()),
            new IntegerStringOutputConverter(
                    new ContextWriteOutputCollector<IntWritable, TransactionTree, Text, TopKStringPatterns>(
                            context),
                    featureReverseMap, minWordsForLangDetection/*, superiorityRatio*/, repeatHashTag),
            new ContextStatusUpdater<IntWritable, TransactionTree, Text, TopKStringPatterns>(context), -1, -1); //those will not be used as long as there is something in the returnable features
    //    }
}

From source file: org.apache.mahout.freqtermsets.ParallelFPStreamReducer.java

License: Apache License

@Override
protected void reduce(IntWritable key, Iterable<TransactionTree> values, Context context) throws IOException {

    TransactionTree cTree = new TransactionTree();
    int numPatterns = 0;
    for (TransactionTree tr : values) {
        for (Pair<IntArrayList, Long> p : tr) {
            cTree.addPattern(p.getFirst(), p.getSecond());
            ++numPatterns;
        }
    }

    //    if (fisIxMultiReader != null) {
    if (fisIxReader != null) {
        BooleanQuery.setMaxClauseCount(numPatterns);
        BooleanQuery allPatternsQuery = new BooleanQuery();

        Iterator<Pair<IntArrayList, Long>> cTreeIter = cTree.iterator(true);
        while (cTreeIter.hasNext()) {
            IntArrayList newPatternIds = cTreeIter.next().getFirst();
            if (newPatternIds.size() == 1) {
                // This is already carried over by loading the older flists
                continue;
            }
            StringBuilder newPattenStr = new StringBuilder();
            for (int i = 0; i < newPatternIds.size(); ++i) {
                int id = newPatternIds.getQuick(i);
                String str = idStringMap.get(id);
                newPattenStr.append(str).append(" ");
            }
            try {
                allPatternsQuery.add(fisQparser.parse(newPattenStr.toString()), Occur.SHOULD);
                // fisSearcher.search(fisQparser.parse(newPattenStr.toString()),oldPatternsCollector);
            } catch (ParseException e) {
                context.setStatus("Parallel FPGrowth: caught a parse exception: " + e.getMessage());
                continue;
            }
        }

        fisSearcher.search(allPatternsQuery, new OldPatternsCollector(context, cTree));

    }

    List<Pair<Integer, Long>> localFList = Lists.newArrayList();
    for (Entry<Integer, MutableLong> fItem : cTree.generateFList().entrySet()) {
        localFList.add(new Pair<Integer, Long>(fItem.getKey(), fItem.getValue().toLong()));
    }

    Collections.sort(localFList, new CountDescendingPairComparator<Integer, Long>());

    FPGrowth<Integer> fpGrowth = new FPGrowth<Integer>();
    fpGrowth.generateTopKFrequentPatterns(
            //new IteratorAdapter(cTree.iterator()),
            cTree, localFList, minSupport, maxHeapSize, null,
            new IntegerStringOutputConverter(
                    new ContextWriteOutputCollector<IntWritable, TransactionTree, Text, TopKStringPatterns>(
                            context),
                    idStringMap, minWordsForLangDetection, repeatHashTag),
            new ContextStatusUpdater<IntWritable, TransactionTree, Text, TopKStringPatterns>(context),
            key.get(), numGroups);

}

From source file: org.apache.mahout.math.hadoop.decomposer.HdfsBackedLanczosState.java

License: Apache License

protected Vector fetchVector(Path p, int keyIndex) throws IOException {
    if (!fs.exists(p)) {
        return null;
    }
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf);
    IntWritable key = new IntWritable();
    VectorWritable vw = new VectorWritable();
    while (reader.next(key, vw)) {
        if (key.get() == keyIndex) {
            return vw.get();
        }
    }
    return null;
}

From source file: org.apache.mahout.math.hadoop.MathHelper.java

License: Apache License

/**
 * read a {@link Matrix} from a SequenceFile<IntWritable,VectorWritable>
 */
public static Matrix readMatrix(Configuration conf, Path path, int rows, int columns) {
    boolean readOneRow = false;
    Matrix matrix = new DenseMatrix(rows, columns);
    for (Pair<IntWritable, VectorWritable> record : new SequenceFileIterable<IntWritable, VectorWritable>(path,
            true, conf)) {
        IntWritable key = record.getFirst();
        VectorWritable value = record.getSecond();
        readOneRow = true;
        int row = key.get();
        for (Element element : value.get().nonZeroes()) {
            matrix.set(row, element.index(), element.get());
        }
    }
    if (!readOneRow) {
        throw new IllegalStateException("Not a single row read!");
    }
    return matrix;
}

From source file: org.apache.mahout.math.hadoop.MathHelper.java

License: Apache License

/**
 * read a {@link Matrix} from a SequenceFile<IntWritable,VectorWritable>
 */
public static OpenIntObjectHashMap<Vector> readMatrixRows(Configuration conf, Path path) {
    boolean readOneRow = false;
    OpenIntObjectHashMap<Vector> rows = new OpenIntObjectHashMap<Vector>();
    for (Pair<IntWritable, VectorWritable> record : new SequenceFileIterable<IntWritable, VectorWritable>(path,
            true, conf)) {
        IntWritable key = record.getFirst();
        readOneRow = true;
        rows.put(key.get(), record.getSecond().get());
    }
    if (!readOneRow) {
        throw new IllegalStateException("Not a single row read!");
    }
    return rows;
}

From source file: org.apache.mahout.math.hadoop.stats.StandardDeviationCalculatorMapper.java

License: Apache License

@Override
protected void map(IntWritable key, Writable value, Context context) throws IOException, InterruptedException {
    if (key.get() == -1) {
        return;
    }
    //Kind of ugly, but such is life
    double df = Double.NaN;
    if (value instanceof LongWritable) {
        df = ((LongWritable) value).get();
    } else if (value instanceof DoubleWritable) {
        df = ((DoubleWritable) value).get();
    }
    if (!Double.isNaN(df)) {
        // For calculating the sum of squares
        context.write(SUM_OF_SQUARES, new DoubleWritable(df * df));
        context.write(SUM, new DoubleWritable(df));
        // For calculating the total number of entries
        context.write(TOTAL_COUNT, new DoubleWritable(1));
    }
}

From source file: org.apache.mahout.text.SequenceFilesFromDirectoryMapper.java

License: Apache License

public void map(IntWritable key, BytesWritable value, Context context)
        throws IOException, InterruptedException {

    Configuration configuration = context.getConfiguration();
    Path filePath = ((CombineFileSplit) context.getInputSplit()).getPath(key.get());
    String relativeFilePath = HadoopUtil.calcRelativeFilePath(configuration, filePath);

    String filename = this.keyPrefix.length() > 0 ? this.keyPrefix + Path.SEPARATOR + relativeFilePath
            : Path.SEPARATOR + relativeFilePath;

    fileValue.set(value.getBytes(), 0, value.getBytes().length);
    context.write(new Text(filename), fileValue);
}

From source file: org.apache.mahout.text.SequenceFilesFromMailArchivesMapper.java

License: Apache License

public void map(IntWritable key, BytesWritable value, Context context)
        throws IOException, InterruptedException {
    Configuration configuration = context.getConfiguration();
    Path filePath = ((CombineFileSplit) context.getInputSplit()).getPath(key.get());
    String relativeFilePath = HadoopUtil.calcRelativeFilePath(configuration, filePath);
    ByteArrayInputStream is = new ByteArrayInputStream(value.getBytes());
    parseMailboxLineByLine(relativeFilePath, is, context);
}