Example usage for org.apache.hadoop.io.LongWritable.get()

List of usage examples for org.apache.hadoop.io.LongWritable.get()

Introduction

On this page you can find example usages of org.apache.hadoop.io.LongWritable.get().

Prototype

public long get() 

Source Link

Document

Return the value of this LongWritable.

Usage

From source file:org.apache.mahout.feature.mrmr.MRMRMapper.java

License:Apache License

/**
 * Emits, for every candidate feature of a record, the value pairs of that feature:
 * one pair against the target column (tagged {@code t}) and one pair against each
 * already selected feature (tagged {@code f} followed by that feature's index).
 *
 * <p>Records whose key is {@code 0} are skipped.
 * NOTE(review): presumably the key is the line offset and key 0 is a header row - confirm.
 *
 * @param index   key of the input record; a value of {@code 0} makes the record be ignored
 * @param record  one comma-separated line of the dataset
 * @param context Hadoop context that receives the emitted key/value pairs
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void map(LongWritable index, Text record, Context context) throws IOException, InterruptedException {

    if (index.get() == 0) {
        return;
    }

    // Indexes of the already selected features. A hash set keeps the membership
    // tests inside the nested column loops constant-time.
    java.util.Set<String> selectedIndexes = new java.util.HashSet<String>();
    for (String selected : listSetS.toArray(new String[0])) {
        String[] feature = selected.split("\t");
        selectedIndexes.add(feature[1]); // feature[1] is the index
    }

    String[] values = record.toString().split(",");
    for (int i = 0; i < columnNumber; i++) {
        // i is the index of the candidate feature
        if (i == targetIndex || selectedIndexes.contains(String.valueOf(i))) {
            continue;
        }

        // candidate value paired with the target value
        keyOut.set(i);
        textOut.set(values[i] + "," + values[targetIndex] + ",t");
        context.write(keyOut, textOut);

        for (int j = 0; j < columnNumber; j++) {
            // j is the index of the already selected feature
            if (!selectedIndexes.contains(String.valueOf(j))) {
                continue;
            }

            // candidate value paired with the value of selected feature j
            keyOut.set(i);
            textOut.set(values[i] + "," + values[j] + ",f," + j);
            context.write(keyOut, textOut);
        }
    }
}

From source file:org.apache.mahout.fpm.pfpgrowth.ParallelFPGrowthReducer.java

License:Apache License

/**
 * Merges all transaction trees received for a group into a single tree and mines the
 * top-K frequent patterns from it.
 *
 * <p>The local frequency list is derived from the merged tree and ordered by descending
 * count, with ties broken by ascending feature id.
 *
 * @param key     group id; used to look up the features belonging to the group
 * @param values  transaction trees emitted for this group
 * @param context Hadoop context used for output and status updates
 * @throws IOException if pattern generation or output fails
 */
@Override
protected void reduce(LongWritable key, Iterable<TransactionTree> values, Context context) throws IOException {
    // Fold every pattern of every incoming tree into one combined tree.
    TransactionTree cTree = new TransactionTree();
    for (TransactionTree tr : values) {
        Iterator<Pair<List<Integer>, Long>> it = tr.getIterator();
        while (it.hasNext()) {
            Pair<List<Integer>, Long> p = it.next();
            cTree.addPattern(p.getFirst(), p.getSecond());
        }
    }

    List<Pair<Integer, Long>> localFList = new ArrayList<Pair<Integer, Long>>();
    for (Entry<Integer, MutableLong> fItem : cTree.generateFList().entrySet()) {
        localFList.add(new Pair<Integer, Long>(fItem.getKey(), fItem.getValue().toLong()));
    }

    // Most frequent feature first; equal counts fall back to ascending feature id.
    Collections.sort(localFList, new Comparator<Pair<Integer, Long>>() {

        @Override
        public int compare(Pair<Integer, Long> o1, Pair<Integer, Long> o2) {
            int ret = o2.getSecond().compareTo(o1.getSecond());
            if (ret != 0) {
                return ret;
            }
            return o1.getFirst().compareTo(o2.getFirst());
        }

    });

    FPGrowth<Integer> fpGrowth = new FPGrowth<Integer>();
    fpGrowth.generateTopKFrequentPatterns(cTree.getIterator(), localFList, minSupport, maxHeapSize,
            new HashSet<Integer>(groupFeatures.get(key.get()).toList()),
            new IntegerStringOutputConverter(
                    new ContextWriteOutputCollector<LongWritable, TransactionTree, Text, TopKStringPatterns>(
                            context),
                    featureReverseMap),
            new ContextStatusUpdater<LongWritable, TransactionTree, Text, TopKStringPatterns>(context));
}

From source file:org.apache.mahout.fpm.pfpgrowth.PFPGrowth.java

License:Apache License

/**
 * read the feature frequency List which is built at the end of the Parallel counting job
 * /*from  ww w . j  a va 2s .  co  m*/
 * @param params
 * @return Feature Frequency List
 * @throws IOException
 */
public static List<Pair<String, Long>> readFList(Parameters params) throws IOException {
    Writable key = new Text();
    LongWritable value = new LongWritable();
    int minSupport = Integer.valueOf(params.get("minSupport", "3"));
    Configuration conf = new Configuration();

    Path parallelCountingPath = new Path(params.get("output"), "parallelcounting");
    FileSystem fs = FileSystem.get(parallelCountingPath.toUri(), conf);
    FileStatus[] outputFiles = fs.globStatus(new Path(parallelCountingPath, "part-*"));

    PriorityQueue<Pair<String, Long>> queue = new PriorityQueue<Pair<String, Long>>(11,
            new Comparator<Pair<String, Long>>() {

                @Override
                public int compare(Pair<String, Long> o1, Pair<String, Long> o2) {
                    int ret = o2.getSecond().compareTo(o1.getSecond());
                    if (ret != 0) {
                        return ret;
                    }
                    return o1.getFirst().compareTo(o2.getFirst());
                }

            });
    for (FileStatus fileStatus : outputFiles) {
        Path path = fileStatus.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        // key is feature value is count
        while (reader.next(key, value)) {
            if (value.get() >= minSupport) {
                queue.add(new Pair<String, Long>(key.toString(), value.get()));
            }
        }
    }
    List<Pair<String, Long>> fList = new ArrayList<Pair<String, Long>>();
    while (!queue.isEmpty()) {
        fList.add(queue.poll());
    }
    return fList;
}

From source file:org.apache.mahout.ga.watchmaker.cd.hadoop.CDMapperTest.java

License:Apache License

/**
 * Evaluates every rule configured on a {@code CDMapper} and checks that exactly one
 * fitness is collected per rule and that the fitness stored under key {@code i}
 * equals the {@code i}-th entry of {@code TP, FP, TN, FN}.
 */
public void testMap() throws Exception {
    // put both mocks into replay mode before they are used
    EasyMock.replay(rule);
    EasyMock.replay(dl);

    // mapper configured with four references to the same mocked rule
    CDMapper mapper = new CDMapper();
    List<Rule> rules = Arrays.asList(rule, rule, rule, rule);
    mapper.configure(rules, 1);

    // evaluate each rule and collect its fitness under the rule's position
    DummyOutputCollector<LongWritable, CDFitness> collector = new DummyOutputCollector<LongWritable, CDFitness>();
    int ruleIndex = 0;
    while (ruleIndex < mapper.rules.size()) {
        CDFitness fitness = CDMapper.evaluate(mapper.target, mapper.rules.get(ruleIndex).classify(dl),
                dl.getLabel());
        collector.collect(new LongWritable(ruleIndex), fitness);
        ruleIndex++;
    }

    // one key per rule is expected
    Set<LongWritable> collectedKeys = collector.getKeys();
    assertEquals("Number of evaluations", rules.size(), collectedKeys.size());

    // each key must hold a single fitness matching the expected one at that position
    CDFitness[] expectedByPosition = { TP, FP, TN, FN };
    for (LongWritable collectedKey : collectedKeys) {
        int position = (int) collectedKey.get();
        assertEquals("Values for key " + collectedKey, 1, collector.getValue(collectedKey).size());
        CDFitness actual = collector.getValue(collectedKey).get(0);

        assertEquals("Evaluation of the rule " + collectedKey, expectedByPosition[position], actual);
    }

    EasyMock.verify(rule);
    EasyMock.verify(dl);
}

From source file:org.apache.mahout.ga.watchmaker.cd.tool.ToolCombiner.java

License:Apache License

/**
 * Writes the key together with the description that {@code createDescription}
 * builds from the key's value (narrowed to {@code int}) and the grouped values.
 */
@Override
protected void reduce(LongWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    // the long key is narrowed to int before being handed to createDescription
    int attributeIndex = (int) key.get();
    Text description = new Text(createDescription(attributeIndex, values.iterator()));
    context.write(key, description);
}

From source file:org.apache.mahout.ga.watchmaker.cd.tool.ToolReducer.java

License:Apache License

/**
 * Writes the key together with the result of {@code combineDescriptions} applied to
 * the key's value (narrowed to {@code int}) and the grouped values.
 */
@Override
protected void reduce(LongWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    // the long key is narrowed to int before being handed to combineDescriptions
    int attributeIndex = (int) key.get();
    Text combined = new Text(combineDescriptions(attributeIndex, values.iterator()));
    context.write(key, combined);
}

From source file:org.apache.mahout.ga.watchmaker.EvalMapperTest.java

License:Apache License

/**
 * Runs an {@code EvalMapper} over a generated population and checks that one fitness
 * is written per candidate and that it equals the fitness reported by
 * {@code DummyEvaluator} for that candidate.
 */
public void testMap() throws Exception {
    // candidates to evaluate
    int populationSize = 100;
    List<DummyCandidate> population = DummyCandidate.generatePopulation(populationSize);

    // start from a clean evaluator
    DummyEvaluator.clearEvaluations();
    FitnessEvaluator<DummyCandidate> evaluator = new DummyEvaluator();

    // mapper wired to the serialized evaluator and a recording writer
    EvalMapper mapper = new EvalMapper();
    Configuration conf = new Configuration();
    conf.set(EvalMapper.MAHOUT_GA_EVALUATOR, StringUtils.toString(evaluator));
    DummyRecordWriter<LongWritable, DoubleWritable> output = new DummyRecordWriter<LongWritable, DoubleWritable>();
    Mapper<LongWritable, Text, LongWritable, DoubleWritable>.Context context = DummyRecordWriter.build(mapper,
            conf, output);

    mapper.setup(context);

    // feed every candidate to the mapper, keyed by its position in the population
    int position = 0;
    while (position < population.size()) {
        DummyCandidate current = population.get(position);
        mapper.map(new LongWritable(position), new Text(StringUtils.toString(current)), context);
        position++;
    }

    // every candidate must have produced exactly one, correct, evaluation
    Set<LongWritable> writtenKeys = output.getKeys();
    assertEquals("Number of evaluations", populationSize, writtenKeys.size());
    for (LongWritable writtenKey : writtenKeys) {
        DummyCandidate evaluated = population.get((int) writtenKey.get());
        assertEquals("Values for key " + writtenKey, 1, output.getValue(writtenKey).size());
        double fitness = output.getValue(writtenKey).get(0).get();
        assertEquals("Evaluation of the candidate " + writtenKey, DummyEvaluator.getFitness(evaluated.getIndex()),
                fitness);
    }
}

From source file:org.apache.mahout.regression.extractor.FeatureExtractorMapper.java

License:Apache License

/**
 * Turns one delimited input line into a vector and writes it to the context.
 *
 * <p>Element 0 of the vector is the dependent column; it is followed by the independent
 * columns (if any) and then by the product of each configured interaction pair (if any).
 * The output key is the text form of the number of elements written. Records whose key
 * is {@code 0} produce no output.
 *
 * @param key     key of the input record; {@code 0} causes the record to be skipped
 * @param value   the input line, split with {@code pattern} after trimming
 * @param context Hadoop context that receives the vector
 * @throws IOException           if writing to the context fails
 * @throws InterruptedException  if writing to the context is interrupted
 * @throws IllegalStateException if the vector cannot be instantiated reflectively
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    if (key.get() == 0L) {
        return;
    }

    String[] fields = value.toString().trim().split(pattern);
    try {
        // one slot for the dependent value plus one per independent column and interaction
        int dimension = 1;
        if (interactionPairList != null) {
            dimension += interactionPairList.size();
        }
        if (independentID != null) {
            dimension += independentID.length;
        }

        Vector features = (Vector) constructor.newInstance(dimension);
        features.set(0, Double.valueOf(fields[dependentID]));

        int next = 1;
        if (independentID != null) {
            for (int column : independentID) {
                features.set(next, Double.valueOf(fields[column]));
                next++;
            }
        }
        if (interactionPairList != null) {
            for (Pair<Integer, Integer> interaction : interactionPairList) {
                double left = Double.valueOf(fields[interaction.getFirst()]);
                double right = Double.valueOf(fields[interaction.getSecond()]);
                features.set(next, left * right);
                next++;
            }
        }

        Text outputKey = new Text(String.valueOf(next));
        context.write(outputKey, new VectorWritable(features));
    } catch (InstantiationException e) {
        throw new IllegalStateException(e);
    } catch (IllegalAccessException e) {
        throw new IllegalStateException(e);
    } catch (InvocationTargetException e) {
        throw new IllegalStateException(e);
    }
}

From source file:org.apache.mahout.utils.vectors.text.term.TermCountReducer.java

License:Apache License

/**
 * Adds up all counts received for a key and writes the total, but only when the
 * total is at least {@code minSupport}.
 */
@Override
protected void reduce(Text key, Iterable<LongWritable> values, Context context)
        throws IOException, InterruptedException {
    long total = 0;
    for (LongWritable count : values) {
        total = total + count.get();
    }

    // totals below the minimum support are dropped
    if (total < minSupport) {
        return;
    }
    context.write(key, new LongWritable(total));
}

From source file:org.apache.mahout.utils.vectors.text.term.TermDocumentCountReducer.java

License:Apache License

/**
 * Adds up all counts received for a key and writes the total under the same key.
 */
@Override
protected void reduce(IntWritable key, Iterable<LongWritable> values, Context context)
        throws IOException, InterruptedException {
    long total = 0;
    for (LongWritable count : values) {
        total = total + count.get();
    }
    LongWritable result = new LongWritable(total);
    context.write(key, result);
}