Example usage for org.apache.hadoop.io.LongWritable.get()

Introduction

This page collects example usages of the org.apache.hadoop.io.LongWritable.get() method.

Prototype

public long get()

Source Link

Document

Return the value of this LongWritable.

Usage

From source file:org.apache.kylin.storage.hbase.steps.RangeKeyDistributionReducer.java

License:Apache License

/**
 * Adds every value received for {@code key} to the running {@code bytesRead} total and, once that
 * total reaches {@code ONE_GIGA_BYTES}, stores a copy of the key in {@code gbPoints} and resets
 * the total to zero.
 *
 * @param key     the key currently being reduced
 * @param values  the amounts to add to the running total
 * @param context the reducer context (not read by this method)
 */
@Override
public void doReduce(Text key, Iterable<LongWritable> values, Context context)
        throws IOException, InterruptedException {
    for (LongWritable amount : values) {
        bytesRead += amount.get();
    }

    // Threshold not reached yet: leave the running total as it is.
    if (bytesRead < ONE_GIGA_BYTES) {
        return;
    }

    // Threshold reached: remember a copy of this key and start counting from zero again.
    gbPoints.add(new Text(key));
    bytesRead = 0;
}

From source file:org.apache.mahout.cf.taste.hadoop.RecommenderMapper.java

License:Apache License

/**
 * Asks the recommender for items for the user ID held in {@code value}, writes the result keyed
 * by that same {@code value}, and bumps the USERS_PROCESSED and RECOMMENDATIONS_MADE counters.
 *
 * @param key     the input key (not read by this method)
 * @param value   holds the user ID to compute recommendations for
 * @param context receives the output record and the counter updates
 * @throws RuntimeException wrapping any {@code TasteException} raised by the recommender
 */
@Override
protected void map(LongWritable key, LongWritable value, Context context)
        throws IOException, InterruptedException {
    final long userID = value.get();

    List<RecommendedItem> items;
    try {
        items = recommender.recommend(userID, recommendationsPerUser);
    } catch (TasteException te) {
        // TasteException is not in this method's throws clause, so it is rethrown unchecked.
        throw new RuntimeException(te);
    }

    context.write(value, new RecommendedItemsWritable(items));

    context.getCounter(ReducerMetrics.USERS_PROCESSED).increment(1L);
    context.getCounter(ReducerMetrics.RECOMMENDATIONS_MADE).increment(items.size());
}

From source file:org.apache.mahout.clustering.spectral.AffinityMatrixInputMapper.java

License:Apache License

/**
 * Parses one {@code i,j,value} text line into a matrix entry and writes it keyed by row
 * {@code i}.
 *
 * @param key     the input key; only used in the debug log message
 * @param value   the text line, split with {@code COMMA_PATTERN}
 * @param context receives the (row, entry) pair
 * @throws IOException if the line does not split into exactly three fields, or any field is empty
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    String[] fields = COMMA_PATTERN.split(value.toString());
    log.debug("(DEBUG - MAP) Key[{}], Value[{}]", key.get(), value);

    // Reject lines that are not well-formed (i,j,value) triples.
    if (fields.length != 3) {
        throw new IOException(
                "Expected input of length 3, received " + fields.length + ". Please make sure you adhere to "
                        + "the structure of (i,j,value) for representing a graph in text. "
                        + "Input line was: '" + value + "'.");
    }
    for (String field : fields) {
        if (field.isEmpty()) {
            throw new IOException("Found an element of 0 length. Please be sure you adhere to the structure of "
                    + "(i,j,value) for  representing a graph in text.");
        }
    }

    // The row (fields[0]) becomes the output key; the column (fields[1]) and the
    // value (fields[2]) are stored in the entry itself.
    DistributedRowMatrix.MatrixEntryWritable entry = new DistributedRowMatrix.MatrixEntryWritable();
    IntWritable rowKey = new IntWritable(Integer.parseInt(fields[0]));
    entry.setRow(-1); // the row is carried by the output key instead
    entry.setCol(Integer.parseInt(fields[1]));
    entry.setVal(Double.parseDouble(fields[2]));
    context.write(rowKey, entry);
}

From source file:org.apache.mahout.df.mapred.partial.Step0JobTest.java

License:Apache License

/**
 * Feeds every sorted input split through a fresh {@code Step0Mapper} and checks that the
 * collector received, for partition {@code p}, the key {@code p}, the first record key of the
 * split as first id, and the number of records read as size.
 *
 * @throws Exception if the test data cannot be written or read back
 */
public void testStep0Mapper() throws Exception {
    Random rng = RandomUtils.getRandom();

    // create a dataset large enough to be split up
    String descriptor = Utils.randomDescriptor(rng, numAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, numInstances);
    String[] sData = Utils.double2String(source);

    // write the data to a file
    Path dataPath = Utils.writeDataToTestFile(sData);

    JobConf job = new JobConf();
    job.setNumMapTasks(numMaps);

    FileInputFormat.setInputPaths(job, dataPath);

    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, numMaps);

    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);

    Step0OutputCollector collector = new Step0OutputCollector(numMaps);
    Reporter reporter = Reporter.NULL;

    for (int p = 0; p < numMaps; p++) {
        InputSplit split = sorted[p];
        RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

        Step0Mapper mapper = new Step0Mapper();
        mapper.configure(p);

        // firstKey stays null when the split yields no records
        Long firstKey = null;
        int size = 0;

        try {
            LongWritable key = reader.createKey();
            Text value = reader.createValue();

            while (reader.next(key, value)) {
                if (firstKey == null) {
                    firstKey = key.get();
                }

                mapper.map(key, value, collector, reporter);

                size++;
            }
        } finally {
            // release the reader even if reading or mapping fails
            reader.close();
        }

        mapper.close();

        // validate the mapper's output
        assertEquals(p, collector.keys[p]);
        assertEquals(firstKey.longValue(), collector.values[p].getFirstId());
        assertEquals(size, collector.values[p].getSize());
    }

}

From source file:org.apache.mahout.df.mapred.partial.Step0JobTest.java

License:Apache License

/**
 * Builds one {@code Step0Output} per sorted input split (first record key and record count),
 * passes them to {@code Step0Job.processOutput} and checks that the first ids extracted from the
 * result equal the label of the first instance of each split.
 *
 * @throws Exception if the test data cannot be written or read back
 */
public void testProcessOutput() throws Exception {
    Random rng = RandomUtils.getRandom();

    // create a dataset large enough to be split up
    String descriptor = Utils.randomDescriptor(rng, numAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, numInstances);

    // each instance label is its index in the dataset
    int labelId = Utils.findLabel(descriptor);
    for (int index = 0; index < numInstances; index++) {
        source[index][labelId] = index;
    }

    String[] sData = Utils.double2String(source);

    // write the data to a file
    Path dataPath = Utils.writeDataToTestFile(sData);

    // prepare a data converter
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    DataConverter converter = new DataConverter(dataset);

    JobConf job = new JobConf();
    job.setNumMapTasks(numMaps);
    FileInputFormat.setInputPaths(job, dataPath);

    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, numMaps);

    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);

    Reporter reporter = Reporter.NULL;

    int[] keys = new int[numMaps];
    Step0Output[] values = new Step0Output[numMaps];

    int[] expectedIds = new int[numMaps];

    for (int p = 0; p < numMaps; p++) {
        InputSplit split = sorted[p];
        RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

        // firstKey stays null when the split yields no records
        Long firstKey = null;
        int size = 0;

        try {
            LongWritable key = reader.createKey();
            Text value = reader.createValue();

            while (reader.next(key, value)) {
                if (firstKey == null) {
                    firstKey = key.get();
                    // the label of the split's first instance is the id expected for partition p
                    expectedIds[p] = converter.convert(0, value.toString()).label;
                }

                size++;
            }
        } finally {
            // release the reader even if reading fails
            reader.close();
        }

        keys[p] = p;
        values[p] = new Step0Output(firstKey, size);
    }

    Step0Output[] partitions = Step0Job.processOutput(keys, values);

    int[] actualIds = Step0Output.extractFirstIds(partitions);

    assertTrue("Expected: " + Arrays.toString(expectedIds) + " But was: " + Arrays.toString(actualIds),
            Arrays.equals(expectedIds, actualIds));
}

From source file:org.apache.mahout.df.mapred.partial.Step1Mapper.java

License:Apache License

/**
 * Remembers the output collector on the first call, then converts the input line into an
 * instance and appends it to {@code instances}.
 *
 * @param key      the input key; narrowed to {@code int} and passed to the converter
 * @param value    the text line to convert
 * @param output   the collector, stored in {@code this.output} if none is stored yet
 * @param reporter not read by this method
 */
@Override
public void map(LongWritable key, Text value, OutputCollector<TreeID, MapredOutput> output, Reporter reporter)
        throws IOException {
    if (null == this.output) {
        // keep the first collector we are handed
        this.output = output;
    }

    final int id = (int) key.get();
    final String line = value.toString();
    instances.add(converter.convert(id, line));
}

From source file:org.apache.mahout.df.mapreduce.partial.Step0JobTest.java

License:Apache License

/**
 * Feeds every sorted input split through a fresh {@code Step0Mapper} and checks that the
 * context received, for partition {@code p}, the key {@code p}, the first record key of the
 * split as first id, and the number of records read as size.
 *
 * @throws Exception if the test data cannot be written or read back
 */
public void testStep0Mapper() throws Exception {
    Random rng = RandomUtils.getRandom();

    // create a dataset large enough to be split up
    String descriptor = Utils.randomDescriptor(rng, NUM_ATTRIBUTES);
    double[][] source = Utils.randomDoubles(rng, descriptor, NUM_INSTANCES);
    String[] sData = Utils.double2String(source);

    // write the data to a file
    Path dataPath = Utils.writeDataToTestFile(sData);

    Job job = new Job();
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, dataPath);

    setMaxSplitSize(job.getConfiguration(), dataPath, NUM_MAPS);

    // retrieve the splits
    TextInputFormat input = new TextInputFormat();
    List<InputSplit> splits = input.getSplits(job);
    assertEquals(NUM_MAPS, splits.size());

    InputSplit[] sorted = new InputSplit[NUM_MAPS];
    splits.toArray(sorted);
    Builder.sortSplits(sorted);

    Step0Context context = new Step0Context(new Step0Mapper(), job.getConfiguration(), new TaskAttemptID(),
            NUM_MAPS);

    for (int p = 0; p < NUM_MAPS; p++) {
        InputSplit split = sorted[p];

        RecordReader<LongWritable, Text> reader = input.createRecordReader(split, context);
        reader.initialize(split, context);

        Step0Mapper mapper = new Step0Mapper();
        mapper.configure(p);

        // firstKey stays null when the split yields no records
        Long firstKey = null;
        int size = 0;

        try {
            while (reader.nextKeyValue()) {
                LongWritable key = reader.getCurrentKey();

                if (firstKey == null) {
                    firstKey = key.get();
                }

                mapper.map(key, reader.getCurrentValue(), context);

                size++;
            }
        } finally {
            // release the reader even if reading or mapping fails
            reader.close();
        }

        mapper.cleanup(context);

        // validate the mapper's output
        assertEquals(p, context.keys[p]);
        assertEquals(firstKey.longValue(), context.values[p].getFirstId());
        assertEquals(size, context.values[p].getSize());
    }

}

From source file:org.apache.mahout.df.mapreduce.partial.Step0JobTest.java

License:Apache License

/**
 * Builds one {@code Step0Output} per sorted input split (first record key and record count),
 * passes them to {@code Step0Job.processOutput} and checks that the first ids extracted from the
 * result equal the label of the first instance of each split.
 *
 * @throws Exception if the test data cannot be written or read back
 */
public void testProcessOutput() throws Exception {
    Random rng = RandomUtils.getRandom();

    // create a dataset large enough to be split up
    String descriptor = Utils.randomDescriptor(rng, NUM_ATTRIBUTES);
    double[][] source = Utils.randomDoubles(rng, descriptor, NUM_INSTANCES);

    // each instance label is its index in the dataset
    int labelId = Utils.findLabel(descriptor);
    for (int index = 0; index < NUM_INSTANCES; index++) {
        source[index][labelId] = index;
    }

    String[] sData = Utils.double2String(source);

    // write the data to a file
    Path dataPath = Utils.writeDataToTestFile(sData);

    // prepare a data converter
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    DataConverter converter = new DataConverter(dataset);

    Job job = new Job();
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, dataPath);

    setMaxSplitSize(job.getConfiguration(), dataPath, NUM_MAPS);

    // retrieve the splits
    TextInputFormat input = new TextInputFormat();
    List<InputSplit> splits = input.getSplits(job);
    assertEquals(NUM_MAPS, splits.size());

    InputSplit[] sorted = new InputSplit[NUM_MAPS];
    splits.toArray(sorted);
    Builder.sortSplits(sorted);

    List<Integer> keys = new ArrayList<Integer>();
    List<Step0Output> values = new ArrayList<Step0Output>();

    int[] expectedIds = new int[NUM_MAPS];

    TaskAttemptContext context = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID());

    for (int p = 0; p < NUM_MAPS; p++) {
        InputSplit split = sorted[p];
        RecordReader<LongWritable, Text> reader = input.createRecordReader(split, context);
        reader.initialize(split, context);

        // firstKey stays null when the split yields no records
        Long firstKey = null;
        int size = 0;

        try {
            while (reader.nextKeyValue()) {
                LongWritable key = reader.getCurrentKey();
                Text value = reader.getCurrentValue();

                if (firstKey == null) {
                    firstKey = key.get();
                    // the label of the split's first instance is the id expected for partition p
                    expectedIds[p] = converter.convert(0, value.toString()).getLabel();
                }

                size++;
            }
        } finally {
            // release the reader even if reading fails
            reader.close();
        }

        keys.add(p);
        values.add(new Step0Output(firstKey, size));
    }

    Step0Output[] partitions = Step0Job.processOutput(keys, values);

    int[] actualIds = Step0Output.extractFirstIds(partitions);

    assertTrue("Expected: " + Arrays.toString(expectedIds) + " But was: " + Arrays.toString(actualIds),
            Arrays.equals(expectedIds, actualIds));
}

From source file:org.apache.mahout.df.mapreduce.partial.Step1Mapper.java

License:Apache License

/**
 * Converts the input line into an instance and appends it to {@code instances}.
 *
 * @param key     the input key; narrowed to {@code int} and passed to the converter
 * @param value   the text line to convert
 * @param context not read by this method
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    final int id = (int) key.get();
    final String line = value.toString();
    instances.add(converter.convert(id, line));
}

From source file:org.apache.mahout.feature.common.csv.CsvToVectorMapper.java

License:Apache License

/**
 * Parses one comma-separated line of numbers into a sparse vector and writes it keyed by the
 * decimal text of {@code key}. The record whose key is 0 is skipped.
 *
 * @param key     the input key; a value of 0 marks the line to skip, otherwise it names the row
 * @param line    the CSV line to parse
 * @param context receives the (text key, vector) pair
 * @throws IOException if a field cannot be parsed as a double; the underlying
 *                     {@link NumberFormatException} is attached as the cause
 */
public void map(LongWritable key, Text line, Context context) throws IOException, InterruptedException {

    // TODO: not always this problem
    // first line (column names): nothing to convert, skip it
    if (key.get() == 0) {
        return;
    }

    Vector input = new RandomAccessSparseVector(columnNumber);

    int column = 0;
    for (String field : line.toString().split(",")) {
        double parsed;
        try {
            parsed = Double.parseDouble(field);
        } catch (NumberFormatException e) {
            // keep the original exception so the offending field shows up in the stack trace
            throw new IOException("CSV file contains non-numeric data", e);
        }

        input.setQuick(column, parsed);
        column++;
    }

    // Text type as key is required since "rowid" job takes as argument
    // SequenceFile<Text,VectorWritable>
    context.write(new Text(Long.toString(key.get())), new VectorWritable(input));
}