Example usage for org.apache.hadoop.io IntWritable get

List of usage examples for org.apache.hadoop.io IntWritable get

Introduction

On this page you can find example usage for org.apache.hadoop.io IntWritable get.

Prototype

public int get() 

Document

Return the value of this IntWritable.
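
Example

A minimal, self-contained sketch of get() outside a MapReduce job (the class name ExampleIntWritableGet is only an illustrative assumption, not taken from any of the sources below):

import org.apache.hadoop.io.IntWritable;

public class ExampleIntWritableGet {
    public static void main(String[] args) {
        // Wrap a primitive int in a Hadoop Writable.
        IntWritable writable = new IntWritable(42);

        // get() unwraps the stored value back to a plain int.
        int value = writable.get();
        System.out.println(value); // prints 42

        // set() replaces the wrapped value; get() reflects the change.
        writable.set(7);
        System.out.println(writable.get()); // prints 7
    }
}

The usages below call get() in the same way, most often to sum reducer values or to turn a Writable key back into a primitive index.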

Usage

From source file:org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java

License:Apache License

private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress)
        throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);

    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class,
            shouldCompress, -1);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numOutputs = numPartitions;
    long availableMemory = 2048;
    int numRecordsWritten = 0;

    Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
    for (int i = 0; i < numOutputs; i++) {
        expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
            numOutputs, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;
    int sizePerRecord = 4 + 8; // IntW + LongW
    int sizePerRecordWithOverhead = sizePerRecord + 12; // Record + META_OVERHEAD

    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        expectedValues.get(partition).put(intWritable.get(), longWritable.get());
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    List<Event> events = kvWriter.close();

    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer;

    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    // Verify the status of the buffers
    if (numExpectedSpills == 0) {
        assertEquals(1, kvWriter.numInitializedBuffers);
    } else {
        assertTrue(kvWriter.numInitializedBuffers > 1);
    }
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());

    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWrittenCounter = counters
            .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        if (!shouldCompress) {
            assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }
    assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWrittenCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    if (numExpectedSpills == 0) {
        assertEquals(0, additionalSpillBytesWritten);
        assertEquals(0, additionalSpillBytesRead);
    } else {
        assertTrue(additionalSpillBytesWritten > 0);
        assertTrue(additionalSpillBytesRead > 0);
        if (!shouldCompress) {
            assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
            assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
        }
    }
    assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
    assertEquals(numExpectedSpills, numAdditionalSpillsCounter.getValue());

    BitSet emptyPartitionBits = null;
    // Verify the event returned
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
            .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    if (skippedPartitions == null && numRecordsWritten > 0) {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    } else {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils
                .decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        if (numRecordsWritten == 0) {
            assertEquals(numPartitions, emptyPartitionBits.cardinality());
        } else {
            for (Integer e : skippedPartitions) {
                assertTrue(emptyPartitionBits.get(e));
            }
            assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
        }
    }
    if (emptyPartitionBits.cardinality() != numPartitions) {
        assertEquals(HOST_STRING, eventProto.getHost());
        assertEquals(SHUFFLE_PORT, eventProto.getPort());
        assertEquals(uniqueId, eventProto.getPathComponent());
    } else {
        assertFalse(eventProto.hasHost());
        assertFalse(eventProto.hasPort());
        assertFalse(eventProto.hasPathComponent());
    }

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;

    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
    } else {
        return;
    }

    // Special case for 0 records.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    IntWritable keyDeser = new IntWritable();
    LongWritable valDeser = new LongWritable();
    for (int i = 0; i < numOutputs; i++) {
        if (skippedPartitions != null && skippedPartitions.contains(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
                0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
            assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
        }
        inStream.close();
    }
    for (int i = 0; i < numOutputs; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}

From source file:org.bgi.flexlab.gaea.tools.mapreduce.vcfqualitycontrol.variantrecalibratioin.VariantRecalibrationReducer.java

License:Open Source License

@Override
public void reduce(IntWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    fileId = key.get();
    for (Text value : values) {
        VariantDatumMessenger msg = new VariantDatumMessenger.Builder().buildFrom(value.toString(),
                genomeLocParser);
        recal.addData(msg);
    }
    recal.recalVCF(fileId, context);

    VCFHeader header = headers.getVcfHeader(fileId);
    header = recal.addHeaderLine(header);
    VCFCodec codec = new VCFCodec();
    codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2);
    InputStream is = HdfsFileManager.getInputStream(new Path(headers.getFile(fileId)),
            context.getConfiguration());
    AsciiLineReaderIterator iterator = new AsciiLineReaderIterator(new AsciiLineReader(is));
    while (iterator.hasNext()) {
        VariantContext vc = codec.decode(iterator.next());
        if (vc == null)
            continue;
        vc = recal.applyRecalibration(vc);
        statistic(vc);
        VariantContextWritable vcWritable = new VariantContextWritable();
        vcWritable.set(vc);
        context.write(NullWritable.get(), vcWritable);
    }
    iterator.close();
}

From source file:org.commoncrawl.util.JoinValue.java

License:Open Source License

public JoinValue(TextBytes tag, IntWritable value) {
    _tag = tag;
    _type = INT_TYPE_JOIN_VALUE;
    _longValue = value.get();
}

From source file:org.commoncrawl.util.MapReduceJobStatsWriter.java

License:Open Source License

public static void main(String[] args) {
    LOG.info("Initializing Hadoop Config");

    Configuration conf = new Configuration();

    conf.addResource("nutch-default.xml");
    conf.addResource("nutch-site.xml");
    conf.addResource("hadoop-default.xml");
    conf.addResource("hadoop-site.xml");
    conf.addResource("commoncrawl-default.xml");
    conf.addResource("commoncrawl-site.xml");

    CrawlEnvironment.setHadoopConfig(conf);
    CrawlEnvironment.setDefaultHadoopFSURI("hdfs://ccn01:9000/");

    // test the stats Writer ... 
    try {

        LOG.info("Opening Stats Writer");
        MapReduceJobStatsWriter<IntWritable, Text> statsWriter = new MapReduceJobStatsWriter<IntWritable, Text>(
                CrawlEnvironment.getDefaultFileSystem(), conf, IntWritable.class, Text.class, "test", "group1",
                12345L);

        LOG.info("Writing Entries");
        for (int i = 0; i < 1000; ++i) {
            statsWriter.appendLogEntry(new IntWritable(i), new Text("Log Entry #" + i));
        }
        LOG.info("Flushing / Closing");
        final Semaphore blockingSemaphore = new Semaphore(0);
        statsWriter.close(new Callback() {

            @Override
            public void execute() {
                LOG.info("Completion Callback Triggered");
                blockingSemaphore.release();
            }

        });
        LOG.info("Waiting on Semaphore");
        blockingSemaphore.acquireUninterruptibly();
        LOG.info("Acquired Semaphore");

        LOG.info("Closed");

        Path hdfsPath = new Path(Environment.HDFS_LOGCOLLECTOR_BASEDIR,
                "test" + "/" + "group1" + "/" + Long.toString(12345L));

        LOG.info("Opening Reader");
        SequenceFile.Reader reader = new SequenceFile.Reader(CrawlEnvironment.getDefaultFileSystem(), hdfsPath,
                conf);
        IntWritable key = new IntWritable();
        Text value = new Text();
        while (reader.next(key, value)) {
            LOG.info("Key:" + key.get() + " Value:" + value.toString());
        }
        reader.close();

    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    }

}

From source file:org.goldenorb.io.checkpoint.CheckPointDataTest.java

License:Apache License

/**
 * Tests the CheckPointDataInput class by reading several different types of Writables from the checkpoint.
 * Asserts that Writables that were written in are of the same value and type when reading in from HDFS.
 *
 * @throws Exception
 */
@Test
public void testCheckpointInput() throws Exception {

    int superStep = 0;
    int partition = 0;
    OrbConfiguration orbConf = new OrbConfiguration();
    orbConf.set("fs.default.name", "hdfs://localhost:" + cluster.getNameNodePort());
    orbConf.setJobNumber("0");
    orbConf.setFileOutputPath("test");

    CheckPointDataInput checkpointInput = new CheckPointDataInput(orbConf, superStep, partition);

    // Data is read on a FIFO basis

    IntWritable intInput = new IntWritable();
    intInput.readFields(checkpointInput);

    LongWritable longInput = new LongWritable();
    longInput.readFields(checkpointInput);

    Text textInput = new Text();
    textInput.readFields(checkpointInput);

    FloatWritable floatInput = new FloatWritable();
    floatInput.readFields(checkpointInput);

    checkpointInput.close();

    assertThat(checkpointInput, notNullValue());
    assertEquals(intInput.get(), 4);
    assertEquals(longInput.get(), 9223372036854775807L);
    assertEquals(textInput.toString(), "test");
    assertTrue(floatInput.get() == 3.14159F);
}

From source file:org.gpfvic.mahout.cf.taste.hadoop.als.ALS.java

License:Apache License

public static OpenIntObjectHashMap<Vector> readMatrixByRowsFromDistributedCache(int numEntities,
        Configuration conf) throws IOException {

    IntWritable rowIndex = new IntWritable();
    VectorWritable row = new VectorWritable();

    OpenIntObjectHashMap<Vector> featureMatrix = numEntities > 0 ? new OpenIntObjectHashMap<Vector>(numEntities)
            : new OpenIntObjectHashMap<Vector>();

    Path[] cachedFiles = HadoopUtil.getCachedFiles(conf);
    LocalFileSystem localFs = FileSystem.getLocal(conf);

    for (Path cachedFile : cachedFiles) {
        try (SequenceFile.Reader reader = new SequenceFile.Reader(localFs, cachedFile, conf)) {
            while (reader.next(rowIndex, row)) {
                featureMatrix.put(rowIndex.get(), row.get());
            }
        }
    }

    Preconditions.checkState(!featureMatrix.isEmpty(), "Feature matrix is empty");
    return featureMatrix;
}

From source file:org.huahinframework.examples.wordcount.natural.WordSummarizer.java

License:Apache License

public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int count = 0;
    for (IntWritable i : values) {
        count += i.get();
    }

    context.write(key, new IntWritable(count));
}

From source file:org.juanitodread.bigdatalab.wordcount.WordCountReducer.java

License:Apache License

@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int sum = 0;

    for (IntWritable value : values) {
        sum += value.get();
    }

    context.write(key, new IntWritable(sum));
}

From source file:org.lib.example.wcount.MCountReducer.java

License:Apache License

/**
 * Performs word count reduction, emitting only words whose frequency meets the threshold.
 */
@Override
public void reduce(Text key, Iterable<IntWritable> values,
        DistributedContext<Text, IntWritable, Text, IntWritable> context)
        throws IOException, InterruptedException {

    int frequency = 0;
    for (IntWritable value : values) {
        frequency += value.get();
    }

    if (frequency >= threshold) {
        context.write(key, new IntWritable(frequency));
    }
}

From source file:org.openflamingo.mapreduce.aggregator.IntMaxAggregator.java

License:Apache License

@Override
public void aggregate(IntWritable value) {
    int val = value.get();
    if (val > max) {
        max = val;
    }
}