List of usage examples for org.apache.hadoop.io IntWritable get
public int get()
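get() returns the primitive int wrapped by the IntWritable. Before the examples from real projects below, here is a minimal self-contained sketch of the set()/get() round trip through Hadoop serialization (class and variable names are illustrative, not from any of the sources below):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetExample {
  public static void main(String[] args) throws Exception {
    // Wrap a primitive int; get() hands the same value back.
    IntWritable writable = new IntWritable(42);
    System.out.println(writable.get()); // 42

    // Serialize, then deserialize into a fresh instance:
    // get() recovers the original value after the round trip.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    writable.write(new DataOutputStream(bytes));

    IntWritable copy = new IntWritable();
    copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    System.out.println(copy.get()); // 42
  }
}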
From source file:org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java
License:Apache License
private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions,
    boolean shouldCompress) throws IOException, InterruptedException {
  PartitionerForTest partitioner = new PartitionerForTest();
  ApplicationId appId = ApplicationId.newInstance(10000, 1);
  TezCounters counters = new TezCounters();
  String uniqueId = UUID.randomUUID().toString();
  OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);

  Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class,
      shouldCompress, -1);
  CompressionCodec codec = null;
  if (shouldCompress) {
    codec = new DefaultCodec();
    ((Configurable) codec).setConf(conf);
  }

  int numOutputs = numPartitions;
  long availableMemory = 2048;
  int numRecordsWritten = 0;

  Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
  for (int i = 0; i < numOutputs; i++) {
    expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
  }

  UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
      numOutputs, availableMemory);

  int sizePerBuffer = kvWriter.sizePerBuffer;
  int sizePerRecord = 4 + 8; // IntW + LongW
  int sizePerRecordWithOverhead = sizePerRecord + 12; // Record + META_OVERHEAD

  IntWritable intWritable = new IntWritable();
  LongWritable longWritable = new LongWritable();
  for (int i = 0; i < numRecords; i++) {
    intWritable.set(i);
    longWritable.set(i);
    int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
    if (skippedPartitions != null && skippedPartitions.contains(partition)) {
      continue;
    }
    expectedValues.get(partition).put(intWritable.get(), longWritable.get());
    kvWriter.write(intWritable, longWritable);
    numRecordsWritten++;
  }

  List<Event> events = kvWriter.close();

  int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
  int numExpectedSpills = numRecordsWritten / recordsPerBuffer;

  verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

  // Verify the status of the buffers
  if (numExpectedSpills == 0) {
    assertEquals(1, kvWriter.numInitializedBuffers);
  } else {
    assertTrue(kvWriter.numInitializedBuffers > 1);
  }
  assertNull(kvWriter.currentBuffer);
  assertEquals(0, kvWriter.availableBuffers.size());

  // Verify the counters
  TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
  TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
  TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
  TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
  TezCounter additionalSpillBytesWritternCounter = counters
      .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
  TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
  TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
  assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
  assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
  assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
  long fileOutputBytes = fileOutputBytesCounter.getValue();
  if (numRecordsWritten > 0) {
    assertTrue(fileOutputBytes > 0);
    if (!shouldCompress) {
      assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
    }
  } else {
    assertEquals(0, fileOutputBytes);
  }
  assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
  long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
  long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
  if (numExpectedSpills == 0) {
    assertEquals(0, additionalSpillBytesWritten);
    assertEquals(0, additionalSpillBytesRead);
  } else {
    assertTrue(additionalSpillBytesWritten > 0);
    assertTrue(additionalSpillBytesRead > 0);
    if (!shouldCompress) {
      assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
      assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
    }
  }
  assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
  assertEquals(numExpectedSpills, numAdditionalSpillsCounter.getValue());

  BitSet emptyPartitionBits = null;

  // Verify the event returned
  assertEquals(1, events.size());
  assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
  CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
  assertEquals(0, cdme.getSourceIndexStart());
  assertEquals(numOutputs, cdme.getCount());
  DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
      .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
  assertFalse(eventProto.hasData());
  if (skippedPartitions == null && numRecordsWritten > 0) {
    assertFalse(eventProto.hasEmptyPartitions());
    emptyPartitionBits = new BitSet(numPartitions);
  } else {
    assertTrue(eventProto.hasEmptyPartitions());
    byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
    emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
    if (numRecordsWritten == 0) {
      assertEquals(numPartitions, emptyPartitionBits.cardinality());
    } else {
      for (Integer e : skippedPartitions) {
        assertTrue(emptyPartitionBits.get(e));
      }
      assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
    }
  }
  if (emptyPartitionBits.cardinality() != numPartitions) {
    assertEquals(HOST_STRING, eventProto.getHost());
    assertEquals(SHUFFLE_PORT, eventProto.getPort());
    assertEquals(uniqueId, eventProto.getPathComponent());
  } else {
    assertFalse(eventProto.hasHost());
    assertFalse(eventProto.hasPort());
    assertFalse(eventProto.hasPathComponent());
  }

  // Verify the actual data
  TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
  Path outputFilePath = kvWriter.finalOutPath;
  Path spillFilePath = kvWriter.finalIndexPath;
  if (numRecordsWritten > 0) {
    assertTrue(localFs.exists(outputFilePath));
    assertTrue(localFs.exists(spillFilePath));
  } else {
    return; // Special case for 0 records.
  }

  TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
  DataInputBuffer keyBuffer = new DataInputBuffer();
  DataInputBuffer valBuffer = new DataInputBuffer();
  IntWritable keyDeser = new IntWritable();
  LongWritable valDeser = new LongWritable();
  for (int i = 0; i < numOutputs; i++) {
    if (skippedPartitions != null && skippedPartitions.contains(i)) {
      continue;
    }
    TezIndexRecord indexRecord = spillRecord.getIndex(i);
    FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
    inStream.seek(indexRecord.getStartOffset());
    IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
        0, -1);
    while (reader.nextRawKey(keyBuffer)) {
      reader.nextRawValue(valBuffer);
      keyDeser.readFields(keyBuffer);
      valDeser.readFields(valBuffer);
      int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
      assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
    }
    inStream.close();
  }
  for (int i = 0; i < numOutputs; i++) {
    assertEquals(0, expectedValues.get(i).size());
    expectedValues.remove(i);
  }
  assertEquals(0, expectedValues.size());
}
From source file:org.bgi.flexlab.gaea.tools.mapreduce.vcfqualitycontrol.variantrecalibratioin.VariantRecalibrationReducer.java
License:Open Source License
@Override
public void reduce(IntWritable key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  fileId = key.get();
  for (Text value : values) {
    VariantDatumMessenger msg = new VariantDatumMessenger.Builder().buildFrom(value.toString(),
        genomeLocParser);
    recal.addData(msg);
  }
  recal.recalVCF(fileId, context);

  VCFHeader header = headers.getVcfHeader(fileId);
  header = recal.addHeaderLine(header);

  VCFCodec codec = new VCFCodec();
  codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2);
  InputStream is = HdfsFileManager.getInputStream(new Path(headers.getFile(fileId)),
      context.getConfiguration());
  AsciiLineReaderIterator iterator = new AsciiLineReaderIterator(new AsciiLineReader(is));
  while (iterator.hasNext()) {
    VariantContext vc = codec.decode(iterator.next());
    if (vc == null)
      continue;
    vc = recal.applyRecalibration(vc);
    statistic(vc);
    VariantContextWritable vcWritable = new VariantContextWritable();
    vcWritable.set(vc);
    context.write(NullWritable.get(), vcWritable);
  }
  iterator.close();
}
From source file:org.commoncrawl.util.JoinValue.java
License:Open Source License
public JoinValue(TextBytes tag, IntWritable value) {
  _tag = tag;
  _type = INT_TYPE_JOIN_VALUE;
  _longValue = value.get(); // the unwrapped int is widened into the shared long field
}
From source file:org.commoncrawl.util.MapReduceJobStatsWriter.java
License:Open Source License
public static void main(String[] args) {
  LOG.info("Initializing Hadoop Config");

  Configuration conf = new Configuration();
  conf.addResource("nutch-default.xml");
  conf.addResource("nutch-site.xml");
  conf.addResource("hadoop-default.xml");
  conf.addResource("hadoop-site.xml");
  conf.addResource("commoncrawl-default.xml");
  conf.addResource("commoncrawl-site.xml");

  CrawlEnvironment.setHadoopConfig(conf);
  CrawlEnvironment.setDefaultHadoopFSURI("hdfs://ccn01:9000/");

  // test the stats writer ...
  try {
    LOG.info("Opening Stats Writer");
    MapReduceJobStatsWriter<IntWritable, Text> statsWriter = new MapReduceJobStatsWriter<IntWritable, Text>(
        CrawlEnvironment.getDefaultFileSystem(), conf, IntWritable.class, Text.class, "test", "group1",
        12345L);

    LOG.info("Writing Entries");
    for (int i = 0; i < 1000; ++i) {
      statsWriter.appendLogEntry(new IntWritable(i), new Text("Log Entry #" + i));
    }

    LOG.info("Flushing / Closing");
    final Semaphore blockingSempahore = new Semaphore(0);
    statsWriter.close(new Callback() {
      @Override
      public void execute() {
        LOG.info("Completion Callback Triggered");
        blockingSempahore.release();
      }
    });
    LOG.info("Waiting on Semaphore");
    blockingSempahore.acquireUninterruptibly();
    LOG.info("Acquired Semaphore");
    LOG.info("Closed");

    Path hdfsPath = new Path(Environment.HDFS_LOGCOLLECTOR_BASEDIR,
        "test" + "/" + "group1" + "/" + Long.toString(12345L));

    LOG.info("Opening Reader");
    SequenceFile.Reader reader = new SequenceFile.Reader(CrawlEnvironment.getDefaultFileSystem(), hdfsPath,
        conf);
    IntWritable key = new IntWritable();
    Text value = new Text();
    while (reader.next(key, value)) {
      LOG.info("Key:" + key.get() + " Value:" + value.toString());
    }
    reader.close();
  } catch (IOException e) {
    LOG.error(CCStringUtils.stringifyException(e));
  }
}
From source file:org.goldenorb.io.checkpoint.CheckPointDataTest.java
License:Apache License
/**
 * Tests the CheckPointDataInput class by reading several different types of Writables from the
 * checkpoint. Asserts that Writables that were written in are of the same value and type when
 * reading in from HDFS.
 *
 * @throws Exception
 */
@Test
public void testCheckpointInput() throws Exception {
  int superStep = 0;
  int partition = 0;
  OrbConfiguration orbConf = new OrbConfiguration();
  orbConf.set("fs.default.name", "hdfs://localhost:" + cluster.getNameNodePort());
  orbConf.setJobNumber("0");
  orbConf.setFileOutputPath("test");

  CheckPointDataInput checkpointInput = new CheckPointDataInput(orbConf, superStep, partition);

  // Data is read on a FIFO basis
  IntWritable intInput = new IntWritable();
  intInput.readFields(checkpointInput);
  LongWritable longInput = new LongWritable();
  longInput.readFields(checkpointInput);
  Text textInput = new Text();
  textInput.readFields(checkpointInput);
  FloatWritable floatInput = new FloatWritable();
  floatInput.readFields(checkpointInput);
  checkpointInput.close();

  assertThat(checkpointInput, notNullValue());
  assertEquals(intInput.get(), 4);
  assertEquals(longInput.get(), 9223372036854775807L);
  assertEquals(textInput.toString(), "test");
  assertTrue(floatInput.get() == 3.14159F);
}
From source file:org.gpfvic.mahout.cf.taste.hadoop.als.ALS.java
License:Apache License
public static OpenIntObjectHashMap<Vector> readMatrixByRowsFromDistributedCache(int numEntities,
    Configuration conf) throws IOException {

  IntWritable rowIndex = new IntWritable();
  VectorWritable row = new VectorWritable();

  OpenIntObjectHashMap<Vector> featureMatrix = numEntities > 0
      ? new OpenIntObjectHashMap<Vector>(numEntities)
      : new OpenIntObjectHashMap<Vector>();

  Path[] cachedFiles = HadoopUtil.getCachedFiles(conf);
  LocalFileSystem localFs = FileSystem.getLocal(conf);

  for (Path cachedFile : cachedFiles) {
    try (SequenceFile.Reader reader = new SequenceFile.Reader(localFs, cachedFile, conf)) {
      while (reader.next(rowIndex, row)) {
        featureMatrix.put(rowIndex.get(), row.get());
      }
    }
  }

  Preconditions.checkState(!featureMatrix.isEmpty(), "Feature matrix is empty");
  return featureMatrix;
}
From source file:org.huahinframework.examples.wordcount.natural.WordSummarizer.java
License:Apache License
public void reduce(Text key, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException {
  int count = 0;
  for (IntWritable i : values) {
    count += i.get();
  }
  context.write(key, new IntWritable(count));
}
From source file:org.juanitodread.bigdatalab.wordcount.WordCountReducer.java
License:Apache License
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException {
  int sum = 0;
  for (IntWritable value : values) {
    sum += value.get();
  }
  context.write(key, new IntWritable(sum));
}
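A common refinement, not taken from the source above, is to reuse a single IntWritable for the output rather than allocating a new one per key; this is safe because context.write serializes the value before the next call. A minimal sketch of the same reducer with that change:

private final IntWritable result = new IntWritable();

@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException {
  int sum = 0;
  for (IntWritable value : values) {
    sum += value.get(); // unwrap each count and accumulate
  }
  result.set(sum); // reuse the instance; write() serializes it immediately
  context.write(key, result);
}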
From source file:org.lib.example.wcount.MCountReducer.java
License:Apache License
/**
 * Sums word counts, emitting only words whose frequency meets the threshold.
 */
@Override
public void reduce(Text key, Iterable<IntWritable> values,
    DistributedContext<Text, IntWritable, Text, IntWritable> context)
    throws IOException, InterruptedException {
  int frequency = 0;
  for (IntWritable value : values) {
    frequency += value.get();
  }
  if (frequency >= threshold) {
    context.write(key, new IntWritable(frequency));
  }
}
From source file:org.openflamingo.mapreduce.aggregator.IntMaxAggregator.java
License:Apache License
@Override
public void aggregate(IntWritable value) {
  int val = value.get();
  if (val > max) {
    max = val;
  }
}