Usage examples for org.apache.hadoop.io.LongWritable.set
public void set(long value)
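LongWritable is Hadoop's mutable Writable wrapper around a long. set(long) overwrites the wrapped value in place, which is what lets MapReduce code reuse a single instance across millions of records instead of allocating one object per record. A minimal standalone sketch (not taken from any of the source files below):

import org.apache.hadoop.io.LongWritable;

public class LongWritableSetExample {
    public static void main(String[] args) {
        LongWritable counter = new LongWritable();   // wraps 0 by default
        counter.set(42L);                            // overwrite the wrapped value in place
        System.out.println(counter.get());           // 42
        counter.set(counter.get() + 1);              // read-modify-write, still one object
        System.out.println(counter);                 // toString() prints 43
    }
}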
From source file:org.apache.nutch.crawl.CrawlDbReader.java
License:Apache License
public void processStatJob(String crawlDb, Configuration config, boolean sort) throws IOException {
    if (LOG.isInfoEnabled()) {
        LOG.info("CrawlDb statistics start: " + crawlDb);
    }
    Path tmpFolder = new Path(crawlDb, "stat_tmp" + System.currentTimeMillis());

    JobConf job = new NutchJob(config);
    job.setJobName("stats " + crawlDb);
    job.setBoolean("db.reader.stats.sort", sort);

    FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(CrawlDbStatMapper.class);
    job.setCombinerClass(CrawlDbStatCombiner.class);
    job.setReducerClass(CrawlDbStatReducer.class);

    FileOutputFormat.setOutputPath(job, tmpFolder);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // https://issues.apache.org/jira/browse/NUTCH-1029
    job.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

    JobClient.runJob(job);

    // reading the result
    FileSystem fileSystem = FileSystem.get(config);
    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(config, tmpFolder);

    Text key = new Text();
    LongWritable value = new LongWritable();

    TreeMap<String, LongWritable> stats = new TreeMap<String, LongWritable>();
    for (int i = 0; i < readers.length; i++) {
        SequenceFile.Reader reader = readers[i];
        while (reader.next(key, value)) {
            String k = key.toString();
            LongWritable val = stats.get(k);
            if (val == null) {
                val = new LongWritable();
                if (k.equals("scx"))
                    val.set(Long.MIN_VALUE);
                if (k.equals("scn"))
                    val.set(Long.MAX_VALUE);
                stats.put(k, val);
            }
            if (k.equals("scx")) {
                if (val.get() < value.get())
                    val.set(value.get());
            } else if (k.equals("scn")) {
                if (val.get() > value.get())
                    val.set(value.get());
            } else {
                val.set(val.get() + value.get());
            }
        }
        reader.close();
    }

    if (LOG.isInfoEnabled()) {
        LOG.info("Statistics for CrawlDb: " + crawlDb);
        LongWritable totalCnt = stats.get("T");
        stats.remove("T");
        LOG.info("TOTAL urls:\t" + totalCnt.get());
        for (Map.Entry<String, LongWritable> entry : stats.entrySet()) {
            String k = entry.getKey();
            LongWritable val = entry.getValue();
            if (k.equals("scn")) {
                LOG.info("min score:\t" + (float) (val.get() / 1000.0f));
            } else if (k.equals("scx")) {
                LOG.info("max score:\t" + (float) (val.get() / 1000.0f));
            } else if (k.equals("sct")) {
                LOG.info("avg score:\t" + (float) ((((double) val.get()) / totalCnt.get()) / 1000.0));
            } else if (k.startsWith("status")) {
                String[] st = k.split(" ");
                int code = Integer.parseInt(st[1]);
                if (st.length > 2)
                    LOG.info("   " + st[2] + " :\t" + val);
                else
                    LOG.info(st[0] + " " + code + " (" + CrawlDatum.getStatusName((byte) code) + "):\t" + val);
            } else
                LOG.info(k + ":\t" + val);
        }
    }
    // removing the tmp folder
    fileSystem.delete(tmpFolder, true);
    if (LOG.isInfoEnabled()) {
        LOG.info("CrawlDb statistics: done");
    }
}
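The scn/scx/sct bookkeeping above boils down to one pattern: keep a mutable LongWritable per stat key and update it in place with set(). A distilled sketch of just that accumulator logic, using the same scn=min / scx=max / otherwise-sum key convention (class and method names are hypothetical, not part of Nutch):

import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.io.LongWritable;

class StatAccumulator {
    private final Map<String, LongWritable> stats = new TreeMap<String, LongWritable>();

    void accumulate(String k, long v) {
        LongWritable val = stats.get(k);
        if (val == null) {
            // seed min/max with the opposite extreme so the first sample always wins
            val = new LongWritable(k.equals("scx") ? Long.MIN_VALUE
                    : k.equals("scn") ? Long.MAX_VALUE : 0L);
            stats.put(k, val);
        }
        if (k.equals("scx")) {                 // running max
            if (val.get() < v) val.set(v);
        } else if (k.equals("scn")) {          // running min
            if (val.get() > v) val.set(v);
        } else {                               // running sum
            val.set(val.get() + v);
        }
    }
}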
From source file:org.apache.nutch.crawl.TestMapWritable.java
License:Apache License
public void testPerformance() throws Exception {
    FileSystem fs = FileSystem.get(configuration);
    Path file = new Path(System.getProperty("java.io.tmpdir"), "mapTestFile");
    fs.delete(file);
    org.apache.hadoop.io.SequenceFile.Writer writer = SequenceFile.createWriter(fs, configuration, file,
            IntWritable.class, MapWritable.class);

    // write map
    System.out.println("start writing map's");
    long start = System.currentTimeMillis();
    IntWritable key = new IntWritable();
    MapWritable map = new MapWritable();
    LongWritable mapValue = new LongWritable();
    for (int i = 0; i < 1000000; i++) {
        key.set(i);
        mapValue.set(i);
        map.put(key, mapValue);
        writer.append(key, map);
    }
    long needed = System.currentTimeMillis() - start;
    writer.close();
    System.out.println("needed time for writing map's: " + needed);

    // read map
    org.apache.hadoop.io.SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, configuration);
    System.out.println("start reading map's");
    start = System.currentTimeMillis();
    while (reader.next(key, map)) {
    }
    reader.close();
    needed = System.currentTimeMillis() - start;
    System.out.println("needed time for reading map's: " + needed);
    fs.delete(file);

    // write Text
    System.out.println("start writing Text's");
    writer = SequenceFile.createWriter(fs, configuration, file, IntWritable.class, Text.class);
    start = System.currentTimeMillis();
    key = new IntWritable();
    Text value = new Text();
    String s = "15726:15726";
    for (int i = 0; i < 1000000; i++) {
        key.set(i);
        value.set(s);
        writer.append(key, value);
    }
    needed = System.currentTimeMillis() - start;
    writer.close();
    System.out.println("needed time for writing Text's: " + needed);

    // read Text
    System.out.println("start reading Text's");
    reader = new SequenceFile.Reader(fs, file, configuration);
    start = System.currentTimeMillis();
    while (reader.next(key, value)) {
    }
    needed = System.currentTimeMillis() - start;
    System.out.println("needed time for reading Text: " + needed);
    fs.delete(file);
}
From source file:org.apache.nutch.crawl.WebTableReader.java
License:Apache License
@Override
public Map<String, Object> run(Map<String, Object> args) throws Exception {
    Path tmpFolder = new Path(getConf().get("mapred.temp.dir", ".") + "stat_tmp" + System.currentTimeMillis());

    numJobs = 1;
    currentJob = new NutchJob(getConf(), "db_stats");
    currentJob.getConfiguration().setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

    Boolean sort = (Boolean) args.get(Nutch.ARG_SORT);
    if (sort == null)
        sort = Boolean.FALSE;
    currentJob.getConfiguration().setBoolean("db.reader.stats.sort", sort);

    DataStore<String, WebPage> store = StorageUtils.createWebStore(currentJob.getConfiguration(), String.class,
            WebPage.class);
    Query<String, WebPage> query = store.newQuery();
    // remove the __g__dirty field since it is not stored
    String[] fields = Arrays.copyOfRange(WebPage._ALL_FIELDS, 1, WebPage._ALL_FIELDS.length);
    query.setFields(fields);

    GoraMapper.initMapperJob(currentJob, query, store, Text.class, LongWritable.class, WebTableStatMapper.class,
            null, true);
    currentJob.setCombinerClass(WebTableStatCombiner.class);
    currentJob.setReducerClass(WebTableStatReducer.class);

    FileOutputFormat.setOutputPath(currentJob, tmpFolder);
    currentJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    currentJob.setOutputKeyClass(Text.class);
    currentJob.setOutputValueClass(LongWritable.class);

    FileSystem fileSystem = FileSystem.get(getConf());
    try {
        currentJob.waitForCompletion(true);
    } finally {
        ToolUtil.recordJobStatus(null, currentJob, results);
        if (!currentJob.isSuccessful()) {
            fileSystem.delete(tmpFolder, true);
            return results;
        }
    }

    Text key = new Text();
    LongWritable value = new LongWritable();

    SequenceFile.Reader[] readers = org.apache.hadoop.mapred.SequenceFileOutputFormat.getReaders(getConf(),
            tmpFolder);
    TreeMap<String, LongWritable> stats = new TreeMap<String, LongWritable>();
    for (int i = 0; i < readers.length; i++) {
        SequenceFile.Reader reader = readers[i];
        while (reader.next(key, value)) {
            String k = key.toString();
            LongWritable val = stats.get(k);
            if (val == null) {
                val = new LongWritable();
                if (k.equals("scx"))
                    val.set(Long.MIN_VALUE);
                if (k.equals("scn"))
                    val.set(Long.MAX_VALUE);
                stats.put(k, val);
            }
            if (k.equals("scx")) {
                if (val.get() < value.get())
                    val.set(value.get());
            } else if (k.equals("scn")) {
                if (val.get() > value.get())
                    val.set(value.get());
            } else {
                val.set(val.get() + value.get());
            }
        }
        reader.close();
    }

    LongWritable totalCnt = stats.get("T");
    if (totalCnt == null)
        totalCnt = new LongWritable(0);
    stats.remove("T");
    results.put("TOTAL urls", totalCnt.get());
    for (Map.Entry<String, LongWritable> entry : stats.entrySet()) {
        String k = entry.getKey();
        LongWritable val = entry.getValue();
        if (k.equals("scn")) {
            results.put("min score", (val.get() / 1000.0f));
        } else if (k.equals("scx")) {
            results.put("max score", (val.get() / 1000.0f));
        } else if (k.equals("sct")) {
            results.put("avg score", (float) ((((double) val.get()) / totalCnt.get()) / 1000.0));
        } else if (k.startsWith("status")) {
            String[] st = k.split(" ");
            int code = Integer.parseInt(st[1]);
            if (st.length > 2)
                results.put(st[2], val.get());
            else
                results.put(st[0] + " " + code + " (" + CrawlStatus.getName((byte) code) + ")", val.get());
        } else
            results.put(k, val.get());
    }
    // removing the tmp folder
    fileSystem.delete(tmpFolder, true);
    return results;
}
From source file:org.apache.nutch.mapreduce.WebTableReader.java
License:Apache License
@Override
protected void doRun(Map<String, Object> args) throws Exception {
    Path tmpFolder = new Path(getConf().get("mapred.temp.dir", ".") + "stat_tmp" + System.currentTimeMillis());

    currentJob.getConfiguration().setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

    Boolean sort = (Boolean) args.get(Nutch.ARG_SORT);
    if (sort == null)
        sort = Boolean.FALSE;
    currentJob.getConfiguration().setBoolean("db.reader.stats.sort", sort);

    DataStore<String, WebPage> store = StorageUtils.createWebStore(currentJob.getConfiguration(), String.class,
            WebPage.class);
    Query<String, WebPage> query = store.newQuery();
    // remove the __g__dirty field since it is not stored
    String[] fields = Arrays.copyOfRange(WebPage._ALL_FIELDS, 1, WebPage._ALL_FIELDS.length);
    query.setFields(fields);

    GoraMapper.initMapperJob(currentJob, query, store, Text.class, LongWritable.class, WebTableStatMapper.class,
            null, true);
    currentJob.setCombinerClass(WebTableStatCombiner.class);
    currentJob.setReducerClass(WebTableStatReducer.class);

    FileOutputFormat.setOutputPath(currentJob, tmpFolder);
    currentJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    currentJob.setOutputKeyClass(Text.class);
    currentJob.setOutputValueClass(LongWritable.class);

    FileSystem fileSystem = FileSystem.get(getConf());
    try {
        currentJob.waitForCompletion(true);
    } finally {
        if (!currentJob.isSuccessful()) {
            fileSystem.delete(tmpFolder, true);
            return;
        }
    }

    Text key = new Text();
    LongWritable value = new LongWritable();

    SequenceFile.Reader[] readers = org.apache.hadoop.mapred.SequenceFileOutputFormat.getReaders(getConf(),
            tmpFolder);
    TreeMap<String, LongWritable> stats = new TreeMap<String, LongWritable>();
    for (int i = 0; i < readers.length; i++) {
        SequenceFile.Reader reader = readers[i];
        while (reader.next(key, value)) {
            String k = key.toString();
            LongWritable val = stats.get(k);
            if (val == null) {
                val = new LongWritable();
                if (k.equals("scx"))
                    val.set(Long.MIN_VALUE);
                if (k.equals("scn"))
                    val.set(Long.MAX_VALUE);
                stats.put(k, val);
            }
            if (k.equals("scx")) {
                if (val.get() < value.get())
                    val.set(value.get());
            } else if (k.equals("scn")) {
                if (val.get() > value.get())
                    val.set(value.get());
            } else {
                val.set(val.get() + value.get());
            }
        }
        reader.close();
    }

    LongWritable totalCnt = stats.get("T");
    if (totalCnt == null)
        totalCnt = new LongWritable(0);
    stats.remove("T");
    results.put("TOTAL urls", totalCnt.get());
    for (Map.Entry<String, LongWritable> entry : stats.entrySet()) {
        String k = entry.getKey();
        LongWritable val = entry.getValue();
        if (k.equals("scn")) {
            results.put("min score", (val.get() / 1000.0f));
        } else if (k.equals("scx")) {
            results.put("max score", (val.get() / 1000.0f));
        } else if (k.equals("sct")) {
            results.put("avg score", (float) ((((double) val.get()) / totalCnt.get()) / 1000.0));
        } else if (k.startsWith("status")) {
            String[] st = k.split(" ");
            int code = Integer.parseInt(st[1]);
            if (st.length > 2)
                results.put(st[2], val.get());
            else
                results.put(st[0] + " " + code + " (" + CrawlStatus.getName((byte) code) + ")", val.get());
        } else
            results.put(k, val.get());
    }
    // removing the tmp folder
    fileSystem.delete(tmpFolder, true);
}
From source file:org.apache.orc.mapred.OrcMapredRecordReader.java
License:Apache License
static LongWritable nextLong(ColumnVector vector, int row, Object previous) {
    if (vector.isRepeating) {
        row = 0;
    }
    if (vector.noNulls || !vector.isNull[row]) {
        LongWritable result;
        if (previous == null || previous.getClass() != LongWritable.class) {
            result = new LongWritable();
        } else {
            result = (LongWritable) previous;
        }
        result.set(((LongColumnVector) vector).vector[row]);
        return result;
    } else {
        return null;
    }
}
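The previous-object reuse idiom above is the standard Writable pattern: allocate only when the caller did not hand back a compatible instance, otherwise overwrite in place with set(). A distilled, self-contained sketch of the same idiom (hypothetical helper, not part of the ORC API):

import org.apache.hadoop.io.LongWritable;

final class WritableReuse {
    /** Return 'previous' with its value overwritten, allocating only if it is unusable. */
    static LongWritable reuseLong(Object previous, long newValue) {
        LongWritable result = (previous instanceof LongWritable)
                ? (LongWritable) previous
                : new LongWritable();     // allocate only when reuse is impossible
        result.set(newValue);             // overwrite in place; no new object on the hot path
        return result;
    }

    public static void main(String[] args) {
        LongWritable w = reuseLong(null, 7L);             // first call allocates
        LongWritable same = reuseLong(w, 8L);             // second call reuses: same == w
        System.out.println(w.get() + " " + (same == w));  // prints "8 true"
    }
}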
From source file:org.apache.orc.mapred.TestOrcOutputFormat.java
License:Apache License
/**
 * Test the case where the top level isn't a struct, but a long.
 */
@Test
public void testLongRoot() throws Exception {
    conf.set("mapreduce.task.attempt.id", "attempt_20160101_0001_m_000001_0");
    conf.setOutputCommitter(NullOutputCommitter.class);
    conf.set(OrcConf.COMPRESS.getAttribute(), "SNAPPY");
    conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
    conf.setInt(OrcConf.BUFFER_SIZE.getAttribute(), 64 * 1024);
    conf.set(OrcConf.WRITE_FORMAT.getAttribute(), "0.11");
    final String typeStr = "bigint";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    FileOutputFormat.setOutputPath(conf, workDir);
    TypeDescription type = TypeDescription.fromString(typeStr);

    LongWritable value = new LongWritable();
    NullWritable nada = NullWritable.get();
    RecordWriter<NullWritable, LongWritable> writer = new OrcOutputFormat<LongWritable>().getRecordWriter(fs,
            conf, "long.orc", Reporter.NULL);
    for (long lo = 0; lo < 2000; ++lo) {
        value.set(lo);
        writer.write(nada, value);
    }
    writer.close(Reporter.NULL);

    Path path = new Path(workDir, "long.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(CompressionKind.SNAPPY, file.getCompressionKind());
    assertEquals(2000, file.getNumberOfRows());
    assertEquals(1000, file.getRowIndexStride());
    assertEquals(64 * 1024, file.getCompressionSize());
    assertEquals(OrcFile.Version.V_0_11, file.getFileVersion());

    FileSplit split = new FileSplit(path, 0, 100000, new String[0]);
    RecordReader<NullWritable, LongWritable> reader = new OrcInputFormat<LongWritable>().getRecordReader(split,
            conf, Reporter.NULL);
    nada = reader.createKey();
    value = reader.createValue();
    for (long lo = 0; lo < 2000; ++lo) {
        assertEquals(true, reader.next(nada, value));
        assertEquals(lo, value.get());
    }
    assertEquals(false, reader.next(nada, value));
}
From source file:org.apache.sysml.runtime.io.FrameWriterBinaryBlock.java
License:Apache License
/**
 * Internal primitive to write a block-aligned row range of a frame to a single sequence file,
 * which is used for both single- and multi-threaded writers (for consistency).
 *
 * @param path file path
 * @param job job configuration
 * @param fs file system
 * @param src frame block
 * @param blen block length
 * @param rl lower row
 * @param ru upper row
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 * @throws IOException if IOException occurs
 */
@SuppressWarnings("deprecation")
protected static void writeBinaryBlockFrameToSequenceFile(Path path, JobConf job, FileSystem fs, FrameBlock src,
        int blen, int rl, int ru) throws DMLRuntimeException, IOException {
    // 1) create sequence file writer
    SequenceFile.Writer writer = null;
    writer = new SequenceFile.Writer(fs, job, path, LongWritable.class, FrameBlock.class);

    try {
        // 2) reblock and write
        LongWritable index = new LongWritable();
        if (src.getNumRows() <= blen) // opt for single block
        {
            // directly write single block
            index.set(1);
            writer.append(index, src);
        } else // general case
        {
            // initialize blocks for reuse (at most 4 different blocks required)
            FrameBlock[] blocks = createFrameBlocksForReuse(src.getSchema(), src.getColumnNames(),
                    src.getNumRows());

            // create and write subblocks of frame
            for (int bi = rl; bi < ru; bi += blen) {
                int len = Math.min(blen, src.getNumRows() - bi);

                // get reuse frame block and copy subpart to block (incl meta on first)
                FrameBlock block = getFrameBlockForReuse(blocks);
                src.sliceOperations(bi, bi + len - 1, 0, src.getNumColumns() - 1, block);
                if (bi == 0) // first block
                    block.setColumnMetadata(src.getColumnMetadata());

                // append block to sequence file
                index.set(bi + 1);
                writer.append(index, block);
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}
From source file:org.apache.tez.mapreduce.input.TestMultiMRInput.java
License:Apache License
public static LinkedHashMap<LongWritable, Text> createInputData(FileSystem fs, Path workDir, JobConf job,
        String filename, long startKey, long numKeys) throws IOException {
    LinkedHashMap<LongWritable, Text> data = new LinkedHashMap<LongWritable, Text>();
    Path file = new Path(workDir, filename);
    LOG.info("Generating data at path: " + file);
    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
    try {
        Random r = new Random(System.currentTimeMillis());
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (long i = startKey; i < numKeys; i++) {
            key.set(i);
            value.set(Integer.toString(r.nextInt(10000)));
            // copy key and value before storing them in the map: the Writable
            // instances are mutated and reused on the next iteration
            data.put(new LongWritable(key.get()), new Text(value.toString()));
            writer.append(key, value);
            LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
        }
    } finally {
        writer.close();
    }
    return data;
}
From source file:org.apache.tez.mapreduce.processor.MapUtils.java
License:Apache License
private static InputSplit createInputSplit(FileSystem fs, Path workDir, JobConf job, Path file)
        throws IOException {
    FileInputFormat.setInputPaths(job, workDir);
    LOG.info("Generating data at path: " + file);
    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
    try {
        Random r = new Random(System.currentTimeMillis());
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (int i = 10; i > 0; i--) {
            key.set(r.nextInt(1000));
            value.set(Integer.toString(i));
            writer.append(key, value);
            LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
        }
    } finally {
        writer.close();
    }

    SequenceFileInputFormat<LongWritable, Text> format = new SequenceFileInputFormat<LongWritable, Text>();
    InputSplit[] splits = format.getSplits(job, 1);
    System.err.println("#split = " + splits.length + " ; " + "#locs = " + splits[0].getLocations().length + "; "
            + "loc = " + splits[0].getLocations()[0] + "; " + "off = " + splits[0].getLength() + "; " + "file = "
            + ((FileSplit) splits[0]).getPath());
    return splits[0];
}
From source file:org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java
License:Apache License
private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress)
        throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);

    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class,
            shouldCompress, -1);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numOutputs = numPartitions;
    long availableMemory = 2048;
    int numRecordsWritten = 0;

    Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
    for (int i = 0; i < numOutputs; i++) {
        expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
            numOutputs, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;
    int sizePerRecord = 4 + 8; // IntW + LongW
    int sizePerRecordWithOverhead = sizePerRecord + 12; // Record + META_OVERHEAD

    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        expectedValues.get(partition).put(intWritable.get(), longWritable.get());
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    List<Event> events = kvWriter.close();

    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer;

    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    // Verify the status of the buffers
    if (numExpectedSpills == 0) {
        assertEquals(1, kvWriter.numInitializedBuffers);
    } else {
        assertTrue(kvWriter.numInitializedBuffers > 1);
    }
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());

    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWritternCounter = counters
            .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        if (!shouldCompress) {
            assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }
    assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    if (numExpectedSpills == 0) {
        assertEquals(0, additionalSpillBytesWritten);
        assertEquals(0, additionalSpillBytesRead);
    } else {
        assertTrue(additionalSpillBytesWritten > 0);
        assertTrue(additionalSpillBytesRead > 0);
        if (!shouldCompress) {
            assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
            assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
        }
    }
    assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
    assertEquals(numExpectedSpills, numAdditionalSpillsCounter.getValue());

    BitSet emptyPartitionBits = null;
    // Verify the event returned
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
            .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    if (skippedPartitions == null && numRecordsWritten > 0) {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    } else {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        if (numRecordsWritten == 0) {
            assertEquals(numPartitions, emptyPartitionBits.cardinality());
        } else {
            for (Integer e : skippedPartitions) {
                assertTrue(emptyPartitionBits.get(e));
            }
            assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
        }
    }
    if (emptyPartitionBits.cardinality() != numPartitions) {
        assertEquals(HOST_STRING, eventProto.getHost());
        assertEquals(SHUFFLE_PORT, eventProto.getPort());
        assertEquals(uniqueId, eventProto.getPathComponent());
    } else {
        assertFalse(eventProto.hasHost());
        assertFalse(eventProto.hasPort());
        assertFalse(eventProto.hasPathComponent());
    }

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;
    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
    } else {
        return; // Special case for 0 records.
    }

    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    IntWritable keyDeser = new IntWritable();
    LongWritable valDeser = new LongWritable();
    for (int i = 0; i < numOutputs; i++) {
        if (skippedPartitions != null && skippedPartitions.contains(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
                0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
            assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
        }
        inStream.close();
    }
    for (int i = 0; i < numOutputs; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}