List of usage examples for org.apache.hadoop.io.IntWritable.set
public void set(int value)
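Before the source-file examples, here is a minimal, self-contained sketch of the call (the class name and loop are illustrative, not taken from any of the sources below). set(int) replaces the wrapped value in place, which is why the examples that follow allocate a single IntWritable and reuse it across records instead of creating a new object per record:

import org.apache.hadoop.io.IntWritable;

public class IntWritableSetExample {
    public static void main(String[] args) {
        IntWritable w = new IntWritable(); // wraps 0 by default
        for (int i = 0; i < 3; i++) {
            w.set(i);                      // mutates the same instance in place
            System.out.println(w.get());   // prints 0, 1, 2
        }
    }
}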
From source file: org.apache.pig.piggybank.test.storage.TestSequenceFileLoader.java
License: Apache License
@Override
public void setUp() throws Exception {
    pigServer = new PigServer(LOCAL);
    File tmpFile = File.createTempFile("test", ".txt");
    tmpFileName = tmpFile.getAbsolutePath();
    System.err.println("fileName: " + tmpFileName);
    Path path = new Path("file:///" + tmpFileName);
    JobConf conf = new JobConf();
    FileSystem fs = FileSystem.get(path.toUri(), conf);
    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < DATA.length; i++) {
            key.set(i);
            value.set(DATA[i]);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}
From source file: org.apache.pig.piggybank.test.storage.TestSequenceFileLoader.java
License: Apache License
@Test
public void testReadBytesWritable() throws IOException {
    File inputFile = File.createTempFile("test", ".txt");
    System.err.println("fileName: " + inputFile.getAbsolutePath());
    Path path = new Path("file:///" + inputFile.getAbsolutePath());
    JobConf conf = new JobConf();
    FileSystem fs = FileSystem.get(path.toUri(), conf);
    IntWritable key = new IntWritable();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), BytesWritable.class);
        int numRecords = 3;
        for (int i = 0; i < numRecords; i++) {
            key.set(i);
            String val = "" + Math.pow(10, (numRecords - i));
            writer.append(key, new BytesWritable(val.getBytes()));
        }
    } finally {
        IOUtils.closeStream(writer);
    }
    Data data = resetData(pigServer);
    data.set("expected",
            tuple(0L, new DataByteArray("1000.0")),
            tuple(1L, new DataByteArray("100.0")),
            tuple(2L, new DataByteArray("10.0")));
    pigServer.registerQuery("A = LOAD '" + Util.encodeEscape(inputFile.getAbsolutePath())
            + "' USING org.apache.pig.piggybank.storage.SequenceFileLoader() AS (key:long, val);");
    pigServer.registerQuery("STORE A into 'actual' USING mock.Storage();");
    assertEquals(data.get("expected"), data.get("actual"));
}
From source file: org.apache.sysml.runtime.matrix.sort.ReadWithZeros.java
License: Apache License
public boolean readNextKeyValuePairs(DoubleWritable readKey, IntWritable readValue) throws IOException {
    boolean ret = true;
    try {
        if (contain0s && justFound0) {
            readKey.set(keyAfterZero.get());
            readValue.set(valueAfterZero.get());
            contain0s = false;
        } else {
            readKey.readFields(currentStream);
            readValue.readFields(currentStream);
        }
    } catch (EOFException e) {
        // Case in which zero is the maximum value in the matrix. The zero value
        // from the last entry is not present in the input sorted matrix, but
        // needs to be accounted for.
        if (contain0s && !justFound0) {
            justFound0 = true;
            readKey.set(0);
            readValue.set((int) numZeros);
        } else {
            ret = false;
        }
    }

    if (contain0s && !justFound0 && readKey.get() >= 0) {
        justFound0 = true;
        keyAfterZero.set(readKey.get());
        valueAfterZero.set(readValue.get());
        readKey.set(0);
        readValue.set((int) numZeros);
    }
    return ret;
}
From source file: org.apache.tez.engine.common.shuffle.impl.ShuffleScheduler.java
License: Apache License
public synchronized void copyFailed(TezTaskAttemptID mapId, MapHost host, boolean readError) {
    host.penalize();
    int failures = 1;
    if (failureCounts.containsKey(mapId)) {
        IntWritable x = failureCounts.get(mapId);
        x.set(x.get() + 1);
        failures = x.get();
    } else {
        failureCounts.put(mapId, new IntWritable(1));
    }
    String hostname = host.getHostName();
    if (hostFailures.containsKey(hostname)) {
        IntWritable x = hostFailures.get(hostname);
        x.set(x.get() + 1);
    } else {
        hostFailures.put(hostname, new IntWritable(1));
    }
    if (failures >= abortFailureLimit) {
        try {
            throw new IOException(failures + " failures downloading " + mapId);
        } catch (IOException ie) {
            reporter.reportException(ie);
        }
    }
    checkAndInformJobTracker(failures, mapId, readError);
    checkReducerHealth();
    long delay = (long) (INITIAL_PENALTY * Math.pow(PENALTY_GROWTH_RATE, failures));
    penalties.add(new Penalty(host, delay));
    failedShuffleCounter.increment(1);
}
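This scheduler and the two ShuffleScheduler variants that follow use IntWritable as a mutable counter value inside a map: x.set(x.get() + 1) bumps the stored object in place, so recording a repeated failure requires no new allocation and no re-insertion into the map.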
From source file: org.apache.tez.runtime.library.common.shuffle.impl.ShuffleScheduler.java
License: Apache License
public synchronized void copyFailed(InputAttemptIdentifier srcAttempt, MapHost host, boolean readError) {
    host.penalize();
    int failures = 1;
    if (failureCounts.containsKey(srcAttempt)) {
        IntWritable x = failureCounts.get(srcAttempt);
        x.set(x.get() + 1);
        failures = x.get();
    } else {
        failureCounts.put(srcAttempt, new IntWritable(1));
    }
    String hostname = host.getHostName();
    if (hostFailures.containsKey(hostname)) {
        IntWritable x = hostFailures.get(hostname);
        x.set(x.get() + 1);
    } else {
        hostFailures.put(hostname, new IntWritable(1));
    }
    if (failures >= abortFailureLimit) {
        IOException ioe = new IOException(failures + " failures downloading "
                + TezRuntimeUtils.getTaskAttemptIdentifier(inputContext.getSourceVertexName(),
                        srcAttempt.getInputIdentifier().getSrcTaskIndex(), srcAttempt.getAttemptNumber()));
        ioe.fillInStackTrace();
        shuffle.reportException(ioe);
    }
    checkAndInformJobTracker(failures, srcAttempt, readError);
    checkReducerHealth();
    long delay = (long) (INITIAL_PENALTY * Math.pow(PENALTY_GROWTH_RATE, failures));
    penalties.add(new Penalty(host, delay));
    failedShuffleCounter.increment(1);
}
From source file: org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleScheduler.java
License: Apache License
public synchronized void copyFailed(InputAttemptIdentifier srcAttempt, MapHost host, boolean readError,
        boolean connectError) {
    host.penalize();
    int failures = 1;
    if (failureCounts.containsKey(srcAttempt)) {
        IntWritable x = failureCounts.get(srcAttempt);
        x.set(x.get() + 1);
        failures = x.get();
    } else {
        failureCounts.put(srcAttempt, new IntWritable(1));
    }
    String hostPort = host.getHostIdentifier();
    // TODO TEZ-922 hostFailures isn't really used for anything. Factor it into error
    // reporting / potential blacklisting of hosts.
    if (hostFailures.containsKey(hostPort)) {
        IntWritable x = hostFailures.get(hostPort);
        x.set(x.get() + 1);
    } else {
        hostFailures.put(hostPort, new IntWritable(1));
    }
    if (failures >= abortFailureLimit) {
        // This task has seen too many fetch failures - report it as failed. The
        // AM may retry it if max failures has not been reached.
        // Between the task and the AM - someone needs to determine who is at
        // fault. If there's enough errors seen on the task, before the AM informs
        // it about source failure, the task considers itself to have failed and
        // allows the AM to re-schedule it.
        IOException ioe = new IOException(failures + " failures downloading "
                + TezRuntimeUtils.getTaskAttemptIdentifier(inputContext.getSourceVertexName(),
                        srcAttempt.getInputIdentifier().getInputIndex(), srcAttempt.getAttemptNumber()));
        ioe.fillInStackTrace();
        // Shuffle knows how to deal with failures post shutdown via the onFailure hook
        shuffle.reportException(ioe);
    }
    failedShuffleCounter.increment(1);
    checkAndInformAM(failures, srcAttempt, readError, connectError);
    checkReducerHealth();
    long delay = (long) (INITIAL_PENALTY * Math.pow(PENALTY_GROWTH_RATE, failures));
    penalties.add(new Penalty(host, delay));
}
From source file: org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java
License: Apache License
private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress)
        throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);

    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class,
            shouldCompress, -1);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numOutputs = numPartitions;
    long availableMemory = 2048;
    int numRecordsWritten = 0;

    Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
    for (int i = 0; i < numOutputs; i++) {
        expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
            numOutputs, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;
    int sizePerRecord = 4 + 8; // IntW + LongW
    int sizePerRecordWithOverhead = sizePerRecord + 12; // Record + META_OVERHEAD

    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        expectedValues.get(partition).put(intWritable.get(), longWritable.get());
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    List<Event> events = kvWriter.close();

    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer;

    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    // Verify the status of the buffers
    if (numExpectedSpills == 0) {
        assertEquals(1, kvWriter.numInitializedBuffers);
    } else {
        assertTrue(kvWriter.numInitializedBuffers > 1);
    }
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());

    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWritternCounter = counters
            .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        if (!shouldCompress) {
            assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }
    assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    if (numExpectedSpills == 0) {
        assertEquals(0, additionalSpillBytesWritten);
        assertEquals(0, additionalSpillBytesRead);
    } else {
        assertTrue(additionalSpillBytesWritten > 0);
        assertTrue(additionalSpillBytesRead > 0);
        if (!shouldCompress) {
            assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
            assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
        }
    }
    assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
    assertEquals(numExpectedSpills, numAdditionalSpillsCounter.getValue());

    BitSet emptyPartitionBits = null;
    // Verify the event returned
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
            .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    if (skippedPartitions == null && numRecordsWritten > 0) {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    } else {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils
                .decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        if (numRecordsWritten == 0) {
            assertEquals(numPartitions, emptyPartitionBits.cardinality());
        } else {
            for (Integer e : skippedPartitions) {
                assertTrue(emptyPartitionBits.get(e));
            }
            assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
        }
    }
    if (emptyPartitionBits.cardinality() != numPartitions) {
        assertEquals(HOST_STRING, eventProto.getHost());
        assertEquals(SHUFFLE_PORT, eventProto.getPort());
        assertEquals(uniqueId, eventProto.getPathComponent());
    } else {
        assertFalse(eventProto.hasHost());
        assertFalse(eventProto.hasPort());
        assertFalse(eventProto.hasPathComponent());
    }

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;
    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
    } else {
        return; // Special case for 0 records.
    }

    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    IntWritable keyDeser = new IntWritable();
    LongWritable valDeser = new LongWritable();
    for (int i = 0; i < numOutputs; i++) {
        if (skippedPartitions != null && skippedPartitions.contains(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
                0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
            assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
        }
        inStream.close();
    }
    for (int i = 0; i < numOutputs; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}
From source file: org.apache.tez.runtime.library.testutils.KVDataGen.java
License: Apache License
/**
 * Generate key/value pairs.
 *
 * @param sorted whether data should be sorted by key
 * @param repeatCount number of keys to be repeated
 * @return the generated list of key/value pairs
 */
public static List<KVPair> generateTestData(boolean sorted, int repeatCount) {
    List<KVPair> data = new LinkedList<KVPair>();
    Random rnd = new Random();
    KVPair kvp = null;
    for (int i = 0; i < 5; i++) {
        String keyStr = (sorted) ? ("key" + i) : (rnd.nextLong() + "key" + i);
        Text key = new Text(keyStr);
        IntWritable value = new IntWritable(i + repeatCount);
        kvp = new KVPair(key, value);
        data.add(kvp);
        if ((repeatCount > 0) && (i % 2 == 0)) {
            // Repeat this key a random number of times
            int count = rnd.nextInt(5);
            for (int j = 0; j < count; j++) {
                repeatCount++;
                value.set(i + rnd.nextInt());
                kvp = new KVPair(key, value);
                data.add(kvp);
            }
        }
    }
    // If repeated keys are needed, also add some repeated keys at the end of the file.
    if (repeatCount > 0 && kvp != null) {
        data.add(kvp);
        data.add(kvp);
    }
    return data;
}
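One subtlety worth noting in the repeat loop above (assuming KVPair stores the reference it is given rather than copying it): value.set(i + rnd.nextInt()) mutates the same IntWritable already held by the earlier pairs for that key, so every pair added in that iteration ends up observing the last value written. This is a direct consequence of set() mutating the instance in place rather than producing a new object.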
From source file: org.gpfvic.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob.java
License: Apache License
private void initializeM(Vector averageRatings) throws IOException {
    Random random = RandomUtils.getRandom();

    FileSystem fs = FileSystem.get(pathToM(-1).toUri(), getConf());
    try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(),
            new Path(pathToM(-1), "part-m-00000"), IntWritable.class, VectorWritable.class)) {
        IntWritable index = new IntWritable();
        VectorWritable featureVector = new VectorWritable();
        for (Vector.Element e : averageRatings.nonZeroes()) {
            Vector row = new DenseVector(numFeatures);
            row.setQuick(0, e.get());
            for (int m = 1; m < numFeatures; m++) {
                row.setQuick(m, random.nextDouble());
            }
            index.set(e.index());
            featureVector.set(row);
            writer.append(index, featureVector);
        }
    }
}
From source file: org.hadoop.tdg.TestPseudoHadoop.java
License: Apache License
@Test
public void sequenceFileIO() throws IOException {
    IntWritable key = new IntWritable();
    Text value = new Text();
    // write
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, fs.getConf(), p, key.getClass(), value.getClass());
        for (int i = 0; i < 100; i++) {
            key.set(100 - i);
            value.set(DATA[i % DATA.length]);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
    // read
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, p, fs.getConf());
        Writable readerKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), fs.getConf());
        Writable readerValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), fs.getConf());
        long pos = reader.getPosition();
        while (reader.next(readerKey, readerValue)) {
            String syncSeen = reader.syncSeen() ? "*" : "";
            System.out.printf("[%s%s]\t%s\t%s\n", pos, syncSeen, readerKey, readerValue);
            pos = reader.getPosition();
        }
    } finally {
        // close the reader here (the original closed the already-closed writer, leaking the reader)
        IOUtils.closeStream(reader);
    }
}