Example usage for org.apache.hadoop.io IntWritable set

Introduction

On this page you can find example usages of the set method of org.apache.hadoop.io.IntWritable.

Prototype

public void set(int value) 

Document

Set the value of this IntWritable.
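
A minimal, self-contained sketch of the call (assuming only hadoop-common on the classpath; the class name is illustrative): IntWritable is a mutable box around an int, so a single instance can be reused across records by calling set.

import org.apache.hadoop.io.IntWritable;

public class IntWritableSetExample {
    public static void main(String[] args) {
        IntWritable iw = new IntWritable();   // default constructor wraps 0
        iw.set(42);                           // replace the wrapped value in place
        System.out.println(iw.get());         // prints 42
        iw.set(iw.get() + 1);                 // read-modify-write idiom used in the examples below
        System.out.println(iw.get());         // prints 43
    }
}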

Usage

From source file: org.apache.pig.piggybank.test.storage.TestSequenceFileLoader.java

License: Apache License
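
This setUp method writes a temporary SequenceFile of IntWritable/Text pairs, reusing a single key instance by calling key.set(i) before each append.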

@Override
public void setUp() throws Exception {
    pigServer = new PigServer(LOCAL);
    File tmpFile = File.createTempFile("test", ".txt");
    tmpFileName = tmpFile.getAbsolutePath();
    System.err.println("fileName: " + tmpFileName);
    Path path = new Path("file:///" + tmpFileName);
    JobConf conf = new JobConf();
    FileSystem fs = FileSystem.get(path.toUri(), conf);

    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < DATA.length; i++) {
            key.set(i);
            value.set(DATA[i]);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}

From source file: org.apache.pig.piggybank.test.storage.TestSequenceFileLoader.java

License: Apache License
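
This test writes IntWritable keys against BytesWritable values, advancing the reused key with set(i), then loads the file through Pig's SequenceFileLoader and compares against the expected tuples.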

@Test
public void testReadBytesWritable() throws IOException {
    File inputFile = File.createTempFile("test", ".txt");
    System.err.println("fileName: " + inputFile.getAbsolutePath());
    Path path = new Path("file:///" + inputFile.getAbsolutePath());
    JobConf conf = new JobConf();
    FileSystem fs = FileSystem.get(path.toUri(), conf);

    IntWritable key = new IntWritable();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), BytesWritable.class);
        int numRecords = 3;
        for (int i = 0; i < numRecords; i++) {
            key.set(i);
            String val = "" + Math.pow(10, (numRecords - i));
            writer.append(key, new BytesWritable(val.getBytes()));
        }
    } finally {
        IOUtils.closeStream(writer);
    }

    Data data = resetData(pigServer);
    data.set("expected", tuple(0L, new DataByteArray("1000.0")), tuple(1L, new DataByteArray("100.0")),
            tuple(2L, new DataByteArray("10.0")));

    pigServer.registerQuery("A = LOAD '" + Util.encodeEscape(inputFile.getAbsolutePath())
            + "' USING org.apache.pig.piggybank.storage.SequenceFileLoader() AS (key:long, val);");
    pigServer.registerQuery("STORE A into 'actual' USING mock.Storage();");

    assertEquals(data.get("expected"), data.get("actual"));

}

From source file: org.apache.sysml.runtime.matrix.sort.ReadWithZeros.java

License: Apache License
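
Here set is called on both a DoubleWritable key and an IntWritable value to splice a synthetic zero entry (carrying the zero count) into a stream of sorted key/value pairs.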

public boolean readNextKeyValuePairs(DoubleWritable readKey, IntWritable readValue) throws IOException {
    boolean ret = true;

    try {
        if (contain0s && justFound0) {
            readKey.set(keyAfterZero.get());
            readValue.set(valueAfterZero.get());
            contain0s = false;
        } else {
            readKey.readFields(currentStream);
            readValue.readFields(currentStream);
        }
    } catch (EOFException e) {
        // case in which zero is the maximum value in the matrix. 
        // The zero value from the last entry is not present in the input sorted matrix, but needs to be accounted for.
        if (contain0s && !justFound0) {
            justFound0 = true;
            readKey.set(0);
            readValue.set((int) numZeros);
        } else {
            ret = false;
        }
    }

    if (contain0s && !justFound0 && readKey.get() >= 0) {
        justFound0 = true;
        keyAfterZero.set(readKey.get());
        valueAfterZero.set(readValue.get());
        readKey.set(0);
        readValue.set((int) numZeros);
    }

    return ret;
}

From source file: org.apache.tez.engine.common.shuffle.impl.ShuffleScheduler.java

License: Apache License
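
copyFailed tracks fetch failures per map attempt and per host as mutable IntWritable counters, incrementing each in place with x.set(x.get() + 1).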

public synchronized void copyFailed(TezTaskAttemptID mapId, MapHost host, boolean readError) {
    host.penalize();
    int failures = 1;
    if (failureCounts.containsKey(mapId)) {
        IntWritable x = failureCounts.get(mapId);
        x.set(x.get() + 1);
        failures = x.get();
    } else {
        failureCounts.put(mapId, new IntWritable(1));
    }
    String hostname = host.getHostName();
    if (hostFailures.containsKey(hostname)) {
        IntWritable x = hostFailures.get(hostname);
        x.set(x.get() + 1);
    } else {
        hostFailures.put(hostname, new IntWritable(1));
    }
    if (failures >= abortFailureLimit) {
        try {
            throw new IOException(failures + " failures downloading " + mapId);
        } catch (IOException ie) {
            reporter.reportException(ie);
        }
    }

    checkAndInformJobTracker(failures, mapId, readError);

    checkReducerHealth();

    long delay = (long) (INITIAL_PENALTY * Math.pow(PENALTY_GROWTH_RATE, failures));

    penalties.add(new Penalty(host, delay));

    failedShuffleCounter.increment(1);
}

From source file: org.apache.tez.runtime.library.common.shuffle.impl.ShuffleScheduler.java

License: Apache License
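
A later variant of the same pattern: IntWritable counters keyed by InputAttemptIdentifier and hostname, incremented in place via set.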

public synchronized void copyFailed(InputAttemptIdentifier srcAttempt, MapHost host, boolean readError) {
    host.penalize();
    int failures = 1;
    if (failureCounts.containsKey(srcAttempt)) {
        IntWritable x = failureCounts.get(srcAttempt);
        x.set(x.get() + 1);
        failures = x.get();
    } else {
        failureCounts.put(srcAttempt, new IntWritable(1));
    }
    String hostname = host.getHostName();
    if (hostFailures.containsKey(hostname)) {
        IntWritable x = hostFailures.get(hostname);
        x.set(x.get() + 1);
    } else {
        hostFailures.put(hostname, new IntWritable(1));
    }
    if (failures >= abortFailureLimit) {
        IOException ioe = new IOException(failures + " failures downloading "
                + TezRuntimeUtils.getTaskAttemptIdentifier(inputContext.getSourceVertexName(),
                        srcAttempt.getInputIdentifier().getSrcTaskIndex(), srcAttempt.getAttemptNumber()));
        ioe.fillInStackTrace();
        shuffle.reportException(ioe);
    }

    checkAndInformJobTracker(failures, srcAttempt, readError);

    checkReducerHealth();

    long delay = (long) (INITIAL_PENALTY * Math.pow(PENALTY_GROWTH_RATE, failures));

    penalties.add(new Penalty(host, delay));

    failedShuffleCounter.increment(1);
}

From source file: org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleScheduler.java

License: Apache License
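
The orderedgrouped version of copyFailed uses the same IntWritable counter idiom, keyed by host:port, and additionally distinguishes connect errors from read errors.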

public synchronized void copyFailed(InputAttemptIdentifier srcAttempt, MapHost host, boolean readError,
        boolean connectError) {
    host.penalize();
    int failures = 1;
    if (failureCounts.containsKey(srcAttempt)) {
        IntWritable x = failureCounts.get(srcAttempt);
        x.set(x.get() + 1);
        failures = x.get();
    } else {
        failureCounts.put(srcAttempt, new IntWritable(1));
    }
    String hostPort = host.getHostIdentifier();
    // TODO TEZ-922 hostFailures isn't really used for anything. Factor it into error
    // reporting / potential blacklisting of hosts.
    if (hostFailures.containsKey(hostPort)) {
        IntWritable x = hostFailures.get(hostPort);
        x.set(x.get() + 1);
    } else {
        hostFailures.put(hostPort, new IntWritable(1));
    }
    if (failures >= abortFailureLimit) {
        // This task has seen too many fetch failures - report it as failed. The
        // AM may retry it if max failures has not been reached.

        // Between the task and the AM - someone needs to determine who is at
        // fault. If there's enough errors seen on the task, before the AM informs
        // it about source failure, the task considers itself to have failed and
        // allows the AM to re-schedule it.
        IOException ioe = new IOException(failures + " failures downloading "
                + TezRuntimeUtils.getTaskAttemptIdentifier(inputContext.getSourceVertexName(),
                        srcAttempt.getInputIdentifier().getInputIndex(), srcAttempt.getAttemptNumber()));
        ioe.fillInStackTrace();
        // Shuffle knows how to deal with failures post shutdown via the onFailure hook
        shuffle.reportException(ioe);
    }

    failedShuffleCounter.increment(1);
    checkAndInformAM(failures, srcAttempt, readError, connectError);

    checkReducerHealth();

    long delay = (long) (INITIAL_PENALTY * Math.pow(PENALTY_GROWTH_RATE, failures));

    penalties.add(new Penalty(host, delay));
}

From source file: org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java

License: Apache License
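
baseTest reuses one IntWritable/LongWritable pair for every record, calling set(i) on both before each write, then verifies the counters, the data movement event, and the spilled data on disk.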

private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress)
        throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);

    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class,
            shouldCompress, -1);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numOutputs = numPartitions;
    long availableMemory = 2048;
    int numRecordsWritten = 0;

    Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
    for (int i = 0; i < numOutputs; i++) {
        expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
            numOutputs, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;
    int sizePerRecord = 4 + 8; // IntW + LongW
    int sizePerRecordWithOverhead = sizePerRecord + 12; // Record + META_OVERHEAD

    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        expectedValues.get(partition).put(intWritable.get(), longWritable.get());
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    List<Event> events = kvWriter.close();

    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer;

    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    // Verify the status of the buffers
    if (numExpectedSpills == 0) {
        assertEquals(1, kvWriter.numInitializedBuffers);
    } else {
        assertTrue(kvWriter.numInitializedBuffers > 1);
    }
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());

    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWritternCounter = counters
            .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        if (!shouldCompress) {
            assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }
    assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    if (numExpectedSpills == 0) {
        assertEquals(0, additionalSpillBytesWritten);
        assertEquals(0, additionalSpillBytesRead);
    } else {
        assertTrue(additionalSpillBytesWritten > 0);
        assertTrue(additionalSpillBytesRead > 0);
        if (!shouldCompress) {
            assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
            assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
        }
    }
    assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
    assertEquals(numExpectedSpills, numAdditionalSpillsCounter.getValue());

    BitSet emptyPartitionBits = null;
    // Verify the event returned
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
            .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    if (skippedPartitions == null && numRecordsWritten > 0) {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    } else {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils
                .decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        if (numRecordsWritten == 0) {
            assertEquals(numPartitions, emptyPartitionBits.cardinality());
        } else {
            for (Integer e : skippedPartitions) {
                assertTrue(emptyPartitionBits.get(e));
            }
            assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
        }
    }
    if (emptyPartitionBits.cardinality() != numPartitions) {
        assertEquals(HOST_STRING, eventProto.getHost());
        assertEquals(SHUFFLE_PORT, eventProto.getPort());
        assertEquals(uniqueId, eventProto.getPathComponent());
    } else {
        assertFalse(eventProto.hasHost());
        assertFalse(eventProto.hasPort());
        assertFalse(eventProto.hasPathComponent());
    }

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;

    // Special case for 0 records: no output or index files exist, so stop here.
    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
    } else {
        return;
    }

    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    IntWritable keyDeser = new IntWritable();
    LongWritable valDeser = new LongWritable();
    for (int i = 0; i < numOutputs; i++) {
        if (skippedPartitions != null && skippedPartitions.contains(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
                0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
            assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
        }
        inStream.close();
    }
    for (int i = 0; i < numOutputs; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}

From source file: org.apache.tez.runtime.library.testutils.KVDataGen.java

License: Apache License
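
generateTestData builds Text/IntWritable pairs and calls value.set(...) to vary the value when a key is repeated.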

/**
 * Generates a list of test key/value pairs.
 *
 * @param sorted whether data should be sorted by key
 * @param repeatCount number of keys to be repeated
 * @return the generated list of KVPair entries
 */
public static List<KVPair> generateTestData(boolean sorted, int repeatCount) {
    List<KVPair> data = new LinkedList<KVPair>();
    Random rnd = new Random();
    KVPair kvp = null;
    for (int i = 0; i < 5; i++) {
        String keyStr = (sorted) ? ("key" + i) : (rnd.nextLong() + "key" + i);
        Text key = new Text(keyStr);
        IntWritable value = new IntWritable(i + repeatCount);
        kvp = new KVPair(key, value);
        data.add(kvp);
        if ((repeatCount > 0) && (i % 2 == 0)) { // repeat this key a random number of times
            int count = rnd.nextInt(5);
            for (int j = 0; j < count; j++) {
                repeatCount++;
                // all pairs for this key share this IntWritable, so setting it here also updates earlier entries
                value.set(i + rnd.nextInt());
                kvp = new KVPair(key, value);
                data.add(kvp);
            }
        }
    }
    // If we need to generate repeated keys, also add some repeated keys at the end of the file.
    if (repeatCount > 0 && kvp != null) {
        data.add(kvp);
        data.add(kvp);
    }
    return data;
}

From source file: org.gpfvic.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob.java

License: Apache License
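
initializeM reuses an IntWritable index and a VectorWritable feature vector across the loop, setting both before each writer.append.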

private void initializeM(Vector averageRatings) throws IOException {
    Random random = RandomUtils.getRandom();

    FileSystem fs = FileSystem.get(pathToM(-1).toUri(), getConf());
    try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(),
            new Path(pathToM(-1), "part-m-00000"), IntWritable.class, VectorWritable.class)) {
        IntWritable index = new IntWritable();
        VectorWritable featureVector = new VectorWritable();

        for (Vector.Element e : averageRatings.nonZeroes()) {
            Vector row = new DenseVector(numFeatures);
            row.setQuick(0, e.get());
            for (int m = 1; m < numFeatures; m++) {
                row.setQuick(m, random.nextDouble());
            }
            index.set(e.index());
            featureVector.set(row);
            writer.append(index, featureVector);
        }
    }
}

From source file: org.hadoop.tdg.TestPseudoHadoop.java

License: Apache License
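
sequenceFileIO writes 100 records with key.set(100 - i), then reads them back, printing each record's position and whether a sync point was seen.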

@Test
public void sequenceFileIO() throws IOException {
    IntWritable key = new IntWritable();
    Text value = new Text();
    // write
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, fs.getConf(), p, key.getClass(), value.getClass());
        for (int i = 0; i < 100; i++) {
            key.set(100 - i);
            value.set(DATA[i % DATA.length]);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
    // read
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, p, fs.getConf());
        Writable readerKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), fs.getConf());
        Writable readerValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), fs.getConf());
        long pos = reader.getPosition();
        while (reader.next(readerKey, readerValue)) {
            String syncSeen = reader.syncSeen() ? "*" : "";
            System.out.printf("[%s%s]\t%s\t%s\n", pos, syncSeen, readerKey, readerValue);
            pos = reader.getPosition();
        }
    } finally {
        IOUtils.closeStream(reader);
    }
}