Example usage for org.apache.hadoop.io.compress DefaultCodec DefaultCodec

Introduction

On this page you can find example usages of the org.apache.hadoop.io.compress.DefaultCodec constructor, DefaultCodec().

Prototype

DefaultCodec()
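
The prototype above is the no-argument constructor; every example on this page creates the codec with new DefaultCodec(). Because DefaultCodec is Configurable, it is normally handed a Configuration via setConf(...) before streams are created, as several examples below do. The following minimal sketch is not taken from any of the projects listed below; the class name DefaultCodecExample is invented for illustration. It round-trips a small byte array through the codec in memory:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.DefaultCodec;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;

public class DefaultCodecExample {

    public static void main(String[] args) throws Exception {
        // DefaultCodec is Configurable; hand it a Configuration before use.
        DefaultCodec codec = new DefaultCodec();
        codec.setConf(new Configuration());

        // Compress a small byte array into an in-memory buffer.
        byte[] original = "hello DefaultCodec".getBytes(StandardCharsets.UTF_8);
        ByteArrayOutputStream compressed = new ByteArrayOutputStream();
        CompressionOutputStream out = codec.createOutputStream(compressed);
        out.write(original);
        out.close();

        // Decompress it again and collect the restored bytes.
        CompressionInputStream in = codec
                .createInputStream(new ByteArrayInputStream(compressed.toByteArray()));
        ByteArrayOutputStream restored = new ByteArrayOutputStream();
        byte[] buffer = new byte[4096];
        int read;
        while ((read = in.read(buffer)) != -1) {
            restored.write(buffer, 0, read);
        }
        in.close();

        System.out.println(restored.toString(StandardCharsets.UTF_8.name()));
        // DefaultCodec reports ".deflate" as its default file extension.
        System.out.println("Default extension: " + codec.getDefaultExtension());
    }
}

The examples that follow show the same constructor used with RCFile writers, the Tez unordered partitioned KV writer, the Pulsar HDFS connector, and Spring Data Hadoop's HDFS resource loader.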

Usage

From source file:org.apache.pig.piggybank.test.storage.TestHiveColumnarLoader.java

License:Apache License

private static void produceYearMonthDayHourPartitionedData() throws IOException {

    yearMonthDayHourPartitionedDir = new File(
            "testhiveColumnarLoader-yearMonthDayHourDir-" + System.currentTimeMillis());
    yearMonthDayHourPartitionedDir.mkdir();
    yearMonthDayHourPartitionedDir.deleteOnExit();

    int years = 1;
    int months = 2;
    int days = 3;
    int hours = 4;

    yearMonthDayHourcalendar = Calendar.getInstance();

    yearMonthDayHourcalendar.set(Calendar.YEAR, 2010);
    yearMonthDayHourcalendar.set(Calendar.DAY_OF_MONTH, Calendar.MONDAY);
    yearMonthDayHourcalendar.set(Calendar.MONTH, Calendar.JANUARY);

    for (int i = 0; i < years; i++) {

        File file = new File(yearMonthDayHourPartitionedDir,
                "year=" + yearMonthDayHourcalendar.get(Calendar.YEAR));

        file.mkdir();
        file.deleteOnExit();

        for (int monthIndex = 0; monthIndex < months; monthIndex++) {

            File monthFile = new File(file, "month=" + yearMonthDayHourcalendar.get(Calendar.MONTH));
            monthFile.mkdir();
            monthFile.deleteOnExit();

            for (int dayIndex = 0; dayIndex < days; dayIndex++) {
                File dayFile = new File(monthFile,
                        "day=" + yearMonthDayHourcalendar.get(Calendar.DAY_OF_MONTH));
                dayFile.mkdir();
                dayFile.deleteOnExit();

                for (int hourIndex = 0; hourIndex < hours; hourIndex++) {
                    File hourFile = new File(dayFile,
                            "hour=" + yearMonthDayHourcalendar.get(Calendar.HOUR_OF_DAY));
                    hourFile.mkdir();
                    hourFile.deleteOnExit();

                    File rcFile = new File(hourFile.getAbsolutePath() + "/attempt-00000");
                    Path hourFilePath = new Path(rcFile.getAbsolutePath());
                    rcFile.deleteOnExit();

                    writeRCFileTest(fs, simpleRowCount, hourFilePath, columnCount, new DefaultCodec(),
                            columnCount);

                    yearMonthDayHourcalendar.add(Calendar.HOUR_OF_DAY, 1);
                }

                yearMonthDayHourcalendar.add(Calendar.DAY_OF_MONTH, 1);
            }
            yearMonthDayHourcalendar.add(Calendar.MONTH, 1);
        }

    }

    endingDate = dateFormat.format(calendar.getTime());
}

From source file:org.apache.pig.piggybank.test.storage.TestHiveColumnarLoader.java

License:Apache License

/**
 * Writes out a simple temporary file with 5 columns and 100 rows.<br/>
 * Data is random numbers.
 *
 * @throws SerDeException
 * @throws IOException
 */
private static final void produceSimpleData() throws SerDeException, IOException {
    // produce a single file
    simpleDataFile = File.createTempFile("testhiveColumnarLoader", ".txt");
    simpleDataFile.deleteOnExit();

    Path path = new Path(simpleDataFile.getPath());

    writeRCFileTest(fs, simpleRowCount, path, columnCount, new DefaultCodec(), columnCount);

    // produce a folder of simple data
    simpleDataDir = new File("simpleDataDir" + System.currentTimeMillis());
    simpleDataDir.mkdir();

    for (int i = 0; i < simpleDirFileCount; i++) {

        simpleDataFile = new File(simpleDataDir, "testhiveColumnarLoader-" + i + ".txt");

        Path filePath = new Path(simpleDataFile.getPath());

        writeRCFileTest(fs, simpleRowCount, filePath, columnCount, new DefaultCodec(), columnCount);

    }

}

From source file:org.apache.pulsar.io.hdfs.AbstractHdfsConnector.java

License:Apache License

protected CompressionCodec getCompressionCodec() {
    if (connectorConfig.getCompression() == null) {
        return null;
    }

    CompressionCodec codec = getCompressionCodecFactory()
            .getCodecByName(connectorConfig.getCompression().name());

    return (codec != null) ? codec : new DefaultCodec();
}

From source file:org.apache.tajo.storage.rcfile.TestRCFile.java

License:Apache License

@Test
public void testSimpleReadAndWrite() throws IOException {
    fs.delete(file, true);

    Datum[] r1 = new Datum[7];
    int idx = 0;
    r1[idx++] = DatumFactory.createInt4(123);
    r1[idx++] = DatumFactory.createInt8(456);
    r1[idx++] = DatumFactory.createFloat4(7.89f);
    r1[idx++] = DatumFactory.createFloat8(10.11d);
    r1[idx] = DatumFactory.createText("tajo and hadoop");

    //    byte[][] record_1 = {
    //        "123".getBytes("UTF-8"), "456".getBytes("UTF-8"),
    //        "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
    //        "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
    //        new byte[0], "NULL".getBytes("UTF-8")};
    System.out.println("Original size: " + r1[4].asByteArray().length);
    byte[][] record_1 = { r1[0].asByteArray(), r1[1].asByteArray(), r1[2].asByteArray(), r1[3].asByteArray(),
            r1[4].asByteArray(), new byte[0], "NULL".getBytes("UTF-8") };

    Datum[] r2 = new Datum[7];
    idx = 0;
    r2[idx++] = DatumFactory.createInt4(100);
    r2[idx++] = DatumFactory.createInt8(200);
    r2[idx++] = DatumFactory.createFloat4(5.3f);
    r2[idx++] = DatumFactory.createFloat8(11.12d);
    r2[idx] = DatumFactory.createText("the second str");

    byte[][] record_2 = { r2[0].asByteArray(), r2[1].asByteArray(), r2[2].asByteArray(), r2[3].asByteArray(),
            r2[4].asByteArray(), new byte[0], "NULL".getBytes("UTF-8") };

    //    byte[][] record_2 = {"100".getBytes("UTF-8"), "200".getBytes("UTF-8"),
    //        "123".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
    //        "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
    //        new byte[0], "NULL".getBytes("UTF-8")};

    conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, 7);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null,
            RCFile.createMetadata(new Text("apple"), new Text("block"), new Text("cat"), new Text("dog")),
            new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    writer.close();

    //    Object[] expectedRecord_1 = {new ByteWritable((byte) 123),
    //        new ShortWritable((short) 456), new IntWritable(789),
    //        new LongWritable(1000), new DoubleWritable(5.3),
    //        new Text("hive and hadoop"), null, null};
    //
    //    Object[] expectedRecord_2 = {new ByteWritable((byte) 100),
    //        new ShortWritable((short) 200), new IntWritable(123),
    //        new LongWritable(1000), new DoubleWritable(5.3),
    //        new Text("hive and hadoop"), null, null};

    ColumnProjectionUtils.setFullyReadColumns(conf);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    assertEquals(new Text("block"), reader.getMetadata().get(new Text("apple")));
    assertEquals(new Text("block"), reader.getMetadataValueOf(new Text("apple")));
    assertEquals(new Text("dog"), reader.getMetadataValueOf(new Text("cat")));
    LongWritable rowID = new LongWritable();

    reader.next(rowID);
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(7);
    assertEquals(r1[0], new Int4Datum(cols.get(0).getBytesCopy()));
    assertEquals(r1[1], new Int8Datum(cols.get(1).getBytesCopy()));
    assertEquals(r1[2], new Float4Datum(cols.get(2).getBytesCopy()));
    assertEquals(r1[3], new Float8Datum(cols.get(3).getBytesCopy()));
    assertEquals(r1[4], new TextDatum(cols.get(4).getBytesCopy()));

    reader.next(rowID);
    cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(7);
    assertEquals(r2[0], new Int4Datum(cols.get(0).getBytesCopy()));
    assertEquals(r2[1], new Int8Datum(cols.get(1).getBytesCopy()));
    assertEquals(r2[2], new Float4Datum(cols.get(2).getBytesCopy()));
    assertEquals(r2[3], new Float8Datum(cols.get(3).getBytesCopy()));
    assertEquals(r2[4], new TextDatum(cols.get(4).getBytesCopy()));
    /*
      cols.resetValid(8);
      Object row = serDe.deserialize(cols);
            
      StructObjectInspector oi = (StructObjectInspector) serDe
          .getObjectInspector();
      List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
      assertEquals("Field size should be 8", 8, fieldRefs.size());
      for (int j = 0; j < fieldRefs.size(); j++) {
        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
        Object standardWritableData = ObjectInspectorUtils
    .copyToStandardObject(fieldData, fieldRefs.get(j)
        .getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
        if (i == 0) {
          assertEquals("Field " + i, standardWritableData, expectedRecord_1[j]);
        } else {
          assertEquals("Field " + i, standardWritableData, expectedRecord_2[j]);
        }
      }
    }*/

    reader.close();
}

From source file:org.apache.tajo.storage.rcfile.TestRCFile.java

License:Apache License

private void writeTest(FileSystem fs, int count, Path file, byte[][] fieldsData, Configuration conf)
        throws IOException {
    fs.delete(file, true);

    conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, fieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(fieldsData.length);
    for (int i = 0; i < fieldsData.length; i++) {
        BytesRefWritable cu;
        cu = new BytesRefWritable(fieldsData[i], 0, fieldsData[i].length);
        bytes.set(i, cu);
    }

    for (int i = 0; i < count; i++) {
        writer.append(bytes);
    }
    writer.close();
    long fileLen = fs.getFileStatus(file).getLen();
    System.out.println("The file size of RCFile with " + bytes.size() + " number columns and " + count
            + " number rows is " + fileLen);
}

From source file:org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java

License:Apache License

public void textTest(int numRegularRecords, int numPartitions, long availableMemory, int numLargeKeys,
        int numLargevalues, int numLargeKvPairs) throws IOException, InterruptedException {
    Partitioner partitioner = new HashPartitioner();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);
    Random random = new Random();

    Configuration conf = createConfiguration(outputContext, Text.class, Text.class, shouldCompress, -1,
            HashPartitioner.class);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numRecordsWritten = 0;

    Map<Integer, Multimap<String, String>> expectedValues = new HashMap<Integer, Multimap<String, String>>();
    for (int i = 0; i < numPartitions; i++) {
        expectedValues.put(i, LinkedListMultimap.<String, String>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
            numPartitions, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;

    BitSet partitionsWithData = new BitSet(numPartitions);
    Text keyText = new Text();
    Text valText = new Text();
    for (int i = 0; i < numRegularRecords; i++) {
        String key = createRandomString(Math.abs(random.nextInt(10)));
        String val = createRandomString(Math.abs(random.nextInt(20)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write Large key records
    for (int i = 0; i < numLargeKeys; i++) {
        String key = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
        String val = createRandomString(Math.abs(random.nextInt(20)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write Large val records
    for (int i = 0; i < numLargevalues; i++) {
        String key = createRandomString(Math.abs(random.nextInt(10)));
        String val = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write records where key + val are large (but both can fit in the buffer individually)
    for (int i = 0; i < numLargeKvPairs; i++) {
        String key = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
        String val = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    List<Event> events = kvWriter.close();
    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    TezCounter outputLargeRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_LARGE_RECORDS);
    assertEquals(numLargeKeys + numLargevalues + numLargeKvPairs, outputLargeRecordsCounter.getValue());

    // Validate the event
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numPartitions, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
            .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    BitSet emptyPartitionBits = null;
    if (partitionsWithData.cardinality() != numPartitions) {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils
                .decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        assertEquals(numPartitions - partitionsWithData.cardinality(), emptyPartitionBits.cardinality());
    } else {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    }
    assertEquals(HOST_STRING, eventProto.getHost());
    assertEquals(SHUFFLE_PORT, eventProto.getPort());
    assertEquals(uniqueId, eventProto.getPathComponent());

    // Verify the data
    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;
    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
    } else {
        return;
    }

    // Special case for 0 records.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    Text keyDeser = new Text();
    Text valDeser = new Text();
    for (int i = 0; i < numPartitions; i++) {
        if (emptyPartitionBits.get(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
                0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numPartitions);
            assertTrue(expectedValues.get(partition).remove(keyDeser.toString(), valDeser.toString()));
        }
        inStream.close();
    }
    for (int i = 0; i < numPartitions; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}

From source file:org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java

License:Apache License

private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress)
        throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);

    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class,
            shouldCompress, -1);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numOutputs = numPartitions;
    long availableMemory = 2048;
    int numRecordsWritten = 0;

    Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
    for (int i = 0; i < numOutputs; i++) {
        expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
            numOutputs, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;
    int sizePerRecord = 4 + 8; // IntW + LongW
    int sizePerRecordWithOverhead = sizePerRecord + 12; // Record + META_OVERHEAD

    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        expectedValues.get(partition).put(intWritable.get(), longWritable.get());
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    List<Event> events = kvWriter.close();

    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer;

    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    // Verify the status of the buffers
    if (numExpectedSpills == 0) {
        assertEquals(1, kvWriter.numInitializedBuffers);
    } else {
        assertTrue(kvWriter.numInitializedBuffers > 1);
    }
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());

    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWritternCounter = counters
            .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        if (!shouldCompress) {
            assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }
    assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    if (numExpectedSpills == 0) {
        assertEquals(0, additionalSpillBytesWritten);
        assertEquals(0, additionalSpillBytesRead);
    } else {
        assertTrue(additionalSpillBytesWritten > 0);
        assertTrue(additionalSpillBytesRead > 0);
        if (!shouldCompress) {
            assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
            assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
        }
    }
    assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
    assertEquals(numExpectedSpills, numAdditionalSpillsCounter.getValue());

    BitSet emptyPartitionBits = null;
    // Verify the event returned
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
            .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    if (skippedPartitions == null && numRecordsWritten > 0) {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    } else {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils
                .decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        if (numRecordsWritten == 0) {
            assertEquals(numPartitions, emptyPartitionBits.cardinality());
        } else {
            for (Integer e : skippedPartitions) {
                assertTrue(emptyPartitionBits.get(e));
            }
            assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
        }
    }
    if (emptyPartitionBits.cardinality() != numPartitions) {
        assertEquals(HOST_STRING, eventProto.getHost());
        assertEquals(SHUFFLE_PORT, eventProto.getPort());
        assertEquals(uniqueId, eventProto.getPathComponent());
    } else {
        assertFalse(eventProto.hasHost());
        assertFalse(eventProto.hasPort());
        assertFalse(eventProto.hasPathComponent());
    }

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;

    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
    } else {
        return;
    }

    // Special case for 0 records.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    IntWritable keyDeser = new IntWritable();
    LongWritable valDeser = new LongWritable();
    for (int i = 0; i < numOutputs; i++) {
        if (skippedPartitions != null && skippedPartitions.contains(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
                0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
            assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
        }
        inStream.close();
    }
    for (int i = 0; i < numOutputs; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}

From source file:org.springframework.data.hadoop.fs.HdfsResourceLoaderLegacyTest.java

License:Apache License

@Test
public void testDecompressedStream() throws Exception {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(fs.getConf());
    String name = "local/" + UUID.randomUUID() + codec.getDefaultExtension();
    OutputStream outputStream = codec.createOutputStream(fs.create(new Path(name)));
    byte[] content = name.getBytes();
    outputStream.write(content);
    outputStream.close();

    Resource resource = loader.getResource(name);
    assertNotNull(resource);
    InputStream inputStream = resource.getInputStream();
    assertEquals(DecompressorStream.class, inputStream.getClass());
    assertTrue(TestUtils.compareStreams(new ByteArrayInputStream(content), inputStream));
}

From source file:org.springframework.data.hadoop.fs.HdfsResourceLoaderLegacyTest.java

License:Apache License

@Test
public void testCompressedStream() throws Exception {

    DefaultCodec codec = new DefaultCodec();
    codec.setConf(fs.getConf());
    String name = "local/" + UUID.randomUUID() + codec.getDefaultExtension();
    OutputStream outputStream = codec.createOutputStream(fs.create(new Path(name)));
    byte[] content = name.getBytes();
    outputStream.write(content);
    outputStream.close();

    loader.setUseCodecs(false);

    try {
        Resource resource = loader.getResource(name);
        assertNotNull(resource);
        InputStream inputStream = resource.getInputStream();
        System.out.println(inputStream.getClass());
        assertFalse(DecompressorStream.class.equals(inputStream.getClass()));
        assertFalse(TestUtils.compareStreams(new ByteArrayInputStream(content), inputStream));
    } finally {
        loader.setUseCodecs(true);
    }
}

From source file:tajo.storage.rcfile.TestRCFile.java

License:Apache License

@Test
public void testSimpleReadAndWrite() throws IOException {
    fs.delete(file, true);

    Datum[] r1 = new Datum[7];
    int idx = 0;
    r1[idx++] = DatumFactory.createInt(123);
    r1[idx++] = DatumFactory.createLong(456);
    r1[idx++] = DatumFactory.createFloat(7.89f);
    r1[idx++] = DatumFactory.createDouble(10.11d);
    r1[idx++] = DatumFactory.createString("tajo and hadoop");

    //    byte[][] record_1 = {
    //        "123".getBytes("UTF-8"), "456".getBytes("UTF-8"),
    //        "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
    //        "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
    //        new byte[0], "NULL".getBytes("UTF-8")};
    System.out.println("Original size: " + r1[4].asByteArray().length);
    byte[][] record_1 = { r1[0].asByteArray(), r1[1].asByteArray(), r1[2].asByteArray(), r1[3].asByteArray(),
            r1[4].asByteArray(), new byte[0], "NULL".getBytes("UTF-8") };

    Datum[] r2 = new Datum[7];
    idx = 0;
    r2[idx++] = DatumFactory.createInt(100);
    r2[idx++] = DatumFactory.createLong(200);
    r2[idx++] = DatumFactory.createFloat(5.3f);
    r2[idx++] = DatumFactory.createDouble(11.12d);
    r2[idx++] = DatumFactory.createString("the second str");

    byte[][] record_2 = { r2[0].asByteArray(), r2[1].asByteArray(), r2[2].asByteArray(), r2[3].asByteArray(),
            r2[4].asByteArray(), new byte[0], "NULL".getBytes("UTF-8") };

    //    byte[][] record_2 = {"100".getBytes("UTF-8"), "200".getBytes("UTF-8"),
    //        "123".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
    //        "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
    //        new byte[0], "NULL".getBytes("UTF-8")};

    conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, 7);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null,
            RCFile.createMetadata(new Text("apple"), new Text("block"), new Text("cat"), new Text("dog")),
            new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    writer.close();

    //    Object[] expectedRecord_1 = {new ByteWritable((byte) 123),
    //        new ShortWritable((short) 456), new IntWritable(789),
    //        new LongWritable(1000), new DoubleWritable(5.3),
    //        new Text("hive and hadoop"), null, null};
    //
    //    Object[] expectedRecord_2 = {new ByteWritable((byte) 100),
    //        new ShortWritable((short) 200), new IntWritable(123),
    //        new LongWritable(1000), new DoubleWritable(5.3),
    //        new Text("hive and hadoop"), null, null};

    ColumnProjectionUtils.setFullyReadColumns(conf);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    assertEquals(new Text("block"), reader.getMetadata().get(new Text("apple")));
    assertEquals(new Text("block"), reader.getMetadataValueOf(new Text("apple")));
    assertEquals(new Text("dog"), reader.getMetadataValueOf(new Text("cat")));
    LongWritable rowID = new LongWritable();

    reader.next(rowID);
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(7);
    assertEquals(r1[0], new IntDatum(cols.get(0).getBytesCopy()));
    assertEquals(r1[1], new LongDatum(cols.get(1).getBytesCopy()));
    assertEquals(r1[2], new FloatDatum(cols.get(2).getBytesCopy()));
    assertEquals(r1[3], new DoubleDatum(cols.get(3).getBytesCopy()));
    assertEquals(r1[4], new StringDatum(cols.get(4).getBytesCopy()));

    reader.next(rowID);
    cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(7);
    assertEquals(r2[0], new IntDatum(cols.get(0).getBytesCopy()));
    assertEquals(r2[1], new LongDatum(cols.get(1).getBytesCopy()));
    assertEquals(r2[2], new FloatDatum(cols.get(2).getBytesCopy()));
    assertEquals(r2[3], new DoubleDatum(cols.get(3).getBytesCopy()));
    assertEquals(r2[4], new StringDatum(cols.get(4).getBytesCopy()));
    /*
      cols.resetValid(8);
      Object row = serDe.deserialize(cols);
            
      StructObjectInspector oi = (StructObjectInspector) serDe
          .getObjectInspector();
      List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
      assertEquals("Field size should be 8", 8, fieldRefs.size());
      for (int j = 0; j < fieldRefs.size(); j++) {
        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
        Object standardWritableData = ObjectInspectorUtils
    .copyToStandardObject(fieldData, fieldRefs.get(j)
        .getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
        if (i == 0) {
          assertEquals("Field " + i, standardWritableData, expectedRecord_1[j]);
        } else {
          assertEquals("Field " + i, standardWritableData, expectedRecord_2[j]);
        }
      }
    }*/

    reader.close();
}