Example usage for org.apache.hadoop.io.compress DefaultCodec DefaultCodec

Introduction

On this page you can find example usages of the org.apache.hadoop.io.compress.DefaultCodec constructor, DefaultCodec().

Prototype

DefaultCodec()
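
The prototype above is the no-argument constructor; every example on this page creates the codec with new DefaultCodec(). Because DefaultCodec is Configurable, it is normally handed a Configuration via setConf(...) before streams are created, as several examples below do. The following minimal sketch is not taken from any of the projects listed below; the class name DefaultCodecExample is invented for illustration. It round-trips a small byte array through the codec in memory:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.DefaultCodec;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;

public class DefaultCodecExample {

    public static void main(String[] args) throws Exception {
        // DefaultCodec is Configurable; hand it a Configuration before use.
        DefaultCodec codec = new DefaultCodec();
        codec.setConf(new Configuration());

        // Compress a small byte array into an in-memory buffer.
        byte[] original = "hello DefaultCodec".getBytes(StandardCharsets.UTF_8);
        ByteArrayOutputStream compressed = new ByteArrayOutputStream();
        CompressionOutputStream out = codec.createOutputStream(compressed);
        out.write(original);
        out.close();

        // Decompress it again and collect the restored bytes.
        CompressionInputStream in = codec
                .createInputStream(new ByteArrayInputStream(compressed.toByteArray()));
        ByteArrayOutputStream restored = new ByteArrayOutputStream();
        byte[] buffer = new byte[4096];
        int read;
        while ((read = in.read(buffer)) != -1) {
            restored.write(buffer, 0, read);
        }
        in.close();

        System.out.println(restored.toString(StandardCharsets.UTF_8.name()));
        // DefaultCodec reports ".deflate" as its default file extension.
        System.out.println("Default extension: " + codec.getDefaultExtension());
    }
}

The examples that follow show the same constructor used with RCFile writers, the Tez unordered partitioned KV writer, the Pulsar HDFS connector, and Spring Data Hadoop's HDFS resource loader.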

Usage

From source file:org.apache.pig.piggybank.test.storage.TestHiveColumnarLoader.java

License:Apache License

private static void produceYearMonthDayHourPartitionedData() throws IOException {

    yearMonthDayHourPartitionedDir = new File(
            "testhiveColumnarLoader-yearMonthDayHourDir-" + System.currentTimeMillis());
    yearMonthDayHourPartitionedDir.mkdir();
    yearMonthDayHourPartitionedDir.deleteOnExit();

    int years = 1;
    int months = 2;
    int days = 3;
    int hours = 4;

    yearMonthDayHourcalendar = Calendar.getInstance();

    yearMonthDayHourcalendar.set(Calendar.YEAR, 2010);
    yearMonthDayHourcalendar.set(Calendar.DAY_OF_MONTH, Calendar.MONDAY);
    yearMonthDayHourcalendar.set(Calendar.MONTH, Calendar.JANUARY);

    for (int i = 0; i < years; i++) {

        File file = new File(yearMonthDayHourPartitionedDir,
                "year=" + yearMonthDayHourcalendar.get(Calendar.YEAR));

        file.mkdir();
        file.deleteOnExit();

        for (int monthIndex = 0; monthIndex < months; monthIndex++) {

            File monthFile = new File(file, "month=" + yearMonthDayHourcalendar.get(Calendar.MONTH));
            monthFile.mkdir();
            monthFile.deleteOnExit();

            for (int dayIndex = 0; dayIndex < days; dayIndex++) {
                File dayFile = new File(monthFile,
                        "day=" + yearMonthDayHourcalendar.get(Calendar.DAY_OF_MONTH));
                dayFile.mkdir();
                dayFile.deleteOnExit();

                for (int hourIndex = 0; hourIndex < hours; hourIndex++) {
                    File hourFile = new File(dayFile,
                            "hour=" + yearMonthDayHourcalendar.get(Calendar.HOUR_OF_DAY));
                    hourFile.mkdir();
                    hourFile.deleteOnExit();

                    File rcFile = new File(hourFile.getAbsolutePath() + "/attempt-00000");
                    Path hourFilePath = new Path(rcFile.getAbsolutePath());
                    rcFile.deleteOnExit();

                    writeRCFileTest(fs, simpleRowCount, hourFilePath, columnCount, new DefaultCodec(),
                            columnCount);

                    yearMonthDayHourcalendar.add(Calendar.HOUR_OF_DAY, 1);
                }

                yearMonthDayHourcalendar.add(Calendar.DAY_OF_MONTH, 1);
            }
            yearMonthDayHourcalendar.add(Calendar.MONTH, 1);
        }

    }

    endingDate = dateFormat.format(calendar.getTime());
}

From source file:org.apache.pig.piggybank.test.storage.TestHiveColumnarLoader.java

License:Apache License

/**
 * Writes out a simple temporary file with 5 columns and 100 rows.<br/>
 * Data is random numbers.
 *
 * @throws SerDeException
 * @throws IOException
 */
private static final void produceSimpleData() throws SerDeException, IOException {
    // produce a single file
    simpleDataFile = File.createTempFile("testhiveColumnarLoader", ".txt");
    simpleDataFile.deleteOnExit();

    Path path = new Path(simpleDataFile.getPath());

    writeRCFileTest(fs, simpleRowCount, path, columnCount, new DefaultCodec(), columnCount);

    // produce a folder of simple data
    simpleDataDir = new File("simpleDataDir" + System.currentTimeMillis());
    simpleDataDir.mkdir();

    for (int i = 0; i < simpleDirFileCount; i++) {

        simpleDataFile = new File(simpleDataDir, "testhiveColumnarLoader-" + i + ".txt");

        Path filePath = new Path(simpleDataFile.getPath());

        writeRCFileTest(fs, simpleRowCount, filePath, columnCount, new DefaultCodec(), columnCount);

    }

}

From source file:org.apache.pulsar.io.hdfs.AbstractHdfsConnector.java

License:Apache License

protected CompressionCodec getCompressionCodec() {
    if (connectorConfig.getCompression() == null) {
        return null;
    }

    CompressionCodec codec = getCompressionCodecFactory()
            .getCodecByName(connectorConfig.getCompression().name());

    return (codec != null) ? codec : new DefaultCodec();
}

From source file:org.apache.tajo.storage.rcfile.TestRCFile.java

License:Apache License

@Test
public void testSimpleReadAndWrite() throws IOException {
    fs.delete(file, true);

    Datum[] r1 = new Datum[7];
    int idx = 0;
    r1[idx++] = DatumFactory.createInt4(123);
    r1[idx++] = DatumFactory.createInt8(456);
    r1[idx++] = DatumFactory.createFloat4(7.89f);
    r1[idx++] = DatumFactory.createFloat8(10.11d);
    r1[idx] = DatumFactory.createText("tajo and hadoop");

    //    byte[][] record_1 = {
    //        "123".getBytes("UTF-8"), "456".getBytes("UTF-8"),
    //        "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
    //        "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
    //        new byte[0], "NULL".getBytes("UTF-8")};
    System.out.println("Original size: " + r1[4].asByteArray().length);
    byte[][] record_1 = { r1[0].asByteArray(), r1[1].asByteArray(), r1[2].asByteArray(), r1[3].asByteArray(),
            r1[4].asByteArray(), new byte[0], "NULL".getBytes("UTF-8") };

    Datum[] r2 = new Datum[7];
    idx = 0;
    r2[idx++] = DatumFactory.createInt4(100);
    r2[idx++] = DatumFactory.createInt8(200);
    r2[idx++] = DatumFactory.createFloat4(5.3f);
    r2[idx++] = DatumFactory.createFloat8(11.12d);
    r2[idx] = DatumFactory.createText("the second str");

    byte[][] record_2 = { r2[0].asByteArray(), r2[1].asByteArray(), r2[2].asByteArray(), r2[3].asByteArray(),
            r2[4].asByteArray(), new byte[0], "NULL".getBytes("UTF-8") };

    //    byte[][] record_2 = {"100".getBytes("UTF-8"), "200".getBytes("UTF-8"),
    //        "123".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
    //        "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
    //        new byte[0], "NULL".getBytes("UTF-8")};

    conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, 7);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null,
            RCFile.createMetadata(new Text("apple"), new Text("block"), new Text("cat"), new Text("dog")),
            new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    writer.close();

    //    Object[] expectedRecord_1 = {new ByteWritable((byte) 123),
    //        new ShortWritable((short) 456), new IntWritable(789),
    //        new LongWritable(1000), new DoubleWritable(5.3),
    //        new Text("hive and hadoop"), null, null};
    //
    //    Object[] expectedRecord_2 = {new ByteWritable((byte) 100),
    //        new ShortWritable((short) 200), new IntWritable(123),
    //        new LongWritable(1000), new DoubleWritable(5.3),
    //        new Text("hive and hadoop"), null, null};

    ColumnProjectionUtils.setFullyReadColumns(conf);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    assertEquals(new Text("block"), reader.getMetadata().get(new Text("apple")));
    assertEquals(new Text("block"), reader.getMetadataValueOf(new Text("apple")));
    assertEquals(new Text("dog"), reader.getMetadataValueOf(new Text("cat")));
    LongWritable rowID = new LongWritable();

    reader.next(rowID);
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(7);
    assertEquals(r1[0], new Int4Datum(cols.get(0).getBytesCopy()));
    assertEquals(r1[1], new Int8Datum(cols.get(1).getBytesCopy()));
    assertEquals(r1[2], new Float4Datum(cols.get(2).getBytesCopy()));
    assertEquals(r1[3], new Float8Datum(cols.get(3).getBytesCopy()));
    assertEquals(r1[4], new TextDatum(cols.get(4).getBytesCopy()));

    reader.next(rowID);
    cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(7);
    assertEquals(r2[0], new Int4Datum(cols.get(0).getBytesCopy()));
    assertEquals(r2[1], new Int8Datum(cols.get(1).getBytesCopy()));
    assertEquals(r2[2], new Float4Datum(cols.get(2).getBytesCopy()));
    assertEquals(r2[3], new Float8Datum(cols.get(3).getBytesCopy()));
    assertEquals(r2[4], new TextDatum(cols.get(4).getBytesCopy()));
    /*
      cols.resetValid(8);
      Object row = serDe.deserialize(cols);
            
      StructObjectInspector oi = (StructObjectInspector) serDe
          .getObjectInspector();
      List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
      assertEquals("Field size should be 8", 8, fieldRefs.size());
      for (int j = 0; j < fieldRefs.size(); j++) {
        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
        Object standardWritableData = ObjectInspectorUtils
    .copyToStandardObject(fieldData, fieldRefs.get(j)
        .getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
        if (i == 0) {
          assertEquals("Field " + i, standardWritableData, expectedRecord_1[j]);
        } else {
          assertEquals("Field " + i, standardWritableData, expectedRecord_2[j]);
        }
      }
    }*/

    reader.close();
}

From source file:org.apache.tajo.storage.rcfile.TestRCFile.java

License:Apache License

private void writeTest(FileSystem fs, int count, Path file, byte[][] fieldsData, Configuration conf)
        throws IOException {
    fs.delete(file, true);

    conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, fieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(fieldsData.length);
    for (int i = 0; i < fieldsData.length; i++) {
        BytesRefWritable cu;
        cu = new BytesRefWritable(fieldsData[i], 0, fieldsData[i].length);
        bytes.set(i, cu);
    }

    for (int i = 0; i < count; i++) {
        writer.append(bytes);
    }
    writer.close();
    long fileLen = fs.getFileStatus(file).getLen();
    System.out.println("The file size of RCFile with " + bytes.size() + " number columns and " + count
            + " number rows is " + fileLen);
}

From source file:org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java

License:Apache License

public void textTest(int numRegularRecords, int numPartitions, long availableMemory, int numLargeKeys,
        int numLargevalues, int numLargeKvPairs) throws IOException, InterruptedException {
    Partitioner partitioner = new HashPartitioner();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);
    Random random = new Random();

    Configuration conf = createConfiguration(outputContext, Text.class, Text.class, shouldCompress, -1,
            HashPartitioner.class);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numRecordsWritten = 0;

    Map<Integer, Multimap<String, String>> expectedValues = new HashMap<Integer, Multimap<String, String>>();
    for (int i = 0; i < numPartitions; i++) {
        expectedValues.put(i, LinkedListMultimap.<String, String>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
            numPartitions, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;

    BitSet partitionsWithData = new BitSet(numPartitions);
    Text keyText = new Text();
    Text valText = new Text();
    for (int i = 0; i < numRegularRecords; i++) {
        String key = createRandomString(Math.abs(random.nextInt(10)));
        String val = createRandomString(Math.abs(random.nextInt(20)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write Large key records
    for (int i = 0; i < numLargeKeys; i++) {
        String key = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
        String val = createRandomString(Math.abs(random.nextInt(20)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write Large val records
    for (int i = 0; i < numLargevalues; i++) {
        String key = createRandomString(Math.abs(random.nextInt(10)));
        String val = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    // Write records where key + val are large (but both can fit in the buffer individually)
    for (int i = 0; i < numLargeKvPairs; i++) {
        String key = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
        String val = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
        keyText.set(key);
        valText.set(val);
        int partition = partitioner.getPartition(keyText, valText, numPartitions);
        partitionsWithData.set(partition);
        expectedValues.get(partition).put(key, val);
        kvWriter.write(keyText, valText);
        numRecordsWritten++;
    }

    List<Event> events = kvWriter.close();
    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    TezCounter outputLargeRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_LARGE_RECORDS);
    assertEquals(numLargeKeys + numLargevalues + numLargeKvPairs, outputLargeRecordsCounter.getValue());

    // Validate the event
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numPartitions, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
            .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    BitSet emptyPartitionBits = null;
    if (partitionsWithData.cardinality() != numPartitions) {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils
                .decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        assertEquals(numPartitions - partitionsWithData.cardinality(), emptyPartitionBits.cardinality());
    } else {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    }
    assertEquals(HOST_STRING, eventProto.getHost());
    assertEquals(SHUFFLE_PORT, eventProto.getPort());
    assertEquals(uniqueId, eventProto.getPathComponent());

    // Verify the data
    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;
    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
    } else {
        return;
    }

    // Special case for 0 records.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    Text keyDeser = new Text();
    Text valDeser = new Text();
    for (int i = 0; i < numPartitions; i++) {
        if (emptyPartitionBits.get(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
                0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numPartitions);
            assertTrue(expectedValues.get(partition).remove(keyDeser.toString(), valDeser.toString()));
        }
        inStream.close();
    }
    for (int i = 0; i < numPartitions; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}

From source file:org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java

License:Apache License

private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress)
        throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);

    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class,
            shouldCompress, -1);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numOutputs = numPartitions;
    long availableMemory = 2048;
    int numRecordsWritten = 0;

    Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
    for (int i = 0; i < numOutputs; i++) {
        expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
            numOutputs, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;
    int sizePerRecord = 4 + 8; // IntW + LongW
    int sizePerRecordWithOverhead = sizePerRecord + 12; // Record + META_OVERHEAD

    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        expectedValues.get(partition).put(intWritable.get(), longWritable.get());
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    List<Event> events = kvWriter.close();

    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer;

    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    // Verify the status of the buffers
    if (numExpectedSpills == 0) {
        assertEquals(1, kvWriter.numInitializedBuffers);
    } else {
        assertTrue(kvWriter.numInitializedBuffers > 1);
    }
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());

    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWritternCounter = counters
            .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        if (!shouldCompress) {
            assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }
    assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    if (numExpectedSpills == 0) {
        assertEquals(0, additionalSpillBytesWritten);
        assertEquals(0, additionalSpillBytesRead);
    } else {
        assertTrue(additionalSpillBytesWritten > 0);
        assertTrue(additionalSpillBytesRead > 0);
        if (!shouldCompress) {
            assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
            assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
        }
    }
    assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
    assertEquals(numExpectedSpills, numAdditionalSpillsCounter.getValue());

    BitSet emptyPartitionBits = null;
    // Verify the event returned
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
            .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    if (skippedPartitions == null && numRecordsWritten > 0) {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    } else {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils
                .decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        if (numRecordsWritten == 0) {
            assertEquals(numPartitions, emptyPartitionBits.cardinality());
        } else {
            for (Integer e : skippedPartitions) {
                assertTrue(emptyPartitionBits.get(e));
            }
            assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
        }
    }
    if (emptyPartitionBits.cardinality() != numPartitions) {
        assertEquals(HOST_STRING, eventProto.getHost());
        assertEquals(SHUFFLE_PORT, eventProto.getPort());
        assertEquals(uniqueId, eventProto.getPathComponent());
    } else {
        assertFalse(eventProto.hasHost());
        assertFalse(eventProto.hasPort());
        assertFalse(eventProto.hasPathComponent());
    }

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;

    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
    } else {
        return;
    }

    // Special case for 0 records.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    IntWritable keyDeser = new IntWritable();
    LongWritable valDeser = new LongWritable();
    for (int i = 0; i < numOutputs; i++) {
        if (skippedPartitions != null && skippedPartitions.contains(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
                0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
            assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
        }
        inStream.close();
    }
    for (int i = 0; i < numOutputs; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}

From source file:org.springframework.data.hadoop.fs.HdfsResourceLoaderLegacyTest.java

License:Apache License

@Test
public void testDecompressedStream() throws Exception {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(fs.getConf());
    String name = "local/" + UUID.randomUUID() + codec.getDefaultExtension();
    OutputStream outputStream = codec.createOutputStream(fs.create(new Path(name)));
    byte[] content = name.getBytes();
    outputStream.write(content);
    outputStream.close();

    Resource resource = loader.getResource(name);
    assertNotNull(resource);
    InputStream inputStream = resource.getInputStream();
    assertEquals(DecompressorStream.class, inputStream.getClass());
    assertTrue(TestUtils.compareStreams(new ByteArrayInputStream(content), inputStream));
}

From source file:org.springframework.data.hadoop.fs.HdfsResourceLoaderLegacyTest.java

License:Apache License

@Test
public void testCompressedStream() throws Exception {

    DefaultCodec codec = new DefaultCodec();
    codec.setConf(fs.getConf());
    String name = "local/" + UUID.randomUUID() + codec.getDefaultExtension();
    OutputStream outputStream = codec.createOutputStream(fs.create(new Path(name)));
    byte[] content = name.getBytes();
    outputStream.write(content);
    outputStream.close();

    loader.setUseCodecs(false);

    try {
        Resource resource = loader.getResource(name);
        assertNotNull(resource);
        InputStream inputStream = resource.getInputStream();
        System.out.println(inputStream.getClass());
        assertFalse(DecompressorStream.class.equals(inputStream.getClass()));
        assertFalse(TestUtils.compareStreams(new ByteArrayInputStream(content), inputStream));
    } finally {
        loader.setUseCodecs(true);
    }
}

From source file:tajo.storage.rcfile.TestRCFile.java

License:Apache License

@Test
public void testSimpleReadAndWrite() throws IOException {
    fs.delete(file, true);

    Datum[] r1 = new Datum[7];
    int idx = 0;
    r1[idx++] = DatumFactory.createInt(123);
    r1[idx++] = DatumFactory.createLong(456);
    r1[idx++] = DatumFactory.createFloat(7.89f);
    r1[idx++] = DatumFactory.createDouble(10.11d);
    r1[idx++] = DatumFactory.createString("tajo and hadoop");

    //    byte[][] record_1 = {
    //        "123".getBytes("UTF-8"), "456".getBytes("UTF-8"),
    //        "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
    //        "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
    //        new byte[0], "NULL".getBytes("UTF-8")};
    System.out.println("Original size: " + r1[4].asByteArray().length);
    byte[][] record_1 = { r1[0].asByteArray(), r1[1].asByteArray(), r1[2].asByteArray(), r1[3].asByteArray(),
            r1[4].asByteArray(), new byte[0], "NULL".getBytes("UTF-8") };

    Datum[] r2 = new Datum[7];
    idx = 0;
    r2[idx++] = DatumFactory.createInt(100);
    r2[idx++] = DatumFactory.createLong(200);
    r2[idx++] = DatumFactory.createFloat(5.3f);
    r2[idx++] = DatumFactory.createDouble(11.12d);
    r2[idx++] = DatumFactory.createString("the second str");

    byte[][] record_2 = { r2[0].asByteArray(), r2[1].asByteArray(), r2[2].asByteArray(), r2[3].asByteArray(),
            r2[4].asByteArray(), new byte[0], "NULL".getBytes("UTF-8") };

    //    byte[][] record_2 = {"100".getBytes("UTF-8"), "200".getBytes("UTF-8"),
    //        "123".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
    //        "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
    //        new byte[0], "NULL".getBytes("UTF-8")};

    conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, 7);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null,
            RCFile.createMetadata(new Text("apple"), new Text("block"), new Text("cat"), new Text("dog")),
            new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    writer.close();

    //    Object[] expectedRecord_1 = {new ByteWritable((byte) 123),
    //        new ShortWritable((short) 456), new IntWritable(789),
    //        new LongWritable(1000), new DoubleWritable(5.3),
    //        new Text("hive and hadoop"), null, null};
    //
    //    Object[] expectedRecord_2 = {new ByteWritable((byte) 100),
    //        new ShortWritable((short) 200), new IntWritable(123),
    //        new LongWritable(1000), new DoubleWritable(5.3),
    //        new Text("hive and hadoop"), null, null};

    ColumnProjectionUtils.setFullyReadColumns(conf);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    assertEquals(new Text("block"), reader.getMetadata().get(new Text("apple")));
    assertEquals(new Text("block"), reader.getMetadataValueOf(new Text("apple")));
    assertEquals(new Text("dog"), reader.getMetadataValueOf(new Text("cat")));
    LongWritable rowID = new LongWritable();

    reader.next(rowID);
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(7);
    assertEquals(r1[0], new IntDatum(cols.get(0).getBytesCopy()));
    assertEquals(r1[1], new LongDatum(cols.get(1).getBytesCopy()));
    assertEquals(r1[2], new FloatDatum(cols.get(2).getBytesCopy()));
    assertEquals(r1[3], new DoubleDatum(cols.get(3).getBytesCopy()));
    assertEquals(r1[4], new StringDatum(cols.get(4).getBytesCopy()));

    reader.next(rowID);
    cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(7);
    assertEquals(r2[0], new IntDatum(cols.get(0).getBytesCopy()));
    assertEquals(r2[1], new LongDatum(cols.get(1).getBytesCopy()));
    assertEquals(r2[2], new FloatDatum(cols.get(2).getBytesCopy()));
    assertEquals(r2[3], new DoubleDatum(cols.get(3).getBytesCopy()));
    assertEquals(r2[4], new StringDatum(cols.get(4).getBytesCopy()));
    /*
      cols.resetValid(8);
      Object row = serDe.deserialize(cols);
            
      StructObjectInspector oi = (StructObjectInspector) serDe
          .getObjectInspector();
      List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
      assertEquals("Field size should be 8", 8, fieldRefs.size());
      for (int j = 0; j < fieldRefs.size(); j++) {
        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
        Object standardWritableData = ObjectInspectorUtils
    .copyToStandardObject(fieldData, fieldRefs.get(j)
        .getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
        if (i == 0) {
          assertEquals("Field " + i, standardWritableData, expectedRecord_1[j]);
        } else {
          assertEquals("Field " + i, standardWritableData, expectedRecord_2[j]);
        }
      }
    }*/

    reader.close();
}