Example usage for org.apache.hadoop.io.compress DefaultCodec DefaultCodec

List of usage examples for org.apache.hadoop.io.compress DefaultCodec DefaultCodec

Introduction

On this page you can find example usage for org.apache.hadoop.io.compress DefaultCodec DefaultCodec.

Prototype

DefaultCodec()
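
A minimal sketch of the constructor in use (not taken from the sources below): DefaultCodec is Configurable, so a Configuration is assumed and set before compressing; the buffer contents and class name are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.DefaultCodec;

import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;

public class DefaultCodecSketch {
    public static void main(String[] args) throws Exception {
        // DefaultCodec has a public no-argument constructor; configure it before use.
        DefaultCodec codec = new DefaultCodec();
        codec.setConf(new Configuration());

        // Compress a small byte buffer through the codec's output stream.
        ByteArrayOutputStream raw = new ByteArrayOutputStream();
        CompressionOutputStream out = codec.createOutputStream(raw);
        out.write("hello default codec".getBytes(StandardCharsets.UTF_8));
        out.close();

        System.out.println("Compressed " + raw.size() + " bytes, extension " + codec.getDefaultExtension());
    }
}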

Usage

From source file: tajo.storage.rcfile.TestRCFile.java

License: Apache License

private void writeTest(FileSystem fs, int count, Path file, byte[][] fieldsData, Configuration conf)
        throws IOException {
    fs.delete(file, true);

    conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, fieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(fieldsData.length);
    for (int i = 0; i < fieldsData.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(fieldsData[i], 0, fieldsData[i].length);
        bytes.set(i, cu);
    }

    for (int i = 0; i < count; i++) {
        writer.append(bytes);
    }
    writer.close();
    long fileLen = fs.getFileStatus(file).getLen();
    System.out.println("The file size of RCFile with " + bytes.size() + " columns and " + count
            + " rows is " + fileLen);
}

From source file: us.yuxin.hump.TestRCFile.java

License: Apache License

public void testSimpleReadAndWrite() throws IOException, SerDeException {
    fs.delete(file, true);

    byte[][] record_1 = { "123".getBytes("UTF-8"), "456".getBytes("UTF-8"), "789".getBytes("UTF-8"),
            "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"), new byte[0],
            "NULL".getBytes("UTF-8") };
    byte[][] record_2 = { "100".getBytes("UTF-8"), "200".getBytes("UTF-8"), "123".getBytes("UTF-8"),
            "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"), new byte[0],
            "NULL".getBytes("UTF-8") };

    RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
        bytes.set(i, cu);
    }
    writer.append(bytes);
    writer.close();

    Object[] expectedRecord_1 = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
            new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"),
            null, null };

    Object[] expectedRecord_2 = { new ByteWritable((byte) 100), new ShortWritable((short) 200),
            new IntWritable(123), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"),
            null, null };

    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);

    LongWritable rowID = new LongWritable();

    for (int i = 0; i < 2; i++) {
        reader.next(rowID);
        BytesRefArrayWritable cols = new BytesRefArrayWritable();
        reader.getCurrentRow(cols);
        cols.resetValid(8);
        Object row = serDe.deserialize(cols);

        StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        assertEquals("Field size should be 8", 8, fieldRefs.size());
        for (int j = 0; j < fieldRefs.size(); j++) {
            Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
            Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData,
                    fieldRefs.get(j).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
            if (i == 0) {
                assertEquals("Field " + i, standardWritableData, expectedRecord_1[j]);
            } else {
                assertEquals("Field " + i, standardWritableData, expectedRecord_2[j]);
            }
        }
    }

    reader.close();
}

From source file: us.yuxin.hump.TestRCFile.java

License: Apache License

private void writeTest(FileSystem fs, int count, Path file, byte[][] fieldsData)
        throws IOException, SerDeException {
    fs.delete(file, true);

    RCFileOutputFormat.setColumnNumber(conf, fieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(fieldsData.length);
    for (int i = 0; i < fieldsData.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(fieldsData[i], 0, fieldsData[i].length);
        bytes.set(i, cu);
    }

    for (int i = 0; i < count; i++) {
        writer.append(bytes);
    }
    writer.close();
    long fileLen = fs.getFileStatus(file).getLen();
    System.out.println("The file size of RCFile with " + bytes.size() + " columns and " + count
            + " rows is " + fileLen);
}