List of usage examples for org.apache.hadoop.io.compress.DefaultCodec
From source file: tajo.storage.rcfile.TestRCFile.java
License: Apache License
private void writeTest(FileSystem fs, int count, Path file, byte[][] fieldsData, Configuration conf)
        throws IOException {
    fs.delete(file, true);
    conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, fieldsData.length);
    // Compress the RCFile's column groups with DefaultCodec (zlib/DEFLATE).
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(fieldsData.length);
    for (int i = 0; i < fieldsData.length; i++) {
        bytes.set(i, new BytesRefWritable(fieldsData[i], 0, fieldsData[i].length));
    }
    // Append the same row `count` times.
    for (int i = 0; i < count; i++) {
        writer.append(bytes);
    }
    writer.close();
    long fileLen = fs.getFileStatus(file).getLen();
    System.out.println("The file size of RCFile with " + bytes.size() + " columns and " + count
            + " rows is " + fileLen);
}
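The test above exercises DefaultCodec only indirectly, as the block codec handed to RCFile.Writer. For reference, here is a minimal standalone sketch (not part of the test source) of using DefaultCodec directly to compress and decompress a raw byte stream; the class name DefaultCodecRoundTrip and the sample payload are illustrative.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.DefaultCodec;

public class DefaultCodecRoundTrip {
    public static void main(String[] args) throws Exception {
        DefaultCodec codec = new DefaultCodec();
        codec.setConf(new Configuration()); // DefaultCodec is Configurable; set a conf before use

        byte[] payload = "hive and hadoop".getBytes("UTF-8"); // illustrative sample data

        // Compress into an in-memory buffer.
        ByteArrayOutputStream compressed = new ByteArrayOutputStream();
        CompressionOutputStream out = codec.createOutputStream(compressed);
        out.write(payload);
        out.finish(); // flush any remaining compressed bytes
        out.close();

        // Decompress and verify the round trip.
        CompressionInputStream in =
                codec.createInputStream(new ByteArrayInputStream(compressed.toByteArray()));
        ByteArrayOutputStream restored = new ByteArrayOutputStream();
        IOUtils.copyBytes(in, restored, 4096, true);
        System.out.println(new String(restored.toByteArray(), "UTF-8")); // prints "hive and hadoop"
    }
}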
From source file: us.yuxin.hump.TestRCFile.java
License: Apache License
public void testSimpleReadAndWrite() throws IOException, SerDeException {
    fs.delete(file, true);
    byte[][] record_1 = { "123".getBytes("UTF-8"), "456".getBytes("UTF-8"), "789".getBytes("UTF-8"),
            "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
            new byte[0], "NULL".getBytes("UTF-8") };
    byte[][] record_2 = { "100".getBytes("UTF-8"), "200".getBytes("UTF-8"), "123".getBytes("UTF-8"),
            "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
            new byte[0], "NULL".getBytes("UTF-8") };

    RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
    // Write two rows, compressed with DefaultCodec.
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
        bytes.set(i, new BytesRefWritable(record_1[i], 0, record_1[i].length));
    }
    writer.append(bytes);
    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
        bytes.set(i, new BytesRefWritable(record_2[i], 0, record_2[i].length));
    }
    writer.append(bytes);
    writer.close();

    Object[] expectedRecord_1 = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
            new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3),
            new Text("hive and hadoop"), null, null };
    Object[] expectedRecord_2 = { new ByteWritable((byte) 100), new ShortWritable((short) 200),
            new IntWritable(123), new LongWritable(1000), new DoubleWritable(5.3),
            new Text("hive and hadoop"), null, null };

    // Read the rows back and deserialize each column through the SerDe.
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    LongWritable rowID = new LongWritable();
    for (int i = 0; i < 2; i++) {
        reader.next(rowID);
        BytesRefArrayWritable cols = new BytesRefArrayWritable();
        reader.getCurrentRow(cols);
        cols.resetValid(8);
        Object row = serDe.deserialize(cols);
        StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        assertEquals("Field size should be 8", 8, fieldRefs.size());
        for (int j = 0; j < fieldRefs.size(); j++) {
            Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
            Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData,
                    fieldRefs.get(j).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
            if (i == 0) {
                assertEquals("Field " + i, standardWritableData, expectedRecord_1[j]);
            } else {
                assertEquals("Field " + i, standardWritableData, expectedRecord_2[j]);
            }
        }
    }
    reader.close();
}
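DefaultCodec registers ".deflate" as its default file extension, so a codec can also be resolved from a file path at read time via CompressionCodecFactory. A brief sketch, separate from the test source; the path part-00000.deflate and the class name CodecLookup are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CodecLookup {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        CompressionCodecFactory factory = new CompressionCodecFactory(conf);
        // Resolve a codec from the file extension; ".deflate" maps to DefaultCodec.
        CompressionCodec codec = factory.getCodec(new Path("part-00000.deflate")); // hypothetical path
        if (codec != null) {
            // Prints org.apache.hadoop.io.compress.DefaultCodec
            System.out.println("Resolved codec: " + codec.getClass().getName());
        }
    }
}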
From source file: us.yuxin.hump.TestRCFile.java
License: Apache License
private void writeTest(FileSystem fs, int count, Path file, byte[][] fieldsData)
        throws IOException, SerDeException {
    fs.delete(file, true);
    RCFileOutputFormat.setColumnNumber(conf, fieldsData.length);
    // Compress the RCFile's column groups with DefaultCodec (zlib/DEFLATE).
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(fieldsData.length);
    for (int i = 0; i < fieldsData.length; i++) {
        bytes.set(i, new BytesRefWritable(fieldsData[i], 0, fieldsData[i].length));
    }
    // Append the same row `count` times.
    for (int i = 0; i < count; i++) {
        writer.append(bytes);
    }
    writer.close();
    long fileLen = fs.getFileStatus(file).getLen();
    System.out.println("The file size of RCFile with " + bytes.size() + " columns and " + count
            + " rows is " + fileLen);
}
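For completeness, a minimal read-back sketch mirroring the reader usage in testSimpleReadAndWrite above; it assumes the same fs, file, and conf fields the write test uses, and simply iterates every row rather than checking deserialized values.

// Assumes fs, file, and conf are the same fields used by the write test above.
RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
LongWritable rowID = new LongWritable();
BytesRefArrayWritable cols = new BytesRefArrayWritable();
while (reader.next(rowID)) {        // advances to the next row and fills in its ID
    reader.getCurrentRow(cols);     // materializes the row's column bytes
    System.out.println("row " + rowID.get() + " has " + cols.size() + " columns");
}
reader.close();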