List of usage examples for org.apache.hadoop.io.compress.DefaultCodec
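As a quick orientation before the project-specific listings, here is a minimal sketch of a DefaultCodec compress/decompress round trip on the local file system. The class name DefaultCodecRoundTrip, the output path, and the payload are illustrative assumptions; the Hadoop calls themselves (ReflectionUtils.newInstance, createOutputStream, createInputStream, getDefaultExtension) are standard API.

// Minimal sketch: compress and decompress a byte payload with DefaultCodec.
// The local path and the payload string are illustrative, not taken from the examples below.
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DefaultCodecRoundTrip {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);

    // ReflectionUtils.newInstance wires the Configuration into the codec (DefaultCodec is Configurable).
    DefaultCodec codec = ReflectionUtils.newInstance(DefaultCodec.class, conf);

    Path path = new Path("/tmp/defaultcodec-demo" + codec.getDefaultExtension());
    byte[] payload = "hello, DefaultCodec".getBytes(StandardCharsets.UTF_8);

    // Write compressed data through the codec's output stream.
    try (OutputStream out = codec.createOutputStream(fs.create(path, true))) {
      out.write(payload);
    }

    // Read it back through the codec's decompressing input stream.
    ByteArrayOutputStream restored = new ByteArrayOutputStream();
    try (InputStream in = codec.createInputStream(fs.open(path))) {
      byte[] buf = new byte[4096];
      int n;
      while ((n = in.read(buf)) != -1) {
        restored.write(buf, 0, n);
      }
    }
    System.out.println(new String(restored.toByteArray(), StandardCharsets.UTF_8));
  }
}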
From source file: org.apache.pig.piggybank.test.storage.TestHiveColumnarLoader.java
License: Apache License
private static void produceYearMonthDayHourPartitionedData() throws IOException {
    yearMonthDayHourPartitionedDir = new File(
        "testhiveColumnarLoader-yearMonthDayHourDir-" + System.currentTimeMillis());
    yearMonthDayHourPartitionedDir.mkdir();
    yearMonthDayHourPartitionedDir.deleteOnExit();

    int years = 1;
    int months = 2;
    int days = 3;
    int hours = 4;

    yearMonthDayHourcalendar = Calendar.getInstance();
    yearMonthDayHourcalendar.set(Calendar.YEAR, 2010);
    yearMonthDayHourcalendar.set(Calendar.DAY_OF_MONTH, Calendar.MONDAY);
    yearMonthDayHourcalendar.set(Calendar.MONTH, Calendar.JANUARY);

    for (int i = 0; i < years; i++) {
      File file = new File(yearMonthDayHourPartitionedDir,
          "year=" + yearMonthDayHourcalendar.get(Calendar.YEAR));
      file.mkdir();
      file.deleteOnExit();

      for (int monthIndex = 0; monthIndex < months; monthIndex++) {
        File monthFile = new File(file, "month=" + yearMonthDayHourcalendar.get(Calendar.MONTH));
        monthFile.mkdir();
        monthFile.deleteOnExit();

        for (int dayIndex = 0; dayIndex < days; dayIndex++) {
          File dayFile = new File(monthFile,
              "day=" + yearMonthDayHourcalendar.get(Calendar.DAY_OF_MONTH));
          dayFile.mkdir();
          dayFile.deleteOnExit();

          for (int hourIndex = 0; hourIndex < hours; hourIndex++) {
            File hourFile = new File(dayFile,
                "hour=" + yearMonthDayHourcalendar.get(Calendar.HOUR_OF_DAY));
            hourFile.mkdir();
            hourFile.deleteOnExit();

            File rcFile = new File(hourFile.getAbsolutePath() + "/attempt-00000");
            Path hourFilePath = new Path(rcFile.getAbsolutePath());
            rcFile.deleteOnExit();

            writeRCFileTest(fs, simpleRowCount, hourFilePath, columnCount, new DefaultCodec(),
                columnCount);

            yearMonthDayHourcalendar.add(Calendar.HOUR_OF_DAY, 1);
          }
          yearMonthDayHourcalendar.add(Calendar.DAY_OF_MONTH, 1);
        }
        yearMonthDayHourcalendar.add(Calendar.MONTH, 1);
      }
    }

    endingDate = dateFormat.format(calendar.getTime());
  }
From source file: org.apache.pig.piggybank.test.storage.TestHiveColumnarLoader.java
License: Apache License
/**
   * Writes out a simple temporary file with 5 columns and 100 rows.<br/>
   * Data is random numbers.
   *
   * @throws SerDeException
   * @throws IOException
   */
  private static final void produceSimpleData() throws SerDeException, IOException {
    // produce on single file
    simpleDataFile = File.createTempFile("testhiveColumnarLoader", ".txt");
    simpleDataFile.deleteOnExit();
    Path path = new Path(simpleDataFile.getPath());

    writeRCFileTest(fs, simpleRowCount, path, columnCount, new DefaultCodec(), columnCount);

    // produce a folder of simple data
    simpleDataDir = new File("simpleDataDir" + System.currentTimeMillis());
    simpleDataDir.mkdir();
    for (int i = 0; i < simpleDirFileCount; i++) {
      simpleDataFile = new File(simpleDataDir, "testhiveColumnarLoader-" + i + ".txt");
      Path filePath = new Path(simpleDataFile.getPath());
      writeRCFileTest(fs, simpleRowCount, filePath, columnCount, new DefaultCodec(), columnCount);
    }
  }
From source file: org.apache.pulsar.io.hdfs.AbstractHdfsConnector.java
License: Apache License
protected CompressionCodec getCompressionCodec() {
    if (connectorConfig.getCompression() == null) {
      return null;
    }
    CompressionCodec codec = getCompressionCodecFactory()
        .getCodecByName(connectorConfig.getCompression().name());
    return (codec != null) ? codec : new DefaultCodec();
  }
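The connector method above resolves a codec by name and falls back to a bare new DefaultCodec() when the lookup fails. Below is a stand-alone sketch of the same lookup-with-fallback pattern; the class name CodecLookupSketch and the "GZIP" input are illustrative assumptions. One detail worth noting from the other examples in this list: DefaultCodec is Configurable, so it is usually given a Configuration (directly via setConf or via ReflectionUtils.newInstance) before any streams are created from it.

// Sketch: name-based codec lookup with a DefaultCodec fallback (mirrors the connector above).
// The codec name "GZIP" is an illustrative input, not a value taken from the Pulsar config.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class CodecLookupSketch {
  static CompressionCodec resolveCodec(Configuration conf, String codecName) {
    if (codecName == null) {
      return null;
    }
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodecByName(codecName);
    // Fall back to DefaultCodec, making sure it carries the Configuration.
    return (codec != null) ? codec : ReflectionUtils.newInstance(DefaultCodec.class, conf);
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    System.out.println(resolveCodec(conf, "GZIP").getClass().getName());
  }
}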
From source file: org.apache.tajo.storage.rcfile.TestRCFile.java
License: Apache License
@Test
  public void testSimpleReadAndWrite() throws IOException {
    fs.delete(file, true);

    Datum[] r1 = new Datum[7];
    int idx = 0;
    r1[idx++] = DatumFactory.createInt4(123);
    r1[idx++] = DatumFactory.createInt8(456);
    r1[idx++] = DatumFactory.createFloat4(7.89f);
    r1[idx++] = DatumFactory.createFloat8(10.11d);
    r1[idx] = DatumFactory.createText("tajo and hadoop");

    // byte[][] record_1 = {
    //     "123".getBytes("UTF-8"), "456".getBytes("UTF-8"),
    //     "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
    //     "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
    //     new byte[0], "NULL".getBytes("UTF-8")};

    System.out.println("Original size: " + r1[4].asByteArray().length);

    byte[][] record_1 = { r1[0].asByteArray(), r1[1].asByteArray(), r1[2].asByteArray(),
        r1[3].asByteArray(), r1[4].asByteArray(), new byte[0], "NULL".getBytes("UTF-8") };

    Datum[] r2 = new Datum[7];
    idx = 0;
    r2[idx++] = DatumFactory.createInt4(100);
    r2[idx++] = DatumFactory.createInt8(200);
    r2[idx++] = DatumFactory.createFloat4(5.3f);
    r2[idx++] = DatumFactory.createFloat8(11.12d);
    r2[idx] = DatumFactory.createText("the second str");

    byte[][] record_2 = { r2[0].asByteArray(), r2[1].asByteArray(), r2[2].asByteArray(),
        r2[3].asByteArray(), r2[4].asByteArray(), new byte[0], "NULL".getBytes("UTF-8") };

    // byte[][] record_2 = {"100".getBytes("UTF-8"), "200".getBytes("UTF-8"),
    //     "123".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
    //     "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
    //     new byte[0], "NULL".getBytes("UTF-8")};

    conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, 7);

    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null,
        RCFile.createMetadata(new Text("apple"), new Text("block"), new Text("cat"), new Text("dog")),
        new DefaultCodec());

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
      bytes.set(i, cu);
    }
    writer.append(bytes);

    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
      bytes.set(i, cu);
    }
    writer.append(bytes);
    writer.close();

    // Object[] expectedRecord_1 = {new ByteWritable((byte) 123),
    //     new ShortWritable((short) 456), new IntWritable(789),
    //     new LongWritable(1000), new DoubleWritable(5.3),
    //     new Text("hive and hadoop"), null, null};
    //
    // Object[] expectedRecord_2 = {new ByteWritable((byte) 100),
    //     new ShortWritable((short) 200), new IntWritable(123),
    //     new LongWritable(1000), new DoubleWritable(5.3),
    //     new Text("hive and hadoop"), null, null};

    ColumnProjectionUtils.setFullyReadColumns(conf);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    assertEquals(new Text("block"), reader.getMetadata().get(new Text("apple")));
    assertEquals(new Text("block"), reader.getMetadataValueOf(new Text("apple")));
    assertEquals(new Text("dog"), reader.getMetadataValueOf(new Text("cat")));

    LongWritable rowID = new LongWritable();

    reader.next(rowID);
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(7);
    assertEquals(r1[0], new Int4Datum(cols.get(0).getBytesCopy()));
    assertEquals(r1[1], new Int8Datum(cols.get(1).getBytesCopy()));
    assertEquals(r1[2], new Float4Datum(cols.get(2).getBytesCopy()));
    assertEquals(r1[3], new Float8Datum(cols.get(3).getBytesCopy()));
    assertEquals(r1[4], new TextDatum(cols.get(4).getBytesCopy()));

    reader.next(rowID);
    cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(7);
    assertEquals(r2[0], new Int4Datum(cols.get(0).getBytesCopy()));
    assertEquals(r2[1], new Int8Datum(cols.get(1).getBytesCopy()));
    assertEquals(r2[2], new Float4Datum(cols.get(2).getBytesCopy()));
    assertEquals(r2[3], new Float8Datum(cols.get(3).getBytesCopy()));
    assertEquals(r2[4], new TextDatum(cols.get(4).getBytesCopy()));

    /*
    cols.resetValid(8);
    Object row = serDe.deserialize(cols);
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    assertEquals("Field size should be 8", 8, fieldRefs.size());
    for (int j = 0; j < fieldRefs.size(); j++) {
      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
      Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData,
          fieldRefs.get(j).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
      if (i == 0) {
        assertEquals("Field " + i, standardWritableData, expectedRecord_1[j]);
      } else {
        assertEquals("Field " + i, standardWritableData, expectedRecord_2[j]);
      }
    }
    }*/

    reader.close();
  }
From source file: org.apache.tajo.storage.rcfile.TestRCFile.java
License: Apache License
private void writeTest(FileSystem fs, int count, Path file, byte[][] fieldsData, Configuration conf)
      throws IOException {
    fs.delete(file, true);

    conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, fieldsData.length);

    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(fieldsData.length);
    for (int i = 0; i < fieldsData.length; i++) {
      BytesRefWritable cu;
      cu = new BytesRefWritable(fieldsData[i], 0, fieldsData[i].length);
      bytes.set(i, cu);
    }

    for (int i = 0; i < count; i++) {
      writer.append(bytes);
    }
    writer.close();

    long fileLen = fs.getFileStatus(file).getLen();
    System.out.println("The file size of RCFile with " + bytes.size() + " number columns and " + count
        + " number rows is " + fileLen);
  }
From source file: org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java
License: Apache License
public void textTest(int numRegularRecords, int numPartitions, long availableMemory, int numLargeKeys,
      int numLargevalues, int numLargeKvPairs) throws IOException, InterruptedException {
    Partitioner partitioner = new HashPartitioner();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);
    Random random = new Random();

    Configuration conf = createConfiguration(outputContext, Text.class, Text.class, shouldCompress, -1,
        HashPartitioner.class);
    CompressionCodec codec = null;
    if (shouldCompress) {
      codec = new DefaultCodec();
      ((Configurable) codec).setConf(conf);
    }

    int numRecordsWritten = 0;

    Map<Integer, Multimap<String, String>> expectedValues = new HashMap<Integer, Multimap<String, String>>();
    for (int i = 0; i < numPartitions; i++) {
      expectedValues.put(i, LinkedListMultimap.<String, String>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
        numPartitions, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;

    BitSet partitionsWithData = new BitSet(numPartitions);
    Text keyText = new Text();
    Text valText = new Text();
    for (int i = 0; i < numRegularRecords; i++) {
      String key = createRandomString(Math.abs(random.nextInt(10)));
      String val = createRandomString(Math.abs(random.nextInt(20)));
      keyText.set(key);
      valText.set(val);
      int partition = partitioner.getPartition(keyText, valText, numPartitions);
      partitionsWithData.set(partition);
      expectedValues.get(partition).put(key, val);
      kvWriter.write(keyText, valText);
      numRecordsWritten++;
    }

    // Write Large key records
    for (int i = 0; i < numLargeKeys; i++) {
      String key = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
      String val = createRandomString(Math.abs(random.nextInt(20)));
      keyText.set(key);
      valText.set(val);
      int partition = partitioner.getPartition(keyText, valText, numPartitions);
      partitionsWithData.set(partition);
      expectedValues.get(partition).put(key, val);
      kvWriter.write(keyText, valText);
      numRecordsWritten++;
    }

    // Write Large val records
    for (int i = 0; i < numLargevalues; i++) {
      String key = createRandomString(Math.abs(random.nextInt(10)));
      String val = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
      keyText.set(key);
      valText.set(val);
      int partition = partitioner.getPartition(keyText, valText, numPartitions);
      partitionsWithData.set(partition);
      expectedValues.get(partition).put(key, val);
      kvWriter.write(keyText, valText);
      numRecordsWritten++;
    }

    // Write records where key + val are large (but both can fit in the buffer individually)
    for (int i = 0; i < numLargeKvPairs; i++) {
      String key = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
      String val = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
      keyText.set(key);
      valText.set(val);
      int partition = partitioner.getPartition(keyText, valText, numPartitions);
      partitionsWithData.set(partition);
      expectedValues.get(partition).put(key, val);
      kvWriter.write(keyText, valText);
      numRecordsWritten++;
    }

    List<Event> events = kvWriter.close();
    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    TezCounter outputLargeRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_LARGE_RECORDS);
    assertEquals(numLargeKeys + numLargevalues + numLargeKvPairs, outputLargeRecordsCounter.getValue());

    // Validate the event
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numPartitions, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
        .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    BitSet emptyPartitionBits = null;
    if (partitionsWithData.cardinality() != numPartitions) {
      assertTrue(eventProto.hasEmptyPartitions());
      byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
      emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
      assertEquals(numPartitions - partitionsWithData.cardinality(), emptyPartitionBits.cardinality());
    } else {
      assertFalse(eventProto.hasEmptyPartitions());
      emptyPartitionBits = new BitSet(numPartitions);
    }
    assertEquals(HOST_STRING, eventProto.getHost());
    assertEquals(SHUFFLE_PORT, eventProto.getPort());
    assertEquals(uniqueId, eventProto.getPathComponent());

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;

    if (numRecordsWritten > 0) {
      assertTrue(localFs.exists(outputFilePath));
      assertTrue(localFs.exists(spillFilePath));
    } else {
      return;
    }

    // Special case for 0 records.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    Text keyDeser = new Text();
    Text valDeser = new Text();
    for (int i = 0; i < numPartitions; i++) {
      if (emptyPartitionBits.get(i)) {
        continue;
      }
      TezIndexRecord indexRecord = spillRecord.getIndex(i);
      FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
      inStream.seek(indexRecord.getStartOffset());
      IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
          0, -1);
      while (reader.nextRawKey(keyBuffer)) {
        reader.nextRawValue(valBuffer);
        keyDeser.readFields(keyBuffer);
        valDeser.readFields(valBuffer);
        int partition = partitioner.getPartition(keyDeser, valDeser, numPartitions);
        assertTrue(expectedValues.get(partition).remove(keyDeser.toString(), valDeser.toString()));
      }
      inStream.close();
    }
    for (int i = 0; i < numPartitions; i++) {
      assertEquals(0, expectedValues.get(i).size());
      expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
  }
From source file: org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java
License: Apache License
private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions,
      boolean shouldCompress) throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);

    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class,
        shouldCompress, -1);
    CompressionCodec codec = null;
    if (shouldCompress) {
      codec = new DefaultCodec();
      ((Configurable) codec).setConf(conf);
    }

    int numOutputs = numPartitions;
    long availableMemory = 2048;
    int numRecordsWritten = 0;

    Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
    for (int i = 0; i < numOutputs; i++) {
      expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
        numOutputs, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;
    int sizePerRecord = 4 + 8; // IntW + LongW
    int sizePerRecordWithOverhead = sizePerRecord + 12; // Record + META_OVERHEAD

    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    for (int i = 0; i < numRecords; i++) {
      intWritable.set(i);
      longWritable.set(i);
      int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
      if (skippedPartitions != null && skippedPartitions.contains(partition)) {
        continue;
      }
      expectedValues.get(partition).put(intWritable.get(), longWritable.get());
      kvWriter.write(intWritable, longWritable);
      numRecordsWritten++;
    }
    List<Event> events = kvWriter.close();

    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer;

    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    // Verify the status of the buffers
    if (numExpectedSpills == 0) {
      assertEquals(1, kvWriter.numInitializedBuffers);
    } else {
      assertTrue(kvWriter.numInitializedBuffers > 1);
    }
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());

    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWritternCounter = counters
        .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
      assertTrue(fileOutputBytes > 0);
      if (!shouldCompress) {
        assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
      }
    } else {
      assertEquals(0, fileOutputBytes);
    }
    assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    if (numExpectedSpills == 0) {
      assertEquals(0, additionalSpillBytesWritten);
      assertEquals(0, additionalSpillBytesRead);
    } else {
      assertTrue(additionalSpillBytesWritten > 0);
      assertTrue(additionalSpillBytesRead > 0);
      if (!shouldCompress) {
        assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
        assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
      }
    }
    assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
    assertEquals(numExpectedSpills, numAdditionalSpillsCounter.getValue());

    BitSet emptyPartitionBits = null;
    // Verify the event returned
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
        .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    if (skippedPartitions == null && numRecordsWritten > 0) {
      assertFalse(eventProto.hasEmptyPartitions());
      emptyPartitionBits = new BitSet(numPartitions);
    } else {
      assertTrue(eventProto.hasEmptyPartitions());
      byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
      emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
      if (numRecordsWritten == 0) {
        assertEquals(numPartitions, emptyPartitionBits.cardinality());
      } else {
        for (Integer e : skippedPartitions) {
          assertTrue(emptyPartitionBits.get(e));
        }
        assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
      }
    }
    if (emptyPartitionBits.cardinality() != numPartitions) {
      assertEquals(HOST_STRING, eventProto.getHost());
      assertEquals(SHUFFLE_PORT, eventProto.getPort());
      assertEquals(uniqueId, eventProto.getPathComponent());
    } else {
      assertFalse(eventProto.hasHost());
      assertFalse(eventProto.hasPort());
      assertFalse(eventProto.hasPathComponent());
    }

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;

    if (numRecordsWritten > 0) {
      assertTrue(localFs.exists(outputFilePath));
      assertTrue(localFs.exists(spillFilePath));
    } else {
      return;
    }

    // Special case for 0 records.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    IntWritable keyDeser = new IntWritable();
    LongWritable valDeser = new LongWritable();
    for (int i = 0; i < numOutputs; i++) {
      if (skippedPartitions != null && skippedPartitions.contains(i)) {
        continue;
      }
      TezIndexRecord indexRecord = spillRecord.getIndex(i);
      FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
      inStream.seek(indexRecord.getStartOffset());
      IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
          0, -1);
      while (reader.nextRawKey(keyBuffer)) {
        reader.nextRawValue(valBuffer);
        keyDeser.readFields(keyBuffer);
        valDeser.readFields(valBuffer);
        int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
        assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
      }
      inStream.close();
    }
    for (int i = 0; i < numOutputs; i++) {
      assertEquals(0, expectedValues.get(i).size());
      expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
  }
From source file: org.springframework.data.hadoop.fs.HdfsResourceLoaderLegacyTest.java
License: Apache License
@Test
  public void testDecompressedStream() throws Exception {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(fs.getConf());

    String name = "local/" + UUID.randomUUID() + codec.getDefaultExtension();
    OutputStream outputStream = codec.createOutputStream(fs.create(new Path(name)));
    byte[] content = name.getBytes();
    outputStream.write(content);
    outputStream.close();

    Resource resource = loader.getResource(name);
    assertNotNull(resource);
    InputStream inputStream = resource.getInputStream();
    assertEquals(DecompressorStream.class, inputStream.getClass());
    assertTrue(TestUtils.compareStreams(new ByteArrayInputStream(content), inputStream));
  }
From source file: org.springframework.data.hadoop.fs.HdfsResourceLoaderLegacyTest.java
License: Apache License
@Test
  public void testCompressedStream() throws Exception {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(fs.getConf());

    String name = "local/" + UUID.randomUUID() + codec.getDefaultExtension();
    OutputStream outputStream = codec.createOutputStream(fs.create(new Path(name)));
    byte[] content = name.getBytes();
    outputStream.write(content);
    outputStream.close();

    loader.setUseCodecs(false);

    try {
      Resource resource = loader.getResource(name);
      assertNotNull(resource);
      InputStream inputStream = resource.getInputStream();
      System.out.println(inputStream.getClass());
      assertFalse(DecompressorStream.class.equals(inputStream.getClass()));
      assertFalse(TestUtils.compareStreams(new ByteArrayInputStream(content), inputStream));
    } finally {
      loader.setUseCodecs(true);
    }
  }
From source file: tajo.storage.rcfile.TestRCFile.java
License: Apache License
@Test
  public void testSimpleReadAndWrite() throws IOException {
    fs.delete(file, true);

    Datum[] r1 = new Datum[7];
    int idx = 0;
    r1[idx++] = DatumFactory.createInt(123);
    r1[idx++] = DatumFactory.createLong(456);
    r1[idx++] = DatumFactory.createFloat(7.89f);
    r1[idx++] = DatumFactory.createDouble(10.11d);
    r1[idx++] = DatumFactory.createString("tajo and hadoop");

    // byte[][] record_1 = {
    //     "123".getBytes("UTF-8"), "456".getBytes("UTF-8"),
    //     "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
    //     "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
    //     new byte[0], "NULL".getBytes("UTF-8")};

    System.out.println("Original size: " + r1[4].asByteArray().length);

    byte[][] record_1 = { r1[0].asByteArray(), r1[1].asByteArray(), r1[2].asByteArray(),
        r1[3].asByteArray(), r1[4].asByteArray(), new byte[0], "NULL".getBytes("UTF-8") };

    Datum[] r2 = new Datum[7];
    idx = 0;
    r2[idx++] = DatumFactory.createInt(100);
    r2[idx++] = DatumFactory.createLong(200);
    r2[idx++] = DatumFactory.createFloat(5.3f);
    r2[idx++] = DatumFactory.createDouble(11.12d);
    r2[idx++] = DatumFactory.createString("the second str");

    byte[][] record_2 = { r2[0].asByteArray(), r2[1].asByteArray(), r2[2].asByteArray(),
        r2[3].asByteArray(), r2[4].asByteArray(), new byte[0], "NULL".getBytes("UTF-8") };

    // byte[][] record_2 = {"100".getBytes("UTF-8"), "200".getBytes("UTF-8"),
    //     "123".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
    //     "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
    //     new byte[0], "NULL".getBytes("UTF-8")};

    conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, 7);

    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null,
        RCFile.createMetadata(new Text("apple"), new Text("block"), new Text("cat"), new Text("dog")),
        new DefaultCodec());

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
      bytes.set(i, cu);
    }
    writer.append(bytes);

    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
      bytes.set(i, cu);
    }
    writer.append(bytes);
    writer.close();

    // Object[] expectedRecord_1 = {new ByteWritable((byte) 123),
    //     new ShortWritable((short) 456), new IntWritable(789),
    //     new LongWritable(1000), new DoubleWritable(5.3),
    //     new Text("hive and hadoop"), null, null};
    //
    // Object[] expectedRecord_2 = {new ByteWritable((byte) 100),
    //     new ShortWritable((short) 200), new IntWritable(123),
    //     new LongWritable(1000), new DoubleWritable(5.3),
    //     new Text("hive and hadoop"), null, null};

    ColumnProjectionUtils.setFullyReadColumns(conf);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    assertEquals(new Text("block"), reader.getMetadata().get(new Text("apple")));
    assertEquals(new Text("block"), reader.getMetadataValueOf(new Text("apple")));
    assertEquals(new Text("dog"), reader.getMetadataValueOf(new Text("cat")));

    LongWritable rowID = new LongWritable();

    reader.next(rowID);
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(7);
    assertEquals(r1[0], new IntDatum(cols.get(0).getBytesCopy()));
    assertEquals(r1[1], new LongDatum(cols.get(1).getBytesCopy()));
    assertEquals(r1[2], new FloatDatum(cols.get(2).getBytesCopy()));
    assertEquals(r1[3], new DoubleDatum(cols.get(3).getBytesCopy()));
    assertEquals(r1[4], new StringDatum(cols.get(4).getBytesCopy()));

    reader.next(rowID);
    cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(7);
    assertEquals(r2[0], new IntDatum(cols.get(0).getBytesCopy()));
    assertEquals(r2[1], new LongDatum(cols.get(1).getBytesCopy()));
    assertEquals(r2[2], new FloatDatum(cols.get(2).getBytesCopy()));
    assertEquals(r2[3], new DoubleDatum(cols.get(3).getBytesCopy()));
    assertEquals(r2[4], new StringDatum(cols.get(4).getBytesCopy()));

    /*
    cols.resetValid(8);
    Object row = serDe.deserialize(cols);
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    assertEquals("Field size should be 8", 8, fieldRefs.size());
    for (int j = 0; j < fieldRefs.size(); j++) {
      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
      Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData,
          fieldRefs.get(j).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
      if (i == 0) {
        assertEquals("Field " + i, standardWritableData, expectedRecord_1[j]);
      } else {
        assertEquals("Field " + i, standardWritableData, expectedRecord_2[j]);
      }
    }
    }*/

    reader.close();
  }