List of usage examples for org.apache.hadoop.io.compress.DefaultCodec#setConf

Method signature:

    @Override
    public void setConf(Configuration conf)
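All of the examples below share one pattern: DefaultCodec implements Configurable, so setConf(Configuration) must be called before the codec is asked to create compression or decompression streams (codecs obtained through ReflectionUtils.newInstance or CompressionCodecFactory have this done for them). The following is a minimal, self-contained sketch of that pattern; the class name, sample string, and use of ByteArrayOutputStream are illustrative only and do not come from the source files listed below.

    import java.io.ByteArrayOutputStream;
    import java.nio.charset.StandardCharsets;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.compress.CompressionOutputStream;
    import org.apache.hadoop.io.compress.DefaultCodec;

    public class DefaultCodecSetConfSketch { // hypothetical class name, not from the listings below
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            DefaultCodec codec = new DefaultCodec();
            // The codec must see a Configuration before createOutputStream()/createInputStream() are used.
            codec.setConf(conf);

            ByteArrayOutputStream sink = new ByteArrayOutputStream();
            CompressionOutputStream out = codec.createOutputStream(sink);
            out.write("some sample data".getBytes(StandardCharsets.UTF_8));
            out.finish(); // flush and terminate the compressed stream
            out.close();

            System.out.println("compressed bytes: " + sink.size());
        }
    }

The examples that follow apply the same setConf call in test code for plain compression streams, compressed sequence files, Flume output formats, and the StreamSets HDFS destination writers.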
From source file: TestCodec.java
License: Open Source License

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        DefaultCodec codec = new DefaultCodec();
        codec.setConf(conf);
        DataOutputBuffer chunksWriteBuffer = new DataOutputBuffer();
        CompressionOutputStream compressionOutputStream = codec.createOutputStream(chunksWriteBuffer);
        DataInputBuffer chunkReadBuffer = new DataInputBuffer();
        CompressionInputStream compressionInputStream = codec.createInputStream(chunkReadBuffer);

        String str = "laksjldfkjalskdjfl;aksjdflkajsldkfjalksjdflkajlsdkfjlaksjdflka";
        compressionOutputStream.write(str.getBytes());
        compressionOutputStream.finish();

        byte[] data = chunksWriteBuffer.getData();
        System.out.println(str.length());
        System.out.println(chunksWriteBuffer.getLength());

        chunkReadBuffer.reset(data, chunksWriteBuffer.getLength());
        DataOutputBuffer dob = new DataOutputBuffer();
        IOUtils.copyBytes(compressionInputStream, dob, conf);
        // Decode only the valid bytes; printing dob.getData() directly would print the array reference,
        // not the decompressed text.
        System.out.println(new String(dob.getData(), 0, dob.getLength()));
    }
From source file: com.asakusafw.runtime.io.sequencefile.SequenceFileUtilTest.java
License: Apache License

    /**
     * Creates a compressed sequence file.
     * @throws Exception if failed
     */
    @Test
    public void write_compressed() throws Exception {
        DefaultCodec codec = new DefaultCodec();
        codec.setConf(conf);
        Path path = new Path("testing");
        LongWritable key = new LongWritable();
        LongWritable value = new LongWritable();
        try (OutputStream out = new FileOutputStream(fs.pathToFile(path));
                SequenceFile.Writer writer = SequenceFileUtil.openWriter(new BufferedOutputStream(out), conf,
                        key.getClass(), value.getClass(), codec)) {
            for (long i = 0; i < 300000; i++) {
                key.set(i);
                value.set(i + 1);
                writer.append(key, value);
            }
        }
        try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf)) {
            for (long i = 0; i < 300000; i++) {
                assertThat(reader.next(key, value), is(true));
                assertThat(key.get(), is(i));
                assertThat(value.get(), is(i + 1));
            }
            assertThat(reader.next(key, value), is(false));
        }
    }
From source file: com.cloudera.flume.handlers.hdfs.TestEscapedCustomOutputDfs.java
License: Apache License

    /**
     * Test to write a few log lines, compress using the default codec, write to disk,
     * read back the compressed file and verify the written lines.
     *
     * @throws InterruptedException
     */
    @Test
    public void testDefaultCodec() throws IOException, InterruptedException {
        DefaultCodec codec = new DefaultCodec();
        codec.setConf(FlumeConfiguration.get()); // default codec needs a conf
        checkOutputFormat("syslog", new SyslogEntryFormat(), "DefaultCodec", codec);
    }
From source file: com.hdfs.concat.crush.CrushReducerParameterizedTest.java
License: Apache License

    /**
     * Every file in this unit test is named "file" followed by a number. This method will create a sequence file
     * with as many lines as the number in the file name. The keys in the file will count from one to the number.
     * The values in the file will count from 100n + 1 to 100n + n. This way each file will have distinct contents
     * so long as no two files have the same name.
     */
    private Text writeFile(File srcDir, String fileName, Format format) throws IOException {
        int fileNum = Integer.parseInt(fileName.substring(4));
        File file = new File(srcDir, fileName);

        if (Format.TEXT == format) {
            PrintWriter writer = new PrintWriter(new FileOutputStream(file));
            for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) {
                writer.printf("%d\t%d\n", k, v);
            }
            writer.close();
        } else {
            CustomWritable key = new CustomWritable();
            CustomWritable value = new CustomWritable();
            DefaultCodec codec = new DefaultCodec();
            codec.setConf(job);
            Writer writer = SequenceFile.createWriter(fs, job, new Path(file.getAbsolutePath()),
                    CustomWritable.class, CustomWritable.class, compressionType, codec);
            for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) {
                key.set(k);
                value.set(v);
                writer.append(key, value);
            }
            writer.close();
        }
        return new Text(file.getAbsolutePath());
    }
From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestActiveRecordWriters.java
License: Apache License

    @Test
    public void testWritersLifecycle() throws Exception {
        URI uri = new URI("file:///");
        Configuration conf = new HdfsConfiguration();
        String prefix = "prefix";
        String template = getTestDir().toString()
                + "/${YYYY()}/${MM()}/${DD()}/${hh()}/${mm()}/${ss()}/${record:value('/')}";
        TimeZone timeZone = TimeZone.getTimeZone("UTC");
        long cutOffSecs = 2;
        long cutOffSize = 10000;
        long cutOffRecords = 2;
        HdfsFileType fileType = HdfsFileType.SEQUENCE_FILE;
        DefaultCodec compressionCodec = new DefaultCodec();
        compressionCodec.setConf(conf);
        SequenceFile.CompressionType compressionType = SequenceFile.CompressionType.BLOCK;
        String keyEL = "uuid()";
        DataGeneratorFactory generatorFactory = new DummyDataGeneratorFactory(null);
        RecordWriterManager mgr = new RecordWriterManager(uri, conf, prefix, template, timeZone, cutOffSecs,
                cutOffSize, cutOffRecords, fileType, compressionCodec, compressionType, keyEL, generatorFactory,
                ContextInfoCreator.createTargetContext(HdfsDTarget.class, "testWritersLifecycle", false,
                        OnRecordError.TO_ERROR, null),
                "dirPathTemplate");
        Assert.assertTrue(mgr.validateDirTemplate("g", "dirPathTemplate", new ArrayList<Stage.ConfigIssue>()));
        ActiveRecordWriters writers = new ActiveRecordWriters(mgr);

        Date now = new Date();

        // record older than cut off
        Date recordDate = new Date(now.getTime() - 3 * 1000 - 1);
        Record record = RecordCreator.create();
        record.set(Field.create("a"));
        Assert.assertNull(writers.get(now, recordDate, record));

        recordDate = new Date(now.getTime());
        RecordWriter writer = writers.get(now, recordDate, record);
        Assert.assertNotNull(writer);
        Path tempPath = writer.getPath();
        writer.write(record);
        writers.release(writer);
        // writer should still be open
        Assert.assertFalse(writer.isClosed());

        writer = writers.get(now, recordDate, record);
        writer.write(record);
        writers.release(writer);
        // writer should be closed because of going over the record count threshold
        Assert.assertTrue(writer.isClosed());

        // we should be able to get a new writer as the cutoff didn't kick in yet
        writer = writers.get(now, recordDate, record);
        Assert.assertNotNull(writer);
        writers.purge();
        // purging should not close the writer as the cutoff didn't kick in yet
        Assert.assertFalse(writer.isClosed());

        Thread.sleep(3001);
        writers.purge();
        // purging should close the writer as the cutoff kicked in
        Assert.assertTrue(writer.isClosed());

        // verifying closeAll() closes writers
        writer = writers.get(new Date(), new Date(), record);
        Assert.assertNotNull(writer);
        writers.closeAll();
        Assert.assertTrue(writer.isClosed());
    }
From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License: Apache License

    @Test
    public void testTextFileCompression() throws Exception {
        DefaultCodec codec = new DefaultCodec();
        codec.setConf(new Configuration());
        testTextFile(codec);
    }
From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License: Apache License

    @Test
    public void testSeqFileCompression() throws Exception {
        DefaultCodec codec = new DefaultCodec();
        codec.setConf(new Configuration());
        testSeqFile(codec, SequenceFile.CompressionType.RECORD);
        testSeqFile(codec, SequenceFile.CompressionType.BLOCK);
    }
From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License: Apache License

    @Test
    public void testGetWriter() throws Exception {
        URI uri = new URI("file:///");
        Configuration conf = new HdfsConfiguration();
        final String prefix = "prefix";
        String template = getTestDir().toString()
                + "/${YYYY()}/${MM()}/${DD()}/${hh()}/${mm()}/${ss()}/${record:value('/')}";
        TimeZone timeZone = TimeZone.getTimeZone("UTC");
        long cutOffSecs = 10;
        long cutOffSize = 5;
        long cutOffRecords = 2;
        HdfsFileType fileType = HdfsFileType.TEXT;
        DefaultCodec compressionCodec = new DefaultCodec();
        compressionCodec.setConf(conf);
        SequenceFile.CompressionType compressionType = null;
        String keyEL = null;
        DataGeneratorFactory generatorFactory = new DummyDataGeneratorFactory(null);
        RecordWriterManager mgr = new RecordWriterManager(uri, conf, prefix, template, timeZone, cutOffSecs,
                cutOffSize, cutOffRecords, fileType, compressionCodec, compressionType, keyEL, generatorFactory,
                targetContext, "dirPathTemplate");
        Assert.assertTrue(mgr.validateDirTemplate("g", "dirPathTemplate", new ArrayList<Stage.ConfigIssue>()));

        FileSystem fs = FileSystem.get(uri, conf);
        Date now = getFixedDate();

        // record older than cut off
        Date recordDate = new Date(now.getTime() - 10 * 1000 - 1);
        Record record = RecordCreator.create();
        record.set(Field.create("a"));
        Assert.assertNull(mgr.getWriter(now, recordDate, record));

        // record qualifies, first file
        recordDate = new Date(now.getTime() - 10 * 1000 + 1);
        RecordWriter writer = mgr.getWriter(now, recordDate, record);
        Assert.assertNotNull(writer);
        Path tempPath = writer.getPath();
        Assert.assertEquals(mgr.getPath(recordDate, record), tempPath);
        Path finalPath = mgr.commitWriter(writer);
        // committing a closed writer is a NOP
        Assert.assertNull(mgr.commitWriter(writer));
        Assert.assertEquals(1, getFinalFileNameCount(fs, tempPath.getParent(), prefix));

        // record qualifies, second file
        writer = mgr.getWriter(now, recordDate, record);
        finalPath = mgr.commitWriter(writer);
        Assert.assertEquals(2, getFinalFileNameCount(fs, tempPath.getParent(), prefix));

        // record qualifies, leaving temp file
        writer = mgr.getWriter(now, recordDate, record);
        writer.close();

        // record qualifies, it should roll temp file and create 4th file
        writer = mgr.getWriter(now, recordDate, record);
        finalPath = mgr.commitWriter(writer);
        Assert.assertFalse(fs.exists(tempPath));
        Assert.assertEquals(4, getFinalFileNameCount(fs, tempPath.getParent(), prefix));

        // verifying thresholds because of record count
        writer = mgr.getWriter(now, recordDate, record);
        Assert.assertFalse(mgr.isOverThresholds(writer));
        writer.write(record);
        writer.flush();
        Assert.assertFalse(mgr.isOverThresholds(writer));
        writer.write(record);
        writer.flush();
        Assert.assertTrue(mgr.isOverThresholds(writer));
        writer.write(record);
        mgr.commitWriter(writer);

        // verifying thresholds because of file size
        writer = mgr.getWriter(now, recordDate, record);
        Assert.assertFalse(mgr.isOverThresholds(writer));
        record.set(Field.create("0123456789012345678901234567890123456789012345678901234567890123456789"));
        writer.write(record);
        writer.flush();
        Assert.assertTrue(mgr.isOverThresholds(writer));
        mgr.commitWriter(writer);
    }
From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License: Apache License

    @Test
    public void testThresholdRecords() throws Exception {
        URI uri = new URI("file:///");
        Configuration conf = new HdfsConfiguration();
        String prefix = "prefix";
        String template = getTestDir().toString()
                + "/${YYYY()}/${MM()}/${DD()}/${hh()}/${mm()}/${ss()}/${record:value('/')}";
        TimeZone timeZone = TimeZone.getTimeZone("UTC");
        long cutOffSecs = 10;
        long cutOffSize = 50000;
        long cutOffRecords = 2;
        HdfsFileType fileType = HdfsFileType.TEXT;
        DefaultCodec compressionCodec = new DefaultCodec();
        compressionCodec.setConf(conf);
        SequenceFile.CompressionType compressionType = null;
        String keyEL = null;
        DataGeneratorFactory generatorFactory = new DummyDataGeneratorFactory(null);
        RecordWriterManager mgr = new RecordWriterManager(uri, conf, prefix, template, timeZone, cutOffSecs,
                cutOffSize, cutOffRecords, fileType, compressionCodec, compressionType, keyEL, generatorFactory,
                targetContext, "dirPathTemplate");
        Assert.assertTrue(mgr.validateDirTemplate("g", "dirPathTemplate", new ArrayList<Stage.ConfigIssue>()));

        Date now = getFixedDate();
        Date recordDate = now;
        Record record = RecordCreator.create();
        record.set(Field.create("a"));
        RecordWriter writer = mgr.getWriter(now, recordDate, record);
        Assert.assertNotNull(writer);
        for (int i = 0; i < 2; i++) {
            Assert.assertFalse(mgr.isOverThresholds(writer));
            writer.write(record);
            writer.flush();
        }
        Assert.assertTrue(mgr.isOverThresholds(writer));
        mgr.commitWriter(writer);
    }
From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License: Apache License

    @Test
    public void testThresholdSize() throws Exception {
        URI uri = new URI("file:///");
        Configuration conf = new HdfsConfiguration();
        String prefix = "prefix";
        String template = getTestDir().toString()
                + "/${YYYY()}/${MM()}/${DD()}/${hh()}/${mm()}/${ss()}/${record:value('/')}";
        TimeZone timeZone = TimeZone.getTimeZone("UTC");
        long cutOffSecs = 10;
        long cutOffSize = 4;
        long cutOffRecords = 20;
        HdfsFileType fileType = HdfsFileType.TEXT;
        DefaultCodec compressionCodec = new DefaultCodec();
        compressionCodec.setConf(conf);
        SequenceFile.CompressionType compressionType = null;
        String keyEL = null;
        DataGeneratorFactory generatorFactory = new DummyDataGeneratorFactory(null);
        RecordWriterManager mgr = new RecordWriterManager(uri, conf, prefix, template, timeZone, cutOffSecs,
                cutOffSize, cutOffRecords, fileType, compressionCodec, compressionType, keyEL, generatorFactory,
                targetContext, "dirPathTemplate");
        Assert.assertTrue(mgr.validateDirTemplate("g", "dirPathTemplate", new ArrayList<Stage.ConfigIssue>()));

        Date now = getFixedDate();
        Date recordDate = now;
        Record record = RecordCreator.create();
        record.set(Field.create("a"));
        RecordWriter writer = mgr.getWriter(now, recordDate, record);
        Assert.assertNotNull(writer);
        for (int i = 0; i < 2; i++) {
            Assert.assertFalse(mgr.isOverThresholds(writer));
            writer.write(record);
            writer.flush();
        }
        Assert.assertTrue(mgr.isOverThresholds(writer));
        mgr.commitWriter(writer);
    }