List of usage examples for org.apache.hadoop.io.compress.DefaultCodec
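Before the examples, here is a minimal sketch of the codec's basic contract. This snippet is illustrative and not taken from any of the source files below; the class name is hypothetical. Note that DefaultCodec implements Configurable, so setConf() must be called before creating compression streams (the examples below do the same).

    import java.io.ByteArrayOutputStream;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.compress.CompressionOutputStream;
    import org.apache.hadoop.io.compress.DefaultCodec;

    public class DefaultCodecSketch {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            DefaultCodec codec = new DefaultCodec();
            codec.setConf(conf); // DefaultCodec is Configurable; configure before use

            // Compress a small payload into an in-memory buffer.
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            CompressionOutputStream out = codec.createOutputStream(buffer);
            out.write("hello".getBytes("UTF-8"));
            out.finish(); // write the compressed trailer
            out.close();
            System.out.println("compressed bytes: " + buffer.size());
        }
    }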
From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License: Apache License

    @Test
    public void testGetWriter() throws Exception {
        URI uri = new URI("file:///");
        Configuration conf = new HdfsConfiguration();
        final String prefix = "prefix";
        String template = getTestDir().toString()
            + "/${YYYY()}/${MM()}/${DD()}/${hh()}/${mm()}/${ss()}/${record:value('/')}";
        TimeZone timeZone = TimeZone.getTimeZone("UTC");
        long cutOffSecs = 10;
        long cutOffSize = 5;
        long cutOffRecords = 2;
        HdfsFileType fileType = HdfsFileType.TEXT;
        DefaultCodec compressionCodec = new DefaultCodec();
        compressionCodec.setConf(conf);
        SequenceFile.CompressionType compressionType = null;
        String keyEL = null;
        DataGeneratorFactory generatorFactory = new DummyDataGeneratorFactory(null);
        RecordWriterManager mgr = new RecordWriterManager(uri, conf, prefix, template, timeZone, cutOffSecs,
            cutOffSize, cutOffRecords, fileType, compressionCodec, compressionType, keyEL, generatorFactory,
            targetContext, "dirPathTemplate");
        Assert.assertTrue(mgr.validateDirTemplate("g", "dirPathTemplate", new ArrayList<Stage.ConfigIssue>()));
        FileSystem fs = FileSystem.get(uri, conf);
        Date now = getFixedDate();

        // record older than cut off
        Date recordDate = new Date(now.getTime() - 10 * 1000 - 1);
        Record record = RecordCreator.create();
        record.set(Field.create("a"));
        Assert.assertNull(mgr.getWriter(now, recordDate, record));

        // record qualifies, first file
        recordDate = new Date(now.getTime() - 10 * 1000 + 1);
        RecordWriter writer = mgr.getWriter(now, recordDate, record);
        Assert.assertNotNull(writer);
        Path tempPath = writer.getPath();
        Assert.assertEquals(mgr.getPath(recordDate, record), tempPath);
        Path finalPath = mgr.commitWriter(writer);
        // committing a closed writer is a NOP
        Assert.assertNull(mgr.commitWriter(writer));
        Assert.assertEquals(1, getFinalFileNameCount(fs, tempPath.getParent(), prefix));

        // record qualifies, second file
        writer = mgr.getWriter(now, recordDate, record);
        finalPath = mgr.commitWriter(writer);
        Assert.assertEquals(2, getFinalFileNameCount(fs, tempPath.getParent(), prefix));

        // record qualifies, leaving temp file
        writer = mgr.getWriter(now, recordDate, record);
        writer.close();

        // record qualifies, it should roll temp file and create 4th file
        writer = mgr.getWriter(now, recordDate, record);
        finalPath = mgr.commitWriter(writer);
        Assert.assertFalse(fs.exists(tempPath));
        Assert.assertEquals(4, getFinalFileNameCount(fs, tempPath.getParent(), prefix));

        // verifying thresholds because of record count
        writer = mgr.getWriter(now, recordDate, record);
        Assert.assertFalse(mgr.isOverThresholds(writer));
        writer.write(record);
        writer.flush();
        Assert.assertFalse(mgr.isOverThresholds(writer));
        writer.write(record);
        writer.flush();
        Assert.assertTrue(mgr.isOverThresholds(writer));
        writer.write(record);
        mgr.commitWriter(writer);

        // verifying thresholds because of file size
        writer = mgr.getWriter(now, recordDate, record);
        Assert.assertFalse(mgr.isOverThresholds(writer));
        record.set(Field.create("0123456789012345678901234567890123456789012345678901234567890123456789"));
        writer.write(record);
        writer.flush();
        Assert.assertTrue(mgr.isOverThresholds(writer));
        mgr.commitWriter(writer);
    }
From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License: Apache License

    @Test
    public void testThresholdRecords() throws Exception {
        URI uri = new URI("file:///");
        Configuration conf = new HdfsConfiguration();
        String prefix = "prefix";
        String template = getTestDir().toString()
            + "/${YYYY()}/${MM()}/${DD()}/${hh()}/${mm()}/${ss()}/${record:value('/')}";
        TimeZone timeZone = TimeZone.getTimeZone("UTC");
        long cutOffSecs = 10;
        long cutOffSize = 50000;
        long cutOffRecords = 2;
        HdfsFileType fileType = HdfsFileType.TEXT;
        DefaultCodec compressionCodec = new DefaultCodec();
        compressionCodec.setConf(conf);
        SequenceFile.CompressionType compressionType = null;
        String keyEL = null;
        DataGeneratorFactory generatorFactory = new DummyDataGeneratorFactory(null);
        RecordWriterManager mgr = new RecordWriterManager(uri, conf, prefix, template, timeZone, cutOffSecs,
            cutOffSize, cutOffRecords, fileType, compressionCodec, compressionType, keyEL, generatorFactory,
            targetContext, "dirPathTemplate");
        Assert.assertTrue(mgr.validateDirTemplate("g", "dirPathTemplate", new ArrayList<Stage.ConfigIssue>()));
        Date now = getFixedDate();
        Date recordDate = now;
        Record record = RecordCreator.create();
        record.set(Field.create("a"));
        RecordWriter writer = mgr.getWriter(now, recordDate, record);
        Assert.assertNotNull(writer);
        for (int i = 0; i < 2; i++) {
            Assert.assertFalse(mgr.isOverThresholds(writer));
            writer.write(record);
            writer.flush();
        }
        Assert.assertTrue(mgr.isOverThresholds(writer));
        mgr.commitWriter(writer);
    }
From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License: Apache License

    @Test
    public void testThresholdSize() throws Exception {
        URI uri = new URI("file:///");
        Configuration conf = new HdfsConfiguration();
        String prefix = "prefix";
        String template = getTestDir().toString()
            + "/${YYYY()}/${MM()}/${DD()}/${hh()}/${mm()}/${ss()}/${record:value('/')}";
        TimeZone timeZone = TimeZone.getTimeZone("UTC");
        long cutOffSecs = 10;
        long cutOffSize = 4;
        long cutOffRecords = 20;
        HdfsFileType fileType = HdfsFileType.TEXT;
        DefaultCodec compressionCodec = new DefaultCodec();
        compressionCodec.setConf(conf);
        SequenceFile.CompressionType compressionType = null;
        String keyEL = null;
        DataGeneratorFactory generatorFactory = new DummyDataGeneratorFactory(null);
        RecordWriterManager mgr = new RecordWriterManager(uri, conf, prefix, template, timeZone, cutOffSecs,
            cutOffSize, cutOffRecords, fileType, compressionCodec, compressionType, keyEL, generatorFactory,
            targetContext, "dirPathTemplate");
        Assert.assertTrue(mgr.validateDirTemplate("g", "dirPathTemplate", new ArrayList<Stage.ConfigIssue>()));
        Date now = getFixedDate();
        Date recordDate = now;
        Record record = RecordCreator.create();
        record.set(Field.create("a"));
        RecordWriter writer = mgr.getWriter(now, recordDate, record);
        Assert.assertNotNull(writer);
        for (int i = 0; i < 2; i++) {
            Assert.assertFalse(mgr.isOverThresholds(writer));
            writer.write(record);
            writer.flush();
        }
        Assert.assertTrue(mgr.isOverThresholds(writer));
        mgr.commitWriter(writer);
    }
From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License: Apache License

    @Test
    public void testNoThreshold() throws Exception {
        URI uri = new URI("file:///");
        Configuration conf = new HdfsConfiguration();
        String prefix = "prefix";
        String template = getTestDir().toString()
            + "/${YYYY()}/${MM()}/${DD()}/${hh()}/${mm()}/${ss()}/${record:value('/')}";
        TimeZone timeZone = TimeZone.getTimeZone("UTC");
        long cutOffSecs = 10;
        long cutOffSize = 0;
        long cutOffRecords = 0;
        HdfsFileType fileType = HdfsFileType.TEXT;
        DefaultCodec compressionCodec = new DefaultCodec();
        compressionCodec.setConf(conf);
        SequenceFile.CompressionType compressionType = null;
        String keyEL = null;
        DataGeneratorFactory generatorFactory = new DummyDataGeneratorFactory(null);
        RecordWriterManager mgr = new RecordWriterManager(uri, conf, prefix, template, timeZone, cutOffSecs,
            cutOffSize, cutOffRecords, fileType, compressionCodec, compressionType, keyEL, generatorFactory,
            targetContext, "dirPathTemplate");
        Assert.assertTrue(mgr.validateDirTemplate("g", "dirPathTemplate", new ArrayList<Stage.ConfigIssue>()));
        Date now = getFixedDate();
        Date recordDate = now;
        Record record = RecordCreator.create();
        record.set(Field.create("a"));
        RecordWriter writer = mgr.getWriter(now, recordDate, record);
        Assert.assertNotNull(writer);
        for (int i = 0; i < 10; i++) {
            Assert.assertFalse(mgr.isOverThresholds(writer));
            writer.write(record);
            writer.flush();
        }
        Assert.assertFalse(mgr.isOverThresholds(writer));
        mgr.commitWriter(writer);
    }
From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License: Apache License

    private RecordWriterManager getRecordWriterManager(String dirTemplate, long cutOffSecs) throws Exception {
        URI uri = new URI("file:///");
        Configuration conf = new HdfsConfiguration();
        String prefix = "prefix";
        TimeZone timeZone = TimeZone.getTimeZone("UTC");
        long cutOffSize = 20;
        long cutOffRecords = 2;
        HdfsFileType fileType = HdfsFileType.TEXT;
        SequenceFile.CompressionType compressionType = null;
        String keyEL = null;
        DefaultCodec compressionCodec = new DefaultCodec();
        DataGeneratorFactory generatorFactory = new DummyDataGeneratorFactory(null);
        RecordWriterManager mgr = new RecordWriterManager(uri, conf, prefix, dirTemplate, timeZone, cutOffSecs,
            cutOffSize, cutOffRecords, fileType, compressionCodec, compressionType, keyEL, generatorFactory,
            targetContext, "dirPathTemplate");
        Assert.assertTrue(mgr.validateDirTemplate("g", "dirPathTemplate", new ArrayList<Stage.ConfigIssue>()));
        return mgr;
    }
From source file: crush.CrushUtil.java
License: Apache License

    public void crush() throws CrushException {
        if (jobConf == null) {
            jobConf = new JobConf(CrushUtil.class);
        }
        if (codec == null) {
            codec = new DefaultCodec();
            l4j.warn("codec not specified, using DefaultCodec");
        }
        if (compressionType == null) {
            this.compressionType = SequenceFile.CompressionType.BLOCK;
            l4j.warn("compressionType not specified, using BLOCK");
        }
        try {
            if (fs == null) {
                fs = FileSystem.get(jobConf);
            }
            if (!fs.exists(sourcePath)) {
                throw new CrushException(sourcePath + " does not exist");
            }
            if (fs.isFile(sourcePath)) {
                throw new CrushException(sourcePath + " must be a directory");
            }
            FileStatus[] status = fs.listStatus(sourcePath);
            // nothing to crush if the directory holds one file or none
            if (status.length == 0 || status.length == 1) {
                return;
            }
            if (this.type == CrushUtil.FileType.SEQUENCEFILE) {
                sequenceCrush(fs, status);
            }
            if (this.type == CrushUtil.FileType.TEXT) {
                textCrush(fs, status);
            }
        } catch (IOException ex) {
            throw new CrushException("Crush failed: " + ex);
        }
    }
From source file: dima.kmeansseq.SequenceFile.java
License: Apache License

    /**
     * Construct the preferred type of SequenceFile Writer.
     *
     * @param fs The configured filesystem.
     * @param conf The configuration.
     * @param name The name of the file.
     * @param keyClass The 'key' type.
     * @param valClass The 'value' type.
     * @param compressionType The compression type.
     * @return Returns the handle to the constructed SequenceFile Writer.
     * @throws IOException
     */
    public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass,
            Class valClass, CompressionType compressionType) throws IOException {
        return createWriter(fs, conf, name, keyClass, valClass,
            fs.getConf().getInt("io.file.buffer.size", 4096), fs.getDefaultReplication(),
            fs.getDefaultBlockSize(), compressionType, new DefaultCodec(), null, new Metadata());
    }
From source file: dima.kmeansseq.SequenceFile.java
License: Apache License

    /**
     * Construct the preferred type of SequenceFile Writer.
     *
     * @param fs The configured filesystem.
     * @param conf The configuration.
     * @param name The name of the file.
     * @param keyClass The 'key' type.
     * @param valClass The 'value' type.
     * @param compressionType The compression type.
     * @param progress The Progressable object to track progress.
     * @return Returns the handle to the constructed SequenceFile Writer.
     * @throws IOException
     */
    public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass,
            Class valClass, CompressionType compressionType, Progressable progress) throws IOException {
        return createWriter(fs, conf, name, keyClass, valClass,
            fs.getConf().getInt("io.file.buffer.size", 4096), fs.getDefaultReplication(),
            fs.getDefaultBlockSize(), compressionType, new DefaultCodec(), progress, new Metadata());
    }
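As a usage sketch for the two helpers above (not from the source file; the output path and key/value classes are illustrative), a caller would pass a filesystem, configuration, destination path, and key/value types, and the helper fills in buffer size, replication, block size, and a DefaultCodec:

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // Calls the first overload above; DefaultCodec is supplied internally.
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf,
        new Path("/tmp/example.seq"), IntWritable.class, Text.class,
        SequenceFile.CompressionType.BLOCK);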
From source file: edu.bigdata.training.fileformats.compress.SequenceFileWriter.java

    public static void main(String[] args) throws IOException {
        String uri = "output";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(uri);
        IntWritable key = new IntWritable();
        Text value = new Text();
        File infile = new File("src/main/resources/input.txt");
        SequenceFile.Writer writer = null;
        try {
            writer = SequenceFile.createWriter(conf, Writer.file(path), Writer.keyClass(key.getClass()),
                Writer.valueClass(value.getClass()),
                Writer.bufferSize(fs.getConf().getInt("io.file.buffer.size", 4096)),
                Writer.replication(fs.getDefaultReplication()), Writer.blockSize(1073741824),
                Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()),
                Writer.progressable(null), Writer.metadata(new Metadata()));
            int ctr = 100;
            List<String> lines = FileUtils.readLines(infile);
            for (String line : lines) {
                key.set(ctr++);
                value.set(line);
                if (ctr < 150) {
                    System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
                }
                writer.append(key, value);
            }
        } finally {
            IOUtils.closeStream(writer);
        }
    }
From source file: gobblin.metastore.FsStateStore.java
License: Apache License

    /**
     * See {@link StateStore#put(String, String, T)}.
     *
     * <p>
     * This implementation does not support putting the state object into an existing store,
     * as append is not yet supported by the Hadoop SequenceFile (HADOOP-7139).
     * </p>
     */
    @Override
    public void put(String storeName, String tableName, T state) throws IOException {
        String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
        Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);

        if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
            throw new IOException("Failed to create a state file for table " + tmpTableName);
        }

        Closer closer = Closer.create();
        try {
            @SuppressWarnings("deprecation")
            SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf,
                tmpTablePath, Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK,
                new DefaultCodec()));
            writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }

        if (this.useTmpFileForPut) {
            Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
            HadoopUtils.renamePath(this.fs, tmpTablePath, tablePath);
        }
    }