List of usage examples for org.apache.hadoop.io.compress.DefaultCodec
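Before the project-specific examples, here is a minimal, self-contained sketch of the core DefaultCodec API: configure the codec, wrap an output stream with createOutputStream to compress, read the data back through createInputStream, and query the default file extension. The class name DefaultCodecSketch and the sample payload are illustrative only, not taken from any project below.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.DefaultCodec;

public class DefaultCodecSketch {
    public static void main(String[] args) throws Exception {
        DefaultCodec codec = new DefaultCodec();
        codec.setConf(new Configuration()); // DefaultCodec is Configurable; give it a conf before use

        // Compress a small payload into an in-memory buffer
        ByteArrayOutputStream compressed = new ByteArrayOutputStream();
        CompressionOutputStream out = codec.createOutputStream(compressed);
        out.write("hello default codec".getBytes("UTF-8"));
        out.close();

        // Decompress it again
        CompressionInputStream in = codec.createInputStream(new ByteArrayInputStream(compressed.toByteArray()));
        ByteArrayOutputStream restored = new ByteArrayOutputStream();
        IOUtils.copyBytes(in, restored, 4096, true);

        System.out.println(restored.toString("UTF-8"));  // hello default codec
        System.out.println(codec.getDefaultExtension()); // .deflate
    }
}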
From source file:com.facebook.presto.hive.HivePageSink.java
License:Apache License
private String generateRandomFileName(String outputFormat)
{
    // text format files must have the correct extension when compressed
    String extension = "";
    if (HiveConf.getBoolVar(conf, COMPRESSRESULT)
            && HiveIgnoreKeyTextOutputFormat.class.getName().equals(outputFormat)) {
        extension = new DefaultCodec().getDefaultExtension();

        String compressionCodecClass = conf.get("mapred.output.compression.codec");
        if (compressionCodecClass != null) {
            try {
                Class<? extends CompressionCodec> codecClass = conf.getClassByName(compressionCodecClass)
                        .asSubclass(CompressionCodec.class);
                extension = ReflectionUtil.newInstance(codecClass, conf).getDefaultExtension();
            }
            catch (ClassNotFoundException e) {
                throw new PrestoException(HIVE_UNSUPPORTED_FORMAT,
                        "Compression codec not found: " + compressionCodecClass, e);
            }
            catch (RuntimeException e) {
                throw new PrestoException(HIVE_UNSUPPORTED_FORMAT,
                        "Failed to load compression codec: " + compressionCodecClass, e);
            }
        }
    }
    return filePrefix + "_" + randomUUID() + extension;
}
From source file:com.facebook.presto.hive.HiveWriterFactory.java
License:Apache License
public static String getFileExtension(JobConf conf, StorageFormat storageFormat)
{
    // text format files must have the correct extension when compressed
    if (!HiveConf.getBoolVar(conf, COMPRESSRESULT)
            || !HiveIgnoreKeyTextOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return "";
    }

    String compressionCodecClass = conf.get("mapred.output.compression.codec");
    if (compressionCodecClass == null) {
        return new DefaultCodec().getDefaultExtension();
    }

    try {
        Class<? extends CompressionCodec> codecClass = conf.getClassByName(compressionCodecClass)
                .asSubclass(CompressionCodec.class);
        return ReflectionUtil.newInstance(codecClass, conf).getDefaultExtension();
    }
    catch (ClassNotFoundException e) {
        throw new PrestoException(HIVE_UNSUPPORTED_FORMAT,
                "Compression codec not found: " + compressionCodecClass, e);
    }
    catch (RuntimeException e) {
        throw new PrestoException(HIVE_UNSUPPORTED_FORMAT,
                "Failed to load compression codec: " + compressionCodecClass, e);
    }
}
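For reference, DefaultCodec.getDefaultExtension() returns ".deflate", so in both Presto examples above a compressed text output file falls back to a ".deflate" suffix unless mapred.output.compression.codec names a different codec class, in which case that codec's extension is used instead.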
From source file:com.hdfs.concat.crush.CrushReducerParameterizedTest.java
License:Apache License
/**
 * Every file in this unit test is named "file" followed by a number. This method will create a sequence file with
 * as many lines as the number in the file name. The keys in the file will count from one to the number. The values
 * in the file will count from 100n + 1 to 100n + n. This way each file will have distinct contents so long as no
 * two files have the same name.
 */
private Text writeFile(File srcDir, String fileName, Format format) throws IOException {

    int fileNum = Integer.parseInt(fileName.substring(4));

    File file = new File(srcDir, fileName);

    if (Format.TEXT == format) {
        PrintWriter writer = new PrintWriter(new FileOutputStream(file));

        for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) {
            writer.printf("%d\t%d\n", k, v);
        }

        writer.close();
    } else {
        CustomWritable key = new CustomWritable();
        CustomWritable value = new CustomWritable();

        DefaultCodec codec = new DefaultCodec();
        codec.setConf(job);

        Writer writer = SequenceFile.createWriter(fs, job, new Path(file.getAbsolutePath()),
                CustomWritable.class, CustomWritable.class, compressionType, codec);

        for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) {
            key.set(k);
            value.set(v);

            writer.append(key, value);
        }

        writer.close();
    }

    return new Text(file.getAbsolutePath());
}
From source file:com.hdfs.concat.crush.integration.CrushMapReduceTest.java
License:Apache License
@Before
@Override
public void setUp() throws Exception {
    super.setUp();

    job = createJobConf();

    job.setBoolean("mapred.output.compress", true);
    job.set("mapred.output.compression.type", CompressionType.BLOCK.name());
    job.set("mapred.output.compression.codec", CustomCompressionCodec.class.getName());

    FileSystem fs = getFileSystem();

    Path homeDirPath = fs.makeQualified(new Path("."));

    homeDir = homeDirPath.toUri().getPath();

    fs.delete(homeDirPath, true);

    defaultCodec = new DefaultCodec();
    defaultCodec.setConf(job);

    customCodec = new CustomCompressionCodec();
    customCodec.setConf(job);
}
From source file:com.m6d.filecrush.crush.integration.CrushMapReduceTest.java
License:Apache License
@Before
@Override
public void setUp() throws Exception {
    super.setUp();

    job = createJobConf();

    job.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    job.set("mapreduce.output.fileoutputformat.compress.type", CompressionType.BLOCK.name());
    job.set("mapreduce.output.fileoutputformat.compress.codec", CustomCompressionCodec.class.getName());

    FileSystem fs = getFileSystem();

    Path homeDirPath = fs.makeQualified(new Path("."));

    homeDir = homeDirPath.toUri().getPath();

    fs.delete(homeDirPath, true);

    defaultCodec = new DefaultCodec();
    defaultCodec.setConf(job);

    customCodec = new CustomCompressionCodec();
    customCodec.setConf(job);
}
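The two CrushMapReduceTest variants above differ only in the configuration keys they set: the com.hdfs.concat version uses the older mapred.output.compress* property names, while the com.m6d.filecrush version uses the mapreduce.output.fileoutputformat.compress* names that replaced them in Hadoop 2. Both drive the same DefaultCodec and CustomCompressionCodec setup.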
From source file:com.myhp.hive.rcfile.RCFileGenerator.java
License:Apache License
private static void genData(String format, int numRows, String output, String plainOutput) throws Exception {
    int numFields = 0;
    if (format.equals("student")) {
        rand = new Random(numRows);
        numFields = 3;
    } else if (format.equals("voter")) {
        rand = new Random(1000000000 + numRows);
        numFields = 4;
    } else if (format.equals("alltypes")) {
        rand = new Random(2000000000L + numRows);
        numFields = 10;
    }

    RCFileOutputFormat.setColumnNumber(conf, numFields);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, getFile(output), null, new DefaultCodec());

    PrintWriter pw = new PrintWriter(new FileWriter(plainOutput));

    for (int j = 0; j < numRows; j++) {
        BytesRefArrayWritable row = new BytesRefArrayWritable(numFields);
        byte[][] fields = null;

        if (format.equals("student")) {
            byte[][] f = { randomName().getBytes("UTF-8"),
                    Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
                    Double.valueOf(randomGpa()).toString().getBytes("UTF-8") };
            fields = f;
        } else if (format.equals("voter")) {
            byte[][] f = { randomName().getBytes("UTF-8"),
                    Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
                    randomRegistration().getBytes("UTF-8"),
                    Double.valueOf(randomContribution()).toString().getBytes("UTF-8") };
            fields = f;
        } else if (format.equals("alltypes")) {
            byte[][] f = { Integer.valueOf(rand.nextInt(Byte.MAX_VALUE)).toString().getBytes("UTF-8"),
                    Integer.valueOf(rand.nextInt(Short.MAX_VALUE)).toString().getBytes("UTF-8"),
                    Integer.valueOf(rand.nextInt()).toString().getBytes("UTF-8"),
                    Long.valueOf(rand.nextLong()).toString().getBytes("UTF-8"),
                    Float.valueOf(rand.nextFloat() * 1000).toString().getBytes("UTF-8"),
                    Double.valueOf(rand.nextDouble() * 1000000).toString().getBytes("UTF-8"),
                    randomName().getBytes("UTF-8"),
                    randomMap(),
                    randomArray() };
            fields = f;
        }

        for (int i = 0; i < fields.length; i++) {
            BytesRefWritable field = new BytesRefWritable(fields[i], 0, fields[i].length);
            row.set(i, field);
            pw.print(new String(fields[i]));
            if (i != fields.length - 1)
                pw.print("\t");
            else
                pw.println();
        }

        writer.append(row);
    }

    writer.close();
    pw.close();
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.writer.TestActiveRecordWriters.java
License:Apache License
@Test
public void testWritersLifecycle() throws Exception {
    URI uri = new URI("file:///");
    Configuration conf = new HdfsConfiguration();
    String prefix = "prefix";
    String template = getTestDir().toString()
            + "/${YYYY()}/${MM()}/${DD()}/${hh()}/${mm()}/${ss()}/${record:value('/')}";
    TimeZone timeZone = TimeZone.getTimeZone("UTC");
    long cutOffSecs = 2;
    long cutOffSize = 10000;
    long cutOffRecords = 2;
    HdfsFileType fileType = HdfsFileType.SEQUENCE_FILE;
    DefaultCodec compressionCodec = new DefaultCodec();
    compressionCodec.setConf(conf);
    SequenceFile.CompressionType compressionType = SequenceFile.CompressionType.BLOCK;
    String keyEL = "uuid()";
    DataGeneratorFactory generatorFactory = new DummyDataGeneratorFactory(null);
    RecordWriterManager mgr = new RecordWriterManager(uri, conf, prefix, template, timeZone, cutOffSecs,
            cutOffSize, cutOffRecords, fileType, compressionCodec, compressionType, keyEL, generatorFactory,
            ContextInfoCreator.createTargetContext(HdfsDTarget.class, "testWritersLifecycle", false,
                    OnRecordError.TO_ERROR, null),
            "dirPathTemplate");
    Assert.assertTrue(mgr.validateDirTemplate("g", "dirPathTemplate", new ArrayList<Stage.ConfigIssue>()));

    ActiveRecordWriters writers = new ActiveRecordWriters(mgr);

    Date now = new Date();

    // record older than cut off
    Date recordDate = new Date(now.getTime() - 3 * 1000 - 1);
    Record record = RecordCreator.create();
    record.set(Field.create("a"));
    Assert.assertNull(writers.get(now, recordDate, record));

    recordDate = new Date(now.getTime());
    RecordWriter writer = writers.get(now, recordDate, record);
    Assert.assertNotNull(writer);
    Path tempPath = writer.getPath();
    writer.write(record);
    writers.release(writer);
    // writer should still be open
    Assert.assertFalse(writer.isClosed());

    writer = writers.get(now, recordDate, record);
    writer.write(record);
    writers.release(writer);
    // writer should be closed because it went over the record count threshold
    Assert.assertTrue(writer.isClosed());

    // we should be able to get a new writer as the cutoff didn't kick in yet
    writer = writers.get(now, recordDate, record);
    Assert.assertNotNull(writer);
    writers.purge();
    // purging should not close the writer as the cutoff didn't kick in yet
    Assert.assertFalse(writer.isClosed());

    Thread.sleep(3001);
    writers.purge();
    // purging should close the writer now that the cutoff kicked in
    Assert.assertTrue(writer.isClosed());

    // verifying closeAll() closes writers
    writer = writers.get(new Date(), new Date(), record);
    Assert.assertNotNull(writer);
    writers.closeAll();
    Assert.assertTrue(writer.isClosed());
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License:Apache License
@Test
public void testCompressionCodec() throws Exception {
    testPath(new DefaultCodec());
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License:Apache License
@Test
public void testTextFileCompression() throws Exception {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(new Configuration());
    testTextFile(codec);
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License:Apache License
@Test
public void testSeqFileCompression() throws Exception {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(new Configuration());
    testSeqFile(codec, SequenceFile.CompressionType.RECORD);
    testSeqFile(codec, SequenceFile.CompressionType.BLOCK);
}