List of usage examples for org.apache.hadoop.io.compress SplittableCompressionCodec getDefaultExtension
String getDefaultExtension();
From source file:nl.basjes.hadoop.io.compress.TestSplittableCodecSeams.java
License:Apache License
/** * Write the specified number of records to file in test dir using codec. * Records are simply lines random ASCII *//*from w w w.ja v a 2 s .c o m*/ private static Path writeSplitTestFile(final Configuration conf, final Class<? extends SplittableCompressionCodec> codecClass, final long records, final int recordLength, final int trailingSizeJitter, final int randomizeEveryNChars) throws IOException { RAND.setSeed(1); // Make the tests better reproducable final FileSystem fs = FileSystem.getLocal(conf); final SplittableCompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf); final Path wd = new Path(new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(fs.getUri(), fs.getWorkingDirectory()), codec.getClass().getSimpleName()); final Path file = new Path(wd, "test-" + records + "-" + recordLength + "-" + trailingSizeJitter + codec.getDefaultExtension()); DataOutputStream out = null; final Compressor cmp = CodecPool.getCompressor(codec); try { out = new DataOutputStream(codec.createOutputStream(fs.create(file, true), cmp)); for (long seq = 1; seq <= records; ++seq) { final String line = randomGibberish( recordLength + (trailingSizeJitter > 0 ? RAND.nextInt(trailingSizeJitter) : 0), randomizeEveryNChars) + "\n"; // There must be a simpler way to output ACSII instead of 2 byte UNICODE out.writeBytes(new String(line.getBytes("UTF-8"), "US-ASCII")); } } finally { IOUtils.cleanup(LOG, out); CodecPool.returnCompressor(cmp); } return file; }