Example usage for org.apache.hadoop.io.compress SplittableCompressionCodec getDefaultExtension

List of usage examples for org.apache.hadoop.io.compress SplittableCompressionCodec getDefaultExtension

Introduction

On this page you can find example usage for org.apache.hadoop.io.compress SplittableCompressionCodec getDefaultExtension.

Prototype

String getDefaultExtension();

Source Link

Document

Get the default filename extension for this kind of compression.

Usage

From source file: nl.basjes.hadoop.io.compress.TestSplittableCodecSeams.java

License:Apache License

/**
 * Writes the specified number of records to a file in the test directory, compressed with the
 * given codec. Each record is one line of random text followed by a newline.
 *
 * <p>The file is created (overwriting any existing file) at
 * {@code <test.build.data or /tmp>/<codec simple class name>/test-<records>-<recordLength>-<trailingSizeJitter><codec default extension>}
 * on the local file system.
 *
 * @param conf                 configuration used to obtain the local file system and to
 *                             instantiate the codec
 * @param codecClass           splittable codec implementation used to compress the file
 * @param records              number of lines to write
 * @param recordLength         base value of the length argument handed to
 *                             {@code randomGibberish} for every line
 * @param trailingSizeJitter   when positive, a random value in {@code [0, trailingSizeJitter)}
 *                             is added to {@code recordLength} for each line; otherwise
 *                             nothing is added
 * @param randomizeEveryNChars passed through unchanged to {@code randomGibberish}
 * @return the path of the written file
 * @throws IOException if creating, writing or closing the file fails
 */
private static Path writeSplitTestFile(final Configuration conf,
        final Class<? extends SplittableCompressionCodec> codecClass, final long records,
        final int recordLength, final int trailingSizeJitter, final int randomizeEveryNChars)
        throws IOException {

    RAND.setSeed(1); // Fixed seed makes the generated test data reproducible

    final FileSystem fs = FileSystem.getLocal(conf);
    final SplittableCompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);

    // One working directory per codec implementation below the test data root.
    final Path wd = new Path(new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(fs.getUri(),
            fs.getWorkingDirectory()), codec.getClass().getSimpleName());

    final Path file = new Path(wd,
            "test-" + records + "-" + recordLength + "-" + trailingSizeJitter + codec.getDefaultExtension());
    DataOutputStream out = null;
    final Compressor cmp = CodecPool.getCompressor(codec);
    try {
        out = new DataOutputStream(codec.createOutputStream(fs.create(file, true), cmp));

        for (long seq = 1; seq <= records; ++seq) {
            final String line = randomGibberish(
                    recordLength + (trailingSizeJitter > 0 ? RAND.nextInt(trailingSizeJitter) : 0),
                    randomizeEveryNChars) + "\n";
            // One byte per character. Assumes randomGibberish only produces ASCII
            // characters (as the record description states) - confirm if that changes.
            out.write(line.getBytes("US-ASCII"));
        }

        // Close on the success path so a failure while flushing/finishing the compressed
        // stream reaches the caller instead of leaving a silently truncated file.
        out.close();
        out = null;
    } finally {
        // Failure path only: 'out' is null after a successful close above.
        // NOTE(review): relies on IOUtils.cleanup skipping null arguments, as Hadoop's
        // IOUtils does - confirm this is the IOUtils imported by the file.
        IOUtils.cleanup(LOG, out);
        CodecPool.returnCompressor(cmp);
    }
    return file;
}