Example usage for org.apache.hadoop.io.compress CompressionCodec getDefaultExtension

Introduction

On this page you can find example usages of org.apache.hadoop.io.compress CompressionCodec getDefaultExtension.

Prototype

String getDefaultExtension();

Document

Get the default filename extension for this kind of compression.
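
A minimal, self-contained sketch of the call (not taken from the examples below): instantiate a codec and query its default filename extension. Note that the returned string includes the leading dot, e.g. ".gz" for GzipCodec.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DefaultExtensionDemo {
    public static void main(String[] args) {
        // instantiate the codec via ReflectionUtils, as the examples below do
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, new Configuration());
        System.out.println(codec.getDefaultExtension()); // prints ".gz"
    }
}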

Usage

From source file:org.apache.sqoop.connector.hdfs.TestHdfsBase.java

License:Apache License

protected void createTextInput(String indir, Class<? extends CompressionCodec> clz, int numberOfFiles,
        int numberOfRows) throws IOException, InstantiationException, IllegalAccessException {
    Configuration conf = new Configuration();

    CompressionCodec codec = null;
    String extension = "";
    if (clz != null) {
        codec = clz.newInstance();
        if (codec instanceof Configurable) {
            ((Configurable) codec).setConf(conf);
        }
        extension = codec.getDefaultExtension();
    }

    int index = 1;
    for (int fi = 0; fi < numberOfFiles; fi++) {
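        // append the codec's default extension (dot included) to each file name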
        String fileName = indir + "/" + UUID.randomUUID() + extension;
        OutputStream filestream = FileUtils.create(fileName);
        BufferedWriter filewriter;
        if (codec != null) {
            filewriter = new BufferedWriter(new OutputStreamWriter(
                    codec.createOutputStream(filestream, codec.createCompressor()), "UTF-8"));
        } else {
            filewriter = new BufferedWriter(new OutputStreamWriter(filestream, "UTF-8"));
        }

        for (int ri = 0; ri < numberOfRows; ri++) {
            String row = index + "," + (double) index + ",'" + index + "'";
            filewriter.write(row + HdfsConstants.DEFAULT_RECORD_DELIMITER);
            index++;
        }

        filewriter.close();
    }
}

From source file:org.apache.tajo.storage.compress.CodecPool.java

License:Apache License

/**
 * Get a {@link Compressor} for the given {@link CompressionCodec} from the
 * pool or a new one.
 *
 * @param codec
 *          the <code>CompressionCodec</code> for which to get the
 *          <code>Compressor</code>
 * @param conf the <code>Configuration</code> object that holds the settings for creating or reinitializing the compressor
 * @return <code>Compressor</code> for the given <code>CompressionCodec</code>
 *         from the pool or a new one
 */
public static Compressor getCompressor(CompressionCodec codec, Configuration conf) {
    Compressor compressor = borrow(COMPRESSOR_POOL, codec.getCompressorType());
    if (compressor == null) {
        compressor = codec.createCompressor();
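        // getDefaultExtension() serves here as a short, human-readable label for the codec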
        LOG.info("Got brand-new compressor [" + codec.getDefaultExtension() + "]");
    } else {
        compressor.reinit(conf);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Got recycled compressor");
        }
    }
    return compressor;
}

From source file:org.apache.tajo.storage.compress.CodecPool.java

License:Apache License

/**
 * Get a {@link Decompressor} for the given {@link CompressionCodec} from the
 * pool or a new one.
 *
 * @param codec
 *          the <code>CompressionCodec</code> for which to get the
 *          <code>Decompressor</code>
 * @return <code>Decompressor</code> for the given
 *         <code>CompressionCodec</code> from the pool or a new one
 */
public static Decompressor getDecompressor(CompressionCodec codec) {
    Decompressor decompressor = borrow(DECOMPRESSOR_POOL, codec.getDecompressorType());
    if (decompressor == null) {
        decompressor = codec.createDecompressor();
        LOG.info("Got brand-new decompressor [" + codec.getDefaultExtension() + "]");
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Got recycled decompressor");
        }
    }
    return decompressor;
}

From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.CommonFileOutputFormat.java

License:Apache License

protected DataOutputStream getDataOutputStream(final TaskAttemptContext job)
        throws IOException, InterruptedException {
    final Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        final Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    final Path file = super.getDefaultWorkFile(job, extension);
    final FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        return new DataOutputStream(fs.create(file, false));
    } else {
        return new DataOutputStream(codec.createOutputStream(fs.create(file, false)));
    }
}

From source file:org.cloudgraph.mapreduce.GraphXmlOutputFormat.java

License:Apache License

@Override
public RecordWriter<LongWritable, GraphWritable> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new GraphXmlRecordWriter(fileOut, job);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new GraphXmlRecordWriter(new DataOutputStream(codec.createOutputStream(fileOut)), job);
    }
}

From source file:org.godhuli.rhipe.RXTextOutputFormat.java

License:Apache License

public RecordWriter<RHBytesWritable, RHBytesWritable> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    String fieldSeparator = conf.get("mapred.field.separator", " ");
    boolean usekey = "TRUE".equals(conf.get("mapred.textoutputformat.usekey"));
    String newline = conf.get("rhipe.eol.sequence");
    String squote = conf.get("rhipe_string_quote");
    if (squote == null)
        squote = "";
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new RXTextRecordWriter(fileOut, keyValueSeparator, fieldSeparator, squote, usekey, newline);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new RXTextRecordWriter(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator, fieldSeparator, squote, usekey, newline);
    }

}

From source file:org.ojai.json.mapreduce.JSONFileOutputFormat.java

License:Apache License

@Override
public RecordWriter<LongWritable, Document> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {

    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path path = getDefaultWorkFile(job, extension);
    FileSystem fs = path.getFileSystem(conf);
    FSDataOutputStream out = fs.create(path, false);
    if (!isCompressed) {
        return new JSONFileOutputRecordWriter(out);
    } else {
        return new JSONFileOutputRecordWriter(new DataOutputStream(codec.createOutputStream(out)));
    }
}

From source file:org.saarus.service.hadoop.util.JsonOutputFormat.java

License:Apache License

@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = job.get("mapred.textoutputformat.separator", "\t");
    String[] headers = job.getStrings("column.headers");
    if (!isCompressed) {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new JsonRecordWriter<K, V>(fileOut, keyValueSeparator, headers);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        // create the named codec
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
        // build the filename including the extension
        Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new JsonRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator, headers);
    }
}

From source file:org.schedoscope.export.ftp.outputformat.FtpUploadOutputFormat.java

License:Apache License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {

    Configuration conf = context.getConfiguration();

    boolean isCompressed = getCompressOutput(context);
    CompressionCodec codec = null;
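    // note: 'extension' is not declared locally; in the original source it is a field of the enclosing class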

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);

        // only support gzip and bzip2 compression
        if (codecClass.equals(BZip2Codec.class) || codecClass.equals(GzipCodec.class)) {
            codec = ReflectionUtils.newInstance(codecClass, conf);
            extension = codec.getDefaultExtension();
        } else {
            LOG.warn("neither gzip nor bzip2 compression codec found - disabling compression");
            isCompressed = false;
            extension = "";
        }
    }

    char delimiter = conf.get(FTP_EXPORT_CVS_DELIMITER, "\t").charAt(0);
    String[] header = conf.getStrings(FTP_EXPORT_HEADER_COLUMNS);

    Path file = getDefaultWorkFile(context, extension);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);

    RecordWriter<K, V> writer;

    if (conf.get(FTP_EXPORT_FILE_TYPE).equals(FileOutputType.csv.toString())) {

        if (!isCompressed) {
            writer = new CSVRecordWriter<K, V>(fileOut, header, delimiter);
        } else {
            writer = new CSVRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)), header,
                    delimiter);
        }

    } else if (conf.get(FTP_EXPORT_FILE_TYPE).equals(FileOutputType.json.toString())) {

        if (!isCompressed) {
            writer = new JsonRecordWriter<K, V>(fileOut);
        } else {
            writer = new JsonRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)));
        }

    } else {
        throw new IllegalArgumentException("unknown file output type");
    }

    return writer;
}

From source file:org.seqdoop.hadoop_bam.FastqOutputFormat.java

License:Open Source License

public RecordWriter<Text, SequencedFragment> getRecordWriter(TaskAttemptContext task) throws IOException {
    Configuration conf = ContextUtil.getConfiguration(task);
    boolean isCompressed = getCompressOutput(task);

    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(task, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(task, extension);
    FileSystem fs = file.getFileSystem(conf);

    OutputStream output;

    if (isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        output = new DataOutputStream(codec.createOutputStream(fileOut));
    } else
        output = fs.create(file, false);

    return new FastqRecordWriter(conf, output);
}
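
The OutputFormat examples above all follow the same pattern. Condensed into a minimal sketch (the method name and its enclosing FileOutputFormat subclass are hypothetical; everything else mirrors the excerpts):

protected DataOutputStream createCompressedOutput(TaskAttemptContext job) throws IOException {
    Configuration conf = job.getConfiguration();
    CompressionCodec codec = null;
    String extension = "";
    if (getCompressOutput(job)) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        // the codec's extension (dot included) becomes part of the work file name
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FSDataOutputStream fileOut = file.getFileSystem(conf).create(file, false);
    return codec == null ? new DataOutputStream(fileOut)
            : new DataOutputStream(codec.createOutputStream(fileOut));
}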