Example usage for org.apache.hadoop.io.compress.CompressionCodec.getDefaultExtension()

List of usage examples for org.apache.hadoop.io.compress.CompressionCodec.getDefaultExtension()


On this page you can find example usages of org.apache.hadoop.io.compress.CompressionCodec.getDefaultExtension().


String getDefaultExtension();

Source Link


Get the default filename extension for this kind of compression.


From source file:be.ugent.intec.halvade.uploader.mapreduce.MyFastqOutputFormat.java

public RecordWriter<PairedIdWritable, FastqRecord> getRecordWriter(TaskAttemptContext task) throws IOException {
    Configuration conf = task.getConfiguration();
    boolean isCompressed = getCompressOutput(task);

    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(task, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }//from w w  w.  jav a2s.c  om

    Path file = getDefaultWorkFile(task, extension);
    FileSystem fs = file.getFileSystem(conf);

    OutputStream output;

    if (isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        output = new DataOutputStream(codec.createOutputStream(fileOut));
    } else {
        output = fs.create(file, false);

    return new FastqRecordWriter(conf, output);

From source file:cn.lhfei.hadoop.ch04.FileDecompressor.java

License:Apache License

/**
 * Use case: % hadoop FileDecompressor file.gz
 *
 * @param args command-line arguments; args[0] is the URI of the compressed file
 */
public static void main(String[] args) {
    FileSystem fs = null;
    String uri = args[0];
    Path inputPath = null;
    Configuration conf = new Configuration();
    CompressionCodecFactory factory = null;

    InputStream in = null;
    OutputStream out = null;

    try {
        fs = FileSystem.get(URI.create(uri), conf);
        inputPath = new Path(uri);
        factory = new CompressionCodecFactory(conf);
        CompressionCodec codec = factory.getCodec(inputPath);
        if (codec == null) {
            System.err.println("No codec found for " + uri);

        String outputUri = CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());

        in = codec.createInputStream(fs.open(inputPath));
        out = fs.create(new Path(outputUri));

        IOUtils.copyBytes(in, out, conf);

    } catch (IOException e) {
    } finally {

From source file:co.nubetech.hiho.mapreduce.lib.output.FTPTextOutputFormat.java

License:Apache License

public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {

    Configuration conf = job.getConfiguration();

    String ip = conf.get(HIHOConf.FTP_ADDRESS);
    String portno = conf.get(HIHOConf.FTP_PORT);
    String usr = conf.get(HIHOConf.FTP_USER);
    String pwd = conf.get(HIHOConf.FTP_PASSWORD);
    String dir = getOutputPath(job).toString();
    System.out.println("\n\ninside ftpoutputformat" + ip + " " + portno + " " + usr + " " + pwd + " " + dir);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    FTPClient f = new FTPClient();
    f.connect(ip, Integer.parseInt(portno));
    f.login(usr, pwd);/* w  ww . j  a  v a  2s .c o  m*/

    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    String filename = file.getName();
    if (!isCompressed) {
        // FSDataOutputStream fileOut = fs.create(file, false);
        OutputStream os = f.appendFileStream(filename);
        DataOutputStream fileOut = new DataOutputStream(os);
        return new FTPLineRecordWriter<K, V>(fileOut, new String(keyValueSeparator), f);

    } else {
        // FSDataOutputStream fileOut = fs.create(file, false);
        OutputStream os = f.appendFileStream(filename);
        DataOutputStream fileOut = new DataOutputStream(os);
        return new FTPLineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator, f);

From source file:co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat.java

License:Apache License

public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    boolean isCompressed = getCompressOutput(context);
    Configuration conf = context.getConfiguration();
    String ext = "";
    CompressionCodec codec = null;

    if (isCompressed) {
        // create the named codec
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);

        ext = codec.getDefaultExtension();
    }/*from  www.j  a  v a2s  .  c  o  m*/

    Path file = getDefaultWorkFile(context, ext);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    DataOutputStream ostream = fileOut;

    if (isCompressed) {
        ostream = new DataOutputStream(codec.createOutputStream(fileOut));

    return new NoKeyRecordWriter<K, V>(ostream);

From source file:com.alexholmes.hadooputils.sort.DelimitedTextOutputFormat.java

License:Apache License

public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {

    SortConfig sortConf = new SortConfig(job);
    boolean isCompressed = getCompressOutput(job);
    String lineSeparator = sortConf.getRowSeparator("\n");
    byte[] hexcode = SortConfig.getHexDelimiter(lineSeparator);
    lineSeparator = (hexcode != null) ? new String(hexcode, "UTF-8") : lineSeparator;

    if (!isCompressed) {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new DelimitedLineRecordWriter<K, V>(fileOut, lineSeparator);
    } else {/*from  w w  w  . j  a  va  2s  .  c  o m*/
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
        Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new DelimitedLineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),

From source file:com.bonc.mr_roamRecognition_hjpt.comm.FileCountTextOutputFormat.java

License:Apache License

public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get(SEPERATOR, "\t");
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }/*from   w  w w . j  a  v a  2s.  c  o  m*/
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),

From source file:com.cloudera.flume.handlers.hdfs.CustomDfsSink.java

License:Apache License

/**
 * Hadoop Compression Codecs that use Native libs require an instance of a
 * Configuration Object. They require this due to some check against knowing
 * whether or not the native libs have been loaded. GzipCodec, LzoCodec,
 * LzopCodec are all codecs that require Native libs. GZipCodec has a slight
 * exception that if native libs are not accessible it will use Pure Java.
 * This results in no errors just notices. BZip2Codec is an example codec that
 * doesn't use native libs.
 */
public void open() throws IOException {
    FlumeConfiguration conf = FlumeConfiguration.get();
    FileSystem hdfs;
    String codecName = conf.getCollectorDfsCompressCodec();
    CompressionCodec codec = getCodec(conf, codecName);

    if (codec == null) {
        dstPath = new Path(path);
        hdfs = dstPath.getFileSystem(conf);
        pathManager = new PathManager(hdfs, dstPath.getParent(), dstPath.getName());
        writer = pathManager.open();
        LOG.info("Creating HDFS file: " + pathManager.getOpenPath());

    Compressor cmp = codec.createCompressor();
    dstPath = new Path(path + codec.getDefaultExtension());
    hdfs = dstPath.getFileSystem(conf);
    pathManager = new PathManager(hdfs, dstPath.getParent(), dstPath.getName());
    writer = pathManager.open();
    try {
        writer = codec.createOutputStream(writer, cmp);
    } catch (NullPointerException npe) {
        // tries to find "native" version of codec, if that fails, then tries to
        // find java version. If there is no java version, the createOutputStream
        // exits via NPE. We capture this and convert it into a IOE with a more
        // useful error message.
        LOG.error("Unable to load compression codec " + codec);
        throw new IOException("Unable to load compression codec " + codec);
    LOG.info("Creating " + codec + " compressed HDFS file: " + pathManager.getOpenPath());

From source file:com.cloudera.flume.handlers.hdfs.TestEscapedCustomOutputDfs.java

License:Apache License

void checkOutputFormat(String format, OutputFormat of, String codecName, CompressionCodec codec)
        throws IOException, InterruptedException {
    // set the output format.
    FlumeConfiguration conf = FlumeConfiguration.get();
    conf.set(FlumeConfiguration.COLLECTOR_OUTPUT_FORMAT, format);
    conf.set(FlumeConfiguration.COLLECTOR_DFS_COMPRESS_CODEC, codecName);

    // build a sink that outputs to that format.
    File f = FileUtil.mktempdir();
    SinkBuilder builder = EscapedCustomDfsSink.builder();
    EventSink snk = builder.create(new Context(), "file:///" + f.getPath() + "/sub-%{service}");
    Event e = new EventImpl("this is a test message".getBytes());
    Attributes.setString(e, "service", "foo");
    snk.open();// w w w .jav  a2 s.c o  m

    ByteArrayOutputStream exWriter = new ByteArrayOutputStream();
    of.format(exWriter, e);
    String expected = new String(exWriter.toByteArray());

    // check the output to make sure it is what we expected.

    // handle compression codec / extensions when checking.
    String ext = ""; // file extension
    if (codec != null) {
        ext = codec.getDefaultExtension();
    InputStream in = new FileInputStream(f.getPath() + "/sub-foo" + ext);
    if (codec != null) {
        in = codec.createInputStream(in);
    byte[] buf = new byte[1];
    StringBuilder output = new StringBuilder();
    // read the file
    while ((in.read(buf)) > 0) {
        output.append(new String(buf));
    in.close(); // Must close for windows to delete
    assertEquals(expected, output.toString());

    // This doesn't get deleted in windows but the core test succeeds
    assertTrue("temp folder successfully deleted", FileUtil.rmr(f));

From source file:com.cloudera.sqoop.mapreduce.RawKeyTextOutputFormat.java

License:Apache License

public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    boolean isCompressed = getCompressOutput(context);
    Configuration conf = context.getConfiguration();
    String ext = "";
    CompressionCodec codec = null;

    if (isCompressed) {
        // create the named codec
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);

        ext = codec.getDefaultExtension();
    }/*from   w ww  .ja  v  a  2s .  c  om*/

    Path file = getDefaultWorkFile(context, ext);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    DataOutputStream ostream = fileOut;

    if (isCompressed) {
        ostream = new DataOutputStream(codec.createOutputStream(fileOut));

    return new RawKeyRecordWriter<K, V>(ostream);

From source file:com.cloudera.sqoop.TestCompression.java

License:Apache License

public void runTextCompressionTest(CompressionCodec codec, int expectedNum) throws IOException {

    String[] columns = HsqldbTestServer.getFieldNames();
    String[] argv = getArgv(true, columns, codec, "--as-textfile");
    runImport(argv);//from  w  ww .  ja v a 2  s .c  om

    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    FileSystem fs = FileSystem.get(conf);

    if (codec == null) {
        codec = new GzipCodec();
    ReflectionUtils.setConf(codec, getConf());
    Path p = new Path(getDataFilePath().toString() + codec.getDefaultExtension());
    InputStream is = codec.createInputStream(fs.open(p));
    BufferedReader r = new BufferedReader(new InputStreamReader(is));
    int numLines = 0;
    while (true) {
        String ln = r.readLine();
        if (ln == null) {
    assertEquals(expectedNum, numLines);