Example usage for org.apache.hadoop.io.compress CompressionCodec createOutputStream

Introduction

This page collects example usages of org.apache.hadoop.io.compress.CompressionCodec#createOutputStream.

Prototype

CompressionOutputStream createOutputStream(OutputStream out) throws IOException;

Document

Create a CompressionOutputStream that will write to the given OutputStream.
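
For orientation, here is a minimal sketch of the usual call pattern. The choice of GzipCodec and the output path are illustrative assumptions, not prescribed by the examples below:

Configuration conf = new Configuration();
CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
Path file = new Path("/tmp/example.txt.gz"); // hypothetical path
FileSystem fs = file.getFileSystem(conf);
try (FSDataOutputStream raw = fs.create(file, false);
        CompressionOutputStream out = codec.createOutputStream(raw)) {
    out.write("hello\n".getBytes(StandardCharsets.UTF_8));
    out.finish(); // flush any buffered compressed data (close() also finishes)
}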

Usage

From source file: be.ugent.intec.halvade.uploader.mapreduce.MyFastqOutputFormat.java

public RecordWriter<PairedIdWritable, FastqRecord> getRecordWriter(TaskAttemptContext task) throws IOException {
    Configuration conf = task.getConfiguration();
    boolean isCompressed = getCompressOutput(task);

    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(task, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(task, extension);
    FileSystem fs = file.getFileSystem(conf);

    OutputStream output;

    if (isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        output = new DataOutputStream(codec.createOutputStream(fileOut));
    } else {
        output = fs.create(file, false);
    }

    return new FastqRecordWriter(conf, output);
}

From source file: cn.lhfei.hadoop.ch04.StreamCompressor.java

License: Apache License

/**
 * Use case: % echo "Text" | hadoop StreamCompressor org.apache.hadoop.io.compress.GzipCodec | gunzip -
 * (prints "Text")
 * @param args args[0] is the fully qualified class name of the codec to use
 */
public static void main(String[] args) {
    String codecClassname = args[0];

    try {
        Class<?> codecClass = Class.forName(codecClassname);

        Configuration conf = new Configuration();

        CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);

        CompressionOutputStream out = codec.createOutputStream(System.out);

        IOUtils.copyBytes(System.in, out, 4096, false);

        out.finish(); // finish() flushes the compressed stream without closing System.out

    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

}

From source file: co.nubetech.hiho.mapreduce.lib.output.FTPTextOutputFormat.java

License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {

    Configuration conf = job.getConfiguration();

    String ip = conf.get(HIHOConf.FTP_ADDRESS);
    String portno = conf.get(HIHOConf.FTP_PORT);
    String usr = conf.get(HIHOConf.FTP_USER);
    String pwd = conf.get(HIHOConf.FTP_PASSWORD);
    String dir = getOutputPath(job).toString();
    System.out.println("\n\ninside ftpoutputformat" + ip + " " + portno + " " + usr + " " + pwd + " " + dir);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    FTPClient f = new FTPClient();
    f.connect(ip, Integer.parseInt(portno));
    f.login(usr, pwd);
    f.changeWorkingDirectory(dir);
    f.setFileType(FTP.BINARY_FILE_TYPE);

    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    String filename = file.getName();
    if (!isCompressed) {
        // FSDataOutputStream fileOut = fs.create(file, false);
        OutputStream os = f.appendFileStream(filename);
        DataOutputStream fileOut = new DataOutputStream(os);
        return new FTPLineRecordWriter<K, V>(fileOut, new String(keyValueSeparator), f);

    } else {
        // FSDataOutputStream fileOut = fs.create(file, false);
        OutputStream os = f.appendFileStream(filename);
        DataOutputStream fileOut = new DataOutputStream(os);
        return new FTPLineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator, f);
    }
}

From source file: co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat.java

License: Apache License

public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    boolean isCompressed = getCompressOutput(context);
    Configuration conf = context.getConfiguration();
    String ext = "";
    CompressionCodec codec = null;

    if (isCompressed) {
        // create the named codec
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);

        ext = codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(context, ext);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    DataOutputStream ostream = fileOut;

    if (isCompressed) {
        ostream = new DataOutputStream(codec.createOutputStream(fileOut));
    }

    return new NoKeyRecordWriter<K, V>(ostream);
}

From source file: com.alexholmes.hadooputils.sort.DelimitedTextOutputFormat.java

License: Apache License

public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {

    SortConfig sortConf = new SortConfig(job);
    boolean isCompressed = getCompressOutput(job);
    String lineSeparator = sortConf.getRowSeparator("\n");
    byte[] hexcode = SortConfig.getHexDelimiter(lineSeparator);
    lineSeparator = (hexcode != null) ? new String(hexcode, "UTF-8") : lineSeparator;

    if (!isCompressed) {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new DelimitedLineRecordWriter<K, V>(fileOut, lineSeparator);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
        Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new DelimitedLineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                lineSeparator);
    }
}

From source file: com.asakusafw.runtime.io.text.directio.AbstractTextStreamFormat.java

License: Apache License

private OutputStream decorate(OutputStream stream) throws IOException {
    Class<? extends CompressionCodec> codecClass = getCompressionCodecClass();
    if (codecClass != null) {
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, getConf());
        return codec.createOutputStream(stream);
    }
    return stream;
}

From source file: com.bonc.mr_roamRecognition_hjpt.comm.FileCountTextOutputFormat.java

License: Apache License

public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get(SEPERATOR, "\t");
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}

From source file: com.cloudera.sqoop.mapreduce.RawKeyTextOutputFormat.java

License: Apache License

public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    boolean isCompressed = getCompressOutput(context);
    Configuration conf = context.getConfiguration();
    String ext = "";
    CompressionCodec codec = null;

    if (isCompressed) {
        // create the named codec
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);

        ext = codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(context, ext);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    DataOutputStream ostream = fileOut;

    if (isCompressed) {
        ostream = new DataOutputStream(codec.createOutputStream(fileOut));
    }

    return new RawKeyRecordWriter<K, V>(ostream);
}

From source file: com.cloudera.sqoop.TestExport.java

License: Apache License

/**
 * Create a data file that gets exported to the db.
 * @param fileNum the number of the file (for multi-file export)
 * @param numRecords how many records to write to the file.
 * @param gzip is true if the file should be gzipped.
 */
protected void createTextFile(int fileNum, int numRecords, boolean gzip, ColumnGenerator... extraCols)
        throws IOException {
    int startId = fileNum * numRecords;

    String ext = ".txt";
    if (gzip) {
        ext = ext + ".gz";
    }
    Path tablePath = getTablePath();
    Path filePath = new Path(tablePath, "part" + fileNum + ext);

    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);
    fs.mkdirs(tablePath);
    OutputStream os = fs.create(filePath);
    if (gzip) {
        CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
        CompressionCodec codec = ccf.getCodec(filePath);
        os = codec.createOutputStream(os);
    }
    BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
    for (int i = 0; i < numRecords; i++) {
        w.write(getRecordLine(startId + i, extraCols));
    }
    w.close();
    os.close();

    if (gzip) {
        verifyCompressedFile(filePath, numRecords);
    }
}

From source file: com.facebook.presto.hadoop.TestHadoopNative.java

License: Apache License

private static byte[] compress(CompressionCodec codec, byte[] input) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (OutputStream out = codec.createOutputStream(bytes)) {
        out.write(input); // try-with-resources closes (and finishes) the stream
    }
    return bytes.toByteArray();
}