Example usage for org.apache.hadoop.io.compress CompressionCodec getDefaultExtension

Introduction

This page collects example usages of org.apache.hadoop.io.compress.CompressionCodec#getDefaultExtension(), drawn from several open-source projects.

Prototype

String getDefaultExtension();

Document

Get the default filename extension for this kind of compression.
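
To make the return value concrete, here is a minimal, hedged sketch (not taken from any of the projects below) that instantiates two standard Hadoop codecs the same way the output formats in the Usage section do, then prints their default extensions. The expected outputs, ".gz" and ".bz2", are the conventional extensions of GzipCodec and BZip2Codec.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DefaultExtensionDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Instantiate codecs via ReflectionUtils, as the output formats below do.
        CompressionCodec gzip = ReflectionUtils.newInstance(GzipCodec.class, conf);
        CompressionCodec bzip2 = ReflectionUtils.newInstance(BZip2Codec.class, conf);

        // getDefaultExtension() includes the leading dot, so the result can be
        // appended directly to an output file name.
        System.out.println(gzip.getDefaultExtension());  // .gz
        System.out.println(bzip2.getDefaultExtension()); // .bz2
    }
}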

Usage

From source file:be.ugent.intec.halvade.uploader.mapreduce.MyFastqOutputFormat.java

public RecordWriter<PairedIdWritable, FastqRecord> getRecordWriter(TaskAttemptContext task) throws IOException {
    Configuration conf = task.getConfiguration();
    boolean isCompressed = getCompressOutput(task);

    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(task, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(task, extension);
    FileSystem fs = file.getFileSystem(conf);

    OutputStream output;

    if (isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        output = new DataOutputStream(codec.createOutputStream(fileOut));
    } else {
        output = fs.create(file, false);
    }

    return new FastqRecordWriter(conf, output);
}

From source file:cn.lhfei.hadoop.ch04.FileDecompressor.java

License:Apache License

/**
 * use case: % hadoop FileDecompressor file.gz
 * @param args
 */
public static void main(String[] args) {
    FileSystem fs = null;
    String uri = args[0];
    Path inputPath = null;
    Configuration conf = new Configuration();
    CompressionCodecFactory factory = null;

    InputStream in = null;
    OutputStream out = null;

    try {
        fs = FileSystem.get(URI.create(uri), conf);
        inputPath = new Path(uri);
        factory = new CompressionCodecFactory(conf);
        CompressionCodec codec = factory.getCodec(inputPath);
        if (codec == null) {
            System.err.println("No codec found for " + uri);
            System.exit(1);
        }

        String outputUri = CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());

        in = codec.createInputStream(fs.open(inputPath));
        out = fs.create(new Path(outputUri));

        IOUtils.copyBytes(in, out, conf);

    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(in);
        IOUtils.closeStream(out);
    }
}
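
The example above derives its output path with CompressionCodecFactory.removeSuffix, which pairs naturally with getDefaultExtension: one appends the extension on write, the other strips it on read. Below is a minimal sketch of that round trip; the class name DefaultExtensionRoundTrip and the "part-00000" file name are made up for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DefaultExtensionRoundTrip {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);

        // Append the default extension when naming compressed output ...
        String compressed = "part-00000" + codec.getDefaultExtension(); // part-00000.gz

        // ... and strip it again to name the decompressed output.
        String plain = CompressionCodecFactory.removeSuffix(compressed,
                codec.getDefaultExtension());
        System.out.println(plain); // part-00000
    }
}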

From source file:co.nubetech.hiho.mapreduce.lib.output.FTPTextOutputFormat.java

License:Apache License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {

    Configuration conf = job.getConfiguration();

    String ip = conf.get(HIHOConf.FTP_ADDRESS);
    String portno = conf.get(HIHOConf.FTP_PORT);
    String usr = conf.get(HIHOConf.FTP_USER);
    String pwd = conf.get(HIHOConf.FTP_PASSWORD);
    String dir = getOutputPath(job).toString();
    System.out.println("\n\ninside ftpoutputformat" + ip + " " + portno + " " + usr + " " + pwd + " " + dir);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    FTPClient f = new FTPClient();
    f.connect(ip, Integer.parseInt(portno));
    f.login(usr, pwd);
    f.changeWorkingDirectory(dir);
    f.setFileType(FTP.BINARY_FILE_TYPE);

    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    String filename = file.getName();
    if (!isCompressed) {
        // FSDataOutputStream fileOut = fs.create(file, false);
        OutputStream os = f.appendFileStream(filename);
        DataOutputStream fileOut = new DataOutputStream(os);
        return new FTPLineRecordWriter<K, V>(fileOut, new String(keyValueSeparator), f);

    } else {
        // FSDataOutputStream fileOut = fs.create(file, false);
        OutputStream os = f.appendFileStream(filename);
        DataOutputStream fileOut = new DataOutputStream(os);
        return new FTPLineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator, f);
    }
}

From source file:co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat.java

License:Apache License

public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    boolean isCompressed = getCompressOutput(context);
    Configuration conf = context.getConfiguration();
    String ext = "";
    CompressionCodec codec = null;

    if (isCompressed) {
        // create the named codec
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);

        ext = codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(context, ext);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    DataOutputStream ostream = fileOut;

    if (isCompressed) {
        ostream = new DataOutputStream(codec.createOutputStream(fileOut));
    }

    return new NoKeyRecordWriter<K, V>(ostream);
}

From source file:com.alexholmes.hadooputils.sort.DelimitedTextOutputFormat.java

License:Apache License

public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {

    SortConfig sortConf = new SortConfig(job);
    boolean isCompressed = getCompressOutput(job);
    String lineSeparator = sortConf.getRowSeparator("\n");
    byte[] hexcode = SortConfig.getHexDelimiter(lineSeparator);
    lineSeparator = (hexcode != null) ? new String(hexcode, "UTF-8") : lineSeparator;

    if (!isCompressed) {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new DelimitedLineRecordWriter<K, V>(fileOut, lineSeparator);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
        Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new DelimitedLineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                lineSeparator);
    }
}

From source file:com.bonc.mr_roamRecognition_hjpt.comm.FileCountTextOutputFormat.java

License:Apache License

public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get(SEPERATOR, "\t");
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}

From source file:com.cloudera.flume.handlers.hdfs.CustomDfsSink.java

License:Apache License

/**
 * Hadoop compression codecs that use native libraries require an instance of
 * a Configuration object, because they check whether or not the native libs
 * have been loaded. GzipCodec, LzoCodec, and LzopCodec are all codecs that
 * require native libs. GzipCodec is a slight exception: if the native libs
 * are not accessible it falls back to pure Java, which produces notices
 * rather than errors. BZip2Codec is an example of a codec that doesn't use
 * native libs.
 */
@Override
public void open() throws IOException {
    FlumeConfiguration conf = FlumeConfiguration.get();
    FileSystem hdfs;
    String codecName = conf.getCollectorDfsCompressCodec();
    CompressionCodec codec = getCodec(conf, codecName);

    if (codec == null) {
        dstPath = new Path(path);
        hdfs = dstPath.getFileSystem(conf);
        pathManager = new PathManager(hdfs, dstPath.getParent(), dstPath.getName());
        writer = pathManager.open();
        LOG.info("Creating HDFS file: " + pathManager.getOpenPath());
        return;
    }

    Compressor cmp = codec.createCompressor();
    dstPath = new Path(path + codec.getDefaultExtension());
    hdfs = dstPath.getFileSystem(conf);
    pathManager = new PathManager(hdfs, dstPath.getParent(), dstPath.getName());
    writer = pathManager.open();
    try {
        writer = codec.createOutputStream(writer, cmp);
    } catch (NullPointerException npe) {
        // tries to find "native" version of codec, if that fails, then tries to
        // find java version. If there is no java version, the createOutputStream
        // exits via NPE. We capture this and convert it into a IOE with a more
        // useful error message.
        LOG.error("Unable to load compression codec " + codec);
        throw new IOException("Unable to load compression codec " + codec);
    }
    LOG.info("Creating " + codec + " compressed HDFS file: " + pathManager.getOpenPath());
}
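
The javadoc above points out that LzoCodec and LzopCodec are unusable without Hadoop's native libraries, while GzipCodec can fall back to pure Java and BZip2Codec needs no native code at all. As a hedged sketch of one way to act on that (the chooseCodec helper is hypothetical, not part of the Flume source), NativeCodeLoader can be consulted before committing to a native-only codec:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.NativeCodeLoader;
import org.apache.hadoop.util.ReflectionUtils;

public class CodecFallback {
    // Hypothetical helper: use the preferred codec when it is safe, otherwise
    // fall back to the pure-Java BZip2Codec.
    static CompressionCodec chooseCodec(Configuration conf,
            Class<? extends CompressionCodec> preferred) {
        boolean nativeOk = NativeCodeLoader.isNativeCodeLoaded();
        // GzipCodec degrades gracefully to java.util.zip; codecs such as
        // LzoCodec do not, so avoid them when native libs are missing.
        if (nativeOk || GzipCodec.class.isAssignableFrom(preferred)) {
            return ReflectionUtils.newInstance(preferred, conf);
        }
        return ReflectionUtils.newInstance(BZip2Codec.class, conf);
    }
}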

From source file:com.cloudera.flume.handlers.hdfs.TestEscapedCustomOutputDfs.java

License:Apache License

void checkOutputFormat(String format, OutputFormat of, String codecName, CompressionCodec codec)
        throws IOException, InterruptedException {
    // set the output format.
    FlumeConfiguration conf = FlumeConfiguration.get();
    conf.set(FlumeConfiguration.COLLECTOR_OUTPUT_FORMAT, format);
    conf.set(FlumeConfiguration.COLLECTOR_DFS_COMPRESS_CODEC, codecName);

    // build a sink that outputs to that format.
    File f = FileUtil.mktempdir();
    SinkBuilder builder = EscapedCustomDfsSink.builder();
    EventSink snk = builder.create(new Context(), "file:///" + f.getPath() + "/sub-%{service}");
    Event e = new EventImpl("this is a test message".getBytes());
    Attributes.setString(e, "service", "foo");
    snk.open();
    snk.append(e);
    snk.close();

    ByteArrayOutputStream exWriter = new ByteArrayOutputStream();
    of.format(exWriter, e);
    exWriter.close();
    String expected = new String(exWriter.toByteArray());

    // check the output to make sure it is what we expected.

    // handle compression codec / extensions when checking.
    String ext = ""; // file extension
    if (codec != null) {
        ext = codec.getDefaultExtension();
    }
    InputStream in = new FileInputStream(f.getPath() + "/sub-foo" + ext);
    if (codec != null) {
        in = codec.createInputStream(in);
    }
    byte[] buf = new byte[1];
    StringBuilder output = new StringBuilder();
    // read the file
    while ((in.read(buf)) > 0) {
        output.append(new String(buf));
    }
    in.close(); // Must close for windows to delete
    assertEquals(expected, output.toString());

    // This doesn't get deleted in windows but the core test succeeds
    assertTrue("temp folder successfully deleted", FileUtil.rmr(f));
}

From source file:com.cloudera.sqoop.mapreduce.RawKeyTextOutputFormat.java

License:Apache License

public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    boolean isCompressed = getCompressOutput(context);
    Configuration conf = context.getConfiguration();
    String ext = "";
    CompressionCodec codec = null;

    if (isCompressed) {
        // create the named codec
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);

        ext = codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(context, ext);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    DataOutputStream ostream = fileOut;

    if (isCompressed) {
        ostream = new DataOutputStream(codec.createOutputStream(fileOut));
    }

    return new RawKeyRecordWriter<K, V>(ostream);
}

From source file:com.cloudera.sqoop.TestCompression.java

License:Apache License

public void runTextCompressionTest(CompressionCodec codec, int expectedNum) throws IOException {

    String[] columns = HsqldbTestServer.getFieldNames();
    String[] argv = getArgv(true, columns, codec, "--as-textfile");
    runImport(argv);

    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);

    if (codec == null) {
        codec = new GzipCodec();
    }
    ReflectionUtils.setConf(codec, getConf());
    Path p = new Path(getDataFilePath().toString() + codec.getDefaultExtension());
    InputStream is = codec.createInputStream(fs.open(p));
    BufferedReader r = new BufferedReader(new InputStreamReader(is));
    int numLines = 0;
    while (true) {
        String ln = r.readLine();
        if (ln == null) {
            break;
        }
        numLines++;
    }
    r.close();
    assertEquals(expectedNum, numLines);
}