Example usage for org.apache.hadoop.io.compress CompressionCodec createInputStream

Introduction

In this page you can find the example usage for org.apache.hadoop.io.compress CompressionCodec createInputStream.

Prototype

CompressionInputStream createInputStream(InputStream in) throws IOException;

Source Link

Document

Create a CompressionInputStream that will read from the given input stream.

Usage

From source file:brush.FastqRecordReader.java

License:Apache License

/**
 * Builds a new record reader given a config file and an input split.
 *
 * @param conf The Hadoop configuration object. Used for gaining access
 *   to the underlying file system./*from  w  w w .  j a v a2s  . c  om*/
 * @param split The file split to read.
 */
protected FastqRecordReader(final Configuration conf, final FileSplit split) throws IOException {
    file = split.getPath();
    start = split.getStart();
    end = start + split.getLength();

    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(file);

    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = codecFactory.getCodec(file);

    if (codec == null) { // no codec.  Uncompressed file.
        positionAtFirstRecord(fileIn);
        inputStream = fileIn;
    } else {
        // compressed file
        if (start != 0) {
            throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");
        }

        inputStream = codec.createInputStream(fileIn);
        end = Long.MAX_VALUE; // read until the end of the file
    }

    lineReader = new LineReader(inputStream);
}

From source file:cn.lhfei.hadoop.ch04.FileDecompressor.java

License:Apache License

/**
 * use case: % hadoop FileDecompressor file.gz
 * @param args/*from  w  w w  .j  a  va2 s.co  m*/
 */
public static void main(String[] args) {
    FileSystem fs = null;
    String uri = args[0];
    Path inputPath = null;
    Configuration conf = new Configuration();
    CompressionCodecFactory factory = null;

    InputStream in = null;
    OutputStream out = null;

    try {
        fs = FileSystem.get(URI.create(uri), conf);
        inputPath = new Path(uri);
        factory = new CompressionCodecFactory(conf);
        CompressionCodec codec = factory.getCodec(inputPath);
        if (codec == null) {
            System.err.println("No codec found for " + uri);
            System.exit(1);
        }

        String outputUri = CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());

        in = codec.createInputStream(fs.open(inputPath));
        out = fs.create(new Path(outputUri));

        IOUtils.copyBytes(in, out, conf);

    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(in);
        IOUtils.closeStream(out);
    }
}

From source file:co.nubetech.hiho.dedup.DelimitedLineRecordReader.java

License:Apache License

/**
 * //from   w  w w .ja  va 2s  .  co m
 * @param delimiter
 * @param column
 * 
 * 
 */

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.delimiter = job.get(DelimitedTextInputFormat.DELIMITER_CONF);
    this.column = job.getInt(DelimitedTextInputFormat.COLUMN_CONF, 0);
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}

From source file:com.alexholmes.hadooputils.sort.DelimitedLineRecordReader.java

License:Apache License

protected void initialize(Configuration job, FileSplit split) throws IOException {
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();//from   www  . j  a v a  2 s  . c o m
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    String rowDelim = job.get("textinputformat.record.delimiter", null);
    if (codec != null) {
        if (rowDelim != null) {
            byte[] hexcode = SortConfig.getHexDelimiter(rowDelim);
            in = new DelimitedLineReader(codec.createInputStream(fileIn), job,
                    (hexcode != null) ? hexcode : rowDelim.getBytes());
        } else {
            in = new DelimitedLineReader(codec.createInputStream(fileIn), job);
        }
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        if (rowDelim != null) {
            byte[] hexcode = SortConfig.getHexDelimiter(rowDelim);
            in = new DelimitedLineReader(fileIn, job, (hexcode != null) ? hexcode : rowDelim.getBytes());
        } else {
            in = new DelimitedLineReader(fileIn, job);
        }
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}

From source file:com.alexholmes.hadooputils.sort.LzoDelimitedLineRecordReader.java

License:Apache License

@Override
protected void initialize(Configuration job, FileSplit split) throws IOException {
    start = split.getStart();/*  w  w w.j  a  v  a2 s .c  o  m*/
    end = start + split.getLength();
    final Path file = split.getPath();

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("No codec for file " + file + " not found, cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());

    // creates input stream and also reads the file header
    String rowDelim = job.get("textinputformat.record.delimiter", null);
    if (rowDelim != null) {
        byte[] hexcode = SortConfig.getHexDelimiter(rowDelim);
        in = new DelimitedLineReader(fileIn, job, (hexcode != null) ? hexcode : rowDelim.getBytes());
    } else {
        in = new DelimitedLineReader(codec.createInputStream(fileIn), job);
    }

    if (start != 0) {
        fileIn.seek(start);

        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }

    this.pos = start;
}

From source file:com.asakusafw.runtime.io.text.directio.AbstractTextStreamFormat.java

License:Apache License

private InputStream decorate(InputStream stream, long offset, long splitSize) throws IOException {
    InputSplitter splitter = getInputSplitter();
    if (splitter != null) {
        assert getCompressionCodecClass() == null;
        return splitter.trim(stream, offset, splitSize != -1L ? splitSize : Long.MAX_VALUE);
    }// ww w  .  j a  v a  2s  .  c  om
    Class<? extends CompressionCodec> codecClass = getCompressionCodecClass();
    if (codecClass != null) {
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, getConf());
        return codec.createInputStream(stream);
    }
    return stream;
}

From source file:com.cloudera.flume.handlers.hdfs.TestEscapedCustomOutputDfs.java

License:Apache License

void checkOutputFormat(String format, OutputFormat of, String codecName, CompressionCodec codec)
        throws IOException, InterruptedException {
    // set the output format.
    FlumeConfiguration conf = FlumeConfiguration.get();
    conf.set(FlumeConfiguration.COLLECTOR_OUTPUT_FORMAT, format);
    conf.set(FlumeConfiguration.COLLECTOR_DFS_COMPRESS_CODEC, codecName);

    // build a sink that outputs to that format.
    File f = FileUtil.mktempdir();
    SinkBuilder builder = EscapedCustomDfsSink.builder();
    EventSink snk = builder.create(new Context(), "file:///" + f.getPath() + "/sub-%{service}");
    Event e = new EventImpl("this is a test message".getBytes());
    Attributes.setString(e, "service", "foo");
    snk.open();//www .j a v  a  2 s .  c o m
    snk.append(e);
    snk.close();

    ByteArrayOutputStream exWriter = new ByteArrayOutputStream();
    of.format(exWriter, e);
    exWriter.close();
    String expected = new String(exWriter.toByteArray());

    // check the output to make sure it is what we expected.

    // handle compression codec / extensions when checking.
    String ext = ""; // file extension
    if (codec != null) {
        ext = codec.getDefaultExtension();
    }
    InputStream in = new FileInputStream(f.getPath() + "/sub-foo" + ext);
    if (codec != null) {
        in = codec.createInputStream(in);
    }
    byte[] buf = new byte[1];
    StringBuilder output = new StringBuilder();
    // read the file
    while ((in.read(buf)) > 0) {
        output.append(new String(buf));
    }
    in.close(); // Must close for windows to delete
    assertEquals(expected, output.toString());

    // This doesn't get deleted in windows but the core test succeeds
    assertTrue("temp folder successfully deleted", FileUtil.rmr(f));
}

From source file:com.cloudera.sqoop.TestCompression.java

License:Apache License

public void runTextCompressionTest(CompressionCodec codec, int expectedNum) throws IOException {

    String[] columns = HsqldbTestServer.getFieldNames();
    String[] argv = getArgv(true, columns, codec, "--as-textfile");
    runImport(argv);//  w w w.  j  a  v  a 2  s. c o  m

    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);

    if (codec == null) {
        codec = new GzipCodec();
    }
    ReflectionUtils.setConf(codec, getConf());
    Path p = new Path(getDataFilePath().toString() + codec.getDefaultExtension());
    InputStream is = codec.createInputStream(fs.open(p));
    BufferedReader r = new BufferedReader(new InputStreamReader(is));
    int numLines = 0;
    while (true) {
        String ln = r.readLine();
        if (ln == null) {
            break;
        }
        numLines++;
    }
    r.close();
    assertEquals(expectedNum, numLines);
}

From source file:com.ds.lzo.DeprecatedLzoLineRecordReaderForCombined.java

License:Open Source License

public DeprecatedLzoLineRecordReaderForCombined(Configuration conf, FileSplit split) throws IOException {
    LOG.warn("split start: " + split.getStart());
    LOG.warn("split length: " + split.getLength());
    String[] locs = split.getLocations();
    for (String loc : locs) {
        LOG.warn("location: " + loc);
    }//from w  w  w .  j  a  v a  2s.  c o m
    start = split.getStart();
    end = start + split.getLength();
    LOG.warn("split end: " + end);
    final Path file = split.getPath();
    LOG.warn("file: " + file.getName());
    LOG.warn("INT split start: " + (int) split.getStart());
    LOG.warn("INT split length: " + (int) split.getLength());
    LOG.warn("INT split end: " + (int) end);

    FileSystem fs = file.getFileSystem(conf);
    codecFactory = new CompressionCodecFactory(conf);
    final CompressionCodec codec = codecFactory.getCodec(file);
    LOG.warn("codec: " + codec.toString());
    LOG.warn("config: " + conf.toString());
    if (codec == null) {
        throw new IOException("No LZO codec found, cannot run.");
    }

    // Open the file and seek to the next split.
    fileIn = fs.open(file);
    // Create input stream and read the file header.
    in = new LineReader(codec.createInputStream(fileIn), conf);
    if (start != 0) {
        fileIn.seek(start);
        LOG.warn("fileIn position: " + fileIn.getPos());
        LOG.warn("buffer size: " + conf.get("io.file.buffer.size"));

        // Read and ignore the first line.
        in.readLine(new Text());
        start = fileIn.getPos();
    }

    pos = start;
}

From source file:com.facebook.presto.hadoop.TestHadoopNative.java

License:Apache License

private static byte[] decompress(CompressionCodec codec, byte[] input) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (InputStream in = codec.createInputStream(new ByteArrayInputStream(input))) {
        int b;/*  w w w.  jav  a2 s  .c o m*/
        while ((b = in.read()) != -1) {
            bytes.write(b);
        }
    }
    return bytes.toByteArray();
}