List of usage examples for org.apache.hadoop.io.compress.CodecPool.getDecompressor
public static Decompressor getDecompressor(CompressionCodec codec)
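CodecPool.getDecompressor borrows a reusable Decompressor for the given codec from a static pool, creating a fresh one if none is cached; callers are expected to hand it back with CodecPool.returnDecompressor once the stream is finished. Below is a minimal sketch of that borrow/use/return pattern (the file path is a placeholder, and the null-codec check stands in for whatever error handling a real reader needs); the examples that follow typically keep the Decompressor in a field and return it when the reader or fetcher shuts down, which is not shown in these excerpts.

import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Decompressor;

public class CodecPoolSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/example.gz"); // placeholder input path
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
        if (codec == null) {
            throw new IllegalArgumentException("No codec found for " + file);
        }
        FileSystem fs = file.getFileSystem(conf);
        // Borrow a pooled decompressor instead of allocating one per stream
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        try (InputStream in = codec.createInputStream(fs.open(file), decompressor)) {
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            // Hand the decompressor back so other readers can reuse it
            CodecPool.returnDecompressor(decompressor);
        }
    }
}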
From source file:org.apache.hawq.pxf.plugins.hdfs.ChunkRecordReader.java
License:Apache License
/**
 * Constructs a ChunkRecordReader instance.
 *
 * @param job the job configuration
 * @param split contains the file name, begin byte of the split and the
 *        bytes length
 * @throws IOException if an I/O error occurs when accessing the file or
 *         creating input stream to read from it
 */
public ChunkRecordReader(Configuration job, FileSplit split) throws IOException {
    maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    validateLength(maxLineLength);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    job.setBoolean(DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, true);
    final FileSystem fs = file.getFileSystem(job);
    fs.setVerifyChecksum(false);
    fileIn = fs.open(file, ChunkReader.DEFAULT_BUFFER_SIZE);
    fileLength = getInputStream().getFileLength();
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new ChunkReader(cIn);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            in = new ChunkReader(codec.createInputStream(fileIn, decompressor));
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new ChunkReader(fileIn);
        filePosition = fileIn;
    }
    /*
     * If this is not the first split, we always throw away first record
     * because we always (except the last split) read one extra line in
     * next() method.
     */
    if (start != 0) {
        start += in.readLine(new ChunkWritable(), maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file:org.apache.jena.grande.mapreduce.io.TripleRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    // RIOT configuration
    profile = Utils.createParserProfile(context, split.getPath());
    // inputByteCounter = ((MapContext)context).getCounter(FileInputFormat.Counter.BYTES_READ);
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
        filePosition = fileIn;
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file:org.apache.tez.engine.common.shuffle.impl.Fetcher.java
License:Apache License
public Fetcher(Configuration job, TezTaskAttemptID reduceId, ShuffleScheduler scheduler, MergeManager merger,
        TezTaskReporter reporter, ShuffleClientMetrics metrics, ExceptionReporter exceptionReporter,
        SecretKey jobTokenSecret) {
    this.job = job;
    this.reporter = reporter;
    this.scheduler = scheduler;
    this.merger = merger;
    this.metrics = metrics;
    this.exceptionReporter = exceptionReporter;
    this.id = ++nextId;
    this.reduce = reduceId.getTaskID().getId();
    this.jobTokenSecret = jobTokenSecret;
    ioErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.IO_ERROR.toString());
    wrongLengthErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_LENGTH.toString());
    badIdErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.BAD_ID.toString());
    wrongMapErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_MAP.toString());
    connectionErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.CONNECTION.toString());
    wrongReduceErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_REDUCE.toString());

    if (ConfigUtils.isIntermediateInputCompressed(job)) {
        Class<? extends CompressionCodec> codecClass = ConfigUtils.getIntermediateInputCompressorClass(job,
                DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job);
        decompressor = CodecPool.getDecompressor(codec);
    } else {
        codec = null;
        decompressor = null;
    }

    this.connectionTimeout = job.getInt(TezJobConfig.TEZ_ENGINE_SHUFFLE_CONNECT_TIMEOUT,
            TezJobConfig.DEFAULT_TEZ_ENGINE_SHUFFLE_STALLED_COPY_TIMEOUT);
    this.readTimeout = job.getInt(TezJobConfig.TEZ_ENGINE_SHUFFLE_READ_TIMEOUT,
            TezJobConfig.DEFAULT_TEZ_ENGINE_SHUFFLE_READ_TIMEOUT);

    setName("fetcher#" + id);
    setDaemon(true);

    synchronized (Fetcher.class) {
        sslShuffle = job.getBoolean(TezJobConfig.TEZ_ENGINE_SHUFFLE_ENABLE_SSL,
                TezJobConfig.DEFAULT_TEZ_ENGINE_SHUFFLE_ENABLE_SSL);
        if (sslShuffle && sslFactory == null) {
            sslFactory = new SSLFactory(SSLFactory.Mode.CLIENT, job);
            try {
                sslFactory.init();
            } catch (Exception ex) {
                sslFactory.destroy();
                throw new RuntimeException(ex);
            }
        }
    }
}
From source file:org.apache.tez.runtime.library.common.shuffle.impl.Fetcher.java
License:Apache License
public Fetcher(Configuration job, ShuffleScheduler scheduler, MergeManager merger, ShuffleClientMetrics metrics,
        Shuffle shuffle, SecretKey jobTokenSecret, boolean ifileReadAhead, int ifileReadAheadLength,
        CompressionCodec codec, TezInputContext inputContext) throws IOException {
    this.job = job;
    this.scheduler = scheduler;
    this.merger = merger;
    this.metrics = metrics;
    this.shuffle = shuffle;
    this.id = ++nextId;
    this.jobTokenSecret = jobTokenSecret;
    ioErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.IO_ERROR.toString());
    wrongLengthErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME,
            ShuffleErrors.WRONG_LENGTH.toString());
    badIdErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.BAD_ID.toString());
    wrongMapErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME,
            ShuffleErrors.WRONG_MAP.toString());
    connectionErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME,
            ShuffleErrors.CONNECTION.toString());
    wrongReduceErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME,
            ShuffleErrors.WRONG_REDUCE.toString());
    this.ifileReadAhead = ifileReadAhead;
    this.ifileReadAheadLength = ifileReadAheadLength;
    if (codec != null) {
        this.codec = codec;
        this.decompressor = CodecPool.getDecompressor(codec);
    } else {
        this.codec = null;
        this.decompressor = null;
    }

    this.connectionTimeout = job.getInt(TezJobConfig.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_STALLED_COPY_TIMEOUT);
    this.readTimeout = job.getInt(TezJobConfig.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT);

    setName("fetcher#" + id);
    setDaemon(true);

    synchronized (Fetcher.class) {
        sslShuffle = job.getBoolean(TezJobConfig.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL,
                TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_ENABLE_SSL);
        if (sslShuffle && sslFactory == null) {
            sslFactory = new SSLFactory(SSLFactory.Mode.CLIENT, job);
            try {
                sslFactory.init();
            } catch (Exception ex) {
                sslFactory.destroy();
                throw new RuntimeException(ex);
            }
        }
    }
}
From source file:org.apache.tez.runtime.library.shuffle.common.ShuffleUtils.java
License:Apache License
@SuppressWarnings("resource") public static void shuffleToMemory(MemoryFetchedInput fetchedInput, InputStream input, int decompressedLength, int compressedLength, CompressionCodec codec, boolean ifileReadAhead, int ifileReadAheadLength, Log LOG) throws IOException { IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, ifileReadAhead, ifileReadAheadLength);//from w ww .j a va 2 s .co m input = checksumIn; // Are map-outputs compressed? if (codec != null) { Decompressor decompressor = CodecPool.getDecompressor(codec); decompressor.reset(); input = codec.createInputStream(input, decompressor); } // Copy map-output into an in-memory buffer byte[] shuffleData = fetchedInput.getBytes(); try { IOUtils.readFully(input, shuffleData, 0, shuffleData.length); // metrics.inputBytes(shuffleData.length); LOG.info("Read " + shuffleData.length + " bytes from input for " + fetchedInput.getInputAttemptIdentifier()); } catch (IOException ioe) { // Close the streams IOUtils.cleanup(LOG, input); // Re-throw throw ioe; } }
From source file:org.hedera.util.SeekableInputStream.java
License:Apache License
public static SeekableInputStream getInstance(Path path, long start, long end, FileSystem fs,
        CompressionCodecFactory compressionCodecs) throws IOException {
    CompressionCodec codec = compressionCodecs.getCodec(path);
    FSDataInputStream din = fs.open(path);
    if (codec != null) {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            SplittableCompressionCodec scodec = (SplittableCompressionCodec) codec;
            SplitCompressionInputStream cin = scodec.createInputStream(din, decompressor, start, end,
                    SplittableCompressionCodec.READ_MODE.BYBLOCK);
            return new SeekableInputStream(cin);
        } else {
            // non-splittable compression input stream
            // no seeking or offsetting is needed
            assert start == 0;
            CompressionInputStream cin = codec.createInputStream(din, decompressor);
            return new SeekableInputStream(cin, din);
        }
    } else {
        // non compression input stream
        // we seek to the start of the split
        din.seek(start);
        return new SeekableInputStream(din);
    }
}
From source file:org.mrgeo.data.accumulo.image.AccumuloMrsImageReader.java
License:Apache License
/**
 * Prepare the scanners that end up being used for getting items out of Accumulo
 */
private void initializeScanners() {
    if (AMTR_props != null) {
        String authsStr = AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_AUTHS);
        this.auths = AccumuloUtils.createAuthorizationsFromDelimitedString(authsStr);
        if (AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS) != null) {
            //String tmp = AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS);
            useCompression = Boolean
                    .parseBoolean(AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS));
        }
    }

    try {
        if (useCompression) {
            codec = HadoopUtils.getCodec(HadoopUtils.createConfiguration());
            decompressor = CodecPool.getDecompressor(codec);
        } else {
            codec = null;
            decompressor = null;
        }

        // see if we are in a test state
        if (mock) {
            // in test mode - use a mock connector
            final MockInstance mi = new MockInstance(this.instance);
            connector = mi.getConnector(this.user, this.pass.getBytes());
            connector.tableOperations().create(this.table);
        } else if (this.instance != null) {
            // get a real connector
            connector = AccumuloConnector.getConnector(this.instance, this.zooServers, this.user, this.pass);
            if (useCompression) {
                codec = HadoopUtils.getCodec(HadoopUtils.createConfiguration());
                decompressor = CodecPool.getDecompressor(codec);
            } else {
                codec = null;
                decompressor = null;
            }
        } else {
            // we did not get the information needed from the properties objects - so use the configs from the install
            connector = AccumuloConnector.getConnector();
            // TODO: compression items need to be worked out
            codec = null;
            decompressor = null;
        }

        // establish the scanners
        scanner = connector.createScanner(this.table, this.auths);
        batchScanner = connector.createBatchScanner(this.table, this.auths, numQueryThreads);
    } catch (final TableNotFoundException | TableExistsException | AccumuloException
            | AccumuloSecurityException | IOException e) {
        throw new MrsImageException(e);
    }
}
From source file:org.mrgeo.data.accumulo.tile.AccumuloMrsTileReader.java
License:Apache License
/**
 * Prepare the scanners that end up being used for getting items out of Accumulo
 */
private void initializeScanners() {
    if (AMTR_props != null) {
        if (AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_AUTHS) != null) {
            this.auths = new Authorizations(
                    AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_AUTHS).split(","));
        }
        if (AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS) != null) {
            String tmp = AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS);
            useCompression = Boolean
                    .parseBoolean(AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS));
        }
    }

    try {
        if (useCompression) {
            codec = HadoopUtils.getCodec(HadoopUtils.createConfiguration());
            decompressor = CodecPool.getDecompressor(codec);
        } else {
            codec = null;
            decompressor = null;
        }

        // see if we are in a test state
        if (mock) {
            // in test mode - use a mock connector
            final MockInstance mi = new MockInstance(this.instance);
            connector = mi.getConnector(this.user, this.pass.getBytes());
            connector.tableOperations().create(this.table);
        } else if (this.instance != null) {
            // get a real connector
            connector = AccumuloConnector.getConnector(this.instance, this.zooServers, this.user, this.pass);
            if (useCompression) {
                codec = HadoopUtils.getCodec(HadoopUtils.createConfiguration());
                decompressor = CodecPool.getDecompressor(codec);
            } else {
                codec = null;
                decompressor = null;
            }
        } else {
            // we did not get the information needed from the properties objects - so use the configs from the install
            connector = AccumuloConnector.getConnector();
            // TODO: compression items need to be worked out
            codec = null;
            decompressor = null;
        }

        // establish the scanners
        scanner = connector.createScanner(this.table, this.auths);
        batchScanner = connector.createBatchScanner(this.table, this.auths, numQueryThreads);

        if (!mock) {
            // I AM MOCKING YOU!!!
            //metadata = loadGenericMetadata();
        }
    } catch (final TableNotFoundException e) {
        throw new MrsImageException(e);
    } catch (final IOException e) {
        throw new MrsImageException(e);
    } catch (final AccumuloSecurityException e) {
        throw new MrsImageException(e);
    } catch (final AccumuloException e) {
        throw new MrsImageException(e);
    } catch (final TableExistsException e) {
        throw new MrsImageException(e);
    }
}
From source file:org.springframework.data.hadoop.store.AbstractStorage.java
License:Apache License
protected synchronized StreamsHolder<InputStream> getInput(Path inputPath) throws IOException {
    if (inputHolder == null) {
        log.info("Creating new InputStream");
        inputHolder = new StreamsHolder<InputStream>();
        final FileSystem fs = basePath.getFileSystem(configuration);
        // TODO: hadoop2 isUriPathAbsolute() ?
        Path p = inputPath.isAbsolute() ? inputPath : new Path(getPath(), inputPath);
        if (!isCompressed()) {
            InputStream input = fs.open(p);
            inputHolder.setStream(input);
        } else {
            Class<?> clazz = ClassUtils.resolveClassName(codecInfo.getCodecClass(),
                    getClass().getClassLoader());
            CompressionCodec compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(clazz,
                    getConfiguration());
            Decompressor decompressor = CodecPool.getDecompressor(compressionCodec);
            FSDataInputStream winput = fs.open(p);
            InputStream input = compressionCodec.createInputStream(winput, decompressor);
            inputHolder.setWrappedStream(winput);
            inputHolder.setStream(input);
        }
    }
    return inputHolder;
}
From source file:org.springframework.data.hadoop.store.AbstractStorage.java
License:Apache License
/**
 * Gets the input stream for input split.
 *
 * @param split the split
 * @return the input stream
 * @throws IOException Signals that an I/O exception has occurred.
 */
protected synchronized StreamsHolder<InputStream> getInput(InputSplit split) throws IOException {
    StreamsHolder<InputStream> holder = splitInputHolders.get(split);
    if (holder == null) {
        log.info("Creating new InputStream for split");
        holder = new StreamsHolder<InputStream>();
        final FileSystem fs = basePath.getFileSystem(configuration);
        if (!isCompressed()) {
            FSDataInputStream input = fs.open(split.getPath());
            input.seek(split.getStart());
            holder.setStream(input);
        } else {
            Class<?> clazz = ClassUtils.resolveClassName(codecInfo.getCodecClass(),
                    getClass().getClassLoader());
            if (!ClassUtils.isAssignable(SplittableCompressionCodec.class, clazz)) {
                throw new StorageException("Not a SplittableCompressionCodec");
            }
            FSDataInputStream winput = fs.open(split.getPath());
            CompressionCodec compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(clazz,
                    getConfiguration());
            Decompressor decompressor = CodecPool.getDecompressor(compressionCodec);
            long start = split.getStart();
            long end = start + split.getLength();
            log.info("SplitCompressionInputStream start=" + start + " end=" + end);
            SplitCompressionInputStream input = ((SplittableCompressionCodec) compressionCodec)
                    .createInputStream(winput, decompressor, start, end,
                            SplittableCompressionCodec.READ_MODE.BYBLOCK);
            holder.setWrappedStream(winput);
            holder.setStream(input);
        }
        splitInputHolders.put(split, holder);
    }
    return holder;
}