Example usage for org.apache.hadoop.io.compress CodecPool getDecompressor

Introduction

On this page you can find example usage of org.apache.hadoop.io.compress.CodecPool.getDecompressor, collected from open-source projects.

Prototype

public static Decompressor getDecompressor(CompressionCodec codec) 

Document

Get a Decompressor for the given CompressionCodec from the pool, or create a new one if the pool has none available to reuse.
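
Before the project examples below, here is a minimal, self-contained sketch of the usual borrow/use/return pattern around CodecPool.getDecompressor. The class name, file path, and byte-counting logic are illustrative assumptions, not taken from any of the projects listed here.

import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Decompressor;

public class CodecPoolExample {
    /**
     * Reads a compressed file, borrowing a Decompressor from the pool and
     * returning it once the stream has been fully consumed.
     */
    public static long countDecompressedBytes(Configuration conf, Path file) throws IOException {
        FileSystem fs = file.getFileSystem(conf);
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
        if (codec == null) {
            throw new IOException("No compression codec found for " + file);
        }
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        try (InputStream in = codec.createInputStream(fs.open(file), decompressor)) {
            long total = 0;
            byte[] buffer = new byte[4096];
            int n;
            while ((n = in.read(buffer)) != -1) {
                total += n; // process the decompressed bytes here
            }
            return total;
        } finally {
            // Hand the Decompressor back so other callers can reuse it.
            CodecPool.returnDecompressor(decompressor);
        }
    }
}

Returning the Decompressor in a finally block is what keeps the pool effective; record readers like the ones excerpted below typically do the same hand-back in their close() methods, which are not shown here.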

Usage

From source file:org.apache.hawq.pxf.plugins.hdfs.ChunkRecordReader.java

License:Apache License

/**
 * Constructs a ChunkRecordReader instance.
 *
 * @param job the job configuration
 * @param split contains the file name, begin byte of the split and the
 *            bytes length
 * @throws IOException if an I/O error occurs when accessing the file or
 *             creating input stream to read from it
 */
public ChunkRecordReader(Configuration job, FileSplit split) throws IOException {
    maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    validateLength(maxLineLength);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    job.setBoolean(DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, true);
    final FileSystem fs = file.getFileSystem(job);
    fs.setVerifyChecksum(false);
    fileIn = fs.open(file, ChunkReader.DEFAULT_BUFFER_SIZE);
    fileLength = getInputStream().getFileLength();
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new ChunkReader(cIn);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            in = new ChunkReader(codec.createInputStream(fileIn, decompressor));
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new ChunkReader(fileIn);
        filePosition = fileIn;
    }
    /*
     * If this is not the first split, we always throw away first record
     * because we always (except the last split) read one extra line in
     * next() method.
     */
    if (start != 0) {
        start += in.readLine(new ChunkWritable(), maxBytesToConsume(start));
    }
    this.pos = start;
}
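
The constructor above borrows a Decompressor, but the excerpt does not show it being returned. A companion close() along these lines (a sketch assuming the in and decompressor fields declared by ChunkRecordReader, not copied from the project source) would hand it back to the pool:

// Sketch of the matching cleanup for the constructor above.
public synchronized void close() throws IOException {
    try {
        if (in != null) {
            in.close();
        }
    } finally {
        if (decompressor != null) {
            CodecPool.returnDecompressor(decompressor);
            decompressor = null;
        }
    }
}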

From source file:org.apache.jena.grande.mapreduce.io.TripleRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;

    // RIOT configuration 
    profile = Utils.createParserProfile(context, split.getPath());

    //        inputByteCounter = ((MapContext)context).getCounter(FileInputFormat.Counter.BYTES_READ);
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
        filePosition = fileIn;
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}

From source file:org.apache.tez.engine.common.shuffle.impl.Fetcher.java

License:Apache License

public Fetcher(Configuration job, TezTaskAttemptID reduceId, ShuffleScheduler scheduler, MergeManager merger,
        TezTaskReporter reporter, ShuffleClientMetrics metrics, ExceptionReporter exceptionReporter,
        SecretKey jobTokenSecret) {
    this.job = job;
    this.reporter = reporter;
    this.scheduler = scheduler;
    this.merger = merger;
    this.metrics = metrics;
    this.exceptionReporter = exceptionReporter;
    this.id = ++nextId;
    this.reduce = reduceId.getTaskID().getId();
    this.jobTokenSecret = jobTokenSecret;
    ioErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.IO_ERROR.toString());
    wrongLengthErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_LENGTH.toString());
    badIdErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.BAD_ID.toString());
    wrongMapErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_MAP.toString());
    connectionErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.CONNECTION.toString());
    wrongReduceErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_REDUCE.toString());

    if (ConfigUtils.isIntermediateInputCompressed(job)) {
        Class<? extends CompressionCodec> codecClass = ConfigUtils.getIntermediateInputCompressorClass(job,
                DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job);
        decompressor = CodecPool.getDecompressor(codec);
    } else {
        codec = null;
        decompressor = null;
    }

    this.connectionTimeout = job.getInt(TezJobConfig.TEZ_ENGINE_SHUFFLE_CONNECT_TIMEOUT,
            TezJobConfig.DEFAULT_TEZ_ENGINE_SHUFFLE_STALLED_COPY_TIMEOUT);
    this.readTimeout = job.getInt(TezJobConfig.TEZ_ENGINE_SHUFFLE_READ_TIMEOUT,
            TezJobConfig.DEFAULT_TEZ_ENGINE_SHUFFLE_READ_TIMEOUT);

    setName("fetcher#" + id);
    setDaemon(true);

    synchronized (Fetcher.class) {
        sslShuffle = job.getBoolean(TezJobConfig.TEZ_ENGINE_SHUFFLE_ENABLE_SSL,
                TezJobConfig.DEFAULT_TEZ_ENGINE_SHUFFLE_ENABLE_SSL);
        if (sslShuffle && sslFactory == null) {
            sslFactory = new SSLFactory(SSLFactory.Mode.CLIENT, job);
            try {
                sslFactory.init();
            } catch (Exception ex) {
                sslFactory.destroy();
                throw new RuntimeException(ex);
            }
        }
    }
}

From source file:org.apache.tez.runtime.library.common.shuffle.impl.Fetcher.java

License:Apache License

public Fetcher(Configuration job, ShuffleScheduler scheduler, MergeManager merger, ShuffleClientMetrics metrics,
        Shuffle shuffle, SecretKey jobTokenSecret, boolean ifileReadAhead, int ifileReadAheadLength,
        CompressionCodec codec, TezInputContext inputContext) throws IOException {
    this.job = job;
    this.scheduler = scheduler;
    this.merger = merger;
    this.metrics = metrics;
    this.shuffle = shuffle;
    this.id = ++nextId;
    this.jobTokenSecret = jobTokenSecret;
    ioErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.IO_ERROR.toString());
    wrongLengthErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME,
            ShuffleErrors.WRONG_LENGTH.toString());
    badIdErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.BAD_ID.toString());
    wrongMapErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME,
            ShuffleErrors.WRONG_MAP.toString());
    connectionErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME,
            ShuffleErrors.CONNECTION.toString());
    wrongReduceErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME,
            ShuffleErrors.WRONG_REDUCE.toString());

    this.ifileReadAhead = ifileReadAhead;
    this.ifileReadAheadLength = ifileReadAheadLength;

    if (codec != null) {
        this.codec = codec;
        this.decompressor = CodecPool.getDecompressor(codec);
    } else {
        this.codec = null;
        this.decompressor = null;
    }

    this.connectionTimeout = job.getInt(TezJobConfig.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_STALLED_COPY_TIMEOUT);
    this.readTimeout = job.getInt(TezJobConfig.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT);

    setName("fetcher#" + id);
    setDaemon(true);

    synchronized (Fetcher.class) {
        sslShuffle = job.getBoolean(TezJobConfig.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL,
                TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_ENABLE_SSL);
        if (sslShuffle && sslFactory == null) {
            sslFactory = new SSLFactory(SSLFactory.Mode.CLIENT, job);
            try {
                sslFactory.init();
            } catch (Exception ex) {
                sslFactory.destroy();
                throw new RuntimeException(ex);
            }
        }
    }
}

From source file:org.apache.tez.runtime.library.shuffle.common.ShuffleUtils.java

License:Apache License

@SuppressWarnings("resource")
public static void shuffleToMemory(MemoryFetchedInput fetchedInput, InputStream input, int decompressedLength,
        int compressedLength, CompressionCodec codec, boolean ifileReadAhead, int ifileReadAheadLength, Log LOG)
        throws IOException {
    IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, ifileReadAhead,
            ifileReadAheadLength);

    input = checksumIn;

    // Are map-outputs compressed?
    if (codec != null) {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        decompressor.reset();
        input = codec.createInputStream(input, decompressor);
    }
    // Copy map-output into an in-memory buffer
    byte[] shuffleData = fetchedInput.getBytes();

    try {
        IOUtils.readFully(input, shuffleData, 0, shuffleData.length);
        // metrics.inputBytes(shuffleData.length);
        LOG.info("Read " + shuffleData.length + " bytes from input for "
                + fetchedInput.getInputAttemptIdentifier());
    } catch (IOException ioe) {
        // Close the streams
        IOUtils.cleanup(LOG, input);
        // Re-throw
        throw ioe;
    }
}

From source file:org.hedera.util.SeekableInputStream.java

License:Apache License

public static SeekableInputStream getInstance(Path path, long start, long end, FileSystem fs,
        CompressionCodecFactory compressionCodecs) throws IOException {
    CompressionCodec codec = compressionCodecs.getCodec(path);
    FSDataInputStream din = fs.open(path);
    if (codec != null) {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            SplittableCompressionCodec scodec = (SplittableCompressionCodec) codec;
            SplitCompressionInputStream cin = scodec.createInputStream(din, decompressor, start, end,
                    SplittableCompressionCodec.READ_MODE.BYBLOCK);
            return new SeekableInputStream(cin);
        } else {
            // non-splittable compression input stream
            // no seeking or offsetting is needed
            assert start == 0;
            CompressionInputStream cin = codec.createInputStream(din, decompressor);
            return new SeekableInputStream(cin, din);
        }
    } else {
        // non compression input stream
        // we seek to the start of the split
        din.seek(start);
        return new SeekableInputStream(din);
    }
}

From source file:org.mrgeo.data.accumulo.image.AccumuloMrsImageReader.java

License:Apache License

/**
 * Prepare the scanners that end up being used for getting items out of Accumulo
 */
private void initializeScanners() {

    if (AMTR_props != null) {

        String authsStr = AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_AUTHS);
        this.auths = AccumuloUtils.createAuthorizationsFromDelimitedString(authsStr);

        if (AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS) != null) {
            //String tmp = AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS);
            useCompression = Boolean
                    .parseBoolean(AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS));
        }

    }

    try {

        if (useCompression) {
            codec = HadoopUtils.getCodec(HadoopUtils.createConfiguration());
            decompressor = CodecPool.getDecompressor(codec);
        } else {
            codec = null;
            decompressor = null;
        }

        // see if we are in a test state
        if (mock) {

            // in test mode - use a mock connector
            final MockInstance mi = new MockInstance(this.instance);
            connector = mi.getConnector(this.user, this.pass.getBytes());
            connector.tableOperations().create(this.table);

        } else if (this.instance != null) {

            // get a real connector
            connector = AccumuloConnector.getConnector(this.instance, this.zooServers, this.user, this.pass);
            if (useCompression) {
                codec = HadoopUtils.getCodec(HadoopUtils.createConfiguration());
                decompressor = CodecPool.getDecompressor(codec);
            } else {
                codec = null;
                decompressor = null;
            }

        } else {

            // we did not get the information needed from the properties objects - so use the configs from the install

            connector = AccumuloConnector.getConnector();

            // TODO: compression items need to be worked out
            codec = null;
            decompressor = null;

        }

        // establish the scanners
        scanner = connector.createScanner(this.table, this.auths);
        batchScanner = connector.createBatchScanner(this.table, this.auths, numQueryThreads);

    } catch (final TableNotFoundException | TableExistsException | AccumuloException | AccumuloSecurityException
            | IOException e) {
        throw new MrsImageException(e);
    }

}

From source file:org.mrgeo.data.accumulo.tile.AccumuloMrsTileReader.java

License:Apache License

/**
 * Prepare the scanners that end up being used for getting items out of Accumulo
 */
private void initializeScanners() {

    if (AMTR_props != null) {

        if (AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_AUTHS) != null) {
            this.auths = new Authorizations(
                    AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_AUTHS).split(","));
        }

        if (AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS) != null) {
            String tmp = AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS);
            useCompression = Boolean
                    .parseBoolean(AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS));
        }

    }

    try {

        if (useCompression) {
            codec = HadoopUtils.getCodec(HadoopUtils.createConfiguration());
            decompressor = CodecPool.getDecompressor(codec);
        } else {
            codec = null;
            decompressor = null;
        }

        // see if we are in a test state
        if (mock) {

            // in test mode - use a mock connector
            final MockInstance mi = new MockInstance(this.instance);
            connector = mi.getConnector(this.user, this.pass.getBytes());
            connector.tableOperations().create(this.table);

        } else if (this.instance != null) {

            // get a real connector
            connector = AccumuloConnector.getConnector(this.instance, this.zooServers, this.user, this.pass);
            if (useCompression) {
                codec = HadoopUtils.getCodec(HadoopUtils.createConfiguration());
                decompressor = CodecPool.getDecompressor(codec);
            } else {
                codec = null;
                decompressor = null;
            }

        } else {

            // we did not get the information needed from the properties objects - so use the configs from the install

            connector = AccumuloConnector.getConnector();

            // TODO: compression items need to be worked out
            codec = null;
            decompressor = null;

        }

        // establish the scanners
        scanner = connector.createScanner(this.table, this.auths);
        batchScanner = connector.createBatchScanner(this.table, this.auths, numQueryThreads);

        if (!mock) {

            // I AM MOCKING YOU!!!

            //metadata = loadGenericMetadata();

        }
    } catch (final TableNotFoundException e) {
        throw new MrsImageException(e);
    } catch (final IOException e) {
        throw new MrsImageException(e);
    } catch (final AccumuloSecurityException e) {
        throw new MrsImageException(e);
    } catch (final AccumuloException e) {
        throw new MrsImageException(e);
    } catch (final TableExistsException e) {
        throw new MrsImageException(e);
    }

}

From source file:org.springframework.data.hadoop.store.AbstractStorage.java

License:Apache License

protected synchronized StreamsHolder<InputStream> getInput(Path inputPath) throws IOException {
    if (inputHolder == null) {
        log.info("Creating new InputStream");
        inputHolder = new StreamsHolder<InputStream>();
        final FileSystem fs = basePath.getFileSystem(configuration);
        // TODO: hadoop2 isUriPathAbsolute() ?
        Path p = inputPath.isAbsolute() ? inputPath : new Path(getPath(), inputPath);
        if (!isCompressed()) {
            InputStream input = fs.open(p);
            inputHolder.setStream(input);
        } else {
            Class<?> clazz = ClassUtils.resolveClassName(codecInfo.getCodecClass(),
                    getClass().getClassLoader());
            CompressionCodec compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(clazz,
                    getConfiguration());
            Decompressor decompressor = CodecPool.getDecompressor(compressionCodec);
            FSDataInputStream winput = fs.open(p);
            InputStream input = compressionCodec.createInputStream(winput, decompressor);
            inputHolder.setWrappedStream(winput);
            inputHolder.setStream(input);
        }
    }
    return inputHolder;
}

From source file:org.springframework.data.hadoop.store.AbstractStorage.java

License:Apache License

/**
 * Gets the input stream for input split.
 *
 * @param split the split
 * @return the input stream
 * @throws IOException Signals that an I/O exception has occurred.
 */
protected synchronized StreamsHolder<InputStream> getInput(InputSplit split) throws IOException {
    StreamsHolder<InputStream> holder = splitInputHolders.get(split);
    if (holder == null) {
        log.info("Creating new InputStream for split");
        holder = new StreamsHolder<InputStream>();
        final FileSystem fs = basePath.getFileSystem(configuration);
        if (!isCompressed()) {
            FSDataInputStream input = fs.open(split.getPath());
            input.seek(split.getStart());
            holder.setStream(input);
        } else {
            Class<?> clazz = ClassUtils.resolveClassName(codecInfo.getCodecClass(),
                    getClass().getClassLoader());

            if (!ClassUtils.isAssignable(SplittableCompressionCodec.class, clazz)) {
                throw new StorageException("Not a SplittableCompressionCodec");
            }

            FSDataInputStream winput = fs.open(split.getPath());

            CompressionCodec compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(clazz,
                    getConfiguration());
            Decompressor decompressor = CodecPool.getDecompressor(compressionCodec);

            long start = split.getStart();
            long end = start + split.getLength();
            log.info("SplitCompressionInputStream start=" + start + " end=" + end);
            SplitCompressionInputStream input = ((SplittableCompressionCodec) compressionCodec)
                    .createInputStream(winput, decompressor, start, end,
                            SplittableCompressionCodec.READ_MODE.BYBLOCK);

            holder.setWrappedStream(winput);
            holder.setStream(input);
        }
        splitInputHolders.put(split, holder);
    }
    return holder;
}