List of usage examples for org.apache.hadoop.io.compress.CodecPool.getDecompressor
public static Decompressor getDecompressor(CompressionCodec codec)
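CodecPool.getDecompressor borrows a reusable Decompressor for the given codec from a static pool, creating a fresh one if none is cached; callers are expected to hand it back with CodecPool.returnDecompressor once the stream is finished. Below is a minimal sketch of that borrow/use/return pattern (the file path is a placeholder, and the null-codec check stands in for whatever error handling a real reader needs); the examples that follow typically keep the Decompressor in a field and return it when the reader or fetcher shuts down, which is not shown in these excerpts.

import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Decompressor;

public class CodecPoolSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/example.gz"); // placeholder input path
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
        if (codec == null) {
            throw new IllegalArgumentException("No codec found for " + file);
        }
        FileSystem fs = file.getFileSystem(conf);
        // Borrow a pooled decompressor instead of allocating one per stream
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        try (InputStream in = codec.createInputStream(fs.open(file), decompressor)) {
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            // Hand the decompressor back so other readers can reuse it
            CodecPool.returnDecompressor(decompressor);
        }
    }
}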
From source file:org.apache.hawq.pxf.plugins.hdfs.ChunkRecordReader.java
License:Apache License
/**
 * Constructs a ChunkRecordReader instance.
 *
 * @param job the job configuration
 * @param split contains the file name, begin byte of the split and the
 *        bytes length
 * @throws IOException if an I/O error occurs when accessing the file or
 *         creating input stream to read from it
 */
public ChunkRecordReader(Configuration job, FileSplit split) throws IOException {
    maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    validateLength(maxLineLength);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    job.setBoolean(DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, true);
    final FileSystem fs = file.getFileSystem(job);
    fs.setVerifyChecksum(false);
    fileIn = fs.open(file, ChunkReader.DEFAULT_BUFFER_SIZE);
    fileLength = getInputStream().getFileLength();
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new ChunkReader(cIn);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            in = new ChunkReader(codec.createInputStream(fileIn, decompressor));
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new ChunkReader(fileIn);
        filePosition = fileIn;
    }
    /*
     * If this is not the first split, we always throw away first record
     * because we always (except the last split) read one extra line in
     * next() method.
     */
    if (start != 0) {
        start += in.readLine(new ChunkWritable(), maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file:org.apache.jena.grande.mapreduce.io.TripleRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    // RIOT configuration
    profile = Utils.createParserProfile(context, split.getPath());
    // inputByteCounter = ((MapContext)context).getCounter(FileInputFormat.Counter.BYTES_READ);
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
        filePosition = fileIn;
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file:org.apache.tez.engine.common.shuffle.impl.Fetcher.java
License:Apache License
public Fetcher(Configuration job, TezTaskAttemptID reduceId, ShuffleScheduler scheduler, MergeManager merger,
        TezTaskReporter reporter, ShuffleClientMetrics metrics, ExceptionReporter exceptionReporter,
        SecretKey jobTokenSecret) {
    this.job = job;
    this.reporter = reporter;
    this.scheduler = scheduler;
    this.merger = merger;
    this.metrics = metrics;
    this.exceptionReporter = exceptionReporter;
    this.id = ++nextId;
    this.reduce = reduceId.getTaskID().getId();
    this.jobTokenSecret = jobTokenSecret;
    ioErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.IO_ERROR.toString());
    wrongLengthErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_LENGTH.toString());
    badIdErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.BAD_ID.toString());
    wrongMapErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_MAP.toString());
    connectionErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.CONNECTION.toString());
    wrongReduceErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_REDUCE.toString());

    if (ConfigUtils.isIntermediateInputCompressed(job)) {
        Class<? extends CompressionCodec> codecClass = ConfigUtils.getIntermediateInputCompressorClass(job,
                DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job);
        decompressor = CodecPool.getDecompressor(codec);
    } else {
        codec = null;
        decompressor = null;
    }

    this.connectionTimeout = job.getInt(TezJobConfig.TEZ_ENGINE_SHUFFLE_CONNECT_TIMEOUT,
            TezJobConfig.DEFAULT_TEZ_ENGINE_SHUFFLE_STALLED_COPY_TIMEOUT);
    this.readTimeout = job.getInt(TezJobConfig.TEZ_ENGINE_SHUFFLE_READ_TIMEOUT,
            TezJobConfig.DEFAULT_TEZ_ENGINE_SHUFFLE_READ_TIMEOUT);

    setName("fetcher#" + id);
    setDaemon(true);

    synchronized (Fetcher.class) {
        sslShuffle = job.getBoolean(TezJobConfig.TEZ_ENGINE_SHUFFLE_ENABLE_SSL,
                TezJobConfig.DEFAULT_TEZ_ENGINE_SHUFFLE_ENABLE_SSL);
        if (sslShuffle && sslFactory == null) {
            sslFactory = new SSLFactory(SSLFactory.Mode.CLIENT, job);
            try {
                sslFactory.init();
            } catch (Exception ex) {
                sslFactory.destroy();
                throw new RuntimeException(ex);
            }
        }
    }
}
From source file:org.apache.tez.runtime.library.common.shuffle.impl.Fetcher.java
License:Apache License
public Fetcher(Configuration job, ShuffleScheduler scheduler, MergeManager merger, ShuffleClientMetrics metrics,
        Shuffle shuffle, SecretKey jobTokenSecret, boolean ifileReadAhead, int ifileReadAheadLength,
        CompressionCodec codec, TezInputContext inputContext) throws IOException {
    this.job = job;
    this.scheduler = scheduler;
    this.merger = merger;
    this.metrics = metrics;
    this.shuffle = shuffle;
    this.id = ++nextId;
    this.jobTokenSecret = jobTokenSecret;
    ioErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.IO_ERROR.toString());
    wrongLengthErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME,
            ShuffleErrors.WRONG_LENGTH.toString());
    badIdErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.BAD_ID.toString());
    wrongMapErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME,
            ShuffleErrors.WRONG_MAP.toString());
    connectionErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME,
            ShuffleErrors.CONNECTION.toString());
    wrongReduceErrs = inputContext.getCounters().findCounter(SHUFFLE_ERR_GRP_NAME,
            ShuffleErrors.WRONG_REDUCE.toString());
    this.ifileReadAhead = ifileReadAhead;
    this.ifileReadAheadLength = ifileReadAheadLength;
    if (codec != null) {
        this.codec = codec;
        this.decompressor = CodecPool.getDecompressor(codec);
    } else {
        this.codec = null;
        this.decompressor = null;
    }

    this.connectionTimeout = job.getInt(TezJobConfig.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_STALLED_COPY_TIMEOUT);
    this.readTimeout = job.getInt(TezJobConfig.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT);

    setName("fetcher#" + id);
    setDaemon(true);

    synchronized (Fetcher.class) {
        sslShuffle = job.getBoolean(TezJobConfig.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL,
                TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_ENABLE_SSL);
        if (sslShuffle && sslFactory == null) {
            sslFactory = new SSLFactory(SSLFactory.Mode.CLIENT, job);
            try {
                sslFactory.init();
            } catch (Exception ex) {
                sslFactory.destroy();
                throw new RuntimeException(ex);
            }
        }
    }
}
From source file:org.apache.tez.runtime.library.shuffle.common.ShuffleUtils.java
License:Apache License
@SuppressWarnings("resource") public static void shuffleToMemory(MemoryFetchedInput fetchedInput, InputStream input, int decompressedLength, int compressedLength, CompressionCodec codec, boolean ifileReadAhead, int ifileReadAheadLength, Log LOG) throws IOException { IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, ifileReadAhead, ifileReadAheadLength);//from w ww .j a va 2 s .co m input = checksumIn; // Are map-outputs compressed? if (codec != null) { Decompressor decompressor = CodecPool.getDecompressor(codec); decompressor.reset(); input = codec.createInputStream(input, decompressor); } // Copy map-output into an in-memory buffer byte[] shuffleData = fetchedInput.getBytes(); try { IOUtils.readFully(input, shuffleData, 0, shuffleData.length); // metrics.inputBytes(shuffleData.length); LOG.info("Read " + shuffleData.length + " bytes from input for " + fetchedInput.getInputAttemptIdentifier()); } catch (IOException ioe) { // Close the streams IOUtils.cleanup(LOG, input); // Re-throw throw ioe; } }
From source file:org.hedera.util.SeekableInputStream.java
License:Apache License
public static SeekableInputStream getInstance(Path path, long start, long end, FileSystem fs,
        CompressionCodecFactory compressionCodecs) throws IOException {
    CompressionCodec codec = compressionCodecs.getCodec(path);
    FSDataInputStream din = fs.open(path);
    if (codec != null) {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            SplittableCompressionCodec scodec = (SplittableCompressionCodec) codec;
            SplitCompressionInputStream cin = scodec.createInputStream(din, decompressor, start, end,
                    SplittableCompressionCodec.READ_MODE.BYBLOCK);
            return new SeekableInputStream(cin);
        } else {
            // non-splittable compression input stream
            // no seeking or offsetting is needed
            assert start == 0;
            CompressionInputStream cin = codec.createInputStream(din, decompressor);
            return new SeekableInputStream(cin, din);
        }
    } else {
        // non compression input stream
        // we seek to the start of the split
        din.seek(start);
        return new SeekableInputStream(din);
    }
}
From source file:org.mrgeo.data.accumulo.image.AccumuloMrsImageReader.java
License:Apache License
/**
 * Prepare the scanners that end up being used for getting items out of Accumulo
 */
private void initializeScanners() {
    if (AMTR_props != null) {
        String authsStr = AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_AUTHS);
        this.auths = AccumuloUtils.createAuthorizationsFromDelimitedString(authsStr);
        if (AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS) != null) {
            //String tmp = AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS);
            useCompression = Boolean
                    .parseBoolean(AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS));
        }
    }

    try {
        if (useCompression) {
            codec = HadoopUtils.getCodec(HadoopUtils.createConfiguration());
            decompressor = CodecPool.getDecompressor(codec);
        } else {
            codec = null;
            decompressor = null;
        }

        // see if we are in a test state
        if (mock) {
            // in test mode - use a mock connector
            final MockInstance mi = new MockInstance(this.instance);
            connector = mi.getConnector(this.user, this.pass.getBytes());
            connector.tableOperations().create(this.table);
        } else if (this.instance != null) {
            // get a real connector
            connector = AccumuloConnector.getConnector(this.instance, this.zooServers, this.user, this.pass);
            if (useCompression) {
                codec = HadoopUtils.getCodec(HadoopUtils.createConfiguration());
                decompressor = CodecPool.getDecompressor(codec);
            } else {
                codec = null;
                decompressor = null;
            }
        } else {
            // we did not get the information needed from the properties objects - so use the configs from the install
            connector = AccumuloConnector.getConnector();
            // TODO: compression items need to be worked out
            codec = null;
            decompressor = null;
        }

        // establish the scanners
        scanner = connector.createScanner(this.table, this.auths);
        batchScanner = connector.createBatchScanner(this.table, this.auths, numQueryThreads);
    } catch (final TableNotFoundException | TableExistsException | AccumuloException
            | AccumuloSecurityException | IOException e) {
        throw new MrsImageException(e);
    }
}
From source file:org.mrgeo.data.accumulo.tile.AccumuloMrsTileReader.java
License:Apache License
/**
 * Prepare the scanners that end up being used for getting items out of Accumulo
 */
private void initializeScanners() {
    if (AMTR_props != null) {
        if (AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_AUTHS) != null) {
            this.auths = new Authorizations(
                    AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_AUTHS).split(","));
        }
        if (AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS) != null) {
            String tmp = AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS);
            useCompression = Boolean
                    .parseBoolean(AMTR_props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_COMPRESS));
        }
    }

    try {
        if (useCompression) {
            codec = HadoopUtils.getCodec(HadoopUtils.createConfiguration());
            decompressor = CodecPool.getDecompressor(codec);
        } else {
            codec = null;
            decompressor = null;
        }

        // see if we are in a test state
        if (mock) {
            // in test mode - use a mock connector
            final MockInstance mi = new MockInstance(this.instance);
            connector = mi.getConnector(this.user, this.pass.getBytes());
            connector.tableOperations().create(this.table);
        } else if (this.instance != null) {
            // get a real connector
            connector = AccumuloConnector.getConnector(this.instance, this.zooServers, this.user, this.pass);
            if (useCompression) {
                codec = HadoopUtils.getCodec(HadoopUtils.createConfiguration());
                decompressor = CodecPool.getDecompressor(codec);
            } else {
                codec = null;
                decompressor = null;
            }
        } else {
            // we did not get the information needed from the properties objects - so use the configs from the install
            connector = AccumuloConnector.getConnector();
            // TODO: compression items need to be worked out
            codec = null;
            decompressor = null;
        }

        // establish the scanners
        scanner = connector.createScanner(this.table, this.auths);
        batchScanner = connector.createBatchScanner(this.table, this.auths, numQueryThreads);

        if (!mock) {
            // I AM MOCKING YOU!!!
            //metadata = loadGenericMetadata();
        }
    } catch (final TableNotFoundException e) {
        throw new MrsImageException(e);
    } catch (final IOException e) {
        throw new MrsImageException(e);
    } catch (final AccumuloSecurityException e) {
        throw new MrsImageException(e);
    } catch (final AccumuloException e) {
        throw new MrsImageException(e);
    } catch (final TableExistsException e) {
        throw new MrsImageException(e);
    }
}
From source file:org.springframework.data.hadoop.store.AbstractStorage.java
License:Apache License
protected synchronized StreamsHolder<InputStream> getInput(Path inputPath) throws IOException {
    if (inputHolder == null) {
        log.info("Creating new InputStream");
        inputHolder = new StreamsHolder<InputStream>();
        final FileSystem fs = basePath.getFileSystem(configuration);
        // TODO: hadoop2 isUriPathAbsolute() ?
        Path p = inputPath.isAbsolute() ? inputPath : new Path(getPath(), inputPath);
        if (!isCompressed()) {
            InputStream input = fs.open(p);
            inputHolder.setStream(input);
        } else {
            Class<?> clazz = ClassUtils.resolveClassName(codecInfo.getCodecClass(),
                    getClass().getClassLoader());
            CompressionCodec compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(clazz,
                    getConfiguration());
            Decompressor decompressor = CodecPool.getDecompressor(compressionCodec);
            FSDataInputStream winput = fs.open(p);
            InputStream input = compressionCodec.createInputStream(winput, decompressor);
            inputHolder.setWrappedStream(winput);
            inputHolder.setStream(input);
        }
    }
    return inputHolder;
}
From source file:org.springframework.data.hadoop.store.AbstractStorage.java
License:Apache License
/**
 * Gets the input stream for input split.
 *
 * @param split the split
 * @return the input stream
 * @throws IOException Signals that an I/O exception has occurred.
 */
protected synchronized StreamsHolder<InputStream> getInput(InputSplit split) throws IOException {
    StreamsHolder<InputStream> holder = splitInputHolders.get(split);
    if (holder == null) {
        log.info("Creating new InputStream for split");
        holder = new StreamsHolder<InputStream>();
        final FileSystem fs = basePath.getFileSystem(configuration);
        if (!isCompressed()) {
            FSDataInputStream input = fs.open(split.getPath());
            input.seek(split.getStart());
            holder.setStream(input);
        } else {
            Class<?> clazz = ClassUtils.resolveClassName(codecInfo.getCodecClass(),
                    getClass().getClassLoader());
            if (!ClassUtils.isAssignable(SplittableCompressionCodec.class, clazz)) {
                throw new StorageException("Not a SplittableCompressionCodec");
            }
            FSDataInputStream winput = fs.open(split.getPath());
            CompressionCodec compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(clazz,
                    getConfiguration());
            Decompressor decompressor = CodecPool.getDecompressor(compressionCodec);
            long start = split.getStart();
            long end = start + split.getLength();
            log.info("SplitCompressionInputStream start=" + start + " end=" + end);
            SplitCompressionInputStream input = ((SplittableCompressionCodec) compressionCodec)
                    .createInputStream(winput, decompressor, start, end,
                            SplittableCompressionCodec.READ_MODE.BYBLOCK);
            holder.setWrappedStream(winput);
            holder.setStream(input);
        }
        splitInputHolders.put(split, holder);
    }
    return holder;
}