Example usage for org.apache.hadoop.io.compress CompressionCodec createInputStream

Introduction

This page collects example usages of org.apache.hadoop.io.compress.CompressionCodec#createInputStream, drawn from open source projects.

Prototype

CompressionInputStream createInputStream(InputStream in) throws IOException;

Document

Create a CompressionInputStream that will read from the given input stream.
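
Before the project examples, here is a minimal, self-contained sketch of the pattern most of them share (the class name and the sample path are illustrative). It pairs createInputStream with CompressionCodecFactory, which infers the codec from the file extension and returns null for uncompressed files:

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class ReadPossiblyCompressed {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path(args[0]); // e.g. /data/input.gz
        FileSystem fs = path.getFileSystem(conf);

        // Infers the codec from the extension (.gz, .bz2, ...); null means uncompressed.
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);

        InputStream raw = fs.open(path);
        // Wrap in a decompressing stream only when a codec was recognized.
        InputStream in = (codec != null) ? codec.createInputStream(raw) : raw;

        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}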

Usage

From source file: net.darkseraphim.webanalytics.hadoop.csv.CSVLineRecordReader.java

License: Apache License

public void configure(InputSplit genericSplit, JobConf conf) throws IOException {
    FileSplit split = (FileSplit) genericSplit;

    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(split.getPath());

    if (codec != null) {
        is = codec.createInputStream(fileIn);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            fileIn.seek(start);
        }
        is = fileIn;
    }

    this.pos = start;
    init(is, conf);
}
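
Note the end = Long.MAX_VALUE assignment above: a stream produced by the one-argument createInputStream cannot begin mid-file, so the reader that receives the first split must consume the entire file. For codecs that do support splitting (bzip2 being the stock example), Hadoop provides SplittableCompressionCodec. The following sketch, modeled on Hadoop's own LineRecordReader, shows how a record reader might branch on it; the helper class and method names are illustrative:

import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.SplitCompressionInputStream;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;

class SplitAwareOpen {
    /** Opens a decompressed view of one split, adjusting bounds when the codec is splittable. */
    static InputStream open(CompressionCodec codec, FSDataInputStream fileIn, long start, long end)
            throws IOException {
        if (codec instanceof SplittableCompressionCodec) {
            Decompressor decompressor = CodecPool.getDecompressor(codec);
            SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            // The codec snaps the split to a compression-block boundary; the caller
            // should adopt cIn.getAdjustedStart() and cIn.getAdjustedEnd() as its bounds.
            return cIn;
        }
        // Non-splittable codec: decompress from byte 0 and let this reader take the whole file.
        return codec.createInputStream(fileIn);
    }
}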

From source file: newprotobuf.mapred.ProtobufRecordReader.java

License: Open Source License

public ProtobufRecordReader(Configuration conf, FileSplit split, Reporter reporter) throws IOException {

    this.conf = conf;

    start = split.getStart();
    pos = start;
    splitLength = split.getLength();
    end = start + splitLength;

    file = split.getPath();

    FileSystem fs = file.getFileSystem(conf);
    in = fs.open(split.getPath());
    compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec != null) {
        in = codec.createInputStream(in);
        end = Long.MAX_VALUE;
    }
    this.reporter = reporter;
    skipbad = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEPBBADFILESKIP);
    LOG.info("Skip bad is set to " + skipbad);
}

From source file: nyu.cs.webgraph.MRhelpers.LzoTabSeperatedLineRecordReader.java

License: Open Source License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    Configuration job = context.getConfiguration();

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("Codec for file " + file + " not found, cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());

    // creates input stream and also reads the file header
    in = new LineReader(codec.createInputStream(fileIn), job);

    if (start != 0) {
        fileIn.seek(start);

        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }

    this.pos = start;
}

From source file: org.apache.ambari.view.filebrowser.FilePreviewService.java

License: Apache License

@GET
@Path("/file")
@Produces(MediaType.APPLICATION_JSON)
public Response previewFile(@QueryParam("path") String path, @QueryParam("start") int start,
        @QueryParam("end") int end) {

    try {
        HdfsApi api = getApi(context);
        FileStatus status = api.getFileStatus(path);

        CompressionCodec codec = compressionCodecFactory.getCodec(status.getPath());

        // check if we have a compression codec we need to use
        InputStream stream = (codec != null) ? codec.createInputStream(api.open(path)) : api.open(path);

        int length = end - start;
        byte[] bytes = new byte[length];
        // CompressionInputStream is not seekable for most codecs, so skip() is used
        // instead of ((Seekable) stream).seek(start)
        stream.skip(start);
        int readBytes = stream.read(bytes, 0, length);
        boolean isFileEnd = false;

        if (readBytes < length)
            isFileEnd = true;

        JSONObject response = new JSONObject();
        response.put("data", new String(bytes));
        response.put("readbytes", readBytes);
        response.put("isFileEnd", isFileEnd);

        return Response.ok(response).build();
    } catch (WebApplicationException ex) {
        throw ex;
    } catch (FileNotFoundException ex) {
        throw new NotFoundFormattedException(ex.getMessage(), ex);
    } catch (Exception ex) {
        throw new ServiceFormattedException(ex.getMessage(), ex);
    }
}

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperatorTest.java

License: Apache License

@Test
public void testSnappyCompressionSimple() throws IOException {
    if (checkNativeSnappy()) {
        return;
    }

    File snappyFile = new File(testMeta.getDir(), "snappyTestFile.snappy");

    BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(snappyFile));
    Configuration conf = new Configuration();
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(SnappyCodec.class, conf);
    FilterStreamCodec.SnappyFilterStream filterStream = new FilterStreamCodec.SnappyFilterStream(
            codec.createOutputStream(os));

    int ONE_MB = 1024 * 1024;

    String testStr = "TestSnap-16bytes";
    for (int i = 0; i < ONE_MB; i++) { // writes 16 MB in total (1M iterations x 16 bytes)
        filterStream.write(testStr.getBytes());
    }
    filterStream.flush();
    filterStream.close();

    CompressionInputStream is = codec.createInputStream(new FileInputStream(snappyFile));

    byte[] recovered = new byte[testStr.length()];
    int bytesRead = is.read(recovered);
    is.close();
    assertEquals(testStr, new String(recovered));
}

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperatorTest.java

License: Apache License

private void checkSnappyFile(File file, List<Long> offsets, int startVal, int totalWindows, int totalRecords)
        throws IOException {
    FileInputStream fis;
    InputStream gss = null;
    Configuration conf = new Configuration();
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(SnappyCodec.class, conf);
    CompressionInputStream snappyIs = null;

    BufferedReader br = null;

    int numWindows = 0;
    try {
        fis = new FileInputStream(file);
        gss = fis;

        long startOffset = 0;
        for (long offset : offsets) {
            // Skip initial case in case file is not yet created
            if (offset == 0) {
                continue;
            }
            long limit = offset - startOffset;
            LimitInputStream lis = new LimitInputStream(gss, limit);

            snappyIs = codec.createInputStream(lis);
            br = new BufferedReader(new InputStreamReader(snappyIs));
            String eline = "" + (startVal + numWindows * 2);
            int count = 0;
            String line;
            while ((line = br.readLine()) != null) {
                Assert.assertEquals("File line", eline, line);
                ++count;
                if ((count % totalRecords) == 0) {
                    ++numWindows;
                    eline = "" + (startVal + numWindows * 2);
                }
            }
            startOffset = offset;
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (br != null) {
            br.close();
        } else {
            if (snappyIs != null) {
                snappyIs.close();
            } else if (gss != null) {
                gss.close();
            }
        }
    }
    Assert.assertEquals("Total", totalWindows, numWindows);
}

From source file: org.apache.drill.exec.store.dfs.DrillFileSystem.java

License: Apache License

public InputStream openPossiblyCompressedStream(Path path) throws IOException {
    CompressionCodec codec = codecFactory.getCodec(path); // infers from file ext.
    if (codec != null) {
        return codec.createInputStream(open(path));
    } else {
        return open(path);
    }
}

From source file: org.apache.druid.indexer.Utils.java

License: Apache License

public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem)
        throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fileSystem.open(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        inputPath = new Path(inputPath + codec.getDefaultExtension());

        return codec.createInputStream(fileSystem.open(inputPath));
    }
}

From source file: org.apache.giraph.worker.BspServiceSource.java

License: Apache License

/**
 * Load saved partitions in multiple threads.
 * @param superstep superstep to load
 * @param partitions list of partitions to load
 */
private void loadCheckpointVertices(final long superstep, List<Integer> partitions) {
    int numThreads = Math.min(GiraphConstants.NUM_CHECKPOINT_IO_THREADS.get(getConfiguration()),
            partitions.size());

    final Queue<Integer> partitionIdQueue = new ConcurrentLinkedQueue<>(partitions);

    final CompressionCodec codec = new CompressionCodecFactory(getConfiguration())
            .getCodec(new Path(GiraphConstants.CHECKPOINT_COMPRESSION_CODEC.get(getConfiguration())));

    long t0 = System.currentTimeMillis();

    CallableFactory<Void> callableFactory = new CallableFactory<Void>() {
        @Override
        public Callable<Void> newCallable(int callableId) {
            return new Callable<Void>() {

                @Override
                public Void call() throws Exception {
                    while (!partitionIdQueue.isEmpty()) {
                        Integer partitionId = partitionIdQueue.poll();
                        if (partitionId == null) {
                            break;
                        }
                        Path path = getSavedCheckpoint(superstep,
                                "_" + partitionId + CheckpointingUtils.CHECKPOINT_VERTICES_POSTFIX);

                        FSDataInputStream compressedStream = getFs().open(path);

                        DataInputStream stream = codec == null ? compressedStream
                                : new DataInputStream(codec.createInputStream(compressedStream));

                        Partition<I, V, E> partition = getConfiguration().createPartition(partitionId,
                                getContext());

                        partition.readFields(stream);

                        getPartitionStore().addPartition(partition);

                        stream.close();
                    }
                    return null;
                }

            };
        }
    };

    ProgressableUtils.getResultsWithNCallables(callableFactory, numThreads, "load-vertices-%d", getContext());

    LOG.info("Loaded checkpoint in " + (System.currentTimeMillis() - t0) + " ms, using " + numThreads
            + " threads");
}

From source file: org.apache.hama.bsp.LineRecordReader.java

License: Apache License

public LineRecordReader(Configuration job, FileSplit split) throws IOException {
    this.maxLineLength = job.getInt("bsp.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
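
Several of the readers above allocate a fresh decompressor for every stream they open. Where many short-lived streams are opened, Hadoop's CodecPool can recycle decompressors via the two-argument overload createInputStream(InputStream, Decompressor). A minimal sketch, assuming the input file is compressed with a recognized codec (the class name and sample path are illustrative):

import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Decompressor;

public class PooledDecompression {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path(args[0]); // e.g. /data/input.gz; assumed to be compressed
        FileSystem fs = path.getFileSystem(conf);
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);

        // Borrow a decompressor from the pool instead of constructing one per stream.
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        try (InputStream in = codec.createInputStream(fs.open(path), decompressor)) {
            byte[] buf = new byte[4096];
            for (int n; (n = in.read(buf)) != -1; ) {
                System.out.write(buf, 0, n);
            }
        } finally {
            CodecPool.returnDecompressor(decompressor); // return it for reuse
        }
    }
}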