List of usage examples for org.apache.hadoop.io.compress.CompressionCodec#createInputStream
/**
 * Signature of the method that every example below calls: it accepts a raw
 * {@link InputStream} and hands back a {@link CompressionInputStream}.
 * Presumably the returned stream yields the decompressed bytes of {@code in};
 * confirm against the Hadoop API documentation.
 *
 * @param in the underlying stream to wrap
 * @return a {@link CompressionInputStream} built on top of {@code in}
 * @throws IOException declared by the signature; callers must handle or propagate it
 */
CompressionInputStream createInputStream(InputStream in) throws IOException;
From source file:net.darkseraphim.webanalytics.hadoop.csv.CSVLineRecordReader.java
License:Apache License
public void configure(InputSplit genericSplit, JobConf conf) throws IOException { FileSplit split = (FileSplit) genericSplit; start = split.getStart();//from www. j av a 2 s.c om end = start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(conf); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(conf); FSDataInputStream fileIn = fs.open(split.getPath()); if (codec != null) { is = codec.createInputStream(fileIn); end = Long.MAX_VALUE; } else { if (start != 0) { fileIn.seek(start); } is = fileIn; } this.pos = start; init(is, conf); }
From source file:newprotobuf.mapred.ProtobufRecordReader.java
License:Open Source License
/**
 * Creates a reader over one file split.
 *
 * <p>Stores the split boundaries, opens the split's file into {@code in} and,
 * when a compression codec is registered for the file, replaces {@code in}
 * with the codec's stream and sets {@code end} to {@link Long#MAX_VALUE}.
 * The "skip bad files" flag is read from the Hive configuration and logged.
 *
 * @param conf configuration used for file system access, codec lookup and the skip flag
 * @param split the file split to read
 * @param reporter progress reporter kept for later use
 * @throws IOException if the file cannot be opened or wrapped by the codec
 */
public ProtobufRecordReader(Configuration conf, FileSplit split, Reporter reporter) throws IOException {
    this.conf = conf;

    // Split geometry.
    start = split.getStart();
    pos = start;
    splitLength = split.getLength();
    end = start + splitLength;

    // Open the backing file.
    file = split.getPath();
    FileSystem fileSystem = file.getFileSystem(conf);
    in = fileSystem.open(split.getPath());

    // Wrap with a codec when one matches the file.
    compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec matchedCodec = compressionCodecs.getCodec(file);
    if (matchedCodec != null) {
        in = matchedCodec.createInputStream(in);
        end = Long.MAX_VALUE;
    }

    this.reporter = reporter;
    skipbad = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEPBBADFILESKIP);
    LOG.info("Skip bad is set to " + skipbad);
}
From source file:nyu.cs.webgraph.MRhelpers.LzoTabSeperatedLineRecordReader.java
License:Open Source License
@Override public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException { FileSplit split = (FileSplit) genericSplit; start = split.getStart();//ww w .j a v a2 s.c o m end = start + split.getLength(); final Path file = split.getPath(); Configuration job = context.getConfiguration(); FileSystem fs = file.getFileSystem(job); CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); if (codec == null) { throw new IOException("Codec for file " + file + " not found, cannot run"); } // open the file and seek to the start of the split fileIn = fs.open(split.getPath()); // creates input stream and also reads the file header in = new LineReader(codec.createInputStream(fileIn), job); if (start != 0) { fileIn.seek(start); // read and ignore the first line in.readLine(new Text()); start = fileIn.getPos(); } this.pos = start; }
From source file:org.apache.ambari.view.filebrowser.FilePreviewService.java
License:Apache License
@GET @Path("/file") @Produces(MediaType.APPLICATION_JSON)//from w ww. j a va 2s. com public Response previewFile(@QueryParam("path") String path, @QueryParam("start") int start, @QueryParam("end") int end) { try { HdfsApi api = getApi(context); FileStatus status = api.getFileStatus(path); CompressionCodec codec = compressionCodecFactory.getCodec(status.getPath()); // check if we have a compression codec we need to use InputStream stream = (codec != null) ? codec.createInputStream(api.open(path)) : api.open(path); int length = end - start; byte[] bytes = new byte[length]; // ((Seekable)stream).seek(start); //seek(start); stream.skip(start); int readBytes = stream.read(bytes, 0, length); boolean isFileEnd = false; if (readBytes < length) isFileEnd = true; JSONObject response = new JSONObject(); response.put("data", new String(bytes)); response.put("readbytes", readBytes); response.put("isFileEnd", isFileEnd); return Response.ok(response).build(); } catch (WebApplicationException ex) { throw ex; } catch (FileNotFoundException ex) { throw new NotFoundFormattedException(ex.getMessage(), ex); } catch (Exception ex) { throw new ServiceFormattedException(ex.getMessage(), ex); } }
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperatorTest.java
License:Apache License
@Test public void testSnappyCompressionSimple() throws IOException { if (checkNativeSnappy()) { return;/*from ww w. j a v a 2 s . c o m*/ } File snappyFile = new File(testMeta.getDir(), "snappyTestFile.snappy"); BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(snappyFile)); Configuration conf = new Configuration(); CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(SnappyCodec.class, conf); FilterStreamCodec.SnappyFilterStream filterStream = new FilterStreamCodec.SnappyFilterStream( codec.createOutputStream(os)); int ONE_MB = 1024 * 1024; String testStr = "TestSnap-16bytes"; for (int i = 0; i < ONE_MB; i++) { // write 16 MBs filterStream.write(testStr.getBytes()); } filterStream.flush(); filterStream.close(); CompressionInputStream is = codec.createInputStream(new FileInputStream(snappyFile)); byte[] recovered = new byte[testStr.length()]; int bytesRead = is.read(recovered); is.close(); assertEquals(testStr, new String(recovered)); }
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperatorTest.java
License:Apache License
private void checkSnappyFile(File file, List<Long> offsets, int startVal, int totalWindows, int totalRecords) throws IOException { FileInputStream fis;/* ww w . j a v a 2 s .com*/ InputStream gss = null; Configuration conf = new Configuration(); CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(SnappyCodec.class, conf); CompressionInputStream snappyIs = null; BufferedReader br = null; int numWindows = 0; try { fis = new FileInputStream(file); gss = fis; long startOffset = 0; for (long offset : offsets) { // Skip initial case in case file is not yet created if (offset == 0) { continue; } long limit = offset - startOffset; LimitInputStream lis = new LimitInputStream(gss, limit); snappyIs = codec.createInputStream(lis); br = new BufferedReader(new InputStreamReader(snappyIs)); String eline = "" + (startVal + numWindows * 2); int count = 0; String line; while ((line = br.readLine()) != null) { Assert.assertEquals("File line", eline, line); ++count; if ((count % totalRecords) == 0) { ++numWindows; eline = "" + (startVal + numWindows * 2); } } startOffset = offset; } } catch (Exception e) { e.printStackTrace(); } finally { if (br != null) { br.close(); } else { if (snappyIs != null) { snappyIs.close(); } else if (gss != null) { gss.close(); } } } Assert.assertEquals("Total", totalWindows, numWindows); }
From source file:org.apache.drill.exec.store.dfs.DrillFileSystem.java
License:Apache License
public InputStream openPossiblyCompressedStream(Path path) throws IOException { CompressionCodec codec = codecFactory.getCodec(path); // infers from file ext. if (codec != null) { return codec.createInputStream(open(path)); } else {/*w w w. jav a 2 s.c o m*/ return open(path); } }
From source file:org.apache.druid.indexer.Utils.java
License:Apache License
/**
 * Opens {@code inputPath} for reading, honouring the job's output
 * compression setting.
 *
 * <p>When the job does not compress its output the path is opened as is.
 * Otherwise the configured output codec (defaulting to {@link GzipCodec}) is
 * instantiated, its default extension is appended to the path, and the
 * resulting file is opened through the codec.
 *
 * @param job job context supplying the compression settings and configuration
 * @param inputPath path of the file, without any codec extension
 * @param fileSystem file system used to open the file
 * @return a stream over the file's contents
 * @throws IOException if the file cannot be opened
 */
public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem)
    throws IOException {
  if (FileOutputFormat.getCompressOutput(job)) {
    final Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
    final CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
    final Path compressedPath = new Path(inputPath + codec.getDefaultExtension());
    return codec.createInputStream(fileSystem.open(compressedPath));
  }
  return fileSystem.open(inputPath);
}
From source file:org.apache.giraph.worker.BspServiceSource.java
License:Apache License
/** * Load saved partitions in multiple threads. * @param superstep superstep to load/*from w w w . j av a2 s .com*/ * @param partitions list of partitions to load */ private void loadCheckpointVertices(final long superstep, List<Integer> partitions) { int numThreads = Math.min(GiraphConstants.NUM_CHECKPOINT_IO_THREADS.get(getConfiguration()), partitions.size()); final Queue<Integer> partitionIdQueue = new ConcurrentLinkedQueue<>(partitions); final CompressionCodec codec = new CompressionCodecFactory(getConfiguration()) .getCodec(new Path(GiraphConstants.CHECKPOINT_COMPRESSION_CODEC.get(getConfiguration()))); long t0 = System.currentTimeMillis(); CallableFactory<Void> callableFactory = new CallableFactory<Void>() { @Override public Callable<Void> newCallable(int callableId) { return new Callable<Void>() { @Override public Void call() throws Exception { while (!partitionIdQueue.isEmpty()) { Integer partitionId = partitionIdQueue.poll(); if (partitionId == null) { break; } Path path = getSavedCheckpoint(superstep, "_" + partitionId + CheckpointingUtils.CHECKPOINT_VERTICES_POSTFIX); FSDataInputStream compressedStream = getFs().open(path); DataInputStream stream = codec == null ? compressedStream : new DataInputStream(codec.createInputStream(compressedStream)); Partition<I, V, E> partition = getConfiguration().createPartition(partitionId, getContext()); partition.readFields(stream); getPartitionStore().addPartition(partition); stream.close(); } return null; } }; } }; ProgressableUtils.getResultsWithNCallables(callableFactory, numThreads, "load-vertices-%d", getContext()); LOG.info("Loaded checkpoint in " + (System.currentTimeMillis() - t0) + " ms, using " + numThreads + " threads"); }
From source file:org.apache.hama.bsp.LineRecordReader.java
License:Apache License
public LineRecordReader(Configuration job, FileSplit split) throws IOException { this.maxLineLength = job.getInt("bsp.linerecordreader.maxlength", Integer.MAX_VALUE); start = split.getStart();// ww w. j a va 2 s.c o m end = start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(job); FSDataInputStream fileIn = fs.open(split.getPath()); boolean skipFirstLine = false; if (codec != null) { in = new LineReader(codec.createInputStream(fileIn), job); end = Long.MAX_VALUE; } else { if (start != 0) { skipFirstLine = true; --start; fileIn.seek(start); } in = new LineReader(fileIn, job); } if (skipFirstLine) { // skip first line and re-establish "start". start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start)); } this.pos = start; }