List of usage examples for org.apache.hadoop.io.compress.CompressionInputStream#close()
@Override public void close() throws IOException
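The signature above is the override of java.io.InputStream#close(); closing a CompressionInputStream also closes the stream it wraps. Before the per-project examples, here is a minimal, self-contained sketch of the pattern, assuming GzipCodec and a hypothetical local file "data.gz" (both are placeholders, not taken from the examples below):

import java.io.FileInputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class CompressionInputStreamCloseExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
        // "data.gz" is a placeholder path; try-with-resources guarantees close()
        // runs even if read() throws, and close() also closes the file stream.
        try (CompressionInputStream in = codec.createInputStream(new FileInputStream("data.gz"))) {
            byte[] buf = new byte[4096];
            int n;
            while ((n = in.read(buf)) != -1) {
                System.out.write(buf, 0, n);
            }
        }
    }
}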
From source file:Compress.TestLZO.java
License:Open Source License
public static void main(String[] argv) throws IOException {
    System.out.println(System.getProperty("java.library.path"));
    Configuration conf = new Configuration();
    conf.setInt("io.compression.codec.lzo.buffersize", 64 * 1024);
    LzoCodec codec = new LzoCodec();
    codec.setConf(conf);

    // Compress num longs (8 bytes each) into an in-memory buffer.
    OutputStream out = new DataOutputBuffer();
    CompressionOutputStream out2 = codec.createOutputStream(out);
    byte[] str2 = new byte[20];
    int num = 10000;
    for (long i = 0; i < num; i++) {
        Util.long2bytes(str2, i);
        out2.write(str2, 0, 8);
    }
    out2.finish();
    System.out.println("org len:" + num * 8 + ", compressed len:" + ((DataOutputBuffer) out).getLength());

    // Decompress and verify the values round-trip.
    InputStream in = new DataInputBuffer();
    ((DataInputBuffer) in).reset(((DataOutputBuffer) out).getData(), 0, ((DataOutputBuffer) out).getLength());
    CompressionInputStream in2 = codec.createInputStream(in);
    byte[] buf = new byte[100];
    for (long i = 0; i < num; i++) {
        int count = in2.read(buf, 0, 8);
        if (count > 0) {
            long value = Util.bytes2long(buf, 0, 8);
            if (value != i) {
                System.out.println(i + ",count:" + count + ",value:" + value);
            } else if (i > (num - 20)) {
                System.out.println(i + ",value:" + value);
            }
        } else {
            System.out.println("count:" + count + ", string " + i);
            break;
        }
    }
    in2.close();

    // Repeat the round-trip with a second pair of streams.
    System.out.println("test compress array...");
    OutputStream out3 = new DataOutputBuffer();
    CompressionOutputStream out4 = codec.createOutputStream(out3);
    for (long i = 0; i < num; i++) {
        Util.long2bytes(str2, i);
        out4.write(str2, 0, 8);
    }
    out4.finish();
    System.out.println("org len:" + num * 8 + ", compressed len:" + ((DataOutputBuffer) out3).getLength());

    InputStream in3 = new DataInputBuffer();
    ((DataInputBuffer) in3).reset(((DataOutputBuffer) out3).getData(), 0, ((DataOutputBuffer) out3).getLength());
    CompressionInputStream in4 = codec.createInputStream(in3);
    for (long i = 0; i < num; i++) {
        int count = in4.read(buf, 0, 8);
        if (count > 0) {
            long value = Util.bytes2long(buf, 0, 8);
            if (value != i) {
                System.out.println(i + ",count:" + count + ",value:" + value);
            }
            if (i > (num - 20)) {
                System.out.println(i + ",value:" + value);
            }
        } else {
            System.out.println("count:" + count + ", string " + i);
            break;
        }
    }
    in4.close();
}
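A related pattern worth noting alongside this example (a hedged sketch, not code from TestLZO): when a Decompressor is borrowed explicitly from CodecPool, closing the CompressionInputStream is typically paired with returning the decompressor in a finally block. Here "codec" stands for any configured CompressionCodec and "rawIn" for any open InputStream; both are placeholders.

Decompressor decompressor = CodecPool.getDecompressor(codec);
try {
    CompressionInputStream cin = codec.createInputStream(rawIn, decompressor);
    try {
        byte[] buf = new byte[8192];
        int n;
        while ((n = cin.read(buf)) != -1) {
            // process buf[0..n)
        }
    } finally {
        cin.close(); // also closes rawIn
    }
} finally {
    CodecPool.returnDecompressor(decompressor); // hand the borrowed decompressor back
}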
From source file:gr.ntua.h2rdf.loadTriples.TranslateAndImport.java
License:Apache License
public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    // Compute sample partitions.
    FileSystem fs;
    Configuration conf = new Configuration();
    int collected = 0, chunks = 0;
    try {
        fs = FileSystem.get(conf);
        Path sampleDir = new Path("sample");
        FileStatus[] samples = fs.listStatus(sampleDir);
        TreeSet<String> set = new TreeSet<String>();
        // First pass: collect the distinct node labels from the gzipped samples.
        for (FileStatus sample : samples) {
            FSDataInputStream in = fs.open(sample.getPath());
            CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
            CompressionInputStream in1 = codec.createInputStream(in);
            NxParser nxp = new NxParser(in1);
            Iterator<Node[]> it = nxp.iterator();
            while (it.hasNext()) {
                Node[] tr = it.next();
                //System.out.println(tr[0].toN3());
                set.add(tr[0].toN3());
                set.add(tr[1].toN3());
                set.add(tr[2].toN3());
            }
            in1.close();
            in.close();
        }
        IndexTranslator translator = new IndexTranslator(TABLE_NAME + "_Index");
        HashMap<String, Long> index = translator.translate(set);
        set.clear();
        TreeSet<ImmutableBytesWritable> set1 = new TreeSet<ImmutableBytesWritable>(
                new ImmutableBytesWritable.Comparator());
        // Second pass: re-read the samples and build the six index permutations.
        for (FileStatus sample : samples) {
            FSDataInputStream in = fs.open(sample.getPath());
            CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
            CompressionInputStream in1 = codec.createInputStream(in);
            NxParser nxp = new NxParser(in1);
            Iterator<Node[]> it = nxp.iterator();
            while (it.hasNext()) {
                Node[] tr = it.next();
                ByteTriple btr = new ByteTriple(index.get(tr[0].toN3()), index.get(tr[1].toN3()),
                        index.get(tr[2].toN3()));
                set1.add(new ImmutableBytesWritable(btr.getSPOByte()));
                set1.add(new ImmutableBytesWritable(btr.getSOPByte()));
                set1.add(new ImmutableBytesWritable(btr.getOPSByte()));
                set1.add(new ImmutableBytesWritable(btr.getOSPByte()));
                set1.add(new ImmutableBytesWritable(btr.getPOSByte()));
                set1.add(new ImmutableBytesWritable(btr.getPSOByte()));
            }
            in1.close();
            in.close();
        }
        index.clear();
        Path p = new Path("hexastorePartition");
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        SequenceFile.Writer partitionWriter = SequenceFile.createWriter(fs, conf, p,
                ImmutableBytesWritable.class, NullWritable.class);
        double chunkSize = bucketSampledTriples * DistinctIds.samplingRate;
        System.out.println("chunkSize: " + chunkSize);
        Iterator<ImmutableBytesWritable> it = set1.iterator();
        while (it.hasNext()) {
            ImmutableBytesWritable key = it.next();
            if (collected > chunkSize) {
                partitionWriter.append(key, NullWritable.get());
                //System.out.println(Bytes.toStringBinary(key.get()));
                collected = 0;
                chunks++;
            } else {
                collected++;
            }
        }
        System.out.println("chunks: " + chunks);
        partitionWriter.close();
    } catch (IOException e) {
        e.printStackTrace();
    }

    Job job = new Job(conf, "Import Hexastore");
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    Path out = new Path("out");
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(job, out);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("hexastorePartition"));
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    // Enable Snappy compression for the three column families I, S, and T.
    StringBuilder compressionConfigValue = new StringBuilder();
    compressionConfigValue.append(URLEncoder.encode("I", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    compressionConfigValue.append('&');
    compressionConfigValue.append(URLEncoder.encode("S", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    compressionConfigValue.append('&');
    compressionConfigValue.append(URLEncoder.encode("T", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    job.getConfiguration().set("hbase.hfileoutputformat.families.compression",
            compressionConfigValue.toString());
    //job.getConfiguration().setInt("hbase.mapreduce.hfileoutputformat.blocksize", 262144);
    //job.getConfiguration().setInt("hbase.mapreduce.hfileoutputformat.blocksize", 16384);

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setJarByClass(TranslateAndImport.class);
    job.setMapperClass(Map.class);
    //job.setReducerClass(HexaStoreHistogramsReduce.class);
    job.setReducerClass(HexaStoreReduce.class);
    job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
    job.getConfiguration().setInt("mapred.reduce.tasks", chunks + 1);
    //job.setCombinerClass(Combiner.class);
    job.setJobName("Translate Projections");
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setInt("io.sort.mb", 100);
    job.getConfiguration().setInt("io.file.buffer.size", 131072);
    job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
    job.getConfiguration().set("mapred.compress.map.output", "true");
    job.getConfiguration().set("mapred.map.output.compression.codec",
            "org.apache.hadoop.io.compress.SnappyCodec");
    //job.getConfiguration().setInt("hbase.hregion.max.filesize", 268435456);
    //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
    job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
    return job;
}
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperatorTest.java
License:Apache License
@Test
public void testSnappyCompressionSimple() throws IOException {
    if (checkNativeSnappy()) {
        return;
    }

    File snappyFile = new File(testMeta.getDir(), "snappyTestFile.snappy");
    BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(snappyFile));
    Configuration conf = new Configuration();
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(SnappyCodec.class, conf);
    FilterStreamCodec.SnappyFilterStream filterStream = new FilterStreamCodec.SnappyFilterStream(
            codec.createOutputStream(os));

    int ONE_MB = 1024 * 1024;
    String testStr = "TestSnap-16bytes";
    for (int i = 0; i < ONE_MB; i++) { // 1M writes of 16 bytes = 16 MB
        filterStream.write(testStr.getBytes());
    }
    filterStream.flush();
    filterStream.close();

    CompressionInputStream is = codec.createInputStream(new FileInputStream(snappyFile));
    byte[] recovered = new byte[testStr.length()];
    int bytesRead = is.read(recovered);
    is.close();
    assertEquals(testStr, new String(recovered));
}
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperatorTest.java
License:Apache License
private void checkSnappyFile(File file, List<Long> offsets, int startVal, int totalWindows, int totalRecords)
        throws IOException {
    FileInputStream fis;
    InputStream gss = null;
    Configuration conf = new Configuration();
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(SnappyCodec.class, conf);
    CompressionInputStream snappyIs = null;
    BufferedReader br = null;
    int numWindows = 0;
    try {
        fis = new FileInputStream(file);
        gss = fis;
        long startOffset = 0;
        for (long offset : offsets) {
            // Skip initial case in case file is not yet created
            if (offset == 0) {
                continue;
            }
            // Decompress one window's worth of bytes at a time.
            long limit = offset - startOffset;
            LimitInputStream lis = new LimitInputStream(gss, limit);
            snappyIs = codec.createInputStream(lis);
            br = new BufferedReader(new InputStreamReader(snappyIs));
            String eline = "" + (startVal + numWindows * 2);
            int count = 0;
            String line;
            while ((line = br.readLine()) != null) {
                Assert.assertEquals("File line", eline, line);
                ++count;
                if ((count % totalRecords) == 0) {
                    ++numWindows;
                    eline = "" + (startVal + numWindows * 2);
                }
            }
            startOffset = offset;
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close the outermost wrapper that was created; its close() cascades
        // down through the CompressionInputStream to the file stream.
        if (br != null) {
            br.close();
        } else if (snappyIs != null) {
            snappyIs.close();
        } else if (gss != null) {
            gss.close();
        }
    }
    Assert.assertEquals("Total", totalWindows, numWindows);
}
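The finally block above depends on close() cascading down the wrapper chain: closing the outermost stream that was successfully constructed also closes everything it decorates. A minimal hedged illustration of that cascade, reusing the file and codec names from the test above:

// Closing the outer reader propagates: the InputStreamReader closes the
// CompressionInputStream, whose close() in turn closes the FileInputStream.
BufferedReader reader = new BufferedReader(
        new InputStreamReader(codec.createInputStream(new FileInputStream(file))));
try {
    String first = reader.readLine(); // read something before releasing the chain
} finally {
    reader.close(); // one call closes all three streams
}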
From source file:org.apache.pig.piggybank.test.storage.TestMultiStorageCompression.java
License:Apache License
private void verifyResults(String type, List<String> filesToDelete, String outputPath)
        throws IOException, FileNotFoundException {
    // Verify the output
    File outputDir = new File(outputPath);
    List<String> indexFolders = Arrays.asList(outputDir.list());
    // Assert whether all keys are present
    assertTrue(indexFolders.contains("f1." + type));
    assertTrue(indexFolders.contains("f2." + type));
    assertTrue(indexFolders.contains("f3." + type));
    assertTrue(indexFolders.contains("f4." + type));
    // Sort so that assertions are easy
    Collections.sort(indexFolders);
    for (int i = 0; i < indexFolders.size(); i++) {
        String indexFolder = indexFolders.get(i);
        if (indexFolder.startsWith("._SUCCESS") || indexFolder.startsWith("_SUCCESS")) {
            continue;
        }
        String topFolder = outputPath + File.separator + indexFolder;
        File indexFolderFile = new File(topFolder);
        filesToDelete.add(topFolder);
        String[] list = indexFolderFile.list();
        for (String outputFile : list) {
            String file = topFolder + File.separator + outputFile;
            filesToDelete.add(file);
            // Skip off any file starting with .
            if (outputFile.startsWith(".")) {
                continue;
            }
            // Try to read the records using the codec
            CompressionCodec codec = null;
            // Use the codec according to the test case
            if (type.equals("bz2")) {
                codec = new BZip2Codec();
            } else if (type.equals("gz")) {
                codec = new GzipCodec();
            }
            if (codec instanceof Configurable) {
                ((Configurable) codec).setConf(new Configuration());
            }
            CompressionInputStream createInputStream = codec.createInputStream(new FileInputStream(file));
            int b;
            StringBuffer sb = new StringBuffer();
            while ((b = createInputStream.read()) != -1) {
                sb.append((char) b);
            }
            createInputStream.close();
            // Assert for the number of fields and keys.
            String[] fields = sb.toString().split("\\t");
            assertEquals(3, fields.length);
            String id = indexFolder.substring(1, 2);
            assertEquals("f" + id, fields[0]);
        }
    }
}