Example usage for org.apache.hadoop.io.compress CompressionInputStream close

Introduction

On this page you can find example usages of org.apache.hadoop.io.compress.CompressionInputStream.close().

Prototype

@Override
public void close() throws IOException
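Before the individual examples, here is a minimal, self-contained sketch of the common pattern: obtain a codec, wrap a raw input stream with createInputStream(), and call close() on the resulting CompressionInputStream when finished (here via try-with-resources). The GzipCodec choice, the CloseExample class name, and the "data.gz" path are illustrative assumptions, not taken from the examples below.

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class CloseExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // GzipCodec and the "data.gz" path are illustrative assumptions.
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);

        InputStream raw = new FileInputStream("data.gz");
        // try-with-resources invokes CompressionInputStream.close(), which also
        // closes the wrapped raw stream.
        try (CompressionInputStream in = codec.createInputStream(raw)) {
            byte[] buf = new byte[8192];
            int n;
            while ((n = in.read(buf)) != -1) {
                System.out.write(buf, 0, n);
            }
        }
        System.out.flush();
    }
}

Because close() also closes the underlying stream in the Hadoop codecs used below, examples that close the raw stream afterwards (as the second one does) are simply being explicit.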

Usage

From source file: Compress.TestLZO.java

License: Open Source License

public static void main(String[] argv) throws IOException {
    System.out.println(System.getProperty("java.library.path"));

    Configuration conf = new Configuration();

    conf.setInt("io.compression.codec.lzo.buffersize", 64 * 1024);

    LzoCodec codec = new LzoCodec();
    codec.setConf(conf);

    OutputStream out = new DataOutputBuffer();
    CompressionOutputStream out2 = codec.createOutputStream(out);

    byte[] str2 = new byte[20];

    int num = 10000;
    for (long i = 0; i < num; i++) {
        Util.long2bytes(str2, i);
        out2.write(str2, 0, 8);
    }
    out2.finish();

    byte[] buffer = ((DataOutputBuffer) out).getData();

    System.out.println("org len:" + num * 8 + ", compressed len:" + ((DataOutputBuffer) out).getLength());

    InputStream in = new DataInputBuffer();
    ((DataInputBuffer) in).reset(((DataOutputBuffer) out).getData(), 0, ((DataOutputBuffer) out).getLength());

    CompressionInputStream in2 = codec.createInputStream(in);

    byte[] buf = new byte[100];
    for (long i = 0; i < num; i++) {
        int count = 0;
        count = in2.read(buf, 0, 8);
        if (count > 0) {
            long value = Util.bytes2long(buf, 0, 8);
            if (value != i) {
                System.out.println(i + ",count:" + count + ",value:" + value);
            } else if (i > (num - 20)) {
                System.out.println(i + ",value:" + value);
            }

        } else {
            System.out.println("count:" + count + ", string " + i);
            break;
        }
    }

    in2.close();

    System.out.println("test compress array...");

    OutputStream out3 = new DataOutputBuffer();
    CompressionOutputStream out4 = codec.createOutputStream(out3);

    DataOutputBuffer tout3 = new DataOutputBuffer();

    for (long i = 0; i < num; i++) {
        Util.long2bytes(str2, i);
        out4.write(str2, 0, 8);
    }
    out4.finish();

    buffer = ((DataOutputBuffer) out3).getData();

    System.out.println("org len:" + num * 8 + ", compressed len:" + ((DataOutputBuffer) out3).getLength());

    InputStream in3 = new DataInputBuffer();
    ((DataInputBuffer) in3).reset(((DataOutputBuffer) out3).getData(), 0,
            ((DataOutputBuffer) out3).getLength());

    CompressionInputStream in4 = codec.createInputStream(in3);

    for (long i = 0; i < num; i++) {
        int count = 0;
        count = in4.read(buf, 0, 8);
        if (count > 0) {
            long value = Util.bytes2long(buf, 0, 8);
            if (value != i) {
                System.out.println(i + ",count:" + count + ",value:" + value);
            }

            if (i > (num - 20)) {
                System.out.println(i + ",value:" + value);
            }

        } else {
            System.out.println("count:" + count + ", string " + i);
            break;
        }
    }

    in4.close();

}

From source file: gr.ntua.h2rdf.loadTriples.TranslateAndImport.java

License: Apache License

public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    //compute sample partitions
    FileSystem fs;
    Configuration conf = new Configuration();
    int collected = 0, chunks = 0;
    try {
        fs = FileSystem.get(conf);
        Path sampleDir = new Path("sample");
        FileStatus[] samples = fs.listStatus(sampleDir);
        TreeSet<String> set = new TreeSet<String>();
        for (FileStatus sample : samples) {
            FSDataInputStream in = fs.open(sample.getPath());
            CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
            CompressionInputStream in1 = codec.createInputStream(in);
            NxParser nxp = new NxParser(in1);
            Iterator<Node[]> it = nxp.iterator();
            while (it.hasNext()) {
                Node[] tr = it.next();
                //System.out.println(tr[0].toN3());
                set.add(tr[0].toN3());
                set.add(tr[1].toN3());
                set.add(tr[2].toN3());
            }
            in1.close();
            in.close();
        }

        IndexTranslator translator = new IndexTranslator(TABLE_NAME + "_Index");
        HashMap<String, Long> index = translator.translate(set);
        set.clear();
        TreeSet<ImmutableBytesWritable> set1 = new TreeSet<ImmutableBytesWritable>(
                new ImmutableBytesWritable.Comparator());

        for (FileStatus sample : samples) {
            FSDataInputStream in = fs.open(sample.getPath());
            CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
            CompressionInputStream in1 = codec.createInputStream(in);
            NxParser nxp = new NxParser(in1);
            Iterator<Node[]> it = nxp.iterator();
            while (it.hasNext()) {
                Node[] tr = it.next();
                ByteTriple btr = new ByteTriple(index.get(tr[0].toN3()), index.get(tr[1].toN3()),
                        index.get(tr[2].toN3()));
                set1.add(new ImmutableBytesWritable(btr.getSPOByte()));
                set1.add(new ImmutableBytesWritable(btr.getSOPByte()));
                set1.add(new ImmutableBytesWritable(btr.getOPSByte()));
                set1.add(new ImmutableBytesWritable(btr.getOSPByte()));
                set1.add(new ImmutableBytesWritable(btr.getPOSByte()));
                set1.add(new ImmutableBytesWritable(btr.getPSOByte()));
            }
            in1.close();
            in.close();
        }
        index.clear();

        Path p = new Path("hexastorePartition");
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        SequenceFile.Writer partitionWriter = SequenceFile.createWriter(fs, conf, p,
                ImmutableBytesWritable.class, NullWritable.class);

        double chunkSize = bucketSampledTriples * DistinctIds.samplingRate;
        System.out.println("chunkSize: " + chunkSize);
        Iterator<ImmutableBytesWritable> it = set1.iterator();
        while (it.hasNext()) {
            ImmutableBytesWritable key = it.next();
            if (collected > chunkSize) {
                partitionWriter.append(key, NullWritable.get());
                //System.out.println(Bytes.toStringBinary(key.get()));
                collected = 0;
                chunks++;
            } else {
                collected++;
            }
        }
        System.out.println("chunks: " + chunks);
        partitionWriter.close();

    } catch (IOException e) {
        e.printStackTrace();
    }

    Job job = new Job(conf, "Import Hexastore");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    Path out = new Path("out");
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(job, out);

    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("hexastorePartition"));
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    StringBuilder compressionConfigValue = new StringBuilder();
    compressionConfigValue.append(URLEncoder.encode("I", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    compressionConfigValue.append('&');
    compressionConfigValue.append(URLEncoder.encode("S", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    compressionConfigValue.append('&');
    compressionConfigValue.append(URLEncoder.encode("T", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    job.getConfiguration().set("hbase.hfileoutputformat.families.compression",
            compressionConfigValue.toString());
    //job.getConfiguration().setInt("hbase.mapreduce.hfileoutputformat.blocksize",262144);
    //job.getConfiguration().setInt("hbase.mapreduce.hfileoutputformat.blocksize",16384);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setJarByClass(TranslateAndImport.class);
    job.setMapperClass(Map.class);
    //job.setReducerClass(HexaStoreHistogramsReduce.class);
    job.setReducerClass(HexaStoreReduce.class);

    job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
    job.getConfiguration().setInt("mapred.reduce.tasks", chunks + 1);
    //job.setCombinerClass(Combiner.class);
    job.setJobName("Translate Projections");
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setInt("io.sort.mb", 100);
    job.getConfiguration().setInt("io.file.buffer.size", 131072);
    job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);

    job.getConfiguration().set("mapred.compress.map.output", "true");
    job.getConfiguration().set("mapred.map.output.compression.codec",
            "org.apache.hadoop.io.compress.SnappyCodec");
    //job.getConfiguration().setInt("hbase.hregion.max.filesize", 268435456);
    //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
    job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);

    return job;

}

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperatorTest.java

License: Apache License

@Test
public void testSnappyCompressionSimple() throws IOException {
    if (checkNativeSnappy()) {
        return;
    }

    File snappyFile = new File(testMeta.getDir(), "snappyTestFile.snappy");

    BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(snappyFile));
    Configuration conf = new Configuration();
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(SnappyCodec.class, conf);
    FilterStreamCodec.SnappyFilterStream filterStream = new FilterStreamCodec.SnappyFilterStream(
            codec.createOutputStream(os));

    int ONE_MB = 1024 * 1024;

    String testStr = "TestSnap-16bytes";
    for (int i = 0; i < ONE_MB; i++) { // write 16 MBs
        filterStream.write(testStr.getBytes());
    }
    filterStream.flush();
    filterStream.close();

    CompressionInputStream is = codec.createInputStream(new FileInputStream(snappyFile));

    byte[] recovered = new byte[testStr.length()];
    int bytesRead = is.read(recovered);
    is.close();
    assertEquals(testStr, new String(recovered));
}

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperatorTest.java

License: Apache License

private void checkSnappyFile(File file, List<Long> offsets, int startVal, int totalWindows, int totalRecords)
        throws IOException {
    FileInputStream fis;
    InputStream gss = null;
    Configuration conf = new Configuration();
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(SnappyCodec.class, conf);
    CompressionInputStream snappyIs = null;

    BufferedReader br = null;

    int numWindows = 0;
    try {
        fis = new FileInputStream(file);
        gss = fis;

        long startOffset = 0;
        for (long offset : offsets) {
            // Skip initial case in case file is not yet created
            if (offset == 0) {
                continue;
            }
            long limit = offset - startOffset;
            LimitInputStream lis = new LimitInputStream(gss, limit);

            snappyIs = codec.createInputStream(lis);
            br = new BufferedReader(new InputStreamReader(snappyIs));
            String eline = "" + (startVal + numWindows * 2);
            int count = 0;
            String line;
            while ((line = br.readLine()) != null) {
                Assert.assertEquals("File line", eline, line);
                ++count;
                if ((count % totalRecords) == 0) {
                    ++numWindows;
                    eline = "" + (startVal + numWindows * 2);
                }
            }
            startOffset = offset;
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (br != null) {
            br.close();
        } else {
            if (snappyIs != null) {
                snappyIs.close();
            } else if (gss != null) {
                gss.close();
            }
        }
    }
    Assert.assertEquals("Total", totalWindows, numWindows);
}

From source file: org.apache.pig.piggybank.test.storage.TestMultiStorageCompression.java

License: Apache License

private void verifyResults(String type, List<String> filesToDelete, String outputPath)
        throws IOException, FileNotFoundException {
    // Verify the output
    File outputDir = new File(outputPath);
    List<String> indexFolders = Arrays.asList(outputDir.list());

    // Assert whether all keys are present
    assertTrue(indexFolders.contains("f1." + type));
    assertTrue(indexFolders.contains("f2." + type));
    assertTrue(indexFolders.contains("f3." + type));
    assertTrue(indexFolders.contains("f4." + type));

    // Sort so that assertions are easy
    Collections.sort(indexFolders);

    for (int i = 0; i < indexFolders.size(); i++) {

        String indexFolder = indexFolders.get(i);
        if (indexFolder.startsWith("._SUCCESS") || indexFolder.startsWith("_SUCCESS"))
            continue;
        String topFolder = outputPath + File.separator + indexFolder;
        File indexFolderFile = new File(topFolder);
        filesToDelete.add(topFolder);
        String[] list = indexFolderFile.list();
        for (String outputFile : list) {

            String file = topFolder + File.separator + outputFile;
            filesToDelete.add(file);

            // Skip off any file starting with .
            if (outputFile.startsWith("."))
                continue;

            // Try to read the records using the codec
            CompressionCodec codec = null;

            // Use the codec according to the test case
            if (type.equals("bz2")) {
                codec = new BZip2Codec();
            } else if (type.equals("gz")) {
                codec = new GzipCodec();
            }
            if (codec instanceof Configurable) {
                ((Configurable) codec).setConf(new Configuration());
            }

            CompressionInputStream createInputStream = codec.createInputStream(new FileInputStream(file));
            int b;
            StringBuffer sb = new StringBuffer();
            while ((b = createInputStream.read()) != -1) {
                sb.append((char) b);
            }
            createInputStream.close();

            // Assert for the number of fields and keys.
            String[] fields = sb.toString().split("\\t");
            assertEquals(3, fields.length);
            String id = indexFolder.substring(1, 2);
            assertEquals("f" + id, fields[0]);

        }

    }
}