Example usage for org.apache.hadoop.io DataOutputBuffer DataOutputBuffer

List of usage examples for org.apache.hadoop.io DataOutputBuffer DataOutputBuffer

Introduction

On this page you can find example usage for the org.apache.hadoop.io DataOutputBuffer constructor.

Prototype

DataOutputBuffer()
DataOutputBuffer(int size)
private DataOutputBuffer(Buffer buffer)
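
The examples below construct the buffer with the public DataOutputBuffer(int size) overload rather than the private Buffer-based constructor. As a quick orientation, here is a minimal sketch of the usual write-then-read-back pattern with the companion DataInputBuffer; the DataOutputBufferRoundTrip class name and the sample values are illustrative only, not taken from the examples on this page.

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

import java.io.IOException;

public class DataOutputBufferRoundTrip {
    public static void main(String[] args) throws IOException {
        // Pre-size the buffer; it grows automatically if more bytes are written.
        DataOutputBuffer out = new DataOutputBuffer(64);

        // DataOutputBuffer extends DataOutputStream, so any DataOutput write works.
        out.writeInt(42);
        new Text("hello").write(out);

        // getData() returns the backing array; only the first getLength() bytes are valid.
        byte[] data = out.getData();
        int length = out.getLength();

        // Read the bytes back with the companion DataInputBuffer.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(data, length);
        System.out.println(in.readInt());

        Text text = new Text();
        text.readFields(in);
        System.out.println(text);
    }
}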

Usage

From source file:org.commoncrawl.util.GZIPUtils.java

License:Apache License

/**
 * Returns a gunzipped copy of the input array, truncated to
 * <code>sizeLimit</code> bytes, if necessary. If the gzipped input has been
 * truncated or corrupted, a best-effort attempt is made to unzip as much as
 * possible. If no data can be extracted <code>null</code> is returned.
 */
public static final UnzipResult unzipBestEffort(byte[] in, int offset, int sizeIn, int sizeLimit) {

    try {
        // decompress using GZIPInputStream
        DataOutputBuffer outStream = new DataOutputBuffer(EXPECTED_COMPRESSION_RATIO * in.length);

        boolean truncated = false;

        GZIPInputStream inStream = new GZIPInputStream(new ByteArrayInputStream(in, offset, sizeIn));

        byte[] buf = new byte[BUF_SIZE];
        int written = 0;
        while (true) {
            try {
                int size = inStream.read(buf);
                if (size <= 0)
                    break;
                if ((written + size) > sizeLimit) {
                    outStream.write(buf, 0, sizeLimit - written);
                    truncated = true;
                    break;
                }
                outStream.write(buf, 0, size);
                written += size;
            } catch (Exception e) {
                // truncated or corrupted input: keep whatever has been unzipped so far
                break;
            }
        }
        try {
            outStream.close();
        } catch (IOException e) {
            // ignore close failures; the buffered data is still usable
        }

        return new UnzipResult(outStream.getData(), 0, outStream.getLength(), truncated);

    } catch (IOException e) {
        return null;
    } catch (OutOfMemoryError e) {
        LOG.fatal(CCStringUtils.stringifyException(e));
        return null;
    }
}
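
The method above relies on a few members of GZIPUtils that are not part of the excerpt: the pre-sizing factor EXPECTED_COMPRESSION_RATIO, the copy-buffer size BUF_SIZE, the LOG logger, and the UnzipResult holder. The following sketch shows plausible declarations so the excerpt can be read on its own; the values and field names are assumptions, and only the UnzipResult constructor signature is taken from the code above.

// Plausible supporting declarations; the actual values and field names in
// org.commoncrawl.util.GZIPUtils may differ.
private static final int EXPECTED_COMPRESSION_RATIO = 5;   // assumed pre-sizing factor
private static final int BUF_SIZE = 4096;                  // assumed copy-buffer size
private static final Log LOG = LogFactory.getLog(GZIPUtils.class);

// Assumed result holder: the decompressed bytes plus a truncation flag.
public static final class UnzipResult {
    public final byte[] data;
    public final int offset;
    public final int length;
    public final boolean wasTruncated;

    public UnzipResult(byte[] data, int offset, int length, boolean wasTruncated) {
        this.data = data;
        this.offset = offset;
        this.length = length;
        this.wasTruncated = wasTruncated;
    }
}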

From source file:org.seqdoop.hadoop_bam.TestVCFOutputFormat.java

License:Open Source License

@Test
public void testVariantContextReadWrite() throws IOException, InterruptedException {
    // This is to check whether issue https://github.com/HadoopGenomics/Hadoop-BAM/issues/1 has been
    // resolved.
    VariantContextBuilder vctx_builder = new VariantContextBuilder();

    ArrayList<Allele> alleles = new ArrayList<Allele>();
    alleles.add(Allele.create("C", false));
    alleles.add(Allele.create("G", true));
    vctx_builder.alleles(alleles);

    ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
    GenotypeBuilder builder = new GenotypeBuilder();
    genotypes.add(builder.alleles(alleles.subList(0, 1)).name("NA00001").GQ(48).DP(1).make());
    genotypes.add(builder.alleles(alleles.subList(0, 1)).name("NA00002").GQ(42).DP(2).make());
    genotypes.add(builder.alleles(alleles.subList(0, 1)).name("NA00003").GQ(39).DP(3).make());
    vctx_builder.genotypes(genotypes);

    HashSet<String> filters = new HashSet<String>();
    vctx_builder.filters(filters);

    HashMap<String, Object> attributes = new HashMap<String, Object>();
    attributes.put("NS", new Integer(4));
    vctx_builder.attributes(attributes);

    vctx_builder.loc("20", 2, 2);
    vctx_builder.log10PError(-8.0);

    VariantContext ctx = vctx_builder.make();
    VariantContextWithHeader ctxh = new VariantContextWithHeader(ctx, readHeader());
    writable.set(ctxh);

    DataOutputBuffer out = new DataOutputBuffer(1000);
    writable.write(out);

    byte[] data = out.getData();
    ByteArrayInputStream bis = new ByteArrayInputStream(data);

    writable = new VariantContextWritable();
    writable.readFields(new DataInputStream(bis));

    VariantContext vc = writable.get();
    Assert.assertArrayEquals("comparing Alleles", ctx.getAlleles().toArray(), vc.getAlleles().toArray());
    Assert.assertEquals("comparing Log10PError", ctx.getLog10PError(), vc.getLog10PError(), 0.01);
    Assert.assertArrayEquals("comparing Filters", ctx.getFilters().toArray(), vc.getFilters().toArray());
    Assert.assertEquals("comparing Attributes", ctx.getAttributes(), vc.getAttributes());

    // Now check the genotypes. Note: we need to make the header accessible before decoding the genotypes.
    GenotypesContext gc = vc.getGenotypes();
    Assert.assertTrue("expected a LazyVCFGenotypesContext", gc instanceof LazyVCFGenotypesContext);
    LazyVCFGenotypesContext.HeaderDataCache headerDataCache = new LazyVCFGenotypesContext.HeaderDataCache();
    headerDataCache.setHeader(readHeader());
    ((LazyVCFGenotypesContext) gc).getParser().setHeaderDataCache(headerDataCache);

    for (Genotype genotype : genotypes) {
        Assert.assertEquals("checking genotype name", genotype.getSampleName(),
                gc.get(genotypes.indexOf(genotype)).getSampleName());
        Assert.assertEquals("checking genotype quality", genotype.getGQ(),
                gc.get(genotypes.indexOf(genotype)).getGQ());
        Assert.assertEquals("checking genotype read depth", genotype.getDP(),
                gc.get(genotypes.indexOf(genotype)).getDP());
    }
}
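
The test above depends on a writable field and a readHeader() helper that are defined elsewhere in TestVCFOutputFormat. The sketch below shows one way such setup could look; the resource path and the use of htsjdk's VCFFileReader are assumptions, not the test's actual code.

import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;
import org.junit.Before;
import org.seqdoop.hadoop_bam.VariantContextWritable;

import java.io.File;
import java.io.IOException;

public class TestVCFOutputFormatSetupSketch {
    private VariantContextWritable writable;

    @Before
    public void setup() {
        writable = new VariantContextWritable();
    }

    private VCFHeader readHeader() throws IOException {
        // Hypothetical: parse the header from a test-resource VCF file; the real
        // test may obtain the header differently.
        File vcf = new File("src/test/resources/test.vcf");   // assumed path
        try (VCFFileReader reader = new VCFFileReader(vcf, false)) {
            return reader.getFileHeader();
        }
    }
}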