Example usage for org.apache.hadoop.io DataOutputBuffer DataOutputBuffer

List of usage examples for org.apache.hadoop.io DataOutputBuffer DataOutputBuffer

Introduction

On this page you can find example usage for the org.apache.hadoop.io DataOutputBuffer constructor.

Prototype

DataOutputBuffer()
DataOutputBuffer(int size)
private DataOutputBuffer(Buffer buffer)
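
The examples below construct the buffer with the public DataOutputBuffer(int size) overload rather than the private Buffer-based constructor. As a quick orientation, here is a minimal sketch of the usual write-then-read-back pattern with the companion DataInputBuffer; the DataOutputBufferRoundTrip class name and the sample values are illustrative only, not taken from the examples on this page.

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

import java.io.IOException;

public class DataOutputBufferRoundTrip {
    public static void main(String[] args) throws IOException {
        // Pre-size the buffer; it grows automatically if more bytes are written.
        DataOutputBuffer out = new DataOutputBuffer(64);

        // DataOutputBuffer extends DataOutputStream, so any DataOutput write works.
        out.writeInt(42);
        new Text("hello").write(out);

        // getData() returns the backing array; only the first getLength() bytes are valid.
        byte[] data = out.getData();
        int length = out.getLength();

        // Read the bytes back with the companion DataInputBuffer.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(data, length);
        System.out.println(in.readInt());

        Text text = new Text();
        text.readFields(in);
        System.out.println(text);
    }
}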

Usage

From source file:org.commoncrawl.util.GZIPUtils.java

License:Apache License

/**
 * Returns a gunzipped copy of the input array, truncated to
 * <code>sizeLimit</code> bytes, if necessary. If the gzipped input has been
 * truncated or corrupted, a best-effort attempt is made to unzip as much as
 * possible. If no data can be extracted <code>null</code> is returned.
 */
public static final UnzipResult unzipBestEffort(byte[] in, int offset, int sizeIn, int sizeLimit) {

    try {
        // decompress using GZIPInputStream
        DataOutputBuffer outStream = new DataOutputBuffer(EXPECTED_COMPRESSION_RATIO * in.length);

        boolean truncated = false;

        GZIPInputStream inStream = new GZIPInputStream(new ByteArrayInputStream(in, offset, sizeIn));

        byte[] buf = new byte[BUF_SIZE];
        int written = 0;
        while (true) {
            try {
                int size = inStream.read(buf);
                if (size <= 0)
                    break;
                if ((written + size) > sizeLimit) {
                    outStream.write(buf, 0, sizeLimit - written);
                    truncated = true;
                    break;
                }
                outStream.write(buf, 0, size);
                written += size;
            } catch (Exception e) {
                // truncated or corrupted input: keep whatever has been unzipped so far
                break;
            }
        }
        try {
            outStream.close();
        } catch (IOException e) {
            // ignore close failures; the buffered data is still usable
        }

        return new UnzipResult(outStream.getData(), 0, outStream.getLength(), truncated);

    } catch (IOException e) {
        return null;
    } catch (OutOfMemoryError e) {
        LOG.fatal(CCStringUtils.stringifyException(e));
        return null;
    }
}
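
The method above relies on a few members of GZIPUtils that are not part of the excerpt: the pre-sizing factor EXPECTED_COMPRESSION_RATIO, the copy-buffer size BUF_SIZE, the LOG logger, and the UnzipResult holder. The following sketch shows plausible declarations so the excerpt can be read on its own; the values and field names are assumptions, and only the UnzipResult constructor signature is taken from the code above.

// Plausible supporting declarations; the actual values and field names in
// org.commoncrawl.util.GZIPUtils may differ.
private static final int EXPECTED_COMPRESSION_RATIO = 5;   // assumed pre-sizing factor
private static final int BUF_SIZE = 4096;                  // assumed copy-buffer size
private static final Log LOG = LogFactory.getLog(GZIPUtils.class);

// Assumed result holder: the decompressed bytes plus a truncation flag.
public static final class UnzipResult {
    public final byte[] data;
    public final int offset;
    public final int length;
    public final boolean wasTruncated;

    public UnzipResult(byte[] data, int offset, int length, boolean wasTruncated) {
        this.data = data;
        this.offset = offset;
        this.length = length;
        this.wasTruncated = wasTruncated;
    }
}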

From source file:org.seqdoop.hadoop_bam.TestVCFOutputFormat.java

License:Open Source License

@Test
public void testVariantContextReadWrite() throws IOException, InterruptedException {
    // This is to check whether issue https://github.com/HadoopGenomics/Hadoop-BAM/issues/1 has been
    // resolved.
    VariantContextBuilder vctx_builder = new VariantContextBuilder();

    ArrayList<Allele> alleles = new ArrayList<Allele>();
    alleles.add(Allele.create("C", false));
    alleles.add(Allele.create("G", true));
    vctx_builder.alleles(alleles);

    ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
    GenotypeBuilder builder = new GenotypeBuilder();
    genotypes.add(builder.alleles(alleles.subList(0, 1)).name("NA00001").GQ(48).DP(1).make());
    genotypes.add(builder.alleles(alleles.subList(0, 1)).name("NA00002").GQ(42).DP(2).make());
    genotypes.add(builder.alleles(alleles.subList(0, 1)).name("NA00003").GQ(39).DP(3).make());
    vctx_builder.genotypes(genotypes);

    HashSet<String> filters = new HashSet<String>();
    vctx_builder.filters(filters);

    HashMap<String, Object> attributes = new HashMap<String, Object>();
    attributes.put("NS", new Integer(4));
    vctx_builder.attributes(attributes);

    vctx_builder.loc("20", 2, 2);
    vctx_builder.log10PError(-8.0);

    VariantContext ctx = vctx_builder.make();
    VariantContextWithHeader ctxh = new VariantContextWithHeader(ctx, readHeader());
    writable.set(ctxh);

    DataOutputBuffer out = new DataOutputBuffer(1000);
    writable.write(out);

    byte[] data = out.getData();
    ByteArrayInputStream bis = new ByteArrayInputStream(data);

    writable = new VariantContextWritable();
    writable.readFields(new DataInputStream(bis));

    VariantContext vc = writable.get();
    Assert.assertArrayEquals("comparing Alleles", ctx.getAlleles().toArray(), vc.getAlleles().toArray());
    Assert.assertEquals("comparing Log10PError", ctx.getLog10PError(), vc.getLog10PError(), 0.01);
    Assert.assertArrayEquals("comparing Filters", ctx.getFilters().toArray(), vc.getFilters().toArray());
    Assert.assertEquals("comparing Attributes", ctx.getAttributes(), vc.getAttributes());

    // Now check the genotypes. Note: we need to make the header accessible before decoding the genotypes.
    GenotypesContext gc = vc.getGenotypes();
    Assert.assertTrue("expected a LazyVCFGenotypesContext", gc instanceof LazyVCFGenotypesContext);
    LazyVCFGenotypesContext.HeaderDataCache headerDataCache = new LazyVCFGenotypesContext.HeaderDataCache();
    headerDataCache.setHeader(readHeader());
    ((LazyVCFGenotypesContext) gc).getParser().setHeaderDataCache(headerDataCache);

    for (Genotype genotype : genotypes) {
        Assert.assertEquals("checking genotype name", genotype.getSampleName(),
                gc.get(genotypes.indexOf(genotype)).getSampleName());
        Assert.assertEquals("checking genotype quality", genotype.getGQ(),
                gc.get(genotypes.indexOf(genotype)).getGQ());
        Assert.assertEquals("checking genotype read depth", genotype.getDP(),
                gc.get(genotypes.indexOf(genotype)).getDP());
    }
}
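
The test above depends on a writable field and a readHeader() helper that are defined elsewhere in TestVCFOutputFormat. The sketch below shows one way such setup could look; the resource path and the use of htsjdk's VCFFileReader are assumptions, not the test's actual code.

import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;
import org.junit.Before;
import org.seqdoop.hadoop_bam.VariantContextWritable;

import java.io.File;
import java.io.IOException;

public class TestVCFOutputFormatSetupSketch {
    private VariantContextWritable writable;

    @Before
    public void setup() {
        writable = new VariantContextWritable();
    }

    private VCFHeader readHeader() throws IOException {
        // Hypothetical: parse the header from a test-resource VCF file; the real
        // test may obtain the header differently.
        File vcf = new File("src/test/resources/test.vcf");   // assumed path
        try (VCFFileReader reader = new VCFFileReader(vcf, false)) {
            return reader.getFileHeader();
        }
    }
}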