List of usage examples for the DataOutputBuffer constructor of org.apache.hadoop.io.DataOutputBuffer
private DataOutputBuffer(Buffer buffer)
From source file:org.commoncrawl.util.GZIPUtils.java
License:Apache License
/** * Returns an gunzipped copy of the input array, truncated to * <code>sizeLimit</code> bytes, if necessary. If the gzipped input has been * truncated or corrupted, a best-effort attempt is made to unzip as much as * possible. If no data can be extracted <code>null</code> is returned. *///from w w w . j ava 2s. c o m public static final UnzipResult unzipBestEffort(byte[] in, int offset, int sizeIn, int sizeLimit) { try { // decompress using GZIPInputStream DataOutputBuffer outStream = new DataOutputBuffer(EXPECTED_COMPRESSION_RATIO * in.length); boolean truncated = false; GZIPInputStream inStream = new GZIPInputStream(new ByteArrayInputStream(in, offset, sizeIn)); byte[] buf = new byte[BUF_SIZE]; int written = 0; while (true) { try { int size = inStream.read(buf); if (size <= 0) break; if ((written + size) > sizeLimit) { outStream.write(buf, 0, sizeLimit - written); truncated = true; break; } outStream.write(buf, 0, size); written += size; } catch (Exception e) { break; } } try { outStream.close(); } catch (IOException e) { } return new UnzipResult(outStream.getData(), 0, outStream.getLength(), truncated); } catch (IOException e) { return null; } catch (OutOfMemoryError e) { LOG.fatal(CCStringUtils.stringifyException(e)); return null; } }
From source file:org.seqdoop.hadoop_bam.TestVCFOutputFormat.java
License:Open Source License
@Test public void testVariantContextReadWrite() throws IOException, InterruptedException { // This is to check whether issue https://github.com/HadoopGenomics/Hadoop-BAM/issues/1 has been // resolved//from w w w . j av a 2 s .c om VariantContextBuilder vctx_builder = new VariantContextBuilder(); ArrayList<Allele> alleles = new ArrayList<Allele>(); alleles.add(Allele.create("C", false)); alleles.add(Allele.create("G", true)); vctx_builder.alleles(alleles); ArrayList<Genotype> genotypes = new ArrayList<Genotype>(); GenotypeBuilder builder = new GenotypeBuilder(); genotypes.add(builder.alleles(alleles.subList(0, 1)).name("NA00001").GQ(48).DP(1).make()); genotypes.add(builder.alleles(alleles.subList(0, 1)).name("NA00002").GQ(42).DP(2).make()); genotypes.add(builder.alleles(alleles.subList(0, 1)).name("NA00003").GQ(39).DP(3).make()); vctx_builder.genotypes(genotypes); HashSet<String> filters = new HashSet<String>(); vctx_builder.filters(filters); HashMap<String, Object> attributes = new HashMap<String, Object>(); attributes.put("NS", new Integer(4)); vctx_builder.attributes(attributes); vctx_builder.loc("20", 2, 2); vctx_builder.log10PError(-8.0); VariantContext ctx = vctx_builder.make(); VariantContextWithHeader ctxh = new VariantContextWithHeader(ctx, readHeader()); writable.set(ctxh); DataOutputBuffer out = new DataOutputBuffer(1000); writable.write(out); byte[] data = out.getData(); ByteArrayInputStream bis = new ByteArrayInputStream(data); writable = new VariantContextWritable(); writable.readFields(new DataInputStream(bis)); VariantContext vc = writable.get(); Assert.assertArrayEquals("comparing Alleles", ctx.getAlleles().toArray(), vc.getAlleles().toArray()); Assert.assertEquals("comparing Log10PError", ctx.getLog10PError(), vc.getLog10PError(), 0.01); Assert.assertArrayEquals("comparing Filters", ctx.getFilters().toArray(), vc.getFilters().toArray()); Assert.assertEquals("comparing Attributes", ctx.getAttributes(), vc.getAttributes()); // Now check the 
genotypes. Note: we need to make the header accessible before decoding the genotypes. GenotypesContext gc = vc.getGenotypes(); assert (gc instanceof LazyVCFGenotypesContext); LazyVCFGenotypesContext.HeaderDataCache headerDataCache = new LazyVCFGenotypesContext.HeaderDataCache(); headerDataCache.setHeader(readHeader()); ((LazyVCFGenotypesContext) gc).getParser().setHeaderDataCache(headerDataCache); for (Genotype genotype : genotypes) { Assert.assertEquals("checking genotype name", genotype.getSampleName(), gc.get(genotypes.indexOf(genotype)).getSampleName()); Assert.assertEquals("checking genotype quality", genotype.getGQ(), gc.get(genotypes.indexOf(genotype)).getGQ()); Assert.assertEquals("checking genotype read depth", genotype.getDP(), gc.get(genotypes.indexOf(genotype)).getDP()); } }