Usage examples for org.apache.hadoop.io.DataInputBuffer.reset
public void reset(byte[] input, int length)
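Before the project examples below, here is a minimal sketch of the usual round trip: serialize a Writable into a DataOutputBuffer, then call reset(byte[], int) to point a reusable DataInputBuffer at the backing array so the same bytes can be read back without copying. DataInputBuffer, DataOutputBuffer, and Text are standard org.apache.hadoop.io types; the wrapping class and main method are illustrative only.

import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

public class DataInputBufferResetExample {
    public static void main(String[] args) throws IOException {
        // Serialize a Writable into an in-memory buffer.
        DataOutputBuffer out = new DataOutputBuffer();
        new Text("hello").write(out);

        // reset() points the input buffer at the serialized bytes.
        // Pass out.getLength(), not out.getData().length: the backing
        // array is usually larger than the valid data.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());

        // Deserialize back into a fresh Writable.
        Text roundTripped = new Text();
        roundTripped.readFields(in);
        System.out.println(roundTripped); // prints "hello"
    }
}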
From source file: org.commoncrawl.util.CompressedURLFPListV2.java
License: Open Source License
public static void main(String[] args) {
    // initialize ...
    final Configuration conf = new Configuration();

    conf.addResource("nutch-default.xml");
    conf.addResource("nutch-site.xml");
    conf.addResource("core-site.xml");
    conf.addResource("hdfs-site.xml");
    conf.addResource("mapred-site.xml");

    BasicConfigurator.configure();
    CrawlEnvironment.setHadoopConfig(conf);

    try {
        FileSystem fs = CrawlEnvironment.getDefaultFileSystem();

        Path testFile = new Path("crawl/linkdb/merged1282844121161/linkData/part-00000");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, testFile, conf);

        URLFPV2 fp = new URLFPV2();
        BytesWritable bytes = new BytesWritable();

        while (reader.next(fp, bytes)) {
            if (bytes.getLength() != 0) {
                // point a DataInputBuffer at the valid portion of the value bytes
                DataInputBuffer inputStream = new DataInputBuffer();
                inputStream.reset(bytes.get(), bytes.getLength());

                CompressedURLFPListV2.Reader listReader = new CompressedURLFPListV2.Reader(inputStream);

                while (listReader.hasNext()) {
                    URLFPV2 nextFP = listReader.next();
                    LOG.info("DH:" + nextFP.getDomainHash() + " UH:" + nextFP.getUrlHash());
                }
            } else {
                LOG.error("ZERO BYTE LIST!");
            }
        }
        reader.close();
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    }

    // short-circuit: everything below is disabled
    // (the 1 == 1 guard avoids an unreachable-code compile error)
    if (1 == 1)
        return;

    validateDuplicateChecking();
    // validateReallyBigList();
    validateURLFPSerializationRootDomain();
    validateURLFPSerializationSingleSubDomain();
    validateURLFPSerializationMultiDomain();
    validateURLFPFlagSerializationRootDomain();
    validateURLFPFlagSerializationMultipleSubDomains();
    validateURLFPFlagSerializationOneSubDomain();
}
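Note how reset(...) wraps the BytesWritable's backing array in place: nothing is copied, and passing bytes.getLength() rather than the raw array length matters because BytesWritable over-allocates its buffer. (bytes.get() is the older accessor; recent Hadoop versions deprecate it in favor of getBytes().)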
From source file: org.commoncrawl.util.TimeSeriesDataFile.java
License: Open Source License
private void doCommonRead(ArrayList<KeyValueTuple<Long, ValueType>> valuesOut, RandomAccessFile file,
        long headerOffset, long endOfPrevRecord, int currentRecordLength, int recordsToRead,
        long optionalMinKeyValue) throws IOException {

    Buffer recordBuffer = new Buffer();
    DataInputBuffer inputBuffer = new DataInputBuffer();

    // ok start walking backwards ...
    while (recordsToRead != 0) {
        // setup new previous record pos pointer
        endOfPrevRecord = endOfPrevRecord - currentRecordLength - 4;
        // and seek to 4 bytes before it, to pick up the preceding record's length
        file.seek(endOfPrevRecord - 4);

        recordBuffer.setCapacity(currentRecordLength + 8);
        // read in the proper amount of data ... (readFully guards against a short read)
        file.readFully(recordBuffer.get(), 0, currentRecordLength + 8);

        // ok initialize input buffer ...
        inputBuffer.reset(recordBuffer.get(), currentRecordLength + 8);

        // now read next record length first ...
        int nextRecordLength = inputBuffer.readInt();
        // next read sync bytes ...
        int syncBytes = inputBuffer.readInt();
        // validate
        if (syncBytes != SyncBytes) {
            throw new IOException("Corrupt Record Detected!");
        }
        // ok read real record bytes ...
        int realRecordBytes = inputBuffer.readInt();
        // read crc ...
        long crcValue = inputBuffer.readLong();
        // ok validate crc ...
        crc.reset();
        crc.update(inputBuffer.getData(), inputBuffer.getPosition(), realRecordBytes - 8);
        if (crcValue != crc.getValue()) {
            throw new IOException("CRC Mismatch!");
        }

        // ok now read key and value
        try {
            long key = WritableUtils.readVLong(inputBuffer);
            if (optionalMinKeyValue != -1 && key < optionalMinKeyValue) {
                break;
            }
            ValueType value = (ValueType) valueClass.newInstance();
            value.readFields(inputBuffer);

            KeyValueTuple<Long, ValueType> tuple = new KeyValueTuple<Long, ValueType>(key, value);
            tuple.recordPos = endOfPrevRecord;
            valuesOut.add(0, tuple);
        } catch (Exception e) {
            throw new IOException(e);
        }

        currentRecordLength = nextRecordLength;
        recordsToRead--;

        if (endOfPrevRecord == headerOffset)
            break;
    }
}
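Walking the file backwards works because, judging from the reads above, each record is preceded by a 4-byte field holding the length of the record before it, and then begins with sync bytes, a payload length, and a CRC. A single DataInputBuffer serves the entire walk: each reset(...) simply re-points it at the freshly read bytes, so no new streams or buffers are allocated per record.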
From source file: org.mrgeo.data.accumulo.image.AccumuloMrsImageReader.java
License: Apache License
protected MrGeoRaster toNonWritable(byte[] val, CompressionCodec codec, Decompressor decompressor)
        throws IOException {
    DataInputBuffer dib = new DataInputBuffer();
    dib.reset(val, val.length);

    RasterWritable rw = new RasterWritable();
    rw.readFields(dib);

    if (codec == null || decompressor == null) {
        return RasterWritable.toMrGeoRaster(rw);
    }
    return RasterWritable.toMrGeoRaster(rw, codec, decompressor);
}
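The same idiom recurs in the next two examples: take the raw byte[] from an Accumulo Value, reset a DataInputBuffer over it, and let RasterWritable.readFields(...) deserialize straight from those bytes, with no intermediate stream or copy.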
From source file: org.mrgeo.data.accumulo.image.AccumuloMrsPyramidInputFormat.java
License: Apache License
public static RecordReader<TileIdWritable, RasterWritable> makeRecordReader() {
    return new RecordReaderBase<TileIdWritable, RasterWritable>() {
        @Override
        public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
            // RangeInputSplit ris = (RangeInputSplit) ((TiledInputSplit)inSplit).getWrappedSplit();
            //
            // log.info("initializing with instance of " + ris.getInstanceName());
            // log.info("initializing with auths of " + ris.getAuths().toString());
            //
            // super.initialize(((TiledInputSplit)inSplit).getWrappedSplit(), attempt);

            log.info("initializing input splits of type " + inSplit.getClass().getCanonicalName());

            String[] locs;
            try {
                locs = inSplit.getLocations();
                for (int x = 0; x < locs.length; x++) {
                    log.info("location " + x + " -> " + locs[x]);
                }
            } catch (InterruptedException ie) {
                log.error("Exception thrown", ie);
                return;
            }

            if (inSplit instanceof TiledInputSplit) {
                // deal with this
                org.apache.accumulo.core.client.mapreduce.RangeInputSplit ris =
                        new org.apache.accumulo.core.client.mapreduce.RangeInputSplit();
                InputSplit inS = ((TiledInputSplit) inSplit).getWrappedSplit();

                log.info("input split class: " + inS.getClass().getCanonicalName());

                long startId = ((TiledInputSplit) inSplit).getStartTileId();
                long endId = ((TiledInputSplit) inSplit).getEndTileId();
                Key startKey = AccumuloUtils.toKey(startId);
                Key endKey = AccumuloUtils.toKey(endId);
                int zoomL = ((TiledInputSplit) inSplit).getZoomLevel();
                Range r = new Range(startKey, endKey);

                log.info("Zoom Level = " + zoomL);
                log.info("Range " + startId + " to " + endId);

                try {
                    locs = inS.getLocations();
                    for (int x = 0; x < locs.length; x++) {
                        log.info("split " + x + " -> " + locs[x]);
                    }
                    ris.setRange(r);
                    ris.setLocations(locs);
                    ris.setTableName(
                            ((org.apache.accumulo.core.client.mapreduce.RangeInputSplit) inS).getTableName());
                    ris.setTableId(
                            ((org.apache.accumulo.core.client.mapreduce.RangeInputSplit) inS).getTableId());
                    // there can be more added here
                } catch (InterruptedException ie) {
                    throw new RuntimeErrorException(new Error(ie.getMessage()));
                }

                log.info("table " + ris.getTableName() + " is offline: " + ris.isOffline());

                super.initialize(ris, attempt);
                //super.initialize(((TiledInputSplit) inSplit).getWrappedSplit(), attempt);
            } else {
                super.initialize(inSplit, attempt);
            }
        } // end initialize

        @Override
        public void close() {
            log.info("Record Reader closing!");
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (scannerIterator.hasNext()) {
                ++numKeysRead;
                Entry<Key, Value> entry = scannerIterator.next();

                // transform key and value
                long id = AccumuloUtils.toLong(entry.getKey().getRow());
                currentKey = entry.getKey();
                //currentValue = entry.getValue();

                log.info("Processing " + id + " -> " + entry.getValue().getSize());
                currentK = new TileIdWritable(id);

                DataInputBuffer dib = new DataInputBuffer();
                byte[] data = entry.getValue().get();
                dib.reset(data, data.length);

                currentV = new RasterWritable();
                currentV.readFields(dib);

                //log.info("current key = " + id);
                // if (log.isTraceEnabled())
                //     log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
                return true;
            }
            return false;
        }
    }; // end RecordReaderBase
}
From source file: org.mrgeo.data.accumulo.utils.AccumuloUtils.java
License: Apache License
/**
 * Get a single raster.
 *
 * @param table the table containing the raster
 * @param tid   the tile id to get
 * @param zl    the zoom level of the raster
 * @param conn  the Accumulo connector to use
 * @param auths the authorizations to use for access
 * @return the raster for the given tile, or null if it was not found
 */
@Deprecated
@SuppressWarnings("squid:S1166") // TableNotFoundException are caught and ignored. This is OK
public static RasterWritable getRaster(String table, long tid, int zl, Connector conn, String auths) {
    RasterWritable retRaster = null;
    Authorizations authorizations = createAuthorizationsFromDelimitedString(auths);
    Scanner scanner = null;
    try {
        scanner = conn.createScanner(table, authorizations);
        Range r = new Range(toRowId(tid), toRowId(tid + 1));
        scanner.setRange(r);
        scanner.fetchColumnFamily(new Text(Integer.toString(zl).getBytes()));

        for (Entry<Key, Value> entry : scanner) {
            System.out.println("Key: " + entry.getKey().toString());

            DataInputBuffer dib = new DataInputBuffer();
            byte[] data = entry.getValue().get();
            dib.reset(data, data.length);

            retRaster = new RasterWritable();
            retRaster.readFields(dib);
            break;
        }
    } catch (TableNotFoundException ignored) {
    } catch (IOException e) {
        log.error("Exception thrown", e);
    } finally {
        // release server-side resources held by the scanner
        if (scanner != null) {
            scanner.close();
        }
    }
    return retRaster;
}
From source file: org.uv.himongo.io.BSONWritable.java
License: Apache License
/**
 * Used by child copy constructors.
 *
 * @param other the Writable to copy state from
 */
protected synchronized void copy(Writable other) {
    if (other != null) {
        try {
            DataOutputBuffer out = new DataOutputBuffer();
            other.write(out);

            DataInputBuffer in = new DataInputBuffer();
            in.reset(out.getData(), out.getLength());

            readFields(in);
        } catch (IOException e) {
            throw new IllegalArgumentException("map cannot be copied: " + e.getMessage());
        }
    } else {
        throw new IllegalArgumentException("source map cannot be null");
    }
}
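This write-then-readFields round trip through paired buffers is the standard way to deep-copy an arbitrary Writable when only the interface type is known: other.write(out) captures the object's full serialized state, and in.reset(out.getData(), out.getLength()) replays exactly those bytes into readFields(...).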