List of usage examples for org.apache.hadoop.io.DataInputBuffer (default constructor)
public DataInputBuffer()
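Before the project examples below, here is a minimal, self-contained sketch of the common pattern they all share: serialize a Writable into a DataOutputBuffer, then wrap the backing byte array with DataInputBuffer.reset() and read it back. The IntWritable value and class name are illustrative only and do not come from the examples that follow.

import java.io.IOException;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;

public class DataInputBufferRoundTrip {
    public static void main(String[] args) throws IOException {
        // serialize a Writable into an in-memory output buffer
        DataOutputBuffer out = new DataOutputBuffer();
        new IntWritable(42).write(out);

        // wrap the raw bytes for reading; reset(data, length) and
        // reset(data, start, length) are the overloads used in the examples below
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), 0, out.getLength());

        // deserialize back into a fresh Writable instance
        IntWritable value = new IntWritable();
        value.readFields(in);
        System.out.println(value.get()); // prints 42
    }
}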
From source file: org.commoncrawl.util.TimeSeriesDataFile.java
License: Open Source License

private void doCommonRead(ArrayList<KeyValueTuple<Long, ValueType>> valuesOut, RandomAccessFile file,
    long headerOffset, long endOfPrevRecord, int currentRecordLength, int recordsToRead,
    long optionalMinKeyValue) throws IOException {
  Buffer recordBuffer = new Buffer();
  DataInputBuffer inputBuffer = new DataInputBuffer();
  // ok start walking backwards ...
  while (recordsToRead != 0) {
    // setup new previous record pos pointer
    endOfPrevRecord = endOfPrevRecord - currentRecordLength - 4;
    // and seek to it endOfLastRecord - 4
    file.seek(endOfPrevRecord - 4);
    recordBuffer.setCapacity(currentRecordLength + 8);
    // read in proper amount of data ...
    file.read(recordBuffer.get(), 0, currentRecordLength + 8);
    // ok initialize input buffer ...
    inputBuffer.reset(recordBuffer.get(), currentRecordLength + 8);
    // now read next record length first ...
    int nextRecordLength = inputBuffer.readInt();
    // next read sync bytes ...
    int syncBytes = inputBuffer.readInt();
    // validate
    if (syncBytes != SyncBytes) {
      throw new IOException("Corrupt Record Detected!");
    }
    // ok read real record bytes ...
    int realRecordBytes = inputBuffer.readInt();
    // read crc ...
    long crcValue = inputBuffer.readLong();
    // ok validate crc ...
    crc.reset();
    crc.update(inputBuffer.getData(), inputBuffer.getPosition(), realRecordBytes - 8);
    if (crcValue != crc.getValue()) {
      throw new IOException("CRC Mismatch!");
    }
    // ok now read key and value
    try {
      long key = WritableUtils.readVLong(inputBuffer);
      if (optionalMinKeyValue != -1 && key < optionalMinKeyValue) {
        break;
      }
      ValueType value = (ValueType) valueClass.newInstance();
      value.readFields(inputBuffer);
      KeyValueTuple tuple = new KeyValueTuple<Long, ValueType>(key, value);
      tuple.recordPos = endOfPrevRecord;
      valuesOut.add(0, tuple);
    } catch (Exception e) {
      throw new IOException(e);
    }
    currentRecordLength = nextRecordLength;
    recordsToRead--;
    if (endOfPrevRecord == headerOffset)
      break;
  }
}
From source file: org.commoncrawl.util.TimeSeriesDataFile.java
License: Open Source License

/**
 * get the key value of the last record in the file
 * @return record key as a long or -1 if zero records in file
 * @throws IOException
 */
public synchronized long getLastRecordKey() throws IOException {
  LogFileHeader header = new LogFileHeader();
  if (fileName.exists()) {
    RandomAccessFile file = new RandomAccessFile(fileName, "r");
    Buffer recordBuffer = new Buffer();
    DataInputBuffer inputBuffer = new DataInputBuffer();
    try {
      // read header ...
      long headerOffset = readLogFileHeader(file, header);
      return header._lastRecordKey;
    } finally {
      if (file != null) {
        file.close();
      }
    }
  }
  return -1;
}
From source file: org.commoncrawl.util.TimeSeriesDataFile.java
License: Open Source License

/**
 * get the number of records in the file
 *
 * @return record count in file
 * @throws IOException
 */
public synchronized int getRecordCount() throws IOException {
  LogFileHeader header = new LogFileHeader();
  if (fileName.exists()) {
    RandomAccessFile file = new RandomAccessFile(fileName, "r");
    Buffer recordBuffer = new Buffer();
    DataInputBuffer inputBuffer = new DataInputBuffer();
    try {
      // read header ...
      long headerOffset = readLogFileHeader(file, header);
      return header._itemCount;
    } finally {
      if (file != null) {
        file.close();
      }
    }
  }
  return 0;
}
From source file: org.commoncrawl.util.Tuples.java
License: Open Source License

static void validateTextTuple() {
  // validate tuple code
  IntAndTwoTextByteTuples tuple1 = new IntAndTwoTextByteTuples();
  IntAndTwoTextByteTuples tuple2 = new IntAndTwoTextByteTuples();
  tuple1.setIntValue(1);
  tuple2.setIntValue(1);
  tuple1.setTextValueBytes(new TextBytes("AAAAA"));
  tuple2.setTextValueBytes(new TextBytes("AAAAA"));
  tuple1.setSecondTextValueBytes(new TextBytes("AAAAA"));
  tuple2.setSecondTextValueBytes(new TextBytes("AAAAB"));
  // compare the two
  Assert.assertTrue(tuple1.compareTo(tuple2) == -1);
  tuple1.setTextValueBytes(new TextBytes("BAAAA"));
  Assert.assertTrue(tuple1.compareTo(tuple2) == 1);
  tuple2.setIntValue(2);
  Assert.assertTrue(tuple1.compareTo(tuple2) == -1);
  // ok restore ...
  tuple1.setTextValueBytes(new TextBytes("AAAAA"));
  tuple2.setTextValueBytes(new TextBytes("AAAAA"));
  tuple1.setSecondTextValueBytes(new TextBytes("AAAAA"));
  tuple2.setSecondTextValueBytes(new TextBytes("AAAAB"));
  DataOutputBuffer outputBuffer = new DataOutputBuffer();
  try {
    tuple1.write(outputBuffer);
    tuple2.write(outputBuffer);
    IntAndTwoTextByteTuples tuple3 = new IntAndTwoTextByteTuples();
    IntAndTwoTextByteTuples tuple4 = new IntAndTwoTextByteTuples();
    DataInputBuffer inputBuffer = new DataInputBuffer();
    inputBuffer.reset(outputBuffer.getData(), 0, outputBuffer.getLength());
    tuple3.readFields(inputBuffer);
    tuple4.readFields(inputBuffer);
    Assert.assertTrue(tuple3.compareTo(tuple1) == 0);
    Assert.assertTrue(tuple4.compareTo(tuple2) == 0);
  } catch (IOException e) {
    e.printStackTrace();
  }
}
From source file: org.commoncrawl.util.Tuples.java
License: Open Source License

static void validateBufferTuple() {
  // run some tests on the new code
  String aTestString = new String("A Test Strnig");
  // convert it to bytes
  byte bytes[] = aTestString.getBytes();
  // over allocate an array
  byte overAllocated[] = new byte[bytes.length * 2];
  // copy source
  System.arraycopy(bytes, 0, overAllocated, bytes.length, bytes.length);
  IntBufferTuple tuple1 = new IntBufferTuple();
  IntBufferTuple tuple2 = new IntBufferTuple();
  tuple1.setIntValue(1);
  tuple2.setIntValue(1);
  tuple1.getBuffer().set(overAllocated, bytes.length, bytes.length);
  tuple2.getBuffer().set(overAllocated, bytes.length, bytes.length);
  Assert.assertTrue(tuple1.compareTo(tuple2) == 0);
  DataOutputBuffer outputBuffer = new DataOutputBuffer();
  try {
    tuple1.write(outputBuffer);
    tuple2.write(outputBuffer);
    DataInputBuffer inputBuffer = new DataInputBuffer();
    inputBuffer.reset(outputBuffer.getData(), 0, outputBuffer.getLength());
    tuple1.readFields(inputBuffer);
    tuple2.readFields(inputBuffer);
    Assert.assertTrue(tuple1.compareTo(tuple2) == 0);
    DataOutputBuffer outputBuffer2 = new DataOutputBuffer();
    tuple1.write(outputBuffer2);
    tuple2.write(outputBuffer2);
    Assert.assertTrue(WritableComparator.compareBytes(outputBuffer.getData(), 0, outputBuffer.getLength(),
        outputBuffer2.getData(), 0, outputBuffer2.getLength()) == 0);
  } catch (IOException e) {
    e.printStackTrace();
  }
}
From source file: org.goldenorb.io.input.VertexInput.java
License: Apache License

@SuppressWarnings("unchecked")
public void initialize() {
  // rebuild the input split
  org.apache.hadoop.mapreduce.InputSplit split = null;
  DataInputBuffer splitBuffer = new DataInputBuffer();
  splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
  SerializationFactory factory = new SerializationFactory(orbConf);
  Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer;
  try {
    deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
        .getDeserializer(orbConf.getClassByName(splitClass));
    deserializer.open(splitBuffer);
    split = deserializer.deserialize(null);
    JobConf job = new JobConf(orbConf);
    JobContext jobContext = new JobContext(job, new JobID(getOrbConf().getJobNumber(), 0));
    InputFormat<INPUT_KEY, INPUT_VALUE> inputFormat;
    inputFormat = (InputFormat<INPUT_KEY, INPUT_VALUE>) ReflectionUtils
        .newInstance(jobContext.getInputFormatClass(), orbConf);
    TaskAttemptContext tao = new TaskAttemptContext(job,
        new TaskAttemptID(new TaskID(jobContext.getJobID(), true, partitionID), 0));
    recordReader = inputFormat.createRecordReader(split, tao);
    recordReader.initialize(split, tao);
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e);
  } catch (IOException e) {
    throw new RuntimeException(e);
  } catch (InterruptedException e) {
    throw new RuntimeException(e);
  }
}
From source file: org.mrgeo.data.accumulo.image.AccumuloMrsImageReader.java
License: Apache License

protected MrGeoRaster toNonWritable(byte[] val, CompressionCodec codec, Decompressor decompressor)
    throws IOException {
  // wrap the raw value bytes and deserialize the RasterWritable from them
  DataInputBuffer dib = new DataInputBuffer();
  dib.reset(val, val.length);
  RasterWritable rw = new RasterWritable();
  rw.readFields(dib);
  // no codec/decompressor means the raster was stored uncompressed
  if (codec == null || decompressor == null) {
    return RasterWritable.toMrGeoRaster(rw);
  }
  return RasterWritable.toMrGeoRaster(rw, codec, decompressor);
}
From source file: org.mrgeo.data.accumulo.image.AccumuloMrsPyramidInputFormat.java
License: Apache License

public static RecordReader<TileIdWritable, RasterWritable> makeRecordReader() {
  return new RecordReaderBase<TileIdWritable, RasterWritable>() {
    @Override
    public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
      // RangeInputSplit ris = (RangeInputSplit) ((TiledInputSplit)inSplit).getWrappedSplit();
      //
      // log.info("initializing with instance of " + ris.getInstanceName());
      // log.info("initializing with auths of " + ris.getAuths().toString());
      //
      // super.initialize(((TiledInputSplit)inSplit).getWrappedSplit(), attempt);
      log.info("initializing input splits of type " + inSplit.getClass().getCanonicalName());
      String[] locs;
      try {
        locs = inSplit.getLocations();
        for (int x = 0; x < locs.length; x++) {
          log.info("location " + x + " -> " + locs[x]);
        }
      } catch (InterruptedException ie) {
        log.error("Exception thrown", ie);
        return;
      }
      if (inSplit instanceof TiledInputSplit) {
        // deal with this
        org.apache.accumulo.core.client.mapreduce.RangeInputSplit ris =
            new org.apache.accumulo.core.client.mapreduce.RangeInputSplit();
        InputSplit inS = ((TiledInputSplit) inSplit).getWrappedSplit();
        log.info("input split class: " + inS.getClass().getCanonicalName());
        long startId = ((TiledInputSplit) inSplit).getStartTileId();
        long endId = ((TiledInputSplit) inSplit).getEndTileId();
        Key startKey = AccumuloUtils.toKey(startId);
        Key endKey = AccumuloUtils.toKey(endId);
        int zoomL = ((TiledInputSplit) inSplit).getZoomLevel();
        Range r = new Range(startKey, endKey);
        log.info("Zoom Level = " + zoomL);
        log.info("Range " + startId + " to " + endId);
        try {
          locs = inS.getLocations();
          for (int x = 0; x < locs.length; x++) {
            log.info("split " + x + " -> " + locs[x]);
          }
          ris.setRange(r);
          ris.setLocations(locs);
          ris.setTableName(((org.apache.accumulo.core.client.mapreduce.RangeInputSplit) inS).getTableName());
          ris.setTableId(((org.apache.accumulo.core.client.mapreduce.RangeInputSplit) inS).getTableId());
          // there can be more added here
        } catch (InterruptedException ie) {
          throw new RuntimeErrorException(new Error(ie.getMessage()));
        }
        log.info("table " + ris.getTableName() + " is offline: " + ris.isOffline());
        super.initialize(ris, attempt);
        // super.initialize(((TiledInputSplit) inSplit).getWrappedSplit(), attempt);
      } else {
        super.initialize(inSplit, attempt);
      }
    } // end initialize

    @Override
    public void close() {
      log.info("Record Reader closing!");
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      if (scannerIterator.hasNext()) {
        ++numKeysRead;
        Entry<Key, Value> entry = scannerIterator.next();
        // transform key and value
        long id = AccumuloUtils.toLong(entry.getKey().getRow());
        currentKey = entry.getKey();
        // currentValue = entry.getValue();
        log.info("Processing " + id + " -> " + entry.getValue().getSize());
        currentK = new TileIdWritable(id);
        DataInputBuffer dib = new DataInputBuffer();
        byte[] data = entry.getValue().get();
        dib.reset(data, data.length);
        currentV = new RasterWritable();
        currentV.readFields(dib);
        // log.info("current key = " + id);
        // if (log.isTraceEnabled())
        //   log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
        return true;
      }
      return false;
    }
  }; // end RecordReaderBase
}
From source file: org.mrgeo.data.accumulo.utils.AccumuloUtils.java
License: Apache License

/**
 * Get a single raster
 *
 * @param table this is the table containing the raster
 * @param tid the tile id to get
 * @param zl the zoom level of the raster
 * @param conn the Accumulo connector to use
 * @param auths the authorizations to use for access
 * @return
 */
@Deprecated
@SuppressWarnings("squid:S1166") // TableNotFoundException are caught and ignored. This is OK
public static RasterWritable getRaster(String table, long tid, int zl, Connector conn, String auths) {
  RasterWritable retRaster = null;
  Authorizations authorizations = createAuthorizationsFromDelimitedString(auths);
  Scanner scanner = null;
  try {
    scanner = conn.createScanner(table, authorizations);
    Range r = new Range(toRowId(tid), toRowId(tid + 1));
    scanner.setRange(r);
    scanner.fetchColumnFamily(new Text(Integer.toString(zl).getBytes()));
    for (Entry<Key, Value> entry : scanner) {
      System.out.println("Key: " + entry.getKey().toString());
      DataInputBuffer dib = new DataInputBuffer();
      byte[] data = entry.getValue().get();
      dib.reset(data, data.length);
      retRaster = new RasterWritable();
      retRaster.readFields(dib);
      break;
    }
  } catch (TableNotFoundException ignored) {
  } catch (IOException e) {
    log.error("Exception thrown", e);
  }
  return retRaster;
}
From source file: org.springframework.data.hadoop.fs.TextRecordInputStream.java
License: Apache License

public TextRecordInputStream(Path p, FileSystem fs, Configuration configuration) throws IOException {
  // open the sequence file and create reusable key/value instances via reflection
  r = new SequenceFile.Reader(fs, p, configuration);
  key = ReflectionUtils.newInstance(r.getKeyClass().asSubclass(WritableComparable.class), configuration);
  val = ReflectionUtils.newInstance(r.getValueClass().asSubclass(Writable.class), configuration);
  // reusable in-memory buffers for converting records while streaming
  inbuf = new DataInputBuffer();
  outbuf = new DataOutputBuffer();
}