List of usage examples for org.apache.hadoop.io.DataInputBuffer (default constructor)
public DataInputBuffer()
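Before the project examples below, here is a minimal, self-contained sketch of the common pattern they all share: serialize a Writable into a DataOutputBuffer, then wrap the backing byte array with DataInputBuffer.reset() and read it back. The IntWritable value and class name are illustrative only and do not come from the examples that follow.

import java.io.IOException;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;

public class DataInputBufferRoundTrip {
    public static void main(String[] args) throws IOException {
        // serialize a Writable into an in-memory output buffer
        DataOutputBuffer out = new DataOutputBuffer();
        new IntWritable(42).write(out);

        // wrap the raw bytes for reading; reset(data, length) and
        // reset(data, start, length) are the overloads used in the examples below
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), 0, out.getLength());

        // deserialize back into a fresh Writable instance
        IntWritable value = new IntWritable();
        value.readFields(in);
        System.out.println(value.get()); // prints 42
    }
}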
From source file: org.commoncrawl.util.TimeSeriesDataFile.java
License: Open Source License

private void doCommonRead(ArrayList<KeyValueTuple<Long, ValueType>> valuesOut, RandomAccessFile file,
    long headerOffset, long endOfPrevRecord, int currentRecordLength, int recordsToRead,
    long optionalMinKeyValue) throws IOException {
  Buffer recordBuffer = new Buffer();
  DataInputBuffer inputBuffer = new DataInputBuffer();
  // ok start walking backwards ...
  while (recordsToRead != 0) {
    // setup new previous record pos pointer
    endOfPrevRecord = endOfPrevRecord - currentRecordLength - 4;
    // and seek to it endOfLastRecord - 4
    file.seek(endOfPrevRecord - 4);
    recordBuffer.setCapacity(currentRecordLength + 8);
    // read in proper amount of data ...
    file.read(recordBuffer.get(), 0, currentRecordLength + 8);
    // ok initialize input buffer ...
    inputBuffer.reset(recordBuffer.get(), currentRecordLength + 8);
    // now read next record length first ...
    int nextRecordLength = inputBuffer.readInt();
    // next read sync bytes ...
    int syncBytes = inputBuffer.readInt();
    // validate
    if (syncBytes != SyncBytes) {
      throw new IOException("Corrupt Record Detected!");
    }
    // ok read real record bytes ...
    int realRecordBytes = inputBuffer.readInt();
    // read crc ...
    long crcValue = inputBuffer.readLong();
    // ok validate crc ...
    crc.reset();
    crc.update(inputBuffer.getData(), inputBuffer.getPosition(), realRecordBytes - 8);
    if (crcValue != crc.getValue()) {
      throw new IOException("CRC Mismatch!");
    }
    // ok now read key and value
    try {
      long key = WritableUtils.readVLong(inputBuffer);
      if (optionalMinKeyValue != -1 && key < optionalMinKeyValue) {
        break;
      }
      ValueType value = (ValueType) valueClass.newInstance();
      value.readFields(inputBuffer);
      KeyValueTuple tuple = new KeyValueTuple<Long, ValueType>(key, value);
      tuple.recordPos = endOfPrevRecord;
      valuesOut.add(0, tuple);
    } catch (Exception e) {
      throw new IOException(e);
    }
    currentRecordLength = nextRecordLength;
    recordsToRead--;
    if (endOfPrevRecord == headerOffset)
      break;
  }
}
From source file: org.commoncrawl.util.TimeSeriesDataFile.java
License: Open Source License

/**
 * get the key value of the last record in the file
 * @return record key as a long or -1 if zero records in file
 * @throws IOException
 */
public synchronized long getLastRecordKey() throws IOException {
  LogFileHeader header = new LogFileHeader();
  if (fileName.exists()) {
    RandomAccessFile file = new RandomAccessFile(fileName, "r");
    Buffer recordBuffer = new Buffer();
    DataInputBuffer inputBuffer = new DataInputBuffer();
    try {
      // read header ...
      long headerOffset = readLogFileHeader(file, header);
      return header._lastRecordKey;
    } finally {
      if (file != null) {
        file.close();
      }
    }
  }
  return -1;
}
From source file: org.commoncrawl.util.TimeSeriesDataFile.java
License: Open Source License

/**
 * get the number of records in the file
 *
 * @return record count in file
 * @throws IOException
 */
public synchronized int getRecordCount() throws IOException {
  LogFileHeader header = new LogFileHeader();
  if (fileName.exists()) {
    RandomAccessFile file = new RandomAccessFile(fileName, "r");
    Buffer recordBuffer = new Buffer();
    DataInputBuffer inputBuffer = new DataInputBuffer();
    try {
      // read header ...
      long headerOffset = readLogFileHeader(file, header);
      return header._itemCount;
    } finally {
      if (file != null) {
        file.close();
      }
    }
  }
  return 0;
}
From source file: org.commoncrawl.util.Tuples.java
License: Open Source License

static void validateTextTuple() {
  // validate tuple code
  IntAndTwoTextByteTuples tuple1 = new IntAndTwoTextByteTuples();
  IntAndTwoTextByteTuples tuple2 = new IntAndTwoTextByteTuples();
  tuple1.setIntValue(1);
  tuple2.setIntValue(1);
  tuple1.setTextValueBytes(new TextBytes("AAAAA"));
  tuple2.setTextValueBytes(new TextBytes("AAAAA"));
  tuple1.setSecondTextValueBytes(new TextBytes("AAAAA"));
  tuple2.setSecondTextValueBytes(new TextBytes("AAAAB"));
  // compare the two
  Assert.assertTrue(tuple1.compareTo(tuple2) == -1);
  tuple1.setTextValueBytes(new TextBytes("BAAAA"));
  Assert.assertTrue(tuple1.compareTo(tuple2) == 1);
  tuple2.setIntValue(2);
  Assert.assertTrue(tuple1.compareTo(tuple2) == -1);
  // ok restore ...
  tuple1.setTextValueBytes(new TextBytes("AAAAA"));
  tuple2.setTextValueBytes(new TextBytes("AAAAA"));
  tuple1.setSecondTextValueBytes(new TextBytes("AAAAA"));
  tuple2.setSecondTextValueBytes(new TextBytes("AAAAB"));
  DataOutputBuffer outputBuffer = new DataOutputBuffer();
  try {
    tuple1.write(outputBuffer);
    tuple2.write(outputBuffer);
    IntAndTwoTextByteTuples tuple3 = new IntAndTwoTextByteTuples();
    IntAndTwoTextByteTuples tuple4 = new IntAndTwoTextByteTuples();
    DataInputBuffer inputBuffer = new DataInputBuffer();
    inputBuffer.reset(outputBuffer.getData(), 0, outputBuffer.getLength());
    tuple3.readFields(inputBuffer);
    tuple4.readFields(inputBuffer);
    Assert.assertTrue(tuple3.compareTo(tuple1) == 0);
    Assert.assertTrue(tuple4.compareTo(tuple2) == 0);
  } catch (IOException e) {
    e.printStackTrace();
  }
}
From source file: org.commoncrawl.util.Tuples.java
License: Open Source License

static void validateBufferTuple() {
  // run some tests on the new code
  String aTestString = new String("A Test Strnig");
  // convert it to bytes
  byte bytes[] = aTestString.getBytes();
  // over allocate an array
  byte overAllocated[] = new byte[bytes.length * 2];
  // copy source
  System.arraycopy(bytes, 0, overAllocated, bytes.length, bytes.length);
  IntBufferTuple tuple1 = new IntBufferTuple();
  IntBufferTuple tuple2 = new IntBufferTuple();
  tuple1.setIntValue(1);
  tuple2.setIntValue(1);
  tuple1.getBuffer().set(overAllocated, bytes.length, bytes.length);
  tuple2.getBuffer().set(overAllocated, bytes.length, bytes.length);
  Assert.assertTrue(tuple1.compareTo(tuple2) == 0);
  DataOutputBuffer outputBuffer = new DataOutputBuffer();
  try {
    tuple1.write(outputBuffer);
    tuple2.write(outputBuffer);
    DataInputBuffer inputBuffer = new DataInputBuffer();
    inputBuffer.reset(outputBuffer.getData(), 0, outputBuffer.getLength());
    tuple1.readFields(inputBuffer);
    tuple2.readFields(inputBuffer);
    Assert.assertTrue(tuple1.compareTo(tuple2) == 0);
    DataOutputBuffer outputBuffer2 = new DataOutputBuffer();
    tuple1.write(outputBuffer2);
    tuple2.write(outputBuffer2);
    Assert.assertTrue(WritableComparator.compareBytes(outputBuffer.getData(), 0, outputBuffer.getLength(),
        outputBuffer2.getData(), 0, outputBuffer2.getLength()) == 0);
  } catch (IOException e) {
    e.printStackTrace();
  }
}
From source file: org.goldenorb.io.input.VertexInput.java
License: Apache License

@SuppressWarnings("unchecked")
public void initialize() {
  // rebuild the input split
  org.apache.hadoop.mapreduce.InputSplit split = null;
  DataInputBuffer splitBuffer = new DataInputBuffer();
  splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
  SerializationFactory factory = new SerializationFactory(orbConf);
  Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer;
  try {
    deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
        .getDeserializer(orbConf.getClassByName(splitClass));
    deserializer.open(splitBuffer);
    split = deserializer.deserialize(null);
    JobConf job = new JobConf(orbConf);
    JobContext jobContext = new JobContext(job, new JobID(getOrbConf().getJobNumber(), 0));
    InputFormat<INPUT_KEY, INPUT_VALUE> inputFormat;
    inputFormat = (InputFormat<INPUT_KEY, INPUT_VALUE>) ReflectionUtils
        .newInstance(jobContext.getInputFormatClass(), orbConf);
    TaskAttemptContext tao = new TaskAttemptContext(job,
        new TaskAttemptID(new TaskID(jobContext.getJobID(), true, partitionID), 0));
    recordReader = inputFormat.createRecordReader(split, tao);
    recordReader.initialize(split, tao);
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e);
  } catch (IOException e) {
    throw new RuntimeException(e);
  } catch (InterruptedException e) {
    throw new RuntimeException(e);
  }
}
From source file: org.mrgeo.data.accumulo.image.AccumuloMrsImageReader.java
License: Apache License

protected MrGeoRaster toNonWritable(byte[] val, CompressionCodec codec, Decompressor decompressor)
    throws IOException {
  // wrap the raw value bytes and deserialize the RasterWritable from them
  DataInputBuffer dib = new DataInputBuffer();
  dib.reset(val, val.length);
  RasterWritable rw = new RasterWritable();
  rw.readFields(dib);
  // no codec/decompressor means the raster was stored uncompressed
  if (codec == null || decompressor == null) {
    return RasterWritable.toMrGeoRaster(rw);
  }
  return RasterWritable.toMrGeoRaster(rw, codec, decompressor);
}
From source file: org.mrgeo.data.accumulo.image.AccumuloMrsPyramidInputFormat.java
License: Apache License

public static RecordReader<TileIdWritable, RasterWritable> makeRecordReader() {
  return new RecordReaderBase<TileIdWritable, RasterWritable>() {
    @Override
    public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
      // RangeInputSplit ris = (RangeInputSplit) ((TiledInputSplit)inSplit).getWrappedSplit();
      //
      // log.info("initializing with instance of " + ris.getInstanceName());
      // log.info("initializing with auths of " + ris.getAuths().toString());
      //
      // super.initialize(((TiledInputSplit)inSplit).getWrappedSplit(), attempt);
      log.info("initializing input splits of type " + inSplit.getClass().getCanonicalName());
      String[] locs;
      try {
        locs = inSplit.getLocations();
        for (int x = 0; x < locs.length; x++) {
          log.info("location " + x + " -> " + locs[x]);
        }
      } catch (InterruptedException ie) {
        log.error("Exception thrown", ie);
        return;
      }
      if (inSplit instanceof TiledInputSplit) {
        // deal with this
        org.apache.accumulo.core.client.mapreduce.RangeInputSplit ris =
            new org.apache.accumulo.core.client.mapreduce.RangeInputSplit();
        InputSplit inS = ((TiledInputSplit) inSplit).getWrappedSplit();
        log.info("input split class: " + inS.getClass().getCanonicalName());
        long startId = ((TiledInputSplit) inSplit).getStartTileId();
        long endId = ((TiledInputSplit) inSplit).getEndTileId();
        Key startKey = AccumuloUtils.toKey(startId);
        Key endKey = AccumuloUtils.toKey(endId);
        int zoomL = ((TiledInputSplit) inSplit).getZoomLevel();
        Range r = new Range(startKey, endKey);
        log.info("Zoom Level = " + zoomL);
        log.info("Range " + startId + " to " + endId);
        try {
          locs = inS.getLocations();
          for (int x = 0; x < locs.length; x++) {
            log.info("split " + x + " -> " + locs[x]);
          }
          ris.setRange(r);
          ris.setLocations(locs);
          ris.setTableName(((org.apache.accumulo.core.client.mapreduce.RangeInputSplit) inS).getTableName());
          ris.setTableId(((org.apache.accumulo.core.client.mapreduce.RangeInputSplit) inS).getTableId());
          // there can be more added here
        } catch (InterruptedException ie) {
          throw new RuntimeErrorException(new Error(ie.getMessage()));
        }
        log.info("table " + ris.getTableName() + " is offline: " + ris.isOffline());
        super.initialize(ris, attempt);
        // super.initialize(((TiledInputSplit) inSplit).getWrappedSplit(), attempt);
      } else {
        super.initialize(inSplit, attempt);
      }
    } // end initialize

    @Override
    public void close() {
      log.info("Record Reader closing!");
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      if (scannerIterator.hasNext()) {
        ++numKeysRead;
        Entry<Key, Value> entry = scannerIterator.next();
        // transform key and value
        long id = AccumuloUtils.toLong(entry.getKey().getRow());
        currentKey = entry.getKey();
        // currentValue = entry.getValue();
        log.info("Processing " + id + " -> " + entry.getValue().getSize());
        currentK = new TileIdWritable(id);
        DataInputBuffer dib = new DataInputBuffer();
        byte[] data = entry.getValue().get();
        dib.reset(data, data.length);
        currentV = new RasterWritable();
        currentV.readFields(dib);
        // log.info("current key = " + id);
        // if (log.isTraceEnabled())
        //   log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
        return true;
      }
      return false;
    }
  }; // end RecordReaderBase
}
From source file: org.mrgeo.data.accumulo.utils.AccumuloUtils.java
License: Apache License

/**
 * Get a single raster
 *
 * @param table this is the table containing the raster
 * @param tid the tile id to get
 * @param zl the zoom level of the raster
 * @param conn the Accumulo connector to use
 * @param auths the authorizations to use for access
 * @return
 */
@Deprecated
@SuppressWarnings("squid:S1166") // TableNotFoundException are caught and ignored. This is OK
public static RasterWritable getRaster(String table, long tid, int zl, Connector conn, String auths) {
  RasterWritable retRaster = null;
  Authorizations authorizations = createAuthorizationsFromDelimitedString(auths);
  Scanner scanner = null;
  try {
    scanner = conn.createScanner(table, authorizations);
    Range r = new Range(toRowId(tid), toRowId(tid + 1));
    scanner.setRange(r);
    scanner.fetchColumnFamily(new Text(Integer.toString(zl).getBytes()));
    for (Entry<Key, Value> entry : scanner) {
      System.out.println("Key: " + entry.getKey().toString());
      DataInputBuffer dib = new DataInputBuffer();
      byte[] data = entry.getValue().get();
      dib.reset(data, data.length);
      retRaster = new RasterWritable();
      retRaster.readFields(dib);
      break;
    }
  } catch (TableNotFoundException ignored) {
  } catch (IOException e) {
    log.error("Exception thrown", e);
  }
  return retRaster;
}
From source file: org.springframework.data.hadoop.fs.TextRecordInputStream.java
License: Apache License

public TextRecordInputStream(Path p, FileSystem fs, Configuration configuration) throws IOException {
  // open the sequence file and create reusable key/value instances via reflection
  r = new SequenceFile.Reader(fs, p, configuration);
  key = ReflectionUtils.newInstance(r.getKeyClass().asSubclass(WritableComparable.class), configuration);
  val = ReflectionUtils.newInstance(r.getValueClass().asSubclass(Writable.class), configuration);
  // reusable in-memory buffers for converting records while streaming
  inbuf = new DataInputBuffer();
  outbuf = new DataOutputBuffer();
}