Example usage for org.apache.hadoop.io DataInputBuffer DataInputBuffer()

List of usage examples for org.apache.hadoop.io DataInputBuffer DataInputBuffer()

Introduction

On this page you can find example usage for the org.apache.hadoop.io.DataInputBuffer constructor, DataInputBuffer().

Prototype

public DataInputBuffer() 

Document

Constructs a new empty buffer.
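
A minimal, hypothetical round-trip sketch (not taken from the projects listed below) showing the typical pattern: serialize values into a DataOutputBuffer, then construct a new empty DataInputBuffer and reset() it over the resulting bytes before reading the values back.

import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

public class DataInputBufferExample {
    public static void main(String[] args) throws IOException {
        // Serialize a primitive and a Writable into an in-memory output buffer.
        DataOutputBuffer outputBuffer = new DataOutputBuffer();
        outputBuffer.writeInt(42);
        new Text("hello").write(outputBuffer);

        // Construct a new empty input buffer and point it at the serialized bytes.
        DataInputBuffer inputBuffer = new DataInputBuffer();
        inputBuffer.reset(outputBuffer.getData(), 0, outputBuffer.getLength());

        // Read the values back in the order they were written.
        int number = inputBuffer.readInt();
        Text text = new Text();
        text.readFields(inputBuffer);

        System.out.println(number + " " + text); // prints: 42 hello
    }
}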

Usage

From source file:org.commoncrawl.util.TimeSeriesDataFile.java

License:Open Source License

private void doCommonRead(ArrayList<KeyValueTuple<Long, ValueType>> valuesOut, RandomAccessFile file,
        long headerOffset, long endOfPrevRecord, int currentRecordLength, int recordsToRead,
        long optionalMinKeyValue) throws IOException {

    Buffer recordBuffer = new Buffer();
    DataInputBuffer inputBuffer = new DataInputBuffer();

    // ok start walking backwards ... 
    while (recordsToRead != 0) {
        // setup new previous record pos pointer  
        endOfPrevRecord = endOfPrevRecord - currentRecordLength - 4;
        // and seek to it endOfLastRecord - 4
        file.seek(endOfPrevRecord - 4);

        recordBuffer.setCapacity(currentRecordLength + 8);
        // read in proper amount of data ...
        file.read(recordBuffer.get(), 0, currentRecordLength + 8);
        // ok initialize input buffer ... 
        inputBuffer.reset(recordBuffer.get(), currentRecordLength + 8);
        // now read next record length first ... 
        int nextRecordLength = inputBuffer.readInt();
        // next read sync bytes ... 
        int syncBytes = inputBuffer.readInt();
        // validate 
        if (syncBytes != SyncBytes) {
            throw new IOException("Corrupt Record Detected!");
        }
        // ok read real record bytes ... 
        int realRecordBytes = inputBuffer.readInt();
        // read crc ... 
        long crcValue = inputBuffer.readLong();
        // ok validate crc ...  
        crc.reset();
        crc.update(inputBuffer.getData(), inputBuffer.getPosition(), realRecordBytes - 8);
        if (crcValue != crc.getValue()) {
            throw new IOException("CRC Mismatch!");
        }
        // ok now read key and value 
        try {
            long key = WritableUtils.readVLong(inputBuffer);

            if (optionalMinKeyValue != -1 && key < optionalMinKeyValue) {
                break;
            }

            ValueType value = (ValueType) valueClass.newInstance();
            value.readFields(inputBuffer);
            KeyValueTuple tuple = new KeyValueTuple<Long, ValueType>(key, value);
            tuple.recordPos = endOfPrevRecord;
            valuesOut.add(0, tuple);

        } catch (Exception e) {
            throw new IOException(e);
        }

        currentRecordLength = nextRecordLength;

        recordsToRead--;

        if (endOfPrevRecord == headerOffset)
            break;
    }
}

From source file:org.commoncrawl.util.TimeSeriesDataFile.java

License:Open Source License

/**
 * get the key value of the last record in the file 
 * @return record key as a long or -1 if zero records in file 
 * @throws IOException
 */
public synchronized long getLastRecordKey() throws IOException {
    LogFileHeader header = new LogFileHeader();

    if (fileName.exists()) {

        RandomAccessFile file = new RandomAccessFile(fileName, "r");

        Buffer recordBuffer = new Buffer();
        DataInputBuffer inputBuffer = new DataInputBuffer();
        try {

            //read header ... 
            long headerOffset = readLogFileHeader(file, header);

            return header._lastRecordKey;
        } finally {
            if (file != null) {
                file.close();
            }
        }
    }
    return -1;
}

From source file:org.commoncrawl.util.TimeSeriesDataFile.java

License:Open Source License

/**
 * get the number of records in the file 
 *
 * @return record count in file 
 * @throws IOException
 */
public synchronized int getRecordCount() throws IOException {

    LogFileHeader header = new LogFileHeader();

    if (fileName.exists()) {

        RandomAccessFile file = new RandomAccessFile(fileName, "r");

        Buffer recordBuffer = new Buffer();
        DataInputBuffer inputBuffer = new DataInputBuffer();
        try {

            //read header ... 
            long headerOffset = readLogFileHeader(file, header);

            return header._itemCount;
        } finally {
            if (file != null) {
                file.close();
            }
        }
    }
    return 0;
}

From source file:org.commoncrawl.util.Tuples.java

License:Open Source License

static void validateTextTuple() {
    // validate tuple code 
    IntAndTwoTextByteTuples tuple1 = new IntAndTwoTextByteTuples();
    IntAndTwoTextByteTuples tuple2 = new IntAndTwoTextByteTuples();

    tuple1.setIntValue(1);
    tuple2.setIntValue(1);
    tuple1.setTextValueBytes(new TextBytes("AAAAA"));
    tuple2.setTextValueBytes(new TextBytes("AAAAA"));
    tuple1.setSecondTextValueBytes(new TextBytes("AAAAA"));
    tuple2.setSecondTextValueBytes(new TextBytes("AAAAB"));

    // compare the two 
    Assert.assertTrue(tuple1.compareTo(tuple2) == -1);

    tuple1.setTextValueBytes(new TextBytes("BAAAA"));
    Assert.assertTrue(tuple1.compareTo(tuple2) == 1);
    tuple2.setIntValue(2);
    Assert.assertTrue(tuple1.compareTo(tuple2) == -1);
    // ok restore ... 
    tuple1.setTextValueBytes(new TextBytes("AAAAA"));
    tuple2.setTextValueBytes(new TextBytes("AAAAA"));
    tuple1.setSecondTextValueBytes(new TextBytes("AAAAA"));
    tuple2.setSecondTextValueBytes(new TextBytes("AAAAB"));

    DataOutputBuffer outputBuffer = new DataOutputBuffer();

    try {

        tuple1.write(outputBuffer);
        tuple2.write(outputBuffer);

        IntAndTwoTextByteTuples tuple3 = new IntAndTwoTextByteTuples();
        IntAndTwoTextByteTuples tuple4 = new IntAndTwoTextByteTuples();

        DataInputBuffer inputBuffer = new DataInputBuffer();
        inputBuffer.reset(outputBuffer.getData(), 0, outputBuffer.getLength());

        tuple3.readFields(inputBuffer);
        tuple4.readFields(inputBuffer);

        Assert.assertTrue(tuple3.compareTo(tuple1) == 0);
        Assert.assertTrue(tuple4.compareTo(tuple2) == 0);

    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

}

From source file:org.commoncrawl.util.Tuples.java

License:Open Source License

static void validateBufferTuple() {

    // run some tests on the new code 
    String aTestString = "A Test String";
    // convert it to bytes
    byte bytes[] = aTestString.getBytes();
    // over allocate an array 
    byte overAllocated[] = new byte[bytes.length * 2];
    // copy source 
    System.arraycopy(bytes, 0, overAllocated, bytes.length, bytes.length);

    IntBufferTuple tuple1 = new IntBufferTuple();
    IntBufferTuple tuple2 = new IntBufferTuple();

    tuple1.setIntValue(1);
    tuple2.setIntValue(1);
    tuple1.getBuffer().set(overAllocated, bytes.length, bytes.length);
    tuple2.getBuffer().set(overAllocated, bytes.length, bytes.length);

    Assert.assertTrue(tuple1.compareTo(tuple2) == 0);

    DataOutputBuffer outputBuffer = new DataOutputBuffer();

    try {
        tuple1.write(outputBuffer);
        tuple2.write(outputBuffer);

        DataInputBuffer inputBuffer = new DataInputBuffer();

        inputBuffer.reset(outputBuffer.getData(), 0, outputBuffer.getLength());

        tuple1.readFields(inputBuffer);
        tuple2.readFields(inputBuffer);

        Assert.assertTrue(tuple1.compareTo(tuple2) == 0);

        DataOutputBuffer outputBuffer2 = new DataOutputBuffer();

        tuple1.write(outputBuffer2);
        tuple2.write(outputBuffer2);

        Assert.assertTrue(WritableComparator.compareBytes(outputBuffer.getData(), 0, outputBuffer.getLength(),
                outputBuffer2.getData(), 0, outputBuffer2.getLength()) == 0);
    } catch (IOException e) {
        e.printStackTrace();
    }

}

From source file:org.goldenorb.io.input.VertexInput.java

License:Apache License

/**
 * Rebuilds the input split from its serialized bytes and initializes the record reader.
 */
@SuppressWarnings("unchecked")
public void initialize() {
    // rebuild the input split
    org.apache.hadoop.mapreduce.InputSplit split = null;
    DataInputBuffer splitBuffer = new DataInputBuffer();
    splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
    SerializationFactory factory = new SerializationFactory(orbConf);
    Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer;
    try {
        deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
                .getDeserializer(orbConf.getClassByName(splitClass));
        deserializer.open(splitBuffer);
        split = deserializer.deserialize(null);
        JobConf job = new JobConf(orbConf);
        JobContext jobContext = new JobContext(job, new JobID(getOrbConf().getJobNumber(), 0));
        InputFormat<INPUT_KEY, INPUT_VALUE> inputFormat;
        inputFormat = (InputFormat<INPUT_KEY, INPUT_VALUE>) ReflectionUtils
                .newInstance(jobContext.getInputFormatClass(), orbConf);
        TaskAttemptContext tao = new TaskAttemptContext(job,
                new TaskAttemptID(new TaskID(jobContext.getJobID(), true, partitionID), 0));
        recordReader = inputFormat.createRecordReader(split, tao);
        recordReader.initialize(split, tao);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

}

From source file:org.mrgeo.data.accumulo.image.AccumuloMrsImageReader.java

License:Apache License

protected MrGeoRaster toNonWritable(byte[] val, CompressionCodec codec, Decompressor decompressor)
        throws IOException {
    DataInputBuffer dib = new DataInputBuffer();
    dib.reset(val, val.length);

    RasterWritable rw = new RasterWritable();
    rw.readFields(dib);

    if (codec == null || decompressor == null) {
        return RasterWritable.toMrGeoRaster(rw);
    }
    return RasterWritable.toMrGeoRaster(rw, codec, decompressor);
}

From source file:org.mrgeo.data.accumulo.image.AccumuloMrsPyramidInputFormat.java

License:Apache License

public static RecordReader<TileIdWritable, RasterWritable> makeRecordReader() {
    return new RecordReaderBase<TileIdWritable, RasterWritable>() {

        @Override
        public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {

            //        RangeInputSplit ris = (RangeInputSplit) ((TiledInputSplit)inSplit).getWrappedSplit();
            //
            //        log.info("initializing with instance of " + ris.getInstanceName());
            //        log.info("initializing with auths of " + ris.getAuths().toString());
            //
            //        super.initialize(((TiledInputSplit)inSplit).getWrappedSplit(), attempt);

            log.info("initializing input splits of type " + inSplit.getClass().getCanonicalName());
            String[] locs;
            try {
                locs = inSplit.getLocations();
                for (int x = 0; x < locs.length; x++) {
                    log.info("location " + x + " -> " + locs[x]);
                }
            } catch (InterruptedException ie) {
                log.error("Exception thrown", ie);
                return;
            }
            if (inSplit instanceof TiledInputSplit) {

                // deal with this
                org.apache.accumulo.core.client.mapreduce.RangeInputSplit ris = new org.apache.accumulo.core.client.mapreduce.RangeInputSplit();
                InputSplit inS = ((TiledInputSplit) inSplit).getWrappedSplit();
                log.info("input split class: " + inS.getClass().getCanonicalName());
                long startId = ((TiledInputSplit) inSplit).getStartTileId();
                long endId = ((TiledInputSplit) inSplit).getEndTileId();
                Key startKey = AccumuloUtils.toKey(startId);
                Key endKey = AccumuloUtils.toKey(endId);
                int zoomL = ((TiledInputSplit) inSplit).getZoomLevel();
                Range r = new Range(startKey, endKey);

                log.info("Zoom Level = " + zoomL);
                log.info("Range " + startId + " to " + endId);

                try {
                    locs = inS.getLocations();
                    for (int x = 0; x < locs.length; x++) {
                        log.info("split " + x + " -> " + locs[x]);
                    }
                    ris.setRange(r);
                    ris.setLocations(locs);
                    ris.setTableName(
                            ((org.apache.accumulo.core.client.mapreduce.RangeInputSplit) inS).getTableName());
                    ris.setTableId(
                            ((org.apache.accumulo.core.client.mapreduce.RangeInputSplit) inS).getTableId());

                    // there can be more added here

                } catch (InterruptedException ie) {
                    throw new RuntimeErrorException(new Error(ie.getMessage()));
                }
                log.info("table " + ris.getTableName() + " is offline: " + ris.isOffline());
                super.initialize(ris, attempt);

                //super.initialize(((TiledInputSplit) inSplit).getWrappedSplit(), attempt);

            } else {
                super.initialize(inSplit, attempt);
            }

        } // end initialize

        @Override
        public void close() {
            log.info("Record Reader closing!");
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (scannerIterator.hasNext()) {
                ++numKeysRead;
                Entry<Key, Value> entry = scannerIterator.next();
                // transform key and value
                long id = AccumuloUtils.toLong(entry.getKey().getRow());
                currentKey = entry.getKey();
                //currentValue = entry.getValue();

                log.info("Processing " + id + " -> " + entry.getValue().getSize());

                currentK = new TileIdWritable(id);
                DataInputBuffer dib = new DataInputBuffer();
                byte[] data = entry.getValue().get();
                dib.reset(data, data.length);

                currentV = new RasterWritable();
                currentV.readFields(dib);

                //log.info("current key = " + id);
                //          if (log.isTraceEnabled())
                //            log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
                return true;
            }
            return false;
        }
    }; //end RecordReaderBase
}

From source file:org.mrgeo.data.accumulo.utils.AccumuloUtils.java

License:Apache License

/**
 * Get a single raster
 *
 * @param table this is the table containing the raster
 * @param tid   the tile id to get
 * @param zl    the zoom level of the raster
 * @param conn  the Accumulo connector to use
 * @param auths the authorizations to use for access
 * @return the RasterWritable for the requested tile, or null if the table or tile is not found
 */
@Deprecated
@SuppressWarnings("squid:S1166") // TableNotFoundException are caught and ignored.  This is OK
public static RasterWritable getRaster(String table, long tid, int zl, Connector conn, String auths) {
    RasterWritable retRaster = null;
    Authorizations authorizations = createAuthorizationsFromDelimitedString(auths);

    Scanner scanner = null;
    try {
        scanner = conn.createScanner(table, authorizations);

        Range r = new Range(toRowId(tid), toRowId(tid + 1));
        scanner.setRange(r);
        scanner.fetchColumnFamily(new Text(Integer.toString(zl).getBytes()));

        for (Entry<Key, Value> entry : scanner) {
            System.out.println("Key: " + entry.getKey().toString());

            DataInputBuffer dib = new DataInputBuffer();
            byte[] data = entry.getValue().get();
            dib.reset(data, data.length);

            retRaster = new RasterWritable();
            retRaster.readFields(dib);
            break;
        }

    } catch (TableNotFoundException ignored) {

    } catch (IOException e) {
        log.error("Exception thrown", e);
    }

    return retRaster;
}

From source file:org.springframework.data.hadoop.fs.TextRecordInputStream.java

License:Apache License

public TextRecordInputStream(Path p, FileSystem fs, Configuration configuration) throws IOException {
    r = new SequenceFile.Reader(fs, p, configuration);
    key = ReflectionUtils.newInstance(r.getKeyClass().asSubclass(WritableComparable.class), configuration);
    val = ReflectionUtils.newInstance(r.getValueClass().asSubclass(Writable.class), configuration);
    inbuf = new DataInputBuffer();
    outbuf = new DataOutputBuffer();
}