Example usage for org.apache.hadoop.io DataInputBuffer reset

List of usage examples for org.apache.hadoop.io DataInputBuffer reset

Introduction

On this page you can find example usage of org.apache.hadoop.io.DataInputBuffer.reset.

Prototype

public void reset(byte[] input, int length) 

Document

Resets the data that the buffer reads.
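
A typical pattern is to pair DataInputBuffer with DataOutputBuffer: serialize a Writable into the output buffer, then reset the input buffer over the resulting bytes and read the object back. The following is a minimal, self-contained sketch of that round trip; the class and variable names are illustrative and do not come from the examples below.

import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

public class DataInputBufferResetSketch {
    public static void main(String[] args) throws IOException {
        // write a Writable into an in-memory output buffer
        DataOutputBuffer out = new DataOutputBuffer();
        new Text("hello").write(out);

        // point the input buffer at exactly the bytes just written
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());

        // deserialize into a fresh instance
        Text copy = new Text();
        copy.readFields(in);
        System.out.println(copy); // prints "hello"
    }
}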

Usage

From source file:org.commoncrawl.util.CompressedURLFPListV2.java

License:Open Source License

public static void main(String[] args) {

    // initialize ...
    final Configuration conf = new Configuration();

    conf.addResource("nutch-default.xml");
    conf.addResource("nutch-site.xml");
    conf.addResource("core-site.xml");
    conf.addResource("hdfs-site.xml");
    conf.addResource("mapred-site.xml");

    BasicConfigurator.configure();
    CrawlEnvironment.setHadoopConfig(conf);

    try {
        FileSystem fs = CrawlEnvironment.getDefaultFileSystem();

        Path testFile = new Path("crawl/linkdb/merged1282844121161/linkData/part-00000");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, testFile, conf);

        URLFPV2 fp = new URLFPV2();
        BytesWritable bytes = new BytesWritable();

        while (reader.next(fp, bytes)) {
            if (bytes.getLength() != 0) {
                DataInputBuffer inputStream = new DataInputBuffer();
                inputStream.reset(bytes.get(), bytes.getLength());
                CompressedURLFPListV2.Reader listReader = new CompressedURLFPListV2.Reader(inputStream);
                while (listReader.hasNext()) {
                    URLFPV2 nextFP = listReader.next();
                    LOG.info("DH:" + nextFP.getDomainHash() + " UH:" + nextFP.getUrlHash());
                }
            } else {
                LOG.error("ZERO BYTE LIST!");
            }
        }

        reader.close();
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    }

    if (1 == 1)
        return;

    validateDuplicateChecking();
    // validateReallyBigList();
    validateURLFPSerializationRootDomain();
    validateURLFPSerializationSingleSubDomain();
    validateURLFPSerializationMultiDomain();
    validateURLFPFlagSerializationRootDomain();
    validateURLFPFlagSerializationMultipleSubDomains();
    validateURLFPFlagSerializationOneSubDomain();
}
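
In this example, reset wraps the raw contents of each BytesWritable value read from the SequenceFile; only the first getLength() bytes of the backing array are valid list data. Note that BytesWritable.get() is deprecated in more recent Hadoop releases in favor of getBytes().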

From source file:org.commoncrawl.util.TimeSeriesDataFile.java

License:Open Source License

private void doCommonRead(ArrayList<KeyValueTuple<Long, ValueType>> valuesOut, RandomAccessFile file,
        long headerOffset, long endOfPrevRecord, int currentRecordLength, int recordsToRead,
        long optionalMinKeyValue) throws IOException {

    Buffer recordBuffer = new Buffer();
    DataInputBuffer inputBuffer = new DataInputBuffer();

    // ok start walking backwards ... 
    while (recordsToRead != 0) {
        // setup new previous record pos pointer  
        endOfPrevRecord = endOfPrevRecord - currentRecordLength - 4;
        // and seek to it endOfLastRecord - 4
        file.seek(endOfPrevRecord - 4);

        recordBuffer.setCapacity(currentRecordLength + 8);
        // read in proper amount of data ...
        file.read(recordBuffer.get(), 0, currentRecordLength + 8);
        // ok initialize input buffer ... 
        inputBuffer.reset(recordBuffer.get(), currentRecordLength + 8);
        // now read next record length first ... 
        int nextRecordLength = inputBuffer.readInt();
        // next read sync bytes ... 
        int syncBytes = inputBuffer.readInt();
        // validate 
        if (syncBytes != SyncBytes) {
            throw new IOException("Corrupt Record Detected!");
        }
        // ok read real record bytes ... 
        int realRecordBytes = inputBuffer.readInt();
        // read crc ... 
        long crcValue = inputBuffer.readLong();
        // ok validate crc ...  
        crc.reset();
        crc.update(inputBuffer.getData(), inputBuffer.getPosition(), realRecordBytes - 8);
        if (crcValue != crc.getValue()) {
            throw new IOException("CRC Mismatch!");
        }
        // ok now read key and value 
        try {
            long key = WritableUtils.readVLong(inputBuffer);

            if (optionalMinKeyValue != -1 && key < optionalMinKeyValue) {
                break;
            }

            ValueType value = (ValueType) valueClass.newInstance();
            value.readFields(inputBuffer);
            KeyValueTuple tuple = new KeyValueTuple<Long, ValueType>(key, value);
            tuple.recordPos = endOfPrevRecord;
            valuesOut.add(0, tuple);

        } catch (Exception e) {
            throw new IOException(e);
        }

        currentRecordLength = nextRecordLength;

        recordsToRead--;

        if (endOfPrevRecord == headerOffset)
            break;
    }
}
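
Here a single DataInputBuffer is reused across iterations: each call to reset repositions it over the bytes just read into recordBuffer, so the records can be walked backwards without allocating a new stream per record.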

From source file:org.mrgeo.data.accumulo.image.AccumuloMrsImageReader.java

License:Apache License

protected MrGeoRaster toNonWritable(byte[] val, CompressionCodec codec, Decompressor decompressor)
        throws IOException {
    DataInputBuffer dib = new DataInputBuffer();
    dib.reset(val, val.length);

    RasterWritable rw = new RasterWritable();
    rw.readFields(dib);

    if (codec == null || decompressor == null) {
        return RasterWritable.toMrGeoRaster(rw);
    }
    return RasterWritable.toMrGeoRaster(rw, codec, decompressor);
}
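
In this reader, reset wraps the raw tile bytes so RasterWritable.readFields can deserialize them directly from memory; decompression is applied afterwards only when both a codec and a decompressor are supplied.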

From source file:org.mrgeo.data.accumulo.image.AccumuloMrsPyramidInputFormat.java

License:Apache License

public static RecordReader<TileIdWritable, RasterWritable> makeRecordReader() {
    return new RecordReaderBase<TileIdWritable, RasterWritable>() {

        @Override
        public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {

            //        RangeInputSplit ris = (RangeInputSplit) ((TiledInputSplit)inSplit).getWrappedSplit();
            //
            //        log.info("initializing with instance of " + ris.getInstanceName());
            //        log.info("initializing with auths of " + ris.getAuths().toString());
            //
            //        super.initialize(((TiledInputSplit)inSplit).getWrappedSplit(), attempt);

            log.info("initializing input splits of type " + inSplit.getClass().getCanonicalName());
            String[] locs;
            try {
                locs = inSplit.getLocations();
                for (int x = 0; x < locs.length; x++) {
                    log.info("location " + x + " -> " + locs[x]);
                }
            } catch (InterruptedException ie) {
                log.error("Exception thrown", ie);
                return;
            }
            if (inSplit instanceof TiledInputSplit) {

                // deal with this
                org.apache.accumulo.core.client.mapreduce.RangeInputSplit ris = new org.apache.accumulo.core.client.mapreduce.RangeInputSplit();
                InputSplit inS = ((TiledInputSplit) inSplit).getWrappedSplit();
                log.info("input split class: " + inS.getClass().getCanonicalName());
                long startId = ((TiledInputSplit) inSplit).getStartTileId();
                long endId = ((TiledInputSplit) inSplit).getEndTileId();
                Key startKey = AccumuloUtils.toKey(startId);
                Key endKey = AccumuloUtils.toKey(endId);
                int zoomL = ((TiledInputSplit) inSplit).getZoomLevel();
                Range r = new Range(startKey, endKey);

                log.info("Zoom Level = " + zoomL);
                log.info("Range " + startId + " to " + endId);

                try {
                    locs = inS.getLocations();
                    for (int x = 0; x < locs.length; x++) {
                        log.info("split " + x + " -> " + locs[x]);
                    }
                    ris.setRange(r);
                    ris.setLocations(locs);
                    ris.setTableName(
                            ((org.apache.accumulo.core.client.mapreduce.RangeInputSplit) inS).getTableName());
                    ris.setTableId(
                            ((org.apache.accumulo.core.client.mapreduce.RangeInputSplit) inS).getTableId());

                    // there can be more added here

                } catch (InterruptedException ie) {
                    throw new RuntimeErrorException(new Error(ie.getMessage()));
                }
                log.info("table " + ris.getTableName() + " is offline: " + ris.isOffline());
                super.initialize(ris, attempt);

                //super.initialize(((TiledInputSplit) inSplit).getWrappedSplit(), attempt);

            } else {
                super.initialize(inSplit, attempt);
            }

        } // end initialize

        @Override
        public void close() {
            log.info("Record Reader closing!");
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (scannerIterator.hasNext()) {
                ++numKeysRead;
                Entry<Key, Value> entry = scannerIterator.next();
                // transform key and value
                long id = AccumuloUtils.toLong(entry.getKey().getRow());
                currentKey = entry.getKey();
                //currentValue = entry.getValue();

                log.info("Processing " + id + " -> " + entry.getValue().getSize());

                currentK = new TileIdWritable(id);
                DataInputBuffer dib = new DataInputBuffer();
                byte[] data = entry.getValue().get();
                dib.reset(data, data.length);

                currentV = new RasterWritable();
                currentV.readFields(dib);

                //log.info("current key = " + id);
                //          if (log.isTraceEnabled())
                //            log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
                return true;
            }
            return false;
        }
    }; //end RecordReaderBase
}

From source file:org.mrgeo.data.accumulo.utils.AccumuloUtils.java

License:Apache License

/**
 * Get a single raster
 *
 * @param table this is the table containing the raster
 * @param tid   the tile id to get
 * @param zl    the zoom level of the raster
 * @param conn  the Accumulo connector to use
 * @param auths the authorizations to use for access
 * @return the RasterWritable for the requested tile, or null if it could not be read
 */
@Deprecated
@SuppressWarnings("squid:S1166") // TableNotFoundException are caught and ignored.  This is OK
public static RasterWritable getRaster(String table, long tid, int zl, Connector conn, String auths) {
    RasterWritable retRaster = null;
    Authorizations authorizations = createAuthorizationsFromDelimitedString(auths);

    Scanner scanner = null;
    try {
        scanner = conn.createScanner(table, authorizations);

        Range r = new Range(toRowId(tid), toRowId(tid + 1));
        scanner.setRange(r);
        scanner.fetchColumnFamily(new Text(Integer.toString(zl).getBytes()));

        for (Entry<Key, Value> entry : scanner) {
            System.out.println("Key: " + entry.getKey().toString());

            DataInputBuffer dib = new DataInputBuffer();
            byte[] data = entry.getValue().get();
            dib.reset(data, data.length);

            retRaster = new RasterWritable();
            retRaster.readFields(dib);
            break;
        }

    } catch (TableNotFoundException ignored) {

    } catch (IOException e) {
        log.error("Exception thrown", e);
    }

    return retRaster;
}

From source file:org.uv.himongo.io.BSONWritable.java

License:Apache License

/**
 * Used by child copy constructors.
 *
 * @param other
 */
protected synchronized void copy(Writable other) {
    if (other != null) {
        try {
            DataOutputBuffer out = new DataOutputBuffer();
            other.write(out);
            DataInputBuffer in = new DataInputBuffer();
            in.reset(out.getData(), out.getLength());
            readFields(in);

        } catch (IOException e) {
            throw new IllegalArgumentException("map cannot be copied: " + e.getMessage());
        }

    } else {
        throw new IllegalArgumentException("source map cannot be null");
    }
}
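
This copy method shows the common deep-copy idiom for Writables: serialize the source into a DataOutputBuffer, then reset a DataInputBuffer over getData() and getLength() and call readFields to populate this instance from the same bytes.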