Example usage for org.apache.hadoop.fs FSDataInputStream seek

List of usage examples for org.apache.hadoop.fs FSDataInputStream seek


This page collects usage examples for the method org.apache.hadoop.fs.FSDataInputStream.seek.


public void seek(long desired) throws IOException 

Source Link


Seek to the given offset.


From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Writes the bytes of a {@code DataChunk} built from a three-field {@code Record} to a file,
 * reads the file back through an {@code FSDataInputStream}, and checks the bit set, the raw
 * field values, and the {@code Record} rebuilt via {@code DataChunk.unpersistent} and
 * {@code DataChunk.toRecord}.
 *
 * NOTE(review): this excerpt looks truncated. The closing braces of the if/for/try blocks are
 * missing, the three FieldValue objects are never added to {@code record}, and the
 * {@code value} buffers are decoded without ever being filled from {@code in}. Confirm against
 * the original FormatStorageBasicTest.java before relying on it.
 */
public void testChunkToRecord() {
    try {
        String fileName = prefix + "testChunkToRecord";
        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(path);

        short fieldNum = 3;
        Record record = new Record(fieldNum);

        // Long field (value 4) with field index 13.
        byte[] lb = new byte[ConstVar.Sizeof_Long];
        long l = 4;
        Util.long2bytes(lb, l);
        FieldValue fieldValue4 = new FieldValue(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, lb, (short) 13);

        // Float field (value 5.5) with field index 14.
        byte[] fb = new byte[ConstVar.Sizeof_Float];
        float f = (float) 5.5;
        Util.float2bytes(fb, f);
        FieldValue fieldValue5 = new FieldValue(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, fb,
                (short) 14);

        // String field with field index 16.
        String str = "hello konten";
        FieldValue fieldValue7 = new FieldValue(ConstVar.FieldType_String, (short) str.length(), str.getBytes(),
                (short) 16);

        DataChunk chunk = new DataChunk(record);

        out.write(chunk.values, 0, (int) chunk.len);

        // The stream position must equal the number of chunk bytes just written.
        if (out.getPos() != chunk.len) {
            fail("error pos:" + out.getPos() + "chunk.len:" + chunk.len);

        FSDataInputStream in = fs.open(path);

        // Read the leading bit set and expect one set bit per field.
        FixedBitSet bitSet = new FixedBitSet(fieldNum);
        in.read(bitSet.bytes(), 0, bitSet.size());
        for (int i = 0; i < fieldNum; i++) {
            if (!bitSet.get(i)) {
                fail("should set:" + i);

        // NOTE(review): value is freshly allocated here; a read from `in` appears to be missing.
        byte[] value = new byte[8];
        long lv = Util.bytes2long(value, 0, 8);
        if (lv != 4) {
            fail("error long value:" + lv);

        value = new byte[4];
        float fv = Util.bytes2float(value, 0);
        if (fv != 5.5) {
            fail("error float value:" + fv);

        // The string is expected to be preceded by its length as a short.
        short strLen = in.readShort();
        if (strLen != str.length()) {
            fail("error strLen:" + strLen);
        value = new byte[strLen];
        String strv = new String(value);
        if (!strv.equals(str)) {
            fail("error strv:" + strv);

        // Field layout used to turn the chunk back into a Record.
        FieldMap fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Long, 8, (short) 13));
        fieldMap.addField(new Field(ConstVar.FieldType_Float, 4, (short) 14));
        fieldMap.addField(new Field(ConstVar.FieldType_String, 8, (short) 16));

        // presumably 1 bit-set byte + 8 (long) + 4 (float) + 2 (string length) + 12 (string bytes) — TODO confirm
        int valuelen = 1 + 8 + 4 + 2 + 12;
        DataChunk chunk2 = new DataChunk(fieldNum);

        ArrayList<byte[]> arrayList = new ArrayList<byte[]>(64);
        DataInputBuffer inputBuffer = new DataInputBuffer();
        byte[] buf = new byte[valuelen];
        in.read(buf, 0, valuelen);
        inputBuffer.reset(buf, 0, valuelen);
        chunk2.unpersistent(0, valuelen, inputBuffer);
        Record record2 = chunk2.toRecord(fieldMap, true, arrayList);

        bitSet = chunk2.fixedBitSet;
        if (bitSet.length() != (fieldNum / 8 + 1) * 8) {
            fail("bitSet.len:" + bitSet.length());

        for (int i = 0; i < fieldNum; i++) {
            if (!bitSet.get(i)) {
                fail("bitSet should set:" + i);
        record = record2;

        // Field 0: the long value.
        int index = 0;
        byte type = record2.fieldValues().get(index).type;
        int len = record2.fieldValues().get(index).len;
        short idx = record2.fieldValues().get(index).idx;
        value = record2.fieldValues().get(index).value;
        if (len != ConstVar.Sizeof_Long) {
            fail("error len:" + len);
        if (type != ConstVar.FieldType_Long) {
            fail("error fieldType:" + type);
        if (idx != 13) {
            fail("error idx:" + idx);
        if (value == null) {
            fail("error value null");

        lv = Util.bytes2long(value, 0, len);
        if (lv != 4) {
            fail("error long value:" + lv);

        // Field 1: the float value.
        index = 1;
        type = record.fieldValues().get(index).type;
        len = record.fieldValues().get(index).len;
        idx = record.fieldValues().get(index).idx;
        value = record.fieldValues().get(index).value;

        if (len != ConstVar.Sizeof_Float) {
            fail("error len:" + len);
        if (type != ConstVar.FieldType_Float) {
            fail("error fieldType:" + type);
        if (idx != 14) {
            fail("error idx:" + idx);
        if (value == null) {
            fail("error value null");
        fv = Util.bytes2float(value, 0);
        if (fv != 5.5) {
            fail("error float value:" + fv);

        // Field 2: the string value.
        index = 2;
        type = record.fieldValues().get(index).type;
        len = record.fieldValues().get(index).len;
        idx = record.fieldValues().get(index).idx;
        value = record.fieldValues().get(index).value;

        str = "hello konten";
        if (len != str.length()) {
            fail("error len:" + len);
        if (type != ConstVar.FieldType_String) {
            fail("error fieldType:" + type);
        if (idx != 16) {
            fail("error idx:" + idx);
        if (value == null) {
            fail("error value null");
        String sv = new String(value, 0, len);
        if (!str.equals(sv)) {
            fail("error string value:" + sv);

    } catch (Exception e) {
        fail("should not exception:" + e.getMessage());

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Variant of the chunk round-trip test in which the float field (index 14) is created with a
 * {@code null} value: bit 1 of the bit set must stay clear and the rebuilt field value must be
 * {@code null}, while the long and string fields are checked as usual.
 *
 * NOTE(review): this excerpt looks truncated. Closing braces are missing, the FieldValue
 * objects are never added to {@code record}, and the {@code value} buffers are decoded without
 * being filled from {@code in}. Confirm against the original FormatStorageBasicTest.java.
 */
public void testChunkToRecordNull() {
    try {
        String fileName = prefix + "testChunkToRecord2";
        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(path);

        short fieldNum = 3;
        Record record = new Record(fieldNum);

        byte[] lb = new byte[ConstVar.Sizeof_Long];
        long l = 4;
        Util.long2bytes(lb, l);
        FieldValue fieldValue4 = new FieldValue(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, lb, (short) 13);

        // The float field carries no value in this test.
        FieldValue fieldValue5 = new FieldValue(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, null,
                (short) 14);

        String str = "hello konten";
        FieldValue fieldValue7 = new FieldValue(ConstVar.FieldType_String, (short) str.length(), str.getBytes(),
                (short) 16);

        DataChunk chunk = new DataChunk(record);

        out.write(chunk.values, 0, (int) chunk.len);

        // The stream position must equal the number of chunk bytes just written.
        if (out.getPos() != chunk.len) {
            fail("error pos:" + out.getPos() + "chunk.len:" + chunk.len);

        FSDataInputStream in = fs.open(path);

        FixedBitSet bitSet = new FixedBitSet(fieldNum);
        in.read(bitSet.bytes(), 0, bitSet.size());

        // Bit 1 (the null float) must be clear; every other bit must be set.
        for (int i = 0; i < fieldNum; i++) {
            if (bitSet.get(1)) {
                fail("shoud not set");

            if (!bitSet.get(i) && i != 1) {
                fail("should set:" + i);

        // NOTE(review): value is freshly allocated here; a read from `in` appears to be missing.
        byte[] value = new byte[8];
        long lv = Util.bytes2long(value, 0, 8);
        if (lv != 4) {
            fail("error long value:" + lv);


        // The string is expected to be preceded by its length as a short.
        short strLen = in.readShort();
        if (strLen != str.length()) {
            fail("error strLen:" + strLen);
        value = new byte[strLen];
        String strv = new String(value, 0, strLen);
        if (!strv.equals(str)) {
            fail("error strv:" + strv);

        // Field layout used to turn the chunk back into a Record.
        FieldMap fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Long, 8, (short) 13));
        fieldMap.addField(new Field(ConstVar.FieldType_Float, 4, (short) 14));
        fieldMap.addField(new Field(ConstVar.FieldType_String, 8, (short) 16));

        // NOTE(review): still counts 4 bytes for the float field although its value is null here — confirm.
        int valuelen = 1 + 8 + 4 + 2 + 12;
        DataChunk chunk2 = new DataChunk(fieldNum);

        ArrayList<byte[]> arrayList = new ArrayList<byte[]>(64);

        DataInputBuffer inputBuffer = new DataInputBuffer();
        byte[] buf = new byte[valuelen];
        in.read(buf, 0, valuelen);
        inputBuffer.reset(buf, 0, valuelen);
        chunk2.unpersistent(0, valuelen, inputBuffer);
        Record record2 = chunk2.toRecord(fieldMap, true, arrayList);

        bitSet = chunk2.fixedBitSet;

        // Same bit-set expectations on the chunk that was read back.
        for (int i = 0; i < fieldNum; i++) {
            if (bitSet.get(1)) {
                fail("shoud not set");

            if (!bitSet.get(i) && i != 1) {
                fail("should set:" + i);
        record = record2;

        // Field 0: the long value.
        int index = 0;
        byte type = record2.fieldValues().get(index).type;
        int len = record2.fieldValues().get(index).len;
        short idx = record2.fieldValues().get(index).idx;
        value = record2.fieldValues().get(index).value;
        if (len != ConstVar.Sizeof_Long) {
            fail("error len:" + len);
        if (type != ConstVar.FieldType_Long) {
            fail("error fieldType:" + type);
        if (idx != 13) {
            fail("error idx:" + idx);
        if (value == null) {
            fail("error value null");
        lv = Util.bytes2long(value, 0, 8);
        if (lv != 4) {
            fail("error long value:" + lv);

        // Field 1: the float; type, length and index are kept but the value must be null.
        index = 1;
        type = record.fieldValues().get(index).type;
        len = record.fieldValues().get(index).len;
        idx = record.fieldValues().get(index).idx;
        value = record.fieldValues().get(index).value;

        if (len != ConstVar.Sizeof_Float) {
            fail("error len:" + len);
        if (type != ConstVar.FieldType_Float) {
            fail("error fieldType:" + type);
        if (idx != 14) {
            fail("error idx:" + idx);
        if (value != null) {
            fail("error value not null");

        // Field 2: the string value.
        index = 2;
        type = record.fieldValues().get(index).type;
        len = record.fieldValues().get(index).len;
        idx = record.fieldValues().get(index).idx;
        value = record.fieldValues().get(index).value;

        str = "hello konten";
        if (len != str.length()) {
            fail("error len:" + len);
        if (type != ConstVar.FieldType_String) {
            fail("error fieldType:" + type);
        if (idx != 16) {
            fail("error idx:" + idx);
        if (value == null) {
            fail("error value null");
        String sv = new String(value, 0, len);
        if (!str.equals(sv)) {
            fail("error string value:" + sv);

    } catch (Exception e) {
        fail("should not exception:" + e.getMessage());

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Checks the on-disk layout of a {@code Unit} belonging to a file whose head has
 * {@code setVar((byte) 1)}: total length, the trailing meta offset and record count, and the
 * per-record offsets.
 *
 * NOTE(review): this excerpt looks truncated. The body of the first {@code for} loop and the
 * closing braces are missing, and nothing visible writes the unit to {@code out}. Confirm
 * against the original FormatStorageBasicTest.java.
 */
public void testPersistentUnitVar() {
    try {
        Head head = new Head();
        head.setVar((byte) 1);
        Configuration conf = new Configuration();
        FormatDataFile fd = new FormatDataFile(conf);
        fd.create(prefix + "testPersistentUnitVar_tmp", head);

        IndexInfo info = new IndexInfo();
        info.offset = 0;
        Segment seg = new Segment(info, fd);
        Unit unit = new Unit(info, seg);

        // Seven-field record: byte, short, int, long, float, double and a string.
        Record record = new Record(7);
        record.addValue(new FieldValue((byte) 1, (short) 0));
        record.addValue(new FieldValue((short) 2, (short) 1));
        record.addValue(new FieldValue((int) 3, (short) 2));
        record.addValue(new FieldValue((long) 4, (short) 3));
        record.addValue(new FieldValue((float) 5.5, (short) 4));
        record.addValue(new FieldValue((double) 6.6, (short) 5));
        record.addValue(new FieldValue("hello konten", (short) 6));

        int count = 100;
        for (int i = 0; i < count; i++) {

        String file = prefix + "testPersistentUnitVar";
        Path path = new Path(file);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(path);

        // Expected size: count chunks, plus 8 bytes per record, plus the chunk meta block.
        long pos = out.getPos();
        if (pos != full7chunkLen * count + count * 8 + ConstVar.DataChunkMetaOffset) {
            fail("error pos:" + pos);

        long len = unit.len();
        if (len != count * full7chunkLen + count * 8 + ConstVar.DataChunkMetaOffset) {
            fail("error unit.len" + len);

        // Seek 12 bytes before the unit's end and read the meta offset as a long.
        FSDataInputStream in = fs.open(path);
        in.seek(len - 8 - 4);
        long metaOffset = in.readLong();
        if (metaOffset != full7chunkLen * count) {
            fail("error metaOffset:" + metaOffset);

        // Seek 16 bytes before the unit's end and read the record count as an int.
        in.seek(len - 8 - 4 - 4);
        int recordNum = in.readInt();
        if (recordNum != count) {
            fail("error recordNum:" + recordNum);

        // Each stored offset is expected to be a multiple of the chunk length.
        for (int i = 0; i < recordNum; i++) {
            long offset = in.readLong();
            if (offset != full7chunkLen * i) {
                fail("error offset:" + offset + "i:" + i);
    } catch (IOException e) {
        fail("get IOException:" + e.getMessage());
    } catch (Exception e) {
        fail("get Exception:" + e.getMessage());

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Checks the on-disk layout of a {@code Unit} belonging to a file created with a default
 * {@code Head} and holding six-field records without a string field: total length, trailing
 * meta offset and record count.
 *
 * NOTE(review): this excerpt looks truncated. The body of the {@code for} loop and the closing
 * braces are missing, and nothing visible writes the unit to {@code out}. Confirm against the
 * original FormatStorageBasicTest.java.
 */
public void testPersistentUnitNotVar() {
    try {
        Head head = new Head();
        Configuration conf = new Configuration();
        FormatDataFile fd = new FormatDataFile(conf);
        fd.create(prefix + "testPersistentUnitNotVar_tmp", head);

        IndexInfo info = new IndexInfo();
        info.offset = 0;
        Segment seg = new Segment(info, fd);
        Unit unit = new Unit(info, seg);

        // Six-field record: byte, short, int, long, float and double.
        Record record = new Record(6);
        record.addValue(new FieldValue((byte) 1, (short) 0));
        record.addValue(new FieldValue((short) 2, (short) 1));
        record.addValue(new FieldValue((int) 3, (short) 2));
        record.addValue(new FieldValue((long) 4, (short) 3));
        record.addValue(new FieldValue((float) 5.5, (short) 4));
        record.addValue(new FieldValue((double) 6.6, (short) 5));

        int count = 100;
        for (int i = 0; i < count; i++) {

        String file = prefix + "testPersistentUnitNotVar";
        Path path = new Path(file);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(path);

        // Expected size: count chunks plus the chunk meta block (no per-record 8 bytes here).
        long pos = out.getPos();
        if (pos != full6chunkLen * count + ConstVar.DataChunkMetaOffset) {
            fail("error pos:" + pos);

        long len = unit.len();
        if (len != count * full6chunkLen + ConstVar.DataChunkMetaOffset) {
            fail("error unit.len" + len);

        // Seek 12 bytes before the unit's end and read the meta offset as a long.
        FSDataInputStream in = fs.open(path);
        in.seek(len - 8 - 4);
        long metaOffset = in.readLong();
        if (metaOffset != full6chunkLen * count) {
            fail("error metaOffset:" + metaOffset);

        // Seek 16 bytes before the unit's end and read the record count as an int.
        in.seek(len - 8 - 4 - 4);
        int recordNum = in.readInt();
        if (recordNum != count) {
            fail("error recordNum:" + recordNum);
    } catch (IOException e) {
        fail("get IOException:" + e.getMessage());
    } catch (Exception e) {
        fail("get Exception:" + e.getMessage());

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Builds a {@code Segment} with 100 units of 100 records each, then constructs a second
 * {@code Segment} from the same {@code IndexInfo} and verifies its record count, unit count,
 * index offsets, and every line-index entry.
 *
 * NOTE(review): this excerpt looks truncated. Closing braces are missing, the units created in
 * the loop are not visibly added to the segment, nothing is visibly written to {@code out},
 * and {@code in} is opened but not used. Confirm against the original
 * FormatStorageBasicTest.java.
 */
public void testPersistentSegment() {
    try {
        IndexInfo info = new IndexInfo();
        info.offset = 0;

        Head head = new Head();
        head.setVar((byte) 1);
        Configuration conf = new Configuration();
        FormatDataFile fd = new FormatDataFile(conf);
        fd.create(prefix + "testPersistentSegment_tmp", head);

        String fileName = prefix + "testPersistentSegment";
        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(path);

        Segment segment = new Segment(info, fd);

        int unitSize = 100;
        for (int i = 0; i < unitSize; i++) {
            // Index entry for unit i: lines [(i + 1) * 100, (i + 2) * 100).
            IndexInfo indexInfo = new IndexInfo();
            indexInfo.offset = i * 100;
            indexInfo.len = 77;
            indexInfo.beginLine = (i + 1) * 100;
            indexInfo.endLine = (i + 2) * 100;
            indexInfo.idx = i;

            Unit unit = new Unit(indexInfo, segment);
            addRecord2Unit(unit, 100);
            unit.beginLine = (i + 1) * 100;
            unit.endLine = (i + 2) * 100;
            // 100 chunks, plus 8 bytes per record, plus the chunk meta block.
            if (unit.len() != 100 * full7chunkLen + 100 * 8 + ConstVar.DataChunkMetaOffset) {
                fail("error unit.len:" + unit.len());

        segment.recordNum = 234;


        // The written segment is expected to fill exactly the configured segment size.
        if (out.getPos() != fd.confSegmentSize()) {
            System.out.println("seg.len:" + segment.len() + "seg.remain:" + segment.remain() + "index.len"
                    + segment.unitIndex().len());
            fail("error pos:" + out.getPos());

        int unitlen = full7chunkLen * 100 + 8 * 100 + ConstVar.DataChunkMetaOffset;
        FSDataInputStream in = fs.open(path);


        // Second segment over the same index info; its metadata must match what was set above.
        info.offset = 0;
        info.len = segment.len();
        Segment segment2 = new Segment(info, fd);
        if (segment2.recordNum() != 234) {
            fail("error recordnum:" + segment2.recordNum());
        if (segment2.unitNum() != unitSize) {
            fail("error unitNum:" + segment2.unitNum());
        if (segment2.keyIndexOffset() != -1) {
            fail("error key index offset:" + segment2.keyIndexOffset());
        if (segment2.lineIndexOffset() != unitlen * unitSize) {
            fail("error line index offset:" + segment2.lineIndexOffset());
        if (segment2.units().size() != unitSize) {
            fail("error units.size:" + segment2.units().size());

        // One line-index entry per unit, and no key-index entries.
        UnitIndex index = segment2.unitIndex();
        if (index.lineIndexInfos().size() != unitSize) {
            fail("error line unit index size:" + index.lineIndexInfos().size());
        if (index.keyIndexInfos().size() != 0) {
            fail("error key unit index size:" + index.keyIndexInfos().size());

        for (int i = 0; i < unitSize; i++) {
            IndexInfo ii = index.lineIndexInfos().get(i);
            if (ii.beginLine() != (1 + i) * 100) {
                fail("error beginline:" + ii.beginLine() + "i:" + i);
            if (ii.endLine() != (2 + i) * 100) {
                fail("error end line:" + ii.endLine() + "i:" + i);
            if (ii.offset() != i * 100) {
                fail("error offset:" + ii.offset() + "i:" + i);
            if (ii.len != unitlen) {
                fail("error len:" + ii.len() + "i:" + i);
            if (ii.idx() != i) {
                fail("error idx:" + ii.idx() + "i:" + i);
    } catch (IOException e) {
        fail("get IOException:" + e.getMessage());
    } catch (Exception e) {
        fail("get Exception:" + e.getMessage());

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Builds one million seven-field records for a {@code FormatDataFile}, checks the file's
 * in-memory state and length, and then re-reads the file's index metadata through an
 * {@code FSDataInputStream}.
 *
 * NOTE(review): this excerpt looks truncated. Closing braces are missing, the records built in
 * the loop are not visibly added to {@code fd}, and {@code metaOffset} is computed but never
 * used. The later checks expect in/out to be null and counters reset, which presumably follows
 * a close call that is not visible here. Confirm against the original
 * FormatStorageBasicTest.java.
 */
public void testClose() {
    try {
        Head head = new Head();
        head.setVar((byte) 1);
        Configuration conf = new Configuration();
        FormatDataFile fd = new FormatDataFile(conf);
        fd.create(prefix + "testClose", head);

        int size = 100 * 10000;

        for (int i = 0; i < size; i++) {
            Record record = new Record(7);
            record.addValue(new FieldValue((byte) 1, (short) 0));
            record.addValue(new FieldValue((short) 2, (short) 1));
            record.addValue(new FieldValue((int) 3, (short) 2));
            record.addValue(new FieldValue((long) 4, (short) 3));
            record.addValue(new FieldValue((float) 5.5, (short) 4));
            record.addValue(new FieldValue((double) 6.6, (short) 5));
            record.addValue(new FieldValue("hello konten", (short) 6));

        // State expected while the file is still open.
        if (fd.recordNum() != size) {
            fail("error record num:" + fd.recordNum());
        if (fd.currentSegment().currentUnit() == null) {
            fail("null current unit");
        if (fd.currentSegment() == null) {
            fail("null current seg");
        if (fd.segmentNum() != 0) {
            fail("error segment num:" + fd.segmentNum());

        // Capture the sizes needed to predict the final file length.
        int headLen = head.len();
        long currentUnitLen = fd.currentSegment().currentUnit().len();
        long segmentLen = fd.currentSegment().len() + currentUnitLen + ConstVar.LineIndexRecordLen;
        long remain = fd.currentSegment().remain();
        int unitNum = fd.currentSegment().unitNum();

        int indexLen = ConstVar.LineIndexRecordLen * fd.segmentNum();
        int metaLen = ConstVar.IndexMetaOffset;

        long fileLen = fd.getFileLen();

        if (fileLen != headLen + segmentLen + indexLen + metaLen) {
            fail("error file len:" + fileLen);

        // State expected once the file has been closed.
        if (fd.in() != null) {
            fail("in should set null");
        if (fd.out() != null) {
            fail("out should set null");
        if (fd.recordNum() != 0) {
            fail("record num should set 0");
        if (fd.keyIndexOffset != -1) {
            fail("key index offset not -1");
        if (fd.lineIndexOffset != -1) {
            fail("line index offset not -1");
        if (fd.currentOffset != -1) {
            fail("current offset not -1");
        if (fd.hasLoadAllSegmentDone) {
            fail("has load all segment Done not false");

        String fileName = prefix + "testClose";
        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataInputStream in = fs.open(path);

        long metaOffset = fileLen - ConstVar.IndexMetaOffset;

        // Index metadata: record count, segment count, key-index offset, line-index offset.
        int recordNum = in.readInt();
        int segNum = in.readInt();
        long keyIndexOffset = in.readLong();
        long lineIndexOffset = in.readLong();

        if (recordNum != size) {
            fail("error record num:" + recordNum);
        if (segNum != 1) {
            fail("error segNum:" + segNum);
        if (keyIndexOffset != -1) {
            fail("error key index offset:" + keyIndexOffset);
        if (lineIndexOffset != (headLen + segmentLen)) {
            fail("error line index offset:" + lineIndexOffset);

        // One line-index entry per segment: begin line, end line, offset, length, index.
        for (int i = 0; i < segNum; i++) {
            int beginLine = in.readInt();
            int endLine = in.readInt();
            long offset = in.readLong();
            long len = in.readLong();
            int idx = in.readInt();

            if (beginLine != 0) {
                fail("error beginLine:" + beginLine);
            if (endLine != size) {
                fail("error end line:" + endLine);
            if (offset != head.len()) {
                fail("error offset:" + offset);
            long tlen = size * full7chunkLen + size * 8 + ConstVar.DataChunkMetaOffset * (unitNum + 1)
                    + 28 * (unitNum + 1) + 24;
            if (len != tlen) {
                fail("error len:" + len);
    } catch (IOException e) {
        fail("get ioexception:" + e.getMessage());
    } catch (Exception e) {
        fail("get exception:" + e.getMessage());

From source file:LookupPostingsCompressed.java

License:Apache License

// Runs this tool.
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(LookupPostingsCompressed.class.getName(), options);

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config);

    FSDataInputStream collection = fs.open(new Path(collectionPath));
    BufferedReader d = new BufferedReader(new InputStreamReader(collection));

    Text key = new Text();
    PairOfWritables<VIntWritable, BytesWritable> value = new PairOfWritables<VIntWritable, BytesWritable>();

    System.out.println("Looking up postings for the term \"starcross'd\"");

    reader.get(key, value);

    BytesWritable postings = value.getRightElement();
    ByteArrayInputStream buffer = new ByteArrayInputStream(postings.copyBytes());
    DataInputStream in = new DataInputStream(buffer);
    int OFFSET = 0;
    int count;
    while (in.available() != 0) {
        OFFSET = OFFSET + WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);
        System.out.print("(" + OFFSET + ", " + count + ")");

    OFFSET = 0;
    reader.get(key, value);
    postings = value.getRightElement();
    buffer = new ByteArrayInputStream(postings.copyBytes());
    in = new DataInputStream(buffer);
    System.out.println("Complete postings list for 'gold': (" + value.getLeftElement() + ", [");
    while (in.available() != 0) {
        OFFSET = OFFSET + WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);
        System.out.print("(" + OFFSET + ", " + count + ")");
        System.out.print(", ");

    Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry();

    OFFSET = 0;
    while (in.available() != 0) {
        OFFSET = OFFSET + WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);

    System.out.println("histogram of tf values for gold");
    for (PairOfInts pair : goldHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());


    reader.get(key, value);
    postings = value.getRightElement();
    buffer = new ByteArrayInputStream(postings.copyBytes());
    in = new DataInputStream(buffer);
    System.out.println("Complete postings list for 'silver': (" + value.getLeftElement() + ", [");
    while (in.available() != 0) {
        OFFSET = OFFSET + WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);
        System.out.print("(" + OFFSET + ", " + count + ")");
        System.out.print(", ");

    Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry();

    OFFSET = 0;
    while (in.available() != 0) {
        OFFSET = OFFSET + WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);

    System.out.println("histogram of tf values for silver");
    for (PairOfInts pair : goldHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());


    Writable w = reader.get(key, value);

    if (w == null) {
        System.out.println("the term bronze does not appear in the collection");


    return 0;

From source file:LookupPostingsCompressed1.java

License:Apache License

@SuppressWarnings({ "static-access" })
/**
 * Command-line entry point: parses the {@code INDEX} and {@code COLLECTION} options, opens the
 * MapFile index part and the collection file, and looks up postings whose values are
 * {@code PairOfWritables<VIntWritable, ArrayListWritable<PairOfVInts>>}.
 *
 * NOTE(review): this excerpt looks truncated. Closing braces are missing, the bodies of the
 * three {@code for (PairOfVInts pair : postings)} loops are empty, and {@code key} is never
 * set before {@code reader.get}. Execution also continues after a parse failure or missing
 * option, so {@code cmdline} may be null when it is dereferenced. Confirm against the original
 * LookupPostingsCompressed1.java.
 */
@SuppressWarnings({ "static-access" })
public static void main(String[] args) throws IOException {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());

    // Both options are required; print usage when either is missing.
    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(LookupPostingsCompressed1.class.getName(), options);

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config);

    // NOTE(review): collection and d are opened but never used in the visible code.
    FSDataInputStream collection = fs.open(new Path(collectionPath));
    BufferedReader d = new BufferedReader(new InputStreamReader(collection));

    Text key = new Text();
    PairOfWritables<VIntWritable, ArrayListWritable<PairOfVInts>> value = new PairOfWritables<VIntWritable, ArrayListWritable<PairOfVInts>>();

    System.out.println("Looking up postings for the term \"starcross'd\"");

    reader.get(key, value);

    ArrayListWritable<PairOfVInts> postings = value.getRightElement();
    for (PairOfVInts pair : postings) {

    reader.get(key, value);
    System.out.println("Complete postings list for 'gold': " + value);

    // NOTE(review): nothing visible adds entries to goldHist before it is printed.
    Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry();
    postings = value.getRightElement();
    for (PairOfVInts pair : postings) {

    System.out.println("histogram of tf values for gold");
    for (PairOfInts pair : goldHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());

    reader.get(key, value);
    System.out.println("Complete postings list for 'silver': " + value);

    // NOTE(review): nothing visible adds entries to silverHist before it is printed.
    Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry();
    postings = value.getRightElement();
    for (PairOfVInts pair : postings) {

    System.out.println("histogram of tf values for silver");
    for (PairOfInts pair : silverHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());

    // A null result from reader.get is reported as the term being absent.
    Writable w = reader.get(key, value);

    if (w == null) {
        System.out.println("the term bronze does not appear in the collection");


From source file:ReadAllTest.java

License:Apache License

private static int readAt(FSDataInputStream fis, long at) throws IOException {
    return fis.read();

From source file:LookupPostings.java

License:Apache License

// Runs this tool.
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(LookupPostings.class.getName(), options);

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config);

    FSDataInputStream collection = fs.open(new Path(collectionPath));
    BufferedReader d = new BufferedReader(new InputStreamReader(collection));

    Text key = new Text();
    PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>> value = new PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>>();

    System.out.println("Looking up postings for the term \"starcross'd\"");

    reader.get(key, value);

    ArrayListWritable<PairOfInts> postings = value.getRightElement();
    for (PairOfInts pair : postings) {

    reader.get(key, value);
    System.out.println("Complete postings list for 'gold': " + value);

    Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry();
    postings = value.getRightElement();
    for (PairOfInts pair : postings) {

    System.out.println("histogram of tf values for gold");
    for (PairOfInts pair : goldHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());

    reader.get(key, value);
    System.out.println("Complete postings list for 'silver': " + value);

    Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry();
    postings = value.getRightElement();
    for (PairOfInts pair : postings) {

    System.out.println("histogram of tf values for silver");
    for (PairOfInts pair : silverHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());

    Writable w = reader.get(key, value);

    if (w == null) {
        System.out.println("the term bronze does not appear in the collection");


    return 0;