Example usage for org.apache.hadoop.fs FSDataInputStream seek

List of usage examples for org.apache.hadoop.fs FSDataInputStream seek

Introduction

On this page you can find example usages of the seek method of org.apache.hadoop.fs.FSDataInputStream.

Prototype

@Override
public void seek(long desired) throws IOException 

Source Link

Document

Seek to the given offset.

Usage

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Writes a three-field record (long, float, string) to a file as a {@code DataChunk}, then
 * verifies it twice: first by decoding the raw bytes straight from the stream, and then by
 * seeking back to offset 0 and rebuilding the record through
 * {@code DataChunk.unpersistent} / {@code DataChunk.toRecord}.
 */
public void testChunkToRecord() {
    try {
        String fileName = prefix + "testChunkToRecord";
        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(path);

        short fieldNum = 3;
        Record record = new Record(fieldNum);

        byte[] lb = new byte[ConstVar.Sizeof_Long];
        long l = 4;
        Util.long2bytes(lb, l);
        FieldValue fieldValue4 = new FieldValue(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, lb, (short) 13);
        record.addValue(fieldValue4);

        byte[] fb = new byte[ConstVar.Sizeof_Float];
        float f = (float) 5.5;
        Util.float2bytes(fb, f);
        FieldValue fieldValue5 = new FieldValue(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, fb,
                (short) 14);
        record.addValue(fieldValue5);

        String str = "hello konten";
        FieldValue fieldValue7 = new FieldValue(ConstVar.FieldType_String, (short) str.length(), str.getBytes(),
                (short) 16);
        record.addValue(fieldValue7);

        DataChunk chunk = new DataChunk(record);

        out.write(chunk.values, 0, (int) chunk.len);

        if (out.getPos() != chunk.len) {
            fail("error pos:" + out.getPos() + "chunk.len:" + chunk.len);
        }
        out.close();

        FSDataInputStream in = fs.open(path);
        try {
            // Part 1: decode the raw bytes directly from the stream.
            FixedBitSet bitSet = new FixedBitSet(fieldNum);
            // readFully instead of read: a plain read may legally return fewer bytes than asked.
            in.readFully(bitSet.bytes(), 0, bitSet.size());
            for (int i = 0; i < fieldNum; i++) {
                if (!bitSet.get(i)) {
                    fail("should set:" + i);
                }
            }

            byte[] value = new byte[8];
            in.readFully(value);
            long lv = Util.bytes2long(value, 0, 8);
            if (lv != 4) {
                fail("error long value:" + lv);
            }

            value = new byte[4];
            in.readFully(value);
            float fv = Util.bytes2float(value, 0);
            if (fv != 5.5) {
                fail("error float value:" + fv);
            }

            short strLen = in.readShort();
            if (strLen != str.length()) {
                fail("error strLen:" + strLen);
            }
            value = new byte[strLen];
            in.readFully(value);
            String strv = new String(value);
            if (!strv.equals(str)) {
                fail("error strv:" + strv);
            }

            // Part 2: rewind and rebuild the record through DataChunk.
            FieldMap fieldMap = new FieldMap();
            fieldMap.addField(new Field(ConstVar.FieldType_Long, 8, (short) 13));
            fieldMap.addField(new Field(ConstVar.FieldType_Float, 4, (short) 14));
            fieldMap.addField(new Field(ConstVar.FieldType_String, 8, (short) 16));

            in.seek(0);
            // Same byte counts as consumed above: bitmap + long + float + string length prefix
            // + the 12 bytes of "hello konten".
            int valuelen = 1 + 8 + 4 + 2 + 12;
            DataChunk chunk2 = new DataChunk(fieldNum);

            ArrayList<byte[]> arrayList = new ArrayList<byte[]>(64);
            DataInputBuffer inputBuffer = new DataInputBuffer();
            byte[] buf = new byte[valuelen];
            in.readFully(buf, 0, valuelen);
            inputBuffer.reset(buf, 0, valuelen);
            chunk2.unpersistent(0, valuelen, inputBuffer);
            Record record2 = chunk2.toRecord(fieldMap, true, arrayList);

            bitSet = chunk2.fixedBitSet;
            if (bitSet.length() != (fieldNum / 8 + 1) * 8) {
                fail("bitSet.len:" + bitSet.length());
            }

            for (int i = 0; i < fieldNum; i++) {
                if (!bitSet.get(i)) {
                    fail("bitSet should set:" + i);
                }
            }
            record = record2;

            // Field 0: the long value.
            int index = 0;
            byte type = record2.fieldValues().get(index).type;
            int len = record2.fieldValues().get(index).len;
            short idx = record2.fieldValues().get(index).idx;
            value = record2.fieldValues().get(index).value;
            if (len != ConstVar.Sizeof_Long) {
                fail("error len:" + len);
            }
            if (type != ConstVar.FieldType_Long) {
                fail("error fieldType:" + type);
            }
            if (idx != 13) {
                fail("error idx:" + idx);
            }
            if (value == null) {
                fail("error value null");
            }

            lv = Util.bytes2long(value, 0, len);
            if (lv != 4) {
                fail("error long value:" + lv);
            }

            // Field 1: the float value.
            index = 1;
            type = record.fieldValues().get(index).type;
            len = record.fieldValues().get(index).len;
            idx = record.fieldValues().get(index).idx;
            value = record.fieldValues().get(index).value;

            if (len != ConstVar.Sizeof_Float) {
                fail("error len:" + len);
            }
            if (type != ConstVar.FieldType_Float) {
                fail("error fieldType:" + type);
            }
            if (idx != 14) {
                fail("error idx:" + idx);
            }
            if (value == null) {
                fail("error value null");
            }
            fv = Util.bytes2float(value, 0);
            if (fv != 5.5) {
                fail("error float value:" + fv);
            }

            // Field 2: the string value.
            index = 2;
            type = record.fieldValues().get(index).type;
            len = record.fieldValues().get(index).len;
            idx = record.fieldValues().get(index).idx;
            value = record.fieldValues().get(index).value;

            str = "hello konten";
            if (len != str.length()) {
                fail("error len:" + len);
            }
            if (type != ConstVar.FieldType_String) {
                fail("error fieldType:" + type);
            }
            if (idx != 16) {
                fail("error idx:" + idx);
            }
            if (value == null) {
                fail("error value null");
            }
            String sv = new String(value, 0, len);
            if (!str.equals(sv)) {
                fail("error string value:" + sv);
            }
        } finally {
            // Release the stream even when an assertion above fails.
            in.close();
        }

    } catch (Exception e) {
        e.printStackTrace();
        fail("should not exception:" + e.getMessage());
    }
}

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Same round trip as the non-null chunk test, but the float field (position 1) is added with
 * a {@code null} value. Verifies that its bit is not set in the chunk's bit set, that the
 * surrounding long and string fields still decode correctly, and that the rebuilt record
 * reports a {@code null} value for the float field.
 */
public void testChunkToRecordNull() {
    try {
        String fileName = prefix + "testChunkToRecord2";
        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(path);

        short fieldNum = 3;
        Record record = new Record(fieldNum);

        byte[] lb = new byte[ConstVar.Sizeof_Long];
        long l = 4;
        Util.long2bytes(lb, l);
        FieldValue fieldValue4 = new FieldValue(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, lb, (short) 13);
        record.addValue(fieldValue4);

        // The float field carries no value.
        FieldValue fieldValue5 = new FieldValue(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, null,
                (short) 14);
        record.addValue(fieldValue5);

        String str = "hello konten";
        FieldValue fieldValue7 = new FieldValue(ConstVar.FieldType_String, (short) str.length(), str.getBytes(),
                (short) 16);
        record.addValue(fieldValue7);

        DataChunk chunk = new DataChunk(record);

        out.write(chunk.values, 0, (int) chunk.len);

        if (out.getPos() != chunk.len) {
            fail("error pos:" + out.getPos() + "chunk.len:" + chunk.len);
        }
        out.close();

        FSDataInputStream in = fs.open(path);
        try {
            // Part 1: decode the raw bytes directly from the stream.
            FixedBitSet bitSet = new FixedBitSet(fieldNum);
            // readFully instead of read: a plain read may legally return fewer bytes than asked.
            in.readFully(bitSet.bytes(), 0, bitSet.size());

            if (bitSet.get(1)) {
                fail("shoud not set");
            }
            for (int i = 0; i < fieldNum; i++) {
                if (!bitSet.get(i) && i != 1) {
                    fail("should set:" + i);
                }
            }

            byte[] value = new byte[8];
            in.readFully(value);
            long lv = Util.bytes2long(value, 0, 8);
            if (lv != 4) {
                fail("error long value:" + lv);
            }

            // Skip the four bytes between the long and the string length prefix; the content
            // is not checked here.
            in.readFloat();

            short strLen = in.readShort();
            if (strLen != str.length()) {
                fail("error strLen:" + strLen);
            }
            value = new byte[strLen];
            in.readFully(value);
            String strv = new String(value, 0, strLen);
            if (!strv.equals(str)) {
                fail("error strv:" + strv);
            }

            // Part 2: rewind and rebuild the record through DataChunk.
            FieldMap fieldMap = new FieldMap();
            fieldMap.addField(new Field(ConstVar.FieldType_Long, 8, (short) 13));
            fieldMap.addField(new Field(ConstVar.FieldType_Float, 4, (short) 14));
            fieldMap.addField(new Field(ConstVar.FieldType_String, 8, (short) 16));

            in.seek(0);
            // Same byte counts as consumed above: bitmap + long + float slot + string length
            // prefix + the 12 bytes of "hello konten".
            int valuelen = 1 + 8 + 4 + 2 + 12;
            DataChunk chunk2 = new DataChunk(fieldNum);

            ArrayList<byte[]> arrayList = new ArrayList<byte[]>(64);

            DataInputBuffer inputBuffer = new DataInputBuffer();
            byte[] buf = new byte[valuelen];
            in.readFully(buf, 0, valuelen);
            inputBuffer.reset(buf, 0, valuelen);
            chunk2.unpersistent(0, valuelen, inputBuffer);
            Record record2 = chunk2.toRecord(fieldMap, true, arrayList);

            bitSet = chunk2.fixedBitSet;

            if (bitSet.get(1)) {
                fail("shoud not set");
            }
            for (int i = 0; i < fieldNum; i++) {
                if (!bitSet.get(i) && i != 1) {
                    fail("should set:" + i);
                }
            }
            record = record2;

            // Field 0: the long value.
            int index = 0;
            byte type = record2.fieldValues().get(index).type;
            int len = record2.fieldValues().get(index).len;
            short idx = record2.fieldValues().get(index).idx;
            value = record2.fieldValues().get(index).value;
            if (len != ConstVar.Sizeof_Long) {
                fail("error len:" + len);
            }
            if (type != ConstVar.FieldType_Long) {
                fail("error fieldType:" + type);
            }
            if (idx != 13) {
                fail("error idx:" + idx);
            }
            if (value == null) {
                fail("error value null");
            }
            lv = Util.bytes2long(value, 0, 8);
            if (lv != 4) {
                fail("error long value:" + lv);
            }

            // Field 1: the float field must come back with metadata but no value.
            index = 1;
            type = record.fieldValues().get(index).type;
            len = record.fieldValues().get(index).len;
            idx = record.fieldValues().get(index).idx;
            value = record.fieldValues().get(index).value;

            if (len != ConstVar.Sizeof_Float) {
                fail("error len:" + len);
            }
            if (type != ConstVar.FieldType_Float) {
                fail("error fieldType:" + type);
            }
            if (idx != 14) {
                fail("error idx:" + idx);
            }
            if (value != null) {
                fail("error value not null");
            }

            // Field 2: the string value.
            index = 2;
            type = record.fieldValues().get(index).type;
            len = record.fieldValues().get(index).len;
            idx = record.fieldValues().get(index).idx;
            value = record.fieldValues().get(index).value;

            str = "hello konten";
            if (len != str.length()) {
                fail("error len:" + len);
            }
            if (type != ConstVar.FieldType_String) {
                fail("error fieldType:" + type);
            }
            if (idx != 16) {
                fail("error idx:" + idx);
            }
            if (value == null) {
                fail("error value null");
            }
            String sv = new String(value, 0, len);
            if (!str.equals(sv)) {
                fail("error string value:" + sv);
            }
        } finally {
            // Release the stream even when an assertion above fails.
            in.close();
        }

    } catch (Exception e) {
        e.printStackTrace();
        fail("should not exception:" + e.getMessage());
    }
}

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Persists a unit holding 100 identical seven-field records (the last field is a string) and
 * checks the written length, the unit's reported length, and the metadata read back from the
 * end of the file: the record count, the metadata offset, and the per-record offset table.
 */
public void testPersistentUnitVar() {
    try {
        Head head = new Head();
        head.setVar((byte) 1);
        Configuration conf = new Configuration();
        FormatDataFile fd = new FormatDataFile(conf);
        fd.create(prefix + "testPersistentUnitVar_tmp", head);

        IndexInfo info = new IndexInfo();
        info.offset = 0;
        Segment seg = new Segment(info, fd);
        Unit unit = new Unit(info, seg);

        Record record = new Record(7);
        record.addValue(new FieldValue((byte) 1, (short) 0));
        record.addValue(new FieldValue((short) 2, (short) 1));
        record.addValue(new FieldValue((int) 3, (short) 2));
        record.addValue(new FieldValue((long) 4, (short) 3));
        record.addValue(new FieldValue((float) 5.5, (short) 4));
        record.addValue(new FieldValue((double) 6.6, (short) 5));
        record.addValue(new FieldValue("hello konten", (short) 6));

        int count = 100;
        for (int i = 0; i < count; i++) {
            unit.addRecord(record);
        }

        String file = prefix + "testPersistentUnitVar";
        Path path = new Path(file);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(path);

        unit.persistent(out);
        long pos = out.getPos();
        // Expected size: the chunks, one 8-byte offset entry per record, and the chunk metadata.
        if (pos != full7chunkLen * count + count * 8 + ConstVar.DataChunkMetaOffset) {
            fail("error pos:" + pos);
        }
        out.close();

        long len = unit.len();
        if (len != count * full7chunkLen + count * 8 + ConstVar.DataChunkMetaOffset) {
            fail("error unit.len" + len);
        }

        FSDataInputStream in = fs.open(path);
        try {
            // The metadata offset (long) sits 12 bytes before the end of the unit ...
            in.seek(len - 8 - 4);
            long metaOffset = in.readLong();
            if (metaOffset != full7chunkLen * count) {
                fail("error metaOffset:" + metaOffset);
            }

            // ... and the record count (int) immediately precedes it.
            in.seek(len - 8 - 4 - 4);
            int recordNum = in.readInt();
            if (recordNum != count) {
                fail("error recordNum:" + recordNum);
            }

            // The offset table holds one long per record; chunk i starts at i * full7chunkLen.
            in.seek(metaOffset);
            for (int i = 0; i < recordNum; i++) {
                long offset = in.readLong();
                if (offset != full7chunkLen * i) {
                    fail("error offset:" + offset + "i:" + i);
                }
            }
        } finally {
            // Release the stream even when an assertion above fails.
            in.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail("get IOException:" + e.getMessage());
    } catch (Exception e) {
        e.printStackTrace();
        fail("get Exception:" + e.getMessage());
    }
}

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Persists a unit holding 100 identical six-field records (no string field) and checks the
 * written length, the unit's reported length, and the metadata offset and record count read
 * back from the end of the file. Unlike the seven-field variant, the expected length has no
 * per-record 8-byte offset entries.
 */
public void testPersistentUnitNotVar() {
    try {
        Head head = new Head();
        Configuration conf = new Configuration();
        FormatDataFile fd = new FormatDataFile(conf);
        fd.create(prefix + "testPersistentUnitNotVar_tmp", head);

        IndexInfo info = new IndexInfo();
        info.offset = 0;
        Segment seg = new Segment(info, fd);
        Unit unit = new Unit(info, seg);

        Record record = new Record(6);
        record.addValue(new FieldValue((byte) 1, (short) 0));
        record.addValue(new FieldValue((short) 2, (short) 1));
        record.addValue(new FieldValue((int) 3, (short) 2));
        record.addValue(new FieldValue((long) 4, (short) 3));
        record.addValue(new FieldValue((float) 5.5, (short) 4));
        record.addValue(new FieldValue((double) 6.6, (short) 5));

        int count = 100;
        for (int i = 0; i < count; i++) {
            unit.addRecord(record);
        }

        String file = prefix + "testPersistentUnitNotVar";
        Path path = new Path(file);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(path);

        unit.persistent(out);
        long pos = out.getPos();
        if (pos != full6chunkLen * count + ConstVar.DataChunkMetaOffset) {
            fail("error pos:" + pos);
        }
        out.close();

        long len = unit.len();
        if (len != count * full6chunkLen + ConstVar.DataChunkMetaOffset) {
            fail("error unit.len" + len);
        }

        FSDataInputStream in = fs.open(path);
        try {
            // The metadata offset (long) sits 12 bytes before the end of the unit ...
            in.seek(len - 8 - 4);
            long metaOffset = in.readLong();
            if (metaOffset != full6chunkLen * count) {
                fail("error metaOffset:" + metaOffset);
            }

            // ... and the record count (int) immediately precedes it.
            in.seek(len - 8 - 4 - 4);
            int recordNum = in.readInt();
            if (recordNum != count) {
                fail("error recordNum:" + recordNum);
            }
        } finally {
            // Release the stream even when an assertion above fails.
            in.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail("get IOException:" + e.getMessage());
    } catch (Exception e) {
        e.printStackTrace();
        fail("get Exception:" + e.getMessage());
    }
}

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Builds a segment of 100 units (100 records each), persists it, and then reloads only the
 * unit index from the written file through a second {@code Segment}. Verifies the record
 * and unit counts, the key/line index offsets, and every line-index entry.
 */
public void testPersistentSegment() {
    try {
        IndexInfo info = new IndexInfo();
        info.offset = 0;

        Head head = new Head();
        head.setVar((byte) 1);
        Configuration conf = new Configuration();
        FormatDataFile fd = new FormatDataFile(conf);
        fd.create(prefix + "testPersistentSegment_tmp", head);

        String fileName = prefix + "testPersistentSegment";
        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(path);

        // Redirect the data file's output to the stream inspected by this test.
        fd.setOut(out);
        Segment segment = new Segment(info, fd);

        int unitSize = 100;
        for (int i = 0; i < unitSize; i++) {
            IndexInfo indexInfo = new IndexInfo();
            indexInfo.offset = i * 100;
            indexInfo.len = 77;
            indexInfo.beginLine = (i + 1) * 100;
            indexInfo.endLine = (i + 2) * 100;
            indexInfo.idx = i;

            Unit unit = new Unit(indexInfo, segment);
            addRecord2Unit(unit, 100);
            unit.beginLine = (i + 1) * 100;
            unit.endLine = (i + 2) * 100;
            segment.addUnit(unit);
            if (unit.len() != 100 * full7chunkLen + 100 * 8 + ConstVar.DataChunkMetaOffset) {
                fail("error unit.len:" + unit.len());
            }
        }

        segment.recordNum = 234;
        segment.setBeginLine(1);
        segment.setEndLine(235);

        segment.persistent(out);

        // The persisted segment is expected to occupy exactly the configured segment size.
        if (out.getPos() != fd.confSegmentSize()) {
            System.out.println("seg.len:" + segment.len() + "seg.remain:" + segment.remain() + "index.len"
                    + segment.unitIndex().len());
            fail("error pos:" + out.getPos());
        }
        out.close();

        int unitlen = full7chunkLen * 100 + 8 * 100 + ConstVar.DataChunkMetaOffset;
        FSDataInputStream in = fs.open(path);
        try {
            // Position the stream at the line index before loading the unit index from it.
            in.seek(segment.lineIndexOffset());

            info.offset = 0;
            info.len = segment.len();
            fd.setWorkStatus(ConstVar.WS_Read);
            Segment segment2 = new Segment(info, fd);
            segment2.unpersistentUnitIndex(in);
            if (segment2.recordNum() != 234) {
                fail("error recordnum:" + segment2.recordNum());
            }
            if (segment2.unitNum() != unitSize) {
                fail("error unitNum:" + segment2.unitNum());
            }
            if (segment2.keyIndexOffset() != -1) {
                fail("error key index offset:" + segment2.keyIndexOffset());
            }
            if (segment2.lineIndexOffset() != unitlen * unitSize) {
                fail("error line index offset:" + segment2.lineIndexOffset());
            }
            if (segment2.units().size() != unitSize) {
                fail("error units.size:" + segment2.units().size());
            }

            UnitIndex index = segment2.unitIndex();
            if (index.lineIndexInfos().size() != unitSize) {
                fail("error line unit index size:" + index.lineIndexInfos().size());
            }
            if (index.keyIndexInfos().size() != 0) {
                fail("error key unit index size:" + index.keyIndexInfos().size());
            }

            // Each line-index entry must match the values set when the unit was added,
            // except len, which is expected to equal the actual unit length.
            for (int i = 0; i < unitSize; i++) {
                IndexInfo ii = index.lineIndexInfos().get(i);
                if (ii.beginLine() != (1 + i) * 100) {
                    fail("error beginline:" + ii.beginLine() + "i:" + i);
                }
                if (ii.endLine() != (2 + i) * 100) {
                    fail("error end line:" + ii.endLine() + "i:" + i);
                }
                if (ii.offset() != i * 100) {
                    fail("error offset:" + ii.offset() + "i:" + i);
                }
                if (ii.len != unitlen) {
                    fail("error len:" + ii.len() + "i:" + i);
                }
                if (ii.idx() != i) {
                    fail("error idx:" + ii.idx() + "i:" + i);
                }
            }
        } finally {
            // Release the stream even when an assertion above fails.
            in.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail("get IOException:" + e.getMessage());
    } catch (Exception e) {
        e.printStackTrace();
        fail("get Exception:" + e.getMessage());
    }
}

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Writes one million seven-field records through a {@code FormatDataFile}, closes it, and
 * verifies (a) the in-memory state reset by {@code close()}, (b) the total file length, and
 * (c) the index metadata and line-index entry read back from the end of the file.
 */
public void testClose() {
    try {
        Head head = new Head();
        head.setVar((byte) 1);
        Configuration conf = new Configuration();
        FormatDataFile fd = new FormatDataFile(conf);
        fd.create(prefix + "testClose", head);

        int size = 100 * 10000;

        for (int i = 0; i < size; i++) {
            Record record = new Record(7);
            record.addValue(new FieldValue((byte) 1, (short) 0));
            record.addValue(new FieldValue((short) 2, (short) 1));
            record.addValue(new FieldValue((int) 3, (short) 2));
            record.addValue(new FieldValue((long) 4, (short) 3));
            record.addValue(new FieldValue((float) 5.5, (short) 4));
            record.addValue(new FieldValue((double) 6.6, (short) 5));
            record.addValue(new FieldValue("hello konten", (short) 6));
            fd.addRecord(record);
        }

        if (fd.recordNum() != size) {
            fail("error record num:" + fd.recordNum());
        }
        // Check the segment before dereferencing it; the reverse order would raise a
        // NullPointerException before the segment check could ever report anything.
        if (fd.currentSegment() == null) {
            fail("null current seg");
        }
        if (fd.currentSegment().currentUnit() == null) {
            fail("null current unit");
        }
        if (fd.segmentNum() != 0) {
            fail("error segment num:" + fd.segmentNum());
        }

        // Capture the sizes before close(); the state is checked for reset afterwards.
        int headLen = head.len();
        long currentUnitLen = fd.currentSegment().currentUnit().len();
        long segmentLen = fd.currentSegment().len() + currentUnitLen + ConstVar.LineIndexRecordLen;
        long remain = fd.currentSegment().remain();
        int unitNum = fd.currentSegment().unitNum();
        fd.close();

        int indexLen = ConstVar.LineIndexRecordLen * fd.segmentNum();
        int metaLen = ConstVar.IndexMetaOffset;

        long fileLen = fd.getFileLen();

        if (fileLen != headLen + segmentLen + indexLen + metaLen) {
            fail("error file len:" + fileLen);
        }

        if (fd.in() != null) {
            fail("in should set null");
        }
        if (fd.out() != null) {
            fail("out should set null");
        }
        if (fd.recordNum() != 0) {
            fail("record num should set 0");
        }
        if (fd.keyIndexOffset != -1) {
            fail("key index offset not -1");
        }
        if (fd.lineIndexOffset != -1) {
            fail("line index offset not -1");
        }
        if (fd.currentOffset != -1) {
            fail("current offset not -1");
        }
        if (fd.hasLoadAllSegmentDone) {
            fail("has load all segment Done not false");
        }

        String fileName = prefix + "testClose";
        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataInputStream in = fs.open(path);
        try {
            // The index metadata occupies the last IndexMetaOffset bytes of the file.
            long metaOffset = fileLen - ConstVar.IndexMetaOffset;
            in.seek(metaOffset);

            int recordNum = in.readInt();
            int segNum = in.readInt();
            long keyIndexOffset = in.readLong();
            long lineIndexOffset = in.readLong();

            if (recordNum != size) {
                fail("error record num:" + recordNum);
            }
            if (segNum != 1) {
                fail("error segNum:" + segNum);
            }
            if (keyIndexOffset != -1) {
                fail("error key index offset:" + keyIndexOffset);
            }
            if (lineIndexOffset != (headLen + segmentLen)) {
                fail("error line index offset:" + lineIndexOffset);
            }

            // Each line-index entry: beginLine, endLine, offset, len, idx.
            in.seek(lineIndexOffset);
            for (int i = 0; i < segNum; i++) {
                int beginLine = in.readInt();
                int endLine = in.readInt();
                long offset = in.readLong();
                long len = in.readLong();
                int idx = in.readInt();

                if (beginLine != 0) {
                    fail("error beginLine:" + beginLine);
                }
                if (endLine != size) {
                    fail("error end line:" + endLine);
                }
                if (offset != head.len()) {
                    fail("error offset:" + offset);
                }
                long tlen = size * full7chunkLen + size * 8 + ConstVar.DataChunkMetaOffset * (unitNum + 1)
                        + 28 * (unitNum + 1) + 24;
                if (len != tlen) {
                    fail("error len:" + len);
                }
            }
        } finally {
            // Release the stream even when an assertion above fails.
            in.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail("get ioexception:" + e.getMessage());
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}

From source file:LookupPostingsCompressed.java

License:Apache License

/**
 * Runs this tool.
 *
 * <p>Parses the index and collection paths from the command line, then looks up the
 * compressed postings of a few fixed terms in the index:
 * <ul>
 *   <li>"starcross'd": prints every posting and the collection line at that offset;</li>
 *   <li>"gold" and "silver": print the full postings list and a histogram of the counts;</li>
 *   <li>"bronze": reports when the term is absent from the index.</li>
 * </ul>
 * Each postings list is a sequence of (offset gap, count) VInt pairs, so the absolute offset
 * is recovered by accumulating the gaps from zero.
 *
 * @param args command-line arguments; both the index and the collection option are required
 * @return 0 on completion
 * @throws Exception if reading the index or the collection fails
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LookupPostingsCompressed.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config);

    FSDataInputStream collection = fs.open(new Path(collectionPath));

    Text key = new Text();
    PairOfWritables<VIntWritable, BytesWritable> value = new PairOfWritables<VIntWritable, BytesWritable>();

    System.out.println("Looking up postings for the term \"starcross'd\"");
    key.set("starcross'd");

    reader.get(key, value);

    BytesWritable postings = value.getRightElement();
    ByteArrayInputStream buffer = new ByteArrayInputStream(postings.copyBytes());
    DataInputStream in = new DataInputStream(buffer);
    int offset = 0;
    int count;
    while (in.available() != 0) {
        offset = offset + WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);
        System.out.print("(" + offset + ", " + count + ")");
        collection.seek(offset);
        // A BufferedReader keeps its own read-ahead buffer, so one created before the seek
        // would keep serving stale data. Wrap the stream anew after every seek. The reader is
        // deliberately not closed: closing it would close the underlying collection stream.
        BufferedReader lineReader = new BufferedReader(new InputStreamReader(collection));
        System.out.println(lineReader.readLine());
    }

    offset = 0;
    key.set("gold");
    reader.get(key, value);
    postings = value.getRightElement();
    buffer = new ByteArrayInputStream(postings.copyBytes());
    in = new DataInputStream(buffer);
    System.out.println("Complete postings list for 'gold': (" + value.getLeftElement() + ", [");
    while (in.available() != 0) {
        offset = offset + WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);
        System.out.print("(" + offset + ", " + count + ")");
        System.out.print(", ");
    }
    System.out.print("])\n");

    Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry();
    // Rewind the byte buffer so the same postings can be scanned a second time.
    buffer.reset();

    offset = 0;
    while (in.available() != 0) {
        offset = offset + WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);
        goldHist.increment(count);
    }

    System.out.println("histogram of tf values for gold");
    for (PairOfInts pair : goldHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }

    buffer.close();

    // Silver. The running offset must restart from zero for a new postings list; otherwise
    // the printed offsets would continue from the end of the previous term's list.
    offset = 0;
    key.set("silver");
    reader.get(key, value);
    postings = value.getRightElement();
    buffer = new ByteArrayInputStream(postings.copyBytes());
    in = new DataInputStream(buffer);
    System.out.println("Complete postings list for 'silver': (" + value.getLeftElement() + ", [");
    while (in.available() != 0) {
        offset = offset + WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);
        System.out.print("(" + offset + ", " + count + ")");
        System.out.print(", ");
    }
    System.out.print("])\n");

    Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry();
    // Rewind the byte buffer so the same postings can be scanned a second time.
    buffer.reset();

    offset = 0;
    while (in.available() != 0) {
        offset = offset + WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);
        silverHist.increment(count);
    }

    System.out.println("histogram of tf values for silver");
    // Iterate the silver histogram here, not the gold one built earlier.
    for (PairOfInts pair : silverHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }

    buffer.close();

    key.set("bronze");
    Writable w = reader.get(key, value);

    if (w == null) {
        System.out.println("the term bronze does not appear in the collection");
    }

    collection.close();
    reader.close();

    return 0;
}

From source file:LookupPostingsCompressed1.java

License:Apache License

/**
 * Looks up a fixed set of terms in an inverted index and prints what it finds.
 *
 * <p>Expects two command-line options, {@code INDEX} and {@code COLLECTION}. The index is
 * opened as a {@code MapFile} at {@code <index>/part-r-00000}; the collection is opened as a
 * seekable stream. The method then:
 * <ul>
 *   <li>prints every posting of "starcross'd" together with the collection line found at
 *       the posting's left element (used as a seek offset);</li>
 *   <li>prints the postings of "gold" and "silver" and, for each, a histogram built from
 *       the right elements of the postings;</li>
 *   <li>reports when "bronze" has no entry in the index.</li>
 * </ul>
 *
 * <p>The process exits with status {@code -1} when the command line cannot be parsed, when a
 * required option is missing, or when the collection path ends in {@code .gz}.
 *
 * @param args command-line arguments carrying the index and collection paths
 * @throws IOException if the index or the collection cannot be opened or read
 */
@SuppressWarnings({ "static-access" })
public static void main(String[] args) throws IOException {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LookupPostingsCompressed1.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config);

    // try/finally so that both handles are released even when a lookup throws.
    try {
        FSDataInputStream collection = fs.open(new Path(collectionPath));
        try {
            Text key = new Text();
            PairOfWritables<VIntWritable, ArrayListWritable<PairOfVInts>> value = new PairOfWritables<VIntWritable, ArrayListWritable<PairOfVInts>>();

            System.out.println("Looking up postings for the term \"starcross'd\"");
            key.set("starcross'd");

            reader.get(key, value);

            ArrayListWritable<PairOfVInts> postings = value.getRightElement();
            for (PairOfVInts pair : postings) {
                System.out.println(pair);
                collection.seek(pair.getLeftElement());
                // A reader created before the seek would still hold read-ahead data from the
                // previous position, so a fresh one is built after every seek. It is
                // deliberately not closed here: closing it would close the collection stream.
                BufferedReader lineReader = new BufferedReader(new InputStreamReader(collection));
                System.out.println(lineReader.readLine());
            }

            key.set("gold");
            reader.get(key, value);
            System.out.println("Complete postings list for 'gold': " + value);

            Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry();
            postings = value.getRightElement();
            for (PairOfVInts pair : postings) {
                goldHist.increment(pair.getRightElement());
            }

            System.out.println("histogram of tf values for gold");
            for (PairOfInts pair : goldHist) {
                System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
            }

            key.set("silver");
            reader.get(key, value);
            System.out.println("Complete postings list for 'silver': " + value);

            Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry();
            postings = value.getRightElement();
            for (PairOfVInts pair : postings) {
                silverHist.increment(pair.getRightElement());
            }

            System.out.println("histogram of tf values for silver");
            for (PairOfInts pair : silverHist) {
                System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
            }

            key.set("bronze");
            Writable w = reader.get(key, value);

            // MapFile.Reader.get signals a missing key by returning null.
            if (w == null) {
                System.out.println("the term bronze does not appear in the collection");
            }
        } finally {
            collection.close();
        }
    } finally {
        reader.close();
    }
}

From source file:ReadAllTest.java

License:Apache License

/**
 * Reads one value from the stream at an absolute offset.
 *
 * <p>The stream is repositioned to {@code at} and a single {@code read()} is issued. The
 * previous position is not restored, so callers that care about the stream position must
 * save and re-seek themselves.
 *
 * @param fis the seekable input stream to read from
 * @param at  the absolute offset to seek to before reading
 * @return whatever {@code fis.read()} yields at that offset (under the usual
 *         {@code InputStream} contract this is a byte value, or {@code -1} at end of stream)
 * @throws IOException if the seek or the read fails
 */
private static int readAt(FSDataInputStream fis, long at) throws IOException {
    fis.seek(at);
    final int valueAtOffset = fis.read();
    return valueAtOffset;
}

From source file:LookupPostings.java

License:Apache License

/**
 * Runs this tool./*from   ww  w. j a  va 2  s  .  c o m*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LookupPostings.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config);

    FSDataInputStream collection = fs.open(new Path(collectionPath));
    BufferedReader d = new BufferedReader(new InputStreamReader(collection));

    Text key = new Text();
    PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>> value = new PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>>();

    System.out.println("Looking up postings for the term \"starcross'd\"");
    key.set("starcross'd");

    reader.get(key, value);

    ArrayListWritable<PairOfInts> postings = value.getRightElement();
    for (PairOfInts pair : postings) {
        System.out.println(pair);
        collection.seek(pair.getLeftElement());
        System.out.println(d.readLine());
    }

    key.set("gold");
    reader.get(key, value);
    System.out.println("Complete postings list for 'gold': " + value);

    Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry();
    postings = value.getRightElement();
    for (PairOfInts pair : postings) {
        goldHist.increment(pair.getRightElement());
    }

    System.out.println("histogram of tf values for gold");
    for (PairOfInts pair : goldHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }

    key.set("silver");
    reader.get(key, value);
    System.out.println("Complete postings list for 'silver': " + value);

    Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry();
    postings = value.getRightElement();
    for (PairOfInts pair : postings) {
        silverHist.increment(pair.getRightElement());
    }

    System.out.println("histogram of tf values for silver");
    for (PairOfInts pair : silverHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }

    key.set("bronze");
    Writable w = reader.get(key, value);

    if (w == null) {
        System.out.println("the term bronze does not appear in the collection");
    }

    collection.close();
    reader.close();

    return 0;
}