List of usage examples for org.apache.hadoop.fs.FSDataInputStream.seek
@Override public void seek(long desired) throws IOException
From source file:FormatStorageBasicTest.java
License:Open Source License
public void testChunkToRecord() { try {// w w w .ja v a 2 s . com String fileName = prefix + "testChunkToRecord"; Path path = new Path(fileName); FileSystem fs = FileSystem.get(new Configuration()); FSDataOutputStream out = fs.create(path); short fieldNum = 3; Record record = new Record(fieldNum); byte[] lb = new byte[ConstVar.Sizeof_Long]; long l = 4; Util.long2bytes(lb, l); FieldValue fieldValue4 = new FieldValue(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, lb, (short) 13); record.addValue(fieldValue4); byte[] fb = new byte[ConstVar.Sizeof_Float]; float f = (float) 5.5; Util.float2bytes(fb, f); FieldValue fieldValue5 = new FieldValue(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, fb, (short) 14); record.addValue(fieldValue5); String str = "hello konten"; FieldValue fieldValue7 = new FieldValue(ConstVar.FieldType_String, (short) str.length(), str.getBytes(), (short) 16); record.addValue(fieldValue7); DataChunk chunk = new DataChunk(record); out.write(chunk.values, 0, (int) chunk.len); if (out.getPos() != chunk.len) { fail("error pos:" + out.getPos() + "chunk.len:" + chunk.len); } out.close(); FSDataInputStream in = fs.open(path); FixedBitSet bitSet = new FixedBitSet(fieldNum); in.read(bitSet.bytes(), 0, bitSet.size()); for (int i = 0; i < fieldNum; i++) { if (!bitSet.get(i)) { fail("should set:" + i); } } byte[] value = new byte[8]; in.readFully(value); long lv = Util.bytes2long(value, 0, 8); if (lv != 4) { fail("error long value:" + lv); } value = new byte[4]; in.readFully(value); float fv = Util.bytes2float(value, 0); if (fv != 5.5) { fail("error float value:" + fv); } short strLen = in.readShort(); if (strLen != str.length()) { fail("error strLen:" + strLen); } value = new byte[strLen]; in.readFully(value); String strv = new String(value); if (!strv.equals(str)) { fail("error strv:" + strv); } FieldMap fieldMap = new FieldMap(); fieldMap.addField(new Field(ConstVar.FieldType_Long, 8, (short) 13)); fieldMap.addField(new Field(ConstVar.FieldType_Float, 4, 
(short) 14)); fieldMap.addField(new Field(ConstVar.FieldType_String, 8, (short) 16)); in.seek(0); int valuelen = 1 + 8 + 4 + 2 + 12; DataChunk chunk2 = new DataChunk(fieldNum); ArrayList<byte[]> arrayList = new ArrayList<byte[]>(64); DataInputBuffer inputBuffer = new DataInputBuffer(); byte[] buf = new byte[valuelen]; in.read(buf, 0, valuelen); inputBuffer.reset(buf, 0, valuelen); chunk2.unpersistent(0, valuelen, inputBuffer); Record record2 = chunk2.toRecord(fieldMap, true, arrayList); bitSet = chunk2.fixedBitSet; if (bitSet.length() != (fieldNum / 8 + 1) * 8) { fail("bitSet.len:" + bitSet.length()); } for (int i = 0; i < fieldNum; i++) { if (!bitSet.get(i)) { fail("bitSet should set:" + i); } } record = record2; int index = 0; byte type = record2.fieldValues().get(index).type; int len = record2.fieldValues().get(index).len; short idx = record2.fieldValues().get(index).idx; value = record2.fieldValues().get(index).value; if (len != ConstVar.Sizeof_Long) { fail("error len:" + len); } if (type != ConstVar.FieldType_Long) { fail("error fieldType:" + type); } if (idx != 13) { fail("error idx:" + idx); } if (value == null) { fail("error value null"); } { } lv = Util.bytes2long(value, 0, len); if (lv != 4) { fail("error long value:" + lv); } index = 1; type = record.fieldValues().get(index).type; len = record.fieldValues().get(index).len; idx = record.fieldValues().get(index).idx; value = record.fieldValues().get(index).value; if (len != ConstVar.Sizeof_Float) { fail("error len:" + len); } if (type != ConstVar.FieldType_Float) { fail("error fieldType:" + type); } if (idx != 14) { fail("error idx:" + idx); } if (value == null) { fail("error value null"); } { } fv = Util.bytes2float(value, 0); if (fv != 5.5) { fail("error float value:" + fv); } index = 2; type = record.fieldValues().get(index).type; len = record.fieldValues().get(index).len; idx = record.fieldValues().get(index).idx; value = record.fieldValues().get(index).value; str = "hello konten"; if (len != 
str.length()) { fail("error len:" + len); } if (type != ConstVar.FieldType_String) { fail("error fieldType:" + type); } if (idx != 16) { fail("error idx:" + idx); } if (value == null) { fail("error value null"); } { } String sv = new String(value, 0, len); if (!str.equals(sv)) { fail("error string value:" + sv); } } catch (Exception e) { fail("should not exception:" + e.getMessage()); } }
From source file:FormatStorageBasicTest.java
License:Open Source License
/**
 * Same round trip as the chunk-to-record test, but the middle (float) field is written with a
 * {@code null} value.
 *
 * <p>Checks that bit 1 of the chunk's bitset stays clear, and that the rebuilt record reports
 * a {@code null} value for that field while the long and string fields survive intact.
 */
public void testChunkToRecordNull() {
    try {
        String fileName = prefix + "testChunkToRecord2";
        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());

        short fieldNum = 3;
        Record record = new Record(fieldNum);

        byte[] lb = new byte[ConstVar.Sizeof_Long];
        long l = 4;
        Util.long2bytes(lb, l);
        FieldValue fieldValue4 = new FieldValue(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, lb, (short) 13);
        record.addValue(fieldValue4);

        // The float field carries no value.
        FieldValue fieldValue5 = new FieldValue(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, null, (short) 14);
        record.addValue(fieldValue5);

        String str = "hello konten";
        FieldValue fieldValue7 = new FieldValue(ConstVar.FieldType_String, (short) str.length(), str.getBytes(),
                (short) 16);
        record.addValue(fieldValue7);

        DataChunk chunk = new DataChunk(record);

        // Write the chunk; close the stream even when a check fails.
        FSDataOutputStream out = fs.create(path);
        try {
            out.write(chunk.values, 0, (int) chunk.len);
            if (out.getPos() != chunk.len) {
                fail("error pos:" + out.getPos() + "chunk.len:" + chunk.len);
            }
        } finally {
            out.close();
        }

        FSDataInputStream in = fs.open(path);
        try {
            // Pass 1: decode the raw bytes by hand.
            FixedBitSet bitSet = new FixedBitSet(fieldNum);
            // readFully instead of read: a short read must not go unnoticed.
            in.readFully(bitSet.bytes(), 0, bitSet.size());
            if (bitSet.get(1)) {
                fail("shoud not set");
            }
            for (int i = 0; i < fieldNum; i++) {
                if (i != 1 && !bitSet.get(i)) {
                    fail("should set:" + i);
                }
            }

            byte[] value = new byte[8];
            in.readFully(value);
            long lv = Util.bytes2long(value, 0, 8);
            if (lv != 4) {
                fail("error long value:" + lv);
            }

            // Skip the four bytes occupied by the float slot.
            in.readFloat();

            short strLen = in.readShort();
            if (strLen != str.length()) {
                fail("error strLen:" + strLen);
            }
            value = new byte[strLen];
            in.readFully(value);
            String strv = new String(value, 0, strLen);
            if (!strv.equals(str)) {
                fail("error strv:" + strv);
            }

            // Pass 2: rewind and let DataChunk rebuild the record.
            FieldMap fieldMap = new FieldMap();
            fieldMap.addField(new Field(ConstVar.FieldType_Long, 8, (short) 13));
            fieldMap.addField(new Field(ConstVar.FieldType_Float, 4, (short) 14));
            fieldMap.addField(new Field(ConstVar.FieldType_String, 8, (short) 16));

            in.seek(0);
            int valuelen = 1 + 8 + 4 + 2 + 12;
            DataChunk chunk2 = new DataChunk(fieldNum);
            ArrayList<byte[]> arrayList = new ArrayList<byte[]>(64);
            DataInputBuffer inputBuffer = new DataInputBuffer();
            byte[] buf = new byte[valuelen];
            in.readFully(buf, 0, valuelen);
            inputBuffer.reset(buf, 0, valuelen);
            chunk2.unpersistent(0, valuelen, inputBuffer);
            Record record2 = chunk2.toRecord(fieldMap, true, arrayList);

            bitSet = chunk2.fixedBitSet;
            if (bitSet.get(1)) {
                fail("shoud not set");
            }
            for (int i = 0; i < fieldNum; i++) {
                if (i != 1 && !bitSet.get(i)) {
                    fail("should set:" + i);
                }
            }

            // Field 0: long
            int index = 0;
            byte type = record2.fieldValues().get(index).type;
            int len = record2.fieldValues().get(index).len;
            short idx = record2.fieldValues().get(index).idx;
            value = record2.fieldValues().get(index).value;
            if (len != ConstVar.Sizeof_Long) {
                fail("error len:" + len);
            }
            if (type != ConstVar.FieldType_Long) {
                fail("error fieldType:" + type);
            }
            if (idx != 13) {
                fail("error idx:" + idx);
            }
            if (value == null) {
                fail("error value null");
            }
            lv = Util.bytes2long(value, 0, 8);
            if (lv != 4) {
                fail("error long value:" + lv);
            }

            // Field 1: float, expected to come back without a value
            index = 1;
            type = record2.fieldValues().get(index).type;
            len = record2.fieldValues().get(index).len;
            idx = record2.fieldValues().get(index).idx;
            value = record2.fieldValues().get(index).value;
            if (len != ConstVar.Sizeof_Float) {
                fail("error len:" + len);
            }
            if (type != ConstVar.FieldType_Float) {
                fail("error fieldType:" + type);
            }
            if (idx != 14) {
                fail("error idx:" + idx);
            }
            if (value != null) {
                fail("error value not null");
            }

            // Field 2: string
            index = 2;
            type = record2.fieldValues().get(index).type;
            len = record2.fieldValues().get(index).len;
            idx = record2.fieldValues().get(index).idx;
            value = record2.fieldValues().get(index).value;
            if (len != str.length()) {
                fail("error len:" + len);
            }
            if (type != ConstVar.FieldType_String) {
                fail("error fieldType:" + type);
            }
            if (idx != 16) {
                fail("error idx:" + idx);
            }
            if (value == null) {
                fail("error value null");
            }
            String sv = new String(value, 0, len);
            if (!str.equals(sv)) {
                fail("error string value:" + sv);
            }
        } finally {
            in.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("should not exception:" + e.getMessage());
    }
}
From source file:FormatStorageBasicTest.java
License:Open Source License
/**
 * Persists a unit of 100 seven-field records (the last field is a string, and the head has
 * its var flag set) and verifies the bytes written.
 *
 * <p>Checks the total length, then reads the trailer back: the meta offset, the record
 * count, and one 8-byte offset per record starting at the meta offset.
 */
public void testPersistentUnitVar() {
    try {
        Head head = new Head();
        head.setVar((byte) 1);
        Configuration conf = new Configuration();
        FormatDataFile fd = new FormatDataFile(conf);
        fd.create(prefix + "testPersistentUnitVar_tmp", head);

        IndexInfo info = new IndexInfo();
        info.offset = 0;
        Segment seg = new Segment(info, fd);
        Unit unit = new Unit(info, seg);

        Record record = new Record(7);
        record.addValue(new FieldValue((byte) 1, (short) 0));
        record.addValue(new FieldValue((short) 2, (short) 1));
        record.addValue(new FieldValue((int) 3, (short) 2));
        record.addValue(new FieldValue((long) 4, (short) 3));
        record.addValue(new FieldValue((float) 5.5, (short) 4));
        record.addValue(new FieldValue((double) 6.6, (short) 5));
        record.addValue(new FieldValue("hello konten", (short) 6));

        int count = 100;
        for (int i = 0; i < count; i++) {
            unit.addRecord(record);
        }

        String file = prefix + "testPersistentUnitVar";
        Path path = new Path(file);
        FileSystem fs = FileSystem.get(new Configuration());

        // Close the output stream even when the position check fails.
        FSDataOutputStream out = fs.create(path);
        try {
            unit.persistent(out);
            long pos = out.getPos();
            if (pos != full7chunkLen * count + count * 8 + ConstVar.DataChunkMetaOffset) {
                fail("error pos:" + pos);
            }
        } finally {
            out.close();
        }

        long len = unit.len();
        if (len != count * full7chunkLen + count * 8 + ConstVar.DataChunkMetaOffset) {
            fail("error unit.len" + len);
        }

        FSDataInputStream in = fs.open(path);
        try {
            // The long read here is the meta offset.
            in.seek(len - 8 - 4);
            long metaOffset = in.readLong();
            if (metaOffset != full7chunkLen * count) {
                fail("error metaOffset:" + metaOffset);
            }

            // The int just before it is the record count.
            in.seek(len - 8 - 4 - 4);
            int recordNum = in.readInt();
            if (recordNum != count) {
                fail("error recordNum:" + recordNum);
            }

            // One offset per record, starting at the meta offset.
            in.seek(metaOffset);
            for (int i = 0; i < recordNum; i++) {
                long offset = in.readLong();
                if (offset != full7chunkLen * i) {
                    fail("error offset:" + offset + "i:" + i);
                }
            }
        } finally {
            in.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail("get IOException:" + e.getMessage());
    } catch (Exception e) {
        e.printStackTrace();
        fail("get Exception:" + e.getMessage());
    }
}
From source file:FormatStorageBasicTest.java
License:Open Source License
public void testPersistentUnitNotVar() { try {//w w w . j a va2 s.c o m Head head = new Head(); Configuration conf = new Configuration(); FormatDataFile fd = new FormatDataFile(conf); fd.create(prefix + "testPersistentUnitNotVar_tmp", head); IndexInfo info = new IndexInfo(); info.offset = 0; Segment seg = new Segment(info, fd); Unit unit = new Unit(info, seg); Record record = new Record(6); record.addValue(new FieldValue((byte) 1, (short) 0)); record.addValue(new FieldValue((short) 2, (short) 1)); record.addValue(new FieldValue((int) 3, (short) 2)); record.addValue(new FieldValue((long) 4, (short) 3)); record.addValue(new FieldValue((float) 5.5, (short) 4)); record.addValue(new FieldValue((double) 6.6, (short) 5)); int count = 100; for (int i = 0; i < count; i++) { unit.addRecord(record); } String file = prefix + "testPersistentUnitNotVar"; Path path = new Path(file); FileSystem fs = FileSystem.get(new Configuration()); FSDataOutputStream out = fs.create(path); unit.persistent(out); long pos = out.getPos(); if (pos != full6chunkLen * count + ConstVar.DataChunkMetaOffset) { fail("error pos:" + pos); } out.close(); long len = unit.len(); if (len != count * full6chunkLen + ConstVar.DataChunkMetaOffset) { fail("error unit.len" + len); } FSDataInputStream in = fs.open(path); in.seek(len - 8 - 4); long metaOffset = in.readLong(); if (metaOffset != full6chunkLen * count) { fail("error metaOffset:" + metaOffset); } in.seek(len - 8 - 4 - 4); int recordNum = in.readInt(); if (recordNum != count) { fail("error recordNum:" + recordNum); } } catch (IOException e) { e.printStackTrace(); fail("get IOException:" + e.getMessage()); } catch (Exception e) { e.printStackTrace(); fail("get Exception:" + e.getMessage()); } }
From source file:FormatStorageBasicTest.java
License:Open Source License
public void testPersistentSegment() { try {// w w w. ja v a2 s .com IndexInfo info = new IndexInfo(); info.offset = 0; Head head = new Head(); head.setVar((byte) 1); Configuration conf = new Configuration(); FormatDataFile fd = new FormatDataFile(conf); fd.create(prefix + "testPersistentSegment_tmp", head); String fileName = prefix + "testPersistentSegment"; Path path = new Path(fileName); FileSystem fs = FileSystem.get(new Configuration()); FSDataOutputStream out = fs.create(path); fd.setOut(out); Segment segment = new Segment(info, fd); int unitSize = 100; for (int i = 0; i < unitSize; i++) { IndexInfo indexInfo = new IndexInfo(); indexInfo.offset = i * 100; indexInfo.len = 77; indexInfo.beginLine = (i + 1) * 100; indexInfo.endLine = (i + 2) * 100; indexInfo.idx = i; Unit unit = new Unit(indexInfo, segment); addRecord2Unit(unit, 100); unit.beginLine = (i + 1) * 100; unit.endLine = (i + 2) * 100; segment.addUnit(unit); if (unit.len() != 100 * full7chunkLen + 100 * 8 + ConstVar.DataChunkMetaOffset) { fail("error unit.len:" + unit.len()); } } segment.recordNum = 234; segment.setBeginLine(1); segment.setEndLine(235); segment.persistent(out); if (out.getPos() != fd.confSegmentSize()) { System.out.println("seg.len:" + segment.len() + "seg.remain:" + segment.remain() + "index.len" + segment.unitIndex().len()); fail("error pos:" + out.getPos()); } out.close(); int unitlen = full7chunkLen * 100 + 8 * 100 + ConstVar.DataChunkMetaOffset; FSDataInputStream in = fs.open(path); in.seek(segment.lineIndexOffset()); info.offset = 0; info.len = segment.len(); fd.setWorkStatus(ConstVar.WS_Read); Segment segment2 = new Segment(info, fd); segment2.unpersistentUnitIndex(in); if (segment2.recordNum() != 234) { fail("error recordnum:" + segment2.recordNum()); } if (segment2.unitNum() != unitSize) { fail("error unitNum:" + segment2.unitNum()); } if (segment2.keyIndexOffset() != -1) { fail("error key index offset:" + segment2.keyIndexOffset()); } if (segment2.lineIndexOffset() != unitlen 
* unitSize) { fail("error line index offset:" + segment2.lineIndexOffset()); } if (segment2.units().size() != unitSize) { fail("error units.size:" + segment2.units().size()); } UnitIndex index = segment2.unitIndex(); if (index.lineIndexInfos().size() != unitSize) { fail("error line unit index size:" + index.lineIndexInfos().size()); } if (index.keyIndexInfos().size() != 0) { fail("error key unit index size:" + index.keyIndexInfos().size()); } for (int i = 0; i < unitSize; i++) { IndexInfo ii = index.lineIndexInfos().get(i); if (ii.beginLine() != (1 + i) * 100) { fail("error beginline:" + ii.beginLine() + "i:" + i); } if (ii.endLine() != (2 + i) * 100) { fail("error end line:" + ii.endLine() + "i:" + i); } if (ii.offset() != i * 100) { fail("error offset:" + ii.offset() + "i:" + i); } if (ii.len != unitlen) { fail("error len:" + ii.len() + "i:" + i); } if (ii.idx() != i) { fail("error idx:" + ii.idx() + "i:" + i); } } } catch (IOException e) { e.printStackTrace(); fail("get IOException:" + e.getMessage()); } catch (Exception e) { e.printStackTrace(); fail("get Exception:" + e.getMessage()); } }
From source file:FormatStorageBasicTest.java
License:Open Source License
/**
 * Writes one million seven-field records through {@code FormatDataFile}, closes it, and
 * verifies both the in-memory state after close and the index trailer on disk.
 *
 * <p>On-disk checks: the file-level meta block (record count, segment count, key and line
 * index offsets) and each line-index entry (begin line, end line, offset, length).
 */
public void testClose() {
    try {
        Head head = new Head();
        head.setVar((byte) 1);
        Configuration conf = new Configuration();
        FormatDataFile fd = new FormatDataFile(conf);
        fd.create(prefix + "testClose", head);

        int size = 100 * 10000;
        for (int i = 0; i < size; i++) {
            Record record = new Record(7);
            record.addValue(new FieldValue((byte) 1, (short) 0));
            record.addValue(new FieldValue((short) 2, (short) 1));
            record.addValue(new FieldValue((int) 3, (short) 2));
            record.addValue(new FieldValue((long) 4, (short) 3));
            record.addValue(new FieldValue((float) 5.5, (short) 4));
            record.addValue(new FieldValue((double) 6.6, (short) 5));
            record.addValue(new FieldValue("hello konten", (short) 6));
            fd.addRecord(record);
        }

        if (fd.recordNum() != size) {
            fail("error record num:" + fd.recordNum());
        }
        // Check the segment before dereferencing it; the original order would hit a
        // NullPointerException before this check could ever report anything.
        if (fd.currentSegment() == null) {
            fail("null current seg");
        }
        if (fd.currentSegment().currentUnit() == null) {
            fail("null current unit");
        }
        if (fd.segmentNum() != 0) {
            fail("error segment num:" + fd.segmentNum());
        }

        // Capture the sizes that close() is about to flush.
        int headLen = head.len();
        long currentUnitLen = fd.currentSegment().currentUnit().len();
        long segmentLen = fd.currentSegment().len() + currentUnitLen + ConstVar.LineIndexRecordLen;
        int unitNum = fd.currentSegment().unitNum();

        fd.close();

        int indexLen = ConstVar.LineIndexRecordLen * fd.segmentNum();
        int metaLen = ConstVar.IndexMetaOffset;
        long fileLen = fd.getFileLen();
        if (fileLen != headLen + segmentLen + indexLen + metaLen) {
            fail("error file len:" + fileLen);
        }

        // close() must reset the in-memory state.
        if (fd.in() != null) {
            fail("in should set null");
        }
        if (fd.out() != null) {
            fail("out should set null");
        }
        if (fd.recordNum() != 0) {
            fail("record num should set 0");
        }
        if (fd.keyIndexOffset != -1) {
            fail("key index offset not -1");
        }
        if (fd.lineIndexOffset != -1) {
            fail("line index offset not -1");
        }
        if (fd.currentOffset != -1) {
            fail("current offset not -1");
        }
        if (fd.hasLoadAllSegmentDone) {
            fail("has load all segment Done not false");
        }

        String fileName = prefix + "testClose";
        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());

        FSDataInputStream in = fs.open(path);
        try {
            long metaOffset = fileLen - ConstVar.IndexMetaOffset;
            in.seek(metaOffset);
            int recordNum = in.readInt();
            int segNum = in.readInt();
            long keyIndexOffset = in.readLong();
            long lineIndexOffset = in.readLong();

            if (recordNum != size) {
                fail("error record num:" + recordNum);
            }
            if (segNum != 1) {
                fail("error segNum:" + segNum);
            }
            if (keyIndexOffset != -1) {
                fail("error key index offset:" + keyIndexOffset);
            }
            if (lineIndexOffset != (headLen + segmentLen)) {
                fail("error line index offset:" + lineIndexOffset);
            }

            in.seek(lineIndexOffset);
            for (int i = 0; i < segNum; i++) {
                int beginLine = in.readInt();
                int endLine = in.readInt();
                long offset = in.readLong();
                long len = in.readLong();
                in.readInt(); // segment idx: not checked, read only to advance the stream

                if (beginLine != 0) {
                    fail("error beginLine:" + beginLine);
                }
                if (endLine != size) {
                    fail("error end line:" + endLine);
                }
                if (offset != head.len()) {
                    fail("error offset:" + offset);
                }

                // Widen before multiplying so the expected length cannot overflow int.
                long tlen = (long) size * full7chunkLen + (long) size * 8
                        + (long) ConstVar.DataChunkMetaOffset * (unitNum + 1) + 28L * (unitNum + 1) + 24;
                if (len != tlen) {
                    fail("error len:" + len);
                }
            }
        } finally {
            in.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail("get ioexception:" + e.getMessage());
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}
From source file:LookupPostingsCompressed.java
License:Apache License
/**
 * Runs this tool.
 *
 * <p>Opens the index ({@code <index>/part-r-00000}) and the collection file, then:
 * <ol>
 *   <li>decodes the compressed postings for "starcross'd" and prints the collection line at
 *       each decoded offset;</li>
 *   <li>prints the postings list and the term-frequency histogram for "gold";</li>
 *   <li>does the same for "silver";</li>
 *   <li>reports whether "bronze" is missing from the index.</li>
 * </ol>
 *
 * <p>Postings are read as pairs of VInts: a gap that is added to a running offset, then a
 * count.
 *
 * @param args command-line arguments; both the index and collection options are required
 * @return 0 on success (argument errors terminate the JVM with exit code -1)
 * @throws Exception if the index or collection cannot be read
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LookupPostingsCompressed.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);

    MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config);
    try {
        FSDataInputStream collection = fs.open(new Path(collectionPath));
        try {
            Text key = new Text();
            PairOfWritables<VIntWritable, BytesWritable> value = new PairOfWritables<VIntWritable, BytesWritable>();

            // starcross'd: print the collection line at every posting.
            System.out.println("Looking up postings for the term \"starcross'd\"");
            key.set("starcross'd");
            reader.get(key, value);

            BytesWritable postings = value.getRightElement();
            ByteArrayInputStream buffer = new ByteArrayInputStream(postings.copyBytes());
            DataInputStream in = new DataInputStream(buffer);

            int offset = 0;
            int count;
            while (in.available() != 0) {
                offset = offset + WritableUtils.readVInt(in);
                count = WritableUtils.readVInt(in);
                System.out.print("(" + offset + ", " + count + ")");
                collection.seek(offset);
                // A BufferedReader reads ahead, so one created before the seek would keep
                // serving stale bytes. Create a fresh one after every seek. It is not closed
                // here because closing it would close the underlying collection stream.
                BufferedReader lineReader = new BufferedReader(new InputStreamReader(collection));
                System.out.println(lineReader.readLine());
            }

            // gold: postings list, then tf histogram.
            key.set("gold");
            reader.get(key, value);
            postings = value.getRightElement();
            buffer = new ByteArrayInputStream(postings.copyBytes());
            in = new DataInputStream(buffer);

            System.out.println("Complete postings list for 'gold': (" + value.getLeftElement() + ", [");
            offset = 0;
            while (in.available() != 0) {
                offset = offset + WritableUtils.readVInt(in);
                count = WritableUtils.readVInt(in);
                System.out.print("(" + offset + ", " + count + ")");
                System.out.print(", ");
            }
            System.out.print("])\n");

            Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry();
            buffer.reset(); // rewind to the start for a second pass
            while (in.available() != 0) {
                WritableUtils.readVInt(in); // offset gap, not needed for the histogram
                count = WritableUtils.readVInt(in);
                goldHist.increment(count);
            }

            System.out.println("histogram of tf values for gold");
            for (PairOfInts pair : goldHist) {
                System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
            }
            buffer.close();

            // silver: postings list, then tf histogram.
            key.set("silver");
            reader.get(key, value);
            postings = value.getRightElement();
            buffer = new ByteArrayInputStream(postings.copyBytes());
            in = new DataInputStream(buffer);

            System.out.println("Complete postings list for 'silver': (" + value.getLeftElement() + ", [");
            // Restart the running offset; without this the silver offsets would be shifted
            // by the total accumulated while decoding gold.
            offset = 0;
            while (in.available() != 0) {
                offset = offset + WritableUtils.readVInt(in);
                count = WritableUtils.readVInt(in);
                System.out.print("(" + offset + ", " + count + ")");
                System.out.print(", ");
            }
            System.out.print("])\n");

            Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry();
            buffer.reset();
            while (in.available() != 0) {
                WritableUtils.readVInt(in); // offset gap, not needed for the histogram
                count = WritableUtils.readVInt(in);
                silverHist.increment(count);
            }

            System.out.println("histogram of tf values for silver");
            // Iterate the silver histogram; the original printed goldHist here.
            for (PairOfInts pair : silverHist) {
                System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
            }
            buffer.close();

            // bronze: expected to be absent.
            key.set("bronze");
            Writable w = reader.get(key, value);
            if (w == null) {
                System.out.println("the term bronze does not appear in the collection");
            }
        } finally {
            collection.close();
        }
    } finally {
        reader.close();
    }

    return 0;
}
From source file:LookupPostingsCompressed1.java
License:Apache License
@SuppressWarnings({ "static-access" }) public static void main(String[] args) throws IOException { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX)); options.addOption(//from ww w . ja v a 2s. co m OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(LookupPostingsCompressed1.class.getName(), options); ToolRunner.printGenericCommandUsage(System.out); System.exit(-1); } String indexPath = cmdline.getOptionValue(INDEX); String collectionPath = cmdline.getOptionValue(COLLECTION); if (collectionPath.endsWith(".gz")) { System.out.println("gzipped collection is not seekable: use compressed version!"); System.exit(-1); } Configuration config = new Configuration(); FileSystem fs = FileSystem.get(config); MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config); FSDataInputStream collection = fs.open(new Path(collectionPath)); BufferedReader d = new BufferedReader(new InputStreamReader(collection)); Text key = new Text(); PairOfWritables<VIntWritable, ArrayListWritable<PairOfVInts>> value = new PairOfWritables<VIntWritable, ArrayListWritable<PairOfVInts>>(); System.out.println("Looking up postings for the term \"starcross'd\""); key.set("starcross'd"); reader.get(key, value); ArrayListWritable<PairOfVInts> postings = value.getRightElement(); for (PairOfVInts pair : postings) { System.out.println(pair); collection.seek(pair.getLeftElement()); 
System.out.println(d.readLine()); } key.set("gold"); reader.get(key, value); System.out.println("Complete postings list for 'gold': " + value); Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry(); postings = value.getRightElement(); for (PairOfVInts pair : postings) { goldHist.increment(pair.getRightElement()); } System.out.println("histogram of tf values for gold"); for (PairOfInts pair : goldHist) { System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement()); } key.set("silver"); reader.get(key, value); System.out.println("Complete postings list for 'silver': " + value); Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry(); postings = value.getRightElement(); for (PairOfVInts pair : postings) { silverHist.increment(pair.getRightElement()); } System.out.println("histogram of tf values for silver"); for (PairOfInts pair : silverHist) { System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement()); } key.set("bronze"); Writable w = reader.get(key, value); if (w == null) { System.out.println("the term bronze does not appear in the collection"); } collection.close(); reader.close(); }
From source file:ReadAllTest.java
License:Apache License
/**
 * Positions the stream at the given offset and reads a single byte from there.
 *
 * @param fis stream to reposition and read from
 * @param at  offset passed to {@code seek}
 * @return whatever {@code fis.read()} returns after the seek
 * @throws IOException if the seek or the read fails
 */
private static int readAt(FSDataInputStream fis, long at) throws IOException {
    fis.seek(at);
    final int byteAtOffset = fis.read();
    return byteAtOffset;
}
From source file:LookupPostings.java
License:Apache License
/** * Runs this tool./*from ww w. j a va 2 s . c o m*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(LookupPostings.class.getName(), options); ToolRunner.printGenericCommandUsage(System.out); System.exit(-1); } String indexPath = cmdline.getOptionValue(INDEX); String collectionPath = cmdline.getOptionValue(COLLECTION); if (collectionPath.endsWith(".gz")) { System.out.println("gzipped collection is not seekable: use compressed version!"); System.exit(-1); } Configuration config = new Configuration(); FileSystem fs = FileSystem.get(config); MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config); FSDataInputStream collection = fs.open(new Path(collectionPath)); BufferedReader d = new BufferedReader(new InputStreamReader(collection)); Text key = new Text(); PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>> value = new PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>>(); System.out.println("Looking up postings for the term \"starcross'd\""); key.set("starcross'd"); reader.get(key, value); ArrayListWritable<PairOfInts> postings = value.getRightElement(); for (PairOfInts pair : postings) { System.out.println(pair); collection.seek(pair.getLeftElement()); 
System.out.println(d.readLine()); } key.set("gold"); reader.get(key, value); System.out.println("Complete postings list for 'gold': " + value); Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry(); postings = value.getRightElement(); for (PairOfInts pair : postings) { goldHist.increment(pair.getRightElement()); } System.out.println("histogram of tf values for gold"); for (PairOfInts pair : goldHist) { System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement()); } key.set("silver"); reader.get(key, value); System.out.println("Complete postings list for 'silver': " + value); Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry(); postings = value.getRightElement(); for (PairOfInts pair : postings) { silverHist.increment(pair.getRightElement()); } System.out.println("histogram of tf values for silver"); for (PairOfInts pair : silverHist) { System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement()); } key.set("bronze"); Writable w = reader.get(key, value); if (w == null) { System.out.println("the term bronze does not appear in the collection"); } collection.close(); reader.close(); return 0; }