List of usage examples for org.apache.hadoop.io.WritableUtils.readVInt
public static int readVInt(DataInput stream) throws IOException
From source file:BooleanRetrievalCompressed.java
License:Apache License
public Set<Integer> fetchDocumentSet(String term) throws IOException { Set<Integer> set = new TreeSet<Integer>(); byte[] buffer = fetchPostings(term).copyBytes(); ByteArrayInputStream post = new ByteArrayInputStream(buffer); DataInputStream in = new DataInputStream(post); int RealDocNo = 0; while (in.available() != 0) { RealDocNo = RealDocNo + WritableUtils.readVInt(in); set.add(RealDocNo);// www .j a v a2 s .c o m WritableUtils.readVInt(in); } post.close(); in.close(); return set; }
From source file:LookupPostingsCompressed.java
License:Apache License
/**
 * Runs this tool.
 *
 * <p>Parses the {@code INDEX} and {@code COLLECTION} options, opens the index
 * ({@code part-r-00000} under the index path) and the collection file, and then:
 * <ul>
 *   <li>prints every posting of "starcross'd" followed by the collection line found
 *       at the posting's offset;</li>
 *   <li>prints the complete postings list and the histogram of tf values for "gold"
 *       and for "silver";</li>
 *   <li>reports when "bronze" does not appear in the index.</li>
 * </ul>
 *
 * @param args command-line arguments
 * @return 0 on completion
 * @throws Exception if the index or the collection cannot be read
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LookupPostingsCompressed.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);

    // Both resources are released even if a lookup fails; they close in reverse
    // declaration order (collection first, then the index reader).
    try (MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config);
            FSDataInputStream collection = fs.open(new Path(collectionPath))) {

        Text key = new Text();
        PairOfWritables<VIntWritable, BytesWritable> value = new PairOfWritables<VIntWritable, BytesWritable>();

        System.out.println("Looking up postings for the term \"starcross'd\"");
        key.set("starcross'd");
        if (reader.get(key, value) == null) {
            System.out.println("the term starcross'd does not appear in the collection");
        } else {
            DataInputStream in = new DataInputStream(
                    new ByteArrayInputStream(value.getRightElement().copyBytes()));
            int offset = 0;
            while (in.available() != 0) {
                // Offsets are gap-encoded: each VInt is a delta from the previous offset.
                offset += WritableUtils.readVInt(in);
                int count = WritableUtils.readVInt(in);
                System.out.print("(" + offset + ", " + count + ")");

                collection.seek(offset);
                // A fresh reader is required after every seek: a long-lived BufferedReader
                // keeps serving its read-ahead buffer and ignores the new stream position.
                // It is deliberately not closed, since that would close the collection.
                BufferedReader lineReader = new BufferedReader(new InputStreamReader(collection));
                System.out.println(lineReader.readLine());
            }
        }

        printPostingsAndHistogram(reader, "gold");
        printPostingsAndHistogram(reader, "silver");

        key.set("bronze");
        Writable w = reader.get(key, value);
        if (w == null) {
            System.out.println("the term bronze does not appear in the collection");
        }
    }

    return 0;
}

/**
 * Prints the complete postings list of {@code term} followed by the histogram of its
 * tf values, or a notice when the term is not in the index.
 *
 * <p>Each term gets its own key, value, offset accumulator and histogram, so nothing
 * carries over from a previously printed term.
 *
 * @param reader the open index to look the term up in
 * @param term the term to print
 * @throws java.io.IOException if the index cannot be read or the postings cannot be decoded
 */
private static void printPostingsAndHistogram(MapFile.Reader reader, String term) throws java.io.IOException {
    Text key = new Text();
    key.set(term);
    PairOfWritables<VIntWritable, BytesWritable> value = new PairOfWritables<VIntWritable, BytesWritable>();
    if (reader.get(key, value) == null) {
        System.out.println("the term " + term + " does not appear in the collection");
        return;
    }

    DataInputStream in = new DataInputStream(new ByteArrayInputStream(value.getRightElement().copyBytes()));
    Int2IntFrequencyDistribution histogram = new Int2IntFrequencyDistributionEntry();

    System.out.println("Complete postings list for '" + term + "': (" + value.getLeftElement() + ", [");
    int offset = 0;
    while (in.available() != 0) {
        offset += WritableUtils.readVInt(in);
        int count = WritableUtils.readVInt(in);
        System.out.print("(" + offset + ", " + count + ")");
        System.out.print(", ");
        // Collected in the same pass; the histogram is only printed after the list.
        histogram.increment(count);
    }
    System.out.print("])\n");

    System.out.println("histogram of tf values for " + term);
    for (PairOfInts pair : histogram) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }
}
From source file:Assignment4_P2_StockAverageWithCombiner.StockAverage_CompositeValueWritable.java
/**
 * Restores this value from {@code di}: first the count, stored as a VInt, then the
 * average, stored as a string.
 *
 * @param di the input to deserialize from
 * @throws IOException if reading from {@code di} fails
 */
@Override
public void readFields(DataInput di) throws IOException {
    this.count = WritableUtils.readVInt(di);
    this.average = WritableUtils.readString(di);
}
From source file:babel.content.pages.Page.java
License:Apache License
public void readFields(DataInput in) throws IOException { m_pageURL = Text.readString(in); m_pageProps.readFields(in);//from w w w .j a va 2s.c o m int numVersions = WritableUtils.readVInt(in); m_versions = new ArrayList<PageVersion>(numVersions); PageVersion curVer; for (int i = 0; i < numVersions; i++) { curVer = new PageVersion(); curVer.readFields(in); m_versions.add(curVer); } }
From source file:babel.content.pages.PageVersion.java
License:Apache License
public void readFields(DataInput in) throws IOException { m_verProps.readFields(in);//from w w w . ja v a2s .c om m_contentMeta.readFields(in); m_parseMeta.readFields(in); int numLinks = WritableUtils.readVInt(in); m_outLinks = (numLinks == 0) ? null : new Outlink[numLinks]; for (int i = 0; i < numLinks; i++) { (m_outLinks[i] = new Outlink()).readFields(in); } m_content = Text.readString(in); }
From source file:cascading.tuple.hadoop.io.HadoopTupleInputStream.java
License:Open Source License
/**
 * Reads one int from this stream by delegating to {@code WritableUtils.readVInt}.
 *
 * @return the decoded value
 * @throws IOException if the underlying read fails
 */
public int readVInt() throws IOException {
    final int decoded = WritableUtils.readVInt(this);
    return decoded;
}
From source file:cn.edu.jnu.ie.backend.NutchDocument.java
License:Apache License
/**
 * Replaces the contents of this document with the state read from {@code in}.
 *
 * <p>The layout read is: one version byte, a VInt field count, that many
 * (name, field) entries, and a trailing float weight.
 *
 * @param in the input to read from
 * @throws IOException if reading fails; a {@code VersionMismatchException} is thrown
 *         when the stored version byte differs from {@code VERSION}
 */
public void readFields(DataInput in) throws IOException {
    fields.clear();

    final byte storedVersion = in.readByte();
    if (VERSION != storedVersion) {
        throw new VersionMismatchException(VERSION, storedVersion);
    }

    final int fieldCount = WritableUtils.readVInt(in);
    for (int remaining = fieldCount; remaining > 0; remaining--) {
        String fieldName = Text.readString(in);
        NutchField fieldValue = new NutchField();
        fieldValue.readFields(in);
        fields.put(fieldName, fieldValue);
    }

    weight = in.readFloat();
}
From source file:cn.iie.haiep.hbase.value.Bytes.java
License:Apache License
/**
 * Reads a byte array that was written with a {@code WritableUtils} vint length prefix.
 *
 * @param in Input to read from.
 * @return byte array read off <code>in</code>
 * @throws IOException e
 * @throws NegativeArraySizeException if the length prefix is negative
 */
public static byte[] readByteArray(final DataInput in) throws IOException {
    final int length = WritableUtils.readVInt(in);
    if (length < 0) {
        throw new NegativeArraySizeException(Integer.toString(length));
    }

    final byte[] bytes = new byte[length];
    in.readFully(bytes, 0, length);
    return bytes;
}
From source file:co.cask.cdap.examples.purchase.Purchase.java
License:Apache License
@Override public void readFields(DataInput in) throws IOException { customer = WritableUtils.readString(in); product = WritableUtils.readString(in); quantity = WritableUtils.readVInt(in); price = WritableUtils.readVInt(in);//from w w w . ja v a2s. co m purchaseTime = WritableUtils.readVLong(in); catalogId = WritableUtils.readString(in); }
From source file:com.asakusafw.bridge.hadoop.directio.Util.java
License:Apache License
/**
 * Reads a string-to-string map from {@code in}: a VInt entry count followed by that
 * many (key, value) string pairs.
 *
 * @param in the input to read from
 * @return {@code Collections.emptyMap()} when the stored count is zero; otherwise a
 *         {@code LinkedHashMap} holding the entries in the order they were read
 * @throws IOException if reading from {@code in} fails
 */
static Map<String, String> readMap(DataInput in) throws IOException {
    final int entryCount = WritableUtils.readVInt(in);
    if (entryCount == 0) {
        return Collections.emptyMap();
    }

    final Map<String, String> entries = new LinkedHashMap<>();
    for (int remaining = entryCount; remaining > 0; remaining--) {
        final String entryKey = Text.readString(in);
        final String entryValue = Text.readString(in);
        entries.put(entryKey, entryValue);
    }
    return entries;
}