Example usage for org.apache.hadoop.io WritableUtils readVInt

List of usage examples for org.apache.hadoop.io WritableUtils readVInt

Introduction

On this page you can find example usages of org.apache.hadoop.io.WritableUtils.readVInt.

Prototype

public static int readVInt(DataInput stream) throws IOException 

Source Link

Document

Reads a zero-compressed encoded integer from input stream and returns it.

Usage

From source file:BooleanRetrievalCompressed.java

License:Apache License

/**
 * Decodes the postings of {@code term} into a sorted set of document numbers.
 *
 * <p>The postings bytes are read as consecutive pairs of variable-length ints.
 * The first value of each pair is added to a running total, and that total is
 * recorded as a document number; the second value is read and discarded.
 *
 * @param term the term whose postings are fetched via {@code fetchPostings}
 * @return the accumulated document numbers, in ascending order
 * @throws IOException if fetching or decoding the postings fails
 */
public Set<Integer> fetchDocumentSet(String term) throws IOException {
    Set<Integer> docNumbers = new TreeSet<Integer>();
    byte[] rawPostings = fetchPostings(term).copyBytes();
    ByteArrayInputStream byteStream = new ByteArrayInputStream(rawPostings);
    DataInputStream postingsIn = new DataInputStream(byteStream);

    int docNumber = 0;
    while (postingsIn.available() != 0) {
        // Each stored value is a delta from the previous document number.
        docNumber += WritableUtils.readVInt(postingsIn);
        docNumbers.add(docNumber);
        // Skip the second value of the pair (presumably the term frequency -- confirm against the indexer).
        WritableUtils.readVInt(postingsIn);
    }

    byteStream.close();
    postingsIn.close();
    return docNumbers;
}

From source file:LookupPostingsCompressed.java

License:Apache License

/**
 * Runs this tool.
 *
 * <p>Parses the {@code INDEX} and {@code COLLECTION} options, opens the index
 * ({@code part-r-00000} under the index path) and the collection file, and then:
 * <ol>
 *   <li>looks up "starcross'd" and prints each (offset, count) posting followed by
 *       the collection line found at that offset;</li>
 *   <li>looks up "gold" and "silver", printing each complete postings list and a
 *       histogram of the count values;</li>
 *   <li>looks up "bronze" and reports when it is absent from the index.</li>
 * </ol>
 *
 * <p>Postings are decoded as pairs of variable-length ints: a delta that is added
 * to the running offset, followed by a count.
 *
 * @param args command-line arguments; both options are required
 * @return 0 on completion
 * @throws Exception if the index or the collection cannot be read
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LookupPostingsCompressed.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config);

    FSDataInputStream collection = fs.open(new Path(collectionPath));

    Text key = new Text();
    PairOfWritables<VIntWritable, BytesWritable> value = new PairOfWritables<VIntWritable, BytesWritable>();

    System.out.println("Looking up postings for the term \"starcross'd\"");
    key.set("starcross'd");

    reader.get(key, value);

    BytesWritable postings = value.getRightElement();
    ByteArrayInputStream buffer = new ByteArrayInputStream(postings.copyBytes());
    DataInputStream in = new DataInputStream(buffer);
    int offset = 0;
    int count;
    while (in.available() != 0) {
        offset += WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);
        System.out.print("(" + offset + ", " + count + ")");
        collection.seek(offset);
        // A BufferedReader reads ahead into its own buffer, so one created before the
        // seek would keep serving previously buffered text. Wrap the stream afresh
        // after every seek so the line really comes from the new position. The
        // wrapper is deliberately not closed: closing it would close the collection.
        BufferedReader lineReader = new BufferedReader(new InputStreamReader(collection));
        System.out.println(lineReader.readLine());
    }
    buffer.close();

    // Gold: print the postings list and tally the counts in the same pass.
    offset = 0;
    key.set("gold");
    reader.get(key, value);
    postings = value.getRightElement();
    buffer = new ByteArrayInputStream(postings.copyBytes());
    in = new DataInputStream(buffer);
    Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry();
    System.out.println("Complete postings list for 'gold': (" + value.getLeftElement() + ", [");
    while (in.available() != 0) {
        offset += WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);
        System.out.print("(" + offset + ", " + count + ")");
        System.out.print(", ");
        goldHist.increment(count);
    }
    System.out.print("])\n");

    System.out.println("histogram of tf values for gold");
    for (PairOfInts pair : goldHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }

    buffer.close();

    // Silver: the running offset must restart at zero, otherwise the printed
    // offsets would continue from the last gold posting.
    offset = 0;
    key.set("silver");
    reader.get(key, value);
    postings = value.getRightElement();
    buffer = new ByteArrayInputStream(postings.copyBytes());
    in = new DataInputStream(buffer);
    Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry();
    System.out.println("Complete postings list for 'silver': (" + value.getLeftElement() + ", [");
    while (in.available() != 0) {
        offset += WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);
        System.out.print("(" + offset + ", " + count + ")");
        System.out.print(", ");
        silverHist.increment(count);
    }
    System.out.print("])\n");

    System.out.println("histogram of tf values for silver");
    // Iterate the silver histogram here (not the gold one).
    for (PairOfInts pair : silverHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }

    buffer.close();

    key.set("bronze");
    Writable w = reader.get(key, value);

    if (w == null) {
        System.out.println("the term bronze does not appear in the collection");
    }

    collection.close();
    reader.close();

    return 0;
}

From source file:Assignment4_P2_StockAverageWithCombiner.StockAverage_CompositeValueWritable.java

/**
 * Restores this value from {@code di}: first a variable-length int into
 * {@code count}, then a string into {@code average}.
 *
 * @param di the input to read from
 * @throws IOException if reading from {@code di} fails
 */
@Override
public void readFields(DataInput di) throws IOException {
    this.count = WritableUtils.readVInt(di);
    this.average = WritableUtils.readString(di);
}

From source file:babel.content.pages.Page.java

License:Apache License

/**
 * Restores this page from {@code in}.
 *
 * <p>Reads, in order: the page URL, the page properties, a variable-length
 * version count, and then that many {@code PageVersion} records, which replace
 * the current version list.
 *
 * @param in the input to read from
 * @throws IOException if reading from {@code in} fails
 */
public void readFields(DataInput in) throws IOException {
    m_pageURL = Text.readString(in);
    m_pageProps.readFields(in);

    final int versionCount = WritableUtils.readVInt(in);
    m_versions = new ArrayList<PageVersion>(versionCount);

    for (int left = versionCount; left > 0; left--) {
        PageVersion version = new PageVersion();
        version.readFields(in);
        m_versions.add(version);
    }
}

From source file:babel.content.pages.PageVersion.java

License:Apache License

/**
 * Restores this page version from {@code in}.
 *
 * <p>Reads, in order: the version properties, the content metadata, the parse
 * metadata, a variable-length outlink count followed by that many
 * {@code Outlink} records, and finally the content string. A count of zero
 * leaves the outlink array {@code null}.
 *
 * @param in the input to read from
 * @throws IOException if reading from {@code in} fails
 */
public void readFields(DataInput in) throws IOException {
    m_verProps.readFields(in);
    m_contentMeta.readFields(in);
    m_parseMeta.readFields(in);

    final int linkCount = WritableUtils.readVInt(in);

    if (linkCount == 0) {
        m_outLinks = null;
    } else {
        m_outLinks = new Outlink[linkCount];
    }

    for (int idx = 0; idx < linkCount; idx++) {
        // Store the link before populating it, as the original single expression did.
        Outlink link = new Outlink();
        m_outLinks[idx] = link;
        link.readFields(in);
    }

    m_content = Text.readString(in);
}

From source file:cascading.tuple.hadoop.io.HadoopTupleInputStream.java

License:Open Source License

/**
 * Reads one variable-length encoded int from this stream by delegating to
 * {@code WritableUtils.readVInt}.
 *
 * @return the decoded value
 * @throws IOException if reading from this stream fails
 */
public int readVInt() throws IOException {
    final int decoded = WritableUtils.readVInt(this);
    return decoded;
}

From source file:cn.edu.jnu.ie.backend.NutchDocument.java

License:Apache License

/**
 * Restores this document from {@code in}, replacing any existing fields.
 *
 * <p>Reads a version byte (which must equal {@code VERSION}), a variable-length
 * field count, that many (name, {@code NutchField}) pairs, and finally the
 * weight as a float.
 *
 * @param in the input to read from
 * @throws IOException if reading from {@code in} fails
 * @throws VersionMismatchException if the stored version byte differs from {@code VERSION}
 */
public void readFields(DataInput in) throws IOException {
    fields.clear();

    final byte storedVersion = in.readByte();
    if (VERSION != storedVersion) {
        throw new VersionMismatchException(VERSION, storedVersion);
    }

    int remaining = WritableUtils.readVInt(in);
    while (remaining > 0) {
        String fieldName = Text.readString(in);
        NutchField fieldValue = new NutchField();
        fieldValue.readFields(in);
        fields.put(fieldName, fieldValue);
        remaining--;
    }

    weight = in.readFloat();
}

From source file:cn.iie.haiep.hbase.value.Bytes.java

License:Apache License

/**
 * Reads a byte array that was written with a WritableUtils vint length prefix.
 *
 * @param in input to read from
 * @return the byte array read off <code>in</code>
 * @throws IOException if reading from <code>in</code> fails
 * @throws NegativeArraySizeException if the length prefix is negative
 */
public static byte[] readByteArray(final DataInput in) throws IOException {
    final int length = WritableUtils.readVInt(in);
    if (length >= 0) {
        byte[] bytes = new byte[length];
        in.readFully(bytes, 0, length);
        return bytes;
    }
    throw new NegativeArraySizeException(Integer.toString(length));
}

From source file:co.cask.cdap.examples.purchase.Purchase.java

License:Apache License

/**
 * Restores this purchase from {@code in}.
 *
 * <p>Fields are read in this fixed order: customer and product (strings),
 * quantity and price (variable-length ints), purchase time (variable-length
 * long), and catalog id (string).
 *
 * @param in the input to read from
 * @throws IOException if reading from {@code in} fails
 */
@Override
public void readFields(DataInput in) throws IOException {
    this.customer = WritableUtils.readString(in);
    this.product = WritableUtils.readString(in);
    this.quantity = WritableUtils.readVInt(in);
    this.price = WritableUtils.readVInt(in);
    this.purchaseTime = WritableUtils.readVLong(in);
    this.catalogId = WritableUtils.readString(in);
}

From source file:com.asakusafw.bridge.hadoop.directio.Util.java

License:Apache License

/**
 * Reads a string-to-string map from {@code in}.
 *
 * <p>The encoding is a variable-length entry count followed by that many
 * (key, value) string pairs. A count of zero yields
 * {@link Collections#emptyMap()}; otherwise the entries are returned in a
 * {@link LinkedHashMap}, preserving the order in which they were read.
 *
 * @param in the input to read from
 * @return the decoded map
 * @throws IOException if reading from {@code in} fails
 */
static Map<String, String> readMap(DataInput in) throws IOException {
    final int entryCount = WritableUtils.readVInt(in);
    if (entryCount != 0) {
        Map<String, String> entries = new LinkedHashMap<>();
        int index = 0;
        while (index < entryCount) {
            String entryKey = Text.readString(in);
            String entryValue = Text.readString(in);
            entries.put(entryKey, entryValue);
            index++;
        }
        return entries;
    }
    return Collections.emptyMap();
}