Example usage of org.apache.hadoop.io.VIntWritable.get()

List of usage examples for org.apache.hadoop.io.VIntWritable.get()

Introduction

On this page you can find usage examples for the method org.apache.hadoop.io.VIntWritable.get().

Prototype

public int get() 

Source Link

Document

Return the value of this VIntWritable.

Usage

From source file:org.terrier.structures.indexing.singlepass.hadoop.Inv2DirectMultiReduce.java

License:Mozilla Public License

/** 
 * {@inheritDoc} /*  w  w  w . jav  a2s .c om*/
 */
public void reduce(VIntWritable _targetDocid, Iterator<Posting> documentPostings,
        OutputCollector<Object, Object> collector, Reporter reporter) throws IOException {
    final int targetDocid = _targetDocid.get();
    reporter.setStatus("Reducing for doc " + targetDocid);
    if (actualDocid > targetDocid) {
        logger.error("Received posting list for target doc " + targetDocid + " which is greater than actualDoc "
                + actualDocid + ". This target doc's posting will be ignored.");
        return;
    }
    while (actualDocid < targetDocid) {
        //if (logger.isDebugEnabled())
        //   logger.debug("moving forward: target="+targetDocid + " actual="+actualDocid );
        SimpleBitIndexPointer p = new SimpleBitIndexPointer();
        p.setOffset(postingOutputStream.getOffset());
        p.setNumberOfEntries(0);
        p.write(pointerOutputStream);
        //System.err.println("actualDocid="+ actualDocid + " writing empty pointer");
        actualDocid++;
        reporter.progress();
    }

    /* this implementation loads all postings for a given document into memory, then sorts them by
     * term id. This is acceptable, as documents are assumed to have sufficiently small postings that
     * they can fit in memory */

    List<Posting> postingList = new ArrayList<Posting>();
    //int doclen = 0;
    TIntHashSet foundIds = new TIntHashSet();
    while (documentPostings.hasNext()) {
        final Posting p = documentPostings.next().asWritablePosting();
        //check for duplicate pointers
        if (!foundIds.contains(p.getId())) {
            postingList.add(p);
            //doclen += p.getFrequency();
            reporter.progress();
            foundIds.add(p.getId());
        } else {
            dupPointers++;
        }
    }

    Collections.sort(postingList, new PostingIdComparator());
    BitIndexPointer pointer = postingOutputStream.writePostings(postingList.iterator());
    pointer.write(pointerOutputStream);
    actualDocid++;
}

From source file:PFPGrowth_in_SPARK.TransactionTree.java

License:Apache License

/**
 * Restores this object's state from {@code in}.
 * <p>
 * The stream starts with a boolean that selects the representation:
 * <ul>
 * <li>{@code true} (list form): a vint transaction count, then per transaction a
 * vlong support, a vint item count and that many vint items.</li>
 * <li>{@code false} (node form): a vint node count, then per node a vint attribute,
 * a vlong node count, a vint child count and that many vint child entries.</li>
 * </ul>
 *
 * @param in source to read the serialized fields from
 * @throws IOException if reading from {@code in} fails
 */
public void readFields(DataInput in) throws IOException {
    representedAsList = in.readBoolean();

    final VIntWritable intReader = new VIntWritable();
    final VLongWritable longReader = new VLongWritable();

    if (!representedAsList) {
        // Node form: parallel arrays indexed by node number.
        intReader.readFields(in);
        nodes = intReader.get();
        attribute = new int[nodes];
        nodeCount = new long[nodes];
        childCount = new int[nodes];
        nodeChildren = new int[nodes][];
        for (int node = 0; node < nodes; node++) {
            intReader.readFields(in);
            attribute[node] = intReader.get();

            longReader.readFields(in);
            nodeCount[node] = longReader.get();

            intReader.readFields(in);
            final int numChildren = intReader.get();
            childCount[node] = numChildren;

            final int[] children = new int[numChildren];
            nodeChildren[node] = children;
            for (int c = 0; c < numChildren; c++) {
                intReader.readFields(in);
                children[c] = intReader.get();
            }
        }
        return;
    }

    // List form: a sequence of (items, support) pairs.
    transactionSet = Lists.newArrayList();
    intReader.readFields(in);
    final int transactionTotal = intReader.get();
    for (int t = 0; t < transactionTotal; t++) {
        longReader.readFields(in);
        final Long support = longReader.get();

        intReader.readFields(in);
        final int itemTotal = intReader.get();

        final int[] items = new int[itemTotal];
        for (int k = 0; k < itemTotal; k++) {
            intReader.readFields(in);
            items[k] = intReader.get();
        }
        transactionSet.add(new Pair<IntArrayList, Long>(new IntArrayList(items), support));
    }
}