List of usage examples for org.apache.hadoop.io.VIntWritable.get()
public int get()
From source file:org.terrier.structures.indexing.singlepass.hadoop.Inv2DirectMultiReduce.java
License:Mozilla Public License
/** * {@inheritDoc} /* w w w . jav a2s .c om*/ */ public void reduce(VIntWritable _targetDocid, Iterator<Posting> documentPostings, OutputCollector<Object, Object> collector, Reporter reporter) throws IOException { final int targetDocid = _targetDocid.get(); reporter.setStatus("Reducing for doc " + targetDocid); if (actualDocid > targetDocid) { logger.error("Received posting list for target doc " + targetDocid + " which is greater than actualDoc " + actualDocid + ". This target doc's posting will be ignored."); return; } while (actualDocid < targetDocid) { //if (logger.isDebugEnabled()) // logger.debug("moving forward: target="+targetDocid + " actual="+actualDocid ); SimpleBitIndexPointer p = new SimpleBitIndexPointer(); p.setOffset(postingOutputStream.getOffset()); p.setNumberOfEntries(0); p.write(pointerOutputStream); //System.err.println("actualDocid="+ actualDocid + " writing empty pointer"); actualDocid++; reporter.progress(); } /* this implementation loads all postings for a given document into memory, then sorts them by * term id. This is acceptable, as documents are assumed to have sufficiently small postings that * they can fit in memory */ List<Posting> postingList = new ArrayList<Posting>(); //int doclen = 0; TIntHashSet foundIds = new TIntHashSet(); while (documentPostings.hasNext()) { final Posting p = documentPostings.next().asWritablePosting(); //check for duplicate pointers if (!foundIds.contains(p.getId())) { postingList.add(p); //doclen += p.getFrequency(); reporter.progress(); foundIds.add(p.getId()); } else { dupPointers++; } } Collections.sort(postingList, new PostingIdComparator()); BitIndexPointer pointer = postingOutputStream.writePostings(postingList.iterator()); pointer.write(pointerOutputStream); actualDocid++; }
From source file:PFPGrowth_in_SPARK.TransactionTree.java
License:Apache License
public void readFields(DataInput in) throws IOException { representedAsList = in.readBoolean(); VIntWritable vInt = new VIntWritable(); VLongWritable vLong = new VLongWritable(); if (representedAsList) { transactionSet = Lists.newArrayList(); vInt.readFields(in);//from w ww . j av a2s .c o m int numTransactions = vInt.get(); for (int i = 0; i < numTransactions; i++) { vLong.readFields(in); Long support = vLong.get(); vInt.readFields(in); int length = vInt.get(); int[] items = new int[length]; for (int j = 0; j < length; j++) { vInt.readFields(in); items[j] = vInt.get(); } Pair<IntArrayList, Long> transaction = new Pair<IntArrayList, Long>(new IntArrayList(items), support); transactionSet.add(transaction); } } else { vInt.readFields(in); nodes = vInt.get(); attribute = new int[nodes]; nodeCount = new long[nodes]; childCount = new int[nodes]; nodeChildren = new int[nodes][]; for (int i = 0; i < nodes; i++) { vInt.readFields(in); attribute[i] = vInt.get(); vLong.readFields(in); nodeCount[i] = vLong.get(); vInt.readFields(in); int childCountI = vInt.get(); childCount[i] = childCountI; nodeChildren[i] = new int[childCountI]; for (int j = 0; j < childCountI; j++) { vInt.readFields(in); nodeChildren[i][j] = vInt.get(); } } } }