List of usage examples for com.google.common.collect MinMaxPriorityQueue maximumSize
int maximumSize
To view the source code for com.google.common.collect MinMaxPriorityQueue maximumSize, click the Source Link.
From source file:eu.project.ttc.engines.BilingualAligner.java
public List<TranslationCandidate> alignDistributional(Term sourceTerm, int nbCandidates, int minCandidateFrequency) { Queue<TranslationCandidate> alignedCandidateQueue = MinMaxPriorityQueue.maximumSize(nbCandidates).create(); ContextVector sourceVector = sourceTerm.getContextVector(); ContextVector translatedSourceVector = AlignerUtils.translateVector(sourceVector, dico, AlignerUtils.TRANSLATION_STRATEGY_MOST_SPECIFIC, targetTermino); ExplainedValue v;/*from w w w . j a va2 s .co m*/ int nbVectorsNotComputed = 0; int nbVectorsComputed = 0; for (Term targetTerm : IteratorUtils.toIterable(targetTermino.singleWordTermIterator())) { if (targetTerm.getFrequency() < minCandidateFrequency) continue; if (targetTerm.isContextVectorComputed()) { nbVectorsComputed++; v = distance.getExplainedValue(translatedSourceVector, targetTerm.getContextVector()); alignedCandidateQueue.add(new TranslationCandidate(targetTerm, AlignmentMethod.DISTRIBUTIONAL, v.getValue(), v.getExplanation())); } } if (nbVectorsNotComputed > 0) { LOGGER.warn(MSG_SEVERAL_VECTORS_NOT_COMPUTED, nbVectorsComputed, nbVectorsNotComputed); } // sort alignedCandidates List<TranslationCandidate> alignedCandidates = Lists.newArrayListWithCapacity(alignedCandidateQueue.size()); alignedCandidates.addAll(alignedCandidateQueue); normalizeCandidateScores(alignedCandidates); return Lists.newArrayList(alignedCandidateQueue); }
From source file:de.tudarmstadt.ukp.dkpro.core.api.frequency.util.FrequencyDistribution.java
/** * Returns the n most frequent samples in the distribution. The ordering within in a group of * samples with the same frequency is undefined. * /*from w w w .j a v a 2 s . co m*/ * @param n * the numer of most frequent samples to return. * @return the n most frequent samples in the distribution. */ public List<T> getMostFrequentSamples(int n) { MinMaxPriorityQueue<TermFreqTuple<T>> topN = MinMaxPriorityQueue.maximumSize(n).create(); for (T key : this.getKeys()) { topN.add(new TermFreqTuple<T>(key, this.getCount(key))); } List<T> topNList = new ArrayList<T>(); while (!topN.isEmpty()) { topNList.add(topN.poll().getKey()); } return topNList; }
From source file:org.dkpro.tc.features.pair.core.ngram.LuceneNGramPFE.java
private FrequencyDistribution<String> getTopNgrams(int topNgramThreshold, String fieldName) throws ResourceInitializationException { FrequencyDistribution<String> topNGrams = new FrequencyDistribution<String>(); MinMaxPriorityQueue<TermFreqTuple> topN = MinMaxPriorityQueue.maximumSize(topNgramThreshold).create(); IndexReader reader;/* w w w . j a va 2 s .c o m*/ try { reader = DirectoryReader.open(FSDirectory.open(luceneDir)); Fields fields = MultiFields.getFields(reader); if (fields != null) { Terms terms = fields.terms(fieldName); if (terms != null) { TermsEnum termsEnum = terms.iterator(null); BytesRef text = null; while ((text = termsEnum.next()) != null) { String term = text.utf8ToString(); long freq = termsEnum.totalTermFreq(); topN.add(new TermFreqTuple(term, freq)); } } } } catch (Exception e) { throw new ResourceInitializationException(e); } int size = topN.size(); for (int i = 0; i < size; i++) { TermFreqTuple tuple = topN.poll(); // System.out.println(tuple.getTerm() + " - " + tuple.getFreq()); topNGrams.addSample(tuple.getTerm(), tuple.getFreq()); } return topNGrams; }