List of usage examples for com.google.common.collect MinMaxPriorityQueue maximumSize
Method signature:

    public static MinMaxPriorityQueue.Builder<Comparable> maximumSize(int maximumSize)

Returns a new builder configured to create MinMaxPriorityQueue instances that are limited to maximumSize elements. Each time a queue grows beyond this bound, it immediately evicts its greatest element according to its comparator, which might be the element that was just added.
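Before the examples from real projects, here is a minimal, self-contained sketch of the pattern they all rely on (the class name and values are illustrative only, not taken from any file below). With the default natural ordering, MinMaxPriorityQueue.maximumSize(k).create() retains the k smallest elements seen, because the queue evicts its greatest element when full; to keep the k largest instead, supply a reversed comparator via orderedBy, as shown here.

import com.google.common.collect.MinMaxPriorityQueue;

import java.util.Comparator;

public class TopNDemo {
    public static void main(String[] args) {
        // Bounded queue that never holds more than 3 elements. Once full, each
        // add() evicts the greatest element per the comparator, which may be
        // the element that was just added.
        MinMaxPriorityQueue<Integer> topN = MinMaxPriorityQueue
                .orderedBy(Comparator.<Integer>reverseOrder()) // so the 3 largest values survive
                .maximumSize(3)
                .create();
        for (int value : new int[] { 5, 1, 9, 3, 7 }) {
            topN.add(value);
        }
        System.out.println(topN.peekFirst()); // 9 (least per comparator = largest value)
        System.out.println(topN.peekLast()); // 5 (greatest per comparator = smallest value)
    }
}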
From source file:model.Heaven.java
public Heaven(int heavenLimit) {
    graphQueue = MinMaxPriorityQueue.maximumSize(heavenLimit).create();
}
From source file:com.davidbracewell.ml.sequence.decoder.HMMViterbi.java
@Override
public double[] decode(SequenceModel<V> raw, Sequence<V> seq) {
    FirstOrderHMM<V> model = Val.of(raw).cast();
    int len = seq.length();
    Feature classFeature = model.getTargetFeature();
    int NC = classFeature.alphabetSize();
    MinMaxPriorityQueue<State> beam = MinMaxPriorityQueue.maximumSize(beamSize).create();
    MinMaxPriorityQueue<State> tempBeam = MinMaxPriorityQueue.maximumSize(beamSize).create();
    Instance firstInstance = seq.generateInstance(0, new double[] { 0 });
    for (int ci = 0; ci < NC; ci++) {
        if (isValidStartTag(classFeature.valueAtIndex(ci))
                && isValidTag(classFeature.valueAtIndex(ci), seq.getData(0))) {
            beam.add(new State(model.pi(ci) + model.beta(ci, firstInstance), ci, null));
        }
    }
    for (int i = 1; i < len; i++) {
        for (int ci = 0; ci < NC; ci++) {
            String thisTag = classFeature.valueAtIndex(ci);
            if (!isValidTag(thisTag, seq.getData(i))) {
                continue;
            }
            if ((i + 1 < seq.length()) || isValidEndTag(thisTag)) {
                double pInst = model.beta(ci, seq.generateInstance(i, new double[] { ci }));
                for (State state : beam) {
                    if (isValidTransition(classFeature.valueAtIndex(state.tag), thisTag)) {
                        tempBeam.add(new State(state.probability // previous probability
                                + model.alpha(state.tag, ci) // transition probability
                                + pInst, // probability of the vector given the tag
                                ci, state));
                    }
                }
            }
        }
        MinMaxPriorityQueue<State> t = beam;
        beam = tempBeam;
        tempBeam = t;
        tempBeam.clear();
    }
    State max = beam.remove();
    double[] prediction = new double[len];
    for (int i = len - 1; i >= 0; i--) {
        prediction[i] = max.tag;
        max = max.prev;
    }
    return prediction;
}
From source file:io.anserini.qa.passage.IdfPassageScorer.java
public IdfPassageScorer(String index, Integer k) throws IOException {
    this.util = new IndexUtils(index);
    this.directory = FSDirectory.open(new File(index).toPath());
    this.reader = DirectoryReader.open(directory);
    this.topPassages = k;
    scoredPassageHeap = MinMaxPriorityQueue.maximumSize(topPassages).create();
    stopWords = new ArrayList<>();
    // Get the stop word list from the resources folder
    InputStream is = getClass().getResourceAsStream("/io/anserini/qa/english-stoplist.txt");
    BufferedReader bRdr = new BufferedReader(new InputStreamReader(is));
    String line;
    while ((line = bRdr.readLine()) != null) {
        if (!line.contains("#")) {
            stopWords.add(line);
        }
    }
    termIdfMap = new HashMap<>();
}
From source file:com.davidbracewell.ml.sequence.decoder.LinearViterbi.java
@Override
public double[] decode(SequenceModel<V> raw, Sequence<V> sequence) {
    LinearSequenceModel<V> model = Val.of(raw).cast();
    final Feature classFeature = model.getTargetFeature();
    final int numStates = classFeature.alphabetSize();
    MinMaxPriorityQueue<State> beam = MinMaxPriorityQueue.maximumSize(beamSize).create();
    ClassificationResult result = model.classifyItem(0, sequence, new double[0]);
    for (int ci = 0; ci < numStates; ci++) {
        if (isValidStartTag(classFeature.valueAtIndex(ci))
                && isValidTag(classFeature.valueAtIndex(ci), sequence.getData(0))) {
            beam.add(new State(Math.log(result.getConfidence(ci)), ci, null, 0));
        }
    }
    MinMaxPriorityQueue<State> tempBeam = MinMaxPriorityQueue.maximumSize(beamSize).create();
    for (int i = 1; i < sequence.length(); i++) {
        while (!beam.isEmpty()) { // go through all the previous states
            State state = beam.removeFirst();
            String previousTag = classFeature.valueAtIndex(state.tag);
            result = model.classifyItem(i, sequence, state.labels());
            for (int ci = 0; ci < numStates; ci++) {
                if (isValidTransition(previousTag, classFeature.valueAtIndex(ci))
                        && ((i + 1 < sequence.length()) || isValidEndTag(classFeature.valueAtIndex(ci)))
                        && isValidTag(classFeature.valueAtIndex(ci), sequence.getData(i))) {
                    tempBeam.add(new State(state.probability + Math.log(result.getConfidence(ci)), ci, state, i));
                }
            }
        }
        beam.addAll(tempBeam);
        tempBeam.clear();
    }
    return beam.remove().labels();
}
From source file:de.tudarmstadt.ukp.dkpro.tc.features.ngram.base.LuceneFeatureExtractorBase.java
@Override
protected FrequencyDistribution<String> getTopNgrams() throws ResourceInitializationException {
    FrequencyDistribution<String> topNGrams = new FrequencyDistribution<String>();
    MinMaxPriorityQueue<TermFreqTuple> topN = MinMaxPriorityQueue.maximumSize(getTopN()).create();
    long ngramVocabularySize = 0;
    IndexReader reader;
    try {
        reader = DirectoryReader.open(FSDirectory.open(luceneDir));
        Fields fields = MultiFields.getFields(reader);
        if (fields != null) {
            Terms terms = fields.terms(getFieldName());
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                BytesRef text = null;
                while ((text = termsEnum.next()) != null) {
                    String term = text.utf8ToString();
                    long freq = termsEnum.totalTermFreq();
                    if (passesScreening(term)) {
                        topN.add(new TermFreqTuple(term, freq));
                        ngramVocabularySize += freq;
                    }
                }
            }
        }
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }
    int size = topN.size();
    for (int i = 0; i < size; i++) {
        TermFreqTuple tuple = topN.poll();
        long absCount = tuple.getFreq();
        double relFrequency = ((double) absCount) / ngramVocabularySize;
        if (relFrequency >= ngramFreqThreshold) {
            topNGrams.addSample(tuple.getTerm(), tuple.getFreq());
        }
    }
    getLogger().log(Level.INFO, "+++ SELECTING THE " + topNGrams.getB() + " MOST FREQUENT NGRAMS");
    return topNGrams;
}
From source file:org.dkpro.tc.features.ngram.base.LuceneFeatureExtractorBase.java
@Override
protected FrequencyDistribution<String> getTopNgrams() throws ResourceInitializationException {
    FrequencyDistribution<String> topNGrams = new FrequencyDistribution<String>();
    MinMaxPriorityQueue<TermFreqTuple> topN = MinMaxPriorityQueue.maximumSize(getTopN()).create();
    long ngramVocabularySize = 0;
    IndexReader reader;
    try {
        reader = DirectoryReader.open(FSDirectory.open(luceneDir));
        Fields fields = MultiFields.getFields(reader);
        if (fields != null) {
            Terms terms = fields.terms(getFieldName());
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                BytesRef text = null;
                while ((text = termsEnum.next()) != null) {
                    String term = text.utf8ToString();
                    long freq = termsEnum.totalTermFreq();
                    if (passesScreening(term)) {
                        topN.add(new TermFreqTuple(term, freq));
                        ngramVocabularySize += freq;
                    }
                }
            }
        }
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }
    int size = topN.size();
    for (int i = 0; i < size; i++) {
        TermFreqTuple tuple = topN.poll();
        long absCount = tuple.getFreq();
        double relFrequency = ((double) absCount) / ngramVocabularySize;
        if (relFrequency >= ngramFreqThreshold) {
            topNGrams.addSample(tuple.getTerm(), tuple.getFreq());
        }
    }
    logSelectionProcess(topNGrams.getB());
    return topNGrams;
}
From source file:QueryAspect.java
private void prepare(String outputDir) {
    queryQueue = MinMaxPriorityQueue.maximumSize(NUM_SLOWEST_QUERIES).create();
    if (outputDir != null) {
        try {
            fullOutput = new PrintWriter(new BufferedWriter(
                    new FileWriter(outputDir + File.separator + "query-report.txt"),
                    1 << 20)); // 1 MiB buffer; the original "2 ^ 20" is XOR in Java and evaluates to 22
            StringBuilder sb = new StringBuilder();
            sb.append("Query Report" + "\n");
            sb.append("-----------------" + "\n\n");
            fullOutput.write(sb.toString());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
From source file:eu.project.ttc.metrics.Explanation.java
/**
 * Builds an explanation object.
 *
 * @param topN the maximum number of entries to retain
 */
public Explanation(int topN) {
    super();
    this.entries = MinMaxPriorityQueue.maximumSize(topN).create();
}
From source file:org.sosy_lab.ccvisu.clustering.ClustererMinDistPerc.java
@Override
protected List<Group> internalCreateClustersOfLayout() throws InterruptedException {
    Stopwatch stopwatch = Stopwatch.createAndStart();
    List<Group> clusters = new ArrayList<Group>();
    List<GraphVertex> vertices = graphData.getVertices();

    // Initially put each node in a separate cluster.
    setProgress(0, vertices.size(), "Creating initial clusters.");
    double minX = Double.MAX_VALUE;
    double minY = Double.MAX_VALUE;
    double minZ = Double.MAX_VALUE;
    double maxX = Double.MIN_VALUE;
    double maxY = Double.MIN_VALUE;
    double maxZ = Double.MIN_VALUE;
    int clusterSeqNo = 0;
    for (GraphVertex vertex : vertices) {
        Group vertexCluster = new Group("Cluster " + clusterSeqNo++, graphData);
        vertexCluster.setKind(GroupKind.CLUSTER);
        vertexCluster.addNode(vertex);
        clusters.add(vertexCluster);
        maxX = Math.max(maxX, vertex.getPosition().x);
        maxY = Math.max(maxY, vertex.getPosition().y);
        maxZ = Math.max(maxZ, vertex.getPosition().z);
        minX = Math.min(minX, vertex.getPosition().x);
        minY = Math.min(minY, vertex.getPosition().y);
        minZ = Math.min(minZ, vertex.getPosition().z);
    }

    // Calculate the diagonal of the layout.
    double layoutDistanceX = Math.abs(maxX - minX);
    double layoutDistanceY = Math.abs(maxY - minY);
    double layoutDistanceZ = Math.abs(maxZ - minZ);
    double layoutDiagonal = Math.sqrt(layoutDistanceX * layoutDistanceX
            + layoutDistanceZ * layoutDistanceZ + layoutDistanceY * layoutDistanceY);

    // Calculate the parameters.
    int initialNumOfClusters = clusters.size();
    int numberOfClustersWithNodes = initialNumOfClusters;
    double maxDistanceToAutoMerge = layoutDiagonal * maxDistancePercentToAutoMerge;
    double minClusterDistanceAbsolute = layoutDiagonal * minClusterDistancePercent;

    // Aggregate clusters until there are only k clusters left.
    int iterationNumber = 0;
    int mergesInIteration = 0;
    do {
        iterationNumber++;
        mergesInIteration = 0;
        HashMap<Group, RadiusOfGroup> fixedBarycenters = new HashMap<Group, RadiusOfGroup>();
        setProgress(initialNumOfClusters - numberOfClustersWithNodes, initialNumOfClusters,
                "Creating clusters");
        System.out.println("Num of non-empty clusters: " + numberOfClustersWithNodes);
        // Calculate the distance between all clusters.
        // Merge clusters if their distance is less than maxDistanceToAutoMerge.
        MinMaxPriorityQueue<ClusterPair> nearestPairs = MinMaxPriorityQueue.maximumSize(100).create();
        int highestClusterWithRadius = -1;
        for (int a = clusters.size() - 1; a >= 0; a--) {
            Group clusterA = clusters.get(a);
            if (clusterA.getNodes().size() > 0) {
                RadiusOfGroup barycenterA = null;
                if (a > highestClusterWithRadius) {
                    fixedBarycenters.put(clusterA, new RadiusOfGroup(clusterA.getNodes()));
                    highestClusterWithRadius = a;
                } else {
                    barycenterA = fixedBarycenters.get(clusterA);
                }
                if (Thread.interrupted()) {
                    throw new InterruptedException();
                }
                for (int b = a - 1; b >= 0; b--) {
                    Group clusterB = clusters.get(b);
                    if (clusterB.getNodes().size() > 0) {
                        RadiusOfGroup barycenterB = null;
                        if (b > highestClusterWithRadius) {
                            fixedBarycenters.put(clusterB, new RadiusOfGroup(clusterB.getNodes()));
                            highestClusterWithRadius = b;
                        } else {
                            barycenterB = fixedBarycenters.get(clusterB);
                        }
                        ClusterPair clusterPair = new ClusterPair(clusterA, clusterB, barycenterA, barycenterB);
                        double pairDistance = clusterPair.getEucDistanceBetweenBarycenters();
                        // First stage merging: merge clusters without recalculating the
                        // distances to the merged clusters.
                        // Only merge clusters having a distance less than...
                        if (pairDistance <= minClusterDistanceAbsolute) {
                            if (pairDistance < maxDistanceToAutoMerge) {
                                if (numberOfClustersWithNodes > numberOfClusters) {
                                    mergeClusters(clusterB, clusterA);
                                    mergesInIteration++;
                                    numberOfClustersWithNodes--;
                                }
                            } else {
                                nearestPairs.add(clusterPair);
                            }
                        }
                    }
                }
            }
        }
        // Second stage merging: merge the nearest remaining pairs.
        int mergesInSecondPhase = 0;
        double nearestPairDistance = -1;
        do {
            if (numberOfClustersWithNodes > numberOfClusters) {
                ClusterPair pair = nearestPairs.poll();
                if (pair != null) {
                    double pairDistance = pair.getEucDistanceBetweenBarycenters();
                    if (nearestPairDistance == -1) {
                        nearestPairDistance = pairDistance;
                    }
                    if (mergesInSecondPhase == 0 || ((pairDistance / nearestPairDistance) - 1 <= 0.01)) {
                        Group sourceGroup = pair.clusterA;
                        Group targetGroup = pair.clusterB;
                        if (targetGroup.getNodes().size() == 0) {
                            sourceGroup = pair.clusterB;
                            targetGroup = pair.clusterA;
                        }
                        if (sourceGroup.getNodes().size() > 0 && targetGroup.getNodes().size() > 0) {
                            if (numberOfClustersWithNodes > numberOfClusters) {
                                mergeClusters(sourceGroup, targetGroup);
                                numberOfClustersWithNodes--;
                                mergesInIteration++;
                                mergesInSecondPhase++;
                            }
                        }
                    } else {
                        break;
                    }
                } else {
                    break;
                }
            } else {
                break;
            }
        } while (true);
        // More merging of clusters necessary?
        System.out.println(String.format("%d merges in iteration %d", mergesInIteration, iterationNumber));
    } while (mergesInIteration > 0);

    // Remove empty clusters.
    for (int i = clusters.size() - 1; i > 0; i--) {
        Group group = clusters.get(i);
        if (group.getNodes().size() == 0) {
            clusters.remove(i);
        } else {
            System.out.println(String.format("%s with %d nodes.", group.getName(), group.getNodes().size()));
        }
    }
    setProgress(1, 1, stopwatch.stop().toString());
    return clusters;
}
From source file:org.dkpro.tc.features.pair.core.ngram.LuceneNGramCPFE.java
private FrequencyDistribution<String> getTopNgramsCombo(int topNgramThreshold, String fieldName)
        throws ResourceInitializationException {
    FrequencyDistribution<String> topNGrams = new FrequencyDistribution<String>();
    MinMaxPriorityQueue<TermFreqTuple> topN = MinMaxPriorityQueue.maximumSize(topNgramThreshold).create();
    IndexReader reader;
    try {
        reader = DirectoryReader.open(FSDirectory.open(luceneDir));
        Fields fields = MultiFields.getFields(reader);
        if (fields != null) {
            Terms terms = fields.terms(fieldName);
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                BytesRef text = null;
                while ((text = termsEnum.next()) != null) {
                    String term = text.utf8ToString();
                    long freq = termsEnum.totalTermFreq();
                    // Keep only combinations whose parts are among the most frequent
                    // n-grams of each view and whose combined size is within bounds.
                    String combo1 = term.split(ComboUtils.JOINT)[0];
                    String combo2 = term.split(ComboUtils.JOINT)[1];
                    int combinedSize = combo1.split("_").length + combo2.split("_").length;
                    if (topKSetView1.contains(combo1) && topKSet.contains(combo1)
                            && topKSetView2.contains(combo2) && topKSet.contains(combo2)
                            && combinedSize <= ngramMaxNCombo && combinedSize >= ngramMinNCombo) {
                        topN.add(new TermFreqTuple(term, freq));
                    }
                }
            }
        }
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }
    int size = topN.size();
    for (int i = 0; i < size; i++) {
        TermFreqTuple tuple = topN.poll();
        topNGrams.addSample(tuple.getTerm(), tuple.getFreq());
    }
    return topNGrams;
}