Example usage for com.google.common.collect MinMaxPriorityQueue maximumSize

Introduction

This page collects example usages of com.google.common.collect.MinMaxPriorityQueue.maximumSize from open-source projects.

Prototype

public static MinMaxPriorityQueue.Builder<Comparable> maximumSize(int maximumSize)

Creates and returns a new builder, configured to build MinMaxPriorityQueue instances limited to maximumSize elements: once a queue is full, adding another element evicts the greatest element under the queue's ordering, which may be the element just added.
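
A minimal sketch of the builder chain (class name hypothetical, assuming Guava is on the classpath), showing the eviction behavior with natural ordering:

import com.google.common.collect.MinMaxPriorityQueue;

public class BoundedQueueSketch {
    public static void main(String[] args) {
        // Natural ordering, at most 3 elements: each overflow evicts the greatest.
        MinMaxPriorityQueue<Integer> queue = MinMaxPriorityQueue.maximumSize(3).create();
        for (int i : new int[] { 5, 1, 4, 2, 3 }) {
            queue.add(i);
        }
        System.out.println(queue.pollFirst()); // 1 (least)
        System.out.println(queue.pollLast());  // 3 (greatest of the retained {1, 2, 3})
    }
}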

Usage

From source file: model.Heaven.java

public Heaven(int heavenLimit) {
    graphQueue = MinMaxPriorityQueue.maximumSize(heavenLimit).create();
}

From source file: com.davidbracewell.ml.sequence.decoder.HMMViterbi.java

@Override
public double[] decode(SequenceModel<V> raw, Sequence<V> seq) {
    FirstOrderHMM<V> model = Val.of(raw).cast();
    int len = seq.length();
    Feature classFeature = model.getTargetFeature();
    int NC = classFeature.alphabetSize();
    MinMaxPriorityQueue<State> beam = MinMaxPriorityQueue.maximumSize(beamSize).create();
    MinMaxPriorityQueue<State> tempBeam = MinMaxPriorityQueue.maximumSize(beamSize).create();

    Instance firstInstance = seq.generateInstance(0, new double[] { 0 });
    for (int ci = 0; ci < NC; ci++) {
        if (isValidStartTag(classFeature.valueAtIndex(ci))
                && isValidTag(classFeature.valueAtIndex(ci), seq.getData(0))) {
            beam.add(new State(model.pi(ci) + model.beta(ci, firstInstance), ci, null));
        }
    }

    for (int i = 1; i < len; i++) {
        for (int ci = 0; ci < NC; ci++) {
            String thisTag = classFeature.valueAtIndex(ci);
            if (!isValidTag(thisTag, seq.getData(i))) {
                continue;
            }
            if ((i + 1 < seq.length()) || isValidEndTag(thisTag)) {
                double pInst = model.beta(ci, seq.generateInstance(i, new double[] { ci }));
                for (State state : beam) {
                    if (isValidTransition(classFeature.valueAtIndex(state.tag), thisTag)) {
                        tempBeam.add(new State(state.probability + //previous probability
                                model.alpha(state.tag, ci) + //transition probability
                                pInst, //probability of the vector given the tag
                                ci, state));
                    }
                }
            }
        }
        MinMaxPriorityQueue<State> t = beam;
        beam = tempBeam;
        tempBeam = t;
        tempBeam.clear();
    }

    State max = beam.remove();
    double[] prediction = new double[len];
    for (int i = len - 1; i >= 0; i--) {
        prediction[i] = max.tag;
        max = max.prev;
    }

    return prediction;
}
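
Here the bounded queue is the beam itself: beamSize caps the number of live hypotheses, and each add beyond that evicts the greatest State under its ordering. Keeping the most probable states therefore relies on State comparing so that less probable states rank greater (consistent with beam.remove() returning the best state above). A hedged sketch of the same pattern with an explicit comparator and a hypothetical BeamState class, not the project's State class:

import java.util.Comparator;
import com.google.common.collect.MinMaxPriorityQueue;

public class BeamSketch {
    // Hypothetical stand-in for the decoder's State class.
    static class BeamState {
        final double logProb;
        final int tag;
        BeamState(double logProb, int tag) {
            this.logProb = logProb;
            this.tag = tag;
        }
    }

    public static void main(String[] args) {
        int beamSize = 2; // illustrative beam width
        // Order best-first (highest log-probability): overflow evicts the worst
        // state, and removeFirst() pops the best.
        MinMaxPriorityQueue<BeamState> beam = MinMaxPriorityQueue
                .orderedBy(Comparator.comparingDouble((BeamState s) -> s.logProb).reversed())
                .maximumSize(beamSize)
                .create();
        beam.add(new BeamState(-1.2, 0));
        beam.add(new BeamState(-0.4, 1));
        beam.add(new BeamState(-3.0, 2)); // evicted immediately: worst of the three
        System.out.println(beam.removeFirst().tag); // 1, the most probable state
    }
}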

From source file: io.anserini.qa.passage.IdfPassageScorer.java

public IdfPassageScorer(String index, Integer k) throws IOException {
    this.util = new IndexUtils(index);
    this.directory = FSDirectory.open(new File(index).toPath());
    this.reader = DirectoryReader.open(directory);
    this.topPassages = k;
    scoredPassageHeap = MinMaxPriorityQueue.maximumSize(topPassages).create();
    stopWords = new ArrayList<>();

    //Get file from resources folder
    InputStream is = getClass().getResourceAsStream("/io/anserini/qa/english-stoplist.txt");
    BufferedReader bRdr = new BufferedReader(new InputStreamReader(is));
    String line;
    while ((line = bRdr.readLine()) != null) {
        if (!line.contains("#")) {
            stopWords.add(line);
        }
    }

    termIdfMap = new HashMap<>();
}

From source file: com.davidbracewell.ml.sequence.decoder.LinearViterbi.java

@Override
public double[] decode(SequenceModel<V> raw, Sequence<V> sequence) {
    LinearSequenceModel<V> model = Val.of(raw).cast();
    final Feature classFeature = model.getTargetFeature();
    final int numStates = classFeature.alphabetSize();

    MinMaxPriorityQueue<State> beam = MinMaxPriorityQueue.maximumSize(beamSize).create();
    ClassificationResult result = model.classifyItem(0, sequence, new double[0]);
    for (int ci = 0; ci < numStates; ci++) {
        if (isValidStartTag(classFeature.valueAtIndex(ci))
                && isValidTag(classFeature.valueAtIndex(ci), sequence.getData(0))) {
            beam.add(new State(Math.log(result.getConfidence(ci)), ci, null, 0));
        }
    }

    MinMaxPriorityQueue<State> tempBeam = MinMaxPriorityQueue.maximumSize(beamSize).create();
    for (int i = 1; i < sequence.length(); i++) {
        while (!beam.isEmpty()) { // go through all the previous states
            State state = beam.removeFirst();
            String previousTag = classFeature.valueAtIndex(state.tag);
            result = model.classifyItem(i, sequence, state.labels());
            for (int ci = 0; ci < numStates; ci++) {
                if (isValidTransition(previousTag, classFeature.valueAtIndex(ci))
                        && ((i + 1 < sequence.length()) || isValidEndTag(classFeature.valueAtIndex(ci)))
                        && isValidTag(classFeature.valueAtIndex(ci), sequence.getData(i))) {
                    tempBeam.add(
                            new State(state.probability + Math.log(result.getConfidence(ci)), ci, state, i));
                }
            }
        }
        beam.addAll(tempBeam);
        tempBeam.clear();
    }

    return beam.remove().labels();
}

From source file: de.tudarmstadt.ukp.dkpro.tc.features.ngram.base.LuceneFeatureExtractorBase.java

@Override
protected FrequencyDistribution<String> getTopNgrams() throws ResourceInitializationException {

    FrequencyDistribution<String> topNGrams = new FrequencyDistribution<String>();

    MinMaxPriorityQueue<TermFreqTuple> topN = MinMaxPriorityQueue.maximumSize(getTopN()).create();

    long ngramVocabularySize = 0;
    IndexReader reader;
    try {
        reader = DirectoryReader.open(FSDirectory.open(luceneDir));
        Fields fields = MultiFields.getFields(reader);
        if (fields != null) {
            Terms terms = fields.terms(getFieldName());
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                BytesRef text = null;
                while ((text = termsEnum.next()) != null) {
                    String term = text.utf8ToString();
                    long freq = termsEnum.totalTermFreq();
                    if (passesScreening(term)) {
                        topN.add(new TermFreqTuple(term, freq));
                        ngramVocabularySize += freq;
                    }
                }
            }
        }
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }

    int size = topN.size();
    for (int i = 0; i < size; i++) {
        TermFreqTuple tuple = topN.poll();
        long absCount = tuple.getFreq();
        double relFrequency = ((double) absCount) / ngramVocabularySize;

        if (relFrequency >= ngramFreqThreshold) {
            topNGrams.addSample(tuple.getTerm(), tuple.getFreq());
        }
    }

    getLogger().log(Level.INFO, "+++ SELECTING THE " + topNGrams.getB() + " MOST FREQUENT NGRAMS");

    return topNGrams;
}
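
The drain loop at the end is the other half of the idiom: with the queue bounded at getTopN(), poll() removes the least element first, so TermFreqTuple is assumed to order terms by descending frequency, which makes overflow evict the rarest term and the loop yield terms most-frequent-first. A hedged sketch with a hypothetical tuple type:

import java.util.Comparator;
import com.google.common.collect.MinMaxPriorityQueue;

public class TopNSketch {
    // Hypothetical tuple; DKPro's TermFreqTuple is assumed to compare the same way.
    static class TermFreq {
        final String term;
        final long freq;
        TermFreq(String term, long freq) {
            this.term = term;
            this.freq = freq;
        }
    }

    public static void main(String[] args) {
        // Descending frequency: overflow evicts the rarest term and
        // poll() returns the most frequent term first.
        MinMaxPriorityQueue<TermFreq> topN = MinMaxPriorityQueue
                .orderedBy(Comparator.comparingLong((TermFreq t) -> t.freq).reversed())
                .maximumSize(2)
                .create();
        topN.add(new TermFreq("the", 100));
        topN.add(new TermFreq("of", 60));
        topN.add(new TermFreq("zebra", 1)); // evicted: least frequent
        while (!topN.isEmpty()) {
            TermFreq t = topN.poll();
            System.out.println(t.term + " " + t.freq); // "the 100", then "of 60"
        }
    }
}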

From source file: org.dkpro.tc.features.ngram.base.LuceneFeatureExtractorBase.java

@Override
protected FrequencyDistribution<String> getTopNgrams() throws ResourceInitializationException {

    FrequencyDistribution<String> topNGrams = new FrequencyDistribution<String>();

    MinMaxPriorityQueue<TermFreqTuple> topN = MinMaxPriorityQueue.maximumSize(getTopN()).create();

    long ngramVocabularySize = 0;
    IndexReader reader;
    try {
        reader = DirectoryReader.open(FSDirectory.open(luceneDir));
        Fields fields = MultiFields.getFields(reader);
        if (fields != null) {
            Terms terms = fields.terms(getFieldName());
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                BytesRef text = null;
                while ((text = termsEnum.next()) != null) {
                    String term = text.utf8ToString();
                    long freq = termsEnum.totalTermFreq();
                    if (passesScreening(term)) {
                        topN.add(new TermFreqTuple(term, freq));
                        ngramVocabularySize += freq;
                    }
                }
            }
        }
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }

    int size = topN.size();
    for (int i = 0; i < size; i++) {
        TermFreqTuple tuple = topN.poll();
        long absCount = tuple.getFreq();
        double relFrequency = ((double) absCount) / ngramVocabularySize;

        if (relFrequency >= ngramFreqThreshold) {
            topNGrams.addSample(tuple.getTerm(), tuple.getFreq());
        }
    }

    logSelectionProcess(topNGrams.getB());

    return topNGrams;
}

From source file: QueryAspect.java

private void prepare(String outputDir) {
    queryQueue = MinMaxPriorityQueue.maximumSize(NUM_SLOWEST_QUERIES).create();
    if (outputDir != null) {
        try {
            fullOutput = new PrintWriter(new BufferedWriter(
                    new FileWriter(outputDir + File.separator + "query-report.txt"), 1 << 20)); // '2 ^ 20' is XOR (= 22); 1 << 20 gives the intended 1 MiB buffer
            StringBuilder sb = new StringBuilder();
            sb.append("Query Report" + "\n");
            sb.append("-----------------" + "\n\n");
            fullOutput.write(sb.toString());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}

From source file: eu.project.ttc.metrics.Explanation.java

/**
 * Builds an explanation object.
 * 
 * @param topN the maximum number of entries to retain
 */
public Explanation(int topN) {
    super();
    this.entries = MinMaxPriorityQueue.maximumSize(topN).create();
}

From source file: org.sosy_lab.ccvisu.clustering.ClustererMinDistPerc.java

@Override
protected List<Group> internalCreateClustersOfLayout() throws InterruptedException {
    Stopwatch stopwatch = Stopwatch.createAndStart();

    List<Group> clusters = new ArrayList<Group>();
    List<GraphVertex> vertices = graphData.getVertices();

    //
    //
    // Initially put each node in a separate cluster.
    //
    setProgress(0, vertices.size(), "Creating initial clusters.");

    double minX = Double.MAX_VALUE;
    double minY = Double.MAX_VALUE;
    double minZ = Double.MAX_VALUE;

    // Double.MIN_VALUE is the smallest *positive* double; initialize with
    // -Double.MAX_VALUE so negative coordinates are handled correctly.
    double maxX = -Double.MAX_VALUE;
    double maxY = -Double.MAX_VALUE;
    double maxZ = -Double.MAX_VALUE;

    int clusterSeqNo = 0;
    for (GraphVertex vertex : vertices) {
        Group vertexCluster = new Group("Cluster " + clusterSeqNo++, graphData);
        vertexCluster.setKind(GroupKind.CLUSTER);
        vertexCluster.addNode(vertex);
        clusters.add(vertexCluster);

        maxX = Math.max(maxX, vertex.getPosition().x);
        maxY = Math.max(maxY, vertex.getPosition().y);
        maxZ = Math.max(maxZ, vertex.getPosition().z);

        minX = Math.min(minX, vertex.getPosition().x);
        minY = Math.min(minY, vertex.getPosition().y);
        minZ = Math.min(minZ, vertex.getPosition().z);
    }

    //
    //
    // Calculate the diagonal of the layout.
    //
    double layoutDistanceX = Math.abs(maxX - minX);
    double layoutDistanceY = Math.abs(maxY - minY);
    double layoutDistanceZ = Math.abs(maxZ - minZ);

    double layoutDiagonal = Math.sqrt(layoutDistanceX * layoutDistanceX + layoutDistanceZ * layoutDistanceZ
            + layoutDistanceY * layoutDistanceY);

    //
    //
    // Calculate the parameters.
    //
    int initialNumOfClusters = clusters.size();
    int numberOfClustersWithNodes = initialNumOfClusters;
    double maxDistanceToAutoMerge = layoutDiagonal * maxDistancePercentToAutoMerge;
    double minClusterDistanceAbsolute = layoutDiagonal * minClusterDistancePercent;

    //
    //
    // Aggregate clusters until only k clusters remain.
    //
    int iterationNumber = 0;
    int mergesInIteration = 0;

    do {
        iterationNumber++;
        mergesInIteration = 0;

        HashMap<Group, RadiusOfGroup> fixedBarycenters = new HashMap<Group, RadiusOfGroup>();

        setProgress(initialNumOfClusters - numberOfClustersWithNodes, initialNumOfClusters,
                "Creating clusters");
        System.out.println("Num of non-empty clusters: " + numberOfClustersWithNodes);

        // Calculate the distance between all clusters.
        // Merge clusters if their distance is less than maxDistanceToAutoMerge.
        MinMaxPriorityQueue<ClusterPair> nearestPairs = MinMaxPriorityQueue.maximumSize(100).create();

        int highestClusterWithRadius = -1;
        for (int a = clusters.size() - 1; a >= 0; a--) {
            Group clusterA = clusters.get(a);
            if (clusterA.getNodes().size() > 0) {
                RadiusOfGroup barycenterA = null;

                if (a > highestClusterWithRadius) {
                    fixedBarycenters.put(clusterA, new RadiusOfGroup(clusterA.getNodes()));
                    highestClusterWithRadius = a;
                } else {
                    barycenterA = fixedBarycenters.get(clusterA);
                }

                if (Thread.interrupted()) {
                    throw new InterruptedException();
                }

                for (int b = a - 1; b >= 0; b--) {
                    Group clusterB = clusters.get(b);
                    if (clusterB.getNodes().size() > 0) {
                        RadiusOfGroup barycenterB = null;
                        if (b > highestClusterWithRadius) {
                            fixedBarycenters.put(clusterB, new RadiusOfGroup(clusterB.getNodes()));
                            highestClusterWithRadius = b;
                        } else {
                            barycenterB = fixedBarycenters.get(clusterB);
                        }

                        ClusterPair clusterPair = new ClusterPair(clusterA, clusterB, barycenterA, barycenterB);
                        double pairDistance = clusterPair.getEucDistanceBetweenBarycenters();

                        // First stage merging:
                        //    Merge clusters without recalculating the distances to the
                        //    merged clusters.
                        // * Only merge clusters having a distance less than...

                        if (pairDistance <= minClusterDistanceAbsolute) {
                            if (pairDistance < maxDistanceToAutoMerge) {
                                if (numberOfClustersWithNodes > numberOfClusters) {
                                    mergeClusters(clusterB, clusterA);
                                    mergesInIteration++;
                                    numberOfClustersWithNodes--;
                                }
                            } else {
                                nearestPairs.add(clusterPair);
                            }
                        }
                    }
                }
            }
        }

        int mergesInSecondPhase = 0;
        double nearestPairDistance = -1;

        do {
            if (numberOfClustersWithNodes > numberOfClusters) {
                ClusterPair pair = nearestPairs.poll();
                if (pair != null) {
                    double pairDistance = pair.getEucDistanceBetweenBarycenters();
                    if (nearestPairDistance == -1) {
                        nearestPairDistance = pairDistance;
                    }

                    if (mergesInSecondPhase == 0 || ((pairDistance / nearestPairDistance) - 1 <= 0.01)) {

                        Group sourceGroup = pair.clusterA;
                        Group targetGroup = pair.clusterB;
                        if (targetGroup.getNodes().size() == 0) {
                            sourceGroup = pair.clusterB;
                            targetGroup = pair.clusterA;
                        }

                        if (sourceGroup.getNodes().size() > 0 && targetGroup.getNodes().size() > 0) {
                            if (numberOfClustersWithNodes > numberOfClusters) {
                                mergeClusters(sourceGroup, targetGroup);
                                numberOfClustersWithNodes--;
                                mergesInIteration++;
                                mergesInSecondPhase++;
                            }
                        }

                    } else {
                        break;
                    }
                } else {
                    break;
                }
            } else {
                break;
            }
        } while (true);

        // More merging of clusters necessary?
        System.out.println(String.format("%d merges in iteration %d", mergesInIteration, iterationNumber));

    } while (mergesInIteration > 0);

    for (int i = clusters.size() - 1; i > 0; i--) {
        Group group = clusters.get(i);
        if (group.getNodes().size() == 0) {
            clusters.remove(i);
        } else {
            System.out.println(String.format("%s with %d nodes.", group.getName(), group.getNodes().size()));
        }
    }

    setProgress(1, 1, stopwatch.stop().toString());

    return clusters;
}
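
The maximumSize(100) queue above inverts the usual top-N ordering: assuming ClusterPair compares by ascending barycenter distance, overflow evicts the farthest pair, so nearestPairs retains the 100 closest candidates and the later poll() loop processes them nearest-first. A hedged sketch with a hypothetical pair type:

import java.util.Comparator;
import com.google.common.collect.MinMaxPriorityQueue;

public class NearestPairsSketch {
    // Hypothetical pair type; ClusterPair is assumed to order by ascending distance.
    static class Pair {
        final String a;
        final String b;
        final double distance;
        Pair(String a, String b, double distance) {
            this.a = a;
            this.b = b;
            this.distance = distance;
        }
    }

    public static void main(String[] args) {
        // Ascending distance: overflow evicts the *farthest* pair, so the
        // queue keeps the nearest candidates and poll() is nearest-first.
        MinMaxPriorityQueue<Pair> nearestPairs = MinMaxPriorityQueue
                .orderedBy(Comparator.comparingDouble((Pair p) -> p.distance))
                .maximumSize(2)
                .create();
        nearestPairs.add(new Pair("c1", "c2", 0.3));
        nearestPairs.add(new Pair("c2", "c3", 0.1));
        nearestPairs.add(new Pair("c1", "c3", 0.9)); // evicted: farthest
        System.out.println(nearestPairs.poll().distance); // 0.1
    }
}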

From source file: org.dkpro.tc.features.pair.core.ngram.LuceneNGramCPFE.java

private FrequencyDistribution<String> getTopNgramsCombo(int topNgramThreshold, String fieldName)
        throws ResourceInitializationException {

    FrequencyDistribution<String> topNGrams = new FrequencyDistribution<String>();

    MinMaxPriorityQueue<TermFreqTuple> topN = MinMaxPriorityQueue.maximumSize(topNgramThreshold).create();
    IndexReader reader;
    try {
        reader = DirectoryReader.open(FSDirectory.open(luceneDir));
        Fields fields = MultiFields.getFields(reader);
        if (fields != null) {
            Terms terms = fields.terms(fieldName);
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                BytesRef text = null;
                while ((text = termsEnum.next()) != null) {
                    String term = text.utf8ToString();
                    long freq = termsEnum.totalTermFreq();
                    //add conditions here, like ngram1 is in most freq ngrams1...
                    String combo1 = term.split(ComboUtils.JOINT)[0];
                    String combo2 = term.split(ComboUtils.JOINT)[1];
                    int combinedSize = combo1.split("_").length + combo2.split("_").length;
                    if (topKSetView1.contains(combo1) && topKSet.contains(combo1)
                            && topKSetView2.contains(combo2) && topKSet.contains(combo2)
                            && combinedSize <= ngramMaxNCombo && combinedSize >= ngramMinNCombo) {
                        //print out here for testing
                        topN.add(new TermFreqTuple(term, freq));
                    }
                }
            }
        }
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }

    int size = topN.size();
    for (int i = 0; i < size; i++) {
        TermFreqTuple tuple = topN.poll();
        // System.out.println(tuple.getTerm() + " - " + tuple.getFreq());
        topNGrams.addSample(tuple.getTerm(), tuple.getFreq());
    }

    return topNGrams;
}