Example usage for org.apache.lucene.classification ClassificationResult getScore

List of usage examples for org.apache.lucene.classification ClassificationResult getScore

Introduction

On this page you can find example usages of org.apache.lucene.classification ClassificationResult getScore.

Prototype

public double getScore() 

Source Link

Document

Retrieves the score of the classification result.

Usage

From source file:SimpleNaiveBayesClassifier.java

License:Apache License

/**
 * {@inheritDoc}/*from   w w  w  .j ava 2  s.c o  m*/
 */
@Override
public ClassificationResult<BytesRef> assignClass(String inputDocument) throws IOException {
    List<ClassificationResult<BytesRef>> assignedClasses = assignClassNormalizedList(inputDocument);
    ClassificationResult<BytesRef> assignedClass = null;
    double maxscore = -Double.MAX_VALUE;
    for (ClassificationResult<BytesRef> c : assignedClasses) {
        if (c.getScore() > maxscore) {
            assignedClass = c;
            maxscore = c.getScore();
        }
    }
    return assignedClass;
}

From source file:SimpleNaiveBayesClassifier.java

License:Apache License

/**
 * Normalizes log-scale classification scores so that each returned score equals
 * {@code exp(score) / sum(exp(score_i))} over all supplied results.
 *
 * <p>The computation is done in log space: the score of the first element after sorting is
 * subtracted before exponentiating and added back afterwards. The input list is sorted in place
 * as a side effect.
 *
 * @param assignedClasses the list of assigned classes; reordered by this call
 * @return a new list with one normalized result per input result, or an empty list when the
 *         input is empty
 */
protected ArrayList<ClassificationResult<BytesRef>> normClassificationResults(
        List<ClassificationResult<BytesRef>> assignedClasses) {
    final ArrayList<ClassificationResult<BytesRef>> normalized = new ArrayList<>();
    if (assignedClasses.isEmpty()) {
        return normalized;
    }

    Collections.sort(assignedClasses);
    // Shift value "a": presumably the best (negative, closest to 0) score, which relies on the
    // natural ordering of ClassificationResult putting it first -- not visible in this block.
    final double shift = assignedClasses.get(0).getScore();

    // sum(exp(x_n - a)); each exponent is expected to be <= 0, keeping exp() in a safe range
    double shiftedExpSum = 0;
    for (ClassificationResult<BytesRef> result : assignedClasses) {
        shiftedExpSum += Math.exp(result.getScore() - shift);
    }

    // a + log(sum(exp(x_n - a))) == log(sum(exp(x_n)))
    final double logTotal = shift + Math.log(shiftedExpSum);

    // x / sum == exp(log(x) - log(sum))
    for (ClassificationResult<BytesRef> result : assignedClasses) {
        final double normalizedScore = Math.exp(result.getScore() - logTotal);
        normalized.add(new ClassificationResult<>(result.getAssignedClass(), normalizedScore));
    }
    return normalized;
}

From source file:KNearestNeighbourDocumentClassifier.java

License:Apache License

/**
 * {@inheritDoc}/*from www  .jav  a2s . c o  m*/
 */
@Override
public ClassificationResult<BytesRef> assignClass(Document document) throws IOException {
    TopDocs knnResults = knnSearch(document);
    List<ClassificationResult<BytesRef>> assignedClasses = buildListFromTopDocs(knnResults);
    ClassificationResult<BytesRef> assignedClass = null;
    double maxscore = -Double.MAX_VALUE;
    for (ClassificationResult<BytesRef> cl : assignedClasses) {
        if (cl.getScore() > maxscore) {
            assignedClass = cl;
            maxscore = cl.getScore();
        }
    }
    return assignedClass;
}

From source file:KNearestNeighborClassifier.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>Returns the highest-scoring entry of the list built from the k-NN search hits for
 * {@code text}, or {@code null} if no entry has a score strictly above
 * {@code -Double.MAX_VALUE}.
 */
@Override
public ClassificationResult<BytesRef> assignClass(String text) throws IOException {
    final List<ClassificationResult<BytesRef>> candidates = buildListFromTopDocs(knnSearch(text));

    ClassificationResult<BytesRef> winner = null;
    double topScore = -Double.MAX_VALUE;
    for (ClassificationResult<BytesRef> candidate : candidates) {
        final double score = candidate.getScore();
        // negated '>' (rather than '<=') so a NaN score is skipped exactly like before
        if (!(score > topScore)) {
            continue;
        }
        winner = candidate;
        topScore = score;
    }
    return winner;
}

From source file:KNearestNeighborClassifier.java

License:Apache License

/**
 * Builds a list of classification results from search results.
 *
 * <p>For every hit whose stored document has a value for the class field, the hit is counted
 * for that class and its score relative to the top score is accumulated as a boost. Each class
 * then gets a score derived from its accumulated boost divided by {@code k}; when fewer than
 * {@code k} hits carried a class value, the scores are rescaled by {@code k / hits}.
 *
 * @param topDocs the search results as a {@link TopDocs} object
 * @return a {@link List} of {@link ClassificationResult}, one for each class found in the hits
 * @throws IOException if it's not possible to get the stored value of class field
 */
protected List<ClassificationResult<BytesRef>> buildListFromTopDocs(TopDocs topDocs) throws IOException {
    final Map<BytesRef, Integer> hitsPerClass = new HashMap<>();
    // boost based on how well the documents of each class rank in topDocs
    final Map<BytesRef, Double> boostPerClass = new HashMap<>();
    final float bestScore = topDocs.getMaxScore();

    for (ScoreDoc hit : topDocs.scoreDocs) {
        final IndexableField classField = indexSearcher.doc(hit.doc).getField(classFieldName);
        if (classField == null) {
            continue; // document has no class value, it does not vote
        }
        final BytesRef classLabel = new BytesRef(classField.stringValue());

        // update count
        final Integer seen = hitsPerClass.get(classLabel);
        hitsPerClass.put(classLabel, seen == null ? 1 : seen + 1);

        // update boost: this hit's score relative to the best score
        final double hitBoost = hit.score / bestScore;
        final Double boostSoFar = boostPerClass.get(classLabel);
        boostPerClass.put(classLabel, boostSoFar == null ? hitBoost : boostSoFar + hitBoost);
    }

    final List<ClassificationResult<BytesRef>> scored = new ArrayList<>();
    int labelledHits = 0;
    for (Map.Entry<BytesRef, Integer> entry : hitsPerClass.entrySet()) {
        final BytesRef classLabel = entry.getKey();
        final int hits = entry.getValue();
        // average boost per hit; the original note states it is meant to lie in 0<b<1
        final double meanBoost = boostPerClass.get(classLabel) / hits;
        scored.add(new ClassificationResult<>(classLabel.clone(), (hits * meanBoost) / (double) k));
        labelledHits += hits;
    }

    if (labelledHits >= k) {
        return scored;
    }

    // correction: fewer than k hits had a class value, so scale the scores up accordingly
    final List<ClassificationResult<BytesRef>> rescaled = new ArrayList<>();
    for (ClassificationResult<BytesRef> result : scored) {
        rescaled.add(
                new ClassificationResult<>(result.getAssignedClass(), result.getScore() * k / (double) labelledHits));
    }
    return rescaled;
}

From source file:SimpleNaiveBayesDocumentClassifier.java

License:Apache License

/**
 * {@inheritDoc}/*from w  w w. ja v a  2s . c  o m*/
 */
@Override
public ClassificationResult<BytesRef> assignClass(Document document) throws IOException {
    List<ClassificationResult<BytesRef>> assignedClasses = assignNormClasses(document);
    ClassificationResult<BytesRef> assignedClass = null;
    double maxscore = -Double.MAX_VALUE;
    for (ClassificationResult<BytesRef> c : assignedClasses) {
        if (c.getScore() > maxscore) {
            assignedClass = c;
            maxscore = c.getScore();
        }
    }
    return assignedClass;
}

From source file:com.github.tteofili.looseen.MinHashClassifier.java

License:Apache License

/**
 * Builds one classification result per category found among the given search hits.
 *
 * <p>Hits whose stored document lacks {@code categoryFieldName} are ignored. For the others the
 * per-category hit count and the sum of {@code score / maxScore} are collected; the category
 * score is derived from that sum divided by {@code k}. If fewer than {@code k} hits carried a
 * category, every score is multiplied by {@code k / hits}.
 *
 * @param searcher the searcher used to load the stored document of each hit
 * @param categoryFieldName name of the stored field holding the category
 * @param topDocs the search results
 * @param k the divisor applied to the category scores (presumably the number of requested hits)
 * @return the list of scored categories
 * @throws IOException if a stored document cannot be loaded
 */
List<ClassificationResult<BytesRef>> buildListFromTopDocs(IndexSearcher searcher, String categoryFieldName,
        TopDocs topDocs, int k) throws IOException {
    final Map<BytesRef, Integer> countByCategory = new HashMap<>();
    // boost based on the ranking positions of each category's documents in topDocs
    final Map<BytesRef, Double> boostByCategory = new HashMap<>();
    final float topScore = topDocs.getMaxScore();

    for (ScoreDoc hit : topDocs.scoreDocs) {
        final IndexableField categoryField = searcher.doc(hit.doc).getField(categoryFieldName);
        if (categoryField == null) {
            continue;
        }
        final BytesRef category = new BytesRef(categoryField.stringValue());

        // update count
        final Integer previousCount = countByCategory.get(category);
        final int updatedCount = (previousCount == null) ? 1 : previousCount + 1;
        countByCategory.put(category, updatedCount);

        // update boost, the boost is based on the best score
        final Double previousBoost = boostByCategory.get(category);
        final double relativeScore = hit.score / topScore;
        final double updatedBoost = (previousBoost == null) ? relativeScore : previousBoost + relativeScore;
        boostByCategory.put(category, updatedBoost);
    }

    final List<ClassificationResult<BytesRef>> unscaled = new ArrayList<>();
    int categorizedHits = 0;
    for (Map.Entry<BytesRef, Integer> entry : countByCategory.entrySet()) {
        final BytesRef category = entry.getKey();
        final int count = entry.getValue();
        // per-hit average boost; the original note states it is meant to lie in 0<b<1
        final double averageBoost = boostByCategory.get(category) / count;
        unscaled.add(new ClassificationResult<>(category.clone(), (count * averageBoost) / (double) k));
        categorizedHits += count;
    }

    if (categorizedHits >= k) {
        return unscaled;
    }

    // correction for the case where fewer than k hits had a category
    final List<ClassificationResult<BytesRef>> corrected = new ArrayList<>();
    for (ClassificationResult<BytesRef> result : unscaled) {
        corrected.add(
                new ClassificationResult<>(result.getAssignedClass(), result.getScore() * k / (double) categorizedHits));
    }
    return corrected;
}

From source file:com.github.tteofili.looseen.QueryingClassifier.java

License:Apache License

/**
 * Runs the query registered for each class and returns the class whose query scores highest.
 *
 * <p>The score of a class is either the total hit count of its query (when {@code useCounts}
 * is set) or the maximum score of its query. The first class reaching the highest score wins;
 * {@code null} is returned when {@code queriesPerClass} has no entries.
 *
 * <p>NOTE(review): {@code text} is not referenced in this method body -- confirm that the
 * per-class queries are built from the input elsewhere.
 *
 * @param text the text to classify
 * @return the best scoring class, or {@code null} if there are no per-class queries
 * @throws IOException if a search fails
 */
@Override
public ClassificationResult<BytesRef> assignClass(String text) throws IOException {
    ClassificationResult<BytesRef> best = null;
    for (Map.Entry<String, Query> classQuery : queriesPerClass.entrySet()) {
        final TopDocs topHit = indexSearcher.search(classQuery.getValue(), 1);
        final float score = useCounts ? topHit.totalHits : topHit.getMaxScore();

        if (best == null || score > best.getScore()) {
            best = new ClassificationResult<>(new BytesRef(classQuery.getKey()), score);
        }
    }
    return best;
}

From source file:de.uni_koeln.spinfo.textengineering.tm.classification.lucene.LuceneAdapter.java

License:Open Source License

/**
 * Prints the source of a document together with the class and score assigned to it, followed
 * by a separator line, to standard output.
 *
 * @param document the classified document
 * @param c the classification result to report
 */
@SuppressWarnings("unused")
private void printAssignments(Document document, ClassificationResult<BytesRef> c) {
    final java.io.PrintStream out = System.out;
    out.println("doc: " + document.getSource());
    final String assignedLabel = c.getAssignedClass().utf8ToString();
    out.println("class: " + assignedLabel);
    final double score = c.getScore();
    out.println("score: " + score);
    out.println("---------");
}

From source file:org.solr.classtify.SimpleNaiveBayesClassifierTest.java

License:Apache License

/**
 * Trains a {@link SimpleNaiveBayesClassifier} on the index located at {@code dir} and prints
 * the class assigned to {@code newText} together with its score.
 *
 * <p>The directory and the index reader are closed when the test finishes, whether or not
 * training or classification throws.
 *
 * @throws IOException propagated from opening, reading or closing the index, or from the
 *         classifier
 */
@Test
public void classtify() throws IOException {
    SimpleNaiveBayesClassifier classifier = new SimpleNaiveBayesClassifier();
    FSDirectory directory = FSDirectory.open(new File(dir));
    try {
        IndexReader reader = DirectoryReader.open(directory);
        try {
            AtomicReader wrap = SlowCompositeReaderWrapper.wrap(reader);
            classifier.train(wrap, textFieldName, categoryFieldName, analyzer);
            ClassificationResult<BytesRef> assignClass = classifier.assignClass(newText);
            BytesRef assignedClass = assignClass.getAssignedClass();

            double score = assignClass.getScore();
            System.out.println(assignedClass.utf8ToString() + "," + score);
        } finally {
            // release the index files held by the reader
            reader.close();
        }
    } finally {
        directory.close();
    }
}