List of usage examples for org.apache.lucene.classification ClassificationResult getScore
public double getScore()
From source file:SimpleNaiveBayesClassifier.java
License:Apache License
/** * {@inheritDoc}/*from w w w .j ava 2 s.c o m*/ */ @Override public ClassificationResult<BytesRef> assignClass(String inputDocument) throws IOException { List<ClassificationResult<BytesRef>> assignedClasses = assignClassNormalizedList(inputDocument); ClassificationResult<BytesRef> assignedClass = null; double maxscore = -Double.MAX_VALUE; for (ClassificationResult<BytesRef> c : assignedClasses) { if (c.getScore() > maxscore) { assignedClass = c; maxscore = c.getScore(); } } return assignedClass; }
From source file:SimpleNaiveBayesClassifier.java
License:Apache License
/** * Normalize the classification results based on the max score available * @param assignedClasses the list of assigned classes * @return the normalized results//from w w w .j a va2 s . co m */ protected ArrayList<ClassificationResult<BytesRef>> normClassificationResults( List<ClassificationResult<BytesRef>> assignedClasses) { // normalization; the values transforms to a 0-1 range ArrayList<ClassificationResult<BytesRef>> returnList = new ArrayList<>(); if (!assignedClasses.isEmpty()) { Collections.sort(assignedClasses); // this is a negative number closest to 0 = a double smax = assignedClasses.get(0).getScore(); double sumLog = 0; // log(sum(exp(x_n-a))) for (ClassificationResult<BytesRef> cr : assignedClasses) { // getScore-smax <=0 (both negative, smax is the smallest abs() sumLog += Math.exp(cr.getScore() - smax); } // loga=a+log(sum(exp(x_n-a))) = log(sum(exp(x_n))) double loga = smax; loga += Math.log(sumLog); // 1/sum*x = exp(log(x))*1/sum = exp(log(x)-log(sum)) for (ClassificationResult<BytesRef> cr : assignedClasses) { double scoreDiff = cr.getScore() - loga; returnList.add(new ClassificationResult<>(cr.getAssignedClass(), Math.exp(scoreDiff))); } } return returnList; }
From source file:KNearestNeighbourDocumentClassifier.java
License:Apache License
/** * {@inheritDoc}/*from www .jav a2s . c o m*/ */ @Override public ClassificationResult<BytesRef> assignClass(Document document) throws IOException { TopDocs knnResults = knnSearch(document); List<ClassificationResult<BytesRef>> assignedClasses = buildListFromTopDocs(knnResults); ClassificationResult<BytesRef> assignedClass = null; double maxscore = -Double.MAX_VALUE; for (ClassificationResult<BytesRef> cl : assignedClasses) { if (cl.getScore() > maxscore) { assignedClass = cl; maxscore = cl.getScore(); } } return assignedClass; }
From source file:KNearestNeighborClassifier.java
License:Apache License
/**
 * {@inheritDoc}
 *
 * <p>The text is matched with {@code knnSearch}; the hits are aggregated per class
 * by {@code buildListFromTopDocs} and the class with the strictly highest score is
 * returned. The result is {@code null} when no class scores above
 * {@code -Double.MAX_VALUE}.
 */
@Override
public ClassificationResult<BytesRef> assignClass(String text) throws IOException {
    List<ClassificationResult<BytesRef>> perClassResults = buildListFromTopDocs(knnSearch(text));

    double topScore = -Double.MAX_VALUE;
    ClassificationResult<BytesRef> topResult = null;
    for (ClassificationResult<BytesRef> current : perClassResults) {
        double currentScore = current.getScore();
        if (currentScore > topScore) {
            // Remember the new leader together with its score.
            topResult = current;
            topScore = currentScore;
        }
    }
    return topResult;
}
From source file:KNearestNeighborClassifier.java
License:Apache License
/** * build a list of classification results from search results * @param topDocs the search results as a {@link TopDocs} object * @return a {@link List} of {@link ClassificationResult}, one for each existing class * @throws IOException if it's not possible to get the stored value of class field *///from www.j a va 2 s . c o m protected List<ClassificationResult<BytesRef>> buildListFromTopDocs(TopDocs topDocs) throws IOException { Map<BytesRef, Integer> classCounts = new HashMap<>(); Map<BytesRef, Double> classBoosts = new HashMap<>(); // this is a boost based on class ranking positions in topDocs float maxScore = topDocs.getMaxScore(); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { IndexableField storableField = indexSearcher.doc(scoreDoc.doc).getField(classFieldName); if (storableField != null) { BytesRef cl = new BytesRef(storableField.stringValue()); //update count Integer count = classCounts.get(cl); if (count != null) { classCounts.put(cl, count + 1); } else { classCounts.put(cl, 1); } //update boost, the boost is based on the best score Double totalBoost = classBoosts.get(cl); double singleBoost = scoreDoc.score / maxScore; if (totalBoost != null) { classBoosts.put(cl, totalBoost + singleBoost); } else { classBoosts.put(cl, singleBoost); } } } List<ClassificationResult<BytesRef>> returnList = new ArrayList<>(); List<ClassificationResult<BytesRef>> temporaryList = new ArrayList<>(); int sumdoc = 0; for (Map.Entry<BytesRef, Integer> entry : classCounts.entrySet()) { Integer count = entry.getValue(); Double normBoost = classBoosts.get(entry.getKey()) / count; //the boost is normalized to be 0<b<1 temporaryList.add(new ClassificationResult<>(entry.getKey().clone(), (count * normBoost) / (double) k)); sumdoc += count; } //correction if (sumdoc < k) { for (ClassificationResult<BytesRef> cr : temporaryList) { returnList.add( new ClassificationResult<>(cr.getAssignedClass(), cr.getScore() * k / (double) sumdoc)); } } else { returnList = temporaryList; } return 
returnList; }
From source file:SimpleNaiveBayesDocumentClassifier.java
License:Apache License
/** * {@inheritDoc}/*from w w w. ja v a 2s . c o m*/ */ @Override public ClassificationResult<BytesRef> assignClass(Document document) throws IOException { List<ClassificationResult<BytesRef>> assignedClasses = assignNormClasses(document); ClassificationResult<BytesRef> assignedClass = null; double maxscore = -Double.MAX_VALUE; for (ClassificationResult<BytesRef> c : assignedClasses) { if (c.getScore() > maxscore) { assignedClass = c; maxscore = c.getScore(); } } return assignedClass; }
From source file:com.github.tteofili.looseen.MinHashClassifier.java
License:Apache License
/**
 * Aggregates search hits into one scored result per category.
 *
 * <p>Each hit with a value in {@code categoryFieldName} is counted for its category
 * and contributes its score divided by the maximum score of {@code topDocs}. A
 * category's score is its count times its average contribution, divided by {@code k}.
 * When fewer than {@code k} hits carried a category, the scores are rescaled by
 * {@code k / (number of categorized hits)}.
 *
 * @param searcher          searcher used to load the stored document of each hit
 * @param categoryFieldName name of the field holding the category value
 * @param topDocs           the search results to aggregate
 * @param k                 the divisor used to scale the per-category scores
 * @return one {@link ClassificationResult} per category found among the hits
 * @throws IOException if a hit's stored document cannot be loaded
 */
List<ClassificationResult<BytesRef>> buildListFromTopDocs(IndexSearcher searcher, String categoryFieldName,
        TopDocs topDocs, int k) throws IOException {
    Map<BytesRef, Integer> countByCategory = new HashMap<>();
    // boost derived from each hit's score relative to the best score in topDocs
    Map<BytesRef, Double> boostByCategory = new HashMap<>();
    float topScore = topDocs.getMaxScore();

    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        IndexableField categoryField = searcher.doc(scoreDoc.doc).getField(categoryFieldName);
        if (categoryField == null) {
            continue;
        }
        BytesRef category = new BytesRef(categoryField.stringValue());

        // update count
        Integer previousCount = countByCategory.get(category);
        if (previousCount == null) {
            countByCategory.put(category, 1);
        } else {
            countByCategory.put(category, previousCount + 1);
        }

        // update boost, the boost is based on the best score
        double hitBoost = scoreDoc.score / topScore;
        Double previousBoost = boostByCategory.get(category);
        if (previousBoost == null) {
            boostByCategory.put(category, hitBoost);
        } else {
            boostByCategory.put(category, previousBoost + hitBoost);
        }
    }

    List<ClassificationResult<BytesRef>> uncorrected = new ArrayList<>();
    int categorizedHits = 0;
    for (Map.Entry<BytesRef, Integer> categoryCount : countByCategory.entrySet()) {
        Integer hits = categoryCount.getValue();
        // average boost of the hits in this category
        Double meanBoost = boostByCategory.get(categoryCount.getKey()) / hits;
        double categoryScore = (hits * meanBoost) / (double) k;
        uncorrected.add(new ClassificationResult<>(categoryCount.getKey().clone(), categoryScore));
        categorizedHits += hits;
    }

    if (categorizedHits >= k) {
        return uncorrected;
    }

    // correction for searches where fewer than k hits had a category
    List<ClassificationResult<BytesRef>> rescaled = new ArrayList<>();
    for (ClassificationResult<BytesRef> entry : uncorrected) {
        double rescaledScore = entry.getScore() * k / (double) categorizedHits;
        rescaled.add(new ClassificationResult<>(entry.getAssignedClass(), rescaledScore));
    }
    return rescaled;
}
From source file:com.github.tteofili.looseen.QueryingClassifier.java
License:Apache License
/**
 * Runs the query registered for each class and returns the class whose query
 * scores highest.
 *
 * <p>The score of a class is the total hit count of its query when
 * {@code useCounts} is set, otherwise the maximum score of its top hit. Note that
 * {@code text} is not read by this method; only the entries of
 * {@code queriesPerClass} are evaluated.
 *
 * @param text the text to classify (currently unused here)
 * @return the best scoring class, or {@code null} when {@code queriesPerClass} is empty
 * @throws IOException if a search fails
 */
@Override
public ClassificationResult<BytesRef> assignClass(String text) throws IOException {
    ClassificationResult<BytesRef> result = null;
    for (Map.Entry<String, Query> entry : queriesPerClass.entrySet()) {
        TopDocs search = indexSearcher.search(entry.getValue(), 1);
        float score;
        if (useCounts) {
            score = search.totalHits;
        } else {
            score = search.getMaxScore();
        }
        // No comparison with NaN is ever true, so a NaN held in "result" would never be
        // replaced by a plain "score > result.getScore()" test and the first class would
        // always win. A real score therefore explicitly replaces a NaN one.
        // NOTE(review): Lucene is expected to report NaN as max score for a query without
        // hits - confirm for the Lucene version in use.
        boolean replacesNaN = result != null && Double.isNaN(result.getScore()) && !Float.isNaN(score);
        if (result == null || score > result.getScore() || replacesNaN) {
            result = new ClassificationResult<>(new BytesRef(entry.getKey()), score);
        }
    }
    return result;
}
From source file:de.uni_koeln.spinfo.textengineering.tm.classification.lucene.LuceneAdapter.java
License:Open Source License
/**
 * Debug helper that writes a document's source, the assigned class and its score
 * to standard output, followed by a separator line.
 *
 * @param document the classified document
 * @param c        the classification result for {@code document}
 */
@SuppressWarnings("unused")
private void printAssignments(Document document, ClassificationResult<BytesRef> c) {
    // Each value is read immediately before its line is printed.
    String docLine = "doc: " + document.getSource();
    System.out.println(docLine);

    String classLine = "class: " + c.getAssignedClass().utf8ToString();
    System.out.println(classLine);

    String scoreLine = "score: " + c.getScore();
    System.out.println(scoreLine);

    System.out.println("---------");
}
From source file:org.solr.classtify.SimpleNaiveBayesClassifierTest.java
License:Apache License
/**
 * Trains a {@link SimpleNaiveBayesClassifier} on the index stored under {@code dir},
 * classifies {@code newText} and prints the assigned class and its score as
 * {@code class,score}.
 *
 * <p>The index reader and the directory are closed when the test finishes, also
 * when training or classification throws.
 *
 * @throws IOException if the index cannot be opened or read
 */
@Test
public void classtify() throws IOException {
    SimpleNaiveBayesClassifier classifier = new SimpleNaiveBayesClassifier();
    FSDirectory directory = FSDirectory.open(new File(dir));
    try {
        IndexReader reader = DirectoryReader.open(directory);
        try {
            AtomicReader wrap = SlowCompositeReaderWrapper.wrap(reader);
            classifier.train(wrap, textFieldName, categoryFieldName, analyzer);
            // Classification must happen while the reader is still open: the classifier
            // was trained on a view of it.
            ClassificationResult<BytesRef> assignClass = classifier.assignClass(newText);
            BytesRef assignedClass = assignClass.getAssignedClass();
            double score = assignClass.getScore();
            System.out.println(assignedClass.utf8ToString() + "," + score);
        } finally {
            reader.close();
        }
    } finally {
        directory.close();
    }
}