List of usage examples for org.apache.lucene.index Term Term
public Term(String fld, String text)
From source file:SimpleNaiveBayesClassifier.java
License:Apache License
/** * Calculate probabilities for all classes for a given input text * @param inputDocument the input text as a {@code String} * @return a {@code List} of {@code ClassificationResult}, one for each existing class * @throws IOException if assigning probabilities fails *///from w ww .j ava2 s . c om protected List<ClassificationResult<BytesRef>> assignClassNormalizedList(String inputDocument) throws IOException { List<ClassificationResult<BytesRef>> assignedClasses = new ArrayList<>(); Terms classes = MultiFields.getTerms(leafReader, classFieldName); TermsEnum classesEnum = classes.iterator(); BytesRef next; String[] tokenizedText = tokenize(inputDocument); int docsWithClassSize = countDocsWithClass(); while ((next = classesEnum.next()) != null) { if (next.length > 0) { Term term = new Term(this.classFieldName, next); double clVal = calculateLogPrior(term, docsWithClassSize) + calculateLogLikelihood(tokenizedText, term, docsWithClassSize); assignedClasses.add(new ClassificationResult<>(term.bytes(), clVal)); } } // normalization; the values transforms to a 0-1 range return normClassificationResults(assignedClasses); }
From source file:SimpleNaiveBayesClassifier.java
License:Apache License
/** * count the number of documents in the index having at least a value for the 'class' field * * @return the no. of documents having a value for the 'class' field * @throws IOException if accessing to term vectors or search fails *//*from www . ja va 2s. co m*/ protected int countDocsWithClass() throws IOException { int docCount = MultiFields.getTerms(this.leafReader, this.classFieldName).getDocCount(); if (docCount == -1) { // in case codec doesn't support getDocCount TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector(); BooleanQuery.Builder q = new BooleanQuery.Builder(); q.add(new BooleanClause( new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))), BooleanClause.Occur.MUST)); if (query != null) { q.add(query, BooleanClause.Occur.MUST); } indexSearcher.search(q.build(), classQueryCountCollector); docCount = classQueryCountCollector.getTotalHits(); } return docCount; }
From source file:SimpleNaiveBayesClassifier.java
License:Apache License
/**
 * Returns the number of documents of the input class (from the whole index or from a subset)
 * that contain the word (in any of the configured text fields).
 *
 * @param word the token produced by the analyzer
 * @param term the term representing the class
 * @return the number of documents of the input class
 * @throws IOException if a low level I/O problem happens
 */
private int getWordFreqForClass(String word, Term term) throws IOException {
  // one optional clause per text field, grouped into a single sub-query
  BooleanQuery.Builder wordInAnyField = new BooleanQuery.Builder();
  for (String fieldName : textFieldNames) {
    wordInAnyField.add(new TermQuery(new Term(fieldName, word)), BooleanClause.Occur.SHOULD);
  }

  // the word sub-query and the class term are both required
  BooleanQuery.Builder wordAndClass = new BooleanQuery.Builder();
  wordAndClass.add(wordInAnyField.build(), BooleanClause.Occur.MUST);
  wordAndClass.add(new TermQuery(term), BooleanClause.Occur.MUST);
  if (query != null) {
    // additionally restrict to the configured subset query
    wordAndClass.add(query, BooleanClause.Occur.MUST);
  }

  TotalHitCountCollector hitCounter = new TotalHitCountCollector();
  indexSearcher.search(wordAndClass.build(), hitCounter);
  return hitCounter.getTotalHits();
}
From source file:ContentBasedAnalysis.java
License:Apache License
/**
 * Searches for documents whose "path" term equals the given file name and returns the
 * document id of the first hit.
 *
 * @param searcher the searcher to run the term query on
 * @param filename the exact value to look up in the "path" field
 * @return the doc id of the first scored hit, or {@code -1} when the search reports no hits
 * @throws Exception if the search fails
 */
private static int findDocId(IndexSearcher searcher, String filename) throws Exception {
  TopDocs hits = searcher.search(new TermQuery(new Term("path", filename)), 2);
  return hits.totalHits < 1 ? -1 : hits.scoreDocs[0].doc;
}
From source file:ContentBasedAnalysis.java
License:Apache License
/**
 * Returns the document frequency reported by the reader for the given text in the
 * "contents" field.
 *
 * @param reader the index reader to query
 * @param s the term text to look up in the "contents" field
 * @return the value of {@code reader.docFreq} for that term
 * @throws Exception if reading from the index fails
 */
private static int docFreq(IndexReader reader, String s) throws Exception {
  Term contentsTerm = new Term("contents", s);
  return reader.docFreq(contentsTerm);
}
From source file:SearcherTest.java
/** * ?/*from ww w . j a v a2s .c o m*/ * ???TermQuery * TermQuery??QueryTermQuery??????? * ??????TermQuery?? * Lucene??????????/ * ?????????? * * @throws Exception */ @Test public void testTermQuery() throws Exception { String searchField = "contents"; String q = "xxxxxxxxx$"; Term t = new Term(searchField, q); Query query = new TermQuery(t); TopDocs hits = is.search(query, 10); System.out.println("? '" + q + "'" + hits.totalHits + ""); for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); System.out.println(doc.get("fullPath")); } }
From source file:SearcherTest.java
/** * ???BooleanQuery/*from ww w . j av a 2 s . co m*/ * BooleanQuery???Query * ?Query???Query * ?BooleanQuery?? * BooleanQuery?????API?? * ?BooleanQuery????API? * * @throws Exception */ @Test public void testBooleanQuery() throws Exception { String searchField = "contents"; String q1 = "xxxxxxxxx"; String q2 = "oooooooooooooooo"; Query query1 = new TermQuery(new Term(searchField, q1)); Query query2 = new TermQuery(new Term(searchField, q2)); BooleanQuery.Builder builder = new BooleanQuery.Builder(); // 1MUSTMUST??? // 2MUSTMUST_NOT??MUST_NOT?? // 3SHOULDMUST_NOT?MUSTMUST_NOT // 4SHOULDMUSTMUST??,SHOULD??? // 5SHOULDSHOULD??? // 6MUST_NOTMUST_NOT? builder.add(query1, BooleanClause.Occur.MUST); builder.add(query2, BooleanClause.Occur.MUST); BooleanQuery booleanQuery = builder.build(); TopDocs hits = is.search(booleanQuery, 10); System.out.println("? " + q1 + "And" + q2 + "" + hits.totalHits + ""); for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); System.out.println(doc.get("fullPath")); } }
From source file:SearcherTest.java
/**
 * Runs a {@code PrefixQuery} on the "contents" field using the given text as the prefix,
 * then prints the total hit count followed by the "fullPath" value of each returned
 * document (at most 10 are requested).
 *
 * @throws Exception if searching or loading a document fails
 */
@Test
public void testPrefixQuery() throws Exception {
  String searchField = "contents";
  String q = "1license";

  TopDocs hits = is.search(new PrefixQuery(new Term(searchField, q)), 10);
  System.out.println("? '" + q + "'" + hits.totalHits + "");

  for (ScoreDoc hit : hits.scoreDocs) {
    System.out.println(is.doc(hit.doc).get("fullPath"));
  }
}
From source file:SearcherTest.java
/** * PhraseQuery?big car?/*www .j a v a2s . com*/ * ?document?"big car" * document???????big black car? * ????slop * slopslop??? * * @throws Exception */ @Test public void testPhraseQuery() throws Exception { String searchField = "contents"; String q1 = "xxxx"; String q2 = "bbb"; Term t1 = new Term(searchField, q1); Term t2 = new Term(searchField, q2); PhraseQuery.Builder builder = new PhraseQuery.Builder(); builder.add(t1); builder.add(t2); builder.setSlop(0); PhraseQuery query = builder.build(); TopDocs hits = is.search(query, 10); System.out.println("? '" + q1 + q2 + "" + "" + hits.totalHits + ""); for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); System.out.println(doc.get("fullPath")); } }
From source file:SearcherTest.java
/** * ??FuzzyQuery/*from ww w. ja v a 2 s .co m*/ * FuzzyQuery???? * * @throws Exception */ @Test public void testFuzzyQuery() throws Exception { String searchField = "contents"; String q = "ljlxx"; Term t = new Term(searchField, q); Query query = new FuzzyQuery(t); TopDocs hits = is.search(query, 10); System.out.println("? '" + q + "'" + hits.totalHits + ""); for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); System.out.println(doc.get("fullPath")); } }