Example usage for the org.apache.lucene.index.Term constructor

List of usage examples for the org.apache.lucene.index.Term constructor

Introduction

On this page you can find example usages of the org.apache.lucene.index.Term constructor.

Prototype

public Term(String fld, String text) 

Source Link

Document

Constructs a Term with the given field and text.

Usage

From source file:SimpleNaiveBayesClassifier.java

License:Apache License

/**
 * Calculate probabilities for all classes for a given input text.
 *
 * <p>Each non-empty term of the class field is scored as the sum of its log prior and the
 * log likelihood of the tokenized input; the collected scores are then passed to
 * {@code normClassificationResults}.
 *
 * @param inputDocument the input text as a {@code String}
 * @return a {@code List} of {@code ClassificationResult}, one for each existing class
 * @throws IOException if assigning probabilities fails
 */
protected List<ClassificationResult<BytesRef>> assignClassNormalizedList(String inputDocument)
        throws IOException {
    List<ClassificationResult<BytesRef>> assignedClasses = new ArrayList<>();

    Terms classes = MultiFields.getTerms(leafReader, classFieldName);
    // getTerms yields null when the class field has no terms; in that case there is
    // nothing to score and the (empty) list goes straight to normalization.
    if (classes != null) {
        TermsEnum classesEnum = classes.iterator();
        BytesRef next;
        String[] tokenizedText = tokenize(inputDocument);
        int docsWithClassSize = countDocsWithClass();
        while ((next = classesEnum.next()) != null) {
            // skip empty class values
            if (next.length > 0) {
                Term term = new Term(this.classFieldName, next);
                double clVal = calculateLogPrior(term, docsWithClassSize)
                        + calculateLogLikelihood(tokenizedText, term, docsWithClassSize);
                assignedClasses.add(new ClassificationResult<>(term.bytes(), clVal));
            }
        }
    }

    // normalization; the values are transformed to a 0-1 range
    return normClassificationResults(assignedClasses);
}

From source file:SimpleNaiveBayesClassifier.java

License:Apache License

/**
 * Count the number of documents in the index having at least a value for the 'class' field.
 *
 * <p>The count is read from the field's {@code Terms} statistics when available. If the
 * field has no {@code Terms} or the statistic is reported as {@code -1}, the count is
 * obtained by running a wildcard query on the class field (combined with {@code query}
 * when one is set) and taking the total hit count.
 *
 * @return the no. of documents having a value for the 'class' field
 * @throws IOException if accessing to term vectors or search fails
 */
protected int countDocsWithClass() throws IOException {
    Terms terms = MultiFields.getTerms(this.leafReader, this.classFieldName);
    // A null Terms (field without terms) is handled like an unavailable statistic
    // instead of being dereferenced.
    int docCount = (terms == null) ? -1 : terms.getDocCount();
    if (docCount == -1) { // in case codec doesn't support getDocCount
        TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector();
        BooleanQuery.Builder q = new BooleanQuery.Builder();
        q.add(new BooleanClause(
                new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))),
                BooleanClause.Occur.MUST));
        if (query != null) {
            q.add(query, BooleanClause.Occur.MUST);
        }
        indexSearcher.search(q.build(), classQueryCountCollector);
        docCount = classQueryCountCollector.getTotalHits();
    }
    return docCount;
}

From source file:SimpleNaiveBayesClassifier.java

License:Apache License

/**
 * Counts the documents of the given class that contain the given word in at least one of
 * the configured text fields, optionally restricted by the classifier's {@code query}.
 *
 * @param word the token produced by the analyzer
 * @param term the term representing the class
 * @return the number of documents of the input class that contain the word
 * @throws IOException if a low level I/O problem happens
 */
private int getWordFreqForClass(String word, Term term) throws IOException {
    // The word may appear in any one of the text fields.
    BooleanQuery.Builder anyTextField = new BooleanQuery.Builder();
    for (String fieldName : textFieldNames) {
        TermQuery wordInField = new TermQuery(new Term(fieldName, word));
        anyTextField.add(new BooleanClause(wordInField, BooleanClause.Occur.SHOULD));
    }

    // Required: word in some text field AND document belongs to the class.
    BooleanQuery.Builder combined = new BooleanQuery.Builder();
    combined.add(new BooleanClause(anyTextField.build(), BooleanClause.Occur.MUST));
    combined.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
    if (query != null) {
        combined.add(query, BooleanClause.Occur.MUST);
    }

    TotalHitCountCollector hitCounter = new TotalHitCountCollector();
    indexSearcher.search(combined.build(), hitCounter);
    return hitCounter.getTotalHits();
}

From source file:ContentBasedAnalysis.java

License:Apache License

/**
 * Looks up the id of the first document whose "path" field contains the given term.
 *
 * @param searcher the searcher to run the term query on
 * @param filename the value to look for in the "path" field
 * @return the id of the top-scoring hit, or {@code -1} when there are no hits
 * @throws Exception if the search fails
 */
private static int findDocId(IndexSearcher searcher, String filename) throws Exception {
    Query pathQuery = new TermQuery(new Term("path", filename));
    TopDocs topDocs = searcher.search(pathQuery, 2);
    return topDocs.totalHits < 1 ? -1 : topDocs.scoreDocs[0].doc;
}

From source file:ContentBasedAnalysis.java

License:Apache License

/**
 * Returns the document frequency reported by the reader for the given text in the
 * "contents" field.
 *
 * @param reader the index reader to query
 * @param s the term text to look up in the "contents" field
 * @return the value of {@code reader.docFreq} for that term
 * @throws Exception if the reader fails
 */
private static int docFreq(IndexReader reader, String s) throws Exception {
    Term contentsTerm = new Term("contents", s);
    return reader.docFreq(contentsTerm);
}

From source file:SearcherTest.java

/**
 * ?/*from  ww  w  .  j  a v a2s .c o m*/
 * ???TermQuery
 * TermQuery??QueryTermQuery???????
 * ??????TermQuery??
 * Lucene??????????/
 * ??????????
 *
 * @throws Exception
 */
@Test
public void testTermQuery() throws Exception {
    String searchField = "contents";
    String q = "xxxxxxxxx$";
    Term t = new Term(searchField, q);
    Query query = new TermQuery(t);
    TopDocs hits = is.search(query, 10);
    System.out.println("? '" + q + "'" + hits.totalHits + "");
    for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);
        System.out.println(doc.get("fullPath"));
    }
}

From source file:SearcherTest.java

/**
 * ???BooleanQuery/*from   ww  w  . j av a  2  s  . co  m*/
 * BooleanQuery???Query
 * ?Query???Query
 * ?BooleanQuery??
 * BooleanQuery?????API??
 * ?BooleanQuery????API?
 *
 * @throws Exception
 */
@Test
public void testBooleanQuery() throws Exception {
    String searchField = "contents";
    String q1 = "xxxxxxxxx";
    String q2 = "oooooooooooooooo";
    Query query1 = new TermQuery(new Term(searchField, q1));
    Query query2 = new TermQuery(new Term(searchField, q2));
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    //  1MUSTMUST???
    //  2MUSTMUST_NOT??MUST_NOT??
    // 3SHOULDMUST_NOT?MUSTMUST_NOT
    // 4SHOULDMUSTMUST??,SHOULD???
    // 5SHOULDSHOULD???
    // 6MUST_NOTMUST_NOT?
    builder.add(query1, BooleanClause.Occur.MUST);
    builder.add(query2, BooleanClause.Occur.MUST);
    BooleanQuery booleanQuery = builder.build();
    TopDocs hits = is.search(booleanQuery, 10);
    System.out.println("? " + q1 + "And" + q2 + "" + hits.totalHits + "");
    for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);
        System.out.println(doc.get("fullPath"));
    }
}

From source file:SearcherTest.java

/**
 * Searches the "contents" field with a {@code PrefixQuery} built from a single term
 * used as the prefix, prints the total hit count, then prints the "fullPath" value of
 * each of the (at most 10) returned hits.
 *
 * @throws Exception if searching or loading a document fails
 */
@Test
public void testPrefixQuery() throws Exception {
    String field = "contents";
    String prefix = "1license";
    TopDocs topDocs = is.search(new PrefixQuery(new Term(field, prefix)), 10);
    System.out.println("? '" + prefix + "'" + topDocs.totalHits + "");

    for (ScoreDoc hit : topDocs.scoreDocs) {
        System.out.println(is.doc(hit.doc).get("fullPath"));
    }
}

From source file:SearcherTest.java

/**
 * PhraseQuery?big car?/*www  .j a v  a2s .  com*/
 * ?document?"big car"
 * document???????big black car?
 * ????slop
 * slopslop???
 *
 * @throws Exception
 */
@Test
public void testPhraseQuery() throws Exception {
    String searchField = "contents";
    String q1 = "xxxx";
    String q2 = "bbb";
    Term t1 = new Term(searchField, q1);
    Term t2 = new Term(searchField, q2);
    PhraseQuery.Builder builder = new PhraseQuery.Builder();
    builder.add(t1);
    builder.add(t2);
    builder.setSlop(0);
    PhraseQuery query = builder.build();
    TopDocs hits = is.search(query, 10);
    System.out.println("? '" + q1 + q2 + "" + "" + hits.totalHits
            + "");

    for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);
        System.out.println(doc.get("fullPath"));
    }
}

From source file:SearcherTest.java

/**
 * ??FuzzyQuery/*from  ww  w.  ja v  a  2  s  .co m*/
 * FuzzyQuery????
 *
 * @throws Exception
 */
@Test
public void testFuzzyQuery() throws Exception {
    String searchField = "contents";
    String q = "ljlxx";
    Term t = new Term(searchField, q);
    Query query = new FuzzyQuery(t);
    TopDocs hits = is.search(query, 10);
    System.out.println("? '" + q + "'" + hits.totalHits + "");

    for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);
        System.out.println(doc.get("fullPath"));
    }
}