Example usage for org.apache.lucene.search BooleanQuery.Builder BooleanQuery.Builder

List of usage examples for org.apache.lucene.search BooleanQuery.Builder BooleanQuery.Builder

Introduction

In this page you can find the example usage for org.apache.lucene.search BooleanQuery.Builder BooleanQuery.Builder.

Prototype

BooleanQuery.Builder

Source Link

Usage

From source file:SimpleNaiveBayesClassifier.java

License:Apache License

/**
 * count the number of documents in the index having at least a value for the 'class' field
 *
 * @return the no. of documents having a value for the 'class' field
 * @throws IOException if accessing to term vectors or search fails
 *//*ww  w .  j a  va 2 s  . c o  m*/
protected int countDocsWithClass() throws IOException {
    int docCount = MultiFields.getTerms(this.leafReader, this.classFieldName).getDocCount();
    if (docCount == -1) { // in case codec doesn't support getDocCount
        TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector();
        BooleanQuery.Builder q = new BooleanQuery.Builder();
        q.add(new BooleanClause(
                new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))),
                BooleanClause.Occur.MUST));
        if (query != null) {
            q.add(query, BooleanClause.Occur.MUST);
        }
        indexSearcher.search(q.build(), classQueryCountCollector);
        docCount = classQueryCountCollector.getTotalHits();
    }
    return docCount;
}

From source file:SimpleNaiveBayesClassifier.java

License:Apache License

/**
 * Returns the number of documents of the input class ( from the whole index or from a subset)
 * that contains the word ( in a specific field or in all the fields if no one selected)
 * @param word the token produced by the analyzer
 * @param term the term representing the class
 * @return the number of documents of the input class
 * @throws IOException if a low level I/O problem happens
 *///from  w  w w  .  j a v  a  2 s.  com
private int getWordFreqForClass(String word, Term term) throws IOException {
    BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
    BooleanQuery.Builder subQuery = new BooleanQuery.Builder();
    for (String textFieldName : textFieldNames) {
        subQuery.add(
                new BooleanClause(new TermQuery(new Term(textFieldName, word)), BooleanClause.Occur.SHOULD));
    }
    booleanQuery.add(new BooleanClause(subQuery.build(), BooleanClause.Occur.MUST));
    booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
    if (query != null) {
        booleanQuery.add(query, BooleanClause.Occur.MUST);
    }
    TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
    indexSearcher.search(booleanQuery.build(), totalHitCountCollector);
    return totalHitCountCollector.getTotalHits();
}

From source file:SearcherTest.java

/**
 * ???BooleanQuery// w ww. j a  v a2  s .c  om
 * BooleanQuery???Query
 * ?Query???Query
 * ?BooleanQuery??
 * BooleanQuery?????API??
 * ?BooleanQuery????API?
 *
 * @throws Exception
 */
@Test
public void testBooleanQuery() throws Exception {
    String searchField = "contents";
    String q1 = "xxxxxxxxx";
    String q2 = "oooooooooooooooo";
    Query query1 = new TermQuery(new Term(searchField, q1));
    Query query2 = new TermQuery(new Term(searchField, q2));
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    //  1MUSTMUST???
    //  2MUSTMUST_NOT??MUST_NOT??
    // 3SHOULDMUST_NOT?MUSTMUST_NOT
    // 4SHOULDMUSTMUST??,SHOULD???
    // 5SHOULDSHOULD???
    // 6MUST_NOTMUST_NOT?
    builder.add(query1, BooleanClause.Occur.MUST);
    builder.add(query2, BooleanClause.Occur.MUST);
    BooleanQuery booleanQuery = builder.build();
    TopDocs hits = is.search(booleanQuery, 10);
    System.out.println("? " + q1 + "And" + q2 + "" + hits.totalHits + "");
    for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);
        System.out.println(doc.get("fullPath"));
    }
}

From source file:KNearestNeighbourDocumentClassifier.java

License:Apache License

/**
 * Returns the top k results from a More Like This query based on the input document
 *
 * @param document the document to use for More Like This search
 * @return the top results for the MLT query
 * @throws IOException If there is a low-level I/O error
 *///from   w  ww .  ja  v  a 2  s  .  co  m
private TopDocs knnSearch(Document document) throws IOException {
    BooleanQuery.Builder mltQuery = new BooleanQuery.Builder();

    for (String fieldName : textFieldNames) {
        String boost = null;
        if (fieldName.contains("^")) {
            String[] field2boost = fieldName.split("\\^");
            fieldName = field2boost[0];
            boost = field2boost[1];
        }
        String[] fieldValues = document.getValues(fieldName);
        if (boost != null) {
            mlt.setBoost(true);
            mlt.setBoostFactor(Float.parseFloat(boost));
        }
        mlt.setAnalyzer(field2analyzer.get(fieldName));
        for (String fieldContent : fieldValues) {
            mltQuery.add(new BooleanClause(mlt.like(fieldName, new StringReader(fieldContent)),
                    BooleanClause.Occur.SHOULD));
        }
        mlt.setBoost(false);
    }
    Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*"));
    mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST));
    if (query != null) {
        mltQuery.add(query, BooleanClause.Occur.MUST);
    }
    return indexSearcher.search(mltQuery.build(), k);
}

From source file:KNearestNeighborClassifier.java

License:Apache License

private TopDocs knnSearch(String text) throws IOException {
    BooleanQuery.Builder mltQuery = new BooleanQuery.Builder();
    for (String fieldName : textFieldNames) {
        String boost = null;//from  w w w  .  j  a  v  a 2  s.  co  m
        mlt.setBoost(true); //terms boost actually helps in MLT queries
        if (fieldName.contains("^")) {
            String[] field2boost = fieldName.split("\\^");
            fieldName = field2boost[0];
            boost = field2boost[1];
        }
        if (boost != null) {
            mlt.setBoostFactor(Float.parseFloat(boost));//if we have a field boost, we add it
        }
        mltQuery.add(
                new BooleanClause(mlt.like(fieldName, new StringReader(text)), BooleanClause.Occur.SHOULD));
        mlt.setBoostFactor(1);// restore neutral boost for next field
    }
    Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*"));
    mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST));
    if (query != null) {
        mltQuery.add(query, BooleanClause.Occur.MUST);
    }
    return indexSearcher.search(mltQuery.build(), k);
}

From source file:SimpleNaiveBayesDocumentClassifier.java

License:Apache License

/**
 * Returns the number of documents of the input class ( from the whole index or from a subset)
 * that contains the word ( in a specific field or in all the fields if no one selected)
 *
 * @param word      the token produced by the analyzer
 * @param fieldName the field the word is coming from
 * @param term      the class term/*from w w w.  j a v a  2 s .  co m*/
 * @return number of documents of the input class
 * @throws java.io.IOException If there is a low-level I/O error
 */
private int getWordFreqForClass(String word, String fieldName, Term term) throws IOException {
    BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
    BooleanQuery.Builder subQuery = new BooleanQuery.Builder();
    subQuery.add(new BooleanClause(new TermQuery(new Term(fieldName, word)), BooleanClause.Occur.SHOULD));
    booleanQuery.add(new BooleanClause(subQuery.build(), BooleanClause.Occur.MUST));
    booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
    if (query != null) {
        booleanQuery.add(query, BooleanClause.Occur.MUST);
    }
    TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
    indexSearcher.search(booleanQuery.build(), totalHitCountCollector);
    return totalHitCountCollector.getTotalHits();
}

From source file:luceneInterface.java

License:Apache License

public static List<Document> query(String index, String stoppath, String question, int numResult, String sim)
        throws Exception {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);

    Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stoppath)));

    if (sim.equals("TFIDF"))
        searcher.setSimilarity(new ClassicSimilarity());
    else if (sim.equals("BM25"))
        searcher.setSimilarity(new BM25Similarity());
    else/*from   w ww.j  a v a  2s.c o  m*/
        searcher.setSimilarity(new BM25Similarity());

    String field = "contents";
    QueryParser parser = new QueryParser(field, analyzer);
    Query query = parser.parse(parser.escape(question));

    BooleanQuery.Builder bqb = new BooleanQuery.Builder();
    bqb.add(new TermQuery(new Term("contents", parser.escape(question))), BooleanClause.Occur.SHOULD);
    bqb.add(new TermQuery(new Term("sec", parser.escape(question))), BooleanClause.Occur.SHOULD);

    //        Term term = new Term(field, question);
    //        Query query = new TermQuery(term);

    //        TopDocs results = searcher.search(query, numResult);
    TopDocs results = searcher.search(parser.parse(bqb.build().toString()), numResult);

    ScoreDoc[] hits = results.scoreDocs;
    List<Document> docs = new ArrayList<Document>();

    int numTotalHits = results.totalHits;
    //        System.out.println(numTotalHits + " total matching documents");

    int end = Math.min(numTotalHits, numResult);

    String searchResult = "";
    //        System.out.println("Only results 1 - " + hits.length);

    for (int i = 0; i < end; i++) {
        Document doc = searcher.doc(hits[i].doc);
        docs.add(doc);
    }

    return docs;
}

From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.AbstractTermExpansionTest.java

License:Apache License

/**
 * This test indexes a sample metadata record (=lucene document) having a
 * "title", "description", and "subject" field, which contains plain subject
 * terms.//  w  ww.  ja  v  a 2 s.c  om
 * <p/>
 * A search for "arms" doesn't return that record because the term "arms" is
 * not explicitly contained in the record (document).
 *
 * @throws IOException
 * @throws LockObtainFailedException
 * @throws CorruptIndexException
 */
@Test
public void noExpansion() throws IOException {

    /* defining the document to be indexed */
    Document doc = new Document();
    doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
    doc.add(new Field("description",
            "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
                    + "The spear was mainly a thrusting weapon, but could also be thrown. "
                    + "It was the principal weapon of the auxiliary soldier... "
                    + "(second - fourth century, Arbeia Roman Fort).",
            TextField.TYPE_NOT_STORED));
    doc.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED));

    /* setting up a writer with a default (simple) analyzer */
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new SimpleAnalyzer()));

    /* adding the document to the index */
    writer.addDocument(doc);

    /* defining a query that searches over all fields */
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD);

    /* creating a new searcher */
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    TopDocs results = searcher.search(builder.build(), 10);

    /* no results are returned since there is no term match */
    assertEquals(0, results.totalHits);
}

From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.LabelbasedTermExpansionTest.java

License:Apache License

/**
 * This test indexes a sample metadata record (=lucene document) having a
 * "title", "description", and "subject" field.
 * <p/>/*ww w  . j av a  2 s . c  o m*/
 * A search for "arms" returns that record as a result because "arms" is
 * defined as an alternative label for "weapons", the term which is
 * contained in the subject field.
 *
 * @throws IOException
 */
@Test
public void labelBasedTermExpansion() throws IOException {

    /* defining the document to be indexed */
    Document doc = new Document();
    doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
    doc.add(new Field("description",
            "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
                    + "The spear was mainly a thrusting weapon, but could also be thrown. "
                    + "It was the principal weapon of the auxiliary soldier... "
                    + "(second - fourth century, Arbeia Roman Fort).",
            TextField.TYPE_NOT_STORED));
    doc.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED));

    /* setting up the SKOS analyzer */
    String skosFile = "src/test/resources/skos_samples/ukat_examples.n3";
    String indexPath = "build/";

    /* ExpansionType.URI->the field to be analyzed (expanded) contains URIs */
    Analyzer skosAnalyzer = new SKOSAnalyzer(indexPath, skosFile, ExpansionType.LABEL);

    /* Define different analyzers for different fields */
    Map<String, Analyzer> analyzerPerField = new HashMap<>();
    analyzerPerField.put("subject", skosAnalyzer);
    PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), analyzerPerField);

    /* setting up a writer with a default (simple) analyzer */
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(indexAnalyzer));

    /* adding the document to the index */
    writer.addDocument(doc);

    /* defining a query that searches over all fields */
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD);

    /* creating a new searcher */
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    TopDocs results = searcher.search(builder.build(), 10);

    /* the document matches because "arms" is among the expanded terms */
    assertEquals(1, results.totalHits);

    /* defining a query that searches for a broader concept */
    Query query = new TermQuery(new Term("subject", "military equipment"));

    results = searcher.search(query, 10);

    /* ... also returns the document as result */
    assertEquals(1, results.totalHits);
}

From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.URIbasedTermExpansionTest.java

License:Apache License

/**
 * This test indexes a sample metadata record (=lucene document) having a
 * "title", "description", and "subject" field, which is semantically
 * enriched by a URI pointing to a SKOS concept "weapons".
 * <p/>/*from   w ww  .  j a v  a2  s .co m*/
 * A search for "arms" returns that record as a result because "arms" is
 * defined as an alternative label (altLabel) for the concept "weapons".
 *
 * @throws IOException
 */
@Test
public void uriBasedTermExpansion() throws IOException {

    /* defining the document to be indexed */
    Document doc = new Document();
    doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
    doc.add(new Field("description",
            "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
                    + "The spear was mainly a thrusting weapon, but could also be thrown. "
                    + "It was the principal weapon of the auxiliary soldier... "
                    + "(second - fourth century, Arbeia Roman Fort).",
            TextField.TYPE_NOT_STORED));
    doc.add(new Field("subject", "http://www.ukat.org.uk/thesaurus/concept/859", TextField.TYPE_NOT_STORED));

    /* setting up the SKOS analyzer */
    String skosFile = "src/test/resources/skos_samples/ukat_examples.n3";
    String indexPath = "build/";

    /* ExpansionType.URI->the field to be analyzed (expanded) contains URIs */
    Analyzer skosAnalyzer = new SKOSAnalyzer(indexPath, skosFile, ExpansionType.URI);

    /* Define different analyzers for different fields */
    Map<String, Analyzer> analyzerPerField = new HashMap<>();
    analyzerPerField.put("subject", skosAnalyzer);
    PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), analyzerPerField);

    /* setting up a writer with a default (simple) analyzer */
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(indexAnalyzer));

    /* adding the document to the index */
    writer.addDocument(doc);

    /* defining a query that searches over all fields */
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD);

    /* creating a new searcher */
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    TopDocs results = searcher.search(builder.build(), 10);

    /* the document matches because "arms" is among the expanded terms */
    assertEquals(1, results.totalHits);

    /* defining a query that searches for a broader concept */
    Query query = new TermQuery(new Term("subject", "military equipment"));

    results = searcher.search(query, 10);

    /* ... also returns the document as result */
    assertEquals(1, results.totalHits);

}