Example usage for the org.apache.lucene.search.BooleanQuery constructor BooleanQuery()

Introduction

On this page you can find example usages of the BooleanQuery() constructor of org.apache.lucene.search.BooleanQuery.

Prototype

BooleanQuery

Source Link

Usage

From source file:com.svenjacobs.lugaene.GaeDirectoryTest.java

License:Apache License

/**
 * Exercises a full index-then-search cycle against a {@code GaeDirectory}: two documents are
 * written, the index is reopened for reading, and three searches are checked against the
 * stored titles.
 *
 * @throws Exception if indexing, searching or closing a resource fails
 */
@Test
public void wholeCycle() throws Exception {

    final Directory directory = new GaeDirectory("Test");

    try {
        // Index

        final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);

        final IndexWriterConfig config = GaeIndexWriterConfigHelper.create(Version.LUCENE_44, analyzer);
        final IndexWriter indexWriter = new IndexWriter(directory, config);

        final Document doc1 = new Document();

        doc1.add(new StringField(FIELD_TITLE, "Title1", Field.Store.YES));
        doc1.add(new TextField(FIELD_CONTENTS, "keyword1 keyword2 lorem ipsum", Field.Store.NO));

        indexWriter.addDocument(doc1);

        final Document doc2 = new Document();

        doc2.add(new StringField(FIELD_TITLE, "Title2", Field.Store.YES));
        doc2.add(new TextField(FIELD_CONTENTS, "keyword3 keyword4 lorem ipsum", Field.Store.NO));

        indexWriter.addDocument(doc2);

        // The writer is closed before the reader is opened so the reader sees both documents.
        indexWriter.close();

        // Search

        final DirectoryReader reader = DirectoryReader.open(directory);

        try {
            final IndexSearcher searcher = new IndexSearcher(reader);

            // Both clauses are required: only the first document has this title AND contains "lorem".
            final BooleanQuery query = new BooleanQuery();

            query.add(new TermQuery(new Term(FIELD_TITLE, "Title1")), BooleanClause.Occur.MUST);
            query.add(new TermQuery(new Term(FIELD_CONTENTS, "lorem")), BooleanClause.Occur.MUST);

            ScoreDoc[] hits = searcher.search(query, 100).scoreDocs;

            assertThat(hits.length, is(1));
            assertThat(searcher.doc(hits[0].doc).get(FIELD_TITLE), is("Title1"));

            // "lorem" occurs in the contents of both documents.
            hits = searcher.search(new TermQuery(new Term(FIELD_CONTENTS, "lorem")), 100).scoreDocs;

            assertThat(hits.length, is(2));
            assertThat(searcher.doc(hits[0].doc).get(FIELD_TITLE), is("Title1"));
            assertThat(searcher.doc(hits[1].doc).get(FIELD_TITLE), is("Title2"));

            // "keyword3" occurs only in the second document.
            hits = searcher.search(new TermQuery(new Term(FIELD_CONTENTS, "keyword3")), 100).scoreDocs;

            assertThat(hits.length, is(1));
            assertThat(searcher.doc(hits[0].doc).get(FIELD_TITLE), is("Title2"));
        } finally {
            // Release the reader even when an assertion above fails.
            reader.close();
        }
    } finally {
        directory.close();
    }
}

From source file:com.sxc.lucene.searching.BooleanQueryTest.java

License:Apache License

/**
 * Checks that a {@code BooleanQuery} with two required clauses acts as a logical AND: a book
 * must have the subject term "search" and a {@code pubmonth} within 2010 to match.
 *
 * @throws Exception if the index cannot be opened or searched
 */
public void testAnd() throws Exception {
    // Clause 1: every book whose subject contains "search".
    TermQuery searchingBooks = new TermQuery(new Term("subject", "search"));

    // Clause 2: every book with pubmonth in 201001..201012, both bounds inclusive.
    Query books2010 = NumericRangeQuery.newIntRange("pubmonth", 201001, 201012, true, true);

    // Combine both clauses as MUST so a hit has to satisfy each of them.
    BooleanQuery searchingBooks2010 = new BooleanQuery();
    searchingBooks2010.add(searchingBooks, BooleanClause.Occur.MUST);
    searchingBooks2010.add(books2010, BooleanClause.Occur.MUST);

    Directory dir = TestUtil.getBookIndexDirectory();
    try {
        IndexReader reader = DirectoryReader.open(dir);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs matches = searcher.search(searchingBooks2010, 10);

            assertTrue(TestUtil.hitsIncludeTitle(searcher, matches, "Lucene in Action, Second Edition"));
        } finally {
            // Close the reader even when the search or the assertion fails.
            reader.close();
        }
    } finally {
        dir.close();
    }
}

From source file:com.sxc.lucene.searching.BooleanQueryTest.java

License:Apache License

/**
 * Checks that a {@code BooleanQuery} with two optional clauses acts as a logical OR: a book in
 * either of the two categories is expected among the hits.
 *
 * @throws Exception if the index cannot be opened or searched
 */
public void testOr() throws Exception {
    // Clause 1: books in the programming-methodology category.
    TermQuery methodologyBooks = new TermQuery(
            new Term("category", "/technology/computers/programming/methodology"));

    // Clause 2: books in the eastern-philosophy category.
    TermQuery easternPhilosophyBooks = new TermQuery(new Term("category", "/philosophy/eastern"));

    // Combine both clauses as SHOULD so matching either one is enough.
    BooleanQuery enlightenmentBooks = new BooleanQuery();
    enlightenmentBooks.add(methodologyBooks, BooleanClause.Occur.SHOULD);
    enlightenmentBooks.add(easternPhilosophyBooks, BooleanClause.Occur.SHOULD);

    Directory dir = TestUtil.getBookIndexDirectory();
    try {
        IndexReader reader = DirectoryReader.open(dir);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs matches = searcher.search(enlightenmentBooks, 10);
            System.out.println("or = " + enlightenmentBooks);

            assertTrue(TestUtil.hitsIncludeTitle(searcher, matches, "Extreme Programming Explained"));
            assertTrue(TestUtil.hitsIncludeTitle(searcher, matches, "Tao Te Ching \u9053\u5FB7\u7D93"));
        } finally {
            // Close the reader even when the search or an assertion fails.
            reader.close();
        }
    } finally {
        dir.close();
    }
}

From source file:com.thoughtworks.studios.journey.jql.JourneyQuery.java

License:Open Source License

/**
 * Builds one boolean query from the conditions that report {@code matchingIndexes()}, and runs
 * it through {@code app.journeys().query(...)} sorted on {@code Journeys.PROP_START_AT}.
 *
 * @return the nodes returned by the journeys query
 */
private Iterable<Node> query() {
    BooleanQuery combined = new BooleanQuery();
    // The match-all clause is always present, so the query is valid even if no condition adds a clause.
    combined.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);

    for (JourneyCondition each : conditions) {
        if (!each.matchingIndexes()) {
            continue;
        }
        combined.add(each.indexQuery(app), BooleanClause.Occur.MUST);
    }

    SortField byStartAt = new SortField(Journeys.PROP_START_AT, SortField.LONG, descOrder);
    QueryContext context = new QueryContext(combined).sort(new Sort(byStartAt));
    return app.journeys().query(context);
}

From source file:com.thoughtworks.studios.journey.utils.LuceneUtils.java

License:Open Source License

/**
 * Builds the complement of {@code query}: a boolean query made of a required match-all clause
 * and {@code query} as a prohibited clause.
 *
 * @param query the query whose matches should be excluded
 * @return a new {@code BooleanQuery} holding the two clauses
 */
public static Query negate(Query query) {
    final BooleanQuery complement = new BooleanQuery();
    // The required match-all clause supplies the candidates that the prohibited clause filters.
    complement.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    complement.add(query, BooleanClause.Occur.MUST_NOT);
    return complement;
}

From source file:com.tuplejump.stargate.lucene.BasicIndexer.java

License:Apache License

/**
 * Deletes through the index writer using a boolean query in which every given term is a
 * required clause, then asks the searcher manager to refresh.
 *
 * @param terms the terms that are each added as a required clause of the delete query
 * @throws RuntimeException wrapping any {@code IOException} from the delete or the refresh
 */
@Override
public void delete(Term... terms) {
    try {
        final BooleanQuery allTerms = new BooleanQuery();
        for (Term term : terms) {
            if (logger.isDebugEnabled()) {
                logger.debug(indexName + " Delete term - " + term);
            }
            allTerms.add(new TermQuery(term), BooleanClause.Occur.MUST);
        }
        indexWriter.deleteDocuments(allTerms);
        searcherManager.maybeRefresh();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.tuplejump.stargate.lucene.NearRealTimeIndexer.java

License:Apache License

/**
 * Deletes through the index writer using a boolean query in which every given term is a
 * required clause, stores the writer's return value in {@code latest}, then asks the searcher
 * reference manager to refresh.
 *
 * @param terms the terms that are each added as a required clause of the delete query
 * @throws RuntimeException wrapping any {@code IOException} from the delete or the refresh
 */
@Override
public void delete(Term... terms) {
    final BooleanQuery allTerms = new BooleanQuery();
    for (Term term : terms) {
        if (logger.isDebugEnabled()) {
            logger.debug(indexName + " Delete term - " + term);
        }
        allTerms.add(new TermQuery(term), BooleanClause.Occur.MUST);
    }

    try {
        latest = indexWriter.deleteDocuments(allTerms);
        indexSearcherReferenceManager.maybeRefresh();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.tuplejump.stargate.lucene.query.BooleanCondition.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>Builds a {@code BooleanQuery} carrying this condition's boost. Each {@code must}
 * condition becomes a required clause, each {@code should} condition an optional clause and
 * each {@code not} condition a prohibited clause.
 */
@Override
public Query query(Options schema) throws Exception {
    BooleanQuery combined = new BooleanQuery();
    combined.setBoost(boost);

    // Required clauses.
    for (Condition required : must) {
        combined.add(required.query(schema), Occur.MUST);
    }

    // Optional clauses.
    for (Condition optional : should) {
        combined.add(optional.query(schema), Occur.SHOULD);
    }

    // Prohibited clauses.
    for (Condition prohibited : not) {
        combined.add(prohibited.query(schema), Occur.MUST_NOT);
    }

    return combined;
}

From source file:com.twentyn.patentSearch.DocumentSearch.java

License:Open Source License

/**
 * Command-line entry point for inspecting and querying a Lucene index.
 *
 * <p>The index location is taken from the required {@code --index} option. What runs next
 * depends on the other options, checked in this order:
 * <ul>
 *   <li>{@code --enumerate}: log every document id below {@code maxDoc()};</li>
 *   <li>{@code --dump <field>}: log term statistics and every term of that field;</li>
 *   <li>{@code --field <field>} with {@code --query <text>} or {@code --list-file <tsv>}: run each
 *       query as a required phrase plus a fixed set of optional keywords, log up to 100 hits per
 *       query and, when {@code --output} is given, write all results to that file as JSON.</li>
 * </ul>
 *
 * <p>The process exits with status 1 on a command-line parse error or an invalid option
 * combination, and with status 0 after printing help.
 *
 * @param args the command-line arguments
 * @throws Exception if the index, the list file or the output file cannot be read or written
 */
public static void main(String[] args) throws Exception {
    System.out.println("Starting up...");
    System.out.flush();
    Options opts = new Options();
    opts.addOption(Option.builder("x").longOpt("index").hasArg().required().desc("Path to index file to read")
            .build());
    opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build());
    opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build());

    opts.addOption(Option.builder("f").longOpt("field").hasArg().desc("The indexed field to search").build());
    opts.addOption(
            Option.builder("q").longOpt("query").hasArg().desc("The query to use when searching").build());
    opts.addOption(Option.builder("l").longOpt("list-file").hasArg()
            .desc("A file containing a list of queries to run in sequence").build());
    opts.addOption(
            Option.builder("e").longOpt("enumerate").desc("Enumerate the documents in the index").build());
    opts.addOption(Option.builder("d").longOpt("dump").hasArg()
            .desc("Dump terms in the document index for a specified field").build());
    opts.addOption(
            Option.builder("o").longOpt("output").hasArg().desc("Write results JSON to this file.").build());
    opts.addOption(Option.builder("n").longOpt("inchi-field").hasArg()
            .desc("The index of the InChI field if an input TSV is specified.").build());
    opts.addOption(Option.builder("s").longOpt("synonym-field").hasArg()
            .desc("The index of the chemical synonym field if an input TSV is specified.").build());

    HelpFormatter helpFormatter = new HelpFormatter();
    CommandLineParser cmdLineParser = new DefaultParser();
    CommandLine cmdLine = null;
    try {
        cmdLine = cmdLineParser.parse(opts, args);
    } catch (ParseException e) {
        System.out.println("Caught exception when parsing command line: " + e.getMessage());
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(1);
    }

    if (cmdLine.hasOption("help")) {
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(0);
    }

    // A run needs one mode: enumerate, dump, or a field combined with a query source.
    if (!(cmdLine.hasOption("enumerate") || cmdLine.hasOption("dump") || (cmdLine.hasOption("field")
            && (cmdLine.hasOption("query") || cmdLine.hasOption("list-file"))))) {
        System.out.println("Must specify one of 'enumerate', 'dump', or 'field' + {'query', 'list-file'}");
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(1);
    }

    if (cmdLine.hasOption("verbose")) {
        // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2
        LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
        Configuration ctxConfig = ctx.getConfiguration();
        LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME);
        logConfig.setLevel(Level.DEBUG);

        ctx.updateLoggers();
        LOGGER.debug("Verbose logging enabled");
    }

    ObjectMapper objectMapper = new ObjectMapper();
    objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
    objectMapper.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.ANY);

    LOGGER.info("Opening index at " + cmdLine.getOptionValue("index"));

    try (Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath());
            IndexReader indexReader = DirectoryReader.open(indexDir);) {
        if (cmdLine.hasOption("enumerate")) {
            /* Enumerate all documents in the index.
             * With help from
             * http://stackoverflow.com/questions/2311845/is-it-possible-to-iterate-through-documents-stored-in-lucene-index
             */
            for (int i = 0; i < indexReader.maxDoc(); i++) {
                Document doc = indexReader.document(i);
                LOGGER.info("Doc " + i + ":");
                LOGGER.info(doc);
            }
        } else if (cmdLine.hasOption("dump")) {
            /* Dump indexed terms for a specific field.
             * With help from http://stackoverflow.com/questions/11148036/find-list-of-terms-indexed-by-lucene */
            Terms terms = SlowCompositeReaderWrapper.wrap(indexReader).terms(cmdLine.getOptionValue("dump"));
            LOGGER.info("Has positions: " + terms.hasPositions());
            LOGGER.info("Has offsets:   " + terms.hasOffsets());
            LOGGER.info("Has freqs:     " + terms.hasFreqs());
            LOGGER.info("Stats:         " + terms.getStats());
            LOGGER.info(terms);
            TermsEnum termsEnum = terms.iterator();
            BytesRef br = null;
            while ((br = termsEnum.next()) != null) {
                LOGGER.info("  " + br.utf8ToString());
            }

        } else {
            IndexSearcher searcher = new IndexSearcher(indexReader);
            String field = cmdLine.getOptionValue("field");

            // Each pair is (InChI, raw query text); a single --query has no InChI, hence "".
            List<Pair<String, String>> queries = null;
            if (cmdLine.hasOption("query")) {
                queries = Collections.singletonList(Pair.of("", cmdLine.getOptionValue("query")));
            } else if (cmdLine.hasOption("list-file")) {
                if (!(cmdLine.hasOption("inchi-field") && cmdLine.hasOption("synonym-field"))) {
                    LOGGER.error("Must specify both inchi-field and synonym-field when using list-file.");
                    System.exit(1);
                }
                int inchiField = Integer.parseInt(cmdLine.getOptionValue("inchi-field"));
                int synonymField = Integer.parseInt(cmdLine.getOptionValue("synonym-field"));

                queries = new ArrayList<>();
                // try-with-resources closes the list file even if reading or splitting a line throws.
                try (BufferedReader r = new BufferedReader(
                        new FileReader(cmdLine.getOptionValue("list-file")))) {
                    String line;
                    while ((line = r.readLine()) != null) {
                        line = line.trim();
                        if (!line.isEmpty()) {
                            // TODO: use a proper TSV reader; this is intentionally terrible as is.
                            String[] fields = line.split("\t");
                            queries.add(Pair.of(fields[inchiField].replace("\"", ""), fields[synonymField]));
                        }
                    }
                }
            }

            if (queries == null || queries.isEmpty()) {
                LOGGER.error("Found no queries to run.");
                return;
            }

            List<SearchResult> searchResults = new ArrayList<>(queries.size());
            for (Pair<String, String> queryPair : queries) {
                String inchi = queryPair.getLeft();
                String rawQueryString = queryPair.getRight();
                /* The Lucene query parser interprets the kind of structural annotations we see in chemical entities
                 * as query directives, which is not what we want at all.  Phrase queries seem to work adequately
                 * with the analyzer we're currently using. */
                // Lower-case with a fixed locale so the terms do not vary with the JVM's default locale.
                String queryString = rawQueryString.trim().toLowerCase(java.util.Locale.ROOT);
                String[] parts = queryString.split("\\s+");
                PhraseQuery query = new PhraseQuery();
                for (String p : parts) {
                    query.add(new Term(field, p));
                }
                LOGGER.info("Running query: " + query.toString());

                // The phrase is required; the keywords below are optional clauses.
                BooleanQuery bq = new BooleanQuery();
                bq.add(query, BooleanClause.Occur.MUST);
                bq.add(new TermQuery(new Term(field, "yeast")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "ferment")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "fermentation")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "fermentive")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "saccharomyces")), BooleanClause.Occur.SHOULD);

                LOGGER.info("  Full query: " + bq.toString());

                TopDocs topDocs = searcher.search(bq, 100);
                ScoreDoc[] scoreDocs = topDocs.scoreDocs;
                if (scoreDocs.length == 0) {
                    LOGGER.info("Search returned no results.");
                }
                List<ResultDocument> results = new ArrayList<>(scoreDocs.length);
                for (int i = 0; i < scoreDocs.length; i++) {
                    ScoreDoc scoreDoc = scoreDocs[i];
                    Document doc = indexReader.document(scoreDoc.doc);
                    LOGGER.info("Doc " + i + ": " + scoreDoc.doc + ", score " + scoreDoc.score + ": "
                            + doc.get("id") + ", " + doc.get("title"));
                    results.add(new ResultDocument(scoreDoc.doc, scoreDoc.score, doc.get("title"),
                            doc.get("id"), null));
                }
                LOGGER.info("----- Done with query " + query.toString());
                // TODO: reduce memory usage when not writing results to an output file.
                searchResults.add(new SearchResult(inchi, rawQueryString, bq, results));
            }

            if (cmdLine.hasOption("output")) {
                try (FileWriter writer = new FileWriter(cmdLine.getOptionValue("output"));) {
                    writer.write(objectMapper.writeValueAsString(searchResults));
                }
            }
        }
    }
}

From source file:com.twentyn.patentSearch.Searcher.java

License:Open Source License

/**
 * Builds the search query for one synonym: the synonym as a required phrase, plus every entry
 * of {@code KEYWORDS} as an optional term clause on the same field.
 *
 * @param synonym the text to search for; it is trimmed, lower-cased and split on whitespace
 * @param field the indexed field that every term of the query targets
 * @return the combined boolean query
 */
private BooleanQuery makeQuery(String synonym, String field) {
    BooleanQuery bq = new BooleanQuery();

    // Set the synonym as a required phrase query.  Phrase queries handle multi-word synonyms, but require construction.
    // Lower-case with a fixed locale so the terms do not vary with the JVM's default locale.
    String queryString = synonym.trim().toLowerCase(java.util.Locale.ROOT);
    PhraseQuery query = new PhraseQuery();
    for (String part : queryString.split("\\s+")) {
        query.add(new Term(field, part));
    }
    bq.add(query, BooleanClause.Occur.MUST);

    // Append all keywords as optional clauses.  The more of these we find, the higher the score will be.
    KEYWORDS.forEach(term -> bq.add(new TermQuery(new Term(field, term)), BooleanClause.Occur.SHOULD));

    return bq;
}