Example usage for the org.apache.lucene.search.BooleanQuery constructor BooleanQuery()

List of usage examples for the org.apache.lucene.search.BooleanQuery constructor BooleanQuery()

Introduction

On this page you can find usage examples for the no-argument constructor of org.apache.lucene.search.BooleanQuery.

Prototype

BooleanQuery

Source Link

Usage

From source file:com.svenjacobs.lugaene.GaeDirectoryTest.java

License:Apache License

/**
 * End-to-end check of indexing and searching on a {@code GaeDirectory}: writes two
 * documents, then verifies a two-clause {@link BooleanQuery} and two plain term queries
 * against the stored titles.
 *
 * @throws Exception if indexing or searching fails
 */
@Test
public void wholeCycle() throws Exception {

    // Index

    final Directory directory = new GaeDirectory("Test");
    final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);

    final IndexWriterConfig config = GaeIndexWriterConfigHelper.create(Version.LUCENE_44, analyzer);
    final IndexWriter indexWriter = new IndexWriter(directory, config);

    final Document doc1 = new Document();

    doc1.add(new StringField(FIELD_TITLE, "Title1", Field.Store.YES));
    doc1.add(new TextField(FIELD_CONTENTS, "keyword1 keyword2 lorem ipsum", Field.Store.NO));

    indexWriter.addDocument(doc1);

    final Document doc2 = new Document();

    doc2.add(new StringField(FIELD_TITLE, "Title2", Field.Store.YES));
    doc2.add(new TextField(FIELD_CONTENTS, "keyword3 keyword4 lorem ipsum", Field.Store.NO));

    indexWriter.addDocument(doc2);

    indexWriter.close();

    // Search

    final DirectoryReader reader = DirectoryReader.open(directory);
    try {
        final IndexSearcher searcher = new IndexSearcher(reader);

        // Both clauses are MUST, so only a document matching title AND contents qualifies.
        final BooleanQuery query = new BooleanQuery();

        query.add(new TermQuery(new Term(FIELD_TITLE, "Title1")), BooleanClause.Occur.MUST);
        query.add(new TermQuery(new Term(FIELD_CONTENTS, "lorem")), BooleanClause.Occur.MUST);

        ScoreDoc[] hits = searcher.search(query, 100).scoreDocs;

        assertThat(hits.length, is(1));
        assertThat(searcher.doc(hits[0].doc).get(FIELD_TITLE), is("Title1"));

        // "lorem" occurs in the contents of both documents.
        hits = searcher.search(new TermQuery(new Term(FIELD_CONTENTS, "lorem")), 100).scoreDocs;

        assertThat(hits.length, is(2));
        assertThat(searcher.doc(hits[0].doc).get(FIELD_TITLE), is("Title1"));
        assertThat(searcher.doc(hits[1].doc).get(FIELD_TITLE), is("Title2"));

        // "keyword3" occurs only in the second document.
        hits = searcher.search(new TermQuery(new Term(FIELD_CONTENTS, "keyword3")), 100).scoreDocs;

        assertThat(hits.length, is(1));
        assertThat(searcher.doc(hits[0].doc).get(FIELD_TITLE), is("Title2"));
    } finally {
        // Release the reader even when a search or assertion fails.
        reader.close();
    }
}

From source file:com.sxc.lucene.searching.BooleanQueryTest.java

License:Apache License

/**
 * Checks that two {@code MUST} clauses act as a conjunction: a term query on
 * {@code subject} combined with an inclusive numeric range on {@code pubmonth}
 * must still find "Lucene in Action, Second Edition" in the book index.
 *
 * @throws Exception if the index cannot be opened or searched
 */
public void testAnd() throws Exception {
    // Clause 1: books whose subject contains "search".
    TermQuery searchingBooks = new TermQuery(new Term("subject", "search"));

    // Clause 2: pubmonth between 201001 and 201012, both bounds inclusive.
    Query books2010 = NumericRangeQuery.newIntRange("pubmonth", 201001, 201012, true, true);

    // Require both clauses.
    BooleanQuery searchingBooks2010 = new BooleanQuery();
    searchingBooks2010.add(searchingBooks, BooleanClause.Occur.MUST);
    searchingBooks2010.add(books2010, BooleanClause.Occur.MUST);

    Directory dir = TestUtil.getBookIndexDirectory();
    try {
        IndexReader reader = DirectoryReader.open(dir);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs matches = searcher.search(searchingBooks2010, 10);

            assertTrue(TestUtil.hitsIncludeTitle(searcher, matches, "Lucene in Action, Second Edition"));
        } finally {
            // Close the reader even if the search or the assertion fails.
            reader.close();
        }
    } finally {
        dir.close();
    }
}

From source file:com.sxc.lucene.searching.BooleanQueryTest.java

License:Apache License

/**
 * Checks that two {@code SHOULD} clauses act as a disjunction: a document in either
 * of two categories is expected among the hits.
 *
 * @throws Exception if the index cannot be opened or searched
 */
public void testOr() throws Exception {
    // Clause 1: books in the programming-methodology category.
    TermQuery methodologyBooks = new TermQuery(
            new Term("category", "/technology/computers/programming/methodology"));

    // Clause 2: books in the eastern-philosophy category.
    TermQuery easternPhilosophyBooks = new TermQuery(
            new Term("category", "/philosophy/eastern"));

    // Either clause may match.
    BooleanQuery enlightenmentBooks = new BooleanQuery();
    enlightenmentBooks.add(methodologyBooks, BooleanClause.Occur.SHOULD);
    enlightenmentBooks.add(easternPhilosophyBooks, BooleanClause.Occur.SHOULD);

    Directory dir = TestUtil.getBookIndexDirectory();
    try {
        IndexReader reader = DirectoryReader.open(dir);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs matches = searcher.search(enlightenmentBooks, 10);
            System.out.println("or = " + enlightenmentBooks);

            assertTrue(TestUtil.hitsIncludeTitle(searcher, matches, "Extreme Programming Explained"));
            assertTrue(TestUtil.hitsIncludeTitle(searcher, matches, "Tao Te Ching \u9053\u5FB7\u7D93"));
        } finally {
            // Close the reader even if the search or an assertion fails.
            reader.close();
        }
    } finally {
        dir.close();
    }
}

From source file:com.thoughtworks.studios.journey.jql.JourneyQuery.java

License:Open Source License

/**
 * Builds a Lucene query from every condition that reports {@code matchingIndexes()},
 * and runs it against the journeys index sorted by {@code Journeys.PROP_START_AT}.
 *
 * @return the nodes returned by the journeys index for the combined query
 */
private Iterable<Node> query() {
    BooleanQuery combined = new BooleanQuery();
    // Seed with a match-all clause so the query is valid even when no condition adds one.
    combined.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);

    for (JourneyCondition each : conditions) {
        if (!each.matchingIndexes()) {
            continue;
        }
        combined.add(each.indexQuery(app), BooleanClause.Occur.MUST);
    }

    SortField byStartTime = new SortField(Journeys.PROP_START_AT, SortField.LONG, descOrder);
    Sort ordering = new Sort(byStartTime);
    QueryContext context = new QueryContext(combined).sort(ordering);
    return app.journeys().query(context);
}

From source file:com.thoughtworks.studios.journey.utils.LuceneUtils.java

License:Open Source License

/**
 * Wraps {@code query} so that the result matches the documents the original does not.
 *
 * @param query the query to exclude
 * @return a boolean query with a match-all {@code MUST} clause and {@code query} as
 *         a {@code MUST_NOT} clause
 */
public static Query negate(Query query) {
    BooleanQuery complement = new BooleanQuery();
    // The match-all clause supplies the candidate set that MUST_NOT then filters.
    complement.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    complement.add(query, BooleanClause.Occur.MUST_NOT);
    return complement;
}

From source file:com.tuplejump.stargate.lucene.BasicIndexer.java

License:Apache License

/**
 * Deletes the documents that match all of the given terms, then asks the searcher
 * manager to refresh.
 *
 * @param terms the terms a document must contain to be deleted
 * @throws RuntimeException wrapping any {@link IOException} from the delete or refresh
 */
@Override
public void delete(Term... terms) {
    BooleanQuery allTerms = new BooleanQuery();
    for (int i = 0; i < terms.length; i++) {
        Term term = terms[i];
        if (logger.isDebugEnabled()) {
            logger.debug(indexName + " Delete term - " + term);
        }
        // Every term is required, so the clauses combine conjunctively.
        allTerms.add(new TermQuery(term), BooleanClause.Occur.MUST);
    }

    try {
        indexWriter.deleteDocuments(allTerms);
        searcherManager.maybeRefresh();
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}

From source file:com.tuplejump.stargate.lucene.NearRealTimeIndexer.java

License:Apache License

/**
 * Deletes the documents that match all of the given terms, stores the value returned
 * by the writer in {@code latest}, and asks the searcher reference manager to refresh.
 *
 * @param terms the terms a document must contain to be deleted
 * @throws RuntimeException wrapping any {@link IOException} from the delete or refresh
 */
@Override
public void delete(Term... terms) {
    try {
        BooleanQuery allTerms = new BooleanQuery();
        for (int i = 0; i < terms.length; i++) {
            Term term = terms[i];
            if (logger.isDebugEnabled()) {
                logger.debug(indexName + " Delete term - " + term);
            }
            // Every term is required, so the clauses combine conjunctively.
            allTerms.add(new TermQuery(term), BooleanClause.Occur.MUST);
        }
        latest = indexWriter.deleteDocuments(allTerms);
        indexSearcherReferenceManager.maybeRefresh();
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}

From source file:com.tuplejump.stargate.lucene.query.BooleanCondition.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>Produces a {@link BooleanQuery} carrying this condition's boost, with the
 * {@code must}, {@code should} and {@code not} sub-conditions added as
 * {@code MUST}, {@code SHOULD} and {@code MUST_NOT} clauses, in that order.
 */
@Override
public Query query(Options schema) throws Exception {
    BooleanQuery combined = new BooleanQuery();
    combined.setBoost(boost);

    for (Condition required : must) {
        Query clause = required.query(schema);
        combined.add(clause, Occur.MUST);
    }
    for (Condition optional : should) {
        Query clause = optional.query(schema);
        combined.add(clause, Occur.SHOULD);
    }
    for (Condition excluded : not) {
        Query clause = excluded.query(schema);
        combined.add(clause, Occur.MUST_NOT);
    }

    return combined;
}

From source file:com.twentyn.patentSearch.DocumentSearch.java

License:Open Source License

/**
 * Command-line entry point for inspecting and querying a Lucene index.
 *
 * <p>One of three modes is selected by the options:
 * <ul>
 *   <li>{@code --enumerate}: log every document in the index;</li>
 *   <li>{@code --dump FIELD}: log the statistics and terms indexed for a field;</li>
 *   <li>{@code --field FIELD} with {@code --query Q} or {@code --list-file F}: run each
 *       query as a required phrase plus a fixed set of optional fermentation-related
 *       keywords, log the hits, and optionally write them as JSON to {@code --output}.</li>
 * </ul>
 *
 * @param args command-line arguments; {@code --index} is required
 * @throws Exception if the index or any input/output file cannot be read or written
 */
public static void main(String[] args) throws Exception {
    System.out.println("Starting up...");
    System.out.flush();
    Options opts = new Options();
    opts.addOption(Option.builder("x").longOpt("index").hasArg().required().desc("Path to index file to read")
            .build());
    opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build());
    opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build());

    opts.addOption(Option.builder("f").longOpt("field").hasArg().desc("The indexed field to search").build());
    opts.addOption(
            Option.builder("q").longOpt("query").hasArg().desc("The query to use when searching").build());
    opts.addOption(Option.builder("l").longOpt("list-file").hasArg()
            .desc("A file containing a list of queries to run in sequence").build());
    opts.addOption(
            Option.builder("e").longOpt("enumerate").desc("Enumerate the documents in the index").build());
    opts.addOption(Option.builder("d").longOpt("dump").hasArg()
            .desc("Dump terms in the document index for a specified field").build());
    opts.addOption(
            Option.builder("o").longOpt("output").hasArg().desc("Write results JSON to this file.").build());
    opts.addOption(Option.builder("n").longOpt("inchi-field").hasArg()
            .desc("The index of the InChI field if an input TSV is specified.").build());
    opts.addOption(Option.builder("s").longOpt("synonym-field").hasArg()
            .desc("The index of the chemical synonym field if an input TSV is specified.").build());

    HelpFormatter helpFormatter = new HelpFormatter();
    CommandLineParser cmdLineParser = new DefaultParser();
    CommandLine cmdLine = null;
    try {
        cmdLine = cmdLineParser.parse(opts, args);
    } catch (ParseException e) {
        System.out.println("Caught exception when parsing command line: " + e.getMessage());
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(1);
    }

    if (cmdLine.hasOption("help")) {
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(0);
    }

    // Exactly one mode must be selectable: enumerate, dump, or field + (query | list-file).
    if (!(cmdLine.hasOption("enumerate") || cmdLine.hasOption("dump") || (cmdLine.hasOption("field")
            && (cmdLine.hasOption("query") || cmdLine.hasOption("list-file"))))) {
        System.out.println("Must specify one of 'enumerate', 'dump', or 'field' + {'query', 'list-file'}");
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(1);
    }

    if (cmdLine.hasOption("verbose")) {
        // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2
        LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
        Configuration ctxConfig = ctx.getConfiguration();
        LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME);
        logConfig.setLevel(Level.DEBUG);

        ctx.updateLoggers();
        LOGGER.debug("Verbose logging enabled");
    }

    ObjectMapper objectMapper = new ObjectMapper();
    objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
    objectMapper.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.ANY);

    LOGGER.info("Opening index at " + cmdLine.getOptionValue("index"));

    try (Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath());
            IndexReader indexReader = DirectoryReader.open(indexDir);) {
        if (cmdLine.hasOption("enumerate")) {
            /* Enumerate all documents in the index.
             * With help from
             * http://stackoverflow.com/questions/2311845/is-it-possible-to-iterate-through-documents-stored-in-lucene-index
             */
            for (int i = 0; i < indexReader.maxDoc(); i++) {
                Document doc = indexReader.document(i);
                LOGGER.info("Doc " + i + ":");
                LOGGER.info(doc);
            }
        } else if (cmdLine.hasOption("dump")) {
            /* Dump indexed terms for a specific field.
             * With help from http://stackoverflow.com/questions/11148036/find-list-of-terms-indexed-by-lucene */
            Terms terms = SlowCompositeReaderWrapper.wrap(indexReader).terms(cmdLine.getOptionValue("dump"));
            if (terms == null) {
                // terms() may return null when the field is absent; report it instead of hitting an NPE.
                LOGGER.error("No terms found for field " + cmdLine.getOptionValue("dump"));
                return;
            }
            LOGGER.info("Has positions: " + terms.hasPositions());
            LOGGER.info("Has offsets:   " + terms.hasOffsets());
            LOGGER.info("Has freqs:     " + terms.hasFreqs());
            LOGGER.info("Stats:         " + terms.getStats());
            LOGGER.info(terms);
            TermsEnum termsEnum = terms.iterator();
            BytesRef br = null;
            while ((br = termsEnum.next()) != null) {
                LOGGER.info("  " + br.utf8ToString());
            }

        } else {
            IndexSearcher searcher = new IndexSearcher(indexReader);
            String field = cmdLine.getOptionValue("field");

            // Each pair is (InChI, raw query string); a single --query has no InChI.
            List<Pair<String, String>> queries = null;
            if (cmdLine.hasOption("query")) {
                queries = Collections.singletonList(Pair.of("", cmdLine.getOptionValue("query")));
            } else if (cmdLine.hasOption("list-file")) {
                if (!(cmdLine.hasOption("inchi-field") && cmdLine.hasOption("synonym-field"))) {
                    LOGGER.error("Must specify both inchi-field and synonym-field when using list-file.");
                    System.exit(1);
                }
                int inchiField = Integer.parseInt(cmdLine.getOptionValue("inchi-field"));
                int synonymField = Integer.parseInt(cmdLine.getOptionValue("synonym-field"));

                queries = new LinkedList<>();
                // try-with-resources so the file handle is released even if a line fails to parse.
                try (BufferedReader r = new BufferedReader(
                        new FileReader(cmdLine.getOptionValue("list-file")))) {
                    String line;
                    while ((line = r.readLine()) != null) {
                        line = line.trim();
                        if (!line.isEmpty()) {
                            // TODO: use a proper TSV reader; this is intentionally terrible as is.
                            String[] fields = line.split("\t");
                            queries.add(Pair.of(fields[inchiField].replace("\"", ""), fields[synonymField]));
                        }
                    }
                }
            }

            if (queries == null || queries.size() == 0) {
                LOGGER.error("Found no queries to run.");
                return;
            }

            List<SearchResult> searchResults = new ArrayList<>(queries.size());
            for (Pair<String, String> queryPair : queries) {
                String inchi = queryPair.getLeft();
                String rawQueryString = queryPair.getRight();
                /* The Lucene query parser interprets the kind of structural annotations we see in chemical entities
                 * as query directives, which is not what we want at all.  Phrase queries seem to work adequately
                 * with the analyzer we're currently using. */
                // Locale.ROOT keeps lower-casing independent of the JVM's default locale.
                String queryString = rawQueryString.trim().toLowerCase(java.util.Locale.ROOT);
                String[] parts = queryString.split("\\s+");
                PhraseQuery query = new PhraseQuery();
                for (String p : parts) {
                    query.add(new Term(field, p));
                }
                LOGGER.info("Running query: " + query.toString());

                // The phrase is required; the keywords are optional clauses added alongside it.
                BooleanQuery bq = new BooleanQuery();
                bq.add(query, BooleanClause.Occur.MUST);
                bq.add(new TermQuery(new Term(field, "yeast")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "ferment")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "fermentation")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "fermentive")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "saccharomyces")), BooleanClause.Occur.SHOULD);

                LOGGER.info("  Full query: " + bq.toString());

                TopDocs topDocs = searcher.search(bq, 100);
                ScoreDoc[] scoreDocs = topDocs.scoreDocs;
                if (scoreDocs.length == 0) {
                    LOGGER.info("Search returned no results.");
                }
                List<ResultDocument> results = new ArrayList<>(scoreDocs.length);
                for (int i = 0; i < scoreDocs.length; i++) {
                    ScoreDoc scoreDoc = scoreDocs[i];
                    Document doc = indexReader.document(scoreDoc.doc);
                    LOGGER.info("Doc " + i + ": " + scoreDoc.doc + ", score " + scoreDoc.score + ": "
                            + doc.get("id") + ", " + doc.get("title"));
                    results.add(new ResultDocument(scoreDoc.doc, scoreDoc.score, doc.get("title"),
                            doc.get("id"), null));
                }
                LOGGER.info("----- Done with query " + query.toString());
                // TODO: reduce memory usage when not writing results to an output file.
                searchResults.add(new SearchResult(inchi, rawQueryString, bq, results));
            }

            if (cmdLine.hasOption("output")) {
                try (FileWriter writer = new FileWriter(cmdLine.getOptionValue("output"));) {
                    writer.write(objectMapper.writeValueAsString(searchResults));
                }
            }
        }
    }
}

From source file:com.twentyn.patentSearch.Searcher.java

License:Open Source License

/**
 * Builds the search query for one synonym: the synonym as a required phrase, plus every
 * entry of {@code KEYWORDS} as an optional term clause on the same field.
 *
 * @param synonym the synonym text; trimmed, lower-cased and split on whitespace
 * @param field the indexed field that all clauses target
 * @return the combined boolean query
 */
private BooleanQuery makeQuery(String synonym, String field) {
    BooleanQuery bq = new BooleanQuery();

    // Set the synonym as a required phrase query.  Phrase queries handle multi-word synonyms, but require construction.
    // Locale.ROOT keeps lower-casing independent of the JVM's default locale.
    String queryString = synonym.trim().toLowerCase(java.util.Locale.ROOT);
    String[] parts = queryString.split("\\s+");
    PhraseQuery query = new PhraseQuery();
    for (String part : parts) {
        query.add(new Term(field, part));
    }
    bq.add(query, BooleanClause.Occur.MUST);

    // Append all keywords as optional clauses.  The more of these we find, the higher the score will be.
    KEYWORDS.forEach(term -> bq.add(new TermQuery(new Term(field, term)), BooleanClause.Occur.SHOULD));

    return bq;
}