List of usage examples for the org.apache.lucene.search.BooleanQuery constructor BooleanQuery()
BooleanQuery
From source file:com.svenjacobs.lugaene.GaeDirectoryTest.java
License:Apache License
@Test public void wholeCycle() throws Exception { // Index/*from w w w .j ava 2 s .c o m*/ final Directory directory = new GaeDirectory("Test"); final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44); final IndexWriterConfig config = GaeIndexWriterConfigHelper.create(Version.LUCENE_44, analyzer); final IndexWriter indexWriter = new IndexWriter(directory, config); final Document doc1 = new Document(); doc1.add(new StringField(FIELD_TITLE, "Title1", Field.Store.YES)); doc1.add(new TextField(FIELD_CONTENTS, "keyword1 keyword2 lorem ipsum", Field.Store.NO)); indexWriter.addDocument(doc1); final Document doc2 = new Document(); doc2.add(new StringField(FIELD_TITLE, "Title2", Field.Store.YES)); doc2.add(new TextField(FIELD_CONTENTS, "keyword3 keyword4 lorem ipsum", Field.Store.NO)); indexWriter.addDocument(doc2); indexWriter.close(); // Search final DirectoryReader reader = DirectoryReader.open(directory); final IndexSearcher searcher = new IndexSearcher(reader); final BooleanQuery query = new BooleanQuery(); query.add(new TermQuery(new Term(FIELD_TITLE, "Title1")), BooleanClause.Occur.MUST); query.add(new TermQuery(new Term(FIELD_CONTENTS, "lorem")), BooleanClause.Occur.MUST); ScoreDoc[] hits = searcher.search(query, 100).scoreDocs; assertThat(hits.length, is(1)); assertThat(searcher.doc(hits[0].doc).get(FIELD_TITLE), is("Title1")); hits = searcher.search(new TermQuery(new Term(FIELD_CONTENTS, "lorem")), 100).scoreDocs; assertThat(hits.length, is(2)); assertThat(searcher.doc(hits[0].doc).get(FIELD_TITLE), is("Title1")); assertThat(searcher.doc(hits[1].doc).get(FIELD_TITLE), is("Title2")); hits = searcher.search(new TermQuery(new Term(FIELD_CONTENTS, "keyword3")), 100).scoreDocs; assertThat(hits.length, is(1)); assertThat(searcher.doc(hits[0].doc).get(FIELD_TITLE), is("Title2")); }
From source file:com.sxc.lucene.searching.BooleanQueryTest.java
License:Apache License
public void testAnd() throws Exception { TermQuery searchingBooks = new TermQuery(new Term("subject", "search")); //#1 Query books2010 = //#2 NumericRangeQuery.newIntRange("pubmonth", 201001, //#2 201012, //#2 true, true); //#2 BooleanQuery searchingBooks2010 = new BooleanQuery(); //#3 searchingBooks2010.add(searchingBooks, BooleanClause.Occur.MUST); //#3 searchingBooks2010.add(books2010, BooleanClause.Occur.MUST); //#3 Directory dir = TestUtil.getBookIndexDirectory(); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); TopDocs matches = searcher.search(searchingBooks2010, 10); assertTrue(TestUtil.hitsIncludeTitle(searcher, matches, "Lucene in Action, Second Edition")); reader.close();/*from www. j av a2 s . co m*/ dir.close(); }
From source file:com.sxc.lucene.searching.BooleanQueryTest.java
License:Apache License
public void testOr() throws Exception { TermQuery methodologyBooks = new TermQuery( // #1 new Term("category", // #1 "/technology/computers/programming/methodology")); // #1 TermQuery easternPhilosophyBooks = new TermQuery( // #2 new Term("category", // #2 "/philosophy/eastern")); // #2 BooleanQuery enlightenmentBooks = new BooleanQuery(); // #3 enlightenmentBooks.add(methodologyBooks, // #3 BooleanClause.Occur.SHOULD); // #3 enlightenmentBooks.add(easternPhilosophyBooks, // #3 BooleanClause.Occur.SHOULD); // #3 Directory dir = TestUtil.getBookIndexDirectory(); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); TopDocs matches = searcher.search(enlightenmentBooks, 10); System.out.println("or = " + enlightenmentBooks); assertTrue(TestUtil.hitsIncludeTitle(searcher, matches, "Extreme Programming Explained")); assertTrue(TestUtil.hitsIncludeTitle(searcher, matches, "Tao Te Ching \u9053\u5FB7\u7D93")); reader.close();/*from w ww.j a v a2s . c o m*/ dir.close(); }
From source file:com.thoughtworks.studios.journey.jql.JourneyQuery.java
License:Open Source License
/**
 * Builds the Lucene query for this journey query and runs it against the journeys index.
 *
 * <p>The query starts from a match-all clause; every condition that reports
 * {@code matchingIndexes()} contributes one additional required clause. Results are sorted
 * on {@code Journeys.PROP_START_AT} as a long, reversed when {@code descOrder} is set.
 *
 * @return the nodes returned by the journeys index for the assembled query
 */
private Iterable<Node> query() {
    BooleanQuery combined = new BooleanQuery();
    combined.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);

    for (JourneyCondition condition : conditions) {
        if (!condition.matchingIndexes()) {
            // This condition contributes no index clause.
            continue;
        }
        combined.add(condition.indexQuery(app), BooleanClause.Occur.MUST);
    }

    SortField byStartAt = new SortField(Journeys.PROP_START_AT, SortField.LONG, descOrder);
    QueryContext context = new QueryContext(combined).sort(new Sort(byStartAt));
    return app.journeys().query(context);
}
From source file:com.thoughtworks.studios.journey.utils.LuceneUtils.java
License:Open Source License
public static Query negate(Query query) { BooleanQuery bq = new BooleanQuery(); bq.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); bq.add(query, BooleanClause.Occur.MUST_NOT); return bq;//from w w w .j a v a2 s.c om }
From source file:com.tuplejump.stargate.lucene.BasicIndexer.java
License:Apache License
/**
 * Deletes the documents matching all of the given terms, then asks the searcher manager
 * to refresh.
 *
 * <p>Each term becomes a required clause of a single boolean query passed to the index writer.
 *
 * @param terms the terms a document must contain to be deleted
 * @throws RuntimeException wrapping any {@link IOException} raised by the delete or refresh
 */
@Override
public void delete(Term... terms) {
    BooleanQuery deletion = new BooleanQuery();
    for (Term term : terms) {
        if (logger.isDebugEnabled()) {
            logger.debug(indexName + " Delete term - " + term);
        }
        TermQuery clause = new TermQuery(term);
        deletion.add(clause, BooleanClause.Occur.MUST);
    }

    try {
        indexWriter.deleteDocuments(deletion);
        searcherManager.maybeRefresh();
    } catch (IOException ioFailure) {
        throw new RuntimeException(ioFailure);
    }
}
From source file:com.tuplejump.stargate.lucene.NearRealTimeIndexer.java
License:Apache License
/**
 * Deletes the documents matching all of the given terms, stores the value returned by the
 * writer in {@code latest}, and asks the searcher reference manager to refresh.
 *
 * @param terms the terms a document must contain to be deleted
 * @throws RuntimeException wrapping any {@link IOException} raised by the delete or refresh
 */
@Override
public void delete(Term... terms) {
    try {
        // Every term is a required clause of one combined delete query.
        BooleanQuery deletion = new BooleanQuery();
        for (Term term : terms) {
            if (logger.isDebugEnabled()) {
                logger.debug(indexName + " Delete term - " + term);
            }
            TermQuery clause = new TermQuery(term);
            deletion.add(clause, BooleanClause.Occur.MUST);
        }

        latest = indexWriter.deleteDocuments(deletion);
        indexSearcherReferenceManager.maybeRefresh();
    } catch (IOException ioFailure) {
        throw new RuntimeException(ioFailure);
    }
}
From source file:com.tuplejump.stargate.lucene.query.BooleanCondition.java
License:Apache License
/**
 * {@inheritDoc}
 *
 * <p>Builds a boosted boolean query: the {@code must} conditions are added as required
 * clauses, then the {@code should} conditions as optional clauses, then the {@code not}
 * conditions as prohibited clauses.
 */
@Override
public Query query(Options schema) throws Exception {
    BooleanQuery combined = new BooleanQuery();
    combined.setBoost(boost);

    // Clause order is kept: required first, then optional, then prohibited.
    for (Condition required : must) {
        Query clause = required.query(schema);
        combined.add(clause, Occur.MUST);
    }
    for (Condition optional : should) {
        Query clause = optional.query(schema);
        combined.add(clause, Occur.SHOULD);
    }
    for (Condition prohibited : not) {
        Query clause = prohibited.query(schema);
        combined.add(clause, Occur.MUST_NOT);
    }

    return combined;
}
From source file:com.twentyn.patentSearch.DocumentSearch.java
License:Open Source License
public static void main(String[] args) throws Exception { System.out.println("Starting up..."); System.out.flush();/*ww w. j a v a 2s .co m*/ Options opts = new Options(); opts.addOption(Option.builder("x").longOpt("index").hasArg().required().desc("Path to index file to read") .build()); opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build()); opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build()); opts.addOption(Option.builder("f").longOpt("field").hasArg().desc("The indexed field to search").build()); opts.addOption( Option.builder("q").longOpt("query").hasArg().desc("The query to use when searching").build()); opts.addOption(Option.builder("l").longOpt("list-file").hasArg() .desc("A file containing a list of queries to run in sequence").build()); opts.addOption( Option.builder("e").longOpt("enumerate").desc("Enumerate the documents in the index").build()); opts.addOption(Option.builder("d").longOpt("dump").hasArg() .desc("Dump terms in the document index for a specified field").build()); opts.addOption( Option.builder("o").longOpt("output").hasArg().desc("Write results JSON to this file.").build()); opts.addOption(Option.builder("n").longOpt("inchi-field").hasArg() .desc("The index of the InChI field if an input TSV is specified.").build()); opts.addOption(Option.builder("s").longOpt("synonym-field").hasArg() .desc("The index of the chemical synonym field if an input TSV is specified.").build()); HelpFormatter helpFormatter = new HelpFormatter(); CommandLineParser cmdLineParser = new DefaultParser(); CommandLine cmdLine = null; try { cmdLine = cmdLineParser.parse(opts, args); } catch (ParseException e) { System.out.println("Caught exception when parsing command line: " + e.getMessage()); helpFormatter.printHelp("DocumentIndexer", opts); System.exit(1); } if (cmdLine.hasOption("help")) { helpFormatter.printHelp("DocumentIndexer", opts); System.exit(0); } if 
(!(cmdLine.hasOption("enumerate") || cmdLine.hasOption("dump") || (cmdLine.hasOption("field") && (cmdLine.hasOption("query") || cmdLine.hasOption("list-file"))))) { System.out.println("Must specify one of 'enumerate', 'dump', or 'field' + {'query', 'list-file'}"); helpFormatter.printHelp("DocumentIndexer", opts); System.exit(1); } if (cmdLine.hasOption("verbose")) { // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2 LoggerContext ctx = (LoggerContext) LogManager.getContext(false); Configuration ctxConfig = ctx.getConfiguration(); LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME); logConfig.setLevel(Level.DEBUG); ctx.updateLoggers(); LOGGER.debug("Verbose logging enabled"); } ObjectMapper objectMapper = new ObjectMapper(); objectMapper.enable(SerializationFeature.INDENT_OUTPUT); objectMapper.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.ANY); LOGGER.info("Opening index at " + cmdLine.getOptionValue("index")); try (Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath()); IndexReader indexReader = DirectoryReader.open(indexDir);) { if (cmdLine.hasOption("enumerate")) { /* Enumerate all documents in the index. * With help from * http://stackoverflow.com/questions/2311845/is-it-possible-to-iterate-through-documents-stored-in-lucene-index */ for (int i = 0; i < indexReader.maxDoc(); i++) { Document doc = indexReader.document(i); LOGGER.info("Doc " + i + ":"); LOGGER.info(doc); } } else if (cmdLine.hasOption("dump")) { /* Dump indexed terms for a specific field. 
* With help from http://stackoverflow.com/questions/11148036/find-list-of-terms-indexed-by-lucene */ Terms terms = SlowCompositeReaderWrapper.wrap(indexReader).terms(cmdLine.getOptionValue("dump")); LOGGER.info("Has positions: " + terms.hasPositions()); LOGGER.info("Has offsets: " + terms.hasOffsets()); LOGGER.info("Has freqs: " + terms.hasFreqs()); LOGGER.info("Stats: " + terms.getStats()); LOGGER.info(terms); TermsEnum termsEnum = terms.iterator(); BytesRef br = null; while ((br = termsEnum.next()) != null) { LOGGER.info(" " + br.utf8ToString()); } } else { IndexSearcher searcher = new IndexSearcher(indexReader); String field = cmdLine.getOptionValue("field"); List<Pair<String, String>> queries = null; if (cmdLine.hasOption("query")) { queries = Collections.singletonList(Pair.of("", cmdLine.getOptionValue("query"))); } else if (cmdLine.hasOption("list-file")) { if (!(cmdLine.hasOption("inchi-field") && cmdLine.hasOption("synonym-field"))) { LOGGER.error("Must specify both inchi-field and synonym-field when using list-file."); System.exit(1); } Integer inchiField = Integer.parseInt(cmdLine.getOptionValue("inchi-field")); Integer synonymField = Integer.parseInt(cmdLine.getOptionValue("synonym-field")); queries = new LinkedList<>(); BufferedReader r = new BufferedReader(new FileReader(cmdLine.getOptionValue("list-file"))); String line; while ((line = r.readLine()) != null) { line = line.trim(); if (!line.isEmpty()) { // TODO: use a proper TSV reader; this is intentionally terrible as is. 
String[] fields = line.split("\t"); queries.add(Pair.of(fields[inchiField].replace("\"", ""), fields[synonymField])); } } r.close(); } if (queries == null || queries.size() == 0) { LOGGER.error("Found no queries to run."); return; } List<SearchResult> searchResults = new ArrayList<>(queries.size()); for (Pair<String, String> queryPair : queries) { String inchi = queryPair.getLeft(); String rawQueryString = queryPair.getRight(); /* The Lucene query parser interprets the kind of structural annotations we see in chemical entities * as query directives, which is not what we want at all. Phrase queries seem to work adequately * with the analyzer we're currently using. */ String queryString = rawQueryString.trim().toLowerCase(); String[] parts = queryString.split("\\s+"); PhraseQuery query = new PhraseQuery(); for (String p : parts) { query.add(new Term(field, p)); } LOGGER.info("Running query: " + query.toString()); BooleanQuery bq = new BooleanQuery(); bq.add(query, BooleanClause.Occur.MUST); bq.add(new TermQuery(new Term(field, "yeast")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(field, "ferment")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(field, "fermentation")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(field, "fermentive")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(field, "saccharomyces")), BooleanClause.Occur.SHOULD); LOGGER.info(" Full query: " + bq.toString()); TopDocs topDocs = searcher.search(bq, 100); ScoreDoc[] scoreDocs = topDocs.scoreDocs; if (scoreDocs.length == 0) { LOGGER.info("Search returned no results."); } List<ResultDocument> results = new ArrayList<>(scoreDocs.length); for (int i = 0; i < scoreDocs.length; i++) { ScoreDoc scoreDoc = scoreDocs[i]; Document doc = indexReader.document(scoreDoc.doc); LOGGER.info("Doc " + i + ": " + scoreDoc.doc + ", score " + scoreDoc.score + ": " + doc.get("id") + ", " + doc.get("title")); results.add(new ResultDocument(scoreDoc.doc, 
scoreDoc.score, doc.get("title"), doc.get("id"), null)); } LOGGER.info("----- Done with query " + query.toString()); // TODO: reduce memory usage when not writing results to an output file. searchResults.add(new SearchResult(inchi, rawQueryString, bq, results)); } if (cmdLine.hasOption("output")) { try (FileWriter writer = new FileWriter(cmdLine.getOptionValue("output"));) { writer.write(objectMapper.writeValueAsString(searchResults)); } } } } }
From source file:com.twentyn.patentSearch.Searcher.java
License:Open Source License
private BooleanQuery makeQuery(String synonym, String field) { BooleanQuery bq = new BooleanQuery(); // Set the synonym as a required phrase query. Phrase queries handle multi-word synonyms, but require construction. String queryString = synonym.trim().toLowerCase(); String[] parts = queryString.split("\\s+"); PhraseQuery query = new PhraseQuery(); Arrays.stream(parts).forEach(p -> query.add(new Term(field, p))); bq.add(query, BooleanClause.Occur.MUST); // Append all keywords as optional clauses. The more of these we find, the higher the score will be. KEYWORDS.forEach(term -> bq.add(new TermQuery(new Term(field, term)), BooleanClause.Occur.SHOULD)); return bq;/*from w w w . j av a 2 s . c o m*/ }