List of usage examples for org.apache.lucene.search BooleanQuery.Builder BooleanQuery.Builder
BooleanQuery.Builder
From source file:SimpleNaiveBayesClassifier.java
License:Apache License
/** * count the number of documents in the index having at least a value for the 'class' field * * @return the no. of documents having a value for the 'class' field * @throws IOException if accessing to term vectors or search fails *//*ww w . j a va 2 s . c o m*/ protected int countDocsWithClass() throws IOException { int docCount = MultiFields.getTerms(this.leafReader, this.classFieldName).getDocCount(); if (docCount == -1) { // in case codec doesn't support getDocCount TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector(); BooleanQuery.Builder q = new BooleanQuery.Builder(); q.add(new BooleanClause( new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))), BooleanClause.Occur.MUST)); if (query != null) { q.add(query, BooleanClause.Occur.MUST); } indexSearcher.search(q.build(), classQueryCountCollector); docCount = classQueryCountCollector.getTotalHits(); } return docCount; }
From source file:SimpleNaiveBayesClassifier.java
License:Apache License
/** * Returns the number of documents of the input class ( from the whole index or from a subset) * that contains the word ( in a specific field or in all the fields if no one selected) * @param word the token produced by the analyzer * @param term the term representing the class * @return the number of documents of the input class * @throws IOException if a low level I/O problem happens *///from w w w . j a v a 2 s. com private int getWordFreqForClass(String word, Term term) throws IOException { BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder(); BooleanQuery.Builder subQuery = new BooleanQuery.Builder(); for (String textFieldName : textFieldNames) { subQuery.add( new BooleanClause(new TermQuery(new Term(textFieldName, word)), BooleanClause.Occur.SHOULD)); } booleanQuery.add(new BooleanClause(subQuery.build(), BooleanClause.Occur.MUST)); booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST)); if (query != null) { booleanQuery.add(query, BooleanClause.Occur.MUST); } TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector(); indexSearcher.search(booleanQuery.build(), totalHitCountCollector); return totalHitCountCollector.getTotalHits(); }
From source file:SearcherTest.java
/** * ???BooleanQuery// w ww. j a v a2 s .c om * BooleanQuery???Query * ?Query???Query * ?BooleanQuery?? * BooleanQuery?????API?? * ?BooleanQuery????API? * * @throws Exception */ @Test public void testBooleanQuery() throws Exception { String searchField = "contents"; String q1 = "xxxxxxxxx"; String q2 = "oooooooooooooooo"; Query query1 = new TermQuery(new Term(searchField, q1)); Query query2 = new TermQuery(new Term(searchField, q2)); BooleanQuery.Builder builder = new BooleanQuery.Builder(); // 1MUSTMUST??? // 2MUSTMUST_NOT??MUST_NOT?? // 3SHOULDMUST_NOT?MUSTMUST_NOT // 4SHOULDMUSTMUST??,SHOULD??? // 5SHOULDSHOULD??? // 6MUST_NOTMUST_NOT? builder.add(query1, BooleanClause.Occur.MUST); builder.add(query2, BooleanClause.Occur.MUST); BooleanQuery booleanQuery = builder.build(); TopDocs hits = is.search(booleanQuery, 10); System.out.println("? " + q1 + "And" + q2 + "" + hits.totalHits + ""); for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); System.out.println(doc.get("fullPath")); } }
From source file:KNearestNeighbourDocumentClassifier.java
License:Apache License
/** * Returns the top k results from a More Like This query based on the input document * * @param document the document to use for More Like This search * @return the top results for the MLT query * @throws IOException If there is a low-level I/O error *///from w ww . ja v a 2 s . co m private TopDocs knnSearch(Document document) throws IOException { BooleanQuery.Builder mltQuery = new BooleanQuery.Builder(); for (String fieldName : textFieldNames) { String boost = null; if (fieldName.contains("^")) { String[] field2boost = fieldName.split("\\^"); fieldName = field2boost[0]; boost = field2boost[1]; } String[] fieldValues = document.getValues(fieldName); if (boost != null) { mlt.setBoost(true); mlt.setBoostFactor(Float.parseFloat(boost)); } mlt.setAnalyzer(field2analyzer.get(fieldName)); for (String fieldContent : fieldValues) { mltQuery.add(new BooleanClause(mlt.like(fieldName, new StringReader(fieldContent)), BooleanClause.Occur.SHOULD)); } mlt.setBoost(false); } Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*")); mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST)); if (query != null) { mltQuery.add(query, BooleanClause.Occur.MUST); } return indexSearcher.search(mltQuery.build(), k); }
From source file:KNearestNeighborClassifier.java
License:Apache License
private TopDocs knnSearch(String text) throws IOException { BooleanQuery.Builder mltQuery = new BooleanQuery.Builder(); for (String fieldName : textFieldNames) { String boost = null;//from w w w . j a v a 2 s. co m mlt.setBoost(true); //terms boost actually helps in MLT queries if (fieldName.contains("^")) { String[] field2boost = fieldName.split("\\^"); fieldName = field2boost[0]; boost = field2boost[1]; } if (boost != null) { mlt.setBoostFactor(Float.parseFloat(boost));//if we have a field boost, we add it } mltQuery.add( new BooleanClause(mlt.like(fieldName, new StringReader(text)), BooleanClause.Occur.SHOULD)); mlt.setBoostFactor(1);// restore neutral boost for next field } Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*")); mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST)); if (query != null) { mltQuery.add(query, BooleanClause.Occur.MUST); } return indexSearcher.search(mltQuery.build(), k); }
From source file:SimpleNaiveBayesDocumentClassifier.java
License:Apache License
/** * Returns the number of documents of the input class ( from the whole index or from a subset) * that contains the word ( in a specific field or in all the fields if no one selected) * * @param word the token produced by the analyzer * @param fieldName the field the word is coming from * @param term the class term/*from w w w. j a v a 2 s . co m*/ * @return number of documents of the input class * @throws java.io.IOException If there is a low-level I/O error */ private int getWordFreqForClass(String word, String fieldName, Term term) throws IOException { BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder(); BooleanQuery.Builder subQuery = new BooleanQuery.Builder(); subQuery.add(new BooleanClause(new TermQuery(new Term(fieldName, word)), BooleanClause.Occur.SHOULD)); booleanQuery.add(new BooleanClause(subQuery.build(), BooleanClause.Occur.MUST)); booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST)); if (query != null) { booleanQuery.add(query, BooleanClause.Occur.MUST); } TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector(); indexSearcher.search(booleanQuery.build(), totalHitCountCollector); return totalHitCountCollector.getTotalHits(); }
From source file:luceneInterface.java
License:Apache License
public static List<Document> query(String index, String stoppath, String question, int numResult, String sim) throws Exception { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stoppath))); if (sim.equals("TFIDF")) searcher.setSimilarity(new ClassicSimilarity()); else if (sim.equals("BM25")) searcher.setSimilarity(new BM25Similarity()); else/*from w ww.j a v a 2s.c o m*/ searcher.setSimilarity(new BM25Similarity()); String field = "contents"; QueryParser parser = new QueryParser(field, analyzer); Query query = parser.parse(parser.escape(question)); BooleanQuery.Builder bqb = new BooleanQuery.Builder(); bqb.add(new TermQuery(new Term("contents", parser.escape(question))), BooleanClause.Occur.SHOULD); bqb.add(new TermQuery(new Term("sec", parser.escape(question))), BooleanClause.Occur.SHOULD); // Term term = new Term(field, question); // Query query = new TermQuery(term); // TopDocs results = searcher.search(query, numResult); TopDocs results = searcher.search(parser.parse(bqb.build().toString()), numResult); ScoreDoc[] hits = results.scoreDocs; List<Document> docs = new ArrayList<Document>(); int numTotalHits = results.totalHits; // System.out.println(numTotalHits + " total matching documents"); int end = Math.min(numTotalHits, numResult); String searchResult = ""; // System.out.println("Only results 1 - " + hits.length); for (int i = 0; i < end; i++) { Document doc = searcher.doc(hits[i].doc); docs.add(doc); } return docs; }
From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.AbstractTermExpansionTest.java
License:Apache License
/** * This test indexes a sample metadata record (=lucene document) having a * "title", "description", and "subject" field, which contains plain subject * terms.// w ww. ja v a 2 s.c om * <p/> * A search for "arms" doesn't return that record because the term "arms" is * not explicitly contained in the record (document). * * @throws IOException * @throws LockObtainFailedException * @throws CorruptIndexException */ @Test public void noExpansion() throws IOException { /* defining the document to be indexed */ Document doc = new Document(); doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED)); doc.add(new Field("description", "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..." + "The spear was mainly a thrusting weapon, but could also be thrown. " + "It was the principal weapon of the auxiliary soldier... " + "(second - fourth century, Arbeia Roman Fort).", TextField.TYPE_NOT_STORED)); doc.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED)); /* setting up a writer with a default (simple) analyzer */ writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new SimpleAnalyzer())); /* adding the document to the index */ writer.addDocument(doc); /* defining a query that searches over all fields */ BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD); /* creating a new searcher */ searcher = new IndexSearcher(DirectoryReader.open(writer, false)); TopDocs results = searcher.search(builder.build(), 10); /* no results are returned since there is no term match */ assertEquals(0, results.totalHits); }
From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.LabelbasedTermExpansionTest.java
License:Apache License
/** * This test indexes a sample metadata record (=lucene document) having a * "title", "description", and "subject" field. * <p/>/*ww w . j av a 2 s . c o m*/ * A search for "arms" returns that record as a result because "arms" is * defined as an alternative label for "weapons", the term which is * contained in the subject field. * * @throws IOException */ @Test public void labelBasedTermExpansion() throws IOException { /* defining the document to be indexed */ Document doc = new Document(); doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED)); doc.add(new Field("description", "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..." + "The spear was mainly a thrusting weapon, but could also be thrown. " + "It was the principal weapon of the auxiliary soldier... " + "(second - fourth century, Arbeia Roman Fort).", TextField.TYPE_NOT_STORED)); doc.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED)); /* setting up the SKOS analyzer */ String skosFile = "src/test/resources/skos_samples/ukat_examples.n3"; String indexPath = "build/"; /* ExpansionType.URI->the field to be analyzed (expanded) contains URIs */ Analyzer skosAnalyzer = new SKOSAnalyzer(indexPath, skosFile, ExpansionType.LABEL); /* Define different analyzers for different fields */ Map<String, Analyzer> analyzerPerField = new HashMap<>(); analyzerPerField.put("subject", skosAnalyzer); PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), analyzerPerField); /* setting up a writer with a default (simple) analyzer */ writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(indexAnalyzer)); /* adding the document to the index */ writer.addDocument(doc); /* defining a query that searches over all fields */ BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD); /* creating a new searcher */ searcher = new IndexSearcher(DirectoryReader.open(writer, false)); TopDocs results = searcher.search(builder.build(), 10); /* the document matches because "arms" is among the expanded terms */ assertEquals(1, results.totalHits); /* defining a query that searches for a broader concept */ Query query = new TermQuery(new Term("subject", "military equipment")); results = searcher.search(query, 10); /* ... also returns the document as result */ assertEquals(1, results.totalHits); }
From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.URIbasedTermExpansionTest.java
License:Apache License
/** * This test indexes a sample metadata record (=lucene document) having a * "title", "description", and "subject" field, which is semantically * enriched by a URI pointing to a SKOS concept "weapons". * <p/>/*from w ww . j a v a2 s .co m*/ * A search for "arms" returns that record as a result because "arms" is * defined as an alternative label (altLabel) for the concept "weapons". * * @throws IOException */ @Test public void uriBasedTermExpansion() throws IOException { /* defining the document to be indexed */ Document doc = new Document(); doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED)); doc.add(new Field("description", "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..." + "The spear was mainly a thrusting weapon, but could also be thrown. " + "It was the principal weapon of the auxiliary soldier... " + "(second - fourth century, Arbeia Roman Fort).", TextField.TYPE_NOT_STORED)); doc.add(new Field("subject", "http://www.ukat.org.uk/thesaurus/concept/859", TextField.TYPE_NOT_STORED)); /* setting up the SKOS analyzer */ String skosFile = "src/test/resources/skos_samples/ukat_examples.n3"; String indexPath = "build/"; /* ExpansionType.URI->the field to be analyzed (expanded) contains URIs */ Analyzer skosAnalyzer = new SKOSAnalyzer(indexPath, skosFile, ExpansionType.URI); /* Define different analyzers for different fields */ Map<String, Analyzer> analyzerPerField = new HashMap<>(); analyzerPerField.put("subject", skosAnalyzer); PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), analyzerPerField); /* setting up a writer with a default (simple) analyzer */ writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(indexAnalyzer)); /* adding the document to the index */ writer.addDocument(doc); /* defining a query that searches over all fields */ BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD); /* creating a new searcher */ searcher = new IndexSearcher(DirectoryReader.open(writer, false)); TopDocs results = searcher.search(builder.build(), 10); /* the document matches because "arms" is among the expanded terms */ assertEquals(1, results.totalHits); /* defining a query that searches for a broader concept */ Query query = new TermQuery(new Term("subject", "military equipment")); results = searcher.search(query, 10); /* ... also returns the document as result */ assertEquals(1, results.totalHits); }