List of usage examples for org.apache.lucene.index.DirectoryReader.open
public static DirectoryReader open(final IndexCommit commit) throws IOException
From source file:com.main.Searcher.java
public List<Bean> searching(String s1, String s2, String radioBtn) throws IOException, ParseException, InvalidTokenOffsetsException { //getting reference of directory Directory dir = FSDirectory.open(Paths.get(Index_Dir)); //Index reader - an interface for accessing a point-in-time view of a lucene index IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); //analyzer with the default stop words, takes out the stop words Analyzer analyzer = new StandardAnalyzer(); String contents = "contents"; QueryParser parser = new QueryParser(contents, analyzer); int numOfDoc = reader.numDocs(); for (int i = 0; i < numOfDoc; i++) { Document d = reader.document(i); }//w w w . ja va2 s .c o m Query q1 = parser.parse(s1); Query q2 = parser.parse(s2); //conjuction, disjunction and negation BooleanQuery.Builder bq = new BooleanQuery.Builder(); //occur.must : both queries required in a doc if (radioBtn.equals("conjunction")) { bq.add(q1, BooleanClause.Occur.MUST); bq.add(q2, BooleanClause.Occur.MUST); bq.build(); } //occur.should: one of the q1 should be presen t in doc else if (radioBtn.equals("disjunction")) { bq.add(q1, BooleanClause.Occur.SHOULD); bq.add(q2, BooleanClause.Occur.SHOULD); bq.build(); } //negation: first should present , second should not else { bq.add(q1, BooleanClause.Occur.MUST); bq.add(q2, BooleanClause.Occur.MUST_NOT); bq.build(); } TopDocs hits = searcher.search(bq.build(), 10); Formatter formatter = new SimpleHTMLFormatter(); QueryScorer scorer = new QueryScorer(bq.build()); //used to markup highlighted terms found in the best sections of a cont Highlighter highlighter = new Highlighter(formatter, scorer); //It breaks cont up into same-size texts but does not split up spans Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10); //breaks cont up into same-size fragments with no concerns over spotting sentence boundaries. 
//set fragmenter to highlighter highlighter.setTextFragmenter(fragmenter); for (int i = 0; i < hits.scoreDocs.length; i++) { Bean bean = new Bean(); int outResult = hits.scoreDocs.length; bean.setNumFile(outResult); int docid = hits.scoreDocs[i].doc; double rank = hits.scoreDocs[i].score; bean.setRankSc(rank); Document doc = searcher.doc(docid); String name = doc.get("name"); String title = doc.get("title"); bean.setTitle(name); String path = doc.get("path"); bean.setPath(path); String cont = doc.get("contents"); //Create token stream TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer); //Get highlighted cont fragments String[] frags = highlighter.getBestFragments(stream, cont, 10); ArrayList<String> dummy = new ArrayList<>(); for (String frag : frags) { dummy.add(frag); } bean.setContent(dummy); beanList.add(bean); } dir.close(); // } return beanList; }
From source file:com.main.Searcher.java
public List<Bean> searching(String s1) throws IOException, ParseException, InvalidTokenOffsetsException { //Get directory reference Directory dir = FSDirectory.open(Paths.get(Index_Dir)); //Index reader - an interface for accessing a point-in-time view of a lucene index IndexReader reader = DirectoryReader.open(dir); //CreateIndexReader reader = DirectoryReader.open(dir); lucene searcher. It search over a single IndexReader. IndexSearcher searcher = new IndexSearcher(reader); //analyzer with the default stop words Analyzer analyzer = new StandardAnalyzer(); //Query parser to be used for creating TermQuery String queries = null;//from ww w . j av a 2s. c o m String queryString = null; //regular search String contents = "contents"; BufferedReader in = null; if (queries != null) { in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } QueryParser parser = new QueryParser(contents, analyzer); int numOfDoc = reader.numDocs(); for (int i = 0; i < numOfDoc; i++) { Document d = reader.document(i); } Query q1 = parser.parse(s1); BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(q1, BooleanClause.Occur.MUST); //Search the lucene documents TopDocs hits = searcher.search(bq.build(), 10); // TopScoreDocCollector collector = TopScoreDocCollector.create(5); /** * Highlighter Code Start *** */ //Uses HTML <B></B> tag to highlight the searched terms Formatter formatter = new SimpleHTMLFormatter(); //It scores cont fragments by the number of unique q1 terms found //Basically the matching score in layman terms QueryScorer scorer = new QueryScorer(bq.build()); //used to markup highlighted terms found in the best sections of a cont Highlighter highlighter = new Highlighter(formatter, scorer); //It breaks cont up into same-size texts but does not split up spans Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10); //breaks cont up into same-size fragments 
with no concerns over spotting sentence boundaries. //set fragmenter to highlighter highlighter.setTextFragmenter(fragmenter); //Iterate over found results for (int i = 0; i < hits.scoreDocs.length; i++) { Bean bean = new Bean(); //int rank = hits.scoreDocs.length; int outResult = hits.scoreDocs.length; bean.setNumFile(outResult); int docid = hits.scoreDocs[i].doc; double rank = hits.scoreDocs[i].score; bean.setRankSc(rank); Document doc = searcher.doc(docid); // String title = doc.get("title"); String name = doc.get("name"); String title = doc.get("title"); bean.setTitle(name); String path = doc.get("path"); bean.setPath(path); String cont = doc.get("contents"); //Create token stream TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer); //Get highlighted cont fragments String[] frags = highlighter.getBestFragments(stream, cont, 10); ArrayList<String> dummy = new ArrayList<>(); for (String frag : frags) { dummy.add(frag); } bean.setContent(dummy); beanList.add(bean); } dir.close(); // } return beanList; }
From source file:com.mathworks.xzheng.admin.SearcherManager.java
License:Apache License
public SearcherManager(IndexWriter writer) throws IOException { //2 this.writer = writer; currentSearcher = new IndexSearcher(DirectoryReader.open(writer.getDirectory())); //C warm(currentSearcher);/*from w w w. jav a 2 s. c om*/ writer.getConfig().setMergedSegmentWarmer( // 3 new IndexWriter.IndexReaderWarmer() { // 3 public void warm(AtomicReader reader) throws IOException { // 3 SearcherManager.this.warm(new IndexSearcher(reader)); // 3 } // 3 }); // 3 }
From source file:com.mathworks.xzheng.advsearching.FilterTest.java
License:Apache License
protected void setUp() throws Exception { // #1 allBooks = new MatchAllDocsQuery(); dir = TestUtil.getBookIndexDirectory(); searcher = new IndexSearcher(DirectoryReader.open(dir)); }
From source file:com.mathworks.xzheng.advsearching.MultiFieldQueryParserTest.java
License:Apache License
public void testDefaultOperator() throws Exception { Query query = new MultiFieldQueryParser(Version.LUCENE_46, new String[] { "title", "subject" }, new SimpleAnalyzer(Version.LUCENE_46)).parse("development"); Directory dir = TestUtil.getBookIndexDirectory(); IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir)); TopDocs hits = searcher.search(query, 10); assertTrue(TestUtil.hitsIncludeTitle(searcher, hits, "Ant in Action")); assertTrue(TestUtil.hitsIncludeTitle( //A searcher, //A hits, //A "Extreme Programming Explained")); //A dir.close();/* w ww . java 2 s . c o m*/ }
From source file:com.mathworks.xzheng.advsearching.MultiFieldQueryParserTest.java
License:Apache License
public void testSpecifiedOperator() throws Exception { Query query = MultiFieldQueryParser.parse(Version.LUCENE_46, "lucene", new String[] { "title", "subject" }, new BooleanClause.Occur[] { BooleanClause.Occur.MUST, BooleanClause.Occur.MUST }, new SimpleAnalyzer(Version.LUCENE_46)); Directory dir = TestUtil.getBookIndexDirectory(); IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir)); TopDocs hits = searcher.search(query, 10); assertTrue(TestUtil.hitsIncludeTitle(searcher, hits, "Lucene in Action, Second Edition")); assertEquals("one and only one", 1, hits.scoreDocs.length); dir.close();//from w w w . j a v a 2s . com }
From source file:com.mathworks.xzheng.advsearching.MultiPhraseQueryTest.java
License:Apache License
/**
 * Builds an in-memory index holding two whitespace-analyzed sentences in the stored field
 * "field", then opens a searcher over it.
 *
 * @throws Exception if indexing or opening the reader fails
 */
protected void setUp() throws Exception {
    Directory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory,
            new IndexWriterConfig(Version.LUCENE_46, new WhitespaceAnalyzer(Version.LUCENE_46)));

    // One document per sentence, added in this order.
    String[] sentences = {
        "the quick brown fox jumped over the lazy dog",
        "the fast fox hopped over the hound"
    };
    for (String sentence : sentences) {
        Document doc = new Document();
        doc.add(new Field("field", sentence, Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
    }
    writer.close();

    DirectoryReader reader = DirectoryReader.open(directory);
    searcher = new IndexSearcher(reader);
}
From source file:com.mathworks.xzheng.advsearching.MultiSearcherTest.java
License:Apache License
public void setUp() throws Exception { String[] animals = { "aardvark", "beaver", "coati", "dog", "elephant", "frog", "gila monster", "horse", "iguana", "javelina", "kangaroo", "lemur", "moose", "nematode", "orca", "python", "quokka", "rat", "scorpion", "tarantula", "uromastyx", "vicuna", "walrus", "xiphias", "yak", "zebra" }; Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_46); Directory aTOmDirectory = new RAMDirectory(); // #1 Directory nTOzDirectory = new RAMDirectory(); // #1 IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer); IndexWriter aTOmWriter = new IndexWriter(aTOmDirectory, config); IndexWriter nTOzWriter = new IndexWriter(nTOzDirectory, config); for (int i = animals.length - 1; i >= 0; i--) { Document doc = new Document(); String animal = animals[i]; doc.add(new Field("animal", animal, Field.Store.YES, Field.Index.NOT_ANALYZED)); if (animal.charAt(0) < 'n') { aTOmWriter.addDocument(doc); // #2 } else {// w w w. ja v a2s .co m nTOzWriter.addDocument(doc); // #2 } } aTOmWriter.close(); nTOzWriter.close(); searchers = new IndexSearcher[2]; searchers[0] = new IndexSearcher(DirectoryReader.open(aTOmDirectory)); searchers[1] = new IndexSearcher(DirectoryReader.open(nTOzDirectory)); }
From source file:com.mathworks.xzheng.advsearching.SecurityFilterTest.java
License:Apache License
protected void setUp() throws Exception { Directory directory = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, new WhitespaceAnalyzer(Version.LUCENE_46)); IndexWriter writer = new IndexWriter(directory, config); Document document = new Document(); // 1 document.add(new Field("owner", // 1 "elwood", // 1 Field.Store.YES, // 1 Field.Index.NOT_ANALYZED)); // 1 document.add(new Field("keywords", // 1 "elwood's sensitive info", // 1 Field.Store.YES, // 1 Field.Index.ANALYZED)); // 1 writer.addDocument(document);/*from w w w . j a v a 2s.c om*/ document = new Document(); // 2 document.add(new Field("owner", // 2 "jake", // 2 Field.Store.YES, // 2 Field.Index.NOT_ANALYZED)); // 2 document.add(new Field("keywords", // 2 "jake's sensitive info", // 2 Field.Store.YES, // 2 Field.Index.ANALYZED)); // 2 writer.addDocument(document); writer.close(); searcher = new IndexSearcher(DirectoryReader.open(directory)); }
From source file:com.mathworks.xzheng.advsearching.SortingExample.java
License:Apache License
public void displayResults(Query query, Sort sort) // #1 throws IOException { IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory)); searcher.setDefaultFieldSortScoring(true, false); // #2 TopDocs results = searcher.search(query, null, // #3 20, sort); // #3 System.out.println("\nResults for: " + // #4 query.toString() + " sorted by " + sort); System.out.println(StringUtils.rightPad("Title", 30) + StringUtils.rightPad("pubmonth", 10) + StringUtils.center("id", 4) + StringUtils.center("score", 15)); PrintStream out = new PrintStream(System.out, true, "UTF-8"); // #5 DecimalFormat scoreFormatter = new DecimalFormat("0.######"); for (ScoreDoc sd : results.scoreDocs) { int docID = sd.doc; float score = sd.score; Document doc = searcher.doc(docID); out.println(StringUtils.rightPad( // #6 StringUtils.abbreviate(doc.get("title"), 29), 30) + // #6 StringUtils.rightPad(doc.get("pubmonth"), 10) + // #6 StringUtils.center("" + docID, 4) + // #6 StringUtils.leftPad( // #6 scoreFormatter.format(score), 12)); // #6 out.println(" " + doc.get("category")); //out.println(searcher.explain(query, docID)); // #7 }//from w w w .j av a 2s . com }