List of usage examples for the org.apache.lucene.index.IndexReader#close() method
@Override public final synchronized void close() throws IOException
From source file:invertedindex.SearchIndex.java
public ArrayList<SearchResults> multipleSearch(String keyword1, String keyword2, String radio) throws IOException { String indexLocation = this.getIndexLocation(); try {/*from ww w. j a va 2s. c o m*/ IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation))); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(topDocs, true); String query1 = keyword1; String query2 = keyword2; query1 = "\"" + query1 + "\""; query2 = "\"" + query2 + "\""; Query q1 = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse(query1); Query q2 = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse(query2); BooleanQuery apiQuery = new BooleanQuery(); if (radio.equalsIgnoreCase("and")) { apiQuery.add(q1, BooleanClause.Occur.MUST); apiQuery.add(q2, BooleanClause.Occur.MUST); } else if (radio.equalsIgnoreCase("or")) { apiQuery.add(q1, BooleanClause.Occur.SHOULD); apiQuery.add(q2, BooleanClause.Occur.SHOULD); } else if (radio.equalsIgnoreCase("not")) { apiQuery.add(q1, BooleanClause.Occur.MUST); apiQuery.add(q2, BooleanClause.Occur.MUST_NOT); } SimpleFragListBuilder fragListBuilder = new SimpleFragListBuilder(); ScoreOrderFragmentsBuilder fragBuilder = new ScoreOrderFragmentsBuilder(); FastVectorHighlighter fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); searcher.search(apiQuery, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; System.out.println("Found " + hits.length + " hits."); totalHits = hits.length; searchResulsAL = new ArrayList<>(); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; FieldQuery fq = fvh.getFieldQuery(apiQuery); // String[] fragments = fvh.getBestFragments(fq, 
searcher.getIndexReader(), docId, "contents", 50, 10); Document d = searcher.doc(docId); // String filePath = d.get("path"); for (int j = 0; j < fragments.length; j++) { String temp = Jsoup.parse(fragments[j]).text(); // LineNumberSearcher lns = new LineNumberSearcher(); //lineNumbersList = new ArrayList<>(); lineNumber = "null"; lineNumberArrayList = new ArrayList<>(); boolean g = Pattern.compile("\\n").matcher(fragments[j]).find(); if (!g) { // System.out.println("NO G g"); lineNumbersList = lns.search(temp, filePath); // for(String s : lineNumbersList){ // System.out.println("s is "+s); // } // if (!lineNumbersList.isEmpty()) { // System.out.println("in line number"); lineNumber = lineNumbersList.get(0); } } fragments[j] = fragments[j].replaceAll("\\n", " "); // System.out.println("\t\t" + fragments[j] + "..."); fragments[j] = fragments[j] + " ...."; if (!(lineNumber.equals("null"))) { // System.out.println("in line number"); fragments[j] = fragments[j] + " at Line " + lineNumber; } } SearchResults sr = new SearchResults(); sr.setFilename(d.get("filename")); sr.setScore(hits[i].score); sr.setFragments(fragments); sr.setPath(filePath); sr.setContentType(d.get("contentType")); searchResulsAL.add(sr); } reader.close(); } catch (Exception e) { System.out.println("Error searching in search index " + e + " : " + e.getMessage()); } return searchResulsAL; }
From source file:io.anserini.integration.IndexerTest.java
License:Apache License
@Test
public void testReadingPostings() throws Exception {
    // Open the index built by the fixture and sanity-check its shape.
    Directory dir = FSDirectory.open(tempDir1);
    IndexReader reader = DirectoryReader.open(dir);
    assertEquals(3, reader.numDocs());
    assertEquals(1, reader.leaves().size());

    System.out.println("Dumping out postings...");
    dumpPostings(reader);

    // Expected document frequencies for each term of the "text" field.
    String[] terms = { "here", "more", "some", "test", "text" };
    int[] expectedDocFreqs = { 2, 2, 1, 1, 2 };
    for (int i = 0; i < terms.length; i++) {
        assertEquals(expectedDocFreqs[i], reader.docFreq(new Term("text", terms[i])));
    }

    reader.close();
}
From source file:io.anserini.integration.IndexerTest.java
License:Apache License
@Test public void testCloneIndex() throws Exception { System.out.println("Cloning index:"); Directory dir1 = FSDirectory.open(tempDir1); IndexReader reader = DirectoryReader.open(dir1); Directory dir2 = FSDirectory.open(tempDir2); IndexWriterConfig config = new IndexWriterConfig(new EnglishAnalyzer()); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir2, config); LeafReader leafReader = reader.leaves().get(0).reader(); CodecReader codecReader = SlowCodecReaderWrapper.wrap(leafReader); writer.addIndexes(new MyFilterCodecReader(codecReader)); writer.commit();/* w w w. jav a 2 s .co m*/ writer.forceMerge(1); writer.close(); reader.close(); // Open up the cloned index and verify it. reader = DirectoryReader.open(dir2); assertEquals(3, reader.numDocs()); assertEquals(1, reader.leaves().size()); System.out.println("Dumping out postings..."); dumpPostings(reader); assertEquals(2, reader.docFreq(new Term("text", "here"))); assertEquals(2, reader.docFreq(new Term("text", "more"))); assertEquals(1, reader.docFreq(new Term("text", "some"))); assertEquals(1, reader.docFreq(new Term("text", "test"))); assertEquals(2, reader.docFreq(new Term("text", "text"))); reader.close(); }
From source file:io.anserini.search.SearchTweets.java
License:Apache License
public static void main(String[] args) throws Exception { long curTime = System.nanoTime(); SearchArgs searchArgs = new SearchArgs(); CmdLineParser parser = new CmdLineParser(searchArgs, ParserProperties.defaults().withUsageWidth(90)); try {//ww w. j a v a 2s. c o m parser.parseArgument(args); } catch (CmdLineException e) { System.err.println(e.getMessage()); parser.printUsage(System.err); System.err.println("Example: SearchTweets" + parser.printExample(OptionHandlerFilter.REQUIRED)); return; } LOG.info("Reading index at " + searchArgs.index); Directory dir; if (searchArgs.inmem) { LOG.info("Using MMapDirectory with preload"); dir = new MMapDirectory(Paths.get(searchArgs.index)); ((MMapDirectory) dir).setPreload(true); } else { LOG.info("Using default FSDirectory"); dir = FSDirectory.open(Paths.get(searchArgs.index)); } IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); if (searchArgs.ql) { LOG.info("Using QL scoring model"); searcher.setSimilarity(new LMDirichletSimilarity(searchArgs.mu)); } else if (searchArgs.bm25) { LOG.info("Using BM25 scoring model"); searcher.setSimilarity(new BM25Similarity(searchArgs.k1, searchArgs.b)); } else { LOG.error("Error: Must specify scoring model!"); System.exit(-1); } RerankerCascade cascade = new RerankerCascade(); if (searchArgs.rm3) { cascade.add(new Rm3Reranker(IndexTweets.ANALYZER, StatusField.TEXT.name, "src/main/resources/io/anserini/rerank/rm3/rm3-stoplist.twitter.txt")); cascade.add(new RemoveRetweetsTemporalTiebreakReranker()); } else { cascade.add(new RemoveRetweetsTemporalTiebreakReranker()); } MicroblogTopicSet topics = MicroblogTopicSet.fromFile(new File(searchArgs.topics)); PrintStream out = new PrintStream(new FileOutputStream(new File(searchArgs.output))); LOG.info("Writing output to " + searchArgs.output); LOG.info("Initialized complete! 
(elapsed time = " + (System.nanoTime() - curTime) / 1000000 + "ms)"); long totalTime = 0; int cnt = 0; for (MicroblogTopic topic : topics) { long curQueryTime = System.nanoTime(); Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, topic.getQueryTweetTime(), true, true); Query query = AnalyzerUtils.buildBagOfWordsQuery(StatusField.TEXT.name, IndexTweets.ANALYZER, topic.getQuery()); TopDocs rs = searcher.search(query, filter, searchArgs.hits); RerankerContext context = new RerankerContext(searcher, query, topic.getId(), topic.getQuery(), Sets.newHashSet(AnalyzerUtils.tokenize(IndexTweets.ANALYZER, topic.getQuery())), filter); ScoredDocuments docs = cascade.run(ScoredDocuments.fromTopDocs(rs, searcher), context); for (int i = 0; i < docs.documents.length; i++) { String qid = topic.getId().replaceFirst("^MB0*", ""); out.println(String.format("%s Q0 %s %d %f %s", qid, docs.documents[i].getField(StatusField.ID.name).numericValue(), (i + 1), docs.scores[i], searchArgs.runtag)); } long qtime = (System.nanoTime() - curQueryTime) / 1000000; LOG.info("Query " + topic.getId() + " (elapsed time = " + qtime + "ms)"); totalTime += qtime; cnt++; } LOG.info("All queries completed!"); LOG.info("Total elapsed time = " + totalTime + "ms"); LOG.info("Average query latency = " + (totalTime / cnt) + "ms"); reader.close(); out.close(); }
From source file:io.crate.execution.engine.collect.collectors.LuceneOrderedDocCollectorTest.java
License:Apache License
@Test public void testSearchAfterQueriesNullsLast() throws Exception { Directory index = createLuceneIndex(); IndexReader reader = DirectoryReader.open(index); // reverseOrdering = false, nulls First = false // 1 2 null null // ^ (lastCollected = 2) FieldDoc afterDoc = new FieldDoc(0, 0, new Object[] { 2L }); Long[] result = nextPageQuery(reader, afterDoc, false, null); assertThat(result, is(new Long[] { 2L, null, null })); // reverseOrdering = false, nulls First = false // 1 2 null null // ^/*from w w w.j a v a 2s.com*/ afterDoc = new FieldDoc(0, 0, new Object[] { LuceneMissingValue.missingValue(false, null, SortField.Type.LONG) }); result = nextPageQuery(reader, afterDoc, false, null); assertThat(result, is(new Long[] { null, null })); // reverseOrdering = true, nulls First = false // 2 1 null null // ^ afterDoc = new FieldDoc(0, 0, new Object[] { 1L }); result = nextPageQuery(reader, afterDoc, true, false); assertThat(result, is(new Long[] { 1L, null, null })); // reverseOrdering = true, nulls First = false // 2 1 null null // ^ afterDoc = new FieldDoc(0, 0, new Object[] { LuceneMissingValue.missingValue(true, false, SortField.Type.LONG) }); result = nextPageQuery(reader, afterDoc, true, false); assertThat(result, is(new Long[] { null, null })); reader.close(); }
From source file:io.crate.execution.engine.collect.collectors.LuceneOrderedDocCollectorTest.java
License:Apache License
@Test public void testSearchAfterQueriesNullsFirst() throws Exception { Directory index = createLuceneIndex(); IndexReader reader = DirectoryReader.open(index); // reverseOrdering = false, nulls First = true // null, null, 1, 2 // ^ (lastCollected = 2L) FieldDoc afterDoc = new FieldDoc(0, 0, new Object[] { 2L }); Long[] result = nextPageQuery(reader, afterDoc, false, true); assertThat(result, is(new Long[] { 2L })); // reverseOrdering = false, nulls First = true // null, null, 1, 2 // ^/*w w w . j a va 2s.co m*/ afterDoc = new FieldDoc(0, 0, new Object[] { LuceneMissingValue.missingValue(false, true, SortField.Type.LONG) }); result = nextPageQuery(reader, afterDoc, false, true); assertThat(result, is(new Long[] { null, null, 1L, 2L })); // reverseOrdering = true, nulls First = true // null, null, 2, 1 // ^ afterDoc = new FieldDoc(0, 0, new Object[] { 1L }); result = nextPageQuery(reader, afterDoc, true, true); assertThat(result, is(new Long[] { 1L })); // reverseOrdering = true, nulls First = true // null, null, 2, 1 // ^ afterDoc = new FieldDoc(0, 0, new Object[] { LuceneMissingValue.missingValue(true, true, SortField.Type.LONG) }); result = nextPageQuery(reader, afterDoc, true, true); assertThat(result, is(new Long[] { null, null, 2L, 1L })); reader.close(); }
From source file:io.crate.operation.collect.LuceneDocCollectorTest.java
License:Apache License
@Test public void testSearchAfterQueriesNullsLast() throws Exception { Directory index = createLuceneIndex(); IndexReader reader = DirectoryReader.open(index); // reverseOrdering = false, nulls First = false // 1 2 null null // ^ (lastCollected = 2) FieldDoc afterDoc = new FieldDoc(0, 0, new Object[] { 2L }); Long[] result = nextPageQuery(reader, afterDoc, false, null); assertThat(result, is(new Long[] { 2L, null, null })); // reverseOrdering = false, nulls First = false // 1 2 null null // ^// w w w .j a v a 2 s . c o m afterDoc = new FieldDoc(0, 0, new Object[] { LuceneMissingValue.missingValue(false, null, SortField.Type.LONG) }); result = nextPageQuery(reader, afterDoc, false, null); assertThat(result, is(new Long[] { null, null })); // reverseOrdering = true, nulls First = false // 2 1 null null // ^ afterDoc = new FieldDoc(0, 0, new Object[] { 1L }); result = nextPageQuery(reader, afterDoc, true, null); assertThat(result, is(new Long[] { null, null, 1L })); // reverseOrdering = true, nulls First = false // 2 1 null null // ^ afterDoc = new FieldDoc(0, 0, new Object[] { LuceneMissingValue.missingValue(true, null, SortField.Type.LONG) }); result = nextPageQuery(reader, afterDoc, true, null); assertThat(result, is(new Long[] { null, null })); reader.close(); }
From source file:io.datalayer.lucene.delete.LuceneDeleteTest.java
License:Apache License
@Test public void testDelete() throws IOException { IndexWriter writer = AosIndexUtil.newIndexWithDocuments(); Term term = new Term(ID, "1"); Query query = new TermQuery(term); IndexReader reader = DirectoryReader.open(writer, true); IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(writer, true)); TopDocs topDocs = indexSearcher.search(query, 1); LOGGER.info("" + topDocs.scoreDocs[0].doc); assertNotNull(reader.document(topDocs.scoreDocs[0].doc)); LOGGER.info("Deleting documents containing " + term); writer.deleteDocuments(term);//from w ww. j av a 2 s. c om // writer.deleteDocuments(query); writer.commit(); indexSearcher = new IndexSearcher(DirectoryReader.open(writer, true)); topDocs = indexSearcher.search(query, 1); assertEquals(0, topDocs.scoreDocs.length); reader.close(); writer.close(); }
From source file:io.datalayer.lucene.frequency.AosFrequencyTerms.java
License:Apache License
public static void main(String... args) throws Exception { IndexReader reader = null; FSDirectory dir = null;/*from ww w . j a va2 s . c om*/ String field = null; boolean IncludeTermFreqs = false; if (args.length == 0 || args.length > 4) { usage(); System.exit(1); } if (args.length > 0) { dir = FSDirectory.open(new File(args[0])); } for (int i = 1; i < args.length; i++) { if (args[i].equals("-t")) { IncludeTermFreqs = true; } else { try { numTerms = Integer.parseInt(args[i]); } catch (NumberFormatException e) { field = args[i]; } } } String[] fields = field != null ? new String[] { field } : null; reader = DirectoryReader.open(dir); AosTermStats[] terms = getHighFreqTerms(reader, numTerms, fields); if (!IncludeTermFreqs) { // default HighFreqTerms behavior for (int i = 0; i < terms.length; i++) { System.out.printf("%s:%s %,d \n", terms[i].field, terms[i].termtext.utf8ToString(), terms[i].docFreq); } } else { AosTermStats[] termsWithTF = sortByTotalTermFreq(reader, terms); for (int i = 0; i < termsWithTF.length; i++) { System.out.printf("%s:%s \t totalTF = %,d \t doc freq = %,d \n", termsWithTF[i].field, termsWithTF[i].termtext.utf8ToString(), termsWithTF[i].totalTermFreq, termsWithTF[i].docFreq); } } reader.close(); }
From source file:io.datalayer.lucene.index.LuceneLifecycleTest.java
License:Apache License
@Test
public void testReader() throws IOException {
    // A freshly opened reader must expose every indexed id with no deletions,
    // so maxDoc and numDocs both equal the number of ids.
    IndexReader reader = DirectoryReader.open(directory);
    int expectedCount = ids.length;
    assertEquals(expectedCount, reader.maxDoc());
    assertEquals(expectedCount, reader.numDocs());
    reader.close();
}