List of usage examples for org.apache.lucene.index.DirectoryReader.open
public static DirectoryReader open(final Directory directory) throws IOException
public static DirectoryReader open(final IndexCommit commit) throws IOException
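The second signature above is the IndexCommit overload, which opens the index as of a specific commit point; every example below uses the Directory overload, which opens the most recent commit. As a minimal, self-contained sketch of that common pattern (not taken from any of the source files below, and assuming a Lucene 5.x-style FSDirectory.open(Path) API plus an illustrative index path and field name):

import java.nio.file.Paths;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class DirectoryReaderOpenExample {
    public static void main(String[] args) throws Exception {
        Directory directory = FSDirectory.open(Paths.get("/tmp/example-index")); // assumed index path
        DirectoryReader reader = DirectoryReader.open(directory); // point-in-time view of the latest commit
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs hits = searcher.search(new TermQuery(new Term("title", "action")), 10); // assumed field
            for (ScoreDoc sd : hits.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get("title"));
            }
        } finally {
            reader.close();    // the caller owns the reader and must close it
            directory.close();
        }
    }
}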
From source file:com.mathworks.xzheng.tools.HighlightTest.java
License:Apache License
public void testHits() throws Exception {
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(TestUtil.getBookIndexDirectory()));
    TermQuery query = new TermQuery(new Term("title", "action"));
    TopDocs hits = searcher.search(query, 10);

    QueryScorer scorer = new QueryScorer(query, "title");
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

    Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_46);

    for (ScoreDoc sd : hits.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        String title = doc.get("title");

        TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc,
                analyzer);
        String fragment = highlighter.getBestFragment(stream, title);
        System.out.println(fragment);
    }
}
From source file:com.mathworks.xzheng.tools.RegexQueryTest.java
License:Apache License
public void testRegexQuery() throws Exception {
    Directory directory = TestUtil.getBookIndexDirectory();
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));

    RegexQuery q = new RegexQuery(new Term("title", ".*st.*"));
    TopDocs hits = searcher.search(q, 10);
    assertEquals(2, hits.totalHits);
    assertTrue(TestUtil.hitsIncludeTitle(searcher, hits, "Tapestry in Action"));
    assertTrue(TestUtil.hitsIncludeTitle(searcher, hits, "Mindstorms: Children, Computers, And Powerful Ideas"));

    directory.close();
}
From source file:com.mathworks.xzheng.tools.remote.SearchServer.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("Usage: SearchServer <basedir>");
        System.exit(-1);
    }
    String basedir = args[0];                                                       //1

    Directory[] dirs = new Directory[ALPHABET.length()];
    IndexReader[] searchables = new IndexReader[ALPHABET.length()];
    for (int i = 0; i < ALPHABET.length(); i++) {
        dirs[i] = FSDirectory.open(new File(basedir, "" + ALPHABET.charAt(i)));
        searchables[i] = DirectoryReader.open(dirs[i]);                             //2
    }

    LocateRegistry.createRegistry(1099);                                            //3

    IndexSearcher multiSearcher = new IndexSearcher(new MultiReader(searchables));  //4
    ExecutorService pool = Executors.newFixedThreadPool(nThreads);
    IndexSearcher multiImpl =                                                       //4
            new IndexSearcher(multiSearcher);                                       //4
    Naming.rebind("//localhost/LIA_Multi", multiImpl);                              //4

    Searcher parallelSearcher =                                                     //5
            new ParallelMultiSearcher(searchables);                                 //5
    RemoteSearchable parallelImpl =                                                 //5
            new RemoteSearchable(parallelSearcher);                                 //5
    Naming.rebind("//localhost/LIA_Parallel", parallelImpl);                        //5

    System.out.println("Server started");

    for (int i = 0; i < ALPHABET.length(); i++) {
        dirs[i].close();
    }
}
From source file:com.meizu.nlp.classification.utils.DataSplitterTest.java
License:Apache License
public static void assertSplit(LeafReader originalIndex, double testRatio, double crossValidationRatio,
        String... fieldNames) throws Exception {
    BaseDirectoryWrapper trainingIndex = newDirectory();
    BaseDirectoryWrapper testIndex = newDirectory();
    BaseDirectoryWrapper crossValidationIndex = newDirectory();

    try {
        DatasetSplitter datasetSplitter = new DatasetSplitter(testRatio, crossValidationRatio);
        datasetSplitter.split(originalIndex, trainingIndex, testIndex, crossValidationIndex,
                new MockAnalyzer(random()), fieldNames);

        assertNotNull(trainingIndex);
        assertNotNull(testIndex);
        assertNotNull(crossValidationIndex);

        DirectoryReader trainingReader = DirectoryReader.open(trainingIndex);
        assertTrue((int) (originalIndex.maxDoc() * (1d - testRatio - crossValidationRatio)) == trainingReader.maxDoc());
        DirectoryReader testReader = DirectoryReader.open(testIndex);
        assertTrue((int) (originalIndex.maxDoc() * testRatio) == testReader.maxDoc());
        DirectoryReader cvReader = DirectoryReader.open(crossValidationIndex);
        assertTrue((int) (originalIndex.maxDoc() * crossValidationRatio) == cvReader.maxDoc());

        trainingReader.close();
        testReader.close();
        cvReader.close();
        closeQuietly(trainingReader);
        closeQuietly(testReader);
        closeQuietly(cvReader);
    } finally {
        if (trainingIndex != null) {
            trainingIndex.close();
        }
        if (testIndex != null) {
            testIndex.close();
        }
        if (crossValidationIndex != null) {
            crossValidationIndex.close();
        }
    }
}
From source file:com.meltmedia.cadmium.search.SearchContentPreprocessor.java
License:Apache License
@Override
public synchronized void processFromDirectory(String metaDir) throws Exception {
    SearchHolder newStagedSearcher = new SearchHolder();
    indexDir = new File(metaDir, "lucene-index");
    dataDir = new File(metaDir).getParentFile();
    newStagedSearcher.directory = new NIOFSDirectory(indexDir);

    IndexWriter iwriter = null;
    try {
        iwriter = new IndexWriter(newStagedSearcher.directory,
                new IndexWriterConfig(Version.LUCENE_43, analyzer).setRAMBufferSizeMB(5));
        iwriter.deleteAll();
        writeIndex(iwriter, dataDir);
    } finally {
        IOUtils.closeQuietly(iwriter);
        iwriter = null;
    }

    newStagedSearcher.indexReader = DirectoryReader.open(newStagedSearcher.directory);
    SearchHolder oldStage = stagedSearch;
    stagedSearch = newStagedSearcher;
    if (oldStage != null) {
        oldStage.close();
    }
    log.info("About to call processSearchPreprocessors()");
    processSearchPreprocessors(newStagedSearcher.indexReader, analyzer, "content");
}
From source file:com.meltwater.elasticsearch.index.BatchPercolatorService.java
License:Apache License
private Map<String, QueryAndSource> filterQueriesToSearchWith(
        ConcurrentMap<String, QueryAndSource> percolateQueries, Directory directory) throws IOException {
    Map<String, QueryAndSource> filteredQueries = new HashMap<>();
    try (DirectoryReader reader = DirectoryReader.open(directory)) {
        for (Map.Entry<String, QueryAndSource> entry : percolateQueries.entrySet()) {
            if (hasDocumentMatchingFilter(reader, entry.getValue().getLimitingFilter())) {
                filteredQueries.put(entry.getKey(), entry.getValue());
            }
        }
    }
    return filteredQueries;
}
From source file:com.meltwater.elasticsearch.index.BatchPercolatorService.java
License:Apache License
private SearchContext createSearchContext(BatchPercolateShardRequest request,
        IndexService percolateIndexService, IndexShard indexShard, Directory directory) throws IOException {
    SearchShardTarget searchShardTarget = new SearchShardTarget(clusterService.localNode().id(),
            request.shardId().getIndex(), request.shardId().id());
    return new DefaultSearchContext(0,
            new ShardSearchLocalRequest(new ShardId("local_index", 0), 0, SearchType.QUERY_AND_FETCH, null,
                    null, false),
            searchShardTarget, new DocSearcher(new IndexSearcher(DirectoryReader.open(directory))),
            percolateIndexService, indexShard, scriptService, cacheRecycler, pageCacheRecycler, bigArrays,
            Counter.newCounter());
}
From source file:com.mikeqian.search.SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
    String index = System.getProperty("java.io.tmpdir", "tmp") + System.getProperty("file.separator")
            + "index-dir";
    System.out.println(index);
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    WordConfTools.set("dic.path", "classpath:dic.txt,classpath:dic_CN.txt");
    Analyzer analyzer = new ChineseWordAnalyzer();

    BufferedReader in = null;
    if (queries != null) {
        in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8);
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
    }
    QueryParser parser = new QueryParser(field, analyzer);
    while (true) {
        if (queries == null && queryString == null) {
            // prompt the user
            System.out.println("Enter query: ");
        }

        String line = queryString != null ? queryString : in.readLine();
        if (line == null || line.length() == -1) {
            break;
        }
        line = line.trim();
        if (line.length() == 0) {
            break;
        }

        Query query = parser.parse(line);
        System.out.println("Searching for: " + query.toString(field));

        if (repeat > 0) { // repeat & time as benchmark
            Date start = new Date();
            for (int i = 0; i < repeat; i++) {
                searcher.search(query, 100);
            }
            Date end = new Date();
            System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        }

        doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);

        if (queryString != null) {
            break;
        }
    }
    reader.close();
}
From source file:com.miliworks.virgo.test.LuceneIndexAndSearchDemo.java
License:Apache License
/**
 * Lucene indexing and search demo using IK Analyzer.
 * @param args
 */
public static void main(String[] args) {
    // Lucene Document field name and sample text to index
    String fieldName = "text";
    String text = "IK Analyzer???????";

    // instantiate the IKAnalyzer tokenizer in smart mode
    Analyzer analyzer = new IKAnalyzer(true);

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        // build an in-memory index
        directory = new RAMDirectory();

        // configure the IndexWriter
        IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);

        // write a document
        Document doc = new Document();
        doc.add(new StringField("ID", "10000", Field.Store.YES));
        doc.add(new TextField(fieldName, text, Field.Store.YES));
        iwriter.addDocument(doc);
        iwriter.close();

        // search phase **********
        // instantiate the searcher
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        // build a Query with QueryParser
        QueryParser qp = new QueryParser(Version.LUCENE_40, fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        // retrieve the top 5 hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("" + topDocs.totalHits);

        // print the results
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < topDocs.totalHits; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("" + targetDoc.toString());
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:com.mycompany.lucenedemo.SearchFiles.java
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage] [-sim vsm or bm25]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length == 0 || (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0])))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;
    SimilarityScore score = SimilarityScore.DEFAULT;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        } else if ("-sim".equals(args[i])) {
            if (args[i + 1].equals("vsm")) {
                score = SimilarityScore.VSM;
            } else {
                score = SimilarityScore.BM25;
            }
        }
    }

    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();

    BufferedReader in = null;
    if (queries != null) {
        in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8);
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
    }
    QueryParser parser = new QueryParser(field, analyzer);
    while (true) {
        if (queries == null && queryString == null) {
            // prompt the user
            System.out.println("Enter query: ");
        }

        String line = queryString != null ? queryString : in.readLine();
        if (line == null || line.length() == -1) {
            break;
        }
        line = line.trim();
        if (line.length() == 0) {
            break;
        }

        Query query = parser.parse(line);
        System.out.println("Searching for: " + query.toString(field));

        switch (score) {
        case DEFAULT:
            break;
        case VSM:
            searcher.setSimilarity(new ClassicSimilarity());
            break;
        case BM25:
            searcher.setSimilarity(new BM25Similarity());
            break;
        }

        if (repeat > 0) { // repeat & time as benchmark
            Date start = new Date();
            for (int i = 0; i < repeat; i++) {
                searcher.search(query, 100);
            }
            Date end = new Date();
            System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        }

        doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);

        if (queryString != null) {
            break;
        }
    }
    reader.close();
}