List of usage examples for org.apache.lucene.index DirectoryReader open
public static DirectoryReader open(final IndexCommit commit) throws IOException
From source file:collene.Freedb.java
License:Apache License
public static void DoSearch(Directory directory) throws Exception { out.println("I think these are the files:"); for (String s : directory.listAll()) { out.println(s);//from w w w .j a v a2 s . c om } IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "any", analyzer); for (int i = 0; i < 5; i++) { long searchStart = System.currentTimeMillis(); Query query = parser.parse("morrissey"); //Query query = parser.parse("Dance"); TopDocs docs = searcher.search(query, 10); long searchEnd = System.currentTimeMillis(); out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(), docs.totalHits, searchEnd - searchStart)); long lookupStart = System.currentTimeMillis(); for (ScoreDoc d : docs.scoreDocs) { Document doc = searcher.doc(d.doc); out.println(String.format("%d %.2f %d %s", d.doc, d.score, d.shardIndex, doc.getField("any").stringValue())); } long lookupEnd = System.currentTimeMillis(); out.println(String.format("Document lookup took %d ms for %d documents", lookupEnd - lookupStart, docs.scoreDocs.length)); } directory.close(); }
From source file:com.adanac.module.blog.search.LuceneHelper.java
License:Apache License
private static List<Map<String, String>> search(String searchText, String path, String title, LoadQuery loadQuery) {/*from w w w .j av a2 s . c o m*/ try { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_PATH + path))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new SmartChineseAnalyzer(); QueryParser parser = new QueryParser("indexedContent", analyzer); Query query = parser.parse(searchText); TopDocs resultDocs = searcher.search(query, 100); ScoreDoc[] scoreDocs = resultDocs.scoreDocs; // SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>"); Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(150)); List<Map<String, String>> result = new ArrayList<>(); List<Integer> idList = new ArrayList<>(); for (int i = 0; i < scoreDocs.length; i++) { Document doc = searcher.doc(scoreDocs[i].doc); Integer id = Integer.valueOf(doc.get("id")); if (!idList.contains(id)) { String indexedContent = doc.get("indexedContent"); TokenStream tokenStream = analyzer.tokenStream("indexedContent", indexedContent); Map<String, String> data = loadQuery.getById(id); String highlighterString = highlighter.getBestFragment(tokenStream, indexedContent); if (highlighterString.contains(SEPARATOR)) { String[] array = highlighterString.split(SEPARATOR); data.put(title, array[0]); if (array.length > 1) { data.put("summary", array[1]); } } else { data.put("summary", highlighterString); } result.add(data); idList.add(id); } } return result; } catch (Exception e) { logger.error("search failed ...", e); } return new ArrayList<>(); }
From source file:com.admarketplace.isg.lucene.demo.SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length <= 0 || ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);//from w w w .jav a 2s . co m } String index = "index"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, null, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }
From source file:com.aurel.track.lucene.search.LuceneSearcher.java
License:Open Source License
/** * Prepares an Indexsearcher object for an Index * @param index/*from ww w . j a v a 2 s . c o m*/ * @return */ public static IndexSearcher getIndexSearcher(int index) { Directory indexDir = LuceneUtil.getIndexDirectory(index); if (indexDir == null) { LOGGER.error("The index directory for " + index + " doesn't exist or is not a directory"); return null; } //initialize the searcher //we will initialize always a new searcher because the data should be up to date /* * Lucene FAQ: * 1. Make sure to open a new IndexSearcher after adding documents. * An IndexSearcher will only see the documents that were in the index in the moment it was opened. * 2. It is recommended to use only one IndexSearcher from all threads in order to save memory * Thanks for your help :) */ IndexSearcher is = null; try { IndexReader indexReader = DirectoryReader.open(indexDir); is = new IndexSearcher(indexReader); } catch (IOException e) { LOGGER.warn("Initializing the IndexSearcher for index " + index + " failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } return is; }
From source file:com.b2international.index.lucene.NullIndexSearcher.java
License:Apache License
private static DirectoryReader createRamReader() throws IOException { final RAMDirectory directory = new RAMDirectory(); if (!DirectoryReader.indexExists(directory)) { final IndexWriterConfig conf = new IndexWriterConfig(new WhitespaceAnalyzer()); final IndexWriter writer = new IndexWriter(directory, conf); writer.commit();/*from w w w.j av a 2 s. com*/ writer.close(); } return DirectoryReader.open(directory); }
From source file:com.bah.lucene.blockcache_v2.CacheDirectoryTest.java
License:Apache License
@Test public void test3() throws IOException, InterruptedException { // Thread.sleep(30000); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer()); IndexWriter writer = new IndexWriter(_cacheDirectory, conf); int docs = 100000; for (int i = 0; i < docs; i++) { if (i % 500 == 0) { System.out.println(i); }//from w w w .ja v a 2s. c om writer.addDocument(newDoc()); // Thread.sleep(1); } writer.close(); System.out.println("done writing"); DirectoryReader reader = DirectoryReader.open(_cacheDirectory); System.out.println("done opening"); assertEquals(docs, reader.numDocs()); Document document = reader.document(0); System.out.println("done fetching"); System.out.println(document); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(new TermQuery(new Term("test", "test")), 10); System.out.println("done searching"); assertEquals(docs, topDocs.totalHits); reader.close(); }
From source file:com.basistech.lucene.tools.LuceneQueryTool.java
License:Apache License
public static void main(String[] args) throws IOException, org.apache.lucene.queryparser.classic.ParseException { String charsetName = Charset.defaultCharset().name(); if (!"UTF-8".equals(charsetName)) { // Really only a problem on mac, where the default charset is MacRoman, // and it cannot be changed via the system Locale. System.err.println(String.format("defaultCharset is %s, but we require UTF-8.", charsetName)); System.err.println("Set -Dfile.encoding=UTF-8 on the Java command line, or"); System.err.println("set JAVA_TOOL_OPTIONS=-Dfile.encoding=UTF-8 in the environment."); System.exit(1);/* w w w. j a va2 s . c o m*/ } Options options = LuceneQueryTool.createOptions(); CommandLineParser parser = new GnuParser(); CommandLine cmdline = null; try { cmdline = parser.parse(options, args); validateOptions(options, args); } catch (org.apache.commons.cli.ParseException e) { System.err.println(e.getMessage()); usage(options); System.exit(1); } String[] remaining = cmdline.getArgs(); if (remaining != null && remaining.length > 0) { System.err.println("unknown extra args found: " + Lists.newArrayList(remaining)); usage(options); System.exit(1); } String[] indexPaths = cmdline.getOptionValues("index"); IndexReader[] readers = new IndexReader[indexPaths.length]; for (int i = 0; i < indexPaths.length; i++) { Path path = FileSystems.getDefault().getPath(indexPaths[i]); readers[i] = DirectoryReader.open(FSDirectory.open(path)); } IndexReader reader = new MultiReader(readers, true); LuceneQueryTool that = new LuceneQueryTool(reader); String opt; opt = cmdline.getOptionValue("query-limit"); if (opt != null) { that.setQueryLimit(Integer.parseInt(opt)); } opt = cmdline.getOptionValue("output-limit"); if (opt != null) { that.setOutputLimit(Integer.parseInt(opt)); } opt = cmdline.getOptionValue("analyzer"); if (opt != null) { that.setAnalyzer(opt); } opt = cmdline.getOptionValue("query-field"); if (opt != null) { that.setDefaultField(opt); } opt = cmdline.getOptionValue("output"); PrintStream out = null; if (opt != null) { out = new PrintStream(new FileOutputStream(new File(opt)), true); that.setOutputStream(out); } if (cmdline.hasOption("show-id")) { that.setShowId(true); } if (cmdline.hasOption("show-hits")) { that.setShowHits(true); } if (cmdline.hasOption("show-score")) { that.setShowScore(true); } if (cmdline.hasOption("sort-fields")) { that.setSortFields(true); } boolean suppressNames = cmdline.hasOption("suppress-names"); Formatter.Format format = Formatter.Format.MULTILINE; opt = cmdline.getOptionValue("format"); if (opt != null) { format = Formatter.Format.fromName(opt); } if (cmdline.hasOption("tabular")) { // compatibility option format = Formatter.Format.TABULAR; } that.setFormatter(Formatter.newInstance(format, suppressNames)); String[] opts; opts = cmdline.getOptionValues("fields"); if (opts != null) { that.setFieldNames(Lists.newArrayList(opts)); } opt = cmdline.getOptionValue("regex"); if (opt != null) { Pattern p = Pattern.compile("^(.*?):/(.*)/$"); Matcher m = p.matcher(opt); if (m.matches()) { that.setRegex(m.group(1), Pattern.compile(m.group(2))); } else { System.err.println("Invalid regex, should be field:/regex/"); usage(options); System.exit(1); } } opts = cmdline.getOptionValues("query"); that.run(opts); if (out != null) { out.close(); } reader.close(); }
From source file:com.basistech.lucene.tools.LuceneQueryToolTest.java
License:Apache License
@BeforeClass public static void oneTimeSetup() throws IOException, ParseException { LuceneQueryToolTest.showOutput = false; // for debugging tests Directory dir = new RAMDirectory(); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(dir, config); Document doc = new Document(); doc.add(new Field("longest-mention", "Bill Clinton", StringField.TYPE_STORED)); doc.add(new Field("context", "Hillary Clinton Arkansas", TextField.TYPE_NOT_STORED)); writer.addDocument(doc);/*from w w w . java2s. c om*/ doc = new Document(); doc.add(new Field("longest-mention", "George W. Bush", StringField.TYPE_STORED)); doc.add(new Field("context", "Texas Laura Bush", TextField.TYPE_NOT_STORED)); writer.addDocument(doc); doc = new Document(); doc.add(new Field("longest-mention", "George H. W. Bush", StringField.TYPE_STORED)); doc.add(new Field("context", "Barbara Bush Texas", TextField.TYPE_NOT_STORED)); writer.addDocument(doc); doc = new Document(); doc.add(new Field("bbb", "foo", StringField.TYPE_STORED)); doc.add(new Field("bbb", "bar", StringField.TYPE_STORED)); doc.add(new Field("aaa", "foo", StringField.TYPE_STORED)); FieldType typeUnindexed = new FieldType(StringField.TYPE_STORED); typeUnindexed.setIndexOptions(IndexOptions.NONE); doc.add(new Field("zzz", "foo", typeUnindexed)); writer.addDocument(doc); writer.close(); reader = DirectoryReader.open(dir); }
From source file:com.basistech.lucene.tools.LuceneQueryToolTest.java
License:Apache License
@Test public void testBinaryField() throws IOException, ParseException { Directory dir = new RAMDirectory(); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(dir, config); Document doc = new Document(); doc.add(new Field("id", "1", StringField.TYPE_STORED)); doc.add(new Field("binary-field", "ABC".getBytes(Charsets.UTF_8), StoredField.TYPE)); writer.addDocument(doc);/*w ww . ja v a 2 s . c o m*/ writer.close(); reader = DirectoryReader.open(dir); ByteArrayOutputStream bytes = new ByteArrayOutputStream(); PrintStream out = new PrintStream(bytes); LuceneQueryTool lqt = new LuceneQueryTool(reader, out); lqt.run(new String[] { "id:1" }); String result = Joiner.on('\n').join(getOutput(bytes)); assertTrue(result.contains("0x414243")); // binary rep of "ABC" }
From source file:com.bdaum.zoom.lal.internal.LireActivator.java
License:Open Source License
public IndexReader createBaseIndexReader(File indexPath) throws IOException { return DirectoryReader.open(SimpleFSDirectory.open(indexPath.toPath())); }