List of usage examples for org.apache.lucene.index DirectoryReader open
public static DirectoryReader open(final Directory directory) throws IOException
public static DirectoryReader open(final IndexCommit commit) throws IOException
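All of the examples below pass a Directory; none exercises the IndexCommit overload. As a minimal sketch of the commit-based form (not taken from the examples below): it opens a point-in-time reader on a specific commit, assuming an index already exists at /tmp/testindex and that its deletion policy retained old commits (by default Lucene only keeps, and listCommits only returns, the latest one).

import java.nio.file.Paths;
import java.util.List;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class OpenCommitSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/testindex"))) {
            // List the commits still present in the index.
            List<IndexCommit> commits = DirectoryReader.listCommits(dir);
            IndexCommit commit = commits.get(0);
            // Open a reader fixed at that commit; later writes are invisible to it.
            try (DirectoryReader reader = DirectoryReader.open(commit)) {
                System.out.println("Docs at generation " + commit.getGeneration()
                        + ": " + reader.numDocs());
            }
        }
    }
}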
From source file:com.test.LuceneDemo.java
License:Apache License
@Test
public void test() throws IOException, org.apache.lucene.queryparser.classic.ParseException {
    Analyzer analyzer = new StandardAnalyzer();

    // Store the index in memory:
    Directory directory = new RAMDirectory();
    // To store an index on disk, use this instead:
    // Directory directory = FSDirectory.open("/tmp/testindex");
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    IndexWriter iwriter = new IndexWriter(directory, config);
    Document doc = new Document();
    String text = "This is the text to be indexed.";
    doc.add(new Field("fieldname", text, TextField.TYPE_STORED));
    iwriter.addDocument(doc);
    iwriter.close();

    // Now search the index:
    DirectoryReader ireader = DirectoryReader.open(directory);
    IndexSearcher isearcher = new IndexSearcher(ireader);
    // Parse a simple query that searches for "text":
    QueryParser parser = new QueryParser("fieldname", analyzer);
    Query query = parser.parse("indexed");
    ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
    assertEquals(1, hits.length);
    // Iterate through the results:
    for (int i = 0; i < hits.length; i++) {
        Document hitDoc = isearcher.doc(hits[i].doc);
        assertEquals("This is the text to be indexed.", hitDoc.get("fieldname"));
    }
    ireader.close();
    directory.close();
}
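Note that this test targets the Lucene 4.x/5.x API: RAMDirectory and the search(Query, Filter, int) overload were removed in later releases. A minimal sketch of the same index-then-search flow on a newer Lucene, assuming a version that ships ByteBuffersDirectory (8.x does); the class name ModernOpenSketch is ours:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class ModernOpenSketch {
    public static void main(String[] args) throws Exception {
        Analyzer analyzer = new StandardAnalyzer();
        Directory directory = new ByteBuffersDirectory(); // replaces RAMDirectory
        IndexWriter iwriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
        Document doc = new Document();
        doc.add(new Field("fieldname", "This is the text to be indexed.", TextField.TYPE_STORED));
        iwriter.addDocument(doc);
        iwriter.close();

        DirectoryReader ireader = DirectoryReader.open(directory);
        IndexSearcher isearcher = new IndexSearcher(ireader);
        Query query = new QueryParser("fieldname", analyzer).parse("indexed");
        // The Filter-taking search overload is gone; pass just the query and a count.
        ScoreDoc[] hits = isearcher.search(query, 1000).scoreDocs;
        System.out.println("Hits: " + hits.length);
        ireader.close();
        directory.close();
    }
}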
From source file:com.twentyn.patentSearch.DocumentSearch.java
License:Open Source License
public static void main(String[] args) throws Exception {
    System.out.println("Starting up...");
    System.out.flush();

    Options opts = new Options();
    opts.addOption(Option.builder("x").longOpt("index").hasArg().required()
            .desc("Path to index file to read").build());
    opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build());
    opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build());
    opts.addOption(Option.builder("f").longOpt("field").hasArg().desc("The indexed field to search").build());
    opts.addOption(Option.builder("q").longOpt("query").hasArg()
            .desc("The query to use when searching").build());
    opts.addOption(Option.builder("l").longOpt("list-file").hasArg()
            .desc("A file containing a list of queries to run in sequence").build());
    opts.addOption(Option.builder("e").longOpt("enumerate")
            .desc("Enumerate the documents in the index").build());
    opts.addOption(Option.builder("d").longOpt("dump").hasArg()
            .desc("Dump terms in the document index for a specified field").build());
    opts.addOption(Option.builder("o").longOpt("output").hasArg()
            .desc("Write results JSON to this file.").build());
    opts.addOption(Option.builder("n").longOpt("inchi-field").hasArg()
            .desc("The index of the InChI field if an input TSV is specified.").build());
    opts.addOption(Option.builder("s").longOpt("synonym-field").hasArg()
            .desc("The index of the chemical synonym field if an input TSV is specified.").build());

    HelpFormatter helpFormatter = new HelpFormatter();
    CommandLineParser cmdLineParser = new DefaultParser();
    CommandLine cmdLine = null;
    try {
        cmdLine = cmdLineParser.parse(opts, args);
    } catch (ParseException e) {
        System.out.println("Caught exception when parsing command line: " + e.getMessage());
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(1);
    }

    if (cmdLine.hasOption("help")) {
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(0);
    }

    if (!(cmdLine.hasOption("enumerate") || cmdLine.hasOption("dump")
            || (cmdLine.hasOption("field")
                    && (cmdLine.hasOption("query") || cmdLine.hasOption("list-file"))))) {
        System.out.println("Must specify one of 'enumerate', 'dump', or 'field' + {'query', 'list-file'}");
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(1);
    }

    if (cmdLine.hasOption("verbose")) {
        // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2
        LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
        Configuration ctxConfig = ctx.getConfiguration();
        LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME);
        logConfig.setLevel(Level.DEBUG);
        ctx.updateLoggers();
        LOGGER.debug("Verbose logging enabled");
    }

    ObjectMapper objectMapper = new ObjectMapper();
    objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
    objectMapper.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.ANY);

    LOGGER.info("Opening index at " + cmdLine.getOptionValue("index"));
    try (Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath());
            IndexReader indexReader = DirectoryReader.open(indexDir)) {
        if (cmdLine.hasOption("enumerate")) {
            /* Enumerate all documents in the index.
             * With help from
             * http://stackoverflow.com/questions/2311845/is-it-possible-to-iterate-through-documents-stored-in-lucene-index */
            for (int i = 0; i < indexReader.maxDoc(); i++) {
                Document doc = indexReader.document(i);
                LOGGER.info("Doc " + i + ":");
                LOGGER.info(doc);
            }
        } else if (cmdLine.hasOption("dump")) {
            /* Dump indexed terms for a specific field.
             * With help from http://stackoverflow.com/questions/11148036/find-list-of-terms-indexed-by-lucene */
            Terms terms = SlowCompositeReaderWrapper.wrap(indexReader).terms(cmdLine.getOptionValue("dump"));
            LOGGER.info("Has positions: " + terms.hasPositions());
            LOGGER.info("Has offsets: " + terms.hasOffsets());
            LOGGER.info("Has freqs: " + terms.hasFreqs());
            LOGGER.info("Stats: " + terms.getStats());
            LOGGER.info(terms);
            TermsEnum termsEnum = terms.iterator();
            BytesRef br = null;
            while ((br = termsEnum.next()) != null) {
                LOGGER.info("  " + br.utf8ToString());
            }
        } else {
            IndexSearcher searcher = new IndexSearcher(indexReader);
            String field = cmdLine.getOptionValue("field");

            List<Pair<String, String>> queries = null;
            if (cmdLine.hasOption("query")) {
                queries = Collections.singletonList(Pair.of("", cmdLine.getOptionValue("query")));
            } else if (cmdLine.hasOption("list-file")) {
                if (!(cmdLine.hasOption("inchi-field") && cmdLine.hasOption("synonym-field"))) {
                    LOGGER.error("Must specify both inchi-field and synonym-field when using list-file.");
                    System.exit(1);
                }
                Integer inchiField = Integer.parseInt(cmdLine.getOptionValue("inchi-field"));
                Integer synonymField = Integer.parseInt(cmdLine.getOptionValue("synonym-field"));

                queries = new LinkedList<>();
                BufferedReader r = new BufferedReader(new FileReader(cmdLine.getOptionValue("list-file")));
                String line;
                while ((line = r.readLine()) != null) {
                    line = line.trim();
                    if (!line.isEmpty()) {
                        // TODO: use a proper TSV reader; this is intentionally terrible as is.
                        String[] fields = line.split("\t");
                        queries.add(Pair.of(fields[inchiField].replace("\"", ""), fields[synonymField]));
                    }
                }
                r.close();
            }

            if (queries == null || queries.size() == 0) {
                LOGGER.error("Found no queries to run.");
                return;
            }

            List<SearchResult> searchResults = new ArrayList<>(queries.size());
            for (Pair<String, String> queryPair : queries) {
                String inchi = queryPair.getLeft();
                String rawQueryString = queryPair.getRight();
                /* The Lucene query parser interprets the kind of structural annotations we see in chemical
                 * entities as query directives, which is not what we want at all. Phrase queries seem to work
                 * adequately with the analyzer we're currently using. */
                String queryString = rawQueryString.trim().toLowerCase();
                String[] parts = queryString.split("\\s+");
                PhraseQuery query = new PhraseQuery();
                for (String p : parts) {
                    query.add(new Term(field, p));
                }
                LOGGER.info("Running query: " + query.toString());

                BooleanQuery bq = new BooleanQuery();
                bq.add(query, BooleanClause.Occur.MUST);
                bq.add(new TermQuery(new Term(field, "yeast")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "ferment")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "fermentation")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "fermentive")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "saccharomyces")), BooleanClause.Occur.SHOULD);
                LOGGER.info("  Full query: " + bq.toString());

                TopDocs topDocs = searcher.search(bq, 100);
                ScoreDoc[] scoreDocs = topDocs.scoreDocs;
                if (scoreDocs.length == 0) {
                    LOGGER.info("Search returned no results.");
                }
                List<ResultDocument> results = new ArrayList<>(scoreDocs.length);
                for (int i = 0; i < scoreDocs.length; i++) {
                    ScoreDoc scoreDoc = scoreDocs[i];
                    Document doc = indexReader.document(scoreDoc.doc);
                    LOGGER.info("Doc " + i + ": " + scoreDoc.doc + ", score " + scoreDoc.score + ": "
                            + doc.get("id") + ", " + doc.get("title"));
                    results.add(new ResultDocument(scoreDoc.doc, scoreDoc.score, doc.get("title"),
                            doc.get("id"), null));
                }
                LOGGER.info("----- Done with query " + query.toString());
                // TODO: reduce memory usage when not writing results to an output file.
                searchResults.add(new SearchResult(inchi, rawQueryString, bq, results));
            }

            if (cmdLine.hasOption("output")) {
                try (FileWriter writer = new FileWriter(cmdLine.getOptionValue("output"))) {
                    writer.write(objectMapper.writeValueAsString(searchResults));
                }
            }
        }
    }
}
From source file:com.twentyn.patentSearch.Searcher.java
License:Open Source License
private void init(List<File> indexDirectories) throws IOException {
    for (File indexDirectory : indexDirectories) {
        LOGGER.info("Opening index dir at %s", indexDirectory.getAbsolutePath());
        Directory indexDir = FSDirectory.open(indexDirectory.toPath());
        IndexReader indexReader = DirectoryReader.open(indexDir);
        IndexSearcher searcher = new IndexSearcher(indexReader);
        // Only add to the list if both of these calls work.
        indexReadersAndSearchers.add(Pair.of(indexReader, searcher));
    }
}
From source file:com.vnet.demo.service.lucene.LuceneService.java
License:Apache License
public SearchResult<DocumentData> query(String keyword, int start, int number) {
    SearchResult<DocumentData> searchResult = null;
    try {
        List<DocumentData> documentDatas = new ArrayList<DocumentData>();
        DirectoryReader ireader = DirectoryReader.open(index);
        IndexSearcher isearcher = new IndexSearcher(ireader);
        Query query = new QueryParser(version, "title", analyzer).parse(keyword + "*");

        TopDocs hits = null;
        if (start > 0) {
            // Page past the first `start` hits, then continue after the last of them.
            TopDocs result = isearcher.search(query, start);
            ScoreDoc scoreDoc = result.scoreDocs[result.scoreDocs.length - 1];
            hits = isearcher.searchAfter(scoreDoc, query, number);
        } else {
            hits = isearcher.search(query, number);
        }

        for (int i = 0; i < hits.scoreDocs.length; i++) {
            DocumentData data = new DocumentData();
            Document hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
            data.setId(Long.parseLong(hitDoc.get("id")));
            data.setTitle(hitDoc.get("title"));
            data.setSummary(hitDoc.get("summary"));
            data.setCreateDate(Long.parseLong(hitDoc.get("createDate")));
            documentDatas.add(data);
        }
        searchResult = new SearchResult<DocumentData>(new Long(hits.totalHits), documentDatas);
    } catch (ParseException | IOException e) {
        e.printStackTrace();
    }
    return searchResult;
}
From source file:com.work.SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
    // English
    // String indexPath = "C:/Users/Harish/Desktop/IR/Data/Data English/masc_500k_texts/written/letters/index/one/";
    // String queries = "C:/Users/Harish/Desktop/IR/Data/Data English/masc_500k_texts/written/letters/query/query1.txt";
    // Analyzer analyzer = new StandardAnalyzer();

    // Hindi
    String indexPath = "C:/Users/Harish/Desktop/IR/Data/Hindi Data/hin_corp_unicode/index/one/";
    // String queries = "C:/Users/Harish/Desktop/IR/Data/Hindi Data/hin_corp_unicode/query/one.txt";
    String queries = null;
    Analyzer analyzer = new HindiAnalyzer();

    // Chinese
    // Analyzer analyzer = new CJKAnalyzer();

    String index = indexPath;
    String field = "contents";
    String a = "???";
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;

    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);

    BufferedReader in = null;
    if (queries != null) {
        in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8);
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
    }
    QueryParser parser = new QueryParser(field, analyzer);
    while (true) {
        if (queries == null && queryString == null) {
            // prompt the user
            System.out.println("Enter query: ");
        }

        String line = queryString != null ? queryString : in.readLine();
        if (line == null || line.length() == -1) {
            break;
        }

        line = line.trim();
        if (line.length() == 0) {
            break;
        }

        Query query = parser.parse(line);
        System.out.println("Searching for: " + query.toString(field));

        if (repeat > 0) {
            // repeat & time as benchmark
            Date start = new Date();
            for (int i = 0; i < repeat; i++) {
                searcher.search(query, 100);
            }
            Date end = new Date();
            System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        }

        doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);

        if (queryString != null) {
            break;
        }
    }
    reader.close();
}
From source file:com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm.java
License:Apache License
/**
 * This method is used by the index update combiner and processes an
 * intermediate form into the current intermediate form. More specifically,
 * the input intermediate forms are a single-document ram index and/or a
 * single delete term.
 * @param form the input intermediate form
 * @throws IOException
 */
public void process(IntermediateForm form, FacetsConfig facetsConfig) throws IOException {
    if (form.dir.ramBytesUsed() > 0 || form.taxoDir.ramBytesUsed() > 0) {
        if (writer == null) {
            createWriter();
        }

        if (facetsConfig != null) {
            DirectoryTaxonomyWriter.OrdinalMap map = new DirectoryTaxonomyWriter.MemoryOrdinalMap();
            // merge the taxonomies
            taxoWriter.addTaxonomy(form.taxoDir, map);
            int[] ordinalMap = map.getMap();
            DirectoryReader reader = DirectoryReader.open(form.dir);
            try {
                List<AtomicReaderContext> leaves = reader.leaves();
                int numReaders = leaves.size();
                AtomicReader[] wrappedLeaves = new AtomicReader[numReaders];
                for (int i = 0; i < numReaders; i++) {
                    wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordinalMap,
                            facetsConfig);
                }
                writer.addIndexes(new MultiReader(wrappedLeaves));
            } finally {
                reader.close();
            }
        } else {
            writer.addIndexes(new Directory[] { form.dir });
        }
        numDocs++;
    }
}
From source file:com.xiaomi.linden.hadoop.indexing.reduce.ShardWriter.java
License:Apache License
/**
 * Process an intermediate form by carrying out, on the Lucene instance of
 * the shard, the deletes and the inserts (a ram index) in the form.
 * @param form the intermediate form containing deletes and a ram index
 * @throws IOException
 */
public void process(IntermediateForm form, FacetsConfig facetsConfig) throws IOException {
    if (facetsConfig != null) {
        DirectoryTaxonomyWriter.OrdinalMap map = new DirectoryTaxonomyWriter.MemoryOrdinalMap();
        // merge the taxonomies
        taxoWriter.addTaxonomy(form.getTaxoDirectory(), map);
        int[] ordinalMap = map.getMap();
        DirectoryReader reader = DirectoryReader.open(form.getDirectory());
        try {
            List<AtomicReaderContext> leaves = reader.leaves();
            int numReaders = leaves.size();
            AtomicReader[] wrappedLeaves = new AtomicReader[numReaders];
            for (int i = 0; i < numReaders; i++) {
                wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordinalMap,
                        facetsConfig);
            }
            writer.addIndexes(new MultiReader(wrappedLeaves));
        } finally {
            reader.close();
        }
    } else {
        writer.addIndexes(new Directory[] { form.getDirectory() });
    }
    numForms++;
}
From source file:com.xpn.xwiki.plugin.lucene.internal.IndexRebuilder.java
License:Open Source License
public IndexSearcher createSearcher(Directory directory, XWikiContext context) {
    IndexSearcher searcher = null;
    try {
        searcher = new IndexSearcher(DirectoryReader.open(directory));
    } catch (Exception e) {
        LOGGER.error("Failed to create IndexSearcher for Lucene index [{}]", directory, e);
    }
    return searcher;
}
From source file:com.yahoo.bard.webservice.data.dimension.impl.LuceneSearchProvider.java
License:Apache License
/**
 * Re-open the Index Searcher, opening it for the first time if it's never been opened.
 * <p>
 * This method will attempt to acquire and release a write lock.
 *
 * @param firstTimeThrough If true, will write an empty index and will then re-open the searcher
 */
private void reopenIndexSearcher(boolean firstTimeThrough) {
    lock.writeLock().lock();
    try {
        // Close the current reader if open
        if (luceneIndexSearcher != null) {
            luceneIndexSearcher.getIndexReader().close();
        }
        // Open a new IndexSearcher on a new DirectoryReader
        luceneIndexSearcher = new IndexSearcher(DirectoryReader.open(luceneDirectory));
    } catch (IOException reopenException) {
        // If there is no index file, this is expected. On the 1st time through, write an empty index and try again
        if (firstTimeThrough) {
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LUCENE_ANALYZER);
            try (IndexWriter ignored = new IndexWriter(luceneDirectory, indexWriterConfig)) {
                // Closed automatically by the try-resource block
            } catch (IOException emptyIndexWriteException) {
                // We can't move past this, so puke
                luceneIndexIsHealthy = false;
                String message = String.format("Unable to write empty index to %s:", luceneIndexPath);
                LOG.error(message, emptyIndexWriteException);
                throw new RuntimeException(emptyIndexWriteException);
            }
            reopenIndexSearcher(false);
        } else {
            // We've been here before, so puke
            luceneIndexIsHealthy = false;
            String message = String.format("Unable to open index searcher for %s:", luceneIndexPath);
            LOG.error(message, reopenException);
            throw new RuntimeException(reopenException);
        }
    } finally {
        lock.writeLock().unlock();
    }
}
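When an index is reopened repeatedly like this, Lucene also offers DirectoryReader.openIfChanged, which shares unchanged segments with the old reader instead of opening the whole index again. A minimal sketch; the refresh helper is ours, not part of the class above:

import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;

// Sketch: incremental reopen. openIfChanged returns null when the index
// is unchanged, so the old reader keeps serving in that case.
static DirectoryReader refresh(DirectoryReader current) throws IOException {
    DirectoryReader changed = DirectoryReader.openIfChanged(current);
    if (changed == null) {
        return current;   // nothing new; keep the old reader
    }
    current.close();      // release the stale reader
    return changed;       // rebuild the IndexSearcher on top of this one
}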
From source file:com.zghw.lucene.demo.AssociationsFacetsExample.java
License:Apache License
/** User runs a query and aggregates facets by summing their association values. */
private List<FacetResult> sumAssociations() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

    FacetsCollector fc = new FacetsCollector();

    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query:
    FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);

    Facets tags = new TaxonomyFacetSumIntAssociations("$tags", taxoReader, config, fc);
    Facets genre = new TaxonomyFacetSumFloatAssociations("$genre", taxoReader, config, fc);

    // Retrieve results
    List<FacetResult> results = new ArrayList<FacetResult>();
    results.add(tags.getTopChildren(10, "tags"));
    results.add(genre.getTopChildren(10, "genre"));

    indexReader.close();
    taxoReader.close();

    return results;
}