Example usage for org.apache.lucene.index DirectoryReader open

Introduction

This page collects usage examples for org.apache.lucene.index.DirectoryReader.open.

Prototype

public static DirectoryReader open(final IndexCommit commit) throws IOException 

Document

Expert: returns an IndexReader reading the index in the given IndexCommit.
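
Note that the prototype above is the IndexCommit overload, while most of the examples below call the open(Directory) overload instead. The snippet that follows is a minimal sketch of the IndexCommit variant, not taken from the sources below; the index path is hypothetical, and it assumes an index with at least one commit already exists there.

// Hypothetical sketch: open a reader on a specific commit.
// Assumes an index already exists at the path below.
Directory dir = FSDirectory.open(Paths.get("/tmp/testindex"));
List<IndexCommit> commits = DirectoryReader.listCommits(dir);
// Commits are listed oldest first; open a reader on the most recent one:
DirectoryReader reader = DirectoryReader.open(commits.get(commits.size() - 1));
try {
    System.out.println("Docs visible in this commit: " + reader.numDocs());
} finally {
    reader.close();
    dir.close();
}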

Usage

From source file:com.test.LuceneDemo.java

License:Apache License

@Test
public void test() throws IOException, org.apache.lucene.queryparser.classic.ParseException {
    Analyzer analyzer = new StandardAnalyzer();

    // Store the index in memory:
    Directory directory = new RAMDirectory();
    // To store an index on disk, use this instead:
    //Directory directory = FSDirectory.open(Paths.get("/tmp/testindex"));
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    IndexWriter iwriter = new IndexWriter(directory, config);
    Document doc = new Document();
    String text = "This is the text to be indexed.";
    doc.add(new Field("fieldname", text, TextField.TYPE_STORED));
    iwriter.addDocument(doc);
    iwriter.close();

    // Now search the index:
    DirectoryReader ireader = DirectoryReader.open(directory);
    IndexSearcher isearcher = new IndexSearcher(ireader);
    // Parse a simple query that searches for "text":
    QueryParser parser = new QueryParser("fieldname", analyzer);
    Query query = parser.parse("indexed");
    ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
    assertEquals(1, hits.length);
    // Iterate through the results:
    for (int i = 0; i < hits.length; i++) {
        Document hitDoc = isearcher.doc(hits[i].doc);
        assertEquals("This is the text to be indexed.", hitDoc.get("fieldname"));
    }
    ireader.close();
    directory.close();
}

From source file:com.twentyn.patentSearch.DocumentSearch.java

License:Open Source License

public static void main(String[] args) throws Exception {
    System.out.println("Starting up...");
    System.out.flush();
    Options opts = new Options();
    opts.addOption(Option.builder("x").longOpt("index").hasArg().required().desc("Path to index file to read")
            .build());
    opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build());
    opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build());

    opts.addOption(Option.builder("f").longOpt("field").hasArg().desc("The indexed field to search").build());
    opts.addOption(
            Option.builder("q").longOpt("query").hasArg().desc("The query to use when searching").build());
    opts.addOption(Option.builder("l").longOpt("list-file").hasArg()
            .desc("A file containing a list of queries to run in sequence").build());
    opts.addOption(
            Option.builder("e").longOpt("enumerate").desc("Enumerate the documents in the index").build());
    opts.addOption(Option.builder("d").longOpt("dump").hasArg()
            .desc("Dump terms in the document index for a specified field").build());
    opts.addOption(
            Option.builder("o").longOpt("output").hasArg().desc("Write results JSON to this file.").build());
    opts.addOption(Option.builder("n").longOpt("inchi-field").hasArg()
            .desc("The index of the InChI field if an input TSV is specified.").build());
    opts.addOption(Option.builder("s").longOpt("synonym-field").hasArg()
            .desc("The index of the chemical synonym field if an input TSV is specified.").build());

    HelpFormatter helpFormatter = new HelpFormatter();
    CommandLineParser cmdLineParser = new DefaultParser();
    CommandLine cmdLine = null;
    try {
        cmdLine = cmdLineParser.parse(opts, args);
    } catch (ParseException e) {
        System.out.println("Caught exception when parsing command line: " + e.getMessage());
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(1);
    }

    if (cmdLine.hasOption("help")) {
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(0);
    }

    if (!(cmdLine.hasOption("enumerate") || cmdLine.hasOption("dump") || (cmdLine.hasOption("field")
            && (cmdLine.hasOption("query") || cmdLine.hasOption("list-file"))))) {
        System.out.println("Must specify one of 'enumerate', 'dump', or 'field' + {'query', 'list-file'}");
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(1);
    }

    if (cmdLine.hasOption("verbose")) {
        // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2
        LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
        Configuration ctxConfig = ctx.getConfiguration();
        LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME);
        logConfig.setLevel(Level.DEBUG);

        ctx.updateLoggers();
        LOGGER.debug("Verbose logging enabled");
    }

    ObjectMapper objectMapper = new ObjectMapper();
    objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
    objectMapper.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.ANY);

    LOGGER.info("Opening index at " + cmdLine.getOptionValue("index"));

    try (Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath());
            IndexReader indexReader = DirectoryReader.open(indexDir);) {
        if (cmdLine.hasOption("enumerate")) {
            /* Enumerate all documents in the index.
             * With help from
             * http://stackoverflow.com/questions/2311845/is-it-possible-to-iterate-through-documents-stored-in-lucene-index
             */
            for (int i = 0; i < indexReader.maxDoc(); i++) {
                Document doc = indexReader.document(i);
                LOGGER.info("Doc " + i + ":");
                LOGGER.info(doc);
            }
        } else if (cmdLine.hasOption("dump")) {
            /* Dump indexed terms for a specific field.
             * With help from http://stackoverflow.com/questions/11148036/find-list-of-terms-indexed-by-lucene */
            Terms terms = SlowCompositeReaderWrapper.wrap(indexReader).terms(cmdLine.getOptionValue("dump"));
            LOGGER.info("Has positions: " + terms.hasPositions());
            LOGGER.info("Has offsets:   " + terms.hasOffsets());
            LOGGER.info("Has freqs:     " + terms.hasFreqs());
            LOGGER.info("Stats:         " + terms.getStats());
            LOGGER.info(terms);
            TermsEnum termsEnum = terms.iterator();
            BytesRef br = null;
            while ((br = termsEnum.next()) != null) {
                LOGGER.info("  " + br.utf8ToString());
            }

        } else {
            IndexSearcher searcher = new IndexSearcher(indexReader);
            String field = cmdLine.getOptionValue("field");

            List<Pair<String, String>> queries = null;
            if (cmdLine.hasOption("query")) {
                queries = Collections.singletonList(Pair.of("", cmdLine.getOptionValue("query")));
            } else if (cmdLine.hasOption("list-file")) {
                if (!(cmdLine.hasOption("inchi-field") && cmdLine.hasOption("synonym-field"))) {
                    LOGGER.error("Must specify both inchi-field and synonym-field when using list-file.");
                    System.exit(1);
                }
                Integer inchiField = Integer.parseInt(cmdLine.getOptionValue("inchi-field"));
                Integer synonymField = Integer.parseInt(cmdLine.getOptionValue("synonym-field"));

                queries = new LinkedList<>();
                BufferedReader r = new BufferedReader(new FileReader(cmdLine.getOptionValue("list-file")));
                String line;
                while ((line = r.readLine()) != null) {
                    line = line.trim();
                    if (!line.isEmpty()) {
                        // TODO: use a proper TSV reader; this is intentionally terrible as is.
                        String[] fields = line.split("\t");
                        queries.add(Pair.of(fields[inchiField].replace("\"", ""), fields[synonymField]));
                    }
                }
                r.close();
            }

            if (queries == null || queries.size() == 0) {
                LOGGER.error("Found no queries to run.");
                return;
            }

            List<SearchResult> searchResults = new ArrayList<>(queries.size());
            for (Pair<String, String> queryPair : queries) {
                String inchi = queryPair.getLeft();
                String rawQueryString = queryPair.getRight();
                /* The Lucene query parser interprets the kind of structural annotations we see in chemical entities
                 * as query directives, which is not what we want at all.  Phrase queries seem to work adequately
                 * with the analyzer we're currently using. */
                String queryString = rawQueryString.trim().toLowerCase();
                String[] parts = queryString.split("\\s+");
                PhraseQuery query = new PhraseQuery();
                for (String p : parts) {
                    query.add(new Term(field, p));
                }
                LOGGER.info("Running query: " + query.toString());

                BooleanQuery bq = new BooleanQuery();
                bq.add(query, BooleanClause.Occur.MUST);
                bq.add(new TermQuery(new Term(field, "yeast")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "ferment")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "fermentation")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "fermentive")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "saccharomyces")), BooleanClause.Occur.SHOULD);

                LOGGER.info("  Full query: " + bq.toString());

                TopDocs topDocs = searcher.search(bq, 100);
                ScoreDoc[] scoreDocs = topDocs.scoreDocs;
                if (scoreDocs.length == 0) {
                    LOGGER.info("Search returned no results.");
                }
                List<ResultDocument> results = new ArrayList<>(scoreDocs.length);
                for (int i = 0; i < scoreDocs.length; i++) {
                    ScoreDoc scoreDoc = scoreDocs[i];
                    Document doc = indexReader.document(scoreDoc.doc);
                    LOGGER.info("Doc " + i + ": " + scoreDoc.doc + ", score " + scoreDoc.score + ": "
                            + doc.get("id") + ", " + doc.get("title"));
                    results.add(new ResultDocument(scoreDoc.doc, scoreDoc.score, doc.get("title"),
                            doc.get("id"), null));
                }
                LOGGER.info("----- Done with query " + query.toString());
                // TODO: reduce memory usage when not writing results to an output file.
                searchResults.add(new SearchResult(inchi, rawQueryString, bq, results));
            }

            if (cmdLine.hasOption("output")) {
                try (FileWriter writer = new FileWriter(cmdLine.getOptionValue("output"));) {
                    writer.write(objectMapper.writeValueAsString(searchResults));
                }
            }
        }
    }
}

From source file:com.twentyn.patentSearch.Searcher.java

License:Open Source License

private void init(List<File> indexDirectories) throws IOException {
    for (File indexDirectory : indexDirectories) {
        LOGGER.info("Opening index dir at %s", indexDirectory.getAbsolutePath());
        Directory indexDir = FSDirectory.open(indexDirectory.toPath());
        IndexReader indexReader = DirectoryReader.open(indexDir);
        IndexSearcher searcher = new IndexSearcher(indexReader);
        // Only add to the list if both of these calls work.
        indexReadersAndSearchers.add(Pair.of(indexReader, searcher));
    }
}

From source file:com.vnet.demo.service.lucene.LuceneService.java

License:Apache License

public SearchResult<DocumentData> query(String keyword, int start, int number) {
    SearchResult<DocumentData> searchResult = null;
    try {
        List<DocumentData> documentDatas = new ArrayList<DocumentData>();
        DirectoryReader ireader = DirectoryReader.open(index);
        IndexSearcher isearcher = new IndexSearcher(ireader);
        Query query = new QueryParser(version, "title", analyzer).parse(keyword + "*");

        TopDocs hits = null;
        if (start > 0) {
            TopDocs result = isearcher.search(query, start);
            ScoreDoc scoreDoc = result.scoreDocs[result.scoreDocs.length - 1];
            hits = isearcher.searchAfter(scoreDoc, query, number);
        } else {
            hits = isearcher.search(query, number);
        }
        for (int i = 0; i < hits.scoreDocs.length; i++) {
            DocumentData data = new DocumentData();
            Document hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
            data.setId(Long.parseLong(hitDoc.get("id")));
            data.setTitle(hitDoc.get("title"));
            data.setSummary(hitDoc.get("summary"));
            data.setCreateDate(Long.parseLong(hitDoc.get("createDate")));
            documentDatas.add(data);
        }
        searchResult = new SearchResult<DocumentData>(new Long(hits.totalHits), documentDatas);
    } catch (ParseException | IOException e) {
        e.printStackTrace();
    }
    return searchResult;
}

From source file:com.work.SearchFiles.java

License:Apache License

/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {

    //  String indexPath = "C:/Users/Harish/Desktop/IR/Data/Data English/masc_500k_texts/written/letters/index/one/";
    //  String queries = "C:/Users/Harish/Desktop/IR/Data/Data English/masc_500k_texts/written/letters/query/query1.txt";
    //   Analyzer analyzer = new StandardAnalyzer();

    // Hindi
    String indexPath = "C:/Users/Harish/Desktop/IR/Data/Hindi Data/hin_corp_unicode/index/one/";
    //  String queries = "C:/Users/Harish/Desktop/IR/Data/Hindi Data/hin_corp_unicode/query/one.txt";
    String queries = null;
    Analyzer analyzer = new HindiAnalyzer();

    //Chinese
    //  Analyzer analyzer = new CJKAnalyzer();

    String index = indexPath;
    String field = "contents";

    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;

    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);

    BufferedReader in = null;
    if (queries != null) {
        in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8);
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
    }
    QueryParser parser = new QueryParser(field, analyzer);

    while (true) {
        if (queries == null && queryString == null) { // prompt the user
            System.out.println("Enter query: ");
        }

        String line = queryString != null ? queryString : in.readLine();

        if (line == null) {
            break;
        }

        line = line.trim();
        if (line.length() == 0) {
            break;
        }

        Query query = parser.parse(line);
        System.out.println("Searching for: " + query.toString(field));

        if (repeat > 0) { // repeat & time as benchmark
            Date start = new Date();
            for (int i = 0; i < repeat; i++) {
                searcher.search(query, 100);
            }
            Date end = new Date();
            System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        }

        doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);

        if (queryString != null) {
            break;
        }
    }
    reader.close();
}

From source file:com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm.java

License:Apache License

/**
 * This method is used by the index update combiner and processes an
 * intermediate form into the current intermediate form. More specifically,
 * the input intermediate forms are a single-document ram index and/or a
 * single delete term.
 * @param form  the input intermediate form
 * @throws IOException
 */
public void process(IntermediateForm form, FacetsConfig facetsConfig) throws IOException {
    if (form.dir.ramBytesUsed() > 0 || form.taxoDir.ramBytesUsed() > 0) {
        if (writer == null) {
            createWriter();
        }

        if (facetsConfig != null) {
            DirectoryTaxonomyWriter.OrdinalMap map = new DirectoryTaxonomyWriter.MemoryOrdinalMap();
            // merge the taxonomies
            taxoWriter.addTaxonomy(form.taxoDir, map);
            int ordinalMap[] = map.getMap();
            DirectoryReader reader = DirectoryReader.open(form.dir);
            try {
                List<AtomicReaderContext> leaves = reader.leaves();
                int numReaders = leaves.size();
                AtomicReader wrappedLeaves[] = new AtomicReader[numReaders];
                for (int i = 0; i < numReaders; i++) {
                    wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordinalMap,
                            facetsConfig);
                }
                writer.addIndexes(new MultiReader(wrappedLeaves));
            } finally {
                reader.close();
            }
        } else {
            writer.addIndexes(new Directory[] { form.dir });
        }
        numDocs++;
    }
}

From source file:com.xiaomi.linden.hadoop.indexing.reduce.ShardWriter.java

License:Apache License

/**
 * Process an intermediate form by carrying out, on the Lucene instance of
 * the shard, the deletes and the inserts (a ram index) in the form.
 * @param form  the intermediate form containing deletes and a ram index
 * @throws IOException
 */
public void process(IntermediateForm form, FacetsConfig facetsConfig) throws IOException {
    if (facetsConfig != null) {
        DirectoryTaxonomyWriter.OrdinalMap map = new DirectoryTaxonomyWriter.MemoryOrdinalMap();
        // merge the taxonomies
        taxoWriter.addTaxonomy(form.getTaxoDirectory(), map);
        int ordinalMap[] = map.getMap();
        DirectoryReader reader = DirectoryReader.open(form.getDirectory());
        try {
            List<AtomicReaderContext> leaves = reader.leaves();
            int numReaders = leaves.size();
            AtomicReader wrappedLeaves[] = new AtomicReader[numReaders];
            for (int i = 0; i < numReaders; i++) {
                wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordinalMap,
                        facetsConfig);
            }
            writer.addIndexes(new MultiReader(wrappedLeaves));
        } finally {
            reader.close();
        }
    } else {
        writer.addIndexes(new Directory[] { form.getDirectory() });
    }
    numForms++;
}

From source file:com.xpn.xwiki.plugin.lucene.internal.IndexRebuilder.java

License:Open Source License

public IndexSearcher createSearcher(Directory directory, XWikiContext context) {
    IndexSearcher searcher = null;

    try {
        searcher = new IndexSearcher(DirectoryReader.open(directory));
    } catch (Exception e) {
        LOGGER.error("Faild to create IndexSearcher for Lucene index [{}]", directory, e);
    }

    return searcher;
}

From source file:com.yahoo.bard.webservice.data.dimension.impl.LuceneSearchProvider.java

License:Apache License

/**
 * Re-open the Index Searcher, opening it for the first time if it's never been opened.
 * <p>
 * This method will attempt to acquire and release a write lock.
 *
 * @param firstTimeThrough  If true, will write an empty index and will then re-open the searcher
 */
private void reopenIndexSearcher(boolean firstTimeThrough) {
    lock.writeLock().lock();
    try {
        // Close the current reader if open
        if (luceneIndexSearcher != null) {
            luceneIndexSearcher.getIndexReader().close();
        }

        // Open a new IndexSearcher on a new DirectoryReader
        luceneIndexSearcher = new IndexSearcher(DirectoryReader.open(luceneDirectory));
    } catch (IOException reopenException) {
        // If there is no index file, this is expected. On the 1st time through, write an empty index and try again
        if (firstTimeThrough) {
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LUCENE_ANALYZER);
            try (IndexWriter ignored = new IndexWriter(luceneDirectory, indexWriterConfig)) {
                // Closed automatically by the try-resource block
            } catch (IOException emptyIndexWriteException) {
                // We can't move past this, so puke
                luceneIndexIsHealthy = false;
                String message = String.format("Unable to write empty index to %s:", luceneIndexPath);
                LOG.error(message, emptyIndexWriteException);
                throw new RuntimeException(emptyIndexWriteException);
            }
            reopenIndexSearcher(false);
        } else {
            // We've been here before, so puke
            luceneIndexIsHealthy = false;
            String message = String.format("Unable to open index searcher for %s:", luceneIndexPath);
            LOG.error(message, reopenException);
            throw new RuntimeException(reopenException);
        }
    } finally {
        lock.writeLock().unlock();
    }
}

From source file:com.zghw.lucene.demo.AssociationsFacetsExample.java

License:Apache License

/** User runs a query and aggregates facets by summing their association values. */
private List<FacetResult> sumAssociations() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

    FacetsCollector fc = new FacetsCollector();

    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query:
    FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);

    Facets tags = new TaxonomyFacetSumIntAssociations("$tags", taxoReader, config, fc);
    Facets genre = new TaxonomyFacetSumFloatAssociations("$genre", taxoReader, config, fc);

    // Retrieve results
    List<FacetResult> results = new ArrayList<FacetResult>();
    results.add(tags.getTopChildren(10, "tags"));
    results.add(genre.getTopChildren(10, "genre"));

    indexReader.close();
    taxoReader.close();

    return results;
}