List of usage examples for org.apache.lucene.search IndexSearcher getIndexReader
public IndexReader getIndexReader()
From source file:io.puntanegra.fhir.index.lucene.LuceneService.java
License:Apache License
/** * Returns the total number of {@link Document}s in this index. * * @return the number of {@link Document}s *///from w ww.j a v a2 s .c om public long getNumDocs() { logger.debug("Getting {} num docs", name); try { IndexSearcher searcher = searcherManager.acquire(); try { return searcher.getIndexReader().numDocs(); } finally { searcherManager.release(searcher); } } catch (Exception e) { throw new FhirIndexException(e, "Error getting %s num docs", name); } }
From source file:io.puntanegra.fhir.index.lucene.LuceneService.java
License:Apache License
/**
 * Returns the total number of deleted {@link Document}s in this index.
 *
 * @return the number of deleted {@link Document}s
 */
public long getNumDeletedDocs() {
    // FIX: was "Getting %s num deleted docs" — this logger uses SLF4J-style
    // "{}" placeholders (see getNumDocs), so "%s" logged literally and dropped `name`.
    logger.debug("Getting {} num deleted docs", name);
    try {
        IndexSearcher searcher = searcherManager.acquire();
        try {
            return searcher.getIndexReader().numDeletedDocs();
        } finally {
            // Always hand the searcher back, even if numDeletedDocs() throws.
            searcherManager.release(searcher);
        }
    } catch (Exception e) {
        // FIX: message said "num docs" (copy-paste from getNumDocs); this is deleted docs.
        throw new FhirIndexException(e, "Error getting %s num deleted docs", name);
    }
}
From source file:it.cnr.ilc.lc.clavius.search.Tester.java
private static void searchWithHighlighter(String term) throws IOException, ParseException, InvalidTokenOffsetsException { logger.info("searchWithContext2 (" + term + ")"); Directory indexDirectory = FSDirectory .open(Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText")); DirectoryReader ireader = DirectoryReader.open(indexDirectory); IndexSearcher searcher = new IndexSearcher(ireader); QueryParser parser = new QueryParser("content", new StandardAnalyzer()); Query query = parser.parse(term); TopDocs hits = searcher.search(query, 10); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); //Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); ClaviusHighlighter highlighter = new ClaviusHighlighter(htmlFormatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(9)); for (int i = 0; i < hits.totalHits; i++) { int id = hits.scoreDocs[i].doc; Document doc = searcher.doc(id); String idDoc = doc.get("idDoc"); String text = doc.get("content"); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content", new StandardAnalyzer()); List<Annotation> frag = highlighter.getBestTextClaviusFragments(tokenStream, idDoc, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); for (int j = 0; j < frag.size(); j++) { logger.info("idDoc: " + idDoc + ", Annotation[" + j + "] " + frag.get(j).toString()); }/* w ww . j a v a 2 s . c o m*/ // TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); // for (int j = 0; j < frag.length; j++) { // if ((frag[j] != null) && (frag[j].getScore() > 0)) { // logger.info("frag["+j+"] "+frag[j].toString()); // } // } // } }
From source file:it.cnr.ilc.lc.clavius.search.Tester.java
private static void searchWithContext(String term) { try {// ww w . ja va2 s . c o m logger.info("searchWithContext(" + term + ")"); SpanQuery spanQuery = new SpanTermQuery(new Term("content", term)); Directory indexDirectory = FSDirectory.open( Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText")); DirectoryReader indexReader = DirectoryReader.open(indexDirectory); IndexSearcher searcher = new IndexSearcher(indexReader); IndexReader reader = searcher.getIndexReader(); //spanQuery = (SpanQuery) spanQuery.rewrite(reader); //SpanWeight weight = (SpanWeight) searcher.createWeight(spanQuery, false); Spans spans = spanQuery.createWeight(searcher, false) .getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); // Spans spans2 = weight.getSpans(reader.leaves().get(0), // SpanWeight.Postings.OFFSETS); //Spans spans = weight.getSpans(reader.leaves().get(0), SpanWeight.Postings.POSITIONS); ScoreDoc[] sc = searcher.search(spanQuery, 10).scoreDocs; logger.info("hits :" + sc.length); int i; if (null != spans) { // while ((nextDoc = spans.nextDoc()) != Spans.NO_MORE_DOCS) { for (int k = 0; k < sc.length; k++) { int docId = sc[k].doc; logger.info("docID: " + docId); int newDocID = spans.advance(docId); logger.info("newDocID: " + newDocID); int nextSpan = -1; while ((nextSpan = spans.nextStartPosition()) != Spans.NO_MORE_POSITIONS) { logger.info("nextSpan : " + nextSpan); logger.info("spans.startPosition(): " + spans.startPosition()); logger.info("spans.endPosition() : " + spans.endPosition()); logger.info("spans.width() : " + spans.width()); Fields fields = reader.getTermVectors(docId); Terms terms = fields.terms("content"); TermsEnum termsEnum = terms.iterator(); BytesRef text; PostingsEnum postingEnum = null; int start = spans.startPosition() - 3; int end = spans.endPosition() + 3; while ((text = termsEnum.next()) != null) { //could store the BytesRef here, but String is easier for this example String s = new 
String(text.bytes, text.offset, text.length); // DocsAndPositionsEnum positionsEnum = termsEnum.docsAndPositions(null, null); postingEnum = termsEnum.postings(postingEnum); if (postingEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { i = 0; int position = -1; while (i < postingEnum.freq() && (position = postingEnum.nextPosition()) != -1) { if (position >= start && position <= end) { logger.info("pos: " + position + ", term: " + s + " offset: " + text.offset + " length: " + text.length); } i++; } } } } } } else { logger.info("no " + term + " found!"); } } catch (IOException e) { logger.error(e.getMessage()); } logger.info("End."); }
From source file:it.cnr.ilc.lc.claviusweb.ClaviusSearch.java
private static List<Annotation> fullTextSearch(String term) throws IOException, ParseException, InvalidTokenOffsetsException { log.info("fullTextSearch (" + term + ")"); List<Annotation> result = new ArrayList<>(); try {/*from w w w. j a v a 2s . c o m*/ Directory indexDirectory = FSDirectory .open(Paths.get("/var/lucene/clavius-1.0.5/indexes/it.cnr.ilc.lc.claviusweb.entity.PlainText")); DirectoryReader ireader = DirectoryReader.open(indexDirectory); IndexSearcher searcher = new IndexSearcher(ireader); Analyzer fullTextAnalyzer = CustomAnalyzer.builder() .addCharFilter("patternReplace", "pattern", "([\\-\\(\\)\\[\\],\\.;:])", "replacement", " $1 ") .withTokenizer("whitespace").build(); //QueryParser parserTerm = new QueryParser("content", fullTextAnalyzer); // AnalyzingQueryParser parser = new AnalyzingQueryParser("content", fullTextAnalyzer); // Query query2 = parser.parse(term); // Query query = new WildcardQuery(new Term("content", term)); TopDocs hits = searcher.search(query, MAX_SEARCH_HITS); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); //Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); ClaviusHighlighter highlighter = new ClaviusHighlighter(htmlFormatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter()); log.info("hits.totalHits=(" + hits.totalHits + ")"); for (int i = 0; i < hits.totalHits; i++) { int id = hits.scoreDocs[i].doc; Document doc = searcher.doc(id); String idDoc = doc.get("idDoc"); //String text = doc.get("content"); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content", fullTextAnalyzer); List<Annotation> frag = highlighter.getBestTextClaviusFragments(tokenStream, doc, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); for (int j = 0; j < frag.size(); j++) { log.debug("idDoc: " + idDoc + ", Annotation[" + j + "] " + frag.get(j).toString()); } result.addAll(frag); } } catch (InvalidTokenOffsetsException 
| IOException e) { log.error(e); } log.info("Full Text Search found " + result.size() + " result(s) for term " + term); return result; }
From source file:it.drwolf.ridire.util.async.FrequencyListGenerator.java
License:Apache License
/**
 * Builds a raw frequency table over the "performaFL" term vectors of all
 * documents matching the given corpus / metadata filters.
 *
 * <p>Each term-vector entry is expected to be a 3-part token
 * (forma SEPARATOR pos SEPARATOR lemma); {@code frequencyBy} selects which
 * projection of that triple becomes the map key:
 * "forma", "lemma", "PoS-lemma", "PoS-forma", or anything else for bare PoS.
 *
 * @param corporaNames                   corpora to restrict to; null/empty (or a single
 *                                       null element) means no corpus filter
 * @param functionalMetadatumDescription optional functional-metadatum filter, or null
 * @param semanticMetadatumDescription   optional semantic-metadatum filter, or null
 * @param frequencyBy                    key projection, see above
 * @return map from projected term to its summed frequency
 * @throws IOException on index access failure
 */
private Map<String, Integer> getBareTable(List<String> corporaNames, String functionalMetadatumDescription,
        String semanticMetadatumDescription, String frequencyBy) throws IOException {
    Map<String, Integer> fl = new HashMap<String, Integer>();
    Query q = new BooleanQuery();
    // Corpus filter: OR the non-null names together, then AND into the main query.
    if (corporaNames != null && corporaNames.size() > 0
            && !(corporaNames.size() == 1 && corporaNames.get(0) == null)) {
        BooleanQuery corporaQuery = new BooleanQuery();
        for (String cn : corporaNames) {
            if (cn != null) {
                corporaQuery.add(new TermQuery(new Term("corpus", cn)), Occur.SHOULD);
            }
        }
        ((BooleanQuery) q).add(corporaQuery, Occur.MUST);
    }
    if (functionalMetadatumDescription != null) {
        TermQuery funcQuery = new TermQuery(new Term("functionalMetadatum", functionalMetadatumDescription));
        ((BooleanQuery) q).add(funcQuery, Occur.MUST);
    }
    if (semanticMetadatumDescription != null) {
        TermQuery semaQuery = new TermQuery(new Term("semanticMetadatum", semanticMetadatumDescription));
        ((BooleanQuery) q).add(semaQuery, Occur.MUST);
    }
    // Empty-prefix query: matches any doc that has a "performaFL" value at all.
    PrefixQuery prefixQuery = new PrefixQuery(new Term("performaFL", ""));
    ((BooleanQuery) q).add(prefixQuery, Occur.MUST);
    IndexSearcher indexSearcher = this.contextsIndexManager.getIndexSearcherR();
    System.out.println("Starting FL calculation");
    // First pass: count total hits so the batched loop below knows when to stop.
    TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
    indexSearcher.search(q, null, totalHitCountCollector);
    int totalHits = totalHitCountCollector.getTotalHits();
    System.out.println("Frequency list calculation. Docs to be processed: " + totalHits);
    ScoreDoc after = null;
    int docsProcessed = 0;
    // Page through all hits in BATCH_SIZE chunks via searchAfter (deep paging).
    for (int j = 0; j < totalHits; j += FrequencyListGenerator.BATCH_SIZE) {
        TopDocs topDocs = null;
        if (after == null) {
            topDocs = indexSearcher.search(q, FrequencyListGenerator.BATCH_SIZE);
        } else {
            topDocs = indexSearcher.searchAfter(after, q, FrequencyListGenerator.BATCH_SIZE);
        }
        StrTokenizer strTokenizer = new StrTokenizer();
        strTokenizer.setDelimiterString(ContextAnalyzer.SEPARATOR);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if (scoreDocs != null) {
            for (ScoreDoc scoreDoc : scoreDocs) {
                ++docsProcessed;
                // Remember the last doc seen as the cursor for the next page.
                after = scoreDoc;
                TermFreqVector termFreqVector = indexSearcher.getIndexReader().getTermFreqVector(scoreDoc.doc,
                        "performaFL");
                if (termFreqVector == null) {
                    continue;
                }
                String[] terms = termFreqVector.getTerms();
                int[] frequencies = termFreqVector.getTermFrequencies();
                for (int i = 0; i < terms.length; i++) {
                    String term = terms[i];
                    // Split "forma SEPARATOR pos SEPARATOR lemma"; skip malformed entries.
                    String[] tokenArray = strTokenizer.reset(term).getTokenArray();
                    if (tokenArray.length != 3) {
                        continue;
                    }
                    String pos = tokenArray[1];
                    String lemma = tokenArray[2];
                    // Tagger emits "<unknown>" when it has no lemma; fall back to the surface form.
                    if (lemma.equals("<unknown>")) {
                        lemma = tokenArray[0];
                    }
                    if (frequencyBy.equals("forma")) {
                        term = tokenArray[0];
                    } else if (frequencyBy.equals("lemma")) {
                        term = lemma;
                    } else if (frequencyBy.equals("PoS-lemma")) {
                        // Collapse all verb subtags (e.g. VER:fin) into plain "VER".
                        if (pos.startsWith("VER")) {
                            pos = "VER";
                        }
                        term = pos + " / " + lemma;
                    } else if (frequencyBy.equals("PoS-forma")) {
                        if (pos.startsWith("VER")) {
                            pos = "VER";
                        }
                        term = pos + " / " + tokenArray[0];
                    } else {
                        // Default projection: bare part-of-speech tag.
                        term = tokenArray[1];
                    }
                    // Accumulate the frequency for this key.
                    Integer count = fl.get(term);
                    if (count == null) {
                        fl.put(term, frequencies[i]);
                    } else {
                        fl.put(term, frequencies[i] + count);
                    }
                }
                // Progress report every 1000 documents.
                if (docsProcessed % 1000 == 0) {
                    System.out.println("Frequency list calculation. Docs processed: " + docsProcessed
                            + " on total: " + totalHits + " (" + docsProcessed * 100.0f / totalHits + "%)");
                }
            }
        }
    }
    return fl;
}
From source file:it.eng.spagobi.commons.utilities.indexing.LuceneSearcher.java
License:Mozilla Public License
public static HashMap<String, Object> searchIndexFuzzy(IndexSearcher searcher, String queryString, String index, String[] fields, String metaDataToSearch) throws IOException, ParseException { logger.debug("IN"); HashMap<String, Object> objectsToReturn = new HashMap<String, Object>(); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); BooleanQuery orQuery = new BooleanQuery(); BooleanQuery andQuery = new BooleanQuery(); for (int i = 0; i < fields.length; i++) { Query query = new FuzzyQuery(new Term(fields[i], queryString)); query = query.rewrite(searcher.getIndexReader()); orQuery.add(query, BooleanClause.Occur.SHOULD); }/*from ww w . j a va 2 s .c om*/ andQuery.add(orQuery, BooleanClause.Occur.MUST); if (metaDataToSearch != null) { //search for query string on metadata name field and content //where metadata name = metaDataToSearch Query queryMetadata = new TermQuery(new Term(IndexingConstants.METADATA, metaDataToSearch)); andQuery.add(queryMetadata, BooleanClause.Occur.MUST); } Query tenantQuery = new TermQuery(new Term(IndexingConstants.TENANT, getTenant())); andQuery.add(tenantQuery, BooleanClause.Occur.MUST); logger.debug("Searching for: " + andQuery.toString()); int hitsPerPage = 50; // Collect enough docs to show 5 pages TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false); searcher.search(andQuery, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; objectsToReturn.put("hits", hits); //highlighter //orQuery = orQuery.rewrite(searcher.getIndexReader()); //andQuery = andQuery.rewrite(searcher.getIndexReader()); Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(andQuery)); if (hits != null) { for (int i = 0; i < hits.length; i++) { ScoreDoc hit = hits[i]; Document doc = searcher.doc(hit.doc); String biobjId = doc.get(IndexingConstants.BIOBJ_ID); String summary = " "; if (highlighter != null) { String[] summaries; try { Integer idobj = (Integer.valueOf(biobjId)); String 
contentToSearchOn = fillSummaryText(idobj); summaries = highlighter.getBestFragments(new StandardAnalyzer(Version.LUCENE_CURRENT), IndexingConstants.CONTENTS, contentToSearchOn, 3); StringBuffer summaryBuffer = new StringBuffer(); if (summaries.length > 0) { summaryBuffer.append(summaries[0]); } for (int j = 1; j < summaries.length; j++) { summaryBuffer.append(" ... "); summaryBuffer.append(summaries[j]); } summary = summaryBuffer.toString(); //get only a portion of summary if (summary.length() > 101) { summary = summary.substring(0, 100); summary += "..."; } objectsToReturn.put(biobjId, summary); } catch (InvalidTokenOffsetsException e) { logger.error(e.getMessage(), e); } catch (Exception e) { logger.error(e.getMessage(), e); } } } } int numTotalHits = collector.getTotalHits(); logger.info(numTotalHits + " total matching documents"); logger.debug("OUT"); return objectsToReturn; }
From source file:it.giacomobergami.lucenepdfindexer.lucene.LuceneRead.java
License:Open Source License
/**
 * Creates a reader positioned at the start of {@code directory}, borrowing
 * the underlying {@link IndexReader} from the searcher when one is supplied.
 *
 * @param directory     index directory on disk
 * @param indexSearcher searcher to borrow the reader from; may be null
 */
public LuceneRead(File directory, IndexSearcher indexSearcher) {
    this.directory = directory;
    this.indexSearcher = indexSearcher;
    this.pos = 0;
    if (null != indexSearcher) {
        this.reader = indexSearcher.getIndexReader();
    }
}
From source file:it.unibz.instasearch.indexing.Searcher.java
License:Open Source License
private SearchResult searchIndex(SearchQuery searchQuery) throws Exception { IndexSearcher indexSearcher = getIndexSearcher(); IndexReader reader = indexSearcher.getIndexReader(); boolean exact = searchQuery.isExact(); Query query = null;//ww w . ja va2 s . c o m try { query = parseSearchQuery(searchQuery, reader, exact, true); } catch (TooManyClauses e) { // too many, try without prefix search query = parseSearchQuery(searchQuery, reader, exact, false); } catch (ParseException e) { // remove special query characters String newSearchString = searchQuery.getSearchString().replaceAll("[\\(\\)\"\\[\\]'\\{\\}]", " "); try { searchQuery.setSearchString(newSearchString); query = parseSearchQuery(searchQuery, reader, exact, true); } catch (ParseException ignored) { // can have error while typing query, just ignore debug(newSearchString, " - ", ignored.getMessage()); return null; } } SearchResult searchResut = collectSearchResults(searchQuery, indexSearcher, reader, query); return searchResut; }
From source file:kaist.irproject.lucene.trec.DocNameExtractor.java
License:Apache License
/** * Extract the name of the input doc from the index. * @param searcher access to the index./*from w ww . jav a2 s .c om*/ * @param docid ID of doc whose name is needed. * @return the name of the input doc as extracted from the index. * @throws IOException if cannot extract the doc name from the index. */ public String docName(IndexSearcher searcher, int docid) throws IOException { final List<String> name = new ArrayList<>(); searcher.getIndexReader().document(docid, new StoredFieldVisitor() { @Override public void stringField(FieldInfo fieldInfo, String value) { name.add(value); } @Override public Status needsField(FieldInfo fieldInfo) { if (!name.isEmpty()) { return Status.STOP; } else if (fieldInfo.name.equals(docNameField)) { return Status.YES; } else { return Status.NO; } } }); if (name.size() != 0) { return name.get(0); } else { return null; } }