List of usage examples for org.apache.lucene.search IndexSearcher getIndexReader
public IndexReader getIndexReader()
From source file:io.puntanegra.fhir.index.lucene.LuceneService.java
License:Apache License
/** * Returns the total number of {@link Document}s in this index. * * @return the number of {@link Document}s *///from w ww.j a v a2 s .c om public long getNumDocs() { logger.debug("Getting {} num docs", name); try { IndexSearcher searcher = searcherManager.acquire(); try { return searcher.getIndexReader().numDocs(); } finally { searcherManager.release(searcher); } } catch (Exception e) { throw new FhirIndexException(e, "Error getting %s num docs", name); } }
From source file:io.puntanegra.fhir.index.lucene.LuceneService.java
License:Apache License
/**
 * Returns the total number of deleted {@link Document}s in this index.
 *
 * @return the number of deleted {@link Document}s
 */
public long getNumDeletedDocs() {
    // FIX: was "Getting %s num deleted docs" — this logger uses SLF4J-style
    // "{}" placeholders (see getNumDocs), so "%s" logged literally and dropped `name`.
    logger.debug("Getting {} num deleted docs", name);
    try {
        IndexSearcher searcher = searcherManager.acquire();
        try {
            return searcher.getIndexReader().numDeletedDocs();
        } finally {
            // Always hand the searcher back, even if numDeletedDocs() throws.
            searcherManager.release(searcher);
        }
    } catch (Exception e) {
        // FIX: message said "num docs" (copy-paste from getNumDocs); this is deleted docs.
        throw new FhirIndexException(e, "Error getting %s num deleted docs", name);
    }
}
From source file:it.cnr.ilc.lc.clavius.search.Tester.java
private static void searchWithHighlighter(String term) throws IOException, ParseException, InvalidTokenOffsetsException { logger.info("searchWithContext2 (" + term + ")"); Directory indexDirectory = FSDirectory .open(Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText")); DirectoryReader ireader = DirectoryReader.open(indexDirectory); IndexSearcher searcher = new IndexSearcher(ireader); QueryParser parser = new QueryParser("content", new StandardAnalyzer()); Query query = parser.parse(term); TopDocs hits = searcher.search(query, 10); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); //Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); ClaviusHighlighter highlighter = new ClaviusHighlighter(htmlFormatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(9)); for (int i = 0; i < hits.totalHits; i++) { int id = hits.scoreDocs[i].doc; Document doc = searcher.doc(id); String idDoc = doc.get("idDoc"); String text = doc.get("content"); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content", new StandardAnalyzer()); List<Annotation> frag = highlighter.getBestTextClaviusFragments(tokenStream, idDoc, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); for (int j = 0; j < frag.size(); j++) { logger.info("idDoc: " + idDoc + ", Annotation[" + j + "] " + frag.get(j).toString()); }/* w ww . j a v a 2 s . c o m*/ // TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); // for (int j = 0; j < frag.length; j++) { // if ((frag[j] != null) && (frag[j].getScore() > 0)) { // logger.info("frag["+j+"] "+frag[j].toString()); // } // } // } }
From source file:it.cnr.ilc.lc.clavius.search.Tester.java
private static void searchWithContext(String term) { try {// ww w . ja va2 s . c o m logger.info("searchWithContext(" + term + ")"); SpanQuery spanQuery = new SpanTermQuery(new Term("content", term)); Directory indexDirectory = FSDirectory.open( Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText")); DirectoryReader indexReader = DirectoryReader.open(indexDirectory); IndexSearcher searcher = new IndexSearcher(indexReader); IndexReader reader = searcher.getIndexReader(); //spanQuery = (SpanQuery) spanQuery.rewrite(reader); //SpanWeight weight = (SpanWeight) searcher.createWeight(spanQuery, false); Spans spans = spanQuery.createWeight(searcher, false) .getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); // Spans spans2 = weight.getSpans(reader.leaves().get(0), // SpanWeight.Postings.OFFSETS); //Spans spans = weight.getSpans(reader.leaves().get(0), SpanWeight.Postings.POSITIONS); ScoreDoc[] sc = searcher.search(spanQuery, 10).scoreDocs; logger.info("hits :" + sc.length); int i; if (null != spans) { // while ((nextDoc = spans.nextDoc()) != Spans.NO_MORE_DOCS) { for (int k = 0; k < sc.length; k++) { int docId = sc[k].doc; logger.info("docID: " + docId); int newDocID = spans.advance(docId); logger.info("newDocID: " + newDocID); int nextSpan = -1; while ((nextSpan = spans.nextStartPosition()) != Spans.NO_MORE_POSITIONS) { logger.info("nextSpan : " + nextSpan); logger.info("spans.startPosition(): " + spans.startPosition()); logger.info("spans.endPosition() : " + spans.endPosition()); logger.info("spans.width() : " + spans.width()); Fields fields = reader.getTermVectors(docId); Terms terms = fields.terms("content"); TermsEnum termsEnum = terms.iterator(); BytesRef text; PostingsEnum postingEnum = null; int start = spans.startPosition() - 3; int end = spans.endPosition() + 3; while ((text = termsEnum.next()) != null) { //could store the BytesRef here, but String is easier for this example String s = new 
String(text.bytes, text.offset, text.length); // DocsAndPositionsEnum positionsEnum = termsEnum.docsAndPositions(null, null); postingEnum = termsEnum.postings(postingEnum); if (postingEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { i = 0; int position = -1; while (i < postingEnum.freq() && (position = postingEnum.nextPosition()) != -1) { if (position >= start && position <= end) { logger.info("pos: " + position + ", term: " + s + " offset: " + text.offset + " length: " + text.length); } i++; } } } } } } else { logger.info("no " + term + " found!"); } } catch (IOException e) { logger.error(e.getMessage()); } logger.info("End."); }
From source file:it.cnr.ilc.lc.claviusweb.ClaviusSearch.java
private static List<Annotation> fullTextSearch(String term) throws IOException, ParseException, InvalidTokenOffsetsException { log.info("fullTextSearch (" + term + ")"); List<Annotation> result = new ArrayList<>(); try {/*from w w w. j a v a 2s . c o m*/ Directory indexDirectory = FSDirectory .open(Paths.get("/var/lucene/clavius-1.0.5/indexes/it.cnr.ilc.lc.claviusweb.entity.PlainText")); DirectoryReader ireader = DirectoryReader.open(indexDirectory); IndexSearcher searcher = new IndexSearcher(ireader); Analyzer fullTextAnalyzer = CustomAnalyzer.builder() .addCharFilter("patternReplace", "pattern", "([\\-\\(\\)\\[\\],\\.;:])", "replacement", " $1 ") .withTokenizer("whitespace").build(); //QueryParser parserTerm = new QueryParser("content", fullTextAnalyzer); // AnalyzingQueryParser parser = new AnalyzingQueryParser("content", fullTextAnalyzer); // Query query2 = parser.parse(term); // Query query = new WildcardQuery(new Term("content", term)); TopDocs hits = searcher.search(query, MAX_SEARCH_HITS); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); //Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); ClaviusHighlighter highlighter = new ClaviusHighlighter(htmlFormatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter()); log.info("hits.totalHits=(" + hits.totalHits + ")"); for (int i = 0; i < hits.totalHits; i++) { int id = hits.scoreDocs[i].doc; Document doc = searcher.doc(id); String idDoc = doc.get("idDoc"); //String text = doc.get("content"); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content", fullTextAnalyzer); List<Annotation> frag = highlighter.getBestTextClaviusFragments(tokenStream, doc, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); for (int j = 0; j < frag.size(); j++) { log.debug("idDoc: " + idDoc + ", Annotation[" + j + "] " + frag.get(j).toString()); } result.addAll(frag); } } catch (InvalidTokenOffsetsException 
| IOException e) { log.error(e); } log.info("Full Text Search found " + result.size() + " result(s) for term " + term); return result; }
From source file:it.drwolf.ridire.util.async.FrequencyListGenerator.java
License:Apache License
/**
 * Builds a raw frequency table over the "performaFL" term vectors of all
 * documents matching the given corpus / metadata filters.
 *
 * <p>Each term-vector entry is expected to be a 3-part token
 * (forma SEPARATOR pos SEPARATOR lemma); {@code frequencyBy} selects which
 * projection of that triple becomes the map key:
 * "forma", "lemma", "PoS-lemma", "PoS-forma", or anything else for bare PoS.
 *
 * @param corporaNames                   corpora to restrict to; null/empty (or a single
 *                                       null element) means no corpus filter
 * @param functionalMetadatumDescription optional functional-metadatum filter, or null
 * @param semanticMetadatumDescription   optional semantic-metadatum filter, or null
 * @param frequencyBy                    key projection, see above
 * @return map from projected term to its summed frequency
 * @throws IOException on index access failure
 */
private Map<String, Integer> getBareTable(List<String> corporaNames, String functionalMetadatumDescription,
        String semanticMetadatumDescription, String frequencyBy) throws IOException {
    Map<String, Integer> fl = new HashMap<String, Integer>();
    Query q = new BooleanQuery();
    // Corpus filter: OR the non-null names together, then AND into the main query.
    if (corporaNames != null && corporaNames.size() > 0
            && !(corporaNames.size() == 1 && corporaNames.get(0) == null)) {
        BooleanQuery corporaQuery = new BooleanQuery();
        for (String cn : corporaNames) {
            if (cn != null) {
                corporaQuery.add(new TermQuery(new Term("corpus", cn)), Occur.SHOULD);
            }
        }
        ((BooleanQuery) q).add(corporaQuery, Occur.MUST);
    }
    if (functionalMetadatumDescription != null) {
        TermQuery funcQuery = new TermQuery(new Term("functionalMetadatum", functionalMetadatumDescription));
        ((BooleanQuery) q).add(funcQuery, Occur.MUST);
    }
    if (semanticMetadatumDescription != null) {
        TermQuery semaQuery = new TermQuery(new Term("semanticMetadatum", semanticMetadatumDescription));
        ((BooleanQuery) q).add(semaQuery, Occur.MUST);
    }
    // Empty-prefix query: matches any doc that has a "performaFL" value at all.
    PrefixQuery prefixQuery = new PrefixQuery(new Term("performaFL", ""));
    ((BooleanQuery) q).add(prefixQuery, Occur.MUST);
    IndexSearcher indexSearcher = this.contextsIndexManager.getIndexSearcherR();
    System.out.println("Starting FL calculation");
    // First pass: count total hits so the batched loop below knows when to stop.
    TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
    indexSearcher.search(q, null, totalHitCountCollector);
    int totalHits = totalHitCountCollector.getTotalHits();
    System.out.println("Frequency list calculation. Docs to be processed: " + totalHits);
    ScoreDoc after = null;
    int docsProcessed = 0;
    // Page through all hits in BATCH_SIZE chunks via searchAfter (deep paging).
    for (int j = 0; j < totalHits; j += FrequencyListGenerator.BATCH_SIZE) {
        TopDocs topDocs = null;
        if (after == null) {
            topDocs = indexSearcher.search(q, FrequencyListGenerator.BATCH_SIZE);
        } else {
            topDocs = indexSearcher.searchAfter(after, q, FrequencyListGenerator.BATCH_SIZE);
        }
        StrTokenizer strTokenizer = new StrTokenizer();
        strTokenizer.setDelimiterString(ContextAnalyzer.SEPARATOR);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if (scoreDocs != null) {
            for (ScoreDoc scoreDoc : scoreDocs) {
                ++docsProcessed;
                // Remember the last doc seen as the cursor for the next page.
                after = scoreDoc;
                TermFreqVector termFreqVector = indexSearcher.getIndexReader().getTermFreqVector(scoreDoc.doc,
                        "performaFL");
                if (termFreqVector == null) {
                    continue;
                }
                String[] terms = termFreqVector.getTerms();
                int[] frequencies = termFreqVector.getTermFrequencies();
                for (int i = 0; i < terms.length; i++) {
                    String term = terms[i];
                    // Split "forma SEPARATOR pos SEPARATOR lemma"; skip malformed entries.
                    String[] tokenArray = strTokenizer.reset(term).getTokenArray();
                    if (tokenArray.length != 3) {
                        continue;
                    }
                    String pos = tokenArray[1];
                    String lemma = tokenArray[2];
                    // Tagger emits "<unknown>" when it has no lemma; fall back to the surface form.
                    if (lemma.equals("<unknown>")) {
                        lemma = tokenArray[0];
                    }
                    if (frequencyBy.equals("forma")) {
                        term = tokenArray[0];
                    } else if (frequencyBy.equals("lemma")) {
                        term = lemma;
                    } else if (frequencyBy.equals("PoS-lemma")) {
                        // Collapse all verb subtags (e.g. VER:fin) into plain "VER".
                        if (pos.startsWith("VER")) {
                            pos = "VER";
                        }
                        term = pos + " / " + lemma;
                    } else if (frequencyBy.equals("PoS-forma")) {
                        if (pos.startsWith("VER")) {
                            pos = "VER";
                        }
                        term = pos + " / " + tokenArray[0];
                    } else {
                        // Default projection: bare part-of-speech tag.
                        term = tokenArray[1];
                    }
                    // Accumulate the frequency for this key.
                    Integer count = fl.get(term);
                    if (count == null) {
                        fl.put(term, frequencies[i]);
                    } else {
                        fl.put(term, frequencies[i] + count);
                    }
                }
                // Progress report every 1000 documents.
                if (docsProcessed % 1000 == 0) {
                    System.out.println("Frequency list calculation. Docs processed: " + docsProcessed
                            + " on total: " + totalHits + " (" + docsProcessed * 100.0f / totalHits + "%)");
                }
            }
        }
    }
    return fl;
}
From source file:it.eng.spagobi.commons.utilities.indexing.LuceneSearcher.java
License:Mozilla Public License
public static HashMap<String, Object> searchIndexFuzzy(IndexSearcher searcher, String queryString, String index, String[] fields, String metaDataToSearch) throws IOException, ParseException { logger.debug("IN"); HashMap<String, Object> objectsToReturn = new HashMap<String, Object>(); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); BooleanQuery orQuery = new BooleanQuery(); BooleanQuery andQuery = new BooleanQuery(); for (int i = 0; i < fields.length; i++) { Query query = new FuzzyQuery(new Term(fields[i], queryString)); query = query.rewrite(searcher.getIndexReader()); orQuery.add(query, BooleanClause.Occur.SHOULD); }/*from ww w . j a va 2 s .c om*/ andQuery.add(orQuery, BooleanClause.Occur.MUST); if (metaDataToSearch != null) { //search for query string on metadata name field and content //where metadata name = metaDataToSearch Query queryMetadata = new TermQuery(new Term(IndexingConstants.METADATA, metaDataToSearch)); andQuery.add(queryMetadata, BooleanClause.Occur.MUST); } Query tenantQuery = new TermQuery(new Term(IndexingConstants.TENANT, getTenant())); andQuery.add(tenantQuery, BooleanClause.Occur.MUST); logger.debug("Searching for: " + andQuery.toString()); int hitsPerPage = 50; // Collect enough docs to show 5 pages TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false); searcher.search(andQuery, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; objectsToReturn.put("hits", hits); //highlighter //orQuery = orQuery.rewrite(searcher.getIndexReader()); //andQuery = andQuery.rewrite(searcher.getIndexReader()); Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(andQuery)); if (hits != null) { for (int i = 0; i < hits.length; i++) { ScoreDoc hit = hits[i]; Document doc = searcher.doc(hit.doc); String biobjId = doc.get(IndexingConstants.BIOBJ_ID); String summary = " "; if (highlighter != null) { String[] summaries; try { Integer idobj = (Integer.valueOf(biobjId)); String 
contentToSearchOn = fillSummaryText(idobj); summaries = highlighter.getBestFragments(new StandardAnalyzer(Version.LUCENE_CURRENT), IndexingConstants.CONTENTS, contentToSearchOn, 3); StringBuffer summaryBuffer = new StringBuffer(); if (summaries.length > 0) { summaryBuffer.append(summaries[0]); } for (int j = 1; j < summaries.length; j++) { summaryBuffer.append(" ... "); summaryBuffer.append(summaries[j]); } summary = summaryBuffer.toString(); //get only a portion of summary if (summary.length() > 101) { summary = summary.substring(0, 100); summary += "..."; } objectsToReturn.put(biobjId, summary); } catch (InvalidTokenOffsetsException e) { logger.error(e.getMessage(), e); } catch (Exception e) { logger.error(e.getMessage(), e); } } } } int numTotalHits = collector.getTotalHits(); logger.info(numTotalHits + " total matching documents"); logger.debug("OUT"); return objectsToReturn; }
From source file:it.giacomobergami.lucenepdfindexer.lucene.LuceneRead.java
License:Open Source License
/**
 * Creates a reader positioned at the start of {@code directory}, borrowing
 * the underlying {@link IndexReader} from the searcher when one is supplied.
 *
 * @param directory     index directory on disk
 * @param indexSearcher searcher to borrow the reader from; may be null
 */
public LuceneRead(File directory, IndexSearcher indexSearcher) {
    this.directory = directory;
    this.indexSearcher = indexSearcher;
    this.pos = 0;
    if (null != indexSearcher) {
        this.reader = indexSearcher.getIndexReader();
    }
}
From source file:it.unibz.instasearch.indexing.Searcher.java
License:Open Source License
private SearchResult searchIndex(SearchQuery searchQuery) throws Exception { IndexSearcher indexSearcher = getIndexSearcher(); IndexReader reader = indexSearcher.getIndexReader(); boolean exact = searchQuery.isExact(); Query query = null;//ww w . ja va2 s . c o m try { query = parseSearchQuery(searchQuery, reader, exact, true); } catch (TooManyClauses e) { // too many, try without prefix search query = parseSearchQuery(searchQuery, reader, exact, false); } catch (ParseException e) { // remove special query characters String newSearchString = searchQuery.getSearchString().replaceAll("[\\(\\)\"\\[\\]'\\{\\}]", " "); try { searchQuery.setSearchString(newSearchString); query = parseSearchQuery(searchQuery, reader, exact, true); } catch (ParseException ignored) { // can have error while typing query, just ignore debug(newSearchString, " - ", ignored.getMessage()); return null; } } SearchResult searchResut = collectSearchResults(searchQuery, indexSearcher, reader, query); return searchResut; }
From source file:kaist.irproject.lucene.trec.DocNameExtractor.java
License:Apache License
/** * Extract the name of the input doc from the index. * @param searcher access to the index./*from w ww . jav a2 s .c om*/ * @param docid ID of doc whose name is needed. * @return the name of the input doc as extracted from the index. * @throws IOException if cannot extract the doc name from the index. */ public String docName(IndexSearcher searcher, int docid) throws IOException { final List<String> name = new ArrayList<>(); searcher.getIndexReader().document(docid, new StoredFieldVisitor() { @Override public void stringField(FieldInfo fieldInfo, String value) { name.add(value); } @Override public Status needsField(FieldInfo fieldInfo) { if (!name.isEmpty()) { return Status.STOP; } else if (fieldInfo.name.equals(docNameField)) { return Status.YES; } else { return Status.NO; } } }); if (name.size() != 0) { return name.get(0); } else { return null; } }