List of usage examples for org.apache.lucene.search.IndexSearcher#getIndexReader()
public IndexReader getIndexReader()
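getIndexReader() hands back the IndexReader the searcher was constructed over; the examples below use it to rewrite queries for highlighting, read index statistics, and close the underlying reader. A minimal sketch of the basic pattern (the index path and the Lucene 5.x+ FSDirectory/Paths APIs here are assumptions, not taken from the examples below):

import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class GetIndexReaderSketch {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(Paths.get("/tmp/index")); // hypothetical index location
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        // getIndexReader() returns the very reader the searcher wraps,
        // so closing it (exactly once) also invalidates the searcher.
        IndexReader same = searcher.getIndexReader();
        System.out.println("maxDoc=" + same.maxDoc() + ", numDocs=" + same.numDocs());
        same.close();
    }
}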
From source file:br.bireme.ngrams.NGrams.java
public static Set<String> search(final NGIndex index, final NGSchema schema, final String text,
        final boolean original) throws IOException, ParseException {
    if (index == null) {
        throw new NullPointerException("index");
    }
    if (schema == null) {
        throw new NullPointerException("schema");
    }
    if (text == null) {
        throw new NullPointerException("text");
    }
    final IndexSearcher searcher = index.getIndexSearcher();
    final NGAnalyzer analyzer = (NGAnalyzer) index.getAnalyzer();
    final Parameters parameters = schema.getParameters();
    final NGramDistance ngDistance = new NGramDistance(analyzer.getNgramSize());
    final Set<String> id_id = new HashSet<>();
    final Set<Result> results = new HashSet<>();
    final String ttext = text.replace(':', ' ').trim();
    final String[] split = ttext.split(" *\\| *", Integer.MAX_VALUE);
    if (split.length != parameters.nameFields.size()) {
        throw new IOException("invalid number of fields: " + text);
    }
    searchRaw(parameters, searcher, analyzer, ngDistance, ttext, true, id_id, results);
    searcher.getIndexReader().close();
    return original ? results2pipeFull(parameters, results) : results2pipe(parameters, results);
}
From source file:br.bireme.ngrams.NGrams.java
public static Set<String> srcWithoutSimil(final NGIndex index, final NGSchema schema, final String text,
        final boolean original) throws IOException, ParseException {
    if (index == null) {
        throw new NullPointerException("index");
    }
    if (schema == null) {
        throw new NullPointerException("schema");
    }
    if (text == null) {
        throw new NullPointerException("text");
    }
    final IndexSearcher searcher = index.getIndexSearcher();
    final NGAnalyzer analyzer = (NGAnalyzer) index.getAnalyzer();
    final Parameters parameters = schema.getParameters();
    final NGramDistance ngDistance = new NGramDistance(analyzer.getNgramSize());
    final Set<String> id_id = new HashSet<>();
    final Set<Result> results = new HashSet<>();
    final String ttext = text.replace(':', ' ').trim();
    final String[] split = ttext.split(" *\\| *", Integer.MAX_VALUE);
    if (split.length != parameters.nameFields.size()) {
        throw new IOException("invalid number of fields: " + text);
    }
    searchRaw(parameters, searcher, analyzer, ngDistance, ttext, false, id_id, results);
    searcher.getIndexReader().close();
    return original ? results2pipeFull(parameters, results) : results2pipe(parameters, results);
}
From source file:br.bireme.ngrams.NGrams.java
public static Set<String> searchJson(final NGIndex index, final NGSchema schema, final String text)
        throws IOException, ParseException {
    if (index == null) {
        throw new NullPointerException("index");
    }
    if (schema == null) {
        throw new NullPointerException("schema");
    }
    if (text == null) {
        throw new NullPointerException("text");
    }
    final IndexSearcher searcher = index.getIndexSearcher();
    final NGAnalyzer analyzer = (NGAnalyzer) index.getAnalyzer();
    final Parameters parameters = schema.getParameters();
    final NGramDistance ngDistance = new NGramDistance(analyzer.getNgramSize());
    final Set<String> id_id = new HashSet<>();
    final TreeSet<Result> results = new TreeSet<>();
    final String ttext = text.replace(':', ' ').trim();
    searchRaw(parameters, searcher, analyzer, ngDistance, ttext, true, id_id, results);
    searcher.getIndexReader().close();
    return results2json(parameters, results.descendingSet());
}
From source file:ci6226.eval_index_reader.java
public static void Searchit(IndexReader reader, IndexSearcher searcher, Analyzer _analyzer, String field,
        String[] _searchList, int _topn, PrintWriter writer)
        throws org.apache.lucene.queryparser.classic.ParseException, IOException, InvalidTokenOffsetsException {
    Analyzer analyzer = _analyzer;
    QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer);
    String[] testString = _searchList;
    // e.g. {"to","123","impressed","Geezer","geezer","semi-busy","\"eggs vegetable\"","gs veget","\"gs veget\""}
    for (int j = 0; j < testString.length; j++) {
        String lstr = String.valueOf(j) + "," + testString[j];
        Query query = parser.parse(testString[j]);
        System.out.println("Searching for: " + query.toString(field));
        TopDocs topdocs = searcher.search(query, _topn);
        lstr += "," + topdocs.totalHits;
        ScoreDoc[] scoreDocs = topdocs.scoreDocs;
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query.rewrite(reader)));
        for (int i = 0; i < scoreDocs.length; i++) {
            int doc = scoreDocs[i].doc;
            Document document = searcher.doc(doc);
            // System.out.println("Snippet=" + document.get(field));
            System.out.println(i);
            String text = document.get(field);
            TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), doc, field,
                    analyzer);
            TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
            // highlighter.getBestFragments(tokenStream, text, 3, "...");
            String line = "";
            for (int m = 0; m < frag.length; m++) {
                if ((frag[m] != null) && (frag[m].getScore() > 0)) {
                    System.out.println(frag[m].toString());
                    line = frag[m].toString();
                    line = line.replaceAll("\n", "");
                    line = line.replaceAll("\r", "");
                    line = line.replaceAll("\"", "");
                    line = line.replaceAll(",", " ");
                }
            }
            lstr += "," + line;
            lstr += "," + String.valueOf(scoreDocs[i].score);
        }
        writer.write(lstr + "\n");
        System.out.println("Search for:" + testString[j] + " Total hits=" + scoreDocs.length);
        System.out.println("////////////////////////////////////////////////////");
    }
}
From source file:ci6226.loadIndex.java
/**
 * This demonstrates a typical paging search scenario, where the search
 * engine presents pages of size n to the user. The user can then go to the
 * next page if interested in the next hits.
 *
 * When the query is executed for the first time, only enough results are
 * collected to fill 5 result pages. If the user wants to page beyond this
 * limit, the query is executed another time and all hits are collected.
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive, Analyzer analyzer) throws IOException, InvalidTokenOffsetsException {
    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;
    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");
    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);
    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            hits = searcher.search(query, numTotalHits).scoreDocs;
        }
        end = Math.min(hits.length, start + hitsPerPage);
        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }
            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("review_id");
            if (path != null) {
                System.out.println(ANSI_BLUE + (i + 1) + ANSI_RESET + "\nScore=\t" + hits[i].score);
                String title = doc.get("business_id");
                if (title != null) {
                    String text = doc.get("text");
                    TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(),
                            hits[i].doc, "text", doc, analyzer);
                    // TokenSources.getAnyTokenStream(searcher.getIndexReader(), "text", analyzer);
                    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(ANSI_RED, ANSI_RESET);
                    // SimpleFragmenter fragmenter = new SimpleFragmenter(80);
                    Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
                    TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 4);
                    System.out.print("Snippet=\t");
                    for (int j = 0; j < frag.length; j++) {
                        if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                            System.out.println(frag[j].toString());
                        }
                    }
                    System.out.println("Full Review=\t" + doc.get("text") + "\nBusinessID=\t" + title);
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }
        if (!interactive || end == 0) {
            break;
        }
        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");
                int cpage = start / hitsPerPage;
                System.out.println(String.format("Current page=%d,max page=%d", cpage + 1,
                        1 + numTotalHits / hitsPerPage));
                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit) {
                break;
            }
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
From source file:com.agiletec.plugins.jacms.aps.system.services.searchengine.SearcherDAO.java
License:Open Source License
private void releaseSearcher(IndexSearcher searcher) throws ApsSystemException {
    try {
        if (searcher != null) {
            searcher.getIndexReader().close();
            searcher.close(); // IndexSearcher.close() exists only in older Lucene (pre-4.0)
        }
    } catch (IOException e) {
        // "Errore in chiusura searcher" = "Error closing searcher"
        throw new ApsSystemException("Errore in chiusura searcher", e);
    }
}
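The example above closes the searcher's reader by hand. Newer Lucene versions (4.x+) can delegate this lifecycle to SearcherManager, which reference-counts readers, as the final example on this page does. A small sketch of that pattern (directory handling is illustrative; the null second argument means the default SearcherFactory):

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.store.Directory;

// Sketch: reference-counted searcher lifecycle instead of manual closing.
final class ReleaseViaManagerSketch {
    static void searchOnce(Directory dir) throws IOException {
        SearcherManager manager = new SearcherManager(dir, null); // null = default SearcherFactory
        IndexSearcher searcher = manager.acquire();
        try {
            // Use the searcher; its reader stays open while it is acquired.
            System.out.println("numDocs=" + searcher.getIndexReader().numDocs());
        } finally {
            manager.release(searcher); // do not close the reader yourself here
        }
        manager.close();
    }
}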
From source file:com.appeligo.search.actions.SearchResults.java
License:Apache License
public List<SearchResult> getSearchResults(int startIndex) {
    initializeStatics();
    hasMoreResults = false;
    try {
        IndexSearcher searcher = null;
        try {
            searcher = newIndexSearcher();
            IndexReader reader = searcher.getIndexReader();
            Query luceneQuery = generateLuceneQuery(searcher);
            luceneQuery = luceneQuery.rewrite(reader);
            Hits hits = searcher.search(luceneQuery);
            usingSuggestedQuery = false;
            suggestedQuery = null;
            if ((didYouMeanParser != null)
                    && ((hits.length() < minimumHits) || (calcScore(searcher, getQuery()) < minimumScore))) {
                if (log.isDebugEnabled()) {
                    log.debug("Need to suggest because either num hits " + hits.length() + " < " + minimumHits
                            + "\n or top hit score " + (hits.length() > 0 ? hits.score(0) : "[NO HITS]")
                            + " < " + minimumScore);
                }
                IndexSearcher compositeSearcher = new IndexSearcher(compositeIndexLocation);
                try {
                    log.debug("calling suggest() with query=" + getQuery() + " and composite index from "
                            + compositeIndexLocation);
                    //Query didYouMean = didYouMeanParser.suggest(getQuery(), compositeSearcher.getIndexReader());
                    Query suggestedQueries[] = didYouMeanParser.getSuggestions(getQuery(),
                            compositeSearcher.getIndexReader());
                    TreeSet<Suggestion> suggestions = new TreeSet<Suggestion>();
                    if (suggestedQueries != null) {
                        for (int i = 0; i < suggestedQueries.length; i++) {
                            log.debug("trying suggested query: " + suggestedQueries[i].toString(defaultField));
                            String suggestedQueryString = suggestedQueries[i].toString(defaultField);
                            String constrainedQueryString = suggestedQueryString;
                            if (constrainedQueryString.indexOf('"') < 0
                                    && constrainedQueryString.indexOf('\'') < 0) {
                                // proximity/distance query (within 5 words of each other)
                                constrainedQueryString = "\"" + constrainedQueryString + "\"~5";
                            }
                            Query suggestedLuceneQuery = generateLuceneQuery(constrainedQueryString, searcher);
                            suggestedLuceneQuery = suggestedLuceneQuery.rewrite(reader);
                            Hits suggestedHits = searcher.search(suggestedLuceneQuery);
                            float score = calcScore(suggestedQueryString, suggestedHits);
                            log.debug("=========================================");
                            log.debug("SCORE = " + score);
                            log.debug("=========================================");
                            suggestions.add(new Suggestion(suggestedQueryString, suggestedLuceneQuery,
                                    suggestedHits, score,
                                    ((i == 0) ? didYouMeanParser.includesOriginal() : false)));
                            log.debug("hits=" + suggestedHits.length() + ", score=" + score);
                        }
                    }
                    Suggestion best = null;
                    if (suggestions.size() > 0) {
                        best = suggestions.last();
                    }
                    if (best != null && !best.isOriginal()) {
                        suggestedQuery = best.getQueryString();
                        if (suggestedQuery != null && suggestedQuery.indexOf('+') >= 0
                                && getQuery().indexOf('+') < 0) {
                            suggestedQuery = suggestedQuery.replaceAll("\\+", "");
                        }
                        if (hits.length() == 0) {
                            if (best.getHits().length() > 0) {
                                // Requery probably required because we added proximity before
                                String suggestedQueryString = best.getQueryString();
                                luceneQuery = generateLuceneQuery(suggestedQueryString, searcher);
                                luceneQuery = luceneQuery.rewrite(reader);
                                hits = searcher.search(luceneQuery);
                                //hits = best.getHits();
                                //luceneQuery = best.getLuceneQuery();
                                usingSuggestedQuery = true;
                            }
                        }
                        log.debug("DidYouMeanParser suggested " + suggestedQuery);
                    } else {
                        if (best != null && best.isOriginal()) {
                            log.debug("The suggestion was the original query after all");
                        }
                        log.debug("DidYouMeanParser did not suggest anything");
                    }
                } finally {
                    compositeSearcher.close();
                }
            }
            /*
            if (hits.length() == 0 && suggestedQuery != null) {
                // If we didn't find anything at all, go ahead and show them what the suggested query
                // will give them
                Query suggestedLuceneQuery = generateLuceneQuery(suggestedQuery, searcher);
                suggestedLuceneQuery = suggestedLuceneQuery.rewrite(reader);
                Hits suggestedHits = searcher.search(suggestedLuceneQuery);
                if (suggestedHits.length() > 0) {
                    hits = suggestedHits;
                    luceneQuery = suggestedLuceneQuery;
                    usingSuggestedQuery = true;
                }
            }
            */
            totalHits = hits.length();
            // Get the genre matches:
            try {
                BitSetFacetHitCounter facetHitCounter = new BitSetFacetHitCounter();
                facetHitCounter.setSearcher(searcher);
                String baseQueryString = (isUsingSuggestedQuery() ? suggestedQuery : query);
                String quotedQueryString = baseQueryString;
                if (quotedQueryString.indexOf('"') == -1 && quotedQueryString.indexOf(' ') > -1) {
                    quotedQueryString = "\"" + quotedQueryString + "\"";
                }
                facetHitCounter.setBaseQuery(luceneQuery, baseQueryString);
                List<HitCount> subQueries = new ArrayList<HitCount>();
                for (Map.Entry<String, Query> entry : genreQueries.entrySet()) {
                    subQueries.add(new HitCount(entry.getKey(), entry.getValue(), entry.getValue().toString(), 0));
                }
                facetHitCounter.setSubQueries(subQueries);
                genreCounts = facetHitCounter.getFacetHitCounts(true);
                whatMatchedCounts = new ArrayList<HitCount>();
                whatMatchedCounts.add(new HitCount("Title",
                        getFieldQuery(baseQueryString, "programTitle", searcher),
                        "programTitle:" + quotedQueryString, 0));
                whatMatchedCounts.add(new HitCount("Episode Title",
                        getFieldQuery(baseQueryString, "episodeTitle", searcher),
                        "episodeTitle:" + quotedQueryString, 0));
                whatMatchedCounts.add(new HitCount("Description",
                        getFieldQuery(baseQueryString, "description", searcher),
                        "description:" + quotedQueryString, 0));
                whatMatchedCounts.add(new HitCount("Content",
                        getFieldQuery(baseQueryString, "text", searcher),
                        "text:" + quotedQueryString, 0));
                whatMatchedCounts.add(new HitCount("Credits",
                        getFieldQuery(baseQueryString, "credits", searcher),
                        "credits:" + quotedQueryString, 0));
                facetHitCounter.setSubQueries(whatMatchedCounts);
                whatMatchedCounts = facetHitCounter.getFacetHitCounts(true);
                // Program count -- not sure if there is a better way to do this.
                HashSet<String> programTitles = new HashSet<String>();
                programCounts = new ArrayList<HitCount>();
                for (int i = 0; i < hits.length() && programCounts.size() < 5; i++) {
                    String title = hits.doc(i).get("programTitle");
                    if (!programTitles.contains(title)) {
                        String queryTitle = QueryParser.escape(title);
                        if (queryTitle.indexOf('"') > -1) {
                            // fix: the original discarded the result of replace()
                            queryTitle = queryTitle.replace("\"", "\\\"");
                        }
                        if (queryTitle.indexOf(' ') > -1) {
                            queryTitle = "\"" + queryTitle + "\"";
                        }
                        programCounts.add(new HitCount(title,
                                getFieldQuery(queryTitle, "programTitle", searcher),
                                "programTitle:" + queryTitle, 0));
                        programTitles.add(title);
                    }
                }
                facetHitCounter.setSubQueries(programCounts);
                programCounts = facetHitCounter.getFacetHitCounts(false);
            } catch (Exception e) {
                e.printStackTrace();
            }
            results = new ArrayList<SearchResult>();
            programToSearchResult.clear();
            Query userQuery = getContentQuery(query, searcher);
            userQuery.rewrite(reader);
            Highlighter highlighter = new Highlighter(new TermFormatter(), new QueryScorer(userQuery, "text"));
            log.debug("#hits=" + hits.length());
            EPGProvider epgProvider = DefaultEpg.getInstance();
            // We added webPath to the index midstream, so some docs have it and some don't.
            // After the next index rebuild, they'll all have it.
            boolean missingWebPaths = false;
            for (int i = 0; i < pageSize && i + startIndex < hits.length(); i++) {
                if (hits.doc(i + startIndex).get("webPath") == null) {
                    missingWebPaths = true;
                    break;
                }
            }
            Program[] programs = null;
            if (missingWebPaths) {
                List<String> programIds = new ArrayList<String>(pageSize);
                for (int i = 0; i < pageSize && i + startIndex < hits.length(); i++) {
                    programIds.add(hits.doc(i + startIndex).get("programID"));
                }
                programs = DefaultEpg.getInstance().getProgramList(programIds);
            }
            for (int i = 0; i < pageSize && i + startIndex < hits.length(); i++) {
                addDocument(hits.doc(i + startIndex), hits.score(i + startIndex), epgProvider, highlighter,
                        analyzer, null, null, (programs == null ? null : programs[i]));
            }
            if (results.size() + startIndex < hits.length()) {
                hasMoreResults = true;
            }
        } finally {
            if (searcher != null) {
                searcher.close();
            }
        }
    } catch (IOException e) {
        log.error("Error searching index", e);
    } catch (ParseException e) {
        log.error("Error searching index", e);
    }
    return results;
}
From source file:com.appeligo.search.actions.SearchResults.java
License:Apache License
private float calcScore(IndexSearcher searcher, String suggestedQueryString) throws IOException, ParseException {
    String constrainedQueryString = suggestedQueryString;
    if (constrainedQueryString.indexOf('"') < 0 && constrainedQueryString.indexOf('\'') < 0) {
        // proximity/distance query (within 5 words of each other)
        constrainedQueryString = "\"" + constrainedQueryString + "\"~5";
    }
    Query luceneQuery = generateLuceneQuery(constrainedQueryString, searcher);
    luceneQuery = luceneQuery.rewrite(searcher.getIndexReader());
    Hits hits = searcher.search(luceneQuery);
    float score = calcScore(suggestedQueryString, hits);
    return score;
}
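calcScore() above, like the Searchit() example earlier, calls query.rewrite(searcher.getIndexReader()) before scoring or highlighting. Rewriting expands multi-term queries (wildcard, fuzzy, prefix) into concrete term queries, which the highlighter's QueryScorer needs in order to find matching terms. A condensed sketch of that idiom (class and method names are illustrative; applies to pre-9.x Lucene, where Query.rewrite takes an IndexReader):

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

final class RewriteForHighlightingSketch {
    // Rewrite against the searcher's reader so the highlighter sees the
    // expanded terms of wildcard/fuzzy/prefix queries, not the raw query.
    static Highlighter highlighterFor(Query query, IndexSearcher searcher) throws IOException {
        Query rewritten = query.rewrite(searcher.getIndexReader());
        return new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(rewritten));
    }
}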
From source file:com.aurel.track.lucene.search.LuceneSearcher.java
License:Open Source License
public static void closeIndexSearcherAndUnderlyingIndexReader(IndexSearcher indexSearcher, String index) {
    if (indexSearcher != null) {
        IndexReader indexReader = indexSearcher.getIndexReader();
        try {
            if (indexReader != null) {
                indexReader.close();
            }
        } catch (IOException e1) {
            LOGGER.error("Closing the " + index + " IndexReader failed with " + e1.getMessage());
        }
    }
}
From source file:com.b2international.snowowl.snomed.api.impl.ClassificationRunIndex.java
License:Apache License
public void invalidateClassificationRuns() throws IOException {
    final Query statusQuery = Fields.newQuery().field(FIELD_STATUS, ClassificationStatus.COMPLETED.name())
            .field(FIELD_STATUS, ClassificationStatus.RUNNING.name())
            .field(FIELD_STATUS, ClassificationStatus.SAVING_IN_PROGRESS.name())
            .field(FIELD_STATUS, ClassificationStatus.SCHEDULED.name()).matchAny();
    final Query query = Fields.newQuery().field(FIELD_CLASS, ClassificationRun.class.getSimpleName())
            .and(statusQuery).matchAll();
    IndexSearcher searcher = null;
    try {
        searcher = manager.acquire();
        final TotalHitCountCollector collector = new TotalHitCountCollector();
        searcher.search(query, collector);
        final int totalHits = collector.getTotalHits();
        final int docsToRetrieve = Ints.min(searcher.getIndexReader().maxDoc(), totalHits);
        if (docsToRetrieve < 1) {
            return;
        }
        final TopDocs docs = searcher.search(query, docsToRetrieve, Sort.INDEXORDER, false, false);
        final ScoreDoc[] scoreDocs = docs.scoreDocs;
        final ObjectReader reader = objectMapper.reader(ClassificationRun.class);
        for (int i = 0; i < scoreDocs.length; i++) {
            final Document sourceDocument = searcher.doc(scoreDocs[i].doc,
                    ImmutableSet.of(FIELD_BRANCH_PATH, FIELD_SOURCE));
            final String branchPath = sourceDocument.get(FIELD_BRANCH_PATH);
            final String source = sourceDocument.get(FIELD_SOURCE);
            final ClassificationRun run = reader.readValue(source);
            run.setStatus(ClassificationStatus.STALE);
            upsertClassificationRunNoCommit(branchPath, run);
        }
        commit();
    } finally {
        if (null != searcher) {
            manager.release(searcher);
        }
    }
}