List of usage examples for org.apache.lucene.search IndexSearcher IndexSearcher
public IndexSearcher(IndexReaderContext context)
From source file:com.aliasi.lingmed.test.functional.medline.TestDateSearch.java
License:Lingpipe license
public static void main(String[] args) throws Exception { File luceneDir = new File(args[0]); Searcher medlineLocalSearcher = new IndexSearcher(FSDirectory.getDirectory(luceneDir)); MedlineSearcher medlineSearcher = new MedlineSearcherImpl(new MedlineCodec(), medlineLocalSearcher); System.out.println("test1a: citations between 1985-1985"); try {/*from w ww . j av a 2s . co m*/ SearchResults<MedlineCitation> hits = medlineSearcher.getCitationsInYearRange("1985", "1985"); System.out.println("citations in 1985: " + hits.size()); for (MedlineCitation hit : hits) { PubDate pubDate = hit.article().journal().journalIssue().pubDate(); if (pubDate.isStructured()) { System.out.println("citation: " + hit.pmid() + "\tpubdate " + pubDate.toString()); } else { System.out.println("citation: " + hit.pmid() + "\tUNSTRUCTURED " + pubDate.medlineDate()); } } System.out.println("citations in 1985: " + hits.size()); } catch (DaoException de) { System.out.println("test1a failed: " + de.getMessage()); de.printStackTrace(); } System.out.println("test1b: citations between 2000-2005"); try { SearchResults<MedlineCitation> hits = medlineSearcher.getCitationsInYearRange("2000", "2005"); System.out.println("citations in 2000-2005: " + hits.size()); } catch (DaoException de) { System.out.println("test1b failed: " + de.getMessage()); de.printStackTrace(); } System.out.println("test1c: citations between 2004-2005"); try { SearchResults<MedlineCitation> hits = medlineSearcher.getCitationsInYearRange("2004", "2005"); System.out.println("citations in 2004-2005: " + hits.size()); } catch (DaoException de) { System.out.println("test1c failed: " + de.getMessage()); de.printStackTrace(); } System.out.println("test1d: citations between 1964-1965"); try { SearchResults<MedlineCitation> hits = medlineSearcher.getCitationsInYearRange("1964", "1965"); System.out.println("citations in 1964-1965: " + hits.size()); } catch (DaoException de) { System.out.println("test1d failed: " + de.getMessage()); 
de.printStackTrace(); } System.out.println("test1e: citations between 0000-9999"); try { SearchResults<MedlineCitation> hits = medlineSearcher.getCitationsInYearRange("0000", "9999"); System.out.println("citations in 0000-9999: " + hits.size()); } catch (DaoException de) { System.out.println("test1e failed: " + de.getMessage()); de.printStackTrace(); } System.out.println("test1f: citations between 0000-1000"); try { SearchResults<MedlineCitation> hits = medlineSearcher.getCitationsInYearRange("0000", "1000"); System.out.println("citations in 0000-1000: " + hits.size()); } catch (DaoException de) { System.out.println("test1f failed: " + de.getMessage()); de.printStackTrace(); } System.out.println("test1g: citations between 3000-5000"); try { SearchResults<MedlineCitation> hits = medlineSearcher.getCitationsInYearRange("3000", "5000"); System.out.println("citations in 3000-5000: " + hits.size()); } catch (DaoException de) { System.out.println("test1g failed: " + de.getMessage()); de.printStackTrace(); } System.out.println("test2: citations between 1999-1998"); try { SearchResults<MedlineCitation> hits = medlineSearcher.getCitationsInYearRange("1999", "1998"); } catch (DaoException de) { System.out.println("test2 passed (i.e. failed): " + de.getMessage()); } System.out.println("test3: citations between foo-bar"); try { SearchResults<MedlineCitation> hits = medlineSearcher.getCitationsInYearRange("foo", "bar"); } catch (DaoException de) { System.out.println("test3 passed (i.e. failed): " + de.getMessage()); } System.out.println("test4: citations between empty range"); try { SearchResults<MedlineCitation> hits = medlineSearcher.getCitationsInYearRange("", ""); } catch (DaoException de) { System.out.println("test4 passed (i.e. 
failed): " + de.getMessage()); } System.out.println("test5: citations between empty range"); try { SearchResults<MedlineCitation> hits = medlineSearcher.getCitationsInYearRange("2000", ""); } catch (DaoException de) { System.out.println("test4 passed (i.e. failed): " + de.getMessage()); } System.exit(0); }
From source file:com.aliasi.lingmed.test.functional.medline.VerifyDates.java
License:Lingpipe license
/**
 * Prints the publication date of every citation reachable through the Medline searcher.
 *
 * <p>Citations lacking an article, journal, journal issue or publication date are skipped.
 * Structured dates are printed via {@code toString()}; unstructured ones via
 * {@code medlineDate()}.
 *
 * @param args {@code args[0]} is the path of the Lucene index directory
 * @throws Exception if opening the index or constructing the searcher fails
 */
public static void main(String[] args) throws Exception {
    File indexDir = new File(args[0]);
    Searcher localSearcher = new IndexSearcher(FSDirectory.getDirectory(indexDir));
    MedlineSearcher medlineSearcher = new MedlineSearcherImpl(new MedlineCodec(), localSearcher);

    for (MedlineCitation citation : medlineSearcher) {
        boolean hasPubDate = citation.article() != null
                && citation.article().journal() != null
                && citation.article().journal().journalIssue() != null
                && citation.article().journal().journalIssue().pubDate() != null;
        if (hasPubDate) {
            PubDate pubDate = citation.article().journal().journalIssue().pubDate();
            String line;
            if (pubDate.isStructured()) {
                line = "citation: " + citation.pmid() + "\tpubdate " + pubDate.toString();
            } else {
                line = "citation: " + citation.pmid() + "\tUNSTRUCTURED " + pubDate.medlineDate();
            }
            System.out.println(line);
        }
    }
    System.exit(0);
}
From source file:com.aperigeek.dropvault.web.service.IndexService.java
License:Open Source License
/**
 * Opens a new searcher over the directory returned by {@code getDirectory} for the
 * given credentials.
 *
 * @param username passed through to {@code getDirectory}
 * @param password passed through to {@code getDirectory}
 * @return a newly constructed searcher; the caller owns it
 * @throws IOException if the directory or the searcher cannot be opened
 */
private IndexSearcher getIndexSearcher(String username, String password) throws IOException {
    return new IndexSearcher(getDirectory(username, password));
}
From source file:com.appeligo.alerts.KeywordAlertThread.java
License:Apache License
private IndexSearcher getIndexSearcher() throws IOException { if ((currentSearcher != null) && ((System.currentTimeMillis() - currentSearcherCreated) > shortestTimeBetweenQueriesMs)) { currentSearcher.close();// w ww .j a v a2s .co m currentSearcher = null; } if (currentSearcher == null) { if (log.isDebugEnabled()) log.debug("KeywordAlertThread time to get a new searcher"); currentSearcherCreated = System.currentTimeMillis(); currentSearcher = new IndexSearcher(liveIndexDir); } return currentSearcher; }
From source file:com.appeligo.amazon.AmazonSearcher.java
License:Apache License
protected void loadProductSearcher() { Configuration config = ConfigUtils.getAmazonConfig(); String indexLocation = config.getString("programIndex"); try {//from w w w . ja v a 2s. c om productSearcher = new IndexSearcher(indexLocation); } catch (IOException e) { if (log.isErrorEnabled()) { log.error("Cannot load product index: " + indexLocation, e); } } }
From source file:com.appeligo.amazon.ProgramIndexer.java
License:Apache License
public void run() { if (!indexLocation.exists()) { //create the index directory indexLocation.mkdirs();/*from w w w. j av a 2 s . com*/ } try { savedSearches.clear(); writer = new IndexWriter(indexLocation, analyzer); searcher = new IndexSearcher(indexLocation.getPath()); deleteExpiredPrograms(); writer.flush(); //reopen searcher so the deleted documents are not present close(searcher); searcher = new IndexSearcher(indexLocation.getPath()); addNewPrograms(); } catch (Exception e) { log.error("Error occurred during indexing.", e); } finally { savedSearches.clear(); close(searcher); searcher = null; close(writer); writer = null; close(conn); conn = null; } }
From source file:com.appeligo.captions.DeleteOldProgramsThread.java
License:Apache License
@Override public void run() { while (true) { LuceneIndexer liveIndex = LuceneIndexer.getInstance(liveIndexLocation); Calendar wayback = Calendar.getInstance(); wayback.add(Calendar.MONTH, -3); Calendar tenminutesago = Calendar.getInstance(); tenminutesago.add(Calendar.MINUTE, -10); log.info("Deleting old programs from live index, between " + wayback.getTime() + " and " + tenminutesago.getTime()); String dateField = "lineup-" + liveLineup + "-endTime"; ConstantScoreRangeQuery dateQuery = new ConstantScoreRangeQuery(dateField, DateTools.dateToString(wayback.getTime(), DateTools.Resolution.MINUTE), DateTools.dateToString(tenminutesago.getTime(), DateTools.Resolution.MINUTE), true, true); IndexSearcher searcher = null;// w w w . j a v a2 s . c o m try { searcher = new IndexSearcher(liveIndexLocation); Hits hits = searcher.search(dateQuery); Set<Term> terms = new HashSet<Term>(); if (hits.length() > 0) { for (int index = 0; index < hits.length(); index++) { Document doc = hits.doc(index); Term term = new Term(dateField, doc.get(dateField)); terms.add(term); } } liveIndex.deleteDocuments(terms.toArray(new Term[terms.size()])); } catch (IOException e) { log.error("Error deleting old programs from live index", e); } finally { if (searcher != null) { try { searcher.close(); } catch (IOException e) { log.error("Error closing searcher when deleting old programs from live index", e); } } } Calendar cal = Calendar.getInstance(); int minute = cal.get(Calendar.MINUTE); if (minute < 15) { cal.set(Calendar.MINUTE, 15); } else if (minute >= 45) { cal.set(Calendar.MINUTE, 15); cal.add(Calendar.HOUR, 1); } else { cal.set(Calendar.MINUTE, 45); } log.info("queued up that delete, now we're waiting until " + cal.getTime()); Utils.sleepUntil(cal.getTimeInMillis()); } }
From source file:com.appeligo.lucene.LuceneIndexer.java
License:Apache License
private Document getProgramDocument(String programId) { IndexSearcher searcher = null;//from www .ja v a 2 s . c o m Document programDoc = null; try { searcher = new IndexSearcher(indexLocation); TermQuery termQuery = new TermQuery(new Term("programID", programId)); Hits hits = searcher.search(termQuery); if (hits.length() > 0) { programDoc = hits.doc(0); } } catch (IOException e) { log.error(e); } finally { try { searcher.close(); } catch (IOException e) { log.error(e); } } return programDoc; }
From source file:com.appeligo.search.actions.SearchResults.java
License:Apache License
/**
 * Opens a new searcher on {@code indexLocation}, logging the location at debug level.
 *
 * @return a newly constructed searcher; the caller is responsible for closing it
 * @throws IOException if the index cannot be opened
 */
public IndexSearcher newIndexSearcher() throws IOException {
    log.debug("getting index " + indexLocation);
    IndexSearcher freshSearcher = new IndexSearcher(indexLocation);
    return freshSearcher;
}
From source file:com.appeligo.search.actions.SearchResults.java
License:Apache License
public List<SearchResult> getSearchResults(int startIndex) { initializeStatics();/*from www . j a v a2s . c o m*/ hasMoreResults = false; try { IndexSearcher searcher = null; try { searcher = newIndexSearcher(); IndexReader reader = searcher.getIndexReader(); Query luceneQuery = generateLuceneQuery(searcher); luceneQuery = luceneQuery.rewrite(reader); Hits hits = searcher.search(luceneQuery); usingSuggestedQuery = false; suggestedQuery = null; if ((didYouMeanParser != null) && ((hits.length() < minimumHits) || (calcScore(searcher, getQuery()) < minimumScore))) { if (log.isDebugEnabled()) { log.debug("Need to suggest because either num hits " + hits.length() + " < " + minimumHits + "\n or top hit score " + (hits.length() > 0 ? hits.score(0) : "[NO HITS]") + " < " + minimumScore); } IndexSearcher compositeSearcher = new IndexSearcher(compositeIndexLocation); try { log.debug("calling suggest() with query=" + getQuery() + " and composite index from " + compositeIndexLocation); //Query didYouMean = didYouMeanParser.suggest(getQuery(), compositeSearcher.getIndexReader()); Query suggestedQueries[] = didYouMeanParser.getSuggestions(getQuery(), compositeSearcher.getIndexReader()); TreeSet<Suggestion> suggestions = new TreeSet<Suggestion>(); if (suggestedQueries != null) { for (int i = 0; i < suggestedQueries.length; i++) { log.debug("trying suggested query: " + suggestedQueries[i].toString(defaultField)); String suggestedQueryString = suggestedQueries[i].toString(defaultField); String constrainedQueryString = suggestedQueryString; if (constrainedQueryString.indexOf('"') < 0 && constrainedQueryString.indexOf('\'') < 0) { constrainedQueryString = "\"" + constrainedQueryString + "\"~5"; // proximity/distance query (within 5 words of each other) } Query suggestedLuceneQuery = generateLuceneQuery(constrainedQueryString, searcher); suggestedLuceneQuery = suggestedLuceneQuery.rewrite(reader); Hits suggestedHits = searcher.search(suggestedLuceneQuery); float score = 
calcScore(suggestedQueryString, suggestedHits); log.debug("========================================="); log.debug("SCORE = " + score); log.debug("========================================="); suggestions.add( new Suggestion(suggestedQueryString, suggestedLuceneQuery, suggestedHits, score, ((i == 0) ? didYouMeanParser.includesOriginal() : false))); log.debug("hits=" + suggestedHits.length() + ", score=" + score); } } Suggestion best = null; if (suggestions.size() > 0) { best = suggestions.last(); } if (best != null && !best.isOriginal()) { suggestedQuery = best.getQueryString(); if (suggestedQuery != null && suggestedQuery.indexOf('+') >= 0 && getQuery().indexOf('+') < 0) { suggestedQuery = suggestedQuery.replaceAll("\\+", ""); } if (hits.length() == 0) { if (best.getHits().length() > 0) { // Requery probably required because we added proximity before String suggestedQueryString = best.getQueryString(); luceneQuery = generateLuceneQuery(suggestedQueryString, searcher); luceneQuery = luceneQuery.rewrite(reader); hits = searcher.search(luceneQuery); //hits = best.getHits(); //luceneQuery = best.getLuceneQuery(); usingSuggestedQuery = true; } } log.debug("DidYouMeanParser suggested " + suggestedQuery); } else { if (best != null && best.isOriginal()) { log.debug("The suggestion was the original query after all"); } log.debug("DidYouMeanParser did not suggest anything"); } } finally { compositeSearcher.close(); } } /* if (hits.length() == 0 && suggestedQuery != null) { // If we didn't find anything at all, go ahead and show them what the suggested query // will give them Query suggestedLuceneQuery = generateLuceneQuery(suggestedQuery, searcher); suggestedLuceneQuery = suggestedLuceneQuery.rewrite(reader); Hits suggestedHits = searcher.search(suggestedLuceneQuery); if (suggestedHits.length() > 0) { hits = suggestedHits; luceneQuery = suggestedLuceneQuery; usingSuggestedQuery = true; } } */ totalHits = hits.length(); //Get the genere matches: try { BitSetFacetHitCounter 
facetHitCounter = new BitSetFacetHitCounter(); facetHitCounter.setSearcher(searcher); String baseQueryString = (isUsingSuggestedQuery() ? suggestedQuery : query); String quotedQueryString = baseQueryString; if (quotedQueryString.indexOf('"') == -1 && quotedQueryString.indexOf(' ') > -1) { quotedQueryString = "\"" + quotedQueryString + "\""; } facetHitCounter.setBaseQuery(luceneQuery, baseQueryString); List<HitCount> subQueries = new ArrayList<HitCount>(); for (Map.Entry<String, Query> entry : genreQueries.entrySet()) { subQueries.add( new HitCount(entry.getKey(), entry.getValue(), entry.getValue().toString(), 0)); } facetHitCounter.setSubQueries(subQueries); genreCounts = facetHitCounter.getFacetHitCounts(true); whatMatchedCounts = new ArrayList<HitCount>(); whatMatchedCounts .add(new HitCount("Title", getFieldQuery(baseQueryString, "programTitle", searcher), "programTitle:" + quotedQueryString, 0)); whatMatchedCounts.add( new HitCount("Episode Title", getFieldQuery(baseQueryString, "episodeTitle", searcher), "episodeTitle:" + quotedQueryString, 0)); whatMatchedCounts.add( new HitCount("Description", getFieldQuery(baseQueryString, "description", searcher), "description:" + quotedQueryString, 0)); whatMatchedCounts.add(new HitCount("Content", getFieldQuery(baseQueryString, "text", searcher), "text:" + quotedQueryString, 0)); whatMatchedCounts .add(new HitCount("Credits", getFieldQuery(baseQueryString, "credits", searcher), "credits:" + quotedQueryString, 0)); facetHitCounter.setSubQueries(whatMatchedCounts); whatMatchedCounts = facetHitCounter.getFacetHitCounts(true); //Program Count -- Not sure if there is a better way to do this. 
HashSet<String> programTitles = new HashSet<String>(); programCounts = new ArrayList<HitCount>(); for (int i = 0; i < hits.length() && programCounts.size() < 5; i++) { String title = hits.doc(i).get("programTitle"); if (!programTitles.contains(title)) { String queryTitle = title; queryTitle = QueryParser.escape(title); if (queryTitle.indexOf('"') > -1) { queryTitle.replace("\"", "\\\""); } if (queryTitle.indexOf(' ') > -1) { queryTitle = "\"" + queryTitle + "\""; } programCounts .add(new HitCount(title, getFieldQuery(queryTitle, "programTitle", searcher), "programTitle:" + queryTitle, 0)); programTitles.add(title); } } facetHitCounter.setSubQueries(programCounts); programCounts = facetHitCounter.getFacetHitCounts(false); } catch (Exception e) { e.printStackTrace(); } results = new ArrayList<SearchResult>(); programToSearchResult.clear(); Query userQuery = getContentQuery(query, searcher); userQuery.rewrite(reader); Highlighter highlighter = new Highlighter(new TermFormatter(), new QueryScorer(userQuery, "text")); log.debug("#hits=" + hits.length()); EPGProvider epgProvider = DefaultEpg.getInstance(); boolean missingWebPaths = false; // We added this to the index midstream, so some do and some don't. // Next index rebuild, and they'll all have it. for (int i = 0; i < pageSize && i + startIndex < hits.length(); i++) { if (hits.doc(i + startIndex).get("webPath") == null) { missingWebPaths = true; break; } } Program[] programs = null; if (missingWebPaths) { List<String> programIds = new ArrayList<String>(pageSize); for (int i = 0; i < pageSize && i + startIndex < hits.length(); i++) { programIds.add(hits.doc(i + startIndex).get("programID")); } programs = DefaultEpg.getInstance().getProgramList(programIds); } for (int i = 0; i < pageSize && i + startIndex < hits.length(); i++) { addDocument(hits.doc(i + startIndex), hits.score(i + startIndex), epgProvider, highlighter, analyzer, null, null, (programs == null ? 
null : programs[i])); } if (results.size() + startIndex < hits.length()) { hasMoreResults = true; } } finally { if (searcher != null) { searcher.close(); } } } catch (IOException e) { log.error("Error searching index", e); } catch (ParseException e) { log.error("Error searching index", e); } return results; }