List of usage examples for org.apache.lucene.search IndexSearcher doc
public Document doc(int docID) throws IOException
.getIndexReader().document(docID)
From source file:fr.paris.lutece.plugins.helpdesk.service.helpdesksearch.HelpdeskLuceneSearchEngine.java
License:Open Source License
/** * Return search results// ww w . jav a 2s . c o m * @param nIdFaq The id Faq * @param strContent The search query * @param dateBegin The date begin * @param dateEnd The date end * @param subject The {@link Subject} * @param bSearchSubSubjects true if the query must include sub-subjects * @param request The {@link HttpServletRequest} * @return Results as a collection of SearchResult */ public List<SearchResult> getSearchResults(int nIdFaq, String strContent, Date dateBegin, Date dateEnd, Subject subject, boolean bSearchSubSubjects, HttpServletRequest request) { ArrayList<SearchItem> listResults = new ArrayList<SearchItem>(); IndexSearcher searcher = null; Query filterRole = getFilterRoles(request); try (Directory directory = IndexationService.getDirectoryIndex(); IndexReader ir = DirectoryReader.open(directory);) { searcher = new IndexSearcher(ir); Collection<String> queries = new ArrayList<String>(); Collection<String> fields = new ArrayList<String>(); Collection<BooleanClause.Occur> flags = new ArrayList<BooleanClause.Occur>(); //Faq Id if (nIdFaq != -1) { Query queryFaqId = new TermQuery(new Term(HelpdeskSearchItem.FIELD_FAQ_ID, String.valueOf(nIdFaq))); queries.add(queryFaqId.toString()); fields.add(HelpdeskSearchItem.FIELD_FAQ_ID); flags.add(BooleanClause.Occur.MUST); } //Type (=helpdesk) PhraseQuery.Builder queryTypeBuilder = new PhraseQuery.Builder(); queryTypeBuilder.add(new Term(HelpdeskSearchItem.FIELD_TYPE, HelpdeskPlugin.PLUGIN_NAME)); PhraseQuery queryType = queryTypeBuilder.build(); queries.add(queryType.toString()); fields.add(HelpdeskSearchItem.FIELD_TYPE); flags.add(BooleanClause.Occur.MUST); //Content if ((strContent != null) && !strContent.equals(EMPTY_STRING)) { Query queryContent = new TermQuery(new Term(HelpdeskSearchItem.FIELD_CONTENTS, strContent)); queries.add(queryContent.toString()); fields.add(HelpdeskSearchItem.FIELD_CONTENTS); flags.add(BooleanClause.Occur.MUST); } //Dates if ((dateBegin != null) && (dateEnd != null)) { BytesRef 
strDateBegin = new BytesRef(DateTools.dateToString(dateBegin, DateTools.Resolution.DAY)); BytesRef strDateEnd = new BytesRef(DateTools.dateToString(dateEnd, DateTools.Resolution.DAY)); Query queryDate = new TermRangeQuery(HelpdeskSearchItem.FIELD_DATE, strDateBegin, strDateEnd, true, true); queries.add(queryDate.toString()); fields.add(HelpdeskSearchItem.FIELD_DATE); flags.add(BooleanClause.Occur.MUST); } //Subjects if ((bSearchSubSubjects) && (subject != null)) { Plugin plugin = PluginService.getPlugin(HelpdeskPlugin.PLUGIN_NAME); Collection<Term> listSubjects = new ArrayList<Term>(); getListSubjects(listSubjects, subject, plugin); String strQuerySubject = OPEN_PARENTHESIS; for (Term term : listSubjects) { Query querySubject = new TermQuery(term); strQuerySubject += (querySubject.toString() + SPACE); } strQuerySubject += CLOSE_PARENTHESIS; queries.add(strQuerySubject); fields.add(HelpdeskSearchItem.FIELD_SUBJECT); flags.add(BooleanClause.Occur.MUST); } else { if ((subject != null)) { Query querySubject = new TermQuery( new Term(HelpdeskSearchItem.FIELD_SUBJECT, String.valueOf(subject.getId()))); queries.add(querySubject.toString()); fields.add(HelpdeskSearchItem.FIELD_SUBJECT); flags.add(BooleanClause.Occur.MUST); } } Query queryMulti = MultiFieldQueryParser.parse((String[]) queries.toArray(new String[queries.size()]), (String[]) fields.toArray(new String[fields.size()]), (BooleanClause.Occur[]) flags.toArray(new BooleanClause.Occur[flags.size()]), IndexationService.getAnalyser()); BooleanQuery.Builder bQueryMultiBuilder = new BooleanQuery.Builder(); bQueryMultiBuilder.add(queryMulti, BooleanClause.Occur.MUST); if (filterRole != null) { bQueryMultiBuilder.add(filterRole, BooleanClause.Occur.FILTER); } TopDocs topDocs = searcher.search(bQueryMultiBuilder.build(), LuceneSearchEngine.MAX_RESPONSES); ScoreDoc[] hits = topDocs.scoreDocs; for (int i = 0; i < hits.length; i++) { int docId = hits[i].doc; Document document = searcher.doc(docId); SearchItem si = new 
SearchItem(document); listResults.add(si); } } catch (Exception e) { AppLogService.error(e.getMessage(), e); } return convertList(listResults); }
From source file:fr.paris.lutece.plugins.sponsoredlinks.service.sponsoredlinkssearch.SponsoredLinksLuceneSearchEngine.java
License:Open Source License
/** * {@inheritDoc}/*ww w . j av a 2 s . com*/ */ public List<SponsoredLinksSearchItem> getSearchResults(String strQuery) { List<SponsoredLinksSearchItem> listResults = new ArrayList<SponsoredLinksSearchItem>(); IndexSearcher searcher = null; Filter filter = null; Query query = null; try { IndexReader ir = DirectoryReader.open(IndexationService.getDirectoryIndex()); searcher = new IndexSearcher(ir); //filter on content if (StringUtils.isNotBlank(strQuery)) { QueryParser parser = new QueryParser(IndexationService.LUCENE_INDEX_VERSION, SearchItem.FIELD_CONTENTS, IndexationService.getAnalyser()); query = parser.parse((strQuery != null) ? strQuery : ""); } //filter on sponsoredlink type Filter[] filters = null; Query queryTypeSponsoredLink = new TermQuery( new Term(SearchItem.FIELD_TYPE, SponsoredLinksIndexer.INDEX_TYPE_SPONSOREDLINKS)); filters = new Filter[1]; filters[filters.length - 1] = new CachingWrapperFilter(new QueryWrapperFilter(queryTypeSponsoredLink)); filter = new ChainedFilter(filters, ChainedFilter.AND); TopDocs topDocs = searcher.search(query, filter, LuceneSearchEngine.MAX_RESPONSES); ScoreDoc[] hits = topDocs.scoreDocs; for (int i = 0; i < hits.length; i++) { int docId = hits[i].doc; Document document = searcher.doc(docId); SponsoredLinksSearchItem si = new SponsoredLinksSearchItem(document); listResults.add(si); } } catch (Exception e) { AppLogService.error(e.getMessage(), e); } return listResults; }
From source file:fr.paris.lutece.plugins.suggest.service.suggestsearch.SuggestLuceneSearchEngine.java
License:Open Source License
/** * Return search results//w ww. ja v a 2 s . c o m * * @param strQuery The search query * @param filter the filter * @return Results as a collection of SearchResult */ public List<SuggestSearchItem> getSearchResults(String strQuery, SubmitFilter filter) { List<SuggestSearchItem> listResults = new ArrayList<SuggestSearchItem>(); IndexSearcher searcher = null; try { IndexReader ir = DirectoryReader.open(IndexationService.getDirectoryIndex()); searcher = new IndexSearcher(ir); Collection<String> queries = new ArrayList<String>(); Collection<String> fields = new ArrayList<String>(); Collection<BooleanClause.Occur> flags = new ArrayList<BooleanClause.Occur>(); //filter on content if ((strQuery != null) && !strQuery.equals("")) { Query queryContent = new TermQuery(new Term(SuggestSearchItem.FIELD_CONTENTS, strQuery)); queries.add(queryContent.toString()); fields.add(SuggestSearchItem.FIELD_CONTENTS); flags.add(BooleanClause.Occur.MUST); } //filter on content id suggest if (filter.containsIdSuggest()) { Query queryIdSuggest = new TermQuery( new Term(SuggestSearchItem.FIELD_ID_SUGGEST, String.valueOf(filter.getIdSuggest()))); queries.add(queryIdSuggest.toString()); fields.add(SuggestSearchItem.FIELD_ID_SUGGEST); flags.add(BooleanClause.Occur.MUST); } //filter on suggest submit state if (filter.containsIdSuggestSubmitState()) { Query queryState = new TermQuery( new Term(SuggestSearchItem.FIELD_STATE, String.valueOf(filter.getIdSuggestSubmitState()))); queries.add(queryState.toString()); fields.add(SuggestSearchItem.FIELD_STATE); flags.add(BooleanClause.Occur.MUST); } //filter on suggest type Query queryTypeSuggest = new TermQuery( new Term(SuggestSearchItem.FIELD_TYPE, SuggestIndexer.INDEX_TYPE_SUGGEST)); queries.add(queryTypeSuggest.toString()); fields.add(SuggestSearchItem.FIELD_TYPE); flags.add(BooleanClause.Occur.MUST); Query queryMulti = MultiFieldQueryParser.parse(IndexationService.LUCENE_INDEX_VERSION, (String[]) queries.toArray(new String[queries.size()]), 
(String[]) fields.toArray(new String[fields.size()]), (BooleanClause.Occur[]) flags.toArray(new BooleanClause.Occur[flags.size()]), IndexationService.getAnalyser()); TopDocs topDocs = searcher.search(queryMulti, LuceneSearchEngine.MAX_RESPONSES); ScoreDoc[] hits = topDocs.scoreDocs; for (int i = 0; i < hits.length; i++) { int docId = hits[i].doc; Document document = searcher.doc(docId); SuggestSearchItem si = new SuggestSearchItem(document); listResults.add(si); } } catch (Exception e) { AppLogService.error(e.getMessage(), e); } return listResults; }
From source file:fr.univ_tours.etu.searcher.LikeThisTest.java
private void findSilimar(String searchForSimilar) throws IOException { IndexReader reader = DirectoryReader.open(indexDir); IndexSearcher indexSearcher = new IndexSearcher(reader); MoreLikeThis mlt = new MoreLikeThis(reader); mlt.setMinTermFreq(0);/*w w w.j av a 2 s.c om*/ mlt.setMinDocFreq(0); mlt.setFieldNames(new String[] { "title", "content" }); mlt.setAnalyzer(analyzer); Reader sReader = new StringReader(searchForSimilar); Query query = mlt.like("content", sReader); TopDocs topDocs = indexSearcher.search(query, 10); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { Document aSimilar = indexSearcher.doc(scoreDoc.doc); String similarTitle = aSimilar.get("title"); String similarContent = aSimilar.get("content"); System.out.println("====similar finded===="); System.out.println("title: " + similarTitle); System.out.println("content: " + similarContent); } }
From source file:fr.univ_tours.etu.searcher.Searcher.java
/**
 * Runs a multi-field search built from the request's field->query map and
 * returns the matching documents as ResultObjects. When query expansion is
 * enabled, the top results' summaries are fed to MoreLikeThis and similar
 * documents are appended to the result list.
 *
 * NOTE(review): when expansion runs, this method closes and reopens
 * this.reader — callers sharing that reader are affected; verify intended.
 *
 * @param query the search request (field->query dictionary + expansion flag)
 * @return the matching (and possibly expanded) results, in rank order
 * @throws IOException    if the index cannot be read or reopened
 * @throws ParseException if the assembled query cannot be parsed
 */
public List<ResultObject> search(SearchQueriesRequest query) throws IOException, ParseException {
    Map<String, String> queriesDictionary = query.getQueriesDictionary();
    boolean useQueryExpansion = query.isUseQueryExpansion();
    // Doc ids already seen, used to avoid duplicates during expansion
    List<Integer> docsToExpand = (useQueryExpansion) ? new ArrayList<>() : null;
    // fsa/qsa: parallel field-name / query-string lists for the multi-field parser
    List<String> fsa = new ArrayList<>();
    List<String> qsa = new ArrayList<>();
    String contentLemmas = "";
    if (queriesDictionary.containsKey(DocFields.CONTENTS)) {
        // Tokenize the content query twice: case-sensitive and caseless
        regularTokenizer.tokenize(queriesDictionary.get(DocFields.CONTENTS), true);
        caselessTokenizer.tokenize(queriesDictionary.get(DocFields.CONTENTS), true);
        contentLemmas = caselessTokenizer.getLemmaString();
        System.out.println("Lemmas: " + caselessTokenizer.getLemmaList());
        // Collect named entities from both tokenizations, ';'-separated
        String neString = "";
        if (caselessTokenizer.getNeList() != null && caselessTokenizer.getNeList().size() != 0) {
            neString = caselessTokenizer.getNeString(";", true);
            System.out.println("NE caseless: " + neString);
        }
        if (regularTokenizer.getNeList() != null && regularTokenizer.getNeList().size() != 0) {
            neString += ";" + regularTokenizer.getNeString(";", true);
            System.out.println("NE all: " + neString);
        }
        // Add a named-entities criterion only when some NE was found
        if (!"".equals(neString)) {
            fsa.add(DocFields.NAMED_ENTITIES);
            qsa.add(neString);
        }
    }
    for (Map.Entry<String, String> entry : queriesDictionary.entrySet()) {
        fsa.add(entry.getKey());
        // Content and synonym fields are searched with the lemmatized form;
        // every other field uses the raw query text.
        if (entry.getKey().equals(DocFields.CONTENTS) || entry.getKey().equals(DocFields.SYNONYMS)) {
            qsa.add(contentLemmas);
        } else {
            qsa.add(entry.getValue());
        }
    }
    Query q = MultiFieldQueryParser.parse(qsa.toArray(new String[qsa.size()]),
            fsa.toArray(new String[fsa.size()]), analyzer);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(q, this.numRetrievedDocs);
    ScoreDoc[] hits = docs.scoreDocs;
    List<ResultObject> resultObjects = new ArrayList<>();
    // NOTE(review): 'result' is written each iteration but never read after
    // the loop — apparently dead; kept as-is.
    String result = "";
    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        if (useQueryExpansion) {
            docsToExpand.add(docId);
        }
        Document d = searcher.doc(docId);
        resultObjects.add(new ResultObject(docId, i, d.get(DocFields.TITLE), d.get(DocFields.AUTHOR),
                d.get(DocFields.FILE_PATH), d.get(DocFields.SUMMARY), d.get(DocFields.FILE_NAME)));
        result = d.get(DocFields.SUMMARY);
    }
    if (useQueryExpansion) {
        // Reopen the index reader and build a fresh searcher for expansion
        reader.close();
        this.reader = DirectoryReader.open(FSDirectory.open(new File(this.indexDir).toPath()));
        searcher = new IndexSearcher(reader);
        MoreLikeThis mlt = new MoreLikeThis(reader);
        // Zero thresholds so even rare terms contribute to similarity
        mlt.setMinTermFreq(0);
        mlt.setMinDocFreq(0);
        mlt.setAnalyzer(analyzer);
        // Expand from at most the top 5 original results
        for (int i = 0; i < Math.min(docsToExpand.size(), 5); i++) {
            Reader r = new StringReader(resultObjects.get(i).getSummary());
            Query expandedQuery = mlt.like(DocFields.CONTENTS, r);
            TopDocs topDocs = searcher.search(expandedQuery, 5);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Append only documents not already in the result set
                if (!docsToExpand.contains(scoreDoc.doc)) {
                    docsToExpand.add(scoreDoc.doc);
                    Document aSimilar = searcher.doc(scoreDoc.doc);
                    // Expanded hits get a fixed docId of 1 and a rank equal
                    // to their position at the end of the list.
                    resultObjects.add(new ResultObject(1, resultObjects.size(), aSimilar.get(DocFields.TITLE),
                            aSimilar.get(DocFields.AUTHOR), aSimilar.get(DocFields.FILE_PATH),
                            aSimilar.get(DocFields.SUMMARY), aSimilar.get(DocFields.FILE_NAME)));
                } else {
                    // already present — intentionally nothing to do
                }
            }
        }
    }
    return resultObjects;
}
From source file:gate.creole.ir.lucene.LuceneSearch.java
License:Open Source License
/** Search in corpus with this query. * In each QueryResult will be added values of these fields. * Result length is limited. *///from w w w . j av a 2 s . c o m @Override public QueryResultList search(String query, int limit, List<String> fieldNames) throws IndexException, SearchException { List<QueryResult> result = new Vector<QueryResult>(); try { IndexSearcher searcher = new IndexSearcher(IndexReader .open(FSDirectory.open(new File(indexedCorpus.getIndexDefinition().getIndexLocation())), true)); QueryParser parser = new QueryParser(Version.LUCENE_30, "body", new SimpleAnalyzer(Version.LUCENE_30)); Query luceneQuery = parser.parse(query); // JP was for lucene 2.2 // Hits hits = searcher.search(luceneQuery); //int resultlength = hits.length(); //if (limit>-1) { // resultlength = Math.min(limit,resultlength); //} TopDocs topDocs = searcher.search(luceneQuery, limit); ScoreDoc[] hits = topDocs.scoreDocs; int resultlength = hits.length; Vector<Term> fieldValues = null; for (int i = 0; i < resultlength; i++) { if (fieldNames != null) { fieldValues = new Vector<Term>(); for (int j = 0; j < fieldNames.size(); j++) { fieldValues .add(new Term(fieldNames.get(j), searcher.doc(hits[i].doc).get(fieldNames.get(j)))); } } result.add(new QueryResult(searcher.doc(hits[i].doc).get(LuceneIndexManager.DOCUMENT_ID), hits[i].score, fieldValues)); } // for (all search hints) searcher.close(); return new QueryResultList(query, indexedCorpus, result); } catch (java.io.IOException ioe) { throw new IndexException(ioe.getMessage()); } catch (org.apache.lucene.queryParser.ParseException pe) { throw new SearchException(pe.getMessage()); } }
From source file:gogoph.search.Server.java
License:Open Source License
/**
 * Searches a gopher index and prints the results as gopher directory lines.
 *
 * @param args args[0] is the index directory path, args[1] the query string
 * @throws IOException    if the index cannot be opened or read
 * @throws ParseException if the query string cannot be parsed
 */
public static void main(String[] args) throws IOException, ParseException {
    Directory index;
    index = new SimpleFSDirectory(new File(args[0]));
    String searchTerms = args[1];

    StandardAnalyzer analyzer;
    // 0. Specify the analyzer for tokenizing text.
    //    The same analyzer should be used for indexing and searching.
    analyzer = new StandardAnalyzer(Version.LUCENE_35);
    QueryParser parser = new QueryParser(Version.LUCENE_35, "content", analyzer);
    Query query = parser.parse(searchTerms);

    // 3. search — collect at most hitsPerPage top-scoring hits
    int hitsPerPage = 40;
    IndexReader reader = IndexReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
    searcher.search(query, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    // 4. Rebuild a GopherDirectoryEntity from each hit's stored fields,
    //    paired with its score in a SearchResult.
    SearchResult[] tab = new SearchResult[hits.length];
    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document d = searcher.doc(docId);
        GopherDirectoryEntity gop = new GopherDirectoryEntity();
        gop.setType(d.get("type"));
        gop.setUsername(d.get("title"));
        gop.setHost(d.get("host"));
        gop.setPort(Integer.parseInt(d.get("port")));
        gop.setSelector(d.get("selector"));
        tab[i] = new SearchResult(gop.getUsername(), gop, hits[i].score);
    }

    // The searcher can only be closed when there is no need to access the
    // documents any more.
    searcher.close();
    reader.close();

    // Interleave each result entity with a comment line carrying its URL;
    // the display name is prefixed with the score.
    ArrayList<GopherDirectoryEntity> tib;
    tib = new ArrayList<GopherDirectoryEntity>();
    for (int i = 0; i < tab.length; i++) {
        SearchResult item = tab[i];
        GopherDirectoryEntity node = item.getEntity();
        node.setUsername("(Score: " + item.getScore() + ") " + item.getTitle());
        // newComment: helper defined elsewhere in this class — builds a
        // comment entity from the gopher URL (presumably; verify in source).
        GopherDirectoryEntity nodeComment = newComment("gopher://" + node.getHost() + ":" + node.getPort() + "/"
                + node.getType() + node.getSelector());
        tib.add(node);
        tib.add(nodeComment);
    }
    index.close();

    // Emit gopher menu lines: type+display, selector, host, port, CRLF-terminated
    for (GopherDirectoryEntity item : tib) {
        System.out.print(item.getType() + item.getUsername() + "\t" + item.getSelector() + "\t" + item.getHost()
                + "\t" + item.getPort() + "\r\n");
    }
}
From source file:gov.ssa.test.lucenedemo.SearchFiles.java
/**
 * This demonstrates a typical paging search scenario, where the search
 * engine presents pages of size n to the user. The user can then go to the
 * next page if interested in the next hits.
 *
 * When the query is executed for the first time, then only enough results
 * are collected to fill 5 result pages. If the user wants to page beyond
 * this limit, then the query is executed another time and all hits are
 * collected.
 *
 * @param in          console input for the interactive prompts
 * @param searcher    the searcher to run the query against
 * @param query       the parsed user query
 * @param hitsPerPage page size
 * @param raw         if true, print raw doc id/score instead of fields
 * @param interactive if false, print the first page and return
 * @throws IOException on index or console I/O failure
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        // The user paged past what was collected so far: offer to re-run
        // the query collecting every hit.
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        // Print the current page
        for (int i = start; i < end; i++) {
            if (raw) {
                // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }
            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
                String title = doc.get("title");
                if (title != null) {
                    System.out.println(" Title: " + doc.get("title"));
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }

        if (!interactive || end == 0) {
            break;
        }

        // Interactive navigation: (p)revious, (n)ext, (q)uit, or a page number
        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");
                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    // Jump to a 1-based page number; out-of-range re-prompts.
                    // NOTE(review): a non-numeric entry makes parseInt throw
                    // NumberFormatException out of this method.
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit) {
                break;
            }
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
From source file:hellolucene.HelloLucene.java
public static void main(String[] args) throws IOException, ParseException { // 0. Specify the analyzer for tokenizing text. // The same analyzer should be used for indexing and searching StandardAnalyzer analyzer = new StandardAnalyzer(Version.LATEST); // 1. create the index Directory index = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); IndexWriter w = new IndexWriter(index, config); addDoc(w, "Lucene in Action", "193398817"); addDoc(w, "Lucene for Dummies", "55320055Z"); addDoc(w, "Managing Gigabytes", "55063554A"); addDoc(w, "The Art of Computer Science", "9900333X"); w.close();/*from w ww . j a va2 s.c om*/ // 2. query String querystr = args.length > 0 ? args[0] : "lucene"; // the "title" arg specifies the default field to use // when no field is explicitly specified in the query. Query q = new QueryParser(Version.LATEST, "title", analyzer).parse(querystr); // 3. search int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); searcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; // 4. display results System.out.println("Found " + hits.length + " hits."); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); System.out.println((i + 1) + ". " + d.get("isbn") + "\t" + d.get("title")); } // reader can only be closed when there // is no need to access the documents any more. reader.close(); }
From source file:HW1.SearchFiles.java
License:Apache License
public static void main(String[] args) throws Exception { String queryString = "dislike football"; String indexPath = "/Users/yangyang/Desktop/lucene/hw1/index/index04"; IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); searcher.setSimilarity(new BM25Similarity()); QueryParser parser = new QueryParser("TEXT", analyzer); Query query = parser.parse(queryString); System.out.println("Searching for: " + query.toString("TEXT")); TopDocs results = searcher.search(query, 10); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); for (int i = 0; i < hits.length; i++) { Document doc = searcher.doc(hits[i].doc); System.out.println("DOCNO: " + doc.get("DOCNO")); }/*from w w w. j av a2 s.c o m*/ reader.close(); }