List of usage examples for org.apache.lucene.search IndexSearcher search
public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager) throws IOException
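Note that none of the examples below actually uses this CollectorManager overload; they call the older search(Query, int) and search(Query, Collector) forms. As a minimal sketch of the signature itself (assuming a Lucene release that provides it, i.e. 5.x or later, unlike the 3.x/4.x examples below), the following counts total hits with a hand-rolled CollectorManager so the per-collector collect step and the final reduce step are explicit. The class and method names here are illustrative only, not taken from any of the source files listed.

import java.io.IOException;
import java.util.Collection;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.Directory;

public class CollectorManagerSketch {

    // Counts all documents matching the query; the reduced result is the generic type T (here Integer).
    public static int countHits(Directory directory, Query query) throws IOException {
        try (DirectoryReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            CollectorManager<TotalHitCountCollector, Integer> manager =
                    new CollectorManager<TotalHitCountCollector, Integer>() {
                        @Override
                        public TotalHitCountCollector newCollector() {
                            // called once per search slice when the searcher was built with an Executor
                            return new TotalHitCountCollector();
                        }

                        @Override
                        public Integer reduce(Collection<TotalHitCountCollector> collectors) {
                            // merge the partial counts into the final result
                            int total = 0;
                            for (TotalHitCountCollector collector : collectors) {
                                total += collector.getTotalHits();
                            }
                            return total;
                        }
                    };
            // the search(Query query, CollectorManager<C, T> collectorManager) overload shown above
            return searcher.search(query, manager);
        }
    }
}

When the IndexSearcher is constructed with an Executor, newCollector() is invoked once per index slice and reduce() merges the partial results; without an Executor a single collector is created and reduce() receives a one-element collection.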
From source file:collene.TestShakespeare.java
License:Apache License
@Test
public void rest() throws IOException, ParseException {
    File shakespeareDir = new File("src/test/resources/shakespeare");
    File[] files = shakespeareDir.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return !pathname.isHidden();
        }
    });
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(directory, config);
    long startIndexTime = System.currentTimeMillis();
    final int flushLines = 200;
    int totalLines = 0;
    Collection<Document> documents = new ArrayList<Document>();
    for (File f : files) {
        String play = f.getName();
        int lineNumber = 1;
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
        String line = reader.readLine();
        while (line != null) {
            // index it.
            Document doc = new Document();
            doc.add(new NumericDocValuesField("line", lineNumber));
            doc.add(new Field("play", play, TextField.TYPE_STORED));
            doc.add(new Field("content", line, TextField.TYPE_STORED));
            documents.add(doc);
            totalLines += 1;
            if (totalLines % flushLines == 0) {
                writer.addDocuments(documents);
                documents.clear();
            }
            lineNumber += 1;
            line = reader.readLine();
        }
        reader.close();
    }
    if (documents.size() > 0) {
        writer.addDocuments(documents);
    }
    long endIndexTime = System.currentTimeMillis();
    System.out.println(
            String.format("Index for %s took %d ms", directory.toString(), endIndexTime - startIndexTime));
    //System.out.println(String.format("%s committed", directory.getClass().getSimpleName()));
    // writer.forceMerge(1);
    // System.out.println(String.format("%s merged", directory.getClass().getSimpleName()));

    // let's search!
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, false));
    QueryParser parser = new QueryParser(Version.LUCENE_4_9, "content", analyzer);
    String[] queryTerms = new String[] { "trumpet" };
    for (String term : queryTerms) {
        long searchStart = System.currentTimeMillis();
        Query query = parser.parse(term);
        TopDocs docs = searcher.search(query, 10);
        long searchEnd = System.currentTimeMillis();
        System.out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(),
                docs.totalHits, searchEnd - searchStart));
        for (ScoreDoc doc : docs.scoreDocs) {
            System.out.println(String.format("%d %.2f %d", doc.doc, doc.score, doc.shardIndex));
        }
    }
    writer.close(true);
    //System.out.println(String.format("%s closed", directory.getClass().getSimpleName()));
    System.out.println("I think these are the files:");
    for (String s : directory.listAll()) {
        System.out.println(s);
    }
    directory.close();
}
From source file:com.adanac.module.blog.search.LuceneHelper.java
License:Apache License
private static List<Map<String, String>> search(String searchText, String path, String title,
        LoadQuery loadQuery) {
    try {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_PATH + path)));
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new SmartChineseAnalyzer();
        QueryParser parser = new QueryParser("indexedContent", analyzer);
        Query query = parser.parse(searchText);
        TopDocs resultDocs = searcher.search(query, 100);
        ScoreDoc[] scoreDocs = resultDocs.scoreDocs;
        SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<span style=\"color:red;\">",
                "</span>");
        Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(150));
        List<Map<String, String>> result = new ArrayList<>();
        List<Integer> idList = new ArrayList<>();
        for (int i = 0; i < scoreDocs.length; i++) {
            Document doc = searcher.doc(scoreDocs[i].doc);
            Integer id = Integer.valueOf(doc.get("id"));
            if (!idList.contains(id)) {
                String indexedContent = doc.get("indexedContent");
                TokenStream tokenStream = analyzer.tokenStream("indexedContent", indexedContent);
                Map<String, String> data = loadQuery.getById(id);
                String highlighterString = highlighter.getBestFragment(tokenStream, indexedContent);
                if (highlighterString.contains(SEPARATOR)) {
                    String[] array = highlighterString.split(SEPARATOR);
                    data.put(title, array[0]);
                    if (array.length > 1) {
                        data.put("summary", array[1]);
                    }
                } else {
                    data.put("summary", highlighterString);
                }
                result.add(data);
                idList.add(id);
            }
        }
        return result;
    } catch (Exception e) {
        logger.error("search failed ...", e);
    }
    return new ArrayList<>();
}
From source file:com.aliasi.lingmed.medline.SearchableMedlineCodec.java
License:Lingpipe license
public static void main(String[] args) throws Exception {
    org.apache.lucene.store.RAMDirectory directory = new org.apache.lucene.store.RAMDirectory();
    // org.apache.lucene.analysis.SimpleAnalyzer analyzer
    //     = new org.apache.lucene.analysis.SimpleAnalyzer();
    // org.apache.lucene.analysis.KeywordAnalyzer analyzer
    //     = new org.apache.lucene.analysis.KeywordAnalyzer();
    MedlineCodec codec = new MedlineCodec();
    Analyzer analyzer = codec.getAnalyzer();
    org.apache.lucene.index.IndexWriterConfig iwConf = new org.apache.lucene.index.IndexWriterConfig(
            org.apache.lucene.util.Version.LUCENE_36, analyzer);
    iwConf.setOpenMode(org.apache.lucene.index.IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    org.apache.lucene.index.IndexWriter indexWriter = new org.apache.lucene.index.IndexWriter(directory,
            iwConf);
    Document doc = new Document();
    doc.add(new Field(Fields.MESH_MINOR_FIELD, "abc", Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field(Fields.MESH_MINOR_FIELD, " xyz efg", Field.Store.NO, Field.Index.ANALYZED));
    indexWriter.addDocument(doc);
    indexWriter.close();

    org.apache.lucene.index.IndexReader reader = org.apache.lucene.index.IndexReader.open(directory);
    org.apache.lucene.search.IndexSearcher searcher = new org.apache.lucene.search.IndexSearcher(reader);
    org.apache.lucene.queryParser.QueryParser qp = new org.apache.lucene.queryParser.QueryParser(
            org.apache.lucene.util.Version.LUCENE_36, "foo", analyzer);
    org.apache.lucene.search.Query query = qp.parse(Fields.MESH_MINOR_FIELD + ":efg");
    org.apache.lucene.search.TopDocs hits = searcher.search(query, 1000);
    System.out.println("hits.length()=" + hits.scoreDocs.length);

    org.apache.lucene.analysis.TokenStream ts = analyzer.tokenStream(Fields.MESH_MINOR_FIELD,
            new java.io.StringReader("abc xyz efg"));
    org.apache.lucene.analysis.tokenattributes.CharTermAttribute terms = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class);
    org.apache.lucene.analysis.tokenattributes.OffsetAttribute offsets = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute.class);
    org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute positions = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute.class);
    while (ts.incrementToken()) {
        int increment = positions.getPositionIncrement();
        int start = offsets.startOffset();
        int end = offsets.endOffset();
        String term = terms.toString();
        System.out.println("token=|" + term + "|" + " startOffset=" + start + " endOffset=" + end
                + " positionIncr=" + increment);
    }
}
From source file:com.andreig.jetty.Search.java
License:GNU General Public License
public Document[] search(String dbid, String k, String v, int count) throws IOException, ParseException {
    Term t = new Term(k, v);
    Query q = new TermQuery(t);
    Query q2 = add_dbid(q, dbid);
    TopScoreDocCollector collector = TopScoreDocCollector.create(count, true);
    IndexSearcher searcher = sm.acquire();
    Document docs[] = null;
    try {
        searcher.search(q2, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        if (hits.length == 0)
            return null;
        docs = new Document[hits.length];
        for (int i = 0; i < hits.length; i++) {
            int doc_id = hits[i].doc;
            docs[i] = searcher.doc(doc_id);
        }
    } finally {
        sm.release(searcher);
    }
    return docs;
}
From source file:com.andreig.jetty.Search.java
License:GNU General Public License
public Document[] search2(String dbid, String q, int count) throws IOException, ParseException {
    Query query = tl.get().parse(QueryParser.escape(q));
    Query q2 = add_dbid(query, dbid);
    TopScoreDocCollector collector = TopScoreDocCollector.create(count, true);
    IndexSearcher searcher = sm.acquire();
    Document docs[] = null;
    try {
        searcher.search(q2, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        if (hits.length == 0)
            return null;
        docs = new Document[hits.length];
        for (int i = 0; i < hits.length; i++) {
            int doc_id = hits[i].doc;
            docs[i] = searcher.doc(doc_id);
        }
    } finally {
        sm.release(searcher);
    }
    return docs;
}
From source file:com.aperigeek.dropvault.web.service.IndexService.java
License:Open Source License
public List<String> search(String username, String password, String query) throws IndexException {
    try {
        IndexSearcher searcher = getIndexSearcher(username, password);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_33);
        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_33, new String[] { "title", "body" },
                analyzer);
        Query luceneQuery = parser.parse(query);
        TopDocs docs = searcher.search(luceneQuery, 10);
        List<String> results = new ArrayList<String>();
        for (ScoreDoc doc : docs.scoreDocs) {
            results.add(searcher.doc(doc.doc).getFieldable("id").stringValue());
        }
        searcher.close();
        return results;
    } catch (IOException ex) {
        throw new IndexException(ex);
    } catch (ParseException ex) {
        throw new IndexException("Invalid query syntax", ex);
    }
}
From source file:com.aurel.track.lucene.search.associatedFields.AbstractAssociatedFieldSearcher.java
License:Open Source License
/**
 * Get the OR separated IDs which match the specified field's user entered string
 * @param analyzer
 * @param fieldName
 * @param fieldValue
 * @param fieldID
 * @param locale
 * @return
 */
@Override
protected String searchExplicitField(Analyzer analyzer, String fieldName, String fieldValue, Integer fieldID,
        Locale locale) {
    IndexSearcher indexSearcher = null;
    try {
        Query query = getAssociatedFieldQuery(analyzer, fieldValue);
        if (query == null) {
            return fieldValue;
        }
        indexSearcher = LuceneSearcher.getIndexSearcher(getIndexSearcherID());
        if (indexSearcher == null) {
            return fieldValue;
        }
        ScoreDoc[] scoreDocs;
        try {
            TopDocsCollector<ScoreDoc> collector = TopScoreDocCollector.create(LuceneSearcher.MAXIMAL_HITS);
            indexSearcher.search(query, collector);
            scoreDocs = collector.topDocs().scoreDocs;
        } catch (IOException e) {
            LOGGER.warn("Searching the " + getLuceneFieldName() + " failed with " + e.getMessage());
            LOGGER.debug(ExceptionUtils.getStackTrace(e));
            return fieldValue;
        }
        if (scoreDocs == null || scoreDocs.length == 0) {
            return fieldValue;
        }
        if (scoreDocs.length > LuceneSearcher.MAX_BOOLEAN_CLAUSES) {
            LOGGER.warn("Maximum number of boolean clauses was exceeded");
        }
        Set<Integer> workItemIDs = new HashSet<Integer>();
        for (int i = 0; i < scoreDocs.length; i++) {
            int docID = scoreDocs[i].doc;
            Document doc;
            try {
                doc = indexSearcher.doc(docID);
            } catch (IOException e) {
                LOGGER.error("Getting the documents from index searcher for " + getLuceneFieldName()
                        + " failed with " + e.getMessage());
                LOGGER.debug(ExceptionUtils.getStackTrace(e));
                return fieldValue;
            }
            String workItemFieldName = getWorkItemFieldName();
            String workItemIDStr = doc.get(workItemFieldName);
            Integer workItemID = null;
            if (workItemIDStr != null) {
                try {
                    workItemID = Integer.valueOf(workItemIDStr);
                    workItemIDs.add(workItemID);
                } catch (Exception e) {
                    LOGGER.debug(e);
                }
            }
            //by links there are two workitems for bidirectional links
            String additionalWorkItemFieldName = getAdditionalWorkItemFieldName(doc);
            if (additionalWorkItemFieldName != null) {
                workItemIDStr = doc.get(additionalWorkItemFieldName);
                workItemID = null;
                if (workItemIDStr != null) {
                    try {
                        workItemID = Integer.valueOf(workItemIDStr);
                        workItemIDs.add(workItemID);
                    } catch (Exception e) {
                    }
                }
            }
        }
        return LuceneSearcher.createORDividedIDs(workItemIDs);
    } catch (Exception e) {
        LOGGER.warn("Getting the " + getLuceneFieldName() + " field " + fieldValue + " failed with "
                + e.getMessage());
        LOGGER.debug(ExceptionUtils.getStackTrace(e));
        return fieldValue;
    } finally {
        LuceneSearcher.closeIndexSearcherAndUnderlyingIndexReader(indexSearcher, getLuceneFieldName());
    }
}
From source file:com.aurel.track.lucene.search.listFields.AbstractListFieldSearcher.java
License:Open Source License
/**
 * Finds the list options which match the user entered string in link descriptions
 * @param analyzer
 * @param fieldName
 * @param label
 * @param fieldID
 * @param locale
 * @return
 */
@Override
protected String searchExplicitField(Analyzer analyzer, String fieldName, String label, Integer fieldID,
        Locale locale) {
    IndexSearcher indexSearcher = null;
    try {
        Query query = getExplicitFieldQuery(analyzer, fieldName, label, fieldID, locale);
        if (query == null) {
            return label;
        }
        indexSearcher = LuceneSearcher.getIndexSearcher(getIndexSearcherID());
        if (indexSearcher == null) {
            return label;
        }
        ScoreDoc[] scoreDocs;
        try {
            TopDocsCollector<ScoreDoc> collector = TopScoreDocCollector.create(LuceneSearcher.MAXIMAL_HITS);
            indexSearcher.search(query, collector);
            scoreDocs = collector.topDocs().scoreDocs;
        } catch (IOException e) {
            LOGGER.warn("Searching by fieldName " + fieldName + " and fieldValue " + label + " failed with "
                    + e.getMessage());
            LOGGER.debug(ExceptionUtils.getStackTrace(e));
            return label;
        }
        if (scoreDocs == null || scoreDocs.length == 0) {
            return label;
        }
        if (scoreDocs.length > LuceneSearcher.MAX_BOOLEAN_CLAUSES) {
            LOGGER.warn("Maximum number of boolean clauses was exceeded");
        }
        Set<Integer> listOptionIDs = new HashSet<Integer>();
        for (int i = 0; i < scoreDocs.length; i++) {
            int docID = scoreDocs[i].doc;
            Document doc;
            try {
                doc = indexSearcher.doc(docID);
            } catch (IOException e) {
                LOGGER.error("Getting the documents from index searcher for fieldName " + fieldName
                        + " and fieldValue " + label + " failed with " + e.getMessage());
                LOGGER.debug(ExceptionUtils.getStackTrace(e));
                return label;
            }
            String listOptionIDStr = doc.get(getValueFieldName());
            Integer listOptionID = null;
            if (listOptionIDStr != null) {
                try {
                    listOptionID = Integer.valueOf(listOptionIDStr);
                    listOptionIDs.add(listOptionID);
                } catch (Exception e) {
                }
            }
        }
        return LuceneSearcher.createORDividedIDs(listOptionIDs);
    } catch (Exception e) {
        LOGGER.error("Getting the fieldName " + fieldName + " and fieldValue " + label + " failed with "
                + e.getMessage());
        LOGGER.debug(ExceptionUtils.getStackTrace(e));
        return label;
    } finally {
        LuceneSearcher.closeIndexSearcherAndUnderlyingIndexReader(indexSearcher, fieldName);
    }
}
From source file:com.aurel.track.lucene.search.listFields.AbstractListFieldSearcher.java
License:Open Source License
/**
 * Get the workItemIDs which match the user entered string
 * @param analyzer
 * @param fieldValue
 * @param locale
 * @return
 */
@Override
protected String searchNoExplicitField(Analyzer analyzer, String toBeProcessedString, Locale locale) {
    IndexSearcher indexSearcher = null;
    Map<Integer, Set<Integer>> result = new HashMap<Integer, Set<Integer>>();
    try {
        Query lookupQuery = getNoExlplicitFieldQuery(analyzer, toBeProcessedString, locale);
        if (lookupQuery == null) {
            return "";
        }
        indexSearcher = LuceneSearcher.getIndexSearcher(getIndexSearcherID());
        if (indexSearcher == null) {
            return "";
        }
        ScoreDoc[] scoreDocs;
        try {
            TopDocsCollector<ScoreDoc> collector = TopScoreDocCollector.create(LuceneSearcher.MAXIMAL_HITS);
            indexSearcher.search(lookupQuery, collector);
            scoreDocs = collector.topDocs().scoreDocs;
        } catch (IOException e) {
            return "";
        }
        if (scoreDocs == null || scoreDocs.length == 0) {
            return "";
        }
        if (scoreDocs.length > LuceneSearcher.MAX_BOOLEAN_CLAUSES) {
            LOGGER.warn("Maximum number of boolean clauses was exceeded by not localized lookup");
        }
        Document doc;
        for (int i = 0; i < scoreDocs.length; i++) {
            int docID = scoreDocs[i].doc;
            try {
                doc = indexSearcher.doc(docID);
            } catch (IOException e) {
                LOGGER.error("Getting the documents from index searcher for fieldValue " + toBeProcessedString
                        + " failed with " + e.getMessage());
                LOGGER.debug(ExceptionUtils.getStackTrace(e));
                return "";
            }
            String typeStr = doc.get(getTypeFieldName());
            String idStr = doc.get(getValueFieldName());
            Integer type = null;
            Integer id = null;
            try {
                type = Integer.valueOf(typeStr);
                id = Integer.valueOf(idStr);
                Set<Integer> ids = result.get(type);
                if (ids == null) {
                    ids = new HashSet<Integer>();
                    result.put(type, ids);
                }
                ids.add(id);
            } catch (NumberFormatException ex) {
                continue;
            }
        }
    } catch (Exception ex) {
    } finally {
        LuceneSearcher.closeIndexSearcherAndUnderlyingIndexReader(indexSearcher, "no field");
    }
    Set<Integer> types = result.keySet();
    StringBuffer directQuery = new StringBuffer();
    for (Integer type : types) {
        Set<Integer> ids = result.get(type);
        String orDividedIDs = LuceneSearcher.createORDividedIDs(ids);
        String[] workItemFieldNames = getWorkItemFieldNames(type);
        for (int i = 0; i < workItemFieldNames.length; i++) {
            if (i > 0) {
                directQuery.append(" OR ");
            }
            if (ids.size() > 1) {
                directQuery.append(workItemFieldNames[i] + LuceneSearcher.FIELD_NAME_VALUE_SEPARATOR + "("
                        + orDividedIDs + ")");
            } else {
                directQuery.append(
                        workItemFieldNames[i] + LuceneSearcher.FIELD_NAME_VALUE_SEPARATOR + orDividedIDs);
            }
        }
    }
    return directQuery.toString();
}
From source file:com.aurel.track.lucene.search.LuceneSearcher.java
License:Open Source License
private static int[] getQueryResults(Query query, String userQueryString, String preprocessedQueryString,
        Map<Integer, String> highlightedTextMap) {
    int[] hitIDs = new int[0];
    IndexSearcher indexSearcher = null;
    try {
        long start = 0;
        if (LOGGER.isDebugEnabled()) {
            start = new Date().getTime();
        }
        indexSearcher = getIndexSearcher(LuceneUtil.INDEXES.WORKITEM_INDEX);
        if (indexSearcher == null) {
            return hitIDs;
        }
        ScoreDoc[] scoreDocs;
        try {
            TopDocsCollector<ScoreDoc> collector = TopScoreDocCollector.create(MAXIMAL_HITS);
            indexSearcher.search(query, collector);
            scoreDocs = collector.topDocs().scoreDocs;
        } catch (IOException e) {
            LOGGER.warn("Getting the workitem search results failed with " + e.getMessage());
            LOGGER.debug(ExceptionUtils.getStackTrace(e));
            return hitIDs;
        }
        if (LOGGER.isDebugEnabled()) {
            long end = new Date().getTime();
            LOGGER.debug("Found " + scoreDocs.length + " document(s) (in " + (end - start)
                    + " milliseconds) that matched the user query '" + userQueryString
                    + "' the preprocessed query '" + preprocessedQueryString + "' and the query.toString() '"
                    + query.toString() + "'");
        }
        QueryScorer queryScorer = new QueryScorer(query/*, LuceneUtil.HIGHLIGHTER_FIELD*/);
        Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
        Highlighter highlighter = new Highlighter(queryScorer); // Set the best scorer fragments
        highlighter.setTextFragmenter(fragmenter); // Set fragment to highlight
        hitIDs = new int[scoreDocs.length];
        for (int i = 0; i < scoreDocs.length; i++) {
            int docID = scoreDocs[i].doc;
            Document doc = null;
            try {
                doc = indexSearcher.doc(docID);
            } catch (IOException e) {
                LOGGER.error("Getting the workitem documents failed with " + e.getMessage());
                LOGGER.debug(ExceptionUtils.getStackTrace(e));
            }
            if (doc != null) {
                Integer itemID = Integer.valueOf(doc.get(LuceneUtil.getFieldName(SystemFields.ISSUENO)));
                if (itemID != null) {
                    hitIDs[i] = itemID.intValue();
                    if (highlightedTextMap != null) {
                        String highligherFieldValue = doc.get(LuceneUtil.HIGHLIGHTER_FIELD);
                        TokenStream tokenStream = null;
                        try {
                            tokenStream = TokenSources.getTokenStream(LuceneUtil.HIGHLIGHTER_FIELD, null,
                                    highligherFieldValue, LuceneUtil.getAnalyzer(), -1);
                        } catch (Exception ex) {
                            LOGGER.debug(ex.getMessage());
                        }
                        if (tokenStream != null) {
                            String fragment = highlighter.getBestFragment(tokenStream, highligherFieldValue);
                            if (fragment != null) {
                                highlightedTextMap.put(itemID, fragment);
                            }
                        }
                    }
                }
            }
        }
        return hitIDs;
    } catch (BooleanQuery.TooManyClauses e) {
        LOGGER.error("Searching the query resulted in too many clauses. Try to narrow the query results. "
                + e.getMessage());
        LOGGER.debug(ExceptionUtils.getStackTrace(e));
        throw e;
    } catch (Exception e) {
        LOGGER.error("Searching the workitems failed with " + e.getMessage());
        LOGGER.debug(ExceptionUtils.getStackTrace(e));
        return hitIDs;
    } finally {
        closeIndexSearcherAndUnderlyingIndexReader(indexSearcher, "workItem");
    }
}