List of usage examples for org.apache.lucene.search IndexSearcher doc
public Document doc(int docID) throws IOException
.getIndexReader().document(docID)
From source file:com.duroty.service.BookmarkOptimizerThread.java
License:Open Source License
/**
 * Merges every unlocked per-bookmark child index into the optimized index,
 * feeding the first stored document of each child into the spell-check
 * dictionary, then deletes the consumed child directory.
 *
 * <p>Fixes over the previous version: removed the dead {@code ex != null}
 * test inside a catch block (a caught exception can never be null) and
 * guarded against {@code File.listFiles()} returning null for a
 * non-directory child (previously an NPE).</p>
 *
 * @param childs candidate child index directories; null/empty is a no-op
 * @throws Exception propagated from Lucene index operations
 */
private void flush(File[] childs) throws Exception {
    if ((childs == null) || (childs.length == 0)) {
        return;
    }
    // Brief pause so writers finishing a child index can drop their lock file.
    try {
        Thread.sleep(100);
    } catch (Exception ignored) {
        // Best-effort delay only; interruption is not fatal here.
    }
    File optimized = new File(optimizedPath);
    boolean create = false;
    IndexWriter writer = null;
    try {
        if (!IndexReader.indexExists(optimized)) {
            optimized.mkdirs();
            create = true;
        }
        synchronized (this) {
            // Another process is writing the optimized index; try again later.
            if (IndexReader.isLocked(optimizedPath)) {
                return;
            }
            Directory dir = FSDirectory.getDirectory(new File(optimizedPath), create);
            writer = new IndexWriter(dir, analyzer, create);
            for (int i = 0; i < childs.length; i++) {
                File child = childs[i];
                File[] faux = child.listFiles();
                if (faux == null) {
                    // Not a directory (or I/O error) - nothing to merge.
                    continue;
                }
                // A child is ready to merge only once its "is.unlock" marker exists.
                boolean lock = true;
                for (int j = 0; j < faux.length; j++) {
                    if (faux[j].getName().equals("is.unlock")) {
                        faux[j].delete();
                        lock = false;
                        break;
                    }
                }
                if (!lock) {
                    Directory[] aux = new Directory[1];
                    aux[0] = FSDirectory.getDirectory(child, false);
                    IndexSearcher searcher = null;
                    try {
                        searcher = new IndexSearcher(aux[0]);
                        // Each child index holds a single document at slot 0.
                        Document doc = searcher.doc(0);
                        if (doc != null) {
                            BookmarkIndexer.createSpell(userPath + SPELL, Field_title, doc);
                            BookmarkIndexer.createSpell(userPath + SPELL, Field_keywords, doc);
                            BookmarkIndexer.createSpell(userPath + SPELL, Field_contents, doc);
                            BookmarkIndexer.createSpell(userPath + SPELL, Field_comments, doc);
                        }
                    } catch (Exception ex) {
                        // Spell data is optional; log and keep merging.
                        // NPEs here mean "no doc 0" and are deliberately ignored.
                        if (!(ex instanceof NullPointerException)) {
                            DLog.log(DLog.INFO, this.getClass(), ex);
                        }
                    } finally {
                        if (searcher != null) {
                            try {
                                searcher.close();
                            } catch (Exception ignored) {
                            }
                        }
                    }
                    writer.addIndexes(aux);
                    writer.optimize();
                    // The child index has been absorbed; remove it from disk.
                    for (int j = 0; j < faux.length; j++) {
                        faux[j].delete();
                    }
                    child.delete();
                }
            }
            writer.close();
            writer = null;
        }
    } finally {
        // Close the writer if anything above failed before the normal close.
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception ignored) {
            }
        }
    }
}
From source file:com.duroty.service.MailOptimizerThread.java
License:Open Source License
/**
 * Merges every unlocked per-mail child index into the optimized index,
 * feeding the first stored document of each child into the spell-check
 * dictionary, then deletes the consumed child directory.
 *
 * <p>Fixes over the previous version: removed the dead {@code ex != null}
 * test inside a catch block (a caught exception can never be null) and
 * guarded against {@code File.listFiles()} returning null for a
 * non-directory child (previously an NPE).</p>
 *
 * @param childs candidate child index directories; null/empty is a no-op
 * @throws Exception propagated from Lucene index operations
 */
private void flush(File[] childs) throws Exception {
    if ((childs == null) || (childs.length == 0)) {
        return;
    }
    // Brief pause so writers finishing a child index can drop their lock file.
    try {
        Thread.sleep(500);
    } catch (Exception ignored) {
        // Best-effort delay only; interruption is not fatal here.
    }
    File optimized = new File(optimizedPath);
    boolean create = false;
    IndexWriter writer = null;
    try {
        if (!IndexReader.indexExists(optimized)) {
            optimized.mkdirs();
            create = true;
        }
        synchronized (this) {
            // Another process is writing the optimized index; try again later.
            if (IndexReader.isLocked(optimizedPath)) {
                return;
            }
            Directory dir = FSDirectory.getDirectory(new File(optimizedPath), create);
            writer = new IndexWriter(dir, analyzer, create);
            for (int i = 0; i < childs.length; i++) {
                File child = childs[i];
                File[] faux = child.listFiles();
                if (faux == null) {
                    // Not a directory (or I/O error) - nothing to merge.
                    continue;
                }
                // A child is ready to merge only once its "is.unlock" marker exists.
                boolean lock = true;
                for (int j = 0; j < faux.length; j++) {
                    if (faux[j].getName().equals("is.unlock")) {
                        faux[j].delete();
                        lock = false;
                        break;
                    }
                }
                if (!lock) {
                    Directory[] aux = new Directory[1];
                    aux[0] = FSDirectory.getDirectory(child, false);
                    IndexSearcher searcher = null;
                    try {
                        searcher = new IndexSearcher(aux[0]);
                        // Each child index holds a single document at slot 0.
                        Document doc = searcher.doc(0);
                        if (doc != null) {
                            MailIndexer.createSpell(userPath + SPELL, Field_from, doc);
                            MailIndexer.createSpell(userPath + SPELL, Field_to, doc);
                            MailIndexer.createSpell(userPath + SPELL, Field_subject, doc);
                            MailIndexer.createSpell(userPath + SPELL, Field_body, doc);
                        }
                    } catch (Exception ex) {
                        // Spell data is optional; log and keep merging.
                        // NPEs here mean "no doc 0" and are deliberately ignored.
                        if (!(ex instanceof NullPointerException)) {
                            DLog.log(DLog.INFO, this.getClass(), ex);
                        }
                    } finally {
                        if (searcher != null) {
                            try {
                                searcher.close();
                            } catch (Exception ignored) {
                            }
                        }
                    }
                    writer.addIndexes(aux);
                    writer.optimize();
                    // The child index has been absorbed; remove it from disk.
                    for (int j = 0; j < faux.length; j++) {
                        faux[j].delete();
                    }
                    child.delete();
                }
            }
            writer.close();
            writer = null;
        }
    } finally {
        // Close the writer if anything above failed before the normal close.
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception ignored) {
            }
        }
    }
}
From source file:com.edgenius.wiki.search.service.AbstractSearchService.java
License:Open Source License
private int detach(IndexSearcher searcher, List<SearchResultItem> viewableMatchedResults, TopDocs hits, Query hlQuery, int from, int to, User user) throws IOException { Assert.isTrue(from <= to && from >= 0 && (to >= 0 || to == -1)); //For performance issue, we simply return total result set length without permission filter out. //This means is total length might be larger than the set that user can view, as some result will be filter out //if user doesn't have permission to see. int len = hits.totalHits; if (len > 0 && from < len) { to = to == -1 ? len : (len > to ? to : len); //security filter from return result List<Integer> resultIdx = new ArrayList<Integer>(); for (int idx = from; idx < to; idx++) { //does not include "to" , For example, from:to is 0:10, then return index is 0-9 //TODO: if page includes some result that invisible to user, it is better display message to tell user //some result is hidden for security reason. if (!isAllowView(searcher.doc(hits.scoreDocs[idx].doc), user)) continue; resultIdx.add(idx);//from w w w . j a v a 2s . 
c om } //create a highlighter for all fragment parser Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlighter\">", "</span>"); Highlighter hl = null; if (hlQuery != null) { Scorer scorer = new QueryScorer(hlQuery); hl = new Highlighter(formatter, scorer); Fragmenter fragmenter = new SimpleFragmenter(FRAGMENT_LEN); hl.setTextFragmenter(fragmenter); } for (int idx : resultIdx) { SearchResultItem item = new SearchResultItem(); Document doc = searcher.doc(hits.scoreDocs[idx].doc); String docType = doc.get(FieldName.DOC_TYPE); //common items in search results item.setType(NumberUtils.toInt(docType)); item.setDatetime(doc.get(FieldName.UPDATE_DATE)); if (userReadingService != null && !new Integer(SharedConstants.SEARCH_USER).toString().equals(docType)) { String username = doc.get(FieldName.CONTRIBUTOR); User contirUser = userReadingService.getUserByName(username); if (contirUser != null) { item.setContributor(contirUser.getFullname()); item.setContributorUsername(username); } } if (Integer.valueOf(SharedConstants.SEARCH_PAGE).toString().equals(docType)) { String content = doc.get(FieldName.PAGE_CONTENT); item.setTitle(doc.get(FieldName.PAGE_TITLE)); item.setSpaceUname(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME)); //does set item.desc() as content, which maybe very big string. no necessary returned item.setFragment(createFragment(hl, StringUtil.join(" ", item.getTitle(), content))); } else if (Integer.valueOf(SharedConstants.SEARCH_COMMENT).toString().equals(docType)) { String content = doc.get(FieldName.CONTENT); item.setItemUid(doc.get(FieldName.COMMENT_UID)); item.setSpaceUname(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME)); item.setTitle(doc.get(FieldName.UNSEARCH_PAGE_TITLE)); //does set item.desc() as content, which maybe very big string. 
no necessary returned item.setFragment(createFragment(hl, content)); } else if (Integer.valueOf(SharedConstants.SEARCH_SPACE).toString().equals(docType)) { String title = doc.get(FieldName.SPACE_NAME); item.setTitle(title); item.setSpaceUname(doc.get(FieldName.SPACE_UNIXNAME)); item.setDesc(doc.get(FieldName.SPACE_DESC)); item.setFragment(createFragment(hl, StringUtil.join(" ", item.getTitle(), item.getDesc()))); } else if (Integer.valueOf(SharedConstants.SEARCH_WIDGET).toString().equals(docType)) { //wTitle-> title; wDesc-> desc; wTitle(could be pageTitle or markup title) ->spaceUname String widgetType = doc.get(FieldName.WIDGET_TYPE); String title = doc.get(FieldName.WIDGET_TITLE); //does content need transfer back?? so far no String content = doc.get(FieldName.WIDGET_CONTENT); if (WidgetModel.TYPE_PAGE_LINKER.equals(widgetType)) { //don't use as Highlighter fragment content = ""; } String desc = doc.get(FieldName.WIDGET_DESC); item.setDesc(desc); item.setTitle(title); //add little confuse field mapping :( item.setSpaceUname(doc.get(FieldName.WIDGET_KEY)); item.setItemUid(widgetType); item.setFragment(createFragment(hl, StringUtil.join(" ", item.getDesc(), content))); } else if (Integer.valueOf(SharedConstants.SEARCH_PAGE_TAG).toString().equals(docType)) { //page tag item.setTitle(doc.get(FieldName.PAGE_TAG_NAME)); item.setSpaceUname(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME)); item.setFragment(createFragment(hl, item.getTitle())); } else if (Integer.valueOf(SharedConstants.SEARCH_SPACE_TAG).toString().equals(docType)) { //space tag item.setTitle(doc.get(FieldName.SPACE_TAG_NAME)); item.setFragment(createFragment(hl, item.getTitle())); } else if (Integer.valueOf(SharedConstants.SEARCH_USER).toString().equals(docType)) { String username = doc.get(FieldName.USER_NAME); item.setTitle(username); String fullname = doc.get(FieldName.USER_FULLNAME); //hacker - contributor is current user fullname item.setContributor(fullname); if (userReadingService != null) 
item.setDesc(userReadingService.getUserByName(username).getSetting().getStatus()); item.setFragment(createFragment(hl, fullname)); } else if (Integer.valueOf(SharedConstants.SEARCH_ROLE).toString().equals(docType)) { item.setSpaceUname(doc.get(FieldName.ROLE_NAME)); item.setTitle(doc.get(FieldName.ROLE_DISPLAY_NAME)); item.setDesc(doc.get(FieldName.ROLE_DESC)); //item.setFragment(""); } else if (Integer.valueOf(SharedConstants.SEARCH_ATTACHMENT).toString().equals(docType)) { item.setTitle(doc.get(FieldName.FILE_NAME)); item.setDesc(doc.get(FieldName.FILE_COMMENT)); item.setItemUid(doc.get(FieldName.FILE_NODE_UUID)); item.setSpaceUname(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME)); String text = doc.get(FieldName.TEXT); //does not mark file content fragment, because it does not store in index String fragment = createFragment(hl, StringUtil.join(" ", item.getDesc(), text)); item.setFragment( (fragment == null || fragment.trim().length() == 0) ? ("Comment: " + item.getDesc()) : fragment); } viewableMatchedResults.add(item); } } return len; }
From source file:com.edgenius.wiki.search.service.AttachmentSearchServiceImpl.java
License:Open Source License
public Document searchByNodeUuid(final String nodeUuid) throws SearchException { return (Document) this.search(new SearcherCallback() { public Object doWithSearcher(IndexSearcher searcher) throws SearchException { try { Term identifierTerm = new Term(FieldName.KEY, nodeUuid.toLowerCase()); TermQuery query = new TermQuery(identifierTerm); TopDocs hits = searcher.search(query, LuceneConfig.MAX_RETURN); Document doc = null; if (hits.totalHits > 0) { //assume only one doc = searcher.doc(hits.scoreDocs[0].doc); }/*from ww w . j a v a2s.c o m*/ return doc; } catch (Exception e) { throw new SearchException(e); } } }); }
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/**
 * Materializes a {@link FeatureIndexEntry} from a single search hit,
 * choosing the concrete entry type from the stored feature type. Bookmark
 * entries are additionally recorded in {@code foundBookmarkEntries} keyed
 * by their file id.
 *
 * <p>Fix: the chromosome-id binary value is now null-guarded, matching the
 * existing guard on the feature-id value (previously an unguarded
 * {@code utf8ToString()} could NPE on documents without a chromosome).</p>
 *
 * @param hit                  the scored hit to load
 * @param foundBookmarkEntries out-map collecting bookmark entries by file id
 * @param searcher             searcher used to load the stored document
 * @param vcfInfoFields        VCF INFO fields to copy for variation entries
 * @return the populated index entry
 * @throws IOException on index read failure
 */
private FeatureIndexEntry createIndexEntry(ScoreDoc hit, Map<Long, BookmarkIndexEntry> foundBookmarkEntries,
        IndexSearcher searcher, List<String> vcfInfoFields) throws IOException {
    int docId = hit.doc;
    Document d = searcher.doc(docId);
    FeatureType featureType = FeatureType.forValue(d.get(FeatureIndexFields.FEATURE_TYPE.getFieldName()));
    FeatureIndexEntry entry;
    switch (featureType) {
    case VARIATION:
        entry = createVcfIndexEntry(d, vcfInfoFields);
        break;
    case BOOKMARK:
        BookmarkIndexEntry bookmarkEntry = new BookmarkIndexEntry();
        // Remember bookmark hits so the caller can resolve them in bulk later.
        foundBookmarkEntries.put(Long.parseLong(d.get(FeatureIndexFields.FILE_ID.getFieldName())),
                bookmarkEntry);
        entry = bookmarkEntry;
        break;
    default:
        entry = new FeatureIndexEntry();
    }
    entry.setFeatureType(featureType);
    // Feature id is stored as a binary value and may be absent.
    BytesRef featureIdBytes = d.getBinaryValue(FeatureIndexFields.FEATURE_ID.getFieldName());
    if (featureIdBytes != null) {
        entry.setFeatureId(featureIdBytes.utf8ToString());
    }
    entry.setStartIndex(d.getField(FeatureIndexFields.START_INDEX.getFieldName()).numericValue().intValue());
    entry.setEndIndex(d.getField(FeatureIndexFields.END_INDEX.getFieldName()).numericValue().intValue());
    entry.setFeatureFileId(Long.parseLong(d.get(FeatureIndexFields.FILE_ID.getFieldName())));
    entry.setFeatureName(d.get(FeatureIndexFields.FEATURE_NAME.getFieldName()));
    // Chromosome is optional: skip when the stored value is missing or empty.
    BytesRef chromosomeIdBytes = d.getBinaryValue(FeatureIndexFields.CHROMOSOME_ID.getFieldName());
    if (chromosomeIdBytes != null) {
        String chromosomeId = chromosomeIdBytes.utf8ToString();
        if (!chromosomeId.isEmpty()) {
            entry.setChromosome(new Chromosome(Long.parseLong(chromosomeId)));
            entry.getChromosome().setName(
                    d.getBinaryValue(FeatureIndexFields.CHROMOSOME_NAME.getFieldName()).utf8ToString());
        }
    }
    return entry;
}
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
private Set<String> fetchGeneIds(final ScoreDoc[] hits, IndexSearcher searcher) throws IOException { Set<String> geneIds = new HashSet<>(); for (ScoreDoc hit : hits) { int docId = hit.doc; Document d = searcher.doc(docId); String geneId = d.get(FeatureIndexFields.GENE_ID.getFieldName()); String geneName = d.get(FeatureIndexFields.GENE_NAME.getFieldName()); if (geneId != null) { geneIds.add(geneId);//from ww w .j a v a2s. c o m } if (geneName != null) { geneIds.add(geneName); } } return geneIds; }
From source file:com.evoapps.lucene.SearchFiles.java
License:Apache License
/** * This demonstrates a typical paging search scenario, where the search engine presents * pages of size n to the user. The user can then go to the next page if interested in * the next hits.//from ww w . ja v a2 s . c o m * * When the query is executed for the first time, then only enough results are collected * to fill 5 result pages. If the user wants to page beyond this limit, then the query * is executed another time and all hits are collected. * */ public ArrayList<Publication> doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive) throws IOException { // Collect enough docs to show 5 pages TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + "total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { break; } hits = searcher.search(query, numTotalHits).scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { // output raw format System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } Document doc = searcher.doc(hits[i].doc); String path = doc.get("path"); String content = doc.get("contents"); if (path != null) { // System.out.println((i+1) + ". " + path+">>"+content); list.add(new Publication(content, path)); String title = doc.get("title"); if (title != null) { System.out.println(" Title: " + doc.get("title")); } } else { System.out.println((i + 1) + ". 
" + "No path for this document"); } } if (!interactive || end == 0) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page"); } } } if (quit) break; end = Math.min(numTotalHits, start + hitsPerPage); } } return list; }
From source file:com.example.search.SearchFiles.java
License:Apache License
/** * This demonstrates a typical paging search scenario, where the search * engine presents pages of size n to the user. The user can then go to the * next page if interested in the next hits. * /*from ww w. ja v a 2s. co m*/ * When the query is executed for the first time, then only enough results * are collected to fill 5 result pages. If the user wants to page beyond * this limit, then the query is executed another time and all hits are * collected. * * @throws InvalidTokenOffsetsException * */ public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive, ArrayList<SearchResult> result, int startPage) throws IOException, InvalidTokenOffsetsException { startPage = Math.max(0, startPage); // Collect enough docs to show 5 pages 10 // System.out.println("need " + startPage); TopDocs results = searcher.search(query, startPage + hitsPerPage);// 5 * // hitsPerPage ScoreDoc[] hits = results.scoreDocs; if (startPage > hits.length) return; int numTotalHits = results.totalHits; // System.out.println("judge " + hits.length + " " + numTotalHits); System.out.println(numTotalHits + " total matching documents"); int start = startPage; // int end = Math.min(numTotalHits, hitsPerPage); /* * if (end > hits.length) { System.out .println("Only results 1 - " + * hits.length + " of " + numTotalHits + * " total matching documents collected."); * System.out.println("Collect more (y/n) ?"); String line = * in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { * break; } * * hits = searcher.search(query, numTotalHits).scoreDocs; } */ int end = Math.min(hits.length, start + hitsPerPage); // ICTCLASAnalyzer analyzer = new ICTCLASAnalyzer(); for (int i = start; i < end; i++) { if (raw) { // output raw format // System.out.println("doc="+hits[i].doc+" score="+hits[i].score); System.out.println(" score=" + hits[i].score); // continue; } Document doc = searcher.doc(hits[i].doc); String path = 
doc.get("url"); if (path != null) { // System.out.println((i + 1) + ". " + path); String title = doc.get("title");// // if (title != null) { // System.out.println(" Title: " + title); String content = doc.get("content"); // if (content != null) // System.out.println("Content: " + content); SearchResult item = new SearchResult(); item.title = i + " " + title; item.url = path; item.score = hits[i].score; /*SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter( "<font color='red'>", "</font>"); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(100));*/ // if (content != null) { /* TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(content)); String highLightText = highlighter.getBestFragment( tokenStream, content); System.out.println(" " + (i + 1) + " "); System.out.println(highLightText); item.content=highLightText;*/ if (content.length() > 403) content = content.substring(0, 399) + "..."; item.content = content; result.add(item); //} } else { System.out.println((i + 1) + ". 
" + "No path for this document"); } } // end for // analyzer.close(); // if (numTotalHits >= end) { // boolean quit = false; // while (true) { // System.out.print("Press "); // if (start - hitsPerPage >= 0) { // System.out.print("(p)revious page, "); SearchResult.hasPrePage = (start - hitsPerPage >= 0); // } // if (start + hitsPerPage < numTotalHits) { // System.out.print("(n)ext page, "); // } SearchResult.hasNextPage = (start + hitsPerPage < numTotalHits); System.out.println("hasNextPage" + SearchResult.hasNextPage); // System.out // .println("(q)uit or enter number to jump to a page."); // String line = in.readLine(); /* * String line=new String(); if (line.length() == 0 || line.charAt(0) == * 'q') { quit = true; break; } */ /* * if (line.charAt(0) == 'p') { start = Math.max(0, start - * hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + * hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { * int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < * numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { * System.out.println("No such page"); } } // } if (quit) break; */ // end = Math.min(numTotalHits, start + hitsPerPage); }
From source file:com.flaptor.hounder.indexer.IndexManager.java
License:Apache License
/**
 * Performs the deletes and removes duplicates from the index: for every key
 * in {@code lastOperation}, all documents whose "AddId" does not match the
 * recorded last addition are deleted (a key whose last operation was a
 * delete matches nothing, so every document for it is removed).
 *
 * <p>Fix over the previous version: the {@code IndexSearcher} was re-created
 * on every loop iteration (only the final instance was ever closed); it is
 * now created once before the loop.</p>
 */
private synchronized void applyDeletes() {
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(indexDirectory);
        Set<Integer> documentsToDelete = new HashSet<Integer>();
        Enumeration keysEnum = lastOperation.keys();
        // One searcher over the reader serves every key lookup below.
        searcher = new IndexSearcher(reader);
        // First we collect the lucene ids of documents to be deleted.
        while (keysEnum.hasMoreElements()) {
            String key = (String) keysEnum.nextElement();
            // If the last operation was a delete, lastAddition is 0 and no doc
            // matches it, so every document with this DocumentId gets erased.
            String lastAddition = String.valueOf((Long) (lastOperation.get(key)));
            if (logger.isEnabledFor(Level.DEBUG)) {
                logger.debug("Applying deletes: searching " + docIdName + " = [" + key + "]");
            }
            ScorelessHitCollector collector = new HashSetScorelessHitCollector();
            searcher.search(new TermQuery(new Term(docIdName, key)), collector);
            Set<Integer> docIds = collector.getMatchingDocuments();
            if (logger.isEnabledFor(Level.DEBUG)) {
                logger.debug("Applying deletes: found matches: " + docIds.size());
            }
            for (Integer docId : docIds) {
                Document d = searcher.doc(docId);
                String addId = d.get("AddId");
                if (!lastAddition.equals(addId)) {
                    if (logger.isEnabledFor(Level.DEBUG)) {
                        logger.debug("Applying deletes: deleting AddId:" + addId);
                    }
                    documentsToDelete.add(docId);
                }
            }
        }
        // Now we have all lucene ids of documents to be deleted and can
        // proceed with the actual deletion.
        for (Integer i : documentsToDelete) {
            reader.deleteDocument(i);
        }
    } catch (IOException e) {
        logger.fatal("applyDeletes: IOException caught.", e);
        throw new RuntimeException(e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (Exception e) {
                String s = "applyDeletes: Couldn't close searcher, nothing I can do about it" + e;
                logger.error(s);
                throw new IllegalStateException(s);
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception e) {
                logger.warn("Couldn't close reader, nothing I can do about it", e);
            }
        }
    }
    lastOperation.clear();
}
From source file:com.flaptor.hounder.indexer.LuceneUnicodeTest.java
License:Apache License
/**
 * Verifies that a non-ASCII string survives the index-then-retrieve round
 * trip unchanged: the accented test string is indexed, searched for by exact
 * term, and the stored field value is compared with the original.
 */
@TestInfo(testType = TestInfo.TestType.UNIT)
public void testIndexedContent() {
    try {
        // Accented word exercises the unicode round trip.
        final String testString = "otorrinolaring\u00f3logo";
        logger.debug("Using test string: " + testString);
        Document indexed = new Document();
        indexed.add(new Field("field1", testString, Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(indexed);
        writer.optimize();
        writer.close();
        // Re-open the index and fetch the stored value back by exact term match.
        IndexReader reader = IndexReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        int docId = searcher.search(new TermQuery(new Term("field1", testString)), null, 10).scoreDocs[0].doc;
        Document fetched = searcher.doc(docId);
        String recoveredString = fetched.get("field1");
        logger.debug("Recovered String: " + recoveredString);
        assertTrue("Strings do not match", testString.equals(recoveredString));
    } catch (Exception e) {
        logger.error("Exception caught:" + e);
        assertTrue("exception", false);
    }
}