Usage examples for org.apache.lucene.search.IndexSearcher#getIndexReader()
public IndexReader getIndexReader()
From source file:perf.TestBenchNRTPKLookup.java
License:Apache License
/**
 * Benchmark of near-real-time (NRT) primary-key lookups: repeatedly indexes documents
 * keyed by a random 10-digit "_id", and before each update performs a per-segment
 * TermsEnum.seekExact lookup through a searcher acquired from a SearcherManager.
 * Prints elapsed time every 50k iterations and refreshes the NRT searcher every 250k.
 * Usage: args[0] = index directory path.
 */
public static void main(String[] args) throws IOException {
    Directory dir = new MMapDirectory(new File(args[0]));
    //Directory dir = new NIOFSDirectory(new File(args[0]));
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    iwc.setRAMBufferSizeMB(250);
    IndexWriter writer = new IndexWriter(dir, iwc);
    // NRT SearcherManager wrapping the writer; applyAllDeletes=true.
    final SearcherManager manager = new SearcherManager(writer, true, new SearcherFactory() {
        @Override
        public IndexSearcher newSearcher(IndexReader r) {
            return new IndexSearcher(r);
        }
    });
    // Indexed, untokenized, unstored field type for the primary key.
    FieldType type = new FieldType();
    type.setIndexed(true);
    type.setTokenized(false);
    type.setStored(false);
    type.freeze();
    // Cache keyed by the segment's combined core+deletes key; intentionally never
    // populated here — see the commented put() below for the cached variant.
    HashMap<Object, TermsEnum> cachedTermsEnum = new HashMap<Object, TermsEnum>();
    long time = System.currentTimeMillis();
    long lastTime = time;
    int num = 2500000;
    // Fixed seed so runs are repeatable.
    Random r = new Random(16);
    for (int i = 0; i < num; i++) {
        //Term t = new Term("_id", Integer.toString(i));
        String id = String.format("%010d", r.nextInt(Integer.MAX_VALUE));
        Term t = new Term("_id", id);
        IndexSearcher acquire = manager.acquire();
        try {
            IndexReader indexReader = acquire.getIndexReader();
            List<AtomicReaderContext> leaves = indexReader.leaves();
            // Per-segment PK lookup: stop at the first segment containing the term.
            for (AtomicReaderContext atomicReaderContext : leaves) {
                AtomicReader reader = atomicReaderContext.reader();
                TermsEnum termsEnum = cachedTermsEnum.get(reader.getCombinedCoreAndDeletesKey());
                if (termsEnum == null) {
                    termsEnum = reader.fields().terms("_id").iterator(null);
                    //cachedTermsEnum.put(reader.getCombinedCoreAndDeletesKey(), termsEnum); // uncomment this line to see improvements
                }
                // MKM
                //System.out.println("\nlookup seg=: " + reader + " term=" + t);
                if (termsEnum.seekExact(t.bytes())) {
                    DocsEnum termDocsEnum = termsEnum.docs(reader.getLiveDocs(), null);
                    if (termDocsEnum != null) {
                        break;
                    }
                }
            }
        } finally {
            // Always release the acquired searcher back to the manager.
            manager.release(acquire);
        }
        Document d = new Document();
        d.add(new Field("_id", id, type));
        // updateDocument = delete-by-term + add, i.e. a PK upsert.
        writer.updateDocument(t, d);
        //writer.addDocument(d);
        if (i % 50000 == 0) {
            long t1 = System.currentTimeMillis();
            System.out.println(i + " " + (t1 - lastTime) + " ms");
            lastTime = t1;
        }
        if ((i + 1) % 250000 == 0) {
            System.out.println("Reopen...");
            manager.maybeRefresh();
            IndexSearcher s = manager.acquire();
            try {
                System.out.println(" got: " + s);
            } finally {
                manager.release(s);
            }
        }
    }
    System.out.println("\nTotal: " + (System.currentTimeMillis() - time) + " msec");
    //System.out.println("loadBlockCount: " + BlockTreeTermsReader.loadBlockCount);
    manager.close();
    writer.close();
    dir.close();
}
From source file:perLucene.Server.java
License:Open Source License
/** * Use at the beginning to obtain the last id that this replica has indexed */// www. j av a 2s . co m protected static long getId() { // not necessary if used at the beginning try { sm.maybeRefreshBlocking(); IndexSearcher s = sm.acquire(); List<AtomicReaderContext> leaves = s.getIndexReader().leaves(); int size = leaves.size(); AtomicReader r = leaves.get(size - 1).reader(); int lastDoc = r.maxDoc() - 1; long id = r.docValues("uid").getDirectSource().getInt(lastDoc); sm.release(s); return id; } catch (Exception e) { System.out.println("Couldnt get last Id(refresh)"); System.exit(-1); } // should never happen return -1; }
From source file:persistence.lucene.search.QueryExecutorHelper.java
License:Open Source License
List<QueryResult> queryDocuments(final String query, final String[] fields, final String user) throws ParseException, IOException, InvalidTokenOffsetsException { int hitsPerPage = 10; MultiFieldQueryParser queryParser = new MultiFieldQueryParser(fields, new StandardAnalyzer()); Query luceneQuery = queryParser.parse(query); Highlighter highlighter = new Highlighter(new QueryScorer(luceneQuery)); DirectoryReader indexReader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(indexReader); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); TermsFilter filter = new TermsFilter(new Term(FullTextSearchResource.DOCUMENT_OWNER, user.toLowerCase())); searcher.search(luceneQuery, filter, collector); ScoreDoc[] docs = collector.topDocs().scoreDocs; List<QueryResult> resultBeans = new ArrayList<>(docs.length); for (ScoreDoc doc : docs) { Document document = searcher.doc(doc.doc); String text = document.get(FullTextSearchResource.DOCUMENT_CONTENT_FIELD); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), doc.doc, FullTextSearchResource.DOCUMENT_CONTENT_FIELD, standardAnalyzer); TextFragment[] fragments = highlighter.getBestTextFragments(tokenStream, text, false, 10); resultBeans.add(new QueryResult(doc.doc, doc.score, document, fragments)); }//from w w w . j av a2s .c o m indexReader.close(); return resultBeans; }
From source file:psidev.psi.mi.search.index.PsimiTabIndexWriterTest.java
License:Apache License
/**
 * Indexes a sample MITAB file and verifies that a search for "P47077" returns
 * exactly one hit whose stored fields match the expected tab-separated line
 * column-by-column ("detmethod" and "type" are checked against their "_exact"
 * variants).
 */
@Test
public void testIndex() throws Exception {
    Directory indexDirectory = TestHelper.createIndexFromResource("/mitab_samples/intact.sample.tsv");
    String matchedLine = "uniprotkb:P47077\tuniprotkb:P40069\t-\tgene name:KAP123\tlocus name:YJL010C|orf name:J1357\tgene name synonym:YRB4|locus name:YER110C\tpsi-mi:\"MI:0096\"(pull down)\t-\tpubmed:14690591\ttaxid:4932(yeast)\ttaxid:4932(yeast)\tpsi-mi:\"MI:0218\"(physical interaction)\tpsi-mi:\"MI:0469\"(intact)\tintact:EBI-854045\t-";
    String[] valuesExpectedForLine = matchedLine.split("\t");
    ColumnBasedDocumentDefinition docDefinition = MitabDocumentDefinitionFactory.mitab25();
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
    IndexReader reader = IndexReader.open(indexDirectory);
    // FIX: close the reader when done — it was previously leaked by the test.
    try {
        IndexSearcher is = new IndexSearcher(reader);
        QueryParser parser = new QueryParser(Version.LUCENE_30, "id", analyzer);
        Query query = parser.parse("P47077");
        TopDocs hits = is.search(query, 20);
        assertEquals(1, hits.totalHits);
        ScoreDoc[] docs = hits.scoreDocs;
        for (ScoreDoc hit : docs) {
            Document doc = is.getIndexReader().document(hit.doc);
            for (int i = 0; i < docDefinition.getHighestColumnPosition(); i++) {
                ColumnDefinition colDef = docDefinition.getColumnByPosition(i);
                if (colDef.getKey().equals("detmethod") || colDef.getKey().equals("type")) {
                    // These columns are indexed under a "<key>_exact" stored field.
                    Assert.assertEquals(valuesExpectedForLine[i], doc.get(colDef.getKey() + "_exact"));
                } else {
                    Assert.assertEquals(valuesExpectedForLine[i], doc.get(colDef.getKey()));
                }
            }
        }
    } finally {
        reader.close();
    }
}
From source file:retriever.QuantizedVecSearcher.java
List<DocVector> rerankByEuclideanDist(DocVector queryVec, IndexSearcher searcher, TopDocs topDocs) throws Exception { IndexReader reader = searcher.getIndexReader(); List<DocVector> nnList = new ArrayList<>(); int rank = 1; for (ScoreDoc sd : topDocs.scoreDocs) { Document d = reader.document(sd.doc); DocVector dvec = new DocVector(d, numDimensions, numIntervals); float dist = queryVec.getDist(dvec); dvec.setDistWithQry(dist);//from w w w.j a v a 2 s .c om //System.out.println("Doc " + sd.doc + " with distance " + dist + " retrieved at rank: " + rank + " (Sim = " + sd.score + ")"); nnList.add(dvec); rank++; } Collections.sort(nnList); return nnList; }
From source file:ro.uaic.info.nlptools.corpus.IndexedLuceneCorpus.java
License:Apache License
/**
 * Rewrites the temporary annotation/token/sentence indexes so that cross-index
 * references are expressed as doc indexes instead of "GGS:RefId" lookups, and
 * attaches back-references from tokens/sentences to the annotations spanning them.
 * Three passes:
 *  1) rewrite annotations: GGS:Start/EndTokenRefId -> GGS:Start/EndTokenIndex;
 *  2) from the rewritten annotations, build token-index -> annotation-ids and
 *     sentence-index -> annotation-ids maps;
 *  3) rewrite tokens and sentences, adding one GGS:SpanAnnotation field per
 *     covering annotation.
 *
 * @param indexFolder            root folder containing the "annotations",
 *                               "tokens" and "sentences" sub-indexes
 * @param tempAnnotationSearcher searcher over the temporary annotation index
 * @param tempTokenSearcher      searcher over the temporary token index
 * @param tempSentenceSearcher   searcher over the temporary sentence index
 * @param analyzer               analyzer for the IndexWriterConfig of each writer
 * @throws IOException on index read/write failure
 */
private static void UpdateInterIndexReferences(File indexFolder, IndexSearcher tempAnnotationSearcher,
        IndexSearcher tempTokenSearcher, IndexSearcher tempSentenceSearcher, Analyzer analyzer)
        throws IOException {
    List<Integer> annotations;
    IndexWriter annotationWriter = new IndexWriter(
            FSDirectory.open(Paths.get(indexFolder.toString(), "annotations")),
            new IndexWriterConfig(analyzer));
    // Pass 1: copy every annotation doc, translating token RefIds into the doc
    // index of the matching token (looked up by TermQuery on GGS:RefId).
    for (int i = 0; i < tempAnnotationSearcher.getIndexReader().numDocs(); i++) {
        Document doc = tempAnnotationSearcher.doc(i);
        Document newDoc = new Document();
        for (IndexableField f : doc.getFields()) {
            if (f.name().equals("GGS:StartTokenRefId"))
                newDoc.add(new IntField("GGS:StartTokenIndex", tempTokenSearcher
                        .search(new TermQuery(new Term("GGS:RefId", f.stringValue())), 1).scoreDocs[0].doc,
                        Field.Store.YES));
            else if (f.name().equals("GGS:EndTokenRefId"))
                newDoc.add(new IntField("GGS:EndTokenIndex", tempTokenSearcher
                        .search(new TermQuery(new Term("GGS:RefId", f.stringValue())), 1).scoreDocs[0].doc,
                        Field.Store.YES));
            else
                newDoc.add(f);
        }
        annotationWriter.addDocument(newDoc);
    }
    annotationWriter.close();
    // Reopen a searcher over the freshly written annotations; the parameter is
    // deliberately reassigned so the passes below read the rewritten docs.
    // NOTE(review): the reader of the INCOMING tempAnnotationSearcher is never
    // closed after this reassignment — possible reader leak; confirm the caller
    // closes it.
    tempAnnotationSearcher = new IndexSearcher(DirectoryReader.open(annotationWriter.getDirectory()));
    // Pass 2: build token-index -> annotations and sentence-index -> annotations.
    Map<Integer, List<Integer>> toksAnnotations = new HashMap<>();
    Map<Integer, List<Integer>> sentsAnnotations = new HashMap<>();
    for (int i = 0; i < tempAnnotationSearcher.getIndexReader().numDocs(); i++) {
        Document doc = tempAnnotationSearcher.doc(i);
        int start = doc.getField("GGS:StartTokenIndex").numericValue().intValue();
        int end = doc.getField("GGS:EndTokenIndex").numericValue().intValue();
        // Every token in [start, end] is covered by annotation i.
        for (int j = start; j <= end; j++) {
            annotations = toksAnnotations.get(j);
            if (annotations == null) {
                annotations = new ArrayList<>();
                toksAnnotations.put(j, annotations);
            }
            annotations.add(i);
        }
        // The sentence of the start token is taken as the annotation's sentence.
        int sentIndex = tempTokenSearcher.doc(start).getField("GGS:Sentence").numericValue().intValue();
        annotations = sentsAnnotations.get(sentIndex);
        if (annotations == null) {
            annotations = new ArrayList<>();
            sentsAnnotations.put(sentIndex, annotations);
        }
        annotations.add(i);
    }
    // Pass 3a: rewrite tokens, appending GGS:SpanAnnotation fields.
    IndexWriter tokenWriter = new IndexWriter(FSDirectory.open(Paths.get(indexFolder.toString(), "tokens")),
            new IndexWriterConfig(analyzer));
    for (int i = 0; i < tempTokenSearcher.getIndexReader().numDocs(); i++) {
        Document doc = tempTokenSearcher.doc(i);
        Document newDoc = new Document();
        for (IndexableField f : doc.getFields()) {
            newDoc.add(f);
        }
        annotations = toksAnnotations.get(i);
        if (annotations != null) {
            for (int k : annotations)
                newDoc.add(new IntField("GGS:SpanAnnotation", k, Field.Store.YES));
        }
        tokenWriter.addDocument(newDoc);
    }
    tokenWriter.close();
    // Pass 3b: rewrite sentences the same way.
    IndexWriter sentenceWriter = new IndexWriter(
            FSDirectory.open(Paths.get(indexFolder.toString(), "sentences")),
            new IndexWriterConfig(analyzer));
    for (int i = 0; i < tempSentenceSearcher.getIndexReader().numDocs(); i++) {
        Document doc = tempSentenceSearcher.doc(i);
        Document newDoc = new Document();
        for (IndexableField f : doc.getFields()) {
            newDoc.add(f);
        }
        annotations = sentsAnnotations.get(i);
        if (annotations != null) {
            for (int k : annotations)
                newDoc.add(new IntField("GGS:SpanAnnotation", k, Field.Store.YES));
        }
        sentenceWriter.addDocument(newDoc);
    }
    sentenceWriter.close();
    // Close the readers this method is done with (annotation reader here is the
    // reopened one from above).
    tempTokenSearcher.getIndexReader().close();
    tempAnnotationSearcher.getIndexReader().close();
    tempSentenceSearcher.getIndexReader().close();
}
From source file:servlet.Checkcopy.java
/** * Processes requests for both HTTP <code>GET</code> and <code>POST</code> * methods.// ww w. ja va 2 s. c o m * * @param request servlet request * @param response servlet response * @throws ServletException if a servlet-specific error occurs * @throws IOException if an I/O error occurs */ protected void processRequest(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { HttpSession ss = request.getSession(); Assignment a = (Assignment) ss.getAttribute("curAm"); int safv_id = Integer.parseInt(request.getParameter("safv_id")); String studentAmPath = getServletContext().getRealPath("/") + "/file/student_assignment_file/"; if (a.getAss_type().equalsIgnoreCase("file")) { StAssignmentFile sa = (StAssignmentFile) ss.getAttribute("sa"); StAmFileList f = StAmFileList.getSafvByListIdSafv(safv_id, sa.getList_id()); String filename = f.getPath_file(); String fileExtension = filename.substring(filename.lastIndexOf(".") + 1); String keyword = ""; if (fileExtension.equalsIgnoreCase("docx")) { keyword = DocumentFunction.readDocxFile(studentAmPath + filename); } else if (fileExtension.equalsIgnoreCase("doc")) { keyword = DocumentFunction.readDocFile(studentAmPath + filename); } else if (fileExtension.equalsIgnoreCase("xls")) { keyword = DocumentFunction.readXlsFile(studentAmPath + filename); } else if (fileExtension.equalsIgnoreCase("xlsx")) { keyword = DocumentFunction.readXlsxFile(studentAmPath + filename); } else if (fileExtension.equalsIgnoreCase("pdf")) { keyword = DocumentFunction.readPdfFile(studentAmPath + filename); } if (!keyword.equals("")) { System.out.println("----------------------search..."); Directory directory = null; IndexReader indexReader; ArrayList<String[]> indexsetList = null; try { directory = FSDirectory.open( new File(studentAmPath + "//" + a.getCourse().getCourse_id() + "//" + sa.getAm_id())); indexReader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(indexReader); 
BooleanQuery.setMaxClauseCount(20000); QueryParser parser = new QueryParser(Version.LUCENE_47, "student_assignment", new ThaiAnalyzer(Version.LUCENE_47)); Query query = parser.parse(QueryParser.escape(keyword)); int hitsPerPage = 10; Sort sort = new Sort(new SortField[] { SortField.FIELD_SCORE, new SortField("student_assignment", SortField.Type.STRING) }); TopFieldCollector topField = TopFieldCollector.create(sort, hitsPerPage, true, true, true, false); searcher.search(query, topField); TopDocs docs = topField.topDocs(); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<font color=red>", "</font>"); Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); indexsetList = new ArrayList<>(); for (int i = 0; i < docs.totalHits; i++) { String[] indexset = new String[5]; int id = docs.scoreDocs[i].doc; float score = docs.scoreDocs[i].score; Document doc = searcher.doc(id); String text = doc.get("student_assignment"); String st_am_id = doc.get("st_am_id"); String owner_safv_id = doc.get("safv_id"); // System.out.println(text); // System.out.println(st_am_id); // System.out.println(owner_safv_id); // System.out.println("-----------"); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "student_assignment", new ThaiAnalyzer(Version.LUCENE_47)); String[] hltextArr = highlighter.getBestFragments(tokenStream, text, hitsPerPage); String hltext = ""; for (String string : hltextArr) { hltext += string.toString() + "<br/>"; } indexset[0] = st_am_id; indexset[1] = hltext; //getting owner of StAmFileList file = StAmFileList.getSafvBySafv(Integer.parseInt(owner_safv_id)); if (file != null) { System.out.println((a.getAm_id() + " /" + file.getList_id())); StAssignmentFile stam = StAssignmentFile.getStAmBbyAmIDAndList(a.getAm_id(), file.getList_id()); String html = ""; //add ??? 
boolean add = true; if (stam.getG_id() == 0) { //if no group that mean it's a individual work if (sa.getAcc_id() != stam.getAcc_id()) { Account owneracc = Account.getNameByID(stam.getAcc_id()); html = "<img style=\"width:30px\" src=\"" + owneracc.getProfile_pic() + "\" data-toggle=\"tooltip\" data-placement=\"top\" title=\"\" class=\"img-circle\" data-original-title=\"" + owneracc.getFirstname() + "\">"; } else { add = false; } } else { if (sa.getG_id() != stam.getG_id()) { List<Account> ownerlist = Account.getNameByGIDandAmID(stam.getG_id(), stam.getAm_id()); html = "<a class=\"showGroup\" data-toggle=\"popover\" data-html=\"true\" data-content=\"" + Util.createPopoverGroup(ownerlist) + "\">Group no. " + Group_member.getGNOById(stam.getG_id()) + "</a>"; } else { add = false; } } indexset[2] = html; indexset[3] = score + ""; indexset[4] = owner_safv_id; if (add) { indexsetList.add(indexset); } } } } catch (IOException ex) { Logger.getLogger(TestDriver.class.getName()).log(Level.SEVERE, null, ex); } catch (ParseException ex) { Logger.getLogger(TestDriver.class.getName()).log(Level.SEVERE, null, ex); } catch (InvalidTokenOffsetsException ex) { Logger.getLogger(TestDriver.class.getName()).log(Level.SEVERE, null, ex); } // for (String[] strings : indexsetList) { // System.out.println(strings[0] + " : "+ strings[2] +" : " + strings[1] ); // } request.setAttribute("nowUUid", f.getUuid()); request.setAttribute("keyword", keyword); request.setAttribute("indexsetList", indexsetList); } else { request.setAttribute("error_msg", "This assignment cannot use for check copy."); } // System.out.println(keyword); getServletContext().getRequestDispatcher("/Checkcopy.jsp?tab=AllAssignment").forward(request, response); } }
From source file:solutions.siren.join.action.terms.collector.BitSetHitStream.java
License:Open Source License
/**
 * Creates a hit stream backed by one bit set per index leaf.
 *
 * @param query    the query to collect hits for; scoring is disabled (see below)
 * @param searcher the searcher whose reader defines the leaf count
 * @throws IOException on index access failure
 */
public BitSetHitStream(final Query query, final IndexSearcher searcher) throws IOException {
    // wraps the query into a ConstantScoreQuery since we do not need the score;
    // the collector is sized to the number of leaves of the searcher's reader.
    super(new ConstantScoreQuery(query),
            new LimitedBitSetHitCollector(searcher.getIndexReader().leaves().size()), searcher);
}
From source file:tac.kbp.kb.index.spellchecker.SpellChecker.java
License:Apache License
/**
 * Releases a searcher previously handed out by this SpellChecker by
 * decrementing the reference count of its underlying reader.
 *
 * @param aSearcher the searcher to release
 * @throws IOException if decRef fails closing the reader
 */
private void releaseSearcher(final IndexSearcher aSearcher) throws IOException {
    // don't check if open - always decRef
    // don't decrement the private searcher - could have been swapped
    aSearcher.getIndexReader().decRef();
}
From source file:uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.IAViewRepository.java
License:Mozilla Public License
/**
 * Returns the total number of documents currently in the IAView index.
 *
 * @return the document count reported by the index reader
 * @throws TaxonomyException wrapping any IOException raised while acquiring
 *         or reading from the searcher
 */
public int getTotalNbOfDocs() {
    IndexSearcher acquired = null;
    try {
        acquired = iaviewSearcherManager.acquire();
        return acquired.getIndexReader().numDocs();
    } catch (IOException e) {
        throw new TaxonomyException(TaxonomyErrorType.LUCENE_IO_EXCEPTION, e);
    } finally {
        // Always hand the searcher back, even when acquire() itself failed
        // (helper tolerates a null searcher).
        LuceneHelperTools.releaseSearcherManagerQuietly(iaviewSearcherManager, acquired);
    }
}