List of usage examples for org.apache.lucene.index.IndexReader#document

public final Document document(int docID) throws IOException

Returns the stored fields of the docID-th Document in this index.

From source file: pretraga.IsolationSimilarity.java
/**
 * Searches the index for documents whose numeric SIZE field lies in the
 * inclusive range [lowLine, hightLine], collecting at most 10 hits.
 *
 * @param lowLine   inclusive lower bound on the SIZE field
 * @param hightLine inclusive upper bound on the SIZE field
 * @return one "TITLE, SIZE" string per hit, or {@code null} if any error occurs
 */
public List<String> searchBool(long lowLine, long hightLine) {
    // try-with-resources: previously reader/dir leaked if anything threw before close()
    try (Directory dir = FSDirectory.open(new File(indexDirectoryPath).toPath());
            IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Query q = NumericRangeQuery.newLongRange(SIZE, lowLine, hightLine, true, true);
        TopScoreDocCollector collector = TopScoreDocCollector.create(10);
        searcher.search(q, collector);

        List<String> retList = new ArrayList<>();
        TopDocs docs = collector.topDocs();
        System.out.println("Hits: " + docs.totalHits);
        for (int i = 0; i < docs.scoreDocs.length; i++) {
            Document d = reader.document(docs.scoreDocs[i].doc);
            String line = d.get(TITLE) + ", " + d.get(SIZE);
            System.out.println(line);
            // BUG FIX: hits were printed but never added, so the method always returned an empty list
            retList.add(line);
        }
        return retList;
    } catch (Exception e) {
        System.err.println(e.toString());
        return null; // callers already handle null (kept for backward compatibility)
    }
}
From source file:pretraga.IsolationSimilarity.java
/**
 * Searches a single field ("category") for the given term using TF-IDF
 * (ClassicSimilarity) scoring, collecting at most 10 hits.
 *
 * @param searchingTerm term to search for (lowercased before parsing)
 * @param category      index field to search and to read back from each hit
 * @param veryPrecision if false, a trailing '*' widens the term to a prefix query
 * @return "field, size, score" strings for each hit; empty list on error
 */
public List<String> searchByCategory(String searchingTerm, String category, boolean veryPrecision) {
    // try-with-resources: previously reader/dir leaked if anything threw before close()
    try (Directory dir = FSDirectory.open(new File(indexDirectoryPath).toPath());
            IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setSimilarity(new ClassicSimilarity());

        QueryParser parser = new QueryParser(category, analyzer);
        String queryText = searchingTerm.toLowerCase();
        if (!veryPrecision) {
            queryText += "*"; // widen to a prefix query when exact matching is not required
        }
        Query q = parser.parse(queryText);

        TopScoreDocCollector collector = TopScoreDocCollector.create(10);
        searcher.search(q, collector);
        TopDocs docs = collector.topDocs();

        List<String> ret = new ArrayList<>();
        // BUG FIX: iterate scoreDocs.length, not totalHits — totalHits counts ALL matches
        // and can exceed the 10 collected docs, causing ArrayIndexOutOfBoundsException.
        for (int i = 0; i < docs.scoreDocs.length; i++) {
            Document d = reader.document(docs.scoreDocs[i].doc);
            ret.add(d.get(category) + ", " + d.get(SIZE) + ", score: " + docs.scoreDocs[i].score);
        }
        return ret;
    } catch (Exception e) {
        System.err.println(e.toString());
        return new ArrayList<>();
    }
}
From source file:pretraga.IsolationSimilarity.java
public void test(String vec) { List<String> vector = processInput(vec); HashMap<String, Long> map = new HashMap<>(); try {//from ww w . j a va 2 s .c o m Directory dir = FSDirectory.open(new File(indexDirectoryPath).toPath()); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); List<Integer> docId = getDocumentsFromVector(vector, reader, searcher); for (int i = 0; i < docId.size(); i++) { Fields ff = reader.getTermVectors(docId.get(i)); Terms terms = ff.terms(CONTENT); TermsEnum te = terms.iterator(); Object tmp = te.next(); while (tmp != null) { BytesRef by = (BytesRef) tmp; String term = by.utf8ToString(); ClassicSimilarity sim = null; if (searcher.getSimilarity(true) instanceof ClassicSimilarity) { sim = (ClassicSimilarity) searcher.getSimilarity(true); } float idf = sim.idf(te.docFreq(), reader.maxDoc()); float tf = sim.tf(te.totalTermFreq()); //System.out.println("idf = " + idf + ", tf = " + tf + ", docF: " + te.totalTermFreq()); TermStatistics ts = new TermStatistics(by, te.docFreq(), te.totalTermFreq()); CollectionStatistics s = new CollectionStatistics(CONTENT, reader.maxDoc(), terms.getDocCount(), terms.getSumTotalTermFreq(), terms.getSumDocFreq()); Document d = reader.document(docId.get(i)); if (vector.contains(term)) { float ttt = sim.simScorer(sim.computeWeight(s, ts), reader.getContext().leaves().get(0)) .score(docId.get(i), te.totalTermFreq()); System.out.println(ttt + ", " + d.get(TITLE) + ", term: " + term); } tmp = te.next(); } /*Iterator<String> ss = ff.iterator(); while (ss.hasNext()) { String fieldString = ss.next(); System.out.println(fieldString); }*/ } } catch (Exception e) { } }
From source file:psidev.psi.mi.search.engine.impl.AbstractSearchEngine.java
License:Apache License
public SearchResult<T> searchAll(Integer firstResult, Integer maxResults) throws SearchEngineException { if (firstResult == null) firstResult = 0;/* w ww .ja v a 2 s .c o m*/ if (maxResults == null) maxResults = MAX_TOP_RESULTS; IndexReader reader = indexSearcher.getIndexReader(); int totalCount = reader.maxDoc(); if (maxResults == 0) { return new SearchResult(Collections.EMPTY_LIST, totalCount, firstResult, maxResults, new WildcardQuery(new Term("", "*"))); } // this is a hack to ignore any header introduced in the index by mistake (first development versions) if (reader.isDeleted(0)) { firstResult++; totalCount--; } if (firstResult > totalCount) { // closeIndexReader(reader); return new SearchResult(Collections.EMPTY_LIST, totalCount, firstResult, maxResults, new WildcardQuery(new Term("", "*"))); } int maxIndex = Math.min(totalCount, firstResult + maxResults); List<T> dataObjects = new ArrayList<T>(); for (int i = firstResult; i < maxIndex; i++) { try { Document doc = reader.document(i); T data = (T) createDocumentBuilder().createData(doc); dataObjects.add(data); } catch (Exception e) { // closeIndexReader(reader); throw new SearchEngineException(e); } } // closeIndexReader(reader); return new SearchResult(dataObjects, totalCount, firstResult, maxResults, new WildcardQuery(new Term("", "*"))); }
From source file:retriever.TermStats.java
/**
 * Finds the best-aligned document for {@code docId} by querying either the
 * shared index or a per-document in-memory temporal index.
 *
 * Resource ownership is conditional: when {@code temporalConstraint} is set this
 * method OWNS reader and inMemTemporalIndex and must close both on every exit
 * path; otherwise it borrows the shared frIndexReader/frIndexSearcher and must
 * NOT close them.
 *
 * @param docId id of the source document to align
 * @return the FIELD_ID of the top-ranked aligned document, or null if no query
 *         could be built or nothing matched
 * @throws Exception propagated from index construction, search, or I/O
 */
String align(int docId) throws Exception {
    TopScoreDocCollector collector;
    TopDocs topDocs;
    IndexReader reader;
    IndexSearcher searcher;
    Directory inMemTemporalIndex = null;

    // Build the query, optionally translating it first.
    Query q = queryTranslation ? constructTranslatedQuery(docId) : constructQuery(docId);
    if (q == null)
        return null;

    if (temporalConstraint) {
        // Restrict the search to a freshly built in-memory index of temporally
        // compatible documents; this reader/directory pair is closed below.
        inMemTemporalIndex = buildTemporalIndex(docId);
        IndexReader ramDirReader = DirectoryReader.open(inMemTemporalIndex);
        reader = ramDirReader;
        searcher = buildTemporalIndexSearcher(reader);
    } else {
        // Borrowed shared resources — not closed by this method.
        reader = frIndexReader;
        searcher = frIndexSearcher;
    }

    collector = TopScoreDocCollector.create(numWanted);
    searcher.search(q, collector);
    topDocs = collector.topDocs();

    if (topDocs.scoreDocs.length == 0) {
        // No hits: release owned resources before bailing out.
        if (temporalConstraint) {
            reader.close();
            inMemTemporalIndex.close();
        }
        return null;
    }

    if (textSimWt < 1) {
        topDocs = rerankTopDocsByWordVecSim(q, topDocs); // rerank by termStats sims
    }

    // Top-ranked hit after (optional) reranking is the alignment.
    Document alignedDoc = reader.document(topDocs.scoreDocs[0].doc);
    String alignedDocId = alignedDoc.get(TextDocIndexer.FIELD_ID);

    if (temporalConstraint) {
        reader.close();
        inMemTemporalIndex.close();
    }
    return alignedDocId;
}
From source file:retriever.QuantizedVecSearcher.java
List<DocVector> rerankByEuclideanDist(DocVector queryVec, IndexSearcher searcher, TopDocs topDocs) throws Exception { IndexReader reader = searcher.getIndexReader(); List<DocVector> nnList = new ArrayList<>(); int rank = 1; for (ScoreDoc sd : topDocs.scoreDocs) { Document d = reader.document(sd.doc); DocVector dvec = new DocVector(d, numDimensions, numIntervals); float dist = queryVec.getDist(dvec); dvec.setDistWithQry(dist);// www .ja va2s . c o m //System.out.println("Doc " + sd.doc + " with distance " + dist + " retrieved at rank: " + rank + " (Sim = " + sd.score + ")"); nnList.add(dvec); rank++; } Collections.sort(nnList); return nnList; }
From source file:searcher.CollStat.java
JsonArray constructJSONForDoc(IndexReader reader, Query q, int docid) throws Exception { Document doc = reader.document(docid); JsonArrayBuilder arrayBuilder = factory.createArrayBuilder(); JsonObjectBuilder objectBuilder = factory.createObjectBuilder(); objectBuilder.add("title", doc.get(WTDocument.WTDOC_FIELD_TITLE)); objectBuilder.add("snippet", getSnippet(q, doc, docid)); objectBuilder.add("id", doc.get(TrecDocIndexer.FIELD_ID)); objectBuilder.add("url", doc.get(WTDocument.WTDOC_FIELD_URL)); //objectBuilder.add("html", getBase64EncodedHTML(doc)); arrayBuilder.add(objectBuilder);//from ww w . j a va 2 s.c o m return arrayBuilder.build(); }
From source file:searcher.CollStat.java
/**
 * Looks up a document by its external id and returns its decompressed HTML.
 *
 * @param indexNumStr index selector; null or "-1" means the merged multiReader
 * @param docId       external document id (trimmed before lookup)
 * @return the decompressed HTML stored for the document
 * @throws Exception if no document with that id exists, or on index/IO errors
 */
public String getHTMLFromDocId(String indexNumStr, String docId) throws Exception {
    int indexNum = indexNumStr == null ? -1 : Integer.parseInt(indexNumStr);
    System.out.println("Docid Query = |" + docId + "|");
    IndexReader reader = indexNum == -1 ? multiReader : readers[indexNum];

    Query query = new TermQuery(new Term(TrecDocIndexer.FIELD_ID, docId.trim()));
    TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);
    IndexSearcher searcher = initSearcher(reader);
    searcher.search(query, collector);
    TopDocs topDocs = collector.topDocs();

    // BUG FIX: scoreDocs[0] on an unknown id previously threw a bare
    // ArrayIndexOutOfBoundsException; fail with an informative message instead.
    if (topDocs.scoreDocs.length == 0) {
        throw new Exception("No indexed document found with id: " + docId);
    }

    Document doc = reader.document(topDocs.scoreDocs[0].doc);
    return IndexHtmlToText.decompress(doc.getBinaryValue(WTDocument.WTDOC_FIELD_HTML).bytes);
}
From source file:sift.DocIdComparator.java
/**
 * Loads every query vector from the query index into {@code qvecs}, sorted by
 * document id. When the "eval" property is true, each document's ground-truth
 * nearest-neighbour id and distance are read as well; otherwise they stay 0.
 */
@Override
void loadQueries() throws Exception {
    boolean eval = Boolean.parseBoolean(prop.getProperty("eval", "false"));
    File indexDir = new File(prop.getProperty("query.index"));
    IndexReader siftVecReader = DirectoryReader.open(FSDirectory.open(indexDir.toPath()));
    int dims = Integer.parseInt(prop.getProperty("vec.numdimensions"));

    // Ground-truth fields default to 0 when eval is off (matches original carry-over behavior).
    int nnId = 0;
    float nnDist = 0;

    int total = siftVecReader.numDocs();
    for (int docNum = 0; docNum < total; docNum++) {
        Document queryDoc = siftVecReader.document(docNum);
        if (eval) {
            nnId = Integer.parseInt(queryDoc.get(QuerySiftVecIndexer.FIELD_NN_ID));
            nnDist = Float.parseFloat(queryDoc.get(QuerySiftVecIndexer.FIELD_NN_DIST));
        }
        qvecs.add(new QueryVector(queryDoc, dims, DocVector.numIntervals,
                nnId, (float) Math.sqrt(nnDist)));
    }
    Collections.sort(qvecs, new DocIdComparator());
}
From source file:sift.Qrels.java
public List<DocVector> getSortedRelVecs(IndexReader reader, DocVector qvec, int qid) throws Exception { List<DocVector> relDocs = new ArrayList<>(); int[] relDocIds = this.relVecIds.get(qid); for (int id : relDocIds) { DocVector nnVec = new DocVector(reader.document(id), qvec.numDimensions, DocVector.numIntervals); nnVec.setDistWithQry(qvec.getDist(nnVec)); relDocs.add(nnVec);//from w w w .ja va 2 s . c om } Collections.sort(relDocs); return relDocs; }