Example usage for org.apache.lucene.index IndexReader document

List of usage examples for org.apache.lucene.index IndexReader document

Introduction

On this page you can find example usages of org.apache.lucene.index IndexReader document.

Prototype




public final Document document(int docID) throws IOException 

Source Link

Document

Returns the stored fields of the nth Document in this index.

Usage

From source file:pretraga.IsolationSimilarity.java

/**
 * Searches the index for documents whose SIZE field falls in the inclusive
 * range [lowLine, hightLine] and returns "title, size" strings for the top hits.
 *
 * @param lowLine   inclusive lower bound of the SIZE range
 * @param hightLine inclusive upper bound of the SIZE range
 * @return list of "TITLE, SIZE" strings for the matching documents (at most 10),
 *         or {@code null} if any error occurs
 */
public List<String> searchBool(long lowLine, long hightLine) {
    try {
        Directory dir = FSDirectory.open(new File(indexDirectoryPath).toPath());

        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);

        // Inclusive on both ends (true, true).
        Query q = NumericRangeQuery.newLongRange(SIZE, lowLine, hightLine, true, true);

        TopScoreDocCollector collector = TopScoreDocCollector.create(10);
        searcher.search(q, collector);
        List<String> retList = new ArrayList<>();
        TopDocs docs = collector.topDocs();
        System.out.println("Hits: " + docs.totalHits);
        for (int i = 0; i < docs.scoreDocs.length; i++) {
            Document d = reader.document(docs.scoreDocs[i].doc);
            String entry = d.get(TITLE) + ", " + d.get(SIZE);
            System.out.println(entry);
            // BUG FIX: results were printed but never added, so the method
            // always returned an empty list.
            retList.add(entry);
        }

        reader.close();
        dir.close();
        return retList;
    } catch (Exception e) {
        System.err.println(e.toString());
        return null;
    }
}

From source file:pretraga.IsolationSimilarity.java

/**
 * Searches the given category field for the search term and returns
 * "category-value, size, score" strings for the top hits.
 *
 * @param searchingTerm term to search for (lower-cased before parsing)
 * @param category      index field to query against
 * @param veryPrecision when {@code false}, a trailing wildcard is appended
 *                      so the term matches as a prefix
 * @return result strings for the matching documents (at most 10); an empty
 *         list if any error occurs
 */
public List<String> searchByCategory(String searchingTerm, String category, boolean veryPrecision) {
    try {
        Directory dir = FSDirectory.open(new File(indexDirectoryPath).toPath());

        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setSimilarity(new ClassicSimilarity());

        QueryParser parser = new QueryParser(category, analyzer);
        String queryText = searchingTerm.toLowerCase();
        if (!veryPrecision)
            queryText += "*"; // prefix match when precision is not required
        Query q = parser.parse(queryText);

        TopScoreDocCollector collector = TopScoreDocCollector.create(10);
        searcher.search(q, collector);
        TopDocs docs = collector.topDocs();
        List<String> ret = new ArrayList<>();
        // BUG FIX: iterating up to docs.totalHits overruns scoreDocs (the
        // collector only keeps 10 docs) and threw ArrayIndexOutOfBoundsException
        // whenever more than 10 documents matched.
        for (int i = 0; i < docs.scoreDocs.length; i++) {
            Document d = reader.document(docs.scoreDocs[i].doc);
            ret.add(d.get(category) + ", " + d.get(SIZE) + ", score: " + docs.scoreDocs[i].score);
        }
        reader.close();
        dir.close();
        return ret;
    } catch (Exception e) {
        System.err.println(e.toString());
        return new ArrayList<>();
    }
}

From source file:pretraga.IsolationSimilarity.java

/**
 * Scores every term of each document matched by the input vector against the
 * searcher's ClassicSimilarity, printing the score for terms that occur in
 * the query vector.
 *
 * @param vec raw input string; parsed into a term vector by processInput
 */
public void test(String vec) {
    List<String> vector = processInput(vec);
    try {
        Directory dir = FSDirectory.open(new File(indexDirectoryPath).toPath());

        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);

        List<Integer> docId = getDocumentsFromVector(vector, reader, searcher);

        // Resolve the similarity once, not per-term. BUG FIX: the original
        // dereferenced `sim` without a null check and threw an NPE whenever
        // the searcher's similarity was not a ClassicSimilarity.
        ClassicSimilarity sim = null;
        if (searcher.getSimilarity(true) instanceof ClassicSimilarity) {
            sim = (ClassicSimilarity) searcher.getSimilarity(true);
        }
        if (sim == null) {
            System.err.println("Searcher similarity is not ClassicSimilarity; cannot score terms.");
            return;
        }

        for (int i = 0; i < docId.size(); i++) {
            Fields ff = reader.getTermVectors(docId.get(i));
            Terms terms = ff.terms(CONTENT);

            TermsEnum te = terms.iterator();
            Object tmp = te.next();
            while (tmp != null) {
                BytesRef by = (BytesRef) tmp;
                String term = by.utf8ToString();

                TermStatistics ts = new TermStatistics(by, te.docFreq(), te.totalTermFreq());
                CollectionStatistics s = new CollectionStatistics(CONTENT, reader.maxDoc(), terms.getDocCount(),
                        terms.getSumTotalTermFreq(), terms.getSumDocFreq());
                Document d = reader.document(docId.get(i));
                if (vector.contains(term)) {
                    // NOTE(review): scores against the first leaf only — assumes a
                    // single-segment index; confirm against how the index is built.
                    float ttt = sim.simScorer(sim.computeWeight(s, ts), reader.getContext().leaves().get(0))
                            .score(docId.get(i), te.totalTermFreq());
                    System.out.println(ttt + ", " + d.get(TITLE) + ", term: " + term);
                }
                tmp = te.next();
            }
        }
    } catch (Exception e) {
        // BUG FIX: the catch block was empty, silently swallowing every error.
        System.err.println(e.toString());
    }
}

From source file:psidev.psi.mi.search.engine.impl.AbstractSearchEngine.java

License:Apache License

/**
 * Returns a page of all documents in the index, without running a real query.
 *
 * @param firstResult zero-based index of the first document to return; null means 0
 * @param maxResults  maximum number of documents to return; null means MAX_TOP_RESULTS
 * @return a SearchResult page over the whole index
 * @throws SearchEngineException if a document cannot be read or converted
 */
public SearchResult<T> searchAll(Integer firstResult, Integer maxResults) throws SearchEngineException {
    if (firstResult == null) {
        firstResult = 0;
    }
    if (maxResults == null) {
        maxResults = MAX_TOP_RESULTS;
    }

    IndexReader reader = indexSearcher.getIndexReader();
    int totalCount = reader.maxDoc();

    if (maxResults == 0) {
        return new SearchResult(Collections.EMPTY_LIST, totalCount, firstResult, maxResults,
                new WildcardQuery(new Term("", "*")));
    }

    // Hack kept from early development versions: skip a stray header document
    // that may have been introduced into the index by mistake.
    if (reader.isDeleted(0)) {
        firstResult++;
        totalCount--;
    }

    if (firstResult > totalCount) {
        return new SearchResult(Collections.EMPTY_LIST, totalCount, firstResult, maxResults,
                new WildcardQuery(new Term("", "*")));
    }

    int upperBound = Math.min(totalCount, firstResult + maxResults);
    List<T> dataObjects = new ArrayList<T>();

    for (int docIndex = firstResult; docIndex < upperBound; docIndex++) {
        try {
            Document doc = reader.document(docIndex);
            dataObjects.add((T) createDocumentBuilder().createData(doc));
        } catch (Exception e) {
            throw new SearchEngineException(e);
        }
    }

    return new SearchResult(dataObjects, totalCount, firstResult, maxResults,
            new WildcardQuery(new Term("", "*")));
}

From source file:retriever.TermStats.java

/**
 * Aligns the given document against the reference index (or a per-document
 * temporal index when temporalConstraint is set) and returns the id of the
 * best-matching document.
 *
 * @param docId id of the document to align
 * @return the FIELD_ID of the top-ranked aligned document, or {@code null}
 *         if no query could be built or nothing matched
 * @throws Exception on index access or query construction failure
 */
String align(int docId) throws Exception {

    Query q = queryTranslation ? constructTranslatedQuery(docId) : constructQuery(docId);
    if (q == null)
        return null;

    IndexReader reader;
    IndexSearcher searcher;
    Directory inMemTemporalIndex = null;

    if (temporalConstraint) {
        inMemTemporalIndex = buildTemporalIndex(docId);
        reader = DirectoryReader.open(inMemTemporalIndex);
        searcher = buildTemporalIndexSearcher(reader);
    } else {
        // Shared, long-lived reader/searcher — must NOT be closed here.
        reader = frIndexReader;
        searcher = frIndexSearcher;
    }

    // BUG FIX: the temporal reader/directory leaked if search() or the
    // reranker threw; close them in a finally block instead.
    try {
        TopScoreDocCollector collector = TopScoreDocCollector.create(numWanted);
        searcher.search(q, collector);

        TopDocs topDocs = collector.topDocs();

        if (topDocs.scoreDocs.length == 0)
            return null;

        if (textSimWt < 1) {
            topDocs = rerankTopDocsByWordVecSim(q, topDocs); // rerank by termStats sims
        }

        Document alignedDoc = reader.document(topDocs.scoreDocs[0].doc);
        return alignedDoc.get(TextDocIndexer.FIELD_ID);
    } finally {
        if (temporalConstraint) {
            reader.close();
            if (inMemTemporalIndex != null)
                inMemTemporalIndex.close();
        }
    }
}

From source file:retriever.QuantizedVecSearcher.java

/**
 * Re-ranks the retrieved documents by their Euclidean distance to the query
 * vector (ascending, via DocVector's natural ordering).
 *
 * @param queryVec the query vector to measure distances from
 * @param searcher searcher whose reader provides the stored documents
 * @param topDocs  the initially retrieved documents
 * @return the retrieved vectors sorted by distance to the query
 * @throws Exception on index access failure
 */
List<DocVector> rerankByEuclideanDist(DocVector queryVec, IndexSearcher searcher, TopDocs topDocs)
        throws Exception {
    IndexReader reader = searcher.getIndexReader();
    List<DocVector> neighbours = new ArrayList<>();
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        DocVector candidate = new DocVector(reader.document(scoreDoc.doc), numDimensions, numIntervals);
        candidate.setDistWithQry(queryVec.getDist(candidate));
        neighbours.add(candidate);
    }
    Collections.sort(neighbours);
    return neighbours;
}

From source file:searcher.CollStat.java

/**
 * Builds a one-element JSON array describing the given document: its title,
 * a query-dependent snippet, its id, and its URL.
 *
 * @param reader reader providing the stored document
 * @param q      the query used to extract the snippet
 * @param docid  internal Lucene document id
 * @return a JSON array containing a single object for the document
 * @throws Exception on index access or snippet extraction failure
 */
JsonArray constructJSONForDoc(IndexReader reader, Query q, int docid) throws Exception {
    Document doc = reader.document(docid);

    JsonObjectBuilder docJson = factory.createObjectBuilder()
            .add("title", doc.get(WTDocument.WTDOC_FIELD_TITLE))
            .add("snippet", getSnippet(q, doc, docid))
            .add("id", doc.get(TrecDocIndexer.FIELD_ID))
            .add("url", doc.get(WTDocument.WTDOC_FIELD_URL));
    //docJson.add("html", getBase64EncodedHTML(doc));

    return factory.createArrayBuilder().add(docJson).build();
}

From source file:searcher.CollStat.java

/**
 * Looks up a document by its collection id and returns its decompressed HTML.
 *
 * @param indexNumStr index selector as a string; null or "-1" means the multi-reader
 * @param docId       collection document id (FIELD_ID) to look up
 * @return the decompressed HTML of the document, or {@code null} if no
 *         document with that id exists in the selected index
 * @throws Exception on index access or decompression failure
 */
public String getHTMLFromDocId(String indexNumStr, String docId) throws Exception {

    TopScoreDocCollector collector;
    TopDocs topDocs;

    int indexNum = indexNumStr == null ? -1 : Integer.parseInt(indexNumStr);

    System.out.println("Docid Query = |" + docId + "|");
    IndexReader reader = indexNum == -1 ? multiReader : readers[indexNum];

    Query query = new TermQuery(new Term(TrecDocIndexer.FIELD_ID, docId.trim()));
    collector = TopScoreDocCollector.create(1, true);

    IndexSearcher searcher = initSearcher(reader);
    searcher.search(query, collector);
    topDocs = collector.topDocs();

    // BUG FIX: scoreDocs[0] threw ArrayIndexOutOfBoundsException when the id
    // matched nothing; report a miss as null instead.
    if (topDocs.scoreDocs.length == 0) {
        System.err.println("No document found for id: " + docId);
        return null;
    }
    ScoreDoc sd = topDocs.scoreDocs[0];

    Document doc = reader.document(sd.doc);
    String htmlDecompressed = IndexHtmlToText.decompress(doc.getBinaryValue(WTDocument.WTDOC_FIELD_HTML).bytes);

    return htmlDecompressed;
}

From source file:sift.DocIdComparator.java

@Override
void loadQueries() throws Exception {

    int nnId = 0;
    float nnDist = 0;
    boolean eval = Boolean.parseBoolean(prop.getProperty("eval", "false"));

    File indexDir = new File(prop.getProperty("query.index"));
    IndexReader siftVecReader = DirectoryReader.open(FSDirectory.open(indexDir.toPath()));

    int numDocs = siftVecReader.numDocs();
    int numDimensions = Integer.parseInt(prop.getProperty("vec.numdimensions"));

    for (int i = 0; i < numDocs; i++) {
        Document d = siftVecReader.document(i);

        if (eval) {
            nnId = Integer.parseInt(d.get(QuerySiftVecIndexer.FIELD_NN_ID));
            nnDist = Float.parseFloat(d.get(QuerySiftVecIndexer.FIELD_NN_DIST));
        }/*www  . j ava  2 s  . com*/

        qvecs.add(new QueryVector(d, numDimensions, DocVector.numIntervals, nnId, (float) Math.sqrt(nnDist)));
    }

    Collections.sort(qvecs, new DocIdComparator());
}

From source file:sift.Qrels.java

/**
 * Loads the relevant (ground-truth) vectors for a query and returns them
 * sorted by distance to the query vector.
 *
 * @param reader reader providing the stored vector documents
 * @param qvec   the query vector distances are measured from
 * @param qid    query id used to look up the relevant document ids
 * @return relevant vectors sorted by their distance to {@code qvec}
 * @throws Exception on index access failure
 */
public List<DocVector> getSortedRelVecs(IndexReader reader, DocVector qvec, int qid) throws Exception {
    int[] relDocIds = this.relVecIds.get(qid);
    List<DocVector> relDocs = new ArrayList<>();

    for (int relId : relDocIds) {
        DocVector relVec = new DocVector(reader.document(relId), qvec.numDimensions, DocVector.numIntervals);
        relVec.setDistWithQry(qvec.getDist(relVec));
        relDocs.add(relVec);
    }

    Collections.sort(relDocs);
    return relDocs;
}