Example usage for org.apache.lucene.index IndexReader document

List of usage examples for org.apache.lucene.index IndexReader document

Introduction

On this page you can find example usages of the org.apache.lucene.index.IndexReader.document method.

Prototype




public final Document document(int docID) throws IOException 

Source Link

Document

Returns the stored fields of the nth Document in this index.

Usage

From source file:TestWang.java

License:Open Source License

/**
 * Scans the index for the first document whose identifier field ends with
 * {@code File.separator + file}.
 *
 * <p>Documents that carry no identifier field are skipped instead of aborting
 * the scan with an {@link ArrayIndexOutOfBoundsException}.
 *
 * @param reader index to scan
 * @param file   file name to look for, without a leading separator
 * @return the first matching document, or {@code null} if no document matches
 * @throws IOException if a stored document cannot be read
 */
private Document findDoc(IndexReader reader, String file) throws IOException {
    // The suffix does not depend on the document, so build it once.
    final String suffix = File.separator + file;

    // NOTE(review): numDocs() counts live documents only; if the index contains
    // deletions, IDs in [numDocs(), maxDoc()) are never visited. Kept as-is because
    // iterating to maxDoc() would also require a live-docs check — confirm intent.
    final int docCount = reader.numDocs();
    for (int i = 0; i < docCount; i++) {
        Document document = reader.document(i);
        // Lucene documents getValues as returning an empty array (never null)
        // when the field is absent.
        String[] identifiers = document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER);
        if (identifiers.length == 0) {
            continue;
        }
        if (identifiers[0].endsWith(suffix)) {
            return document;
        }
    }
    return null;
}

From source file:TestWang.java

License:Open Source License

/**
 * Looks up one document per requested file name.
 *
 * <p>A document matches entry {@code j} when its identifier field ends with a path
 * separator followed by {@code file[j]}. Both the backslash (the separator this
 * method originally hard-coded) and the platform's {@code File.separator} (the one
 * {@code findDoc} uses) are accepted. If several documents match the same entry,
 * the one with the highest visited document ID wins.
 *
 * <p>Documents that carry no identifier field are skipped.
 *
 * @param reader index to scan
 * @param file   file names to look for, without a leading separator
 * @return an array parallel to {@code file}; entries without a match stay {@code null}
 * @throws IOException if a stored document cannot be read
 */
private Document[] findDocs(IndexReader reader, String[] file) throws IOException {
    Document[] result = new Document[file.length];

    // Build the suffixes once instead of once per (document, file) pair.
    String[] backslashSuffixes = new String[file.length];
    String[] platformSuffixes = new String[file.length];
    for (int j = 0; j < file.length; j++) {
        backslashSuffixes[j] = "\\" + file[j];
        platformSuffixes[j] = File.separator + file[j];
    }

    // NOTE(review): numDocs() counts live documents only; with deletions in the
    // index the highest IDs are never visited — confirm this bound is intended.
    final int docCount = reader.numDocs();
    for (int i = 0; i < docCount; i++) {
        Document document = reader.document(i);
        // Lucene documents getValues as returning an empty array (never null)
        // when the field is absent.
        String[] identifiers = document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER);
        if (identifiers.length == 0) {
            continue;
        }
        String identifier = identifiers[0];
        for (int j = 0; j < result.length; j++) {
            if (identifier.endsWith(backslashSuffixes[j]) || identifier.endsWith(platformSuffixes[j])) {
                result[j] = document;
            }
        }
    }
    return result;
}

From source file:ReadFiles.java

License:Apache License

/**
 * Sequentially fetches every document ID in {@code [0, maxDoc)} and records timings.
 *
 * <p>When {@code ir} is {@code null} the method opens its own reader on {@code path}
 * using the directory implementation selected by {@code type}, and closes it again
 * before returning — also when the scan fails. A caller-supplied reader is never
 * closed here.
 *
 * @param path index directory; only used when {@code ir} is {@code null}
 * @param type directory implementation to open; only used when {@code ir} is {@code null}
 * @param ir   reader to reuse, or {@code null} to open (and close) one locally
 * @return a fresh {@code Result} with open, fetch and close timings (milliseconds)
 *         and their counters filled in
 * @throws IOException if the index cannot be opened, read or closed
 */
public static Result doScan(String path, DIRTYPE type, IndexReader ir) throws IOException {
    final Result r = new Result();
    final boolean ownsReader = (ir == null);
    final IndexReader reader;

    if (!ownsReader) {
        reader = ir;
    } else {
        long openStart = System.currentTimeMillis();
        switch (type) {
        default: // any unlisted type falls through to MMAP
        case MMAP:
            reader = DirectoryReader.open(MMapDirectory.open(new File(path)));
            break;
        case NIO:
            reader = DirectoryReader.open(NIOFSDirectory.open(new File(path)));
            break;
        case SIMPLE:
            reader = DirectoryReader.open(SimpleFSDirectory.open(new File(path)));
            break;
        }
        r.initTs += System.currentTimeMillis() - openStart;
        r.initTsNr += 1;
    }

    try {
        final int maxDoc = reader.maxDoc();
        System.out.println("-----Scan   it------" + maxDoc);

        long fetchStart = System.currentTimeMillis();
        for (int i = 0; i < maxDoc; i++) {
            Document doc = reader.document(i);
            // The values are read and discarded: only the fetch cost is being measured.
            doc.get("foo");
            doc.get("bar");
        }
        r.fetchTs += System.currentTimeMillis() - fetchStart;
        r.fetchTsNr += maxDoc;
    } finally {
        // Close a locally opened reader even if the scan threw, so it does not leak.
        if (ownsReader) {
            long closeStart = System.currentTimeMillis();
            reader.close();
            r.closeTs += System.currentTimeMillis() - closeStart;
            r.closeTsNr += 1;
        }
    }

    return r;
}

From source file:ReadFiles.java

License:Apache License

/**
 * Fetches documents at uniformly random IDs and records timings.
 *
 * <p>The number of fetches is {@code randfetchnr} when it is positive, otherwise
 * {@code reader.maxDoc()}. Random IDs are always drawn from {@code [0, maxDoc)}, so a
 * {@code randfetchnr} larger than the index no longer requests nonexistent documents
 * and a smaller one no longer restricts sampling to the first {@code randfetchnr}
 * IDs. Nothing is fetched from an empty index.
 *
 * <p>When {@code ir} is {@code null} the method opens its own reader on {@code path}
 * and closes it again before returning — also when fetching fails. A caller-supplied
 * reader is never closed here.
 *
 * @param path        index directory; only used when {@code ir} is {@code null}
 * @param type        directory implementation to open; only used when {@code ir} is {@code null}
 * @param ir          reader to reuse, or {@code null} to open (and close) one locally
 * @param randfetchnr number of random fetches; values {@code <= 0} mean "as many as maxDoc"
 * @return a fresh {@code Result} with open, fetch and close timings (milliseconds)
 *         and their counters filled in
 * @throws IOException if the index cannot be opened, read or closed
 */
public static Result doRandFetch(String path, DIRTYPE type, IndexReader ir, int randfetchnr)
        throws IOException {
    final Result r = new Result();
    final boolean ownsReader = (ir == null);
    final IndexReader reader;

    if (!ownsReader) {
        reader = ir;
    } else {
        long openStart = System.currentTimeMillis();
        switch (type) {
        default: // any unlisted type falls through to MMAP
        case MMAP:
            reader = DirectoryReader.open(MMapDirectory.open(new File(path)));
            break;
        case NIO:
            reader = DirectoryReader.open(NIOFSDirectory.open(new File(path)));
            break;
        case SIMPLE:
            reader = DirectoryReader.open(SimpleFSDirectory.open(new File(path)));
            break;
        }
        r.initTs += System.currentTimeMillis() - openStart;
        r.initTsNr += 1;
    }

    try {
        System.out.println("-----RandFt it------");
        try {
            // Fixed 10 s pause before the measurement starts; an interrupt merely
            // shortens the pause and the benchmark carries on.
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }

        final Random rand = new Random();
        final int docCount = reader.maxDoc();
        int fetchCount = (randfetchnr > 0) ? randfetchnr : docCount;
        if (docCount == 0) {
            // Random.nextInt(0) is illegal and there is nothing to fetch anyway.
            fetchCount = 0;
        }

        long fetchStart = System.currentTimeMillis();
        for (int i = 0; i < fetchCount; i++) {
            // Bound the ID by the index size, not by the requested fetch count.
            Document doc = reader.document(rand.nextInt(docCount));
            // The values are read and discarded: only the fetch cost is being measured.
            doc.get("foo");
            doc.get("bar");
        }
        r.fetchTs += System.currentTimeMillis() - fetchStart;
        r.fetchTsNr += fetchCount;
    } finally {
        // Close a locally opened reader even if fetching threw, so it does not leak.
        if (ownsReader) {
            long closeStart = System.currentTimeMillis();
            reader.close();
            r.closeTs += System.currentTimeMillis() - closeStart;
            r.closeTsNr += 1;
        }
    }

    return r;
}

From source file:aos.lucene.search.advanced.BooksLikeThis.java

License:Apache License

/**
 * For every document ID in the book index, logs the document's title followed by
 * the titles of up to ten documents that {@code BooksLikeThis.docsLike} reports as
 * similar.
 *
 * <p>The reader and the directory are closed even when processing fails.
 *
 * @param args unused
 * @throws IOException if the index cannot be opened or read
 */
public static void main(String[] args) throws IOException {
    Directory dir = TestUtil.getBookIndexDirectory();
    try {
        IndexReader reader = DirectoryReader.open(dir);
        try {
            int numDocs = reader.maxDoc();
            BooksLikeThis blt = new BooksLikeThis(reader);

            for (int i = 0; i < numDocs; i++) {
                // Blank separator line between books; an explicit empty message is
                // passed because the logger is only ever called with a String here.
                LOGGER.info("");
                Document doc = reader.document(i);
                LOGGER.info(doc.get("title"));

                Document[] docs = blt.docsLike(i, 10);
                if (docs.length == 0) {
                    LOGGER.info("  None like this");
                }
                for (Document likeThisDoc : docs) {
                    LOGGER.info("  -> " + likeThisDoc.get("title"));
                }
            }
        } finally {
            reader.close();
        }
    } finally {
        dir.close();
    }
}

From source file:aos.lucene.search.advanced.CategorizerTest.java

License:Apache License

/**
 * Populates {@code categoryMap} with one term map per category.
 *
 * <p>For every non-deleted document, the "subject" term-frequency vector is handed
 * to {@code addTermFreqToMap} together with the map registered for the document's
 * "category" value; that map is created on first use.
 *
 * <p>The reader opened here is closed before returning, also on failure.
 *
 * @throws IOException if the index cannot be opened or read
 */
private void buildCategoryVectors() throws IOException {
    IndexReader reader = DirectoryReader.open(TestUtil.getBookIndexDirectory());
    // NOTE(review): closing assumes addTermFreqToMap copies what it needs out of the
    // term vector rather than keeping reader-backed state — confirm against the helper.
    try {
        int maxDoc = reader.maxDoc();

        for (int i = 0; i < maxDoc; i++) {
            if (reader.isDeleted(i)) {
                continue;
            }

            Document doc = reader.document(i);
            String category = doc.get("category");

            // Raw Map types are kept because categoryMap's declaration is not visible here.
            Map vectorMap = (Map) categoryMap.get(category);
            if (vectorMap == null) {
                vectorMap = new TreeMap();
                categoryMap.put(category, vectorMap);
            }

            TermFreqVector termFreqVector = reader.getTermFreqVector(i, "subject");
            addTermFreqToMap(vectorMap, termFreqVector);
        }
    } finally {
        reader.close();
    }
}

From source file:aos.lucene.search.advanced.FunctionQueryTest.java

License:Apache License

/**
 * Runs the query "java in action" wrapped in a {@code RecencyBoostingQuery} and logs
 * the top five hits, sorted by score and then by the "title2" field.
 *
 * <p>The searcher, reader and directory are released in reverse order of
 * acquisition even if the search or the logging fails.
 *
 * @throws Throwable if parsing, searching or reading the index fails
 */
public void testRecency() throws Throwable {
    Directory dir = TestUtil.getBookIndexDirectory();
    try {
        IndexReader r = DirectoryReader.open(dir);
        try {
            IndexSearcher s = new IndexSearcher(r);
            try {
                // Enable score tracking so scoreDocs[i].score is populated when sorting by field.
                s.setDefaultFieldSortScoring(true, true);

                QueryParser parser = new QueryParser(Version.LUCENE_46, "contents",
                        new StandardAnalyzer(Version.LUCENE_46));
                Query q = parser.parse("java in action");
                // Wrap the parsed query so the "pubmonthAsDay" field feeds a recency boost
                // (arguments: multiplier 2.0, window 2 * 365).
                Query q2 = new RecencyBoostingQuery(q, 2.0, 2 * 365, "pubmonthAsDay");
                Sort sort = new Sort(
                        new SortField[] { SortField.FIELD_SCORE, new SortField("title2", SortField.STRING) });
                TopDocs hits = s.search(q2, null, 5, sort);

                for (int i = 0; i < hits.scoreDocs.length; i++) {
                    Document doc = r.document(hits.scoreDocs[i].doc);
                    LOGGER.info((1 + i) + ": " + doc.get("title") + ": pubmonth=" + doc.get("pubmonth")
                            + " score=" + hits.scoreDocs[i].score);
                }
            } finally {
                s.close();
            }
        } finally {
            r.close();
        }
    } finally {
        dir.close();
    }
}

From source file:aos.lucene.tools.BooksMoreLikeThis.java

License:Apache License

/**
 * For every document ID in the index named by the {@code index.dir} system property,
 * logs the document's title, the MoreLikeThis query generated for it (over the
 * "title" and "author" fields), and the titles of up to ten similar documents other
 * than the document itself.
 *
 * <p>The reader and the directory are closed even when processing fails.
 *
 * @param args unused
 * @throws IllegalArgumentException if the {@code index.dir} system property is not set
 * @throws Throwable if the index cannot be opened, read or searched
 */
public static void main(String[] args) throws Throwable {

    String indexDir = System.getProperty("index.dir");
    if (indexDir == null) {
        // Fail with a clear message instead of the NullPointerException from new File(null).
        throw new IllegalArgumentException("System property 'index.dir' must point to the index directory");
    }

    FSDirectory directory = FSDirectory.open(new File(indexDir));
    try {
        IndexReader reader = DirectoryReader.open(directory);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            int numDocs = reader.maxDoc();

            MoreLikeThis mlt = new MoreLikeThis(reader);
            mlt.setFieldNames(new String[] { "title", "author" });
            // Lower both thresholds to 1 so that even rare terms contribute to the query.
            mlt.setMinTermFreq(1);
            mlt.setMinDocFreq(1);

            for (int docID = 0; docID < numDocs; docID++) {
                // Blank separator line between books; an explicit empty message is
                // passed because the logger is only ever called with a String here.
                LOGGER.info("");
                Document doc = reader.document(docID);
                LOGGER.info(doc.get("title"));

                Query query = mlt.like(docID);
                LOGGER.info("  query=" + query);

                TopDocs similarDocs = searcher.search(query, 10);
                if (similarDocs.totalHits == 0) {
                    LOGGER.info("  None like this");
                }
                for (int i = 0; i < similarDocs.scoreDocs.length; i++) {
                    int similarId = similarDocs.scoreDocs[i].doc;
                    if (similarId != docID) { // skip the source document itself
                        Document similar = reader.document(similarId);
                        // get() tolerates a missing "title" field, unlike getField(..).stringValue().
                        LOGGER.info("  -> " + similar.get("title"));
                    }
                }
            }
        } finally {
            reader.close();
        }
    } finally {
        directory.close();
    }
}

From source file:approxnn.DocIdSim.java

/**
 * Returns up to {@code k} neighbours in the order a {@link PriorityQueue} polls them.
 *
 * <p>Each neighbour's stored document is loaded, turned into a {@code DocVector},
 * tagged with its distance to {@code qvec}, and pushed onto a priority queue ordered
 * by {@code DocVector}'s natural ordering (presumably distance to the query — verify
 * against {@code DocVector.compareTo}).
 *
 * @param qvec   query vector the distances are measured against
 * @param reader index the neighbour documents are loaded from
 * @param k      maximum number of results; values {@code <= 0} yield an empty list
 * @return at most {@code k} vectors, smallest first according to the queue ordering
 * @throws Exception if a document cannot be read or converted
 */
public List<DocVector> selectTopK(DocVector qvec, IndexReader reader, int k) throws Exception {

    // Diamond instead of the raw type: keeps the queue type-checked.
    PriorityQueue<DocVector> nearest = new PriorityQueue<>();

    for (DocIdSim docIdSim : this.neighbors) {
        Document d = reader.document(docIdSim.sd.doc);
        DocVector dvec = new DocVector(d, qvec.numDimensions, DocVector.numIntervals);
        dvec.setDistWithQry(qvec.getDist(dvec));
        nearest.add(dvec);
    }

    // Clamp to [0, size] so a negative k cannot reach the ArrayList capacity argument.
    int limit = Math.max(0, Math.min(k, nearest.size()));
    List<DocVector> topDocsList = new ArrayList<>(limit);
    for (int i = 0; i < limit; i++) {
        topDocsList.add(nearest.poll());
    }

    return topDocsList;
}

From source file:approxnn.DocIdSim.java

/**
 * Finds the single neighbour closest to {@code qvec}.
 *
 * <p>Every neighbour's stored document is loaded, converted to a {@code DocVector}
 * and tagged with its distance to the query; the first vector with the strictly
 * smallest distance is kept.
 *
 * @param qvec   query vector the distances are measured against
 * @param reader index the neighbour documents are loaded from
 * @return a one-element list holding the closest vector; the element is {@code null}
 *         when no neighbour has a distance below {@code Float.MAX_VALUE} (for
 *         example when there are no neighbours)
 * @throws Exception if a document cannot be read or converted
 */
public List<DocVector> selectTop(DocVector qvec, IndexReader reader) throws Exception {
    DocVector closest = null;
    float closestDist = Float.MAX_VALUE;

    for (DocIdSim neighbor : this.neighbors) {
        DocVector candidate = new DocVector(reader.document(neighbor.sd.doc), qvec.numDimensions,
                DocVector.numIntervals, true);
        float candidateDist = qvec.getDist(candidate);
        candidate.setDistWithQry(candidateDist);

        // Strict comparison: on ties the earlier neighbour stays selected.
        if (candidateDist < closestDist) {
            closest = candidate;
            closestDist = candidateDist;
        }
    }

    List<DocVector> single = new ArrayList<>(1);
    single.add(closest);
    return single;
}