Example usage for org.apache.lucene.index IndexReader document

Introduction

This page lists example usages of the org.apache.lucene.index.IndexReader.document(int) method.

Prototype

public final Document document(int docID) throws IOException

Document

Returns the stored fields of the nth Document in this index.
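
A minimal usage sketch: open an existing index, iterate over all document IDs, and print each document's stored fields. The directory path ("index") and the Lucene 4.x-style API calls are illustrative assumptions, not taken from the examples below.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

public class PrintStoredFields {
    public static void main(String[] args) throws IOException {
        // "index" is a hypothetical path to an existing Lucene index directory
        Directory dir = FSDirectory.open(new File("index"));
        IndexReader reader = DirectoryReader.open(dir);
        try {
            // the live-docs bitset is null when the index contains no deletions
            Bits liveDocs = MultiFields.getLiveDocs(reader);
            for (int docId = 0; docId < reader.maxDoc(); docId++) {
                if (liveDocs != null && !liveDocs.get(docId)) {
                    continue; // skip deleted documents
                }
                // document(int) returns only the stored fields of this document
                Document doc = reader.document(docId);
                System.out.println(doc);
            }
        } finally {
            reader.close();
            dir.close();
        }
    }
}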

Usage

From source file:org.apache.nifi.provenance.lucene.DocsReader.java

License:Apache License

public Set<ProvenanceEventRecord> read(final TopDocs topDocs, final EventAuthorizer authorizer,
        final IndexReader indexReader, final Collection<Path> allProvenanceLogFiles,
        final AtomicInteger retrievalCount, final int maxResults, final int maxAttributeChars)
        throws IOException {
    if (retrievalCount.get() >= maxResults) {
        return Collections.emptySet();
    }

    final long start = System.nanoTime();
    final ScoreDoc[] scoreDocs = topDocs.scoreDocs;
    final int numDocs = Math.min(scoreDocs.length, maxResults);
    final List<Document> docs = new ArrayList<>(numDocs);

    for (int i = numDocs - 1; i >= 0; i--) {
        final int docId = scoreDocs[i].doc;
        final Document d = indexReader.document(docId);
        docs.add(d);
    }

    final long readDocuments = System.nanoTime() - start;
    logger.debug("Reading {} Lucene Documents took {} millis", docs.size(),
            TimeUnit.NANOSECONDS.toMillis(readDocuments));
    return read(docs, authorizer, allProvenanceLogFiles, retrievalCount, maxResults, maxAttributeChars);
}

From source file:org.apache.nutch.indexer.TestDeleteDuplicates.java

License:Apache License

private void hashDuplicatesHelper(Path index, String url) throws Exception {
    DeleteDuplicates dedup = new DeleteDuplicates(conf);
    dedup.dedup(new Path[] { index });
    FsDirectory dir = new FsDirectory(fs, new Path(index, "part-0000"), false, conf);
    IndexReader reader = IndexReader.open(dir);
    assertEquals("only one doc left", reader.numDocs(), 1);
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.isDeleted(i)) {
            System.out.println("-doc " + i + " deleted");
            continue;
        }
        Document doc = reader.document(i);
        // make sure we got the right one
        assertEquals("check url", url, doc.get("url"));
        System.out.println(doc);
    }
    reader.close();
}

From source file:org.apache.nutch.indexer.TestDeleteDuplicates.java

License:Apache License

public void testUrlDuplicates() throws Exception {
    DeleteDuplicates dedup = new DeleteDuplicates(conf);
    dedup.dedup(new Path[] { index2 });
    FsDirectory dir = new FsDirectory(fs, new Path(index2, "part-0000"), false, conf);
    IndexReader reader = IndexReader.open(dir);
    assertEquals("only one doc left", reader.numDocs(), 1);
    MD5Hash hash = MD5Hash.digest("2");
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.isDeleted(i)) {
            System.out.println("-doc " + i + " deleted");
            continue;
        }
        Document doc = reader.document(i);
        // make sure we got the right one
        assertEquals("check hash", hash.toString(), doc.get("digest"));
        System.out.println(doc);
    }
    reader.close();
}

From source file:org.apache.nutch.indexer.TestDeleteDuplicates.java

License:Apache License

public void testMixedDuplicates() throws Exception {
    DeleteDuplicates dedup = new DeleteDuplicates(conf);
    dedup.dedup(new Path[] { index1, index2 });
    FsDirectory dir = new FsDirectory(fs, new Path(index1, "part-0000"), false, conf);
    IndexReader reader = IndexReader.open(dir);
    assertEquals("only one doc left", reader.numDocs(), 1);
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.isDeleted(i)) {
            System.out.println("-doc " + i + " deleted");
            continue;
        }
        Document doc = reader.document(i);
        // make sure we got the right one
        assertEquals("check url", "http://www.example.com/2", doc.get("url"));
        System.out.println(doc);
    }
    reader.close();
    dir = new FsDirectory(fs, new Path(index2, "part-0000"), false, conf);
    reader = IndexReader.open(dir);
    assertEquals("only one doc left", reader.numDocs(), 1);
    MD5Hash hash = MD5Hash.digest("2");
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.isDeleted(i)) {
            System.out.println("-doc " + i + " deleted");
            continue;
        }
        Document doc = reader.document(i);
        // make sure we got the right one
        assertEquals("check hash", hash.toString(), doc.get("digest"));
        System.out.println(doc);
    }
    reader.close();
}

From source file:org.apache.nutch.indexer.TestIndexSorter.java

License:Apache License

public void testSorting() throws Exception {
    IndexSorter sorter = new IndexSorter(conf);
    sorter.sort(testDir);

    // read back documents
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(testDir, INDEX_SORTED)));
    assertEquals(reader.numDocs(), NUM_DOCS);
    for (int i = 0; i < reader.maxDoc(); i++) {
        Document doc = reader.document(i);
        Field f = doc.getField("content");
        assertNull(f);
        f = doc.getField("boost");
        float boost = Similarity.decodeNorm((byte) (NUM_DOCS - i));
        String cmp = String.valueOf(boost);
        assertEquals(cmp, f.stringValue());
    }
    reader.close();
}

From source file:org.apache.nutch.tools.SegmentMergeTool.java

License:Apache License

/** Run the tool, periodically reporting progress. */
public void run() {
    start = System.currentTimeMillis();
    stage = SegmentMergeStatus.STAGE_OPENING;
    long delta;
    LOG.info("* Opening " + allsegdirs.size() + " segments:");
    try {
        segdirs = new ArrayList();
        // open all segments
        for (int i = 0; i < allsegdirs.size(); i++) {
            File dir = (File) allsegdirs.get(i);
            SegmentReader sr = null;
            try {
                // try to autofix it if corrupted...
                sr = new SegmentReader(nfs, dir, true);
            } catch (Exception e) {
                // this segment is hosed beyond repair, don't use it
                LOG.warning("* Segment " + dir.getName() + " is corrupt beyond repair; skipping it.");
                continue;
            }
            segdirs.add(dir);
            totalRecords += sr.size;
            LOG.info(" - segment " + dir.getName() + ": " + sr.size + " records.");
            readers.put(dir.getName(), sr);
        }
        long total = totalRecords;
        LOG.info("* TOTAL " + total + " input records in " + segdirs.size() + " segments.");
        LOG.info("* Creating master index...");
        stage = SegmentMergeStatus.STAGE_MASTERIDX;
        // XXX Note that Lucene indexes don't work with NutchFileSystem for now.
        // XXX For now always assume LocalFileSystem here...
        Vector masters = new Vector();
        File fsmtIndexDir = new File(output, ".fastmerge_index");
        File masterDir = new File(fsmtIndexDir, "0");
        if (!masterDir.mkdirs()) {
            LOG.severe("Could not create a master index dir: " + masterDir);
            return;
        }
        masters.add(masterDir);
        IndexWriter iw = new IndexWriter(masterDir, new WhitespaceAnalyzer(), true);
        iw.setUseCompoundFile(false);
        iw.setMergeFactor(INDEX_MERGE_FACTOR);
        iw.setRAMBufferSizeMB(INDEX_MIN_MERGE_DOCS);
        long s1 = System.currentTimeMillis();
        Iterator it = readers.values().iterator();
        processedRecords = 0L;
        delta = System.currentTimeMillis();
        while (it.hasNext()) {
            SegmentReader sr = (SegmentReader) it.next();
            String name = sr.segmentDir.getName();
            FetcherOutput fo = new FetcherOutput();
            for (long i = 0; i < sr.size; i++) {
                try {
                    if (!sr.get(i, fo, null, null, null))
                        break;

                    Document doc = new Document();

                    // compute boost
                    float boost = IndexSegment.calculateBoost(fo.getFetchListEntry().getPage().getScore(),
                            scorePower, boostByLinkCount, fo.getAnchors().length);
                    //            doc.add(new Field("sd", name + "|" + i, true, false, false));
                    //            doc.add(new Field("uh", MD5Hash.digest(fo.getUrl().toString()).toString(), true, true, false));
                    //            doc.add(new Field("ch", fo.getMD5Hash().toString(), true, true, false));
                    //            doc.add(new Field("time", DateField.timeToString(fo.getFetchDate()), true, false, false));
                    //            doc.add(new Field("score", boost + "", true, false, false));
                    //            doc.add(new Field("ul", fo.getUrl().toString().length() + "", true, false, false));
                    iw.addDocument(doc);
                    processedRecords++;
                    if (processedRecords > 0 && (processedRecords % LOG_STEP == 0)) {
                        LOG.info(" Processed " + processedRecords + " records ("
                                + (float) (LOG_STEP * 1000) / (float) (System.currentTimeMillis() - delta)
                                + " rec/s)");
                        delta = System.currentTimeMillis();
                    }
                    if (processedRecords > 0 && (processedRecords % INDEX_SIZE == 0)) {
                        iw.optimize();
                        iw.close();
                        LOG.info(" - creating next subindex...");
                        masterDir = new File(fsmtIndexDir, "" + masters.size());
                        if (!masterDir.mkdirs()) {
                            LOG.severe("Could not create a master index dir: " + masterDir);
                            return;
                        }
                        masters.add(masterDir);
                        iw = new IndexWriter(masterDir, new WhitespaceAnalyzer(), true);
                        iw.setUseCompoundFile(false);
                        iw.setMergeFactor(INDEX_MERGE_FACTOR);
                        iw.setRAMBufferSizeMB(INDEX_MIN_MERGE_DOCS);
                    }
                } catch (Throwable t) {
                    // we can assume the data is invalid from now on - break here
                    LOG.info(" - segment " + name + " truncated to " + (i + 1) + " records");
                    break;
                }
            }
        }
        iw.optimize();
        LOG.info("* Creating index took " + (System.currentTimeMillis() - s1) + " ms");
        s1 = System.currentTimeMillis();
        // merge all other indexes using the latest IndexWriter (still open):
        if (masters.size() > 1) {
            LOG.info(" - merging subindexes...");
            stage = SegmentMergeStatus.STAGE_MERGEIDX;
            IndexReader[] ireaders = new IndexReader[masters.size() - 1];
            for (int i = 0; i < masters.size() - 1; i++)
                ireaders[i] = IndexReader.open((File) masters.get(i));
            iw.addIndexes(ireaders);
            for (int i = 0; i < masters.size() - 1; i++) {
                ireaders[i].close();
                FileUtil.fullyDelete((File) masters.get(i));
            }
        }
        iw.close();
        LOG.info("* Optimizing index took " + (System.currentTimeMillis() - s1) + " ms");
        LOG.info("* Removing duplicate entries...");
        stage = SegmentMergeStatus.STAGE_DEDUP;
        IndexReader ir = IndexReader.open(masterDir);
        int i = 0;
        long cnt = 0L;
        processedRecords = 0L;
        s1 = System.currentTimeMillis();
        delta = s1;
        TermEnum te = ir.terms();
        while (te.next()) {
            Term t = te.term();
            if (t == null)
                continue;
            if (!(t.field().equals("ch") || t.field().equals("uh")))
                continue;
            cnt++;
            processedRecords = cnt / 2;
            if (cnt > 0 && (cnt % (LOG_STEP * 2) == 0)) {
                LOG.info(" Processed " + processedRecords + " records ("
                        + (float) (LOG_STEP * 1000) / (float) (System.currentTimeMillis() - delta) + " rec/s)");
                delta = System.currentTimeMillis();
            }
            // Enumerate all docs with the same URL hash or content hash
            TermDocs td = ir.termDocs(t);
            if (td == null)
                continue;
            if (t.field().equals("uh")) {
                // Keep only the latest version of the document with
                // the same url hash. Note: even if the content
                // hash is identical, other metadata may be different, so even
                // in this case it makes sense to keep the latest version.
                int id = -1;
                String time = null;
                Document doc = null;
                while (td.next()) {
                    int docid = td.doc();
                    if (!ir.isDeleted(docid)) {
                        doc = ir.document(docid);
                        if (time == null) {
                            time = doc.get("time");
                            id = docid;
                            continue;
                        }
                        String dtime = doc.get("time");
                        // "time" is a DateField, and can be compared lexicographically
                        if (dtime.compareTo(time) > 0) {
                            if (id != -1) {
                                ir.deleteDocument(id);
                            }
                            time = dtime;
                            id = docid;
                        } else {
                            ir.deleteDocument(docid);
                        }
                    }
                }
            } else if (t.field().equals("ch")) {
                // Keep only the version of the document with
                // the highest score, and then with the shortest url.
                int id = -1;
                int ul = 0;
                float score = 0.0f;
                Document doc = null;
                while (td.next()) {
                    int docid = td.doc();
                    if (!ir.isDeleted(docid)) {
                        doc = ir.document(docid);
                        if (ul == 0) {
                            try {
                                ul = Integer.parseInt(doc.get("ul"));
                                score = Float.parseFloat(doc.get("score"));
                            } catch (Exception e) {
                            }
                            id = docid;
                            continue;
                        }
                        int dul = 0;
                        float dscore = 0.0f;
                        try {
                            dul = Integer.parseInt(doc.get("ul"));
                            dscore = Float.parseFloat(doc.get("score"));
                        } catch (Exception e) {
                        }
                        int cmp = Float.compare(dscore, score);
                        if (cmp == 0) {
                            // equal scores, select the one with shortest url
                            if (dul < ul) {
                                if (id != -1) {
                                    ir.deleteDocument(id);
                                }
                                ul = dul;
                                id = docid;
                            } else {
                                ir.deleteDocument(docid);
                            }
                        } else if (cmp < 0) {
                            ir.deleteDocument(docid);
                        } else {
                            if (id != -1) {
                                ir.deleteDocument(id);
                            }
                            ul = dul;
                            id = docid;
                        }
                    }
                }
            }
        }
        //
        // keep the IndexReader open...
        //

        LOG.info("* Deduplicating took " + (System.currentTimeMillis() - s1) + " ms");
        stage = SegmentMergeStatus.STAGE_WRITING;
        processedRecords = 0L;
        Vector outDirs = new Vector();
        File outDir = new File(output, SegmentWriter.getNewSegmentName());
        outDirs.add(outDir);
        LOG.info("* Merging all segments into " + output.getName());
        s1 = System.currentTimeMillis();
        delta = s1;
        nfs.mkdirs(outDir);
        SegmentWriter sw = new SegmentWriter(nfs, outDir, true);
        LOG.fine(" - opening first output segment in " + outDir.getName());
        FetcherOutput fo = new FetcherOutput();
        Content co = new Content();
        ParseText pt = new ParseText();
        ParseData pd = new ParseData();
        int outputCnt = 0;
        for (int n = 0; n < ir.maxDoc(); n++) {
            if (ir.isDeleted(n)) {
                //System.out.println("-del");
                continue;
            }
            Document doc = ir.document(n);
            String segDoc = doc.get("sd");
            int idx = segDoc.indexOf('|');
            String segName = segDoc.substring(0, idx);
            String docName = segDoc.substring(idx + 1);
            SegmentReader sr = (SegmentReader) readers.get(segName);
            long docid;
            try {
                docid = Long.parseLong(docName);
            } catch (Exception e) {
                continue;
            }
            try {
                // get data from the reader
                sr.get(docid, fo, co, pt, pd);
            } catch (Throwable thr) {
                // don't break the loop, because only one of the segments
                // may be corrupted...
                LOG.fine(" - corrupt record no. " + docid + " in segment " + sr.segmentDir.getName()
                        + " - skipping.");
                continue;
            }
            sw.append(fo, co, pt, pd);
            outputCnt++;
            processedRecords++;
            if (processedRecords > 0 && (processedRecords % LOG_STEP == 0)) {
                LOG.info(" Processed " + processedRecords + " records ("
                        + (float) (LOG_STEP * 1000) / (float) (System.currentTimeMillis() - delta) + " rec/s)");
                delta = System.currentTimeMillis();
            }
            if (processedRecords % maxCount == 0) {
                sw.close();
                outDir = new File(output, SegmentWriter.getNewSegmentName());
                LOG.fine(" - starting next output segment in " + outDir.getName());
                nfs.mkdirs(outDir);
                sw = new SegmentWriter(nfs, outDir, true);
                outDirs.add(outDir);
            }
        }
        LOG.info("* Merging took " + (System.currentTimeMillis() - s1) + " ms");
        ir.close();
        sw.close();
        FileUtil.fullyDelete(fsmtIndexDir);
        for (Iterator iter = readers.keySet().iterator(); iter.hasNext();) {
            SegmentReader sr = (SegmentReader) readers.get(iter.next());
            sr.close();
        }
        if (runIndexer) {
            stage = SegmentMergeStatus.STAGE_INDEXING;
            totalRecords = outDirs.size();
            processedRecords = 0L;
            LOG.info("* Creating new segment index(es)...");
            File workingDir = new File(output, "indexsegment-workingdir");
            for (int k = 0; k < outDirs.size(); k++) {
                processedRecords++;
                if (workingDir.exists()) {
                    FileUtil.fullyDelete(workingDir);
                }
                IndexSegment indexer = new IndexSegment(nfs, Integer.MAX_VALUE, (File) outDirs.get(k),
                        workingDir);
                indexer.indexPages();
                FileUtil.fullyDelete(workingDir);
            }
        }
        if (delSegs) {
            // This deletes also all corrupt segments, which are
            // unusable anyway
            stage = SegmentMergeStatus.STAGE_DELETING;
            totalRecords = allsegdirs.size();
            processedRecords = 0L;
            LOG.info("* Deleting old segments...");
            for (int k = 0; k < allsegdirs.size(); k++) {
                processedRecords++;
                FileUtil.fullyDelete((File) allsegdirs.get(k));
            }
        }
        delta = System.currentTimeMillis() - start;
        float eps = (float) total / (float) (delta / 1000);
        LOG.info("Finished SegmentMergeTool: INPUT: " + total + " -> OUTPUT: " + outputCnt + " entries in "
                + ((float) delta / 1000f) + " s (" + eps + " entries/sec).");
    } catch (Exception e) {
        e.printStackTrace();
        LOG.severe(e.getMessage());
    }
}

From source file:org.apache.solr.codecs.test.testGetStoredFields.java

License:Apache License

public static void getDoc(String searchField, String searchString) throws IOException, ParseException {

    System.out.println("Searching for '" + searchString + "'");
    Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
    IndexReader indexReader = DirectoryReader.open(luceneDir);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    TotalHitCountCollector hitCountCollector = new TotalHitCountCollector();
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
    QueryParser queryParser = new QueryParser(Version.LUCENE_4_10_1, searchField, analyzer);
    Query query = queryParser.parse(searchString);
    indexSearcher.search(query, hitCountCollector);
    System.out.println("Word: " + searchString + "; Number of hits: " + hitCountCollector.getTotalHits());
    System.out.println("maxdocs=" + indexReader.maxDoc());
    org.apache.lucene.search.TopDocs docs = indexSearcher.search(query, 100);
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        Document doc1 = indexReader.document(docs.scoreDocs[i].doc);
        System.out.println("title=" + doc1.get("title"));
        System.out.println("content=" + doc1.get("content"));
        System.out.println("global_bu_id=" + doc1.get("global_bu_id"));
        System.out.println("omega_order_num=" + doc1.get("omega_order_num"));
        System.out.println("------");
    }
    luceneDir.close();

}

From source file:org.apache.solr.search.TestQueryWrapperFilter.java

License:Apache License

public void testRandom() throws Exception {
    final Directory d = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), d);
    w.w.getConfig().setMaxBufferedDocs(17);
    final int numDocs = atLeast(100);
    final Set<String> aDocs = new HashSet<>();
    for (int i = 0; i < numDocs; i++) {
        final Document doc = new Document();
        final String v;
        if (random().nextInt(5) == 4) {
            v = "a";
            aDocs.add("" + i);
        } else {
            v = "b";
        }
        final Field f = newStringField("field", v, Field.Store.NO);
        doc.add(f);
        doc.add(newStringField("id", "" + i, Field.Store.YES));
        w.addDocument(doc);
    }

    final int numDelDocs = atLeast(10);
    for (int i = 0; i < numDelDocs; i++) {
        final String delID = "" + random().nextInt(numDocs);
        w.deleteDocuments(new Term("id", delID));
        aDocs.remove(delID);
    }

    final IndexReader r = w.getReader();
    w.close();
    final TopDocs hits = newSearcher(r).search(new QueryWrapperFilter(new TermQuery(new Term("field", "a"))),
            numDocs);
    assertEquals(aDocs.size(), hits.totalHits);
    for (ScoreDoc sd : hits.scoreDocs) {
        assertTrue(aDocs.contains(r.document(sd.doc).get("id")));
    }
    r.close();
    d.close();
}

From source file:org.apache.solr.search.TestStressLucene.java

License:Apache License

@Test
public void testStressLuceneNRT() throws Exception {
    final int commitPercent = 5 + random().nextInt(20);
    final int softCommitPercent = 30 + random().nextInt(75); // what percent of the commits are soft
    final int deletePercent = 4 + random().nextInt(25);
    final int deleteByQueryPercent = 1 + random().nextInt(5);
    final int ndocs = 5 + (random().nextBoolean() ? random().nextInt(25) : random().nextInt(200));
    int nWriteThreads = 5 + random().nextInt(25);

    final int maxConcurrentCommits = nWriteThreads; // number of committers at a time... it should be <= maxWarmingSearchers

    final AtomicLong operations = new AtomicLong(100000); // number of query operations to perform in total
    int nReadThreads = 5 + random().nextInt(25);
    final boolean tombstones = random().nextBoolean();
    final boolean syncCommits = random().nextBoolean();

    verbose("commitPercent=", commitPercent);
    verbose("softCommitPercent=", softCommitPercent);
    verbose("deletePercent=", deletePercent);
    verbose("deleteByQueryPercent=", deleteByQueryPercent);
    verbose("ndocs=", ndocs);
    verbose("nWriteThreads=", nWriteThreads);
    verbose("nReadThreads=", nReadThreads);
    verbose("maxConcurrentCommits=", maxConcurrentCommits);
    verbose("operations=", operations);
    verbose("tombstones=", tombstones);
    verbose("syncCommits=", syncCommits);

    initModel(ndocs);

    final AtomicInteger numCommitting = new AtomicInteger();

    List<Thread> threads = new ArrayList<Thread>();

    final FieldType idFt = new FieldType();
    idFt.setIndexed(true);
    idFt.setStored(true);
    idFt.setOmitNorms(true);
    idFt.setTokenized(false);
    idFt.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY);

    final FieldType ft2 = new FieldType();
    ft2.setIndexed(false);
    ft2.setStored(true);

    // model how solr does locking - only allow one thread to do a hard commit at once, and only one thread to do a soft commit, but
    // a hard commit in progress does not stop a soft commit.
    final Lock hardCommitLock = syncCommits ? new ReentrantLock() : null;
    final Lock reopenLock = syncCommits ? new ReentrantLock() : null;

    // RAMDirectory dir = new RAMDirectory();
    // final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_40, new WhitespaceAnalyzer(Version.LUCENE_40)));

    Directory dir = newDirectory();

    final RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    writer.setDoRandomForceMergeAssert(false);

    // writer.commit();
    // reader = IndexReader.open(dir);
    // make this reader an NRT reader from the start to avoid the first non-writer openIfChanged
    // to only opening at the last commit point.
    reader = DirectoryReader.open(writer.w, true);

    for (int i = 0; i < nWriteThreads; i++) {
        Thread thread = new Thread("WRITER" + i) {
            Random rand = new Random(random().nextInt());

            @Override
            public void run() {
                try {
                    while (operations.get() > 0) {
                        int oper = rand.nextInt(100);

                        if (oper < commitPercent) {
                            if (numCommitting.incrementAndGet() <= maxConcurrentCommits) {
                                Map<Integer, DocInfo> newCommittedModel;
                                long version;
                                DirectoryReader oldReader;

                                boolean softCommit = rand.nextInt(100) < softCommitPercent;

                                if (!softCommit) {
                                    // only allow one hard commit to proceed at once
                                    if (hardCommitLock != null)
                                        hardCommitLock.lock();
                                    verbose("hardCommit start");

                                    writer.commit();
                                }

                                if (reopenLock != null)
                                    reopenLock.lock();

                                synchronized (globalLock) {
                                    newCommittedModel = new HashMap<Integer, DocInfo>(model); // take a snapshot
                                    version = snapshotCount++;
                                    oldReader = reader;
                                    oldReader.incRef(); // increment the reference since we will use this for reopening
                                }

                                if (!softCommit) {
                                    // must commit after taking a snapshot of the model
                                    // writer.commit();
                                }

                                verbose("reopen start using", oldReader);

                                DirectoryReader newReader;
                                if (softCommit) {
                                    newReader = DirectoryReader.openIfChanged(oldReader, writer.w, true);
                                } else {
                                    // will only open to last commit
                                    newReader = DirectoryReader.openIfChanged(oldReader);
                                }

                                if (newReader == null) {
                                    oldReader.incRef();
                                    newReader = oldReader;
                                }
                                oldReader.decRef();

                                verbose("reopen result", newReader);

                                synchronized (globalLock) {
                                    assert newReader.getRefCount() > 0;
                                    assert reader.getRefCount() > 0;

                                    // install the new reader if it's newest (and check the current version since another reader may have already been installed)
                                    if (newReader.getVersion() > reader.getVersion()) {
                                        reader.decRef();
                                        reader = newReader;

                                        // install this snapshot only if it's newer than the current one
                                        if (version >= committedModelClock) {
                                            committedModel = newCommittedModel;
                                            committedModelClock = version;
                                        }

                                    } else {
                                        // close if unused
                                        newReader.decRef();
                                    }

                                }

                                if (reopenLock != null)
                                    reopenLock.unlock();

                                if (!softCommit) {
                                    if (hardCommitLock != null)
                                        hardCommitLock.unlock();
                                }

                            }
                            numCommitting.decrementAndGet();
                            continue;
                        }

                        int id = rand.nextInt(ndocs);
                        Object sync = syncArr[id];

                        // set the lastId before we actually change it sometimes to try and
                        // uncover more race conditions between writing and reading
                        boolean before = rand.nextBoolean();
                        if (before) {
                            lastId = id;
                        }

                        // We can't concurrently update the same document and retain our invariants of increasing values
                        // since we can't guarantee what order the updates will be executed.
                        synchronized (sync) {
                            DocInfo info = model.get(id);
                            long val = info.val;
                            long nextVal = Math.abs(val) + 1;

                            if (oper < commitPercent + deletePercent) {
                                // add tombstone first
                                if (tombstones) {
                                    Document d = new Document();
                                    d.add(new Field("id", "-" + Integer.toString(id), idFt));
                                    d.add(new Field(field, Long.toString(nextVal), ft2));
                                    verbose("adding tombstone for id", id, "val=", nextVal);
                                    writer.updateDocument(new Term("id", "-" + Integer.toString(id)), d);
                                }

                                verbose("deleting id", id, "val=", nextVal);
                                writer.deleteDocuments(new Term("id", Integer.toString(id)));
                                model.put(id, new DocInfo(0, -nextVal));
                                verbose("deleting id", id, "val=", nextVal, "DONE");

                            } else if (oper < commitPercent + deletePercent + deleteByQueryPercent) {
                                //assertU("<delete><query>id:" + id + "</query></delete>");

                                // add tombstone first
                                if (tombstones) {
                                    Document d = new Document();
                                    d.add(new Field("id", "-" + Integer.toString(id), idFt));
                                    d.add(new Field(field, Long.toString(nextVal), ft2));
                                    verbose("adding tombstone for id", id, "val=", nextVal);
                                    writer.updateDocument(new Term("id", "-" + Integer.toString(id)), d);
                                }

                                verbose("deleteByQuery", id, "val=", nextVal);
                                writer.deleteDocuments(new TermQuery(new Term("id", Integer.toString(id))));
                                model.put(id, new DocInfo(0, -nextVal));
                                verbose("deleteByQuery", id, "val=", nextVal, "DONE");
                            } else {
                                // model.put(id, nextVal);   // uncomment this and this test should fail.

                                // assertU(adoc("id",Integer.toString(id), field, Long.toString(nextVal)));
                                Document d = new Document();
                                d.add(new Field("id", Integer.toString(id), idFt));
                                d.add(new Field(field, Long.toString(nextVal), ft2));
                                verbose("adding id", id, "val=", nextVal);
                                writer.updateDocument(new Term("id", Integer.toString(id)), d);
                                if (tombstones) {
                                    // remove tombstone after new addition (this should be optional?)
                                    verbose("deleting tombstone for id", id);
                                    writer.deleteDocuments(new Term("id", "-" + Integer.toString(id)));
                                    verbose("deleting tombstone for id", id, "DONE");
                                }

                                model.put(id, new DocInfo(0, nextVal));
                                verbose("adding id", id, "val=", nextVal, "DONE");
                            }
                        }

                        if (!before) {
                            lastId = id;
                        }
                    }
                } catch (Exception ex) {
                    throw new RuntimeException(ex);
                }
            }
        };

        threads.add(thread);
    }

    for (int i = 0; i < nReadThreads; i++) {
        Thread thread = new Thread("READER" + i) {
            Random rand = new Random(random().nextInt());

            @Override
            public void run() {
                try {
                    while (operations.decrementAndGet() >= 0) {
                        // bias toward a recently changed doc
                        int id = rand.nextInt(100) < 25 ? lastId : rand.nextInt(ndocs);

                        // when indexing, we update the index, then the model
                        // so when querying, we should first check the model, and then the index

                        DocInfo info;
                        synchronized (globalLock) {
                            info = committedModel.get(id);
                        }
                        long val = info.val;

                        IndexReader r;
                        synchronized (globalLock) {
                            r = reader;
                            r.incRef();
                        }

                        int docid = getFirstMatch(r, new Term("id", Integer.toString(id)));

                        if (docid < 0 && tombstones) {
                            // if we couldn't find the doc, look for it's tombstone
                            docid = getFirstMatch(r, new Term("id", "-" + Integer.toString(id)));
                            if (docid < 0) {
                                if (val == -1L) {
                                    // expected... no doc was added yet
                                    r.decRef();
                                    continue;
                                }
                                verbose("ERROR: Couldn't find a doc  or tombstone for id", id, "using reader",
                                        r, "expected value", val);
                                fail("No documents or tombstones found for id " + id + ", expected at least "
                                        + val);
                            }
                        }

                        if (docid < 0 && !tombstones) {
                            // nothing to do - we can't tell anything from a deleted doc without tombstones
                        } else {
                            if (docid < 0) {
                                verbose("ERROR: Couldn't find a doc for id", id, "using reader", r);
                            }
                            assertTrue(docid >= 0); // we should have found the document, or it's tombstone
                            Document doc = r.document(docid);
                            long foundVal = Long.parseLong(doc.get(field));
                            if (foundVal < Math.abs(val)) {
                                verbose("ERROR: id", id, "model_val=", val, " foundVal=", foundVal, "reader=",
                                        reader);
                            }
                            assertTrue(foundVal >= Math.abs(val));
                        }

                        r.decRef();
                    }
                } catch (Throwable e) {
                    operations.set(-1L);
                    throw new RuntimeException(e);
                }
            }
        };

        threads.add(thread);
    }

    for (Thread thread : threads) {
        thread.start();
    }

    for (Thread thread : threads) {
        thread.join();
    }

    writer.close();
    reader.close();
    dir.close();
}

From source file:org.arastreju.sge.index.ArastrejuIndex.java

License:Apache License

public void dump() {
    ContextIndex index = provider.forContext(conversationContext.getPrimaryContext());
    org.apache.lucene.search.IndexSearcher searcher = index.getSearcher();
    IndexReader reader = searcher.getIndexReader();

    try {
        TopDocs top = searcher.search(new MatchAllDocsQuery(), 100);
        for (int i = 0; i < top.totalHits; i++) {
            Document doc = reader.document(top.scoreDocs[i].doc);
            LOGGER.info("---Document--- id: " + top.scoreDocs[i].doc);
            List<Fieldable> fields = doc.getFields();
            for (Fieldable f : fields) {
                LOGGER.info("\tField: name='" + f.name() + "', val='" + f.stringValue() + "'");
            }

        }
    } catch (IOException e) {
        String msg = "caught IOException while dumping index";
        LOGGER.error(msg, e);
        throw new RuntimeException(msg, e);
    }
}