List of usage examples for org.apache.lucene.index.IndexReader.document
public final Document document(int docID) throws IOException
Returns the stored fields of the nth Document in this index.
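Before the project examples below, here is a minimal self-contained sketch of the basic pattern: iterate docIDs up to maxDoc(), skip deleted documents, and load each document's stored fields with document(docID). It assumes a Lucene 4.x-style API; the index path, the "title" field and the PrintStoredFields class name are placeholders for illustration only and do not come from any of the projects listed below.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

public class PrintStoredFields {
  public static void main(String[] args) throws IOException {
    // "/path/to/index" and the "title" field are placeholders.
    Directory dir = FSDirectory.open(new File("/path/to/index"));
    IndexReader reader = DirectoryReader.open(dir);
    try {
      Bits liveDocs = MultiFields.getLiveDocs(reader); // null when the index has no deletions
      for (int docID = 0; docID < reader.maxDoc(); docID++) {
        if (liveDocs != null && !liveDocs.get(docID)) {
          continue; // skip deleted documents
        }
        Document doc = reader.document(docID); // load this document's stored fields
        System.out.println(docID + " -> " + doc.get("title"));
      }
    } finally {
      reader.close();
      dir.close();
    }
  }
}

Older 3.x-era code performs the same deleted-document check with reader.isDeleted(i), as several of the Nutch examples below show.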
From source file:org.apache.nifi.provenance.lucene.DocsReader.java
License:Apache License
public Set<ProvenanceEventRecord> read(final TopDocs topDocs, final EventAuthorizer authorizer,
    final IndexReader indexReader, final Collection<Path> allProvenanceLogFiles,
    final AtomicInteger retrievalCount, final int maxResults, final int maxAttributeChars) throws IOException {
  if (retrievalCount.get() >= maxResults) {
    return Collections.emptySet();
  }

  final long start = System.nanoTime();

  final ScoreDoc[] scoreDocs = topDocs.scoreDocs;
  final int numDocs = Math.min(scoreDocs.length, maxResults);
  final List<Document> docs = new ArrayList<>(numDocs);

  for (int i = numDocs - 1; i >= 0; i--) {
    final int docId = scoreDocs[i].doc;
    final Document d = indexReader.document(docId);
    docs.add(d);
  }

  final long readDocuments = System.nanoTime() - start;
  logger.debug("Reading {} Lucene Documents took {} millis", docs.size(),
      TimeUnit.NANOSECONDS.toMillis(readDocuments));

  return read(docs, authorizer, allProvenanceLogFiles, retrievalCount, maxResults, maxAttributeChars);
}
From source file:org.apache.nutch.indexer.TestDeleteDuplicates.java
License:Apache License
private void hashDuplicatesHelper(Path index, String url) throws Exception {
  DeleteDuplicates dedup = new DeleteDuplicates(conf);
  dedup.dedup(new Path[] { index });
  FsDirectory dir = new FsDirectory(fs, new Path(index, "part-0000"), false, conf);
  IndexReader reader = IndexReader.open(dir);
  assertEquals("only one doc left", reader.numDocs(), 1);
  for (int i = 0; i < reader.maxDoc(); i++) {
    if (reader.isDeleted(i)) {
      System.out.println("-doc " + i + " deleted");
      continue;
    }
    Document doc = reader.document(i);
    // make sure we got the right one
    assertEquals("check url", url, doc.get("url"));
    System.out.println(doc);
  }
  reader.close();
}
From source file:org.apache.nutch.indexer.TestDeleteDuplicates.java
License:Apache License
public void testUrlDuplicates() throws Exception {
  DeleteDuplicates dedup = new DeleteDuplicates(conf);
  dedup.dedup(new Path[] { index2 });
  FsDirectory dir = new FsDirectory(fs, new Path(index2, "part-0000"), false, conf);
  IndexReader reader = IndexReader.open(dir);
  assertEquals("only one doc left", reader.numDocs(), 1);
  MD5Hash hash = MD5Hash.digest("2");
  for (int i = 0; i < reader.maxDoc(); i++) {
    if (reader.isDeleted(i)) {
      System.out.println("-doc " + i + " deleted");
      continue;
    }
    Document doc = reader.document(i);
    // make sure we got the right one
    assertEquals("check hash", hash.toString(), doc.get("digest"));
    System.out.println(doc);
  }
  reader.close();
}
From source file:org.apache.nutch.indexer.TestDeleteDuplicates.java
License:Apache License
public void testMixedDuplicates() throws Exception {
  DeleteDuplicates dedup = new DeleteDuplicates(conf);
  dedup.dedup(new Path[] { index1, index2 });
  FsDirectory dir = new FsDirectory(fs, new Path(index1, "part-0000"), false, conf);
  IndexReader reader = IndexReader.open(dir);
  assertEquals("only one doc left", reader.numDocs(), 1);
  for (int i = 0; i < reader.maxDoc(); i++) {
    if (reader.isDeleted(i)) {
      System.out.println("-doc " + i + " deleted");
      continue;
    }
    Document doc = reader.document(i);
    // make sure we got the right one
    assertEquals("check url", "http://www.example.com/2", doc.get("url"));
    System.out.println(doc);
  }
  reader.close();

  dir = new FsDirectory(fs, new Path(index2, "part-0000"), false, conf);
  reader = IndexReader.open(dir);
  assertEquals("only one doc left", reader.numDocs(), 1);
  MD5Hash hash = MD5Hash.digest("2");
  for (int i = 0; i < reader.maxDoc(); i++) {
    if (reader.isDeleted(i)) {
      System.out.println("-doc " + i + " deleted");
      continue;
    }
    Document doc = reader.document(i);
    // make sure we got the right one
    assertEquals("check hash", hash.toString(), doc.get("digest"));
    System.out.println(doc);
  }
  reader.close();
}
From source file:org.apache.nutch.indexer.TestIndexSorter.java
License:Apache License
public void testSorting() throws Exception {
  IndexSorter sorter = new IndexSorter(conf);
  sorter.sort(testDir);

  // read back documents
  IndexReader reader = IndexReader.open(FSDirectory.open(new File(testDir, INDEX_SORTED)));
  assertEquals(reader.numDocs(), NUM_DOCS);
  for (int i = 0; i < reader.maxDoc(); i++) {
    Document doc = reader.document(i);
    Field f = doc.getField("content");
    assertNull(f);
    f = doc.getField("boost");
    float boost = Similarity.decodeNorm((byte) (NUM_DOCS - i));
    String cmp = String.valueOf(boost);
    assertEquals(cmp, f.stringValue());
  }
  reader.close();
}
From source file:org.apache.nutch.tools.SegmentMergeTool.java
License:Apache License
/** Run the tool, periodically reporting progress. */
public void run() {
  start = System.currentTimeMillis();
  stage = SegmentMergeStatus.STAGE_OPENING;
  long delta;
  LOG.info("* Opening " + allsegdirs.size() + " segments:");
  try {
    segdirs = new ArrayList();
    // open all segments
    for (int i = 0; i < allsegdirs.size(); i++) {
      File dir = (File) allsegdirs.get(i);
      SegmentReader sr = null;
      try {
        // try to autofix it if corrupted...
        sr = new SegmentReader(nfs, dir, true);
      } catch (Exception e) {
        // this segment is hosed beyond repair, don't use it
        LOG.warning("* Segment " + dir.getName() + " is corrupt beyond repair; skipping it.");
        continue;
      }
      segdirs.add(dir);
      totalRecords += sr.size;
      LOG.info(" - segment " + dir.getName() + ": " + sr.size + " records.");
      readers.put(dir.getName(), sr);
    }
    long total = totalRecords;
    LOG.info("* TOTAL " + total + " input records in " + segdirs.size() + " segments.");
    LOG.info("* Creating master index...");
    stage = SegmentMergeStatus.STAGE_MASTERIDX;
    // XXX Note that Lucene indexes don't work with NutchFileSystem for now.
    // XXX For now always assume LocalFileSystem here...
    Vector masters = new Vector();
    File fsmtIndexDir = new File(output, ".fastmerge_index");
    File masterDir = new File(fsmtIndexDir, "0");
    if (!masterDir.mkdirs()) {
      LOG.severe("Could not create a master index dir: " + masterDir);
      return;
    }
    masters.add(masterDir);
    IndexWriter iw = new IndexWriter(masterDir, new WhitespaceAnalyzer(), true);
    iw.setUseCompoundFile(false);
    iw.setMergeFactor(INDEX_MERGE_FACTOR);
    iw.setRAMBufferSizeMB(INDEX_MIN_MERGE_DOCS);
    long s1 = System.currentTimeMillis();
    Iterator it = readers.values().iterator();
    processedRecords = 0L;
    delta = System.currentTimeMillis();
    while (it.hasNext()) {
      SegmentReader sr = (SegmentReader) it.next();
      String name = sr.segmentDir.getName();
      FetcherOutput fo = new FetcherOutput();
      for (long i = 0; i < sr.size; i++) {
        try {
          if (!sr.get(i, fo, null, null, null))
            break;
          Document doc = new Document();
          // compute boost
          float boost = IndexSegment.calculateBoost(fo.getFetchListEntry().getPage().getScore(),
              scorePower, boostByLinkCount, fo.getAnchors().length);
          // doc.add(new Field("sd", name + "|" + i, true, false, false));
          // doc.add(new Field("uh", MD5Hash.digest(fo.getUrl().toString()).toString(), true, true, false));
          // doc.add(new Field("ch", fo.getMD5Hash().toString(), true, true, false));
          // doc.add(new Field("time", DateField.timeToString(fo.getFetchDate()), true, false, false));
          // doc.add(new Field("score", boost + "", true, false, false));
          // doc.add(new Field("ul", fo.getUrl().toString().length() + "", true, false, false));
          iw.addDocument(doc);
          processedRecords++;
          if (processedRecords > 0 && (processedRecords % LOG_STEP == 0)) {
            LOG.info(" Processed " + processedRecords + " records (" + (float) (LOG_STEP * 1000)
                / (float) (System.currentTimeMillis() - delta) + " rec/s)");
            delta = System.currentTimeMillis();
          }
          if (processedRecords > 0 && (processedRecords % INDEX_SIZE == 0)) {
            iw.optimize();
            iw.close();
            LOG.info(" - creating next subindex...");
            masterDir = new File(fsmtIndexDir, "" + masters.size());
            if (!masterDir.mkdirs()) {
              LOG.severe("Could not create a master index dir: " + masterDir);
              return;
            }
            masters.add(masterDir);
            iw = new IndexWriter(masterDir, new WhitespaceAnalyzer(), true);
            iw.setUseCompoundFile(false);
            iw.setMergeFactor(INDEX_MERGE_FACTOR);
            iw.setRAMBufferSizeMB(INDEX_MIN_MERGE_DOCS);
          }
        } catch (Throwable t) {
          // we can assume the data is invalid from now on - break here
          LOG.info(" - segment " + name + " truncated to " + (i + 1) + " records");
          break;
        }
      }
    }
    iw.optimize();
    LOG.info("* Creating index took " + (System.currentTimeMillis() - s1) + " ms");
    s1 = System.currentTimeMillis();
    // merge all other indexes using the latest IndexWriter (still open):
    if (masters.size() > 1) {
      LOG.info(" - merging subindexes...");
      stage = SegmentMergeStatus.STAGE_MERGEIDX;
      IndexReader[] ireaders = new IndexReader[masters.size() - 1];
      for (int i = 0; i < masters.size() - 1; i++)
        ireaders[i] = IndexReader.open((File) masters.get(i));
      iw.addIndexes(ireaders);
      for (int i = 0; i < masters.size() - 1; i++) {
        ireaders[i].close();
        FileUtil.fullyDelete((File) masters.get(i));
      }
    }
    iw.close();
    LOG.info("* Optimizing index took " + (System.currentTimeMillis() - s1) + " ms");
    LOG.info("* Removing duplicate entries...");
    stage = SegmentMergeStatus.STAGE_DEDUP;
    IndexReader ir = IndexReader.open(masterDir);
    int i = 0;
    long cnt = 0L;
    processedRecords = 0L;
    s1 = System.currentTimeMillis();
    delta = s1;
    TermEnum te = ir.terms();
    while (te.next()) {
      Term t = te.term();
      if (t == null)
        continue;
      if (!(t.field().equals("ch") || t.field().equals("uh")))
        continue;
      cnt++;
      processedRecords = cnt / 2;
      if (cnt > 0 && (cnt % (LOG_STEP * 2) == 0)) {
        LOG.info(" Processed " + processedRecords + " records (" + (float) (LOG_STEP * 1000)
            / (float) (System.currentTimeMillis() - delta) + " rec/s)");
        delta = System.currentTimeMillis();
      }
      // Enumerate all docs with the same URL hash or content hash
      TermDocs td = ir.termDocs(t);
      if (td == null)
        continue;
      if (t.field().equals("uh")) {
        // Keep only the latest version of the document with
        // the same url hash. Note: even if the content
        // hash is identical, other metadata may be different, so even
        // in this case it makes sense to keep the latest version.
        int id = -1;
        String time = null;
        Document doc = null;
        while (td.next()) {
          int docid = td.doc();
          if (!ir.isDeleted(docid)) {
            doc = ir.document(docid);
            if (time == null) {
              time = doc.get("time");
              id = docid;
              continue;
            }
            String dtime = doc.get("time");
            // "time" is a DateField, and can be compared lexicographically
            if (dtime.compareTo(time) > 0) {
              if (id != -1) {
                ir.deleteDocument(id);
              }
              time = dtime;
              id = docid;
            } else {
              ir.deleteDocument(docid);
            }
          }
        }
      } else if (t.field().equals("ch")) {
        // Keep only the version of the document with
        // the highest score, and then with the shortest url.
        int id = -1;
        int ul = 0;
        float score = 0.0f;
        Document doc = null;
        while (td.next()) {
          int docid = td.doc();
          if (!ir.isDeleted(docid)) {
            doc = ir.document(docid);
            if (ul == 0) {
              try {
                ul = Integer.parseInt(doc.get("ul"));
                score = Float.parseFloat(doc.get("score"));
              } catch (Exception e) {
              }
              id = docid;
              continue;
            }
            int dul = 0;
            float dscore = 0.0f;
            try {
              dul = Integer.parseInt(doc.get("ul"));
              dscore = Float.parseFloat(doc.get("score"));
            } catch (Exception e) {
            }
            int cmp = Float.compare(dscore, score);
            if (cmp == 0) {
              // equal scores, select the one with shortest url
              if (dul < ul) {
                if (id != -1) {
                  ir.deleteDocument(id);
                }
                ul = dul;
                id = docid;
              } else {
                ir.deleteDocument(docid);
              }
            } else if (cmp < 0) {
              ir.deleteDocument(docid);
            } else {
              if (id != -1) {
                ir.deleteDocument(id);
              }
              ul = dul;
              id = docid;
            }
          }
        }
      }
    }
    //
    // keep the IndexReader open...
    //
    LOG.info("* Deduplicating took " + (System.currentTimeMillis() - s1) + " ms");
    stage = SegmentMergeStatus.STAGE_WRITING;
    processedRecords = 0L;
    Vector outDirs = new Vector();
    File outDir = new File(output, SegmentWriter.getNewSegmentName());
    outDirs.add(outDir);
    LOG.info("* Merging all segments into " + output.getName());
    s1 = System.currentTimeMillis();
    delta = s1;
    nfs.mkdirs(outDir);
    SegmentWriter sw = new SegmentWriter(nfs, outDir, true);
    LOG.fine(" - opening first output segment in " + outDir.getName());
    FetcherOutput fo = new FetcherOutput();
    Content co = new Content();
    ParseText pt = new ParseText();
    ParseData pd = new ParseData();
    int outputCnt = 0;
    for (int n = 0; n < ir.maxDoc(); n++) {
      if (ir.isDeleted(n)) {
        //System.out.println("-del");
        continue;
      }
      Document doc = ir.document(n);
      String segDoc = doc.get("sd");
      int idx = segDoc.indexOf('|');
      String segName = segDoc.substring(0, idx);
      String docName = segDoc.substring(idx + 1);
      SegmentReader sr = (SegmentReader) readers.get(segName);
      long docid;
      try {
        docid = Long.parseLong(docName);
      } catch (Exception e) {
        continue;
      }
      try {
        // get data from the reader
        sr.get(docid, fo, co, pt, pd);
      } catch (Throwable thr) {
        // don't break the loop, because only one of the segments
        // may be corrupted...
        LOG.fine(" - corrupt record no. " + docid + " in segment " + sr.segmentDir.getName() + " - skipping.");
        continue;
      }
      sw.append(fo, co, pt, pd);
      outputCnt++;
      processedRecords++;
      if (processedRecords > 0 && (processedRecords % LOG_STEP == 0)) {
        LOG.info(" Processed " + processedRecords + " records (" + (float) (LOG_STEP * 1000)
            / (float) (System.currentTimeMillis() - delta) + " rec/s)");
        delta = System.currentTimeMillis();
      }
      if (processedRecords % maxCount == 0) {
        sw.close();
        outDir = new File(output, SegmentWriter.getNewSegmentName());
        LOG.fine(" - starting next output segment in " + outDir.getName());
        nfs.mkdirs(outDir);
        sw = new SegmentWriter(nfs, outDir, true);
        outDirs.add(outDir);
      }
    }
    LOG.info("* Merging took " + (System.currentTimeMillis() - s1) + " ms");
    ir.close();
    sw.close();
    FileUtil.fullyDelete(fsmtIndexDir);
    for (Iterator iter = readers.keySet().iterator(); iter.hasNext();) {
      SegmentReader sr = (SegmentReader) readers.get(iter.next());
      sr.close();
    }
    if (runIndexer) {
      stage = SegmentMergeStatus.STAGE_INDEXING;
      totalRecords = outDirs.size();
      processedRecords = 0L;
      LOG.info("* Creating new segment index(es)...");
      File workingDir = new File(output, "indexsegment-workingdir");
      for (int k = 0; k < outDirs.size(); k++) {
        processedRecords++;
        if (workingDir.exists()) {
          FileUtil.fullyDelete(workingDir);
        }
        IndexSegment indexer = new IndexSegment(nfs, Integer.MAX_VALUE, (File) outDirs.get(k), workingDir);
        indexer.indexPages();
        FileUtil.fullyDelete(workingDir);
      }
    }
    if (delSegs) {
      // This deletes also all corrupt segments, which are
      // unusable anyway
      stage = SegmentMergeStatus.STAGE_DELETING;
      totalRecords = allsegdirs.size();
      processedRecords = 0L;
      LOG.info("* Deleting old segments...");
      for (int k = 0; k < allsegdirs.size(); k++) {
        processedRecords++;
        FileUtil.fullyDelete((File) allsegdirs.get(k));
      }
    }
    delta = System.currentTimeMillis() - start;
    float eps = (float) total / (float) (delta / 1000);
    LOG.info("Finished SegmentMergeTool: INPUT: " + total + " -> OUTPUT: " + outputCnt + " entries in "
        + ((float) delta / 1000f) + " s (" + eps + " entries/sec).");
  } catch (Exception e) {
    e.printStackTrace();
    LOG.severe(e.getMessage());
  }
}
From source file:org.apache.solr.codecs.test.testGetStoredFields.java
License:Apache License
public static void getDoc(String searchField, String searchString) throws IOException, ParseException {
  System.out.println("Searching for '" + searchString + "'");
  Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
  IndexReader indexReader = DirectoryReader.open(luceneDir);
  IndexSearcher indexSearcher = new IndexSearcher(indexReader);
  TotalHitCountCollector hitCountCollector = new TotalHitCountCollector();
  StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
  QueryParser queryParser = new QueryParser(Version.LUCENE_4_10_1, searchField, analyzer);
  Query query = queryParser.parse(searchString);
  indexSearcher.search(query, hitCountCollector);
  System.out.println("Word: " + searchString + "; Number of hits: " + hitCountCollector.getTotalHits());
  System.out.println("maxdocs=" + indexReader.maxDoc());
  org.apache.lucene.search.TopDocs docs = indexSearcher.search(query, 100);
  for (int i = 0; i < docs.scoreDocs.length; i++) {
    Document doc1 = indexReader.document(docs.scoreDocs[i].doc);
    System.out.println("title=" + doc1.get("title"));
    System.out.println("content=" + doc1.get("content"));
    System.out.println("global_bu_id=" + doc1.get("global_bu_id"));
    System.out.println("omega_order_num=" + doc1.get("omega_order_num"));
    System.out.println("------");
  }
  luceneDir.close();
}
From source file:org.apache.solr.search.TestQueryWrapperFilter.java
License:Apache License
public void testRandom() throws Exception {
  final Directory d = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), d);
  w.w.getConfig().setMaxBufferedDocs(17);
  final int numDocs = atLeast(100);
  final Set<String> aDocs = new HashSet<>();
  for (int i = 0; i < numDocs; i++) {
    final Document doc = new Document();
    final String v;
    if (random().nextInt(5) == 4) {
      v = "a";
      aDocs.add("" + i);
    } else {
      v = "b";
    }
    final Field f = newStringField("field", v, Field.Store.NO);
    doc.add(f);
    doc.add(newStringField("id", "" + i, Field.Store.YES));
    w.addDocument(doc);
  }

  final int numDelDocs = atLeast(10);
  for (int i = 0; i < numDelDocs; i++) {
    final String delID = "" + random().nextInt(numDocs);
    w.deleteDocuments(new Term("id", delID));
    aDocs.remove(delID);
  }

  final IndexReader r = w.getReader();
  w.close();
  final TopDocs hits = newSearcher(r).search(new QueryWrapperFilter(new TermQuery(new Term("field", "a"))),
      numDocs);
  assertEquals(aDocs.size(), hits.totalHits);
  for (ScoreDoc sd : hits.scoreDocs) {
    assertTrue(aDocs.contains(r.document(sd.doc).get("id")));
  }
  r.close();
  d.close();
}
From source file:org.apache.solr.search.TestStressLucene.java
License:Apache License
@Test
public void testStressLuceneNRT() throws Exception {
  final int commitPercent = 5 + random().nextInt(20);
  final int softCommitPercent = 30 + random().nextInt(75); // what percent of the commits are soft
  final int deletePercent = 4 + random().nextInt(25);
  final int deleteByQueryPercent = 1 + random().nextInt(5);
  final int ndocs = 5 + (random().nextBoolean() ? random().nextInt(25) : random().nextInt(200));
  int nWriteThreads = 5 + random().nextInt(25);
  final int maxConcurrentCommits = nWriteThreads; // number of committers at a time... it should be <= maxWarmingSearchers

  final AtomicLong operations = new AtomicLong(100000); // number of query operations to perform in total
  int nReadThreads = 5 + random().nextInt(25);
  final boolean tombstones = random().nextBoolean();
  final boolean syncCommits = random().nextBoolean();

  verbose("commitPercent=", commitPercent);
  verbose("softCommitPercent=", softCommitPercent);
  verbose("deletePercent=", deletePercent);
  verbose("deleteByQueryPercent=", deleteByQueryPercent);
  verbose("ndocs=", ndocs);
  verbose("nWriteThreads=", nWriteThreads);
  verbose("nReadThreads=", nReadThreads);
  verbose("maxConcurrentCommits=", maxConcurrentCommits);
  verbose("operations=", operations);
  verbose("tombstones=", tombstones);
  verbose("syncCommits=", syncCommits);

  initModel(ndocs);

  final AtomicInteger numCommitting = new AtomicInteger();

  List<Thread> threads = new ArrayList<Thread>();

  final FieldType idFt = new FieldType();
  idFt.setIndexed(true);
  idFt.setStored(true);
  idFt.setOmitNorms(true);
  idFt.setTokenized(false);
  idFt.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY);

  final FieldType ft2 = new FieldType();
  ft2.setIndexed(false);
  ft2.setStored(true);

  // model how solr does locking - only allow one thread to do a hard commit at once, and only one thread to do a soft commit, but
  // a hard commit in progress does not stop a soft commit.
  final Lock hardCommitLock = syncCommits ? new ReentrantLock() : null;
  final Lock reopenLock = syncCommits ? new ReentrantLock() : null;

  // RAMDirectory dir = new RAMDirectory();
  // final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_40, new WhitespaceAnalyzer(Version.LUCENE_40)));

  Directory dir = newDirectory();

  final RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  writer.setDoRandomForceMergeAssert(false);

  // writer.commit();
  // reader = IndexReader.open(dir);
  // make this reader an NRT reader from the start to avoid the first non-writer openIfChanged
  // to only opening at the last commit point.
  reader = DirectoryReader.open(writer.w, true);

  for (int i = 0; i < nWriteThreads; i++) {
    Thread thread = new Thread("WRITER" + i) {
      Random rand = new Random(random().nextInt());

      @Override
      public void run() {
        try {
          while (operations.get() > 0) {
            int oper = rand.nextInt(100);

            if (oper < commitPercent) {
              if (numCommitting.incrementAndGet() <= maxConcurrentCommits) {
                Map<Integer, DocInfo> newCommittedModel;
                long version;
                DirectoryReader oldReader;

                boolean softCommit = rand.nextInt(100) < softCommitPercent;

                if (!softCommit) {
                  // only allow one hard commit to proceed at once
                  if (hardCommitLock != null)
                    hardCommitLock.lock();
                  verbose("hardCommit start");
                  writer.commit();
                }

                if (reopenLock != null)
                  reopenLock.lock();

                synchronized (globalLock) {
                  newCommittedModel = new HashMap<Integer, DocInfo>(model); // take a snapshot
                  version = snapshotCount++;
                  oldReader = reader;
                  oldReader.incRef(); // increment the reference since we will use this for reopening
                }

                if (!softCommit) {
                  // must commit after taking a snapshot of the model
                  // writer.commit();
                }

                verbose("reopen start using", oldReader);

                DirectoryReader newReader;
                if (softCommit) {
                  newReader = DirectoryReader.openIfChanged(oldReader, writer.w, true);
                } else {
                  // will only open to last commit
                  newReader = DirectoryReader.openIfChanged(oldReader);
                }

                if (newReader == null) {
                  oldReader.incRef();
                  newReader = oldReader;
                }
                oldReader.decRef();

                verbose("reopen result", newReader);

                synchronized (globalLock) {
                  assert newReader.getRefCount() > 0;
                  assert reader.getRefCount() > 0;

                  // install the new reader if it's newest (and check the current version since another reader may have already been installed)
                  if (newReader.getVersion() > reader.getVersion()) {
                    reader.decRef();
                    reader = newReader;

                    // install this snapshot only if it's newer than the current one
                    if (version >= committedModelClock) {
                      committedModel = newCommittedModel;
                      committedModelClock = version;
                    }
                  } else {
                    // close if unused
                    newReader.decRef();
                  }
                }

                if (reopenLock != null)
                  reopenLock.unlock();

                if (!softCommit) {
                  if (hardCommitLock != null)
                    hardCommitLock.unlock();
                }
              }
              numCommitting.decrementAndGet();
              continue;
            }

            int id = rand.nextInt(ndocs);
            Object sync = syncArr[id];

            // set the lastId before we actually change it sometimes to try and
            // uncover more race conditions between writing and reading
            boolean before = rand.nextBoolean();
            if (before) {
              lastId = id;
            }

            // We can't concurrently update the same document and retain our invariants of increasing values
            // since we can't guarantee what order the updates will be executed.
            synchronized (sync) {
              DocInfo info = model.get(id);
              long val = info.val;
              long nextVal = Math.abs(val) + 1;

              if (oper < commitPercent + deletePercent) {
                // add tombstone first
                if (tombstones) {
                  Document d = new Document();
                  d.add(new Field("id", "-" + Integer.toString(id), idFt));
                  d.add(new Field(field, Long.toString(nextVal), ft2));
                  verbose("adding tombstone for id", id, "val=", nextVal);
                  writer.updateDocument(new Term("id", "-" + Integer.toString(id)), d);
                }

                verbose("deleting id", id, "val=", nextVal);
                writer.deleteDocuments(new Term("id", Integer.toString(id)));
                model.put(id, new DocInfo(0, -nextVal));
                verbose("deleting id", id, "val=", nextVal, "DONE");
              } else if (oper < commitPercent + deletePercent + deleteByQueryPercent) {
                //assertU("<delete><query>id:" + id + "</query></delete>");

                // add tombstone first
                if (tombstones) {
                  Document d = new Document();
                  d.add(new Field("id", "-" + Integer.toString(id), idFt));
                  d.add(new Field(field, Long.toString(nextVal), ft2));
                  verbose("adding tombstone for id", id, "val=", nextVal);
                  writer.updateDocument(new Term("id", "-" + Integer.toString(id)), d);
                }

                verbose("deleteByQuery", id, "val=", nextVal);
                writer.deleteDocuments(new TermQuery(new Term("id", Integer.toString(id))));
                model.put(id, new DocInfo(0, -nextVal));
                verbose("deleteByQuery", id, "val=", nextVal, "DONE");
              } else {
                // model.put(id, nextVal);   // uncomment this and this test should fail.

                // assertU(adoc("id",Integer.toString(id), field, Long.toString(nextVal)));
                Document d = new Document();
                d.add(new Field("id", Integer.toString(id), idFt));
                d.add(new Field(field, Long.toString(nextVal), ft2));
                verbose("adding id", id, "val=", nextVal);
                writer.updateDocument(new Term("id", Integer.toString(id)), d);
                if (tombstones) {
                  // remove tombstone after new addition (this should be optional?)
                  verbose("deleting tombstone for id", id);
                  writer.deleteDocuments(new Term("id", "-" + Integer.toString(id)));
                  verbose("deleting tombstone for id", id, "DONE");
                }

                model.put(id, new DocInfo(0, nextVal));
                verbose("adding id", id, "val=", nextVal, "DONE");
              }
            }

            if (!before) {
              lastId = id;
            }
          }
        } catch (Exception ex) {
          throw new RuntimeException(ex);
        }
      }
    };

    threads.add(thread);
  }

  for (int i = 0; i < nReadThreads; i++) {
    Thread thread = new Thread("READER" + i) {
      Random rand = new Random(random().nextInt());

      @Override
      public void run() {
        try {
          while (operations.decrementAndGet() >= 0) {
            // bias toward a recently changed doc
            int id = rand.nextInt(100) < 25 ? lastId : rand.nextInt(ndocs);

            // when indexing, we update the index, then the model
            // so when querying, we should first check the model, and then the index
            DocInfo info;
            synchronized (globalLock) {
              info = committedModel.get(id);
            }
            long val = info.val;

            IndexReader r;
            synchronized (globalLock) {
              r = reader;
              r.incRef();
            }

            int docid = getFirstMatch(r, new Term("id", Integer.toString(id)));

            if (docid < 0 && tombstones) {
              // if we couldn't find the doc, look for it's tombstone
              docid = getFirstMatch(r, new Term("id", "-" + Integer.toString(id)));
              if (docid < 0) {
                if (val == -1L) {
                  // expected... no doc was added yet
                  r.decRef();
                  continue;
                }
                verbose("ERROR: Couldn't find a doc or tombstone for id", id, "using reader", r,
                    "expected value", val);
                fail("No documents or tombstones found for id " + id + ", expected at least " + val);
              }
            }

            if (docid < 0 && !tombstones) {
              // nothing to do - we can't tell anything from a deleted doc without tombstones
            } else {
              if (docid < 0) {
                verbose("ERROR: Couldn't find a doc for id", id, "using reader", r);
              }
              assertTrue(docid >= 0); // we should have found the document, or it's tombstone
              Document doc = r.document(docid);
              long foundVal = Long.parseLong(doc.get(field));
              if (foundVal < Math.abs(val)) {
                verbose("ERROR: id", id, "model_val=", val, " foundVal=", foundVal, "reader=", reader);
              }
              assertTrue(foundVal >= Math.abs(val));
            }

            r.decRef();
          }
        } catch (Throwable e) {
          operations.set(-1L);
          throw new RuntimeException(e);
        }
      }
    };

    threads.add(thread);
  }

  for (Thread thread : threads) {
    thread.start();
  }

  for (Thread thread : threads) {
    thread.join();
  }

  writer.close();
  reader.close();
  dir.close();
}
From source file:org.arastreju.sge.index.ArastrejuIndex.java
License:Apache License
public void dump() {
  ContextIndex index = provider.forContext(conversationContext.getPrimaryContext());
  org.apache.lucene.search.IndexSearcher searcher = index.getSearcher();
  IndexReader reader = searcher.getIndexReader();
  try {
    TopDocs top = searcher.search(new MatchAllDocsQuery(), 100);
    for (int i = 0; i < top.totalHits; i++) {
      Document doc = reader.document(top.scoreDocs[i].doc);
      LOGGER.info("---Document--- id: " + top.scoreDocs[i].doc);
      List<Fieldable> fields = doc.getFields();
      for (Fieldable f : fields) {
        LOGGER.info("\tField: name='" + f.name() + "', val='" + f.stringValue() + "'");
      }
    }
  } catch (IOException e) {
    String msg = "caught IOException while dumping index";
    LOGGER.error(msg, e);
    throw new RuntimeException(msg, e);
  }
}