List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the nth Document in this index.
From source file:TestWang.java
License:Open Source License
private Document findDoc(IndexReader reader, String file) throws IOException { for (int i = 0; i < reader.numDocs(); i++) { Document document = reader.document(i); String s = document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; if (s.endsWith(File.separator + file)) { // System.out.println("s = " + s); return document; }//w w w . j av a 2 s . c o m } return null; }
From source file:TestWang.java
License:Open Source License
private Document[] findDocs(IndexReader reader, String[] file) throws IOException { Document[] result = new Document[file.length]; for (int i = 0; i < reader.numDocs(); i++) { Document document = reader.document(i); String s = document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; for (int j = 0; j < result.length; j++) { if (s.endsWith("\\" + file[j])) { // System.out.println("s = " + s); result[j] = document;// w ww. j a v a 2 s. c o m } } } return result; }
From source file:ReadFiles.java
License:Apache License
public static Result doScan(String path, DIRTYPE type, IndexReader ir) throws IOException { IndexReader reader; Result r = new Result(); long beginTs, endTs; if (ir != null) reader = ir;//from w w w. ja v a 2 s .co m else { beginTs = System.currentTimeMillis(); switch (type) { default: case MMAP: reader = DirectoryReader.open(MMapDirectory.open(new File(path))); break; case NIO: reader = DirectoryReader.open(NIOFSDirectory.open(new File(path))); break; case SIMPLE: reader = DirectoryReader.open(SimpleFSDirectory.open(new File(path))); break; } endTs = System.currentTimeMillis(); r.initTs += endTs - beginTs; r.initTsNr += 1; } System.out.println("-----Scan it------" + reader.maxDoc()); beginTs = System.currentTimeMillis(); for (int i = 0; i < reader.maxDoc(); i++) { Document doc = reader.document(i); doc.get("foo"); doc.get("bar"); //System.out.println("Key: " + doc.get("foo") + ", Value: " + doc.get("bar") + ", Content: " + doc.get("content")); } endTs = System.currentTimeMillis(); r.fetchTs += endTs - beginTs; r.fetchTsNr += reader.maxDoc(); if (ir == null) { beginTs = System.currentTimeMillis(); reader.close(); endTs = System.currentTimeMillis(); r.closeTs += endTs - beginTs; r.closeTsNr += 1; } return r; }
From source file:ReadFiles.java
License:Apache License
public static Result doRandFetch(String path, DIRTYPE type, IndexReader ir, int randfetchnr) throws IOException { IndexReader reader; Result r = new Result(); long beginTs, endTs; if (ir != null) reader = ir;/*from w w w . ja v a2s . c o m*/ else { beginTs = System.currentTimeMillis(); switch (type) { default: case MMAP: reader = DirectoryReader.open(MMapDirectory.open(new File(path))); break; case NIO: reader = DirectoryReader.open(NIOFSDirectory.open(new File(path))); break; case SIMPLE: reader = DirectoryReader.open(SimpleFSDirectory.open(new File(path))); break; } endTs = System.currentTimeMillis(); r.initTs += endTs - beginTs; r.initTsNr += 1; } System.out.println("-----RandFt it------"); try { Thread.sleep(10000); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } // Randomized the fetch Random rand = new Random(); beginTs = System.currentTimeMillis(); int maxDoc = reader.maxDoc(); if (randfetchnr > 0) maxDoc = randfetchnr; for (int i = 0; i < maxDoc; i++) { Document doc = reader.document(rand.nextInt(maxDoc)); doc.get("foo"); doc.get("bar"); //System.out.println("Key: " + doc.get("foo") + ", Value: " + doc.get("bar")); } endTs = System.currentTimeMillis(); r.fetchTs += endTs - beginTs; r.fetchTsNr += maxDoc; if (ir == null) { beginTs = System.currentTimeMillis(); reader.close(); endTs = System.currentTimeMillis(); r.closeTs += endTs - beginTs; r.closeTsNr += 1; } return r; }
From source file:aos.lucene.search.advanced.BooksLikeThis.java
License:Apache License
public static void main(String[] args) throws IOException { Directory dir = TestUtil.getBookIndexDirectory(); IndexReader reader = DirectoryReader.open(dir); int numDocs = reader.maxDoc(); BooksLikeThis blt = new BooksLikeThis(reader); for (int i = 0; i < numDocs; i++) { // LOGGER.info();// w w w .j a v a2 s . c o m Document doc = reader.document(i); LOGGER.info(doc.get("title")); Document[] docs = blt.docsLike(i, 10); // if (docs.length == 0) { LOGGER.info(" None like this"); } for (Document likeThisDoc : docs) { LOGGER.info(" -> " + likeThisDoc.get("title")); } } reader.close(); dir.close(); }
From source file:aos.lucene.search.advanced.CategorizerTest.java
License:Apache License
private void buildCategoryVectors() throws IOException { IndexReader reader = DirectoryReader.open(TestUtil.getBookIndexDirectory()); int maxDoc = reader.maxDoc(); for (int i = 0; i < maxDoc; i++) { if (!reader.isDeleted(i)) { Document doc = reader.document(i); String category = doc.get("category"); Map vectorMap = (Map) categoryMap.get(category); if (vectorMap == null) { vectorMap = new TreeMap(); categoryMap.put(category, vectorMap); }// w w w. ja v a 2 s . co m TermFreqVector termFreqVector = reader.getTermFreqVector(i, "subject"); addTermFreqToMap(vectorMap, termFreqVector); } } }
From source file:aos.lucene.search.advanced.FunctionQueryTest.java
License:Apache License
public void testRecency() throws Throwable { Directory dir = TestUtil.getBookIndexDirectory(); IndexReader r = DirectoryReader.open(dir); IndexSearcher s = new IndexSearcher(r); s.setDefaultFieldSortScoring(true, true); QueryParser parser = new QueryParser(Version.LUCENE_46, "contents", new StandardAnalyzer(Version.LUCENE_46)); Query q = parser.parse("java in action"); // #A Query q2 = new RecencyBoostingQuery(q, // #B 2.0, 2 * 365, "pubmonthAsDay"); Sort sort = new Sort(new SortField[] { SortField.FIELD_SCORE, new SortField("title2", SortField.STRING) }); TopDocs hits = s.search(q2, null, 5, sort); for (int i = 0; i < hits.scoreDocs.length; i++) { Document doc = r.document(hits.scoreDocs[i].doc); LOGGER.info((1 + i) + ": " + doc.get("title") + ": pubmonth=" + doc.get("pubmonth") + " score=" + hits.scoreDocs[i].score); }/* www .j a v a 2s . c o m*/ s.close(); r.close(); dir.close(); }
From source file:aos.lucene.tools.BooksMoreLikeThis.java
License:Apache License
/**
 * Logs, for every document ID below {@code maxDoc()}, the document's title, the
 * {@code MoreLikeThis} query generated for it from the {@code "title"} and {@code "author"}
 * fields, and the titles of up to ten hits of that query (excluding the document itself).
 *
 * <p>The index location is read from the {@code index.dir} system property. The reader and the
 * directory are closed even when reading or searching fails.
 *
 * @param args unused
 * @throws IllegalStateException if the {@code index.dir} system property is not set
 * @throws Throwable if the index cannot be opened, searched or closed
 */
public static void main(String[] args) throws Throwable {
    String indexDir = System.getProperty("index.dir");
    if (indexDir == null) {
        // Fail with an actionable message instead of a bare NullPointerException from File.
        throw new IllegalStateException("System property 'index.dir' must be set to the index directory");
    }

    FSDirectory directory = FSDirectory.open(new File(indexDir));
    try {
        IndexReader reader = DirectoryReader.open(directory);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            int numDocs = reader.maxDoc();

            MoreLikeThis mlt = new MoreLikeThis(reader);
            mlt.setFieldNames(new String[] { "title", "author" });
            mlt.setMinTermFreq(1);
            mlt.setMinDocFreq(1);

            for (int docID = 0; docID < numDocs; docID++) {
                // Blank separator entry between books.
                LOGGER.info("");
                Document doc = reader.document(docID);
                LOGGER.info(doc.get("title"));

                Query query = mlt.like(docID);
                LOGGER.info(" query=" + query);

                TopDocs similarDocs = searcher.search(query, 10);
                if (similarDocs.totalHits == 0) {
                    LOGGER.info(" None like this");
                }
                for (int i = 0; i < similarDocs.scoreDocs.length; i++) {
                    int similarId = similarDocs.scoreDocs[i].doc;
                    if (similarId == docID) {
                        // The source document matches its own query; do not list it.
                        continue;
                    }
                    Document similar = reader.document(similarId);
                    // get() is null-safe when a hit has no title, unlike getField().stringValue().
                    LOGGER.info(" -> " + similar.get("title"));
                }
            }
        } finally {
            reader.close();
        }
    } finally {
        directory.close();
    }
}
From source file:approxnn.DocIdSim.java
public List<DocVector> selectTopK(DocVector qvec, IndexReader reader, int k) throws Exception { PriorityQueue<DocVector> nearest = new PriorityQueue(); for (DocIdSim docIdSim : this.neighbors) { Document d = reader.document(docIdSim.sd.doc); DocVector dvec = new DocVector(d, qvec.numDimensions, DocVector.numIntervals); float dist = qvec.getDist(dvec); dvec.setDistWithQry(dist);// w ww . j a v a 2s . com nearest.add(dvec); } List<DocVector> topDocsList = new ArrayList<>(); k = Math.min(k, nearest.size()); for (int i = 0; i < k; i++) { topDocsList.add(nearest.poll()); } return topDocsList; }
From source file:approxnn.DocIdSim.java
public List<DocVector> selectTop(DocVector qvec, IndexReader reader) throws Exception { float minDist = Float.MAX_VALUE; DocVector ann = null;// w w w.j a va 2s .c o m for (DocIdSim docIdSim : this.neighbors) { Document d = reader.document(docIdSim.sd.doc); DocVector dvec = new DocVector(d, qvec.numDimensions, DocVector.numIntervals, true); float dist = qvec.getDist(dvec); dvec.setDistWithQry(dist); if (dist < minDist) { minDist = dist; ann = dvec; } } List<DocVector> topList = new ArrayList<>(1); topList.add(ann); return topList; }