List of usage examples for org.apache.lucene.index.DirectoryReader.open
public static DirectoryReader open(final IndexCommit commit) throws IOException
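DirectoryReader.open is overloaded: besides the IndexCommit variant shown above, there are variants that take a Directory (used by most of the examples below) and an IndexWriter (for near-real-time readers). As a minimal sketch of the IndexCommit variant, assuming Lucene 5+ and a hypothetical index path; note that listCommits can only return older commits if the index was written with a deletion policy that retains them (e.g. NoDeletionPolicy):

import java.nio.file.Paths;
import java.util.List;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class OpenCommitExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical index location; point this at a real index.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"))) {
            // Commits are sorted by age; index 0 is the oldest surviving commit.
            List<IndexCommit> commits = DirectoryReader.listCommits(dir);
            IndexCommit oldest = commits.get(0);
            try (DirectoryReader reader = DirectoryReader.open(oldest)) {
                System.out.println("Docs in oldest commit: " + reader.numDocs());
            }
        }
    }
}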
From source file: dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java
License: Apache License
public void testLargerDVEnableIndex() throws IOException {
    final int DOCS = 1000;
    log.info("testLargerDVEnableIndex started");
    final File INDEX_SRC = generateIndex(DOCS);
    final File INDEX_DEST = new File("target/testindex.deletefreely.dest");
    try {
        IndexUtils.convert(INDEX_SRC, INDEX_DEST, createDVFieldDescriptions(INDEX_SRC));
        IndexReader readerSrc = DirectoryReader.open(MMapDirectory.open(INDEX_SRC));
        IndexReader readerDest = DirectoryReader.open(MMapDirectory.open(INDEX_DEST));
        long multiCount = 0;
        long singleCount = 0;
        long longCount = 0;
        long doubleCount = 0;
        for (int docID = 0; docID < DOCS; docID++) {
            { // multi-valued String field: stored values vs. SortedSet doc values
                String[] multisSrc = readerSrc.document(docID).getValues(MULTI);
                if (multisSrc != null) {
                    List<String> dvs = getSortedSetDocValues(readerDest, docID, MULTI);
                    Arrays.sort(multisSrc);
                    Collections.sort(dvs);
                    assertEquals("There should be as many DVs as stored values for field " + MULTI,
                            multisSrc.length, dvs.size());
                    for (int i = 0; i < multisSrc.length; i++) {
                        assertEquals("Value " + i + " for field " + MULTI + " should be equal",
                                multisSrc[i], dvs.get(i));
                        multiCount++;
                    }
                }
            }
            { // single-valued String field: stored value vs. Sorted doc value
                String singleSrc = readerSrc.document(docID).get(SINGLE);
                if (singleSrc != null) {
                    String dv = getSortedDocValue(readerDest, docID, SINGLE);
                    assertEquals("The DV for field " + SINGLE + " should match the stored value",
                            singleSrc, dv);
                    singleCount++;
                }
            }
            { // long field: stored value vs. numeric doc value
                IndexableField fieldSrc = readerSrc.document(docID).getField(LONG);
                if (fieldSrc != null) {
                    long longSrc = fieldSrc.numericValue().longValue();
                    long dv = getLongDocValue(readerDest, docID, LONG);
                    assertEquals("The DV for field " + LONG + " should match the stored value",
                            longSrc, dv);
                    longCount++;
                }
            }
            { // double field: stored value vs. numeric doc value
                IndexableField fieldSrc = readerSrc.document(docID).getField(DOUBLE);
                if (fieldSrc != null) {
                    double doubleSrc = fieldSrc.numericValue().doubleValue();
                    double dv = getDoubleDocValue(readerDest, docID, DOUBLE);
                    assertEquals("The DV for field " + DOUBLE + " should match the stored value",
                            doubleSrc, dv);
                    doubleCount++;
                }
            }
        }
        assertTrue("There should be at least 1 value for field " + MULTI + " in a document", multiCount > 0);
        assertTrue("There should be at least 1 value for field " + SINGLE + " in a document", singleCount > 0);
        assertTrue("There should be at least 1 value for field " + LONG + " in a document", longCount > 0);
        assertTrue("There should be at least 1 value for field " + DOUBLE + " in a document", doubleCount > 0);
        readerSrc.close();
        readerDest.close();
    } finally {
        delete(INDEX_SRC);
        delete(INDEX_DEST);
    }
}
From source file: dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java
License: Apache License
public static void assertIndexValues(File index, boolean dvExpected) throws IOException, ParseException {
    IndexReader reader = DirectoryReader.open(MMapDirectory.open(index));
    IndexSearcher searcher = new IndexSearcher(reader);
    try {
        assertIndexValues(reader, searcher, dvExpected);
    } finally {
        reader.close();
    }
}
From source file: Dl4j.TermInfo.java
public LuceneDocFetcher(Directory dir, ArrayList<String> docIds) throws Exception {
    globalTermId = 0;
    termSeen = new HashMap<>();
    IndexReader reader = DirectoryReader.open(dir);
    totalExamples = docIds.size(); // one example per requested doc id, not reader.numDocs()
    docWordMaps = new ArrayList<>(totalExamples);

    // The searcher, similarity and query parser are loop-invariant, so build them once.
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new DefaultSimilarity());
    QueryParser queryParser = new QueryParser("id", new KeywordAnalyzer());

    // Build the per-doc word maps by resolving each external id to a Lucene doc id.
    for (int i = 0; i < totalExamples; i++) {
        Query query = queryParser.parse(docIds.get(i));
        TopDocs topDocs = searcher.search(query, 3);
        ScoreDoc[] hits = topDocs.scoreDocs;
        docWordMaps.add(buildTerms(reader, hits[0].doc));
    }

    // Iterate through the word maps and build the one-hot vectors.
    List<DataSet> allDocVecs = new ArrayList<>(totalExamples);
    for (Map<String, TermInfo> docwordMap : docWordMaps) {
        allDocVecs.add(constructTermVector(docwordMap));
    }

    // Merge all doc vectors into one dataset.
    this.dataSet = DataSet.merge(allDocVecs);
    reader.close();
}
From source file: Dl4j.TermInfo.java
public LuceneDocFetcher(Directory dir, ArrayList<String> docIds, ArrayList<String> labels) throws Exception {
    globalTermId = 0;
    termSeen = new HashMap<>();
    IndexReader reader = DirectoryReader.open(dir);
    totalExamples = docIds.size(); // one example per requested doc id, not reader.numDocs()
    docWordMaps = new ArrayList<>(totalExamples);

    // The searcher, similarity and query parser are loop-invariant, so build them once.
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new DefaultSimilarity());
    QueryParser queryParser = new QueryParser("id", new KeywordAnalyzer());

    // Build the per-doc word maps by resolving each external id to a Lucene doc id.
    for (int i = 0; i < totalExamples; i++) {
        Query query = queryParser.parse(docIds.get(i));
        TopDocs topDocs = searcher.search(query, 3);
        ScoreDoc[] hits = topDocs.scoreDocs;
        docWordMaps.add(buildTerms(reader, hits[0].doc));
    }

    // Iterate through the word maps and build the labeled one-hot vectors.
    List<DataSet> allDocVecs = new ArrayList<>(totalExamples);
    for (Map<String, TermInfo> docwordMap : docWordMaps) {
        allDocVecs.add(constructTermVector(docwordMap, labels));
    }

    // Merge all doc vectors into one dataset.
    this.dataSet = DataSet.merge(allDocVecs);
    reader.close();
}
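Both LuceneDocFetcher constructors above locate a document by running its external id through a QueryParser over the "id" field. Since the lookup is an exact match on a keyword field, a TermQuery does the same job without query-parser escaping concerns. A minimal sketch of that alternative; the helper name and the assumption that "id" is indexed as a single un-analyzed token are mine, not from the source:

import java.io.IOException;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

class DocIdLookup {
    /**
     * Hypothetical helper: resolves the internal Lucene doc id for a document
     * whose "id" field holds the given external id as one un-analyzed token.
     * Returns -1 if no document matches.
     */
    static int findDocByExternalId(IndexSearcher searcher, String externalId) throws IOException {
        TopDocs hits = searcher.search(new TermQuery(new Term("id", externalId)), 1);
        return hits.scoreDocs.length > 0 ? hits.scoreDocs[0].doc : -1;
    }
}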
From source file: doc2vec.LuceneDocIterator.java
public LuceneDocIterator(File indexDir, String stopFile, boolean labelsStoredWithWords) throws Exception {
    reader = DirectoryReader.open(FSDirectory.open(indexDir.toPath()));
    docId = 0;
    analyzer = AMIIndexer.constructAnalyzer(stopFile);
    numDocs = reader.numDocs();
    this.labelsStoredWithWords = labelsStoredWithWords;
}
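All the examples above open a reader from a Directory. When an IndexWriter is still open, the DirectoryReader.open(IndexWriter) overload instead returns a near-real-time reader that also sees uncommitted, buffered changes. A minimal sketch, assuming Lucene 5+ and a hypothetical index path and field name:

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class NrtOpenExample {
    public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/nrt-index"));
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new TextField("body", "hello lucene", Field.Store.YES));
            writer.addDocument(doc);
            // No commit yet: the NRT reader still sees the buffered document.
            try (DirectoryReader reader = DirectoryReader.open(writer)) {
                System.out.println("numDocs = " + reader.numDocs()); // prints 1
            }
            writer.commit();
        }
    }
}

A plain DirectoryReader.open(dir) reader opened at the same point would report 0 documents until writer.commit() runs.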