List of usage examples for org.apache.lucene.index.RandomIndexWriter.getReader()

public DirectoryReader getReader() throws IOException
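Across the examples below the idiom is the same: populate a RandomIndexWriter, call getReader() to obtain a near-real-time DirectoryReader over everything indexed so far, close the writer, and search against the reader. Here is a minimal sketch of that pattern, assuming the enclosing class extends Lucene's LuceneTestCase (which supplies newDirectory(), random(), and newSearcher()); the field name, content, and assertion are illustrative, not taken from any one example:

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;

public void testGetReaderIdiom() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

    Document doc = new Document();
    doc.add(new TextField("field", "hello world", Field.Store.NO));
    writer.addDocument(doc);

    // getReader() hands back a near-real-time DirectoryReader that already
    // sees the document above; the writer may then be closed while the
    // reader remains usable.
    DirectoryReader reader = writer.getReader();
    writer.close();

    IndexSearcher searcher = newSearcher(reader);
    assertEquals(1, searcher.search(new TermQuery(new Term("field", "hello")), 1).totalHits);

    reader.close();
    dir.close();
}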
From source file: brightsolid.solr.plugins.TestTargetPositionQueryFuzzy.java
License: Apache License

@Override
public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
    Document doc = new Document();
    FieldType newType = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED);
    newType.setOmitNorms(true);
    Field field = newField("field", "", newType);
    field.fieldType().setOmitNorms(true);
    doc.add(field);
    field.setStringValue("one two threx");
    iw.addDocument(doc);
    field.setStringValue("two threx one");
    iw.addDocument(doc);
    field.setStringValue("threx one twp");
    iw.addDocument(doc);
    reader = iw.getReader();
    iw.close();
    searcher = newSearcher(reader);
}
From source file: brightsolid.solr.plugins.TestTargetPositionQuerySimple.java
License: Apache License

@Override
public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
    Document doc = new Document();
    FieldType newType = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED);
    newType.setOmitNorms(true);
    Field field = newField("field", "", newType);
    field.fieldType().setOmitNorms(true);
    doc.add(field);
    field.setStringValue("two");
    iw.addDocument(doc);
    field.setStringValue("one");
    iw.addDocument(doc);
    field.setStringValue("one two");
    iw.addDocument(doc);
    field.setStringValue("two one");
    iw.addDocument(doc);
    field.setStringValue("one two three");
    iw.addDocument(doc);
    field.setStringValue("three two one");
    iw.addDocument(doc);
    field.setStringValue("one one two");
    iw.addDocument(doc);
    reader = iw.getReader();
    iw.close();
    searcher = newSearcher(reader);
}
From source file: brightsolid.solr.plugins.TestTargetPositionQuerySynonyms.java
License: Apache License

@Override
public void setUp() throws Exception {
    super.setUp();
    String testFile = "one, uno, un\n" + "two, dos, too\n" + "three, free, tres";
    SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random()));
    parser.parse(new StringReader(testFile));
    final SynonymMap map = parser.build();
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
            return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
        }
    };
    directory = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), directory, analyzer);
    Document doc = new Document();
    FieldType newType = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED);
    newType.setOmitNorms(true);
    Field field = newField("field", "", newType);
    field.fieldType().setOmitNorms(true);
    doc.add(field);
    field.setStringValue("one two three");
    iw.addDocument(doc);
    field.setStringValue("two three one");
    iw.addDocument(doc);
    field.setStringValue("three one two");
    iw.addDocument(doc);
    reader = iw.getReader();
    iw.close();
    searcher = newSearcher(reader);
}
From source file: com.meizu.nlp.classification.utils.DocToDoubleVectorUtilsTest.java
License: Apache License

@Override
@Before
public void setUp() throws Exception {
    super.setUp();
    dir = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), dir);
    FieldType ft = new FieldType(TextField.TYPE_STORED);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorOffsets(true);
    ft.setStoreTermVectorPositions(true);
    Document doc;
    for (int i = 0; i < 10; i++) {
        doc = new Document();
        doc.add(new Field("id", Integer.toString(i), ft));
        doc.add(new Field("text",
                random().nextInt(10) + " " + random().nextInt(10) + " " + random().nextInt(10), ft));
        indexWriter.addDocument(doc);
    }
    indexWriter.commit();
    index = indexWriter.getReader();
    indexWriter.close();
}
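Because this setUp stores full term vectors, the reader obtained from getReader() can hand them back per document. The fragment below is a hedged sketch (not part of the original test) showing that the vectors are reachable through the `index` reader populated above; it assumes org.apache.lucene.index.Terms, org.apache.lucene.index.TermsEnum, and org.apache.lucene.util.BytesRef are imported:

// Dump the term vector of the "text" field for doc 0.
Terms vector = index.getTermVector(0, "text");
TermsEnum termsEnum = vector.iterator();
BytesRef term;
while ((term = termsEnum.next()) != null) {
    System.out.println(term.utf8ToString() + " freq=" + termsEnum.totalTermFreq());
}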
From source file: com.rocana.lucene.codec.v1.RocanaBasePostingsFormatTestCase.java
License: Apache License

@Override
public void testInvertedWrite() throws Exception {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);

    // Must be concurrent because thread(s) can be merging
    // while up to one thread flushes, and each of those
    // threads iterates over the map while the flushing
    // thread might be adding to it:
    final Map<String, TermFreqs> termFreqs = new ConcurrentHashMap<>();

    final AtomicLong sumDocFreq = new AtomicLong();
    final AtomicLong sumTotalTermFreq = new AtomicLong();

    // TODO: would be better to use / delegate to the current
    // Codec returned by getCodec()
    iwc.setCodec(new AssertingCodec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            PostingsFormat p = getCodec().postingsFormat();
            if (p instanceof PerFieldPostingsFormat) {
                p = ((PerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            if (p instanceof RocanaPerFieldPostingsFormat) {
                p = ((RocanaPerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            final PostingsFormat defaultPostingsFormat = p;

            final Thread mainThread = Thread.currentThread();

            if (field.equals("body")) {
                // A PF that counts up some stats and then in
                // the end we verify the stats match what the
                // final IndexReader says, just to exercise the
                // new freedom of iterating the postings more
                // than once at flush/merge:
                return new PostingsFormat(defaultPostingsFormat.getName()) {
                    @Override
                    public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
                        final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);

                        return new FieldsConsumer() {
                            @Override
                            public void write(Fields fields) throws IOException {
                                fieldsConsumer.write(fields);

                                boolean isMerge = state.context.context == IOContext.Context.MERGE;

                                // We only use one thread for flushing
                                // in this test:
                                assert isMerge || Thread.currentThread() == mainThread;

                                // We iterate the provided TermsEnum
                                // twice, so we exercise this new freedom
                                // with the inverted API; if
                                // addOnSecondPass is true, we add up
                                // term stats on the 2nd iteration:
                                boolean addOnSecondPass = random().nextBoolean();

                                //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);

                                // Gather our own stats:
                                Terms terms = fields.terms("body");
                                assert terms != null;

                                TermsEnum termsEnum = terms.iterator();
                                PostingsEnum docs = null;
                                while (termsEnum.next() != null) {
                                    BytesRef term = termsEnum.term();
                                    // TODO: also sometimes ask for payloads/offsets?
                                    boolean noPositions = random().nextBoolean();
                                    if (noPositions) {
                                        docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                    } else {
                                        docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                    }
                                    int docFreq = 0;
                                    long totalTermFreq = 0;
                                    while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                        docFreq++;
                                        totalTermFreq += docs.freq();
                                        int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                        if (!noPositions) {
                                            for (int i = 0; i < limit; i++) {
                                                docs.nextPosition();
                                            }
                                        }
                                    }
                                    String termString = term.utf8ToString();

                                    // During merge we should only see terms
                                    // we had already seen during a
                                    // previous flush:
                                    assertTrue(isMerge == false || termFreqs.containsKey(termString));
                                    if (isMerge == false) {
                                        if (addOnSecondPass == false) {
                                            TermFreqs tf = termFreqs.get(termString);
                                            if (tf == null) {
                                                tf = new TermFreqs();
                                                termFreqs.put(termString, tf);
                                            }
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        } else if (termFreqs.containsKey(termString) == false) {
                                            // Add placeholder (2nd pass will
                                            // set its counts):
                                            termFreqs.put(termString, new TermFreqs());
                                        }
                                    }
                                }

                                // Also test seeking the TermsEnum:
                                for (String term : termFreqs.keySet()) {
                                    if (termsEnum.seekExact(new BytesRef(term))) {
                                        // TODO: also sometimes ask for payloads/offsets?
                                        boolean noPositions = random().nextBoolean();
                                        if (noPositions) {
                                            docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                        } else {
                                            docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                        }
                                        int docFreq = 0;
                                        long totalTermFreq = 0;
                                        while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                            docFreq++;
                                            totalTermFreq += docs.freq();
                                            int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                            if (!noPositions) {
                                                for (int i = 0; i < limit; i++) {
                                                    docs.nextPosition();
                                                }
                                            }
                                        }

                                        if (isMerge == false && addOnSecondPass) {
                                            TermFreqs tf = termFreqs.get(term);
                                            assert tf != null;
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        }

                                        //System.out.println("  term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
                                        assertTrue(docFreq <= termFreqs.get(term).docFreq);
                                        assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
                                    }
                                }

                                // Also test seekCeil:
                                for (int iter = 0; iter < 10; iter++) {
                                    BytesRef term = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
                                    SeekStatus status = termsEnum.seekCeil(term);
                                    if (status == SeekStatus.NOT_FOUND) {
                                        assertTrue(term.compareTo(termsEnum.term()) < 0);
                                    }
                                }
                            }

                            @Override
                            public void close() throws IOException {
                                fieldsConsumer.close();
                            }
                        };
                    }

                    @Override
                    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
                        return defaultPostingsFormat.fieldsProducer(state);
                    }
                };
            } else {
                return defaultPostingsFormat;
            }
        }
    });

    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    LineFileDocs docs = new LineFileDocs(random());
    int bytesToIndex = atLeast(100) * 1024;
    int bytesIndexed = 0;
    while (bytesIndexed < bytesToIndex) {
        Document doc = docs.nextDoc();
        w.addDocument(doc);
        bytesIndexed += RamUsageTester.sizeOf(doc);
    }

    IndexReader r = w.getReader();
    w.close();

    Terms terms = MultiFields.getTerms(r, "body");
    assertEquals(sumDocFreq.get(), terms.getSumDocFreq());
    assertEquals(sumTotalTermFreq.get(), terms.getSumTotalTermFreq());

    TermsEnum termsEnum = terms.iterator();
    long termCount = 0;
    boolean supportsOrds = true;
    while (termsEnum.next() != null) {
        BytesRef term = termsEnum.term();
        assertEquals(termFreqs.get(term.utf8ToString()).docFreq, termsEnum.docFreq());
        assertEquals(termFreqs.get(term.utf8ToString()).totalTermFreq, termsEnum.totalTermFreq());
        if (supportsOrds) {
            long ord;
            try {
                ord = termsEnum.ord();
            } catch (UnsupportedOperationException uoe) {
                supportsOrds = false;
                ord = -1;
            }
            if (ord != -1) {
                assertEquals(termCount, ord);
            }
        }
        termCount++;
    }
    assertEquals(termFreqs.size(), termCount);

    r.close();
    dir.close();
}
From source file: com.sindicetech.siren.search.node.TestNodeFuzzyQuery.java
License: Open Source License

/**
 * MultiTermQuery provides (via attribute) information about which values
 * must be competitive to enter the priority queue.
 *
 * FuzzyQuery optimizes itself around this information; if the attribute
 * is not implemented correctly, there will be problems!
 */
public void testTieBreaker() throws Exception {
    this.addDocument("<a123456>");
    this.addDocument("<c123456>");
    this.addDocument("<d123456>");
    this.addDocument("<e123456>");

    final Directory directory2 = newDirectory();
    final RandomIndexWriter writer2 = newRandomIndexWriter(directory2, analyzer, codec);
    addDocument(writer2, "<a123456>");
    addDocument(writer2, "<b123456>");
    addDocument(writer2, "<b123456>");
    addDocument(writer2, "<b123456>");
    addDocument(writer2, "<c123456>");
    addDocument(writer2, "<f123456>");

    final IndexReader ir1 = writer.getReader();
    final IndexReader ir2 = writer2.getReader();

    final MultiReader mr = new MultiReader(ir1, ir2);
    final IndexSearcher searcher = newSearcher(mr);
    final FuzzyQuery fq = new FuzzyQuery(new Term(DEFAULT_TEST_FIELD, "z123456"), 1, 0, 2, false);
    final TopDocs docs = searcher.search(fq, 2);
    assertEquals(5, docs.totalHits); // 5 docs, from the a and b's

    mr.close();
    ir2.close();
    writer2.close();
    directory2.close();
}
From source file: com.sindicetech.siren.util.SirenTestCase.java
License: Open Source License

protected static IndexReader newIndexReader(final RandomIndexWriter writer) throws IOException {
    // By default we wrap the reader in a slow composite reader, as most of
    // the tests require an atomic reader:
    return SlowCompositeReaderWrapper.wrap(writer.getReader());
}
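A brief usage sketch for this helper, pieced together from the SirenTestCase calls visible in the previous example (newRandomIndexWriter and addDocument are the same helpers used there); it is illustrative, not taken from the source file:

final Directory dir = newDirectory();
final RandomIndexWriter writer = newRandomIndexWriter(dir, analyzer, codec);
addDocument(writer, "<a123456>");

// Wraps writer.getReader() in a SlowCompositeReaderWrapper so the test can
// treat the whole index as a single atomic (leaf) reader.
final IndexReader reader = newIndexReader(writer);
final IndexSearcher searcher = newSearcher(reader);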
From source file: lia.searching.ConstantScoreQuery.java
License: Apache License

public void testWrapped2Times() throws Exception {
    Directory directory = null;
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        directory = newDirectory();
        RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
        Document doc = new Document();
        doc.add(newStringField("field", "term", Field.Store.NO));
        writer.addDocument(doc);
        reader = writer.getReader();
        writer.close();
        searcher = newSearcher(reader);

        // Set a similarity that does not normalize our boost away:
        searcher.setSimilarity(new DefaultSimilarity() {
            @Override
            public float queryNorm(float sumOfSquaredWeights) {
                return 1.0f;
            }
        });

        final Query csq1 = new ConstantScoreQuery(new TermQuery(new Term("field", "term")));
        csq1.setBoost(2.0f);
        final Query csq2 = new ConstantScoreQuery(csq1);
        csq2.setBoost(5.0f);

        final BooleanQuery bq = new BooleanQuery();
        bq.add(csq1, BooleanClause.Occur.SHOULD);
        bq.add(csq2, BooleanClause.Occur.SHOULD);

        final Query csqbq = new ConstantScoreQuery(bq);
        csqbq.setBoost(17.0f);

        checkHits(searcher, csq1, csq1.getBoost(), ConstantScoreQuery.ConstantScorer.class.getName(), null);
        checkHits(searcher, csq2, csq2.getBoost(), ConstantScoreQuery.ConstantScorer.class.getName(),
                ConstantScoreQuery.ConstantScorer.class.getName());

        // For the combined BQ, the scorer should always be BooleanScorer's
        // BucketScorer, because our scorer supports out-of-order collection!
        final String bucketScorerClass = BooleanScorer.class.getName() + "$BucketScorer";
        checkHits(searcher, bq, csq1.getBoost() + csq2.getBoost(), bucketScorerClass, null);
        checkHits(searcher, csqbq, csqbq.getBoost(), ConstantScoreQuery.ConstantScorer.class.getName(),
                bucketScorerClass);
    } finally {
        if (reader != null) reader.close();
        if (directory != null) directory.close();
    }
}
From source file: lia.searching.ConstantScoreQuery.java
License: Apache License

public void testConstantScoreQueryAndFilter() throws Exception {
    Directory d = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), d);
    Document doc = new Document();
    doc.add(newStringField("field", "a", Field.Store.NO));
    w.addDocument(doc);
    doc = new Document();
    doc.add(newStringField("field", "b", Field.Store.NO));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();

    Filter filterB = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "b"))));
    Query query = new ConstantScoreQuery(filterB);
    IndexSearcher s = newSearcher(r);
    assertEquals(1, s.search(query, filterB, 1).totalHits); // Query for field:b, Filter field:b

    Filter filterA = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "a"))));
    query = new ConstantScoreQuery(filterA);
    assertEquals(0, s.search(query, filterB, 1).totalHits); // Query field:a, Filter field:b

    r.close();
    d.close();
}
From source file: lia.searching.ConstantScoreQuery.java
License: Apache License

public void testQueryWrapperFilter() throws IOException {
    Directory d = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), d);
    Document doc = new Document();
    doc.add(newStringField("field", "a", Field.Store.NO));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();

    Filter filter = new QueryWrapperFilter(
            AssertingQuery.wrap(random(), new TermQuery(new Term("field", "a"))));
    IndexSearcher s = newSearcher(r);
    assert s instanceof AssertingIndexSearcher;

    // This used to fail:
    s.search(new ConstantScoreQuery(filter), new TotalHitCountCollector());

    // Check the rewrite:
    Query rewritten = new ConstantScoreQuery(filter).rewrite(r);
    assertTrue(rewritten instanceof ConstantScoreQuery);
    assertTrue(((ConstantScoreQuery) rewritten).getQuery() instanceof AssertingQuery);

    r.close();
    d.close();
}