Example usage for org.apache.lucene.index RandomIndexWriter getReader

Introduction

On this page you can find example usage of org.apache.lucene.index RandomIndexWriter getReader.

Prototype

public DirectoryReader getReader() throws IOException 
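
A typical pattern, repeated throughout the examples below, is to add all documents first, obtain the reader, and then close the writer. The following is a minimal sketch (the field name and value are illustrative; newDirectory(), newStringField(), newSearcher() and random() are helpers from Lucene's test framework, as used in the examples):

Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

Document doc = new Document();
doc.add(newStringField("field", "value", Field.Store.NO));
writer.addDocument(doc);

// getReader() flushes any pending documents and returns a point-in-time
// view of the index; the writer can be closed once the reader is open.
DirectoryReader reader = writer.getReader();
writer.close();

IndexSearcher searcher = newSearcher(reader);
// ... run queries against the searcher ...

reader.close();
dir.close();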

Usage

From source file: brightsolid.solr.plugins.TestTargetPositionQueryFuzzy.java

License: Apache License

@Override
public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
    Document doc = new Document();
    FieldType newType = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED);
    newType.setOmitNorms(true);
    Field field = newField("field", "", newType);
    field.fieldType().setOmitNorms(true);

    doc.add(field);

    field.setStringValue("one two threx");
    iw.addDocument(doc);
    field.setStringValue("two threx one");
    iw.addDocument(doc);
    field.setStringValue("threx one twp");
    iw.addDocument(doc);

    reader = iw.getReader();
    iw.close();
    searcher = newSearcher(reader);
}

From source file: brightsolid.solr.plugins.TestTargetPositionQuerySimple.java

License: Apache License

@Override
public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), directory);

    Document doc = new Document();
    FieldType newType = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED);
    newType.setOmitNorms(true);
    Field field = newField("field", "", newType);
    field.fieldType().setOmitNorms(true);

    doc.add(field);

    field.setStringValue("two");
    iw.addDocument(doc);
    field.setStringValue("one");
    iw.addDocument(doc);
    field.setStringValue("one two");
    iw.addDocument(doc);
    field.setStringValue("two one");
    iw.addDocument(doc);
    field.setStringValue("one two three");
    iw.addDocument(doc);
    field.setStringValue("three two one");
    iw.addDocument(doc);
    field.setStringValue("one one two");
    iw.addDocument(doc);

    reader = iw.getReader();
    iw.close();
    searcher = newSearcher(reader);
}

From source file: brightsolid.solr.plugins.TestTargetPositionQuerySynonyms.java

License: Apache License

@Override
public void setUp() throws Exception {
    super.setUp();

    String testFile = "one, uno, un\n" + "two, dos, too\n" + "three, free, tres";

    SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random()));
    parser.parse(new StringReader(testFile));

    final SynonymMap map = parser.build();
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
            return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
        }
    };

    directory = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), directory, analyzer);
    Document doc = new Document();
    FieldType newType = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED);
    newType.setOmitNorms(true);
    Field field = newField("field", "", newType);
    field.fieldType().setOmitNorms(true);

    doc.add(field);

    field.setStringValue("one two three");
    iw.addDocument(doc);
    field.setStringValue("two three one");
    iw.addDocument(doc);
    field.setStringValue("three one two");
    iw.addDocument(doc);

    reader = iw.getReader();
    iw.close();
    searcher = newSearcher(reader);
}

From source file: com.meizu.nlp.classification.utils.DocToDoubleVectorUtilsTest.java

License: Apache License

@Override
    @Before
public void setUp() throws Exception {
    super.setUp();
    dir = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), dir);

    FieldType ft = new FieldType(TextField.TYPE_STORED);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorOffsets(true);
    ft.setStoreTermVectorPositions(true);

    Document doc;
    for (int i = 0; i < 10; i++) {
        doc = new Document();
        doc.add(new Field("id", Integer.toString(i), ft));
        doc.add(new Field("text",
                random().nextInt(10) + " " + random().nextInt(10) + " " + random().nextInt(10), ft));
        indexWriter.addDocument(doc);
    }

    indexWriter.commit();

    index = indexWriter.getReader();

    indexWriter.close();
}

From source file: com.rocana.lucene.codec.v1.RocanaBasePostingsFormatTestCase.java

License: Apache License

@Override
public void testInvertedWrite() throws Exception {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);

    // Must be concurrent because thread(s) can be merging
    // while up to one thread flushes, and each of those
    // threads iterates over the map while the flushing
    // thread might be adding to it:
    final Map<String, TermFreqs> termFreqs = new ConcurrentHashMap<>();

    final AtomicLong sumDocFreq = new AtomicLong();
    final AtomicLong sumTotalTermFreq = new AtomicLong();

    // TODO: would be better to use / delegate to the current
    // Codec returned by getCodec()

    iwc.setCodec(new AssertingCodec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {

            PostingsFormat p = getCodec().postingsFormat();
            if (p instanceof PerFieldPostingsFormat) {
                p = ((PerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            if (p instanceof RocanaPerFieldPostingsFormat) {
                p = ((RocanaPerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            final PostingsFormat defaultPostingsFormat = p;

            final Thread mainThread = Thread.currentThread();

            if (field.equals("body")) {

                // A PF that counts up some stats and then in
                // the end we verify the stats match what the
                // final IndexReader says, just to exercise the
                // new freedom of iterating the postings more
                // than once at flush/merge:

                return new PostingsFormat(defaultPostingsFormat.getName()) {

                    @Override
                    public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {

                        final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);

                        return new FieldsConsumer() {
                            @Override
                            public void write(Fields fields) throws IOException {
                                fieldsConsumer.write(fields);

                                boolean isMerge = state.context.context == IOContext.Context.MERGE;

                                // We only use one thread for flushing
                                // in this test:
                                assert isMerge || Thread.currentThread() == mainThread;

                                // We iterate the provided TermsEnum
                                // twice, so we exercise this new freedom
                                // with the inverted API; if
                                // addOnSecondPass is true, we add up
                                // term stats on the 2nd iteration:
                                boolean addOnSecondPass = random().nextBoolean();

                                //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);

                                // Gather our own stats:
                                Terms terms = fields.terms("body");
                                assert terms != null;

                                TermsEnum termsEnum = terms.iterator();
                                PostingsEnum docs = null;
                                while (termsEnum.next() != null) {
                                    BytesRef term = termsEnum.term();
                                    // TODO: also sometimes ask for payloads/offsets?
                                    boolean noPositions = random().nextBoolean();
                                    if (noPositions) {
                                        docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                    } else {
                                        docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                    }
                                    int docFreq = 0;
                                    long totalTermFreq = 0;
                                    while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                        docFreq++;
                                        totalTermFreq += docs.freq();
                                        int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                        if (!noPositions) {
                                            for (int i = 0; i < limit; i++) {
                                                docs.nextPosition();
                                            }
                                        }
                                    }

                                    String termString = term.utf8ToString();

                                    // During merge we should only see terms
                                    // we had already seen during a
                                    // previous flush:
                                    assertTrue(isMerge == false || termFreqs.containsKey(termString));

                                    if (isMerge == false) {
                                        if (addOnSecondPass == false) {
                                            TermFreqs tf = termFreqs.get(termString);
                                            if (tf == null) {
                                                tf = new TermFreqs();
                                                termFreqs.put(termString, tf);
                                            }
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        } else if (termFreqs.containsKey(termString) == false) {
                                            // Add placeholder (2nd pass will
                                            // set its counts):
                                            termFreqs.put(termString, new TermFreqs());
                                        }
                                    }
                                }

                                // Also test seeking the TermsEnum:
                                for (String term : termFreqs.keySet()) {
                                    if (termsEnum.seekExact(new BytesRef(term))) {
                                        // TODO: also sometimes ask for payloads/offsets?
                                        boolean noPositions = random().nextBoolean();
                                        if (noPositions) {
                                            docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                        } else {
                                            docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                        }

                                        int docFreq = 0;
                                        long totalTermFreq = 0;
                                        while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                            docFreq++;
                                            totalTermFreq += docs.freq();
                                            int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                            if (!noPositions) {
                                                for (int i = 0; i < limit; i++) {
                                                    docs.nextPosition();
                                                }
                                            }
                                        }

                                        if (isMerge == false && addOnSecondPass) {
                                            TermFreqs tf = termFreqs.get(term);
                                            assert tf != null;
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        }

                                        //System.out.println("  term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
                                        assertTrue(docFreq <= termFreqs.get(term).docFreq);
                                        assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
                                    }
                                }

                                // Also test seekCeil
                                for (int iter = 0; iter < 10; iter++) {
                                    BytesRef term = new BytesRef(
                                            TestUtil.randomRealisticUnicodeString(random()));
                                    SeekStatus status = termsEnum.seekCeil(term);
                                    if (status == SeekStatus.NOT_FOUND) {
                                        assertTrue(term.compareTo(termsEnum.term()) < 0);
                                    }
                                }
                            }

                            @Override
                            public void close() throws IOException {
                                fieldsConsumer.close();
                            }
                        };
                    }

                    @Override
                    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
                        return defaultPostingsFormat.fieldsProducer(state);
                    }
                };
            } else {
                return defaultPostingsFormat;
            }
        }
    });

    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);

    LineFileDocs docs = new LineFileDocs(random());
    int bytesToIndex = atLeast(100) * 1024;
    int bytesIndexed = 0;
    while (bytesIndexed < bytesToIndex) {
        Document doc = docs.nextDoc();
        w.addDocument(doc);
        bytesIndexed += RamUsageTester.sizeOf(doc);
    }

    IndexReader r = w.getReader();
    w.close();

    Terms terms = MultiFields.getTerms(r, "body");
    assertEquals(sumDocFreq.get(), terms.getSumDocFreq());
    assertEquals(sumTotalTermFreq.get(), terms.getSumTotalTermFreq());

    TermsEnum termsEnum = terms.iterator();
    long termCount = 0;
    boolean supportsOrds = true;
    while (termsEnum.next() != null) {
        BytesRef term = termsEnum.term();
        assertEquals(termFreqs.get(term.utf8ToString()).docFreq, termsEnum.docFreq());
        assertEquals(termFreqs.get(term.utf8ToString()).totalTermFreq, termsEnum.totalTermFreq());
        if (supportsOrds) {
            long ord;
            try {
                ord = termsEnum.ord();
            } catch (UnsupportedOperationException uoe) {
                supportsOrds = false;
                ord = -1;
            }
            if (ord != -1) {
                assertEquals(termCount, ord);
            }
        }
        termCount++;
    }
    assertEquals(termFreqs.size(), termCount);

    r.close();
    dir.close();
}

From source file: com.sindicetech.siren.search.node.TestNodeFuzzyQuery.java

License: Open Source License

/**
 * MultiTermQuery provides (via attribute) information about which values
 * must be competitive to enter the priority queue.
 *
 * FuzzyQuery optimizes itself around this information; if the attribute
 * is not implemented correctly, there will be problems!
 */
public void testTieBreaker() throws Exception {
    this.addDocument("<a123456>");
    this.addDocument("<c123456>");
    this.addDocument("<d123456>");
    this.addDocument("<e123456>");

    final Directory directory2 = newDirectory();
    final RandomIndexWriter writer2 = newRandomIndexWriter(directory2, analyzer, codec);
    addDocument(writer2, "<a123456>");
    addDocument(writer2, "<b123456>");
    addDocument(writer2, "<b123456>");
    addDocument(writer2, "<b123456>");
    addDocument(writer2, "<c123456>");
    addDocument(writer2, "<f123456>");

    final IndexReader ir1 = writer.getReader();
    final IndexReader ir2 = writer2.getReader();

    final MultiReader mr = new MultiReader(ir1, ir2);
    final IndexSearcher searcher = newSearcher(mr);
    final FuzzyQuery fq = new FuzzyQuery(new Term(DEFAULT_TEST_FIELD, "z123456"), 1, 0, 2, false);
    final TopDocs docs = searcher.search(fq, 2);
    assertEquals(5, docs.totalHits); // 5 docs, from the a and b's

    mr.close();
    ir2.close();
    writer2.close();
    directory2.close();
}

From source file: com.sindicetech.siren.util.SirenTestCase.java

License: Open Source License

protected static IndexReader newIndexReader(final RandomIndexWriter writer) throws IOException {
    // By default we wrap the reader into a slow composite reader, as most
    // of the tests require an atomic reader
    return SlowCompositeReaderWrapper.wrap(writer.getReader());
}
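
A caller would typically use this helper instead of calling RandomIndexWriter.getReader() directly (a hypothetical usage sketch; the writer setup mirrors the TestNodeFuzzyQuery example above):

RandomIndexWriter writer = newRandomIndexWriter(directory, analyzer, codec);
// ... add documents ...
IndexReader reader = newIndexReader(writer); // an atomic (slow composite) view of the index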

From source file: lia.searching.ConstantScoreQuery.java

License: Apache License

public void testWrapped2Times() throws Exception {
    Directory directory = null;
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        directory = newDirectory();
        RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

        Document doc = new Document();
        doc.add(newStringField("field", "term", Field.Store.NO));
        writer.addDocument(doc);

        reader = writer.getReader();
        writer.close();
        searcher = newSearcher(reader);

        // set a similarity that does not normalize our boost away
        searcher.setSimilarity(new DefaultSimilarity() {
            @Override
            public float queryNorm(float sumOfSquaredWeights) {
                return 1.0f;
            }
        });

        final Query csq1 = new ConstantScoreQuery(new TermQuery(new Term("field", "term")));
        csq1.setBoost(2.0f);
        final Query csq2 = new ConstantScoreQuery(csq1);
        csq2.setBoost(5.0f);

        final BooleanQuery bq = new BooleanQuery();
        bq.add(csq1, BooleanClause.Occur.SHOULD);
        bq.add(csq2, BooleanClause.Occur.SHOULD);

        final Query csqbq = new ConstantScoreQuery(bq);
        csqbq.setBoost(17.0f);

        checkHits(searcher, csq1, csq1.getBoost(), ConstantScoreQuery.ConstantScorer.class.getName(), null);
        checkHits(searcher, csq2, csq2.getBoost(), ConstantScoreQuery.ConstantScorer.class.getName(),
                ConstantScoreQuery.ConstantScorer.class.getName());

        // for the combined BQ, the scorer should always be BooleanScorer's BucketScorer, because our scorer supports out-of-order collection!
        final String bucketScorerClass = BooleanScorer.class.getName() + "$BucketScorer";
        checkHits(searcher, bq, csq1.getBoost() + csq2.getBoost(), bucketScorerClass, null);
        checkHits(searcher, csqbq, csqbq.getBoost(), ConstantScoreQuery.ConstantScorer.class.getName(),
                bucketScorerClass);
    } finally {
        if (reader != null)
            reader.close();
        if (directory != null)
            directory.close();
    }
}

From source file: lia.searching.ConstantScoreQuery.java

License: Apache License

public void testConstantScoreQueryAndFilter() throws Exception {
    Directory d = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), d);
    Document doc = new Document();
    doc.add(newStringField("field", "a", Field.Store.NO));
    w.addDocument(doc);
    doc = new Document();
    doc.add(newStringField("field", "b", Field.Store.NO));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();

    Filter filterB = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "b"))));
    Query query = new ConstantScoreQuery(filterB);

    IndexSearcher s = newSearcher(r);
    assertEquals(1, s.search(query, filterB, 1).totalHits); // Query for field:b, Filter field:b

    Filter filterA = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "a"))));
    query = new ConstantScoreQuery(filterA);

    assertEquals(0, s.search(query, filterB, 1).totalHits); // Query field:b, Filter field:a

    r.close();
    d.close();
}

From source file: lia.searching.ConstantScoreQuery.java

License: Apache License

public void testQueryWrapperFilter() throws IOException {
    Directory d = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), d);
    Document doc = new Document();
    doc.add(newStringField("field", "a", Field.Store.NO));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();

    Filter filter = new QueryWrapperFilter(
            AssertingQuery.wrap(random(), new TermQuery(new Term("field", "a"))));
    IndexSearcher s = newSearcher(r);
    assert s instanceof AssertingIndexSearcher;
    // this used to fail
    s.search(new ConstantScoreQuery(filter), new TotalHitCountCollector());

    // check the rewrite
    Query rewritten = new ConstantScoreQuery(filter).rewrite(r);
    assertTrue(rewritten instanceof ConstantScoreQuery);
    assertTrue(((ConstantScoreQuery) rewritten).getQuery() instanceof AssertingQuery);

    r.close();
    d.close();
}