Example usage for org.apache.lucene.index PostingsEnum POSITIONS

List of usage examples for org.apache.lucene.index PostingsEnum POSITIONS

Introduction

In this page you can find the example usage for org.apache.lucene.index PostingsEnum POSITIONS.

Prototype

short POSITIONS

To view the source code for org.apache.lucene.index PostingsEnum POSITIONS, click the Source Link below.

Document

Flag to pass to TermsEnum#postings(PostingsEnum,int) if you require term positions in the returned enum.

Usage

From source file:com.rocana.lucene.codec.v1.RocanaBasePostingsFormatTestCase.java

License:Apache License

/**
 * Verifies that per-term statistics gathered by a wrapping PostingsFormat at
 * flush/merge time match what the final IndexReader reports. Exercises the
 * freedom to iterate the Fields/TermsEnum passed to FieldsConsumer#write more
 * than once, including seekExact/seekCeil on the same enum.
 */
@Override
public void testInvertedWrite() throws Exception {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);

    // Must be concurrent because thread(s) can be merging
    // while up to one thread flushes, and each of those
    // threads iterates over the map while the flushing
    // thread might be adding to it:
    final Map<String, TermFreqs> termFreqs = new ConcurrentHashMap<>();

    // Totals accumulated during flush; compared to the reader's stats at the end.
    final AtomicLong sumDocFreq = new AtomicLong();
    final AtomicLong sumTotalTermFreq = new AtomicLong();

    // TODO: would be better to use / delegate to the current
    // Codec returned by getCodec()

    iwc.setCodec(new AssertingCodec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {

            // Unwrap per-field formats to find the concrete PostingsFormat
            // that would actually handle this field:
            PostingsFormat p = getCodec().postingsFormat();
            if (p instanceof PerFieldPostingsFormat) {
                p = ((PerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            if (p instanceof RocanaPerFieldPostingsFormat) {
                p = ((RocanaPerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            final PostingsFormat defaultPostingsFormat = p;

            final Thread mainThread = Thread.currentThread();

            if (field.equals("body")) {

                // A PF that counts up some stats and then in
                // the end we verify the stats match what the
                // final IndexReader says, just to exercise the
                // new freedom of iterating the postings more
                // than once at flush/merge:

                return new PostingsFormat(defaultPostingsFormat.getName()) {

                    @Override
                    public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {

                        final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);

                        return new FieldsConsumer() {
                            @Override
                            public void write(Fields fields) throws IOException {
                                // Delegate the real write first, then re-iterate
                                // the same Fields to collect our own stats:
                                fieldsConsumer.write(fields);

                                boolean isMerge = state.context.context == IOContext.Context.MERGE;

                                // We only use one thread for flushing
                                // in this test:
                                assert isMerge || Thread.currentThread() == mainThread;

                                // We iterate the provided TermsEnum
                                // twice, so we excercise this new freedom
                                // with the inverted API; if
                                // addOnSecondPass is true, we add up
                                // term stats on the 2nd iteration:
                                boolean addOnSecondPass = random().nextBoolean();

                                //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);

                                // Gather our own stats:
                                Terms terms = fields.terms("body");
                                assert terms != null;

                                TermsEnum termsEnum = terms.iterator();
                                PostingsEnum docs = null;
                                while (termsEnum.next() != null) {
                                    BytesRef term = termsEnum.term();
                                    // TODO: also sometimes ask for payloads/offsets?
                                    boolean noPositions = random().nextBoolean();
                                    if (noPositions) {
                                        docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                    } else {
                                        docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                    }
                                    int docFreq = 0;
                                    long totalTermFreq = 0;
                                    while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                        docFreq++;
                                        totalTermFreq += docs.freq();
                                        // Consume a random prefix of the positions:
                                        int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                        if (!noPositions) {
                                            for (int i = 0; i < limit; i++) {
                                                docs.nextPosition();
                                            }
                                        }
                                    }

                                    String termString = term.utf8ToString();

                                    // During merge we should only see terms
                                    // we had already seen during a
                                    // previous flush:
                                    assertTrue(isMerge == false || termFreqs.containsKey(termString));

                                    if (isMerge == false) {
                                        if (addOnSecondPass == false) {
                                            TermFreqs tf = termFreqs.get(termString);
                                            if (tf == null) {
                                                tf = new TermFreqs();
                                                termFreqs.put(termString, tf);
                                            }
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        } else if (termFreqs.containsKey(termString) == false) {
                                            // Add placeholder (2nd pass will
                                            // set its counts):
                                            termFreqs.put(termString, new TermFreqs());
                                        }
                                    }
                                }

                                // Also test seeking the TermsEnum:
                                for (String term : termFreqs.keySet()) {
                                    if (termsEnum.seekExact(new BytesRef(term))) {
                                        // TODO: also sometimes ask for payloads/offsets?
                                        boolean noPositions = random().nextBoolean();
                                        if (noPositions) {
                                            docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                        } else {
                                            docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                        }

                                        int docFreq = 0;
                                        long totalTermFreq = 0;
                                        while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                            docFreq++;
                                            totalTermFreq += docs.freq();
                                            int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                            if (!noPositions) {
                                                for (int i = 0; i < limit; i++) {
                                                    docs.nextPosition();
                                                }
                                            }
                                        }

                                        if (isMerge == false && addOnSecondPass) {
                                            // 2nd pass: now fill in the counts for the
                                            // placeholder added during the first pass:
                                            TermFreqs tf = termFreqs.get(term);
                                            assert tf != null;
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        }

                                        //System.out.println("  term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
                                        assertTrue(docFreq <= termFreqs.get(term).docFreq);
                                        assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
                                    }
                                }

                                // Also test seekCeil
                                for (int iter = 0; iter < 10; iter++) {
                                    BytesRef term = new BytesRef(
                                            TestUtil.randomRealisticUnicodeString(random()));
                                    SeekStatus status = termsEnum.seekCeil(term);
                                    if (status == SeekStatus.NOT_FOUND) {
                                        // NOT_FOUND means the enum is positioned on the
                                        // smallest term greater than the seek target:
                                        assertTrue(term.compareTo(termsEnum.term()) < 0);
                                    }
                                }
                            }

                            @Override
                            public void close() throws IOException {
                                fieldsConsumer.close();
                            }
                        };
                    }

                    @Override
                    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
                        return defaultPostingsFormat.fieldsProducer(state);
                    }
                };
            } else {
                // Non-"body" fields use the default format untouched:
                return defaultPostingsFormat;
            }
        }
    });

    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);

    // Index roughly atLeast(100) KB of line-file docs:
    LineFileDocs docs = new LineFileDocs(random());
    int bytesToIndex = atLeast(100) * 1024;
    int bytesIndexed = 0;
    while (bytesIndexed < bytesToIndex) {
        Document doc = docs.nextDoc();
        w.addDocument(doc);
        bytesIndexed += RamUsageTester.sizeOf(doc);
    }

    IndexReader r = w.getReader();
    w.close();

    // Now verify the stats we collected at write time against the reader:
    Terms terms = MultiFields.getTerms(r, "body");
    assertEquals(sumDocFreq.get(), terms.getSumDocFreq());
    assertEquals(sumTotalTermFreq.get(), terms.getSumTotalTermFreq());

    TermsEnum termsEnum = terms.iterator();
    long termCount = 0;
    boolean supportsOrds = true;
    while (termsEnum.next() != null) {
        BytesRef term = termsEnum.term();
        assertEquals(termFreqs.get(term.utf8ToString()).docFreq, termsEnum.docFreq());
        assertEquals(termFreqs.get(term.utf8ToString()).totalTermFreq, termsEnum.totalTermFreq());
        if (supportsOrds) {
            long ord;
            try {
                ord = termsEnum.ord();
            } catch (UnsupportedOperationException uoe) {
                // Not all postings formats support ords; stop checking after
                // the first UOE:
                supportsOrds = false;
                ord = -1;
            }
            if (ord != -1) {
                assertEquals(termCount, ord);
            }
        }
        termCount++;
    }
    assertEquals(termFreqs.size(), termCount);

    r.close();
    dir.close();
}

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java

License:Apache License

/**
 * checks the terms enum sequentially/*w  ww.j a v  a2  s .  c  o m*/
 * if deep is false, it does a 'shallow' test that doesnt go down to the docsenums
 */
public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep,
        boolean hasPositions) throws Exception {
    BytesRef term;
    PostingsEnum leftPositions = null;
    PostingsEnum rightPositions = null;
    PostingsEnum leftDocs = null;
    PostingsEnum rightDocs = null;

    while ((term = leftTermsEnum.next()) != null) {
        assertEquals(term, rightTermsEnum.next());
        assertTermStats(leftTermsEnum, rightTermsEnum);
        if (deep) {
            if (hasPositions) {
                // with payloads + off
                assertDocsAndPositionsEnum(
                        leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.ALL),
                        rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.ALL));

                assertPositionsSkipping(leftTermsEnum.docFreq(),
                        leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.ALL),
                        rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.ALL));
                // with payloads only
                assertDocsAndPositionsEnum(
                        leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.PAYLOADS),
                        rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.PAYLOADS));

                assertPositionsSkipping(leftTermsEnum.docFreq(),
                        leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.PAYLOADS),
                        rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.PAYLOADS));

                // with offsets only
                assertDocsAndPositionsEnum(
                        leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.OFFSETS),
                        rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.OFFSETS));

                assertPositionsSkipping(leftTermsEnum.docFreq(),
                        leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.OFFSETS),
                        rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.OFFSETS));

                // with positions only
                assertDocsAndPositionsEnum(
                        leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.POSITIONS),
                        rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.POSITIONS));

                assertPositionsSkipping(leftTermsEnum.docFreq(),
                        leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.POSITIONS),
                        rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.POSITIONS));
            }

            // with freqs:
            assertDocsEnum(leftDocs = leftTermsEnum.postings(leftDocs),
                    rightDocs = rightTermsEnum.postings(rightDocs));

            // w/o freqs:
            assertDocsEnum(leftDocs = leftTermsEnum.postings(leftDocs, PostingsEnum.NONE),
                    rightDocs = rightTermsEnum.postings(rightDocs, PostingsEnum.NONE));

            // with freqs:
            assertDocsSkipping(leftTermsEnum.docFreq(), leftDocs = leftTermsEnum.postings(leftDocs),
                    rightDocs = rightTermsEnum.postings(rightDocs));

            // w/o freqs:
            assertDocsSkipping(leftTermsEnum.docFreq(),
                    leftDocs = leftTermsEnum.postings(leftDocs, PostingsEnum.NONE),
                    rightDocs = rightTermsEnum.postings(rightDocs, PostingsEnum.NONE));
        }
    }
    assertNull(rightTermsEnum.next());
}

From source file:nl.inl.blacklab.search.SearcherImpl.java

License:Apache License

/**
 * Resolves word positions to character offsets using the document's term
 * vector. On entry, startsOfWords/endsOfWords hold token (word) positions;
 * on return they are overwritten in place with the corresponding character
 * start/end offsets. If a position cannot be found and
 * fillInDefaultsIfNotFound is true, the lowest start / highest end offset
 * seen anywhere in the field is substituted; otherwise a RuntimeException
 * is thrown.
 */
@Override
public void getCharacterOffsets(int doc, String fieldName, int[] startsOfWords, int[] endsOfWords,
        boolean fillInDefaultsIfNotFound) {

    if (startsOfWords.length == 0)
        return; // nothing to do
    try {
        // Determine lowest and highest word position we'd like to know something about.
        // This saves a little bit of time for large result sets.
        int minP = -1, maxP = -1;
        int numStarts = startsOfWords.length;
        int numEnds = endsOfWords.length;
        for (int i = 0; i < numStarts; i++) {
            if (startsOfWords[i] < minP || minP == -1)
                minP = startsOfWords[i];
            if (startsOfWords[i] > maxP)
                maxP = startsOfWords[i];
        }
        for (int i = 0; i < numEnds; i++) {
            if (endsOfWords[i] < minP || minP == -1)
                minP = endsOfWords[i];
            if (endsOfWords[i] > maxP)
                maxP = endsOfWords[i];
        }
        if (minP < 0 || maxP < 0)
            throw new RuntimeException("Can't determine min and max positions");

        String fieldPropName = ComplexFieldUtil.mainPropertyOffsetsField(indexStructure, fieldName);

        org.apache.lucene.index.Terms terms = reader.getTermVector(doc, fieldPropName);
        if (terms == null)
            throw new IllegalArgumentException(
                    "Field " + fieldPropName + " in doc " + doc + " has no term vector");
        // NOTE(review): this checks hasPositions() but the loop below also reads
        // startOffset()/endOffset() — presumably the term vector is always stored
        // with offsets when it has positions in this index; confirm.
        if (!terms.hasPositions())
            throw new IllegalArgumentException(
                    "Field " + fieldPropName + " in doc " + doc + " has no character postion information");

        //int lowestPos = -1, highestPos = -1;
        // Track the extreme char offsets seen, used as fallback defaults:
        int lowestPosFirstChar = -1, highestPosLastChar = -1;
        int total = numStarts + numEnds;
        boolean[] done = new boolean[total]; // NOTE: array is automatically initialized to zeroes!
        int found = 0;

        // Iterate over terms
        TermsEnum termsEnum = terms.iterator();
        while (termsEnum.next() != null) {
            // Older 3-arg postings overload (liveDocs, reuse, flags) — liveDocs null here.
            PostingsEnum dpe = termsEnum.postings(null, null, PostingsEnum.POSITIONS);

            // Iterate over docs containing this term (NOTE: should be only one doc!)
            while (dpe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                // Iterate over positions of this term in this doc
                int positionsRead = 0;
                int numberOfPositions = dpe.freq();
                while (positionsRead < numberOfPositions) {
                    int position = dpe.nextPosition();
                    if (position == -1)
                        break;
                    positionsRead++;

                    // Keep track of the lowest and highest char pos, so
                    // we can fill in the character positions we didn't find
                    int startOffset = dpe.startOffset();
                    if (startOffset < lowestPosFirstChar || lowestPosFirstChar == -1) {
                        lowestPosFirstChar = startOffset;
                    }
                    int endOffset = dpe.endOffset();
                    if (endOffset > highestPosLastChar) {
                        highestPosLastChar = endOffset;
                    }

                    // We've calculated the min and max word positions in advance, so
                    // we know we can skip this position if it's outside the range we're interested in.
                    // (Saves a little time for large result sets)
                    if (position < minP || position > maxP) {
                        continue;
                    }

                    // Record the char offsets for every requested start/end
                    // position that matches this token position:
                    for (int m = 0; m < numStarts; m++) {
                        if (!done[m] && position == startsOfWords[m]) {
                            done[m] = true;
                            startsOfWords[m] = startOffset;
                            found++;
                        }
                    }
                    for (int m = 0; m < numEnds; m++) {
                        if (!done[numStarts + m] && position == endsOfWords[m]) {
                            done[numStarts + m] = true;
                            endsOfWords[m] = endOffset;
                            found++;
                        }
                    }

                    // NOTE: we might be tempted to break here if found == total,
                    // but that would foul up our calculation of highestPosLastChar and
                    // lowestPosFirstChar.
                }
            }

        }
        if (found < total) {
            if (!fillInDefaultsIfNotFound)
                throw new RuntimeException("Could not find all character offsets!");

            if (lowestPosFirstChar < 0 || highestPosLastChar < 0)
                throw new RuntimeException("Could not find default char positions!");

            // Fall back to the field-wide extreme offsets for anything unresolved:
            for (int m = 0; m < numStarts; m++) {
                if (!done[m])
                    startsOfWords[m] = lowestPosFirstChar;
            }
            for (int m = 0; m < numEnds; m++) {
                if (!done[numStarts + m])
                    endsOfWords[m] = highestPosLastChar;
            }
        }

    } catch (IOException e) {
        throw ExUtil.wrapRuntimeException(e);
    }
}

From source file:org.alfresco.solr.query.SolrPathScorer.java

License:Open Source License

/**
 * Builds a SolrPathScorer for the given path query over one leaf reader.
 *
 * <p>An empty path query is normalized to a self-axis/self-axis pair before
 * scoring. Each structured field position with term text gets a caching
 * positions enum; if any such term has no postings in this segment, the
 * whole path cannot match and {@code null} is returned.</p>
 *
 * @return the scorer, or {@code null} if a required term is absent from this segment
 */
public static SolrPathScorer createPathScorer(SolrPathQuery solrPathQuery, LeafReaderContext context,
        Weight weight, DictionaryService dictionarySertvice, boolean repeat) throws IOException {

    if (solrPathQuery.getPathStructuredFieldPositions().isEmpty()) {
        // Normalize the empty path to "self/self" so scoring has something to walk:
        ArrayList<StructuredFieldPosition> answer = new ArrayList<StructuredFieldPosition>(2);
        answer.add(new SelfAxisStructuredFieldPosition());
        answer.add(new SelfAxisStructuredFieldPosition());

        solrPathQuery.appendQuery(answer);
    }

    for (StructuredFieldPosition sfp : solrPathQuery.getPathStructuredFieldPositions()) {
        if (sfp.getTermText() != null) {
            PostingsEnum p = context.reader().postings(
                    new Term(solrPathQuery.getPathField(), sfp.getTermText()), PostingsEnum.POSITIONS);
            if (p == null)
                return null; // term absent in this segment: path cannot match
            CachingTermPositions ctp = new CachingTermPositions(p);
            sfp.setCachingTermPositions(ctp);
        }
    }

    SolrContainerScorer cs = null;

    PostingsEnum rootContainerPositions = null;
    if (solrPathQuery.getPathRootTerm() != null) {
        rootContainerPositions = context.reader().postings(solrPathQuery.getPathRootTerm(),
                PostingsEnum.POSITIONS);
    }

    if (solrPathQuery.getPathStructuredFieldPositions().size() > 0) {
        // toArray with a typed array already returns StructuredFieldPosition[];
        // no cast needed.
        cs = new SolrContainerScorer(weight, rootContainerPositions,
                solrPathQuery.getPathStructuredFieldPositions().toArray(new StructuredFieldPosition[0]));
    }

    return new SolrPathScorer(weight, cs);
}

From source file:org.elasticsearch.action.termvectors.AbstractTermVectorsTestCase.java

License:Apache License

/**
 * Compares the term vectors in an Elasticsearch TermVectorsResponse against
 * the equivalent Lucene Fields, field by field and term by term: term
 * frequencies, then positions, offsets and payloads per occurrence, honoring
 * what the test config requested and what each field actually stored.
 * Fields not in {@code testConfig.selectedFields} must be absent from the
 * response.
 */
protected void validateResponse(TermVectorsResponse esResponse, Fields luceneFields, TestConfig testConfig)
        throws IOException {
    assertThat(esResponse.getIndex(), equalTo(testConfig.doc.index));
    TestDoc testDoc = testConfig.doc;
    HashSet<String> selectedFields = testConfig.selectedFields == null ? null
            : new HashSet<>(Arrays.asList(testConfig.selectedFields));
    Fields esTermVectorFields = esResponse.getFields();
    for (TestFieldSetting field : testDoc.fieldSettings) {
        Terms esTerms = esTermVectorFields.terms(field.name);
        if (selectedFields != null && !selectedFields.contains(field.name)) {
            // Unselected fields must not appear in the response:
            assertNull(esTerms);
            continue;
        }

        assertNotNull(esTerms);

        Terms luceneTerms = luceneFields.terms(field.name);
        TermsEnum esTermEnum = esTerms.iterator();
        TermsEnum luceneTermEnum = luceneTerms.iterator();

        // Walk both enums in lockstep:
        while (esTermEnum.next() != null) {
            assertNotNull(luceneTermEnum.next());

            assertThat(esTermEnum.totalTermFreq(), equalTo(luceneTermEnum.totalTermFreq()));
            PostingsEnum esDocsPosEnum = esTermEnum.postings(null, PostingsEnum.POSITIONS);
            PostingsEnum luceneDocsPosEnum = luceneTermEnum.postings(null, PostingsEnum.POSITIONS);
            if (luceneDocsPosEnum == null) {
                // test we expect that...
                assertFalse(field.storedOffset);
                assertFalse(field.storedPayloads);
                assertFalse(field.storedPositions);
                continue;
            }

            String currentTerm = esTermEnum.term().utf8ToString();

            assertThat("Token mismatch for field: " + field.name, currentTerm,
                    equalTo(luceneTermEnum.term().utf8ToString()));

            // Term vectors cover a single doc; advance both enums to it:
            esDocsPosEnum.nextDoc();
            luceneDocsPosEnum.nextDoc();

            int freq = esDocsPosEnum.freq();
            assertThat(freq, equalTo(luceneDocsPosEnum.freq()));
            // Per-occurrence checks; -1/null are the "not stored/not requested" markers:
            for (int i = 0; i < freq; i++) {
                String failDesc = " (field:" + field.name + " term:" + currentTerm + ")";
                int lucenePos = luceneDocsPosEnum.nextPosition();
                int esPos = esDocsPosEnum.nextPosition();
                if (field.storedPositions && testConfig.requestPositions) {
                    assertThat("Position test failed" + failDesc, lucenePos, equalTo(esPos));
                } else {
                    assertThat("Missing position test failed" + failDesc, esPos, equalTo(-1));
                }
                if (field.storedOffset && testConfig.requestOffsets) {
                    assertThat("Offset test failed" + failDesc, luceneDocsPosEnum.startOffset(),
                            equalTo(esDocsPosEnum.startOffset()));
                    assertThat("Offset test failed" + failDesc, luceneDocsPosEnum.endOffset(),
                            equalTo(esDocsPosEnum.endOffset()));
                } else {
                    assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.startOffset(),
                            equalTo(-1));
                    assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.endOffset(), equalTo(-1));
                }
                if (field.storedPayloads && testConfig.requestPayloads) {
                    assertThat("Payload test failed" + failDesc, luceneDocsPosEnum.getPayload(),
                            equalTo(esDocsPosEnum.getPayload()));
                } else {
                    assertThat("Missing payload test failed" + failDesc, esDocsPosEnum.getPayload(),
                            equalTo(null));
                }
            }
        }
        assertNull("Es returned terms are done but lucene isn't", luceneTermEnum.next());
    }
}

From source file:org.elasticsearch.action.termvectors.AbstractTermVectorsTests.java

License:Apache License

/**
 * Compares the term vectors in an Elasticsearch TermVectorsResponse against
 * the equivalent Lucene Fields, field by field and term by term: term
 * frequencies, then positions, offsets and payloads per occurrence, honoring
 * what the test config requested and what each field actually stored.
 * Fields not in {@code testConfig.selectedFields} must be absent from the
 * response.
 */
protected void validateResponse(TermVectorsResponse esResponse, Fields luceneFields, TestConfig testConfig)
        throws IOException {
    assertThat(esResponse.getIndex(), equalTo(testConfig.doc.index));
    TestDoc testDoc = testConfig.doc;
    HashSet<String> selectedFields = testConfig.selectedFields == null ? null
            : new HashSet<>(Arrays.asList(testConfig.selectedFields));
    Fields esTermVectorFields = esResponse.getFields();
    for (TestFieldSetting field : testDoc.fieldSettings) {
        Terms esTerms = esTermVectorFields.terms(field.name);
        if (selectedFields != null && !selectedFields.contains(field.name)) {
            // Unselected fields must not appear in the response:
            assertNull(esTerms);
            continue;
        }

        assertNotNull(esTerms);

        Terms luceneTerms = luceneFields.terms(field.name);
        TermsEnum esTermEnum = esTerms.iterator();
        TermsEnum luceneTermEnum = luceneTerms.iterator();

        // Walk both enums in lockstep:
        while (esTermEnum.next() != null) {
            assertNotNull(luceneTermEnum.next());

            assertThat(esTermEnum.totalTermFreq(), equalTo(luceneTermEnum.totalTermFreq()));
            // Older 3-arg postings overload (liveDocs, reuse, flags) — liveDocs null here.
            PostingsEnum esDocsPosEnum = esTermEnum.postings(null, null, PostingsEnum.POSITIONS);
            PostingsEnum luceneDocsPosEnum = luceneTermEnum.postings(null, null, PostingsEnum.POSITIONS);
            if (luceneDocsPosEnum == null) {
                // test we expect that...
                assertFalse(field.storedOffset);
                assertFalse(field.storedPayloads);
                assertFalse(field.storedPositions);
                continue;
            }

            String currentTerm = esTermEnum.term().utf8ToString();

            assertThat("Token mismatch for field: " + field.name, currentTerm,
                    equalTo(luceneTermEnum.term().utf8ToString()));

            // Term vectors cover a single doc; advance both enums to it:
            esDocsPosEnum.nextDoc();
            luceneDocsPosEnum.nextDoc();

            int freq = esDocsPosEnum.freq();
            assertThat(freq, equalTo(luceneDocsPosEnum.freq()));
            // Per-occurrence checks; -1/null are the "not stored/not requested" markers:
            for (int i = 0; i < freq; i++) {
                String failDesc = " (field:" + field.name + " term:" + currentTerm + ")";
                int lucenePos = luceneDocsPosEnum.nextPosition();
                int esPos = esDocsPosEnum.nextPosition();
                if (field.storedPositions && testConfig.requestPositions) {
                    assertThat("Position test failed" + failDesc, lucenePos, equalTo(esPos));
                } else {
                    assertThat("Missing position test failed" + failDesc, esPos, equalTo(-1));
                }
                if (field.storedOffset && testConfig.requestOffsets) {
                    assertThat("Offset test failed" + failDesc, luceneDocsPosEnum.startOffset(),
                            equalTo(esDocsPosEnum.startOffset()));
                    assertThat("Offset test failed" + failDesc, luceneDocsPosEnum.endOffset(),
                            equalTo(esDocsPosEnum.endOffset()));
                } else {
                    assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.startOffset(),
                            equalTo(-1));
                    assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.endOffset(), equalTo(-1));
                }
                if (field.storedPayloads && testConfig.requestPayloads) {
                    assertThat("Payload test failed" + failDesc, luceneDocsPosEnum.getPayload(),
                            equalTo(esDocsPosEnum.getPayload()));
                } else {
                    assertThat("Missing payload test failed" + failDesc, esDocsPosEnum.getPayload(),
                            equalTo(null));
                }
            }
        }
        assertNull("Es returned terms are done but lucene isn't", luceneTermEnum.next());
    }
}

From source file:org.elasticsearch.index.mapper.core.TextFieldMapperTests.java

License:Apache License

public void testDefaultPositionIncrementGap() throws IOException {
    // A bare "text" field with no explicit position_increment_gap, so the
    // mapper must fall back to the built-in default.
    String mappingJson = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field").field("type", "text").endObject().endObject()
            .endObject().endObject().string();

    DocumentMapper docMapper = indexService.mapperService().merge("type", new CompressedXContent(mappingJson),
            MergeReason.MAPPING_UPDATE, false);
    assertEquals(mappingJson, docMapper.mappingSource().toString());

    // Index a document whose field carries two values; the gap is inserted
    // between the last token of the first value and the first of the second.
    ParsedDocument parsed = docMapper.parse("test", "type", "1", XContentFactory.jsonBuilder().startObject()
            .field("field", new String[] { "a", "b" }).endObject().bytes());

    IndexableField[] indexed = parsed.rootDoc().getFields("field");
    assertEquals(2, indexed.length);
    assertEquals("a", indexed[0].stringValue());
    assertEquals("b", indexed[1].stringValue());

    IndexShard shard = indexService.getShard(0);
    shard.index(new Engine.Index(new Term("_uid", "1"), parsed));
    shard.refresh("test");

    try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
        LeafReader reader = searcher.getDirectoryReader().leaves().get(0).reader();
        TermsEnum termsEnum = reader.terms("field").iterator();
        assertTrue(termsEnum.seekExact(new BytesRef("b")));
        PostingsEnum postings = termsEnum.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, postings.nextDoc());
        // "a" sits at position 0, so "b" lands at default gap + 1.
        assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 1, postings.nextPosition());
    }
}

From source file:org.elasticsearch.index.mapper.core.TextFieldMapperTests.java

License:Apache License

public void testPositionIncrementGap() throws IOException {
    // Pick a random explicit gap and declare it in the mapping.
    final int gap = randomIntBetween(1, 1000);
    String mappingJson = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field").field("type", "text")
            .field("position_increment_gap", gap).endObject().endObject().endObject().endObject().string();

    DocumentMapper docMapper = indexService.mapperService().merge("type", new CompressedXContent(mappingJson),
            MergeReason.MAPPING_UPDATE, false);
    assertEquals(mappingJson, docMapper.mappingSource().toString());

    // Two values in one field: the configured gap separates their positions.
    ParsedDocument parsed = docMapper.parse("test", "type", "1", XContentFactory.jsonBuilder().startObject()
            .field("field", new String[] { "a", "b" }).endObject().bytes());

    IndexableField[] indexed = parsed.rootDoc().getFields("field");
    assertEquals(2, indexed.length);
    assertEquals("a", indexed[0].stringValue());
    assertEquals("b", indexed[1].stringValue());

    IndexShard shard = indexService.getShard(0);
    shard.index(new Engine.Index(new Term("_uid", "1"), parsed));
    shard.refresh("test");

    try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
        LeafReader reader = searcher.getDirectoryReader().leaves().get(0).reader();
        TermsEnum termsEnum = reader.terms("field").iterator();
        assertTrue(termsEnum.seekExact(new BytesRef("b")));
        PostingsEnum postings = termsEnum.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, postings.nextDoc());
        // "a" is at position 0, so "b" must be at the configured gap + 1.
        assertEquals(gap + 1, postings.nextPosition());
    }
}

From source file:org.opengrok.suggest.SuggesterSearcher.java

License:Open Source License

/**
 * Collects suggestion candidates for a single index segment (leaf).
 *
 * <p>Iterates every term produced by {@code suggesterQuery}, scores it —
 * either by raw document frequency, by document-id intersection with the
 * result of {@code query}, or by a phrase-aware score when positions are
 * needed — and keeps the top {@code resultSize} items in a priority queue.
 * Returns an empty list immediately if the thread is interrupted (the
 * {@code interrupted} flag is set so the caller can detect this).
 *
 * @param query             the user query restricting candidate documents;
 *                          may be null or a MatchAllDocsQuery, in which case
 *                          no per-document filtering is performed
 * @param leafReaderContext the segment being searched
 * @param project           project name attached to each result item
 * @param suggesterQuery    supplies the field and the TermsEnum of candidates
 * @param searchCounts      popularity counts used to boost previously
 *                          searched terms
 * @return scored suggestions for this segment, best-first per the queue
 * @throws IOException on index read errors
 */
private List<LookupResultItem> suggest(final Query query, final LeafReaderContext leafReaderContext,
        final String project, final SuggesterQuery suggesterQuery, final PopularityCounter searchCounts)
        throws IOException {
    // Bail out early on interruption; the flag is re-checked inside the loop.
    if (Thread.currentThread().isInterrupted()) {
        interrupted = true;
        return Collections.emptyList();
    }

    // Terms the user already typed (for the suggester's field) are excluded
    // from suggestions. The set is only built when actually needed — it stays
    // null otherwise, and is only dereferenced under the same flag below.
    boolean shouldLeaveOutSameTerms = shouldLeaveOutSameTerms(query, suggesterQuery);
    Set<BytesRef> tokensAlreadyIncluded = null;
    if (shouldLeaveOutSameTerms) {
        tokensAlreadyIncluded = SuggesterUtils.intoTermsExceptPhraseQuery(query).stream()
                .filter(t -> t.field().equals(suggesterQuery.getField())).map(Term::bytes)
                .collect(Collectors.toSet());
    }

    // A MatchAllDocsQuery (or no query) matches everything, so there is no
    // point computing the matching document-id set.
    boolean needsDocumentIds = query != null && !(query instanceof MatchAllDocsQuery);

    // Like tokensAlreadyIncluded, this is null unless needsDocumentIds is
    // true; the scoring branches below respect that invariant.
    ComplexQueryData complexQueryData = null;
    if (needsDocumentIds) {
        complexQueryData = getComplexQueryData(query, leafReaderContext);
        if (interrupted) {
            return Collections.emptyList();
        }
    }

    Terms terms = leafReaderContext.reader().terms(suggesterQuery.getField());

    TermsEnum termsEnum = suggesterQuery.getTermsEnumForSuggestions(terms);

    LookupPriorityQueue queue = new LookupPriorityQueue(resultSize);

    boolean needPositionsAndFrequencies = needPositionsAndFrequencies(query);

    // Reused across iterations: termsEnum.postings() accepts the previous
    // enum and may recycle it, avoiding a per-term allocation.
    PostingsEnum postingsEnum = null;

    BytesRef term = termsEnum.next();
    while (term != null) {
        if (Thread.currentThread().isInterrupted()) {
            interrupted = true;
            break;
        }

        // Request only as much postings data as the scoring mode requires.
        if (needPositionsAndFrequencies) {
            postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.POSITIONS | PostingsEnum.FREQS);
        } else {
            postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
        }

        int score;
        if (!needsDocumentIds) {
            // No restricting query: score by (normalized) document frequency.
            score = normalizeDocumentFrequency(termsEnum.docFreq(), numDocs);
        } else if (needPositionsAndFrequencies) {
            // Phrase-style query: positions matter for the score.
            score = getPhraseScore(complexQueryData, leafReaderContext.docBase, postingsEnum);
        } else {
            // Count how many of the query's matching docs contain this term.
            score = getDocumentFrequency(complexQueryData.documentIds, leafReaderContext.docBase, postingsEnum);
        }

        if (score > 0) {
            if (!shouldLeaveOutSameTerms || !tokensAlreadyIncluded.contains(term)) {
                // Boost terms the user has searched for before.
                score += searchCounts.get(term) * TERM_ALREADY_SEARCHED_MULTIPLIER;

                // canInsert() pre-filters to skip allocating items that the
                // bounded queue would reject anyway.
                if (queue.canInsert(score)) {
                    queue.insertWithOverflow(new LookupResultItem(term.utf8ToString(), project, score));
                }
            }
        }

        term = termsEnum.next();
    }

    return queue.getResult();
}

From source file:tw.com.kyle.luminance.LumQuery.java

/**
 * Prints every term stored in the "content" term vector of the given
 * document, each followed by its in-document frequency and the list of
 * token positions.
 *
 * <p>Output format per term: {@code <term><freq>: <pos>, <pos>, ...}
 *
 * @param docId Lucene document id whose term vector is dumped
 * @throws IOException if the index cannot be read
 */
public void ListTerm(int docId) throws IOException {
    Terms terms = idx_reader.getTermVector(docId, "content");
    // getTermVector() returns null when the document has no term vector for
    // the field; the original code would NPE here on such documents.
    if (terms == null) {
        return;
    }
    TermsEnum term_enum = terms.iterator();
    while (term_enum.next() != null) {
        System.out.printf("%s", term_enum.term().utf8ToString());
        // POSITIONS implies FREQS; a term-vector enum is backed by a single
        // document, so one nextDoc() call positions it.
        PostingsEnum post_enum = term_enum.postings(null, PostingsEnum.POSITIONS);
        post_enum.nextDoc();
        int freq = post_enum.freq();
        System.out.printf("%d: ", freq);
        for (int i = 0; i < freq; ++i) {
            System.out.printf("%d, ", post_enum.nextPosition());
        }
        System.out.printf("%n");
    }
}