Example usage for org.apache.lucene.search.spans Spans advance

Introduction

On this page you can find example usages of org.apache.lucene.search.spans Spans advance.

Prototype

public abstract int advance(int target) throws IOException;

Source Link

Document

Advances to the first document beyond the current one whose document number is greater than or equal to target, and returns that document number.

Usage

From source file:it.cnr.ilc.lc.clavius.search.Tester.java

/**
 * Runs a span query for {@code term} on the {@code content} field of the PlainText index and
 * logs, for each of the top 10 hits, every matching span together with the term-vector terms
 * found within three positions on either side of the span.
 *
 * <p>The index directory and reader are closed before the method returns. An
 * {@link IOException} raised while searching is logged (with its stack trace) and not
 * propagated.
 *
 * @param term the term to look up in the {@code content} field
 */
private static void searchWithContext(String term) {
    logger.info("searchWithContext(" + term + ")");
    SpanQuery spanQuery = new SpanTermQuery(new Term("content", term));

    // try-with-resources: both the Directory and the DirectoryReader hold file handles and
    // must be released even when the search fails half-way.
    try (Directory indexDirectory = FSDirectory.open(
            Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText"));
            DirectoryReader indexReader = DirectoryReader.open(indexDirectory)) {
        IndexSearcher searcher = new IndexSearcher(indexReader);
        ScoreDoc[] sc = searcher.search(spanQuery, 10).scoreDocs;

        logger.info("hits :" + sc.length);
        if (sc.length == 0) {
            logger.info("no " + term + " found!");
        }

        SpanWeight weight = spanQuery.createWeight(searcher, false);
        for (int k = 0; k < sc.length; k++) {
            int docId = sc[k].doc;
            logger.info("docID: " + docId);

            // ScoreDoc ids are index-wide, whereas Spans work on segment-local ids. Leaves are
            // ordered by docBase, so the owning segment is the last one whose docBase does not
            // exceed the hit's id.
            int leafIdx = indexReader.leaves().size() - 1;
            while (leafIdx > 0 && indexReader.leaves().get(leafIdx).docBase > docId) {
                leafIdx--;
            }
            int leafDocId = docId - indexReader.leaves().get(leafIdx).docBase;

            // Hits arrive in score order, not doc-id order, and advance() must never be asked
            // to go backwards; a fresh Spans per hit keeps every advance() call valid.
            Spans spans = weight.getSpans(indexReader.leaves().get(leafIdx), SpanWeight.Postings.POSITIONS);
            if (spans == null) {
                logger.info("no " + term + " found!");
                continue;
            }
            int newDocID = spans.advance(leafDocId);
            logger.info("newDocID: " + newDocID);
            if (newDocID != leafDocId) {
                // Landed on another document (or ran out of documents): the positions would
                // not belong to this hit.
                logger.info("no spans for docID: " + docId);
                continue;
            }

            // Term vectors do not change per span, so load them once per document.
            Fields fields = indexReader.getTermVectors(docId);
            Terms terms = (fields == null) ? null : fields.terms("content");
            if (terms == null) {
                logger.info("no term vectors for docID: " + docId);
            }

            int nextSpan;
            while ((nextSpan = spans.nextStartPosition()) != Spans.NO_MORE_POSITIONS) {
                logger.info("nextSpan             : " + nextSpan);
                logger.info("spans.startPosition(): " + spans.startPosition());
                logger.info("spans.endPosition()  : " + spans.endPosition());
                logger.info("spans.width()        : " + spans.width());

                if (terms != null) {
                    logTermsInWindow(terms, spans.startPosition() - 3, spans.endPosition() + 3);
                }
            }
        }
    } catch (IOException e) {
        // Pass the exception itself so the stack trace is not lost.
        logger.error(e.getMessage(), e);
    }
    logger.info("End.");
}

/**
 * Logs every term of a single-document term vector that occurs at a position inside the
 * inclusive window {@code [start, end]}.
 *
 * @param terms the term vector of one document for one field
 * @param start first position of the window (may be negative)
 * @param end   last position of the window
 * @throws IOException if reading the term vector fails
 */
private static void logTermsInWindow(Terms terms, int start, int end) throws IOException {
    TermsEnum termsEnum = terms.iterator();
    PostingsEnum postingEnum = null;
    BytesRef text;
    while ((text = termsEnum.next()) != null) {
        // Lucene term bytes are UTF-8; decode them explicitly rather than with the platform charset.
        String s = text.utf8ToString();
        // Ask for positions explicitly; nextPosition() is only meaningful when they were requested.
        postingEnum = termsEnum.postings(postingEnum, PostingsEnum.POSITIONS);
        if (postingEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
            continue;
        }
        int freq = postingEnum.freq();
        for (int i = 0; i < freq; i++) {
            int position = postingEnum.nextPosition();
            if (position == -1) {
                // positions were not stored for this term vector
                break;
            }
            if (position >= start && position <= end) {
                logger.info("pos: " + position + ", term: " + s + " offset: " + text.offset
                        + " length: " + text.length);
            }
        }
    }
}

From source file:nl.inl.blacklab.search.lucene.TestSpansTagsPayload.java

License:Apache License

/**
 * Wraps a mock source holding hits in documents 1 and 2 in a {@code SpansTagsPayload},
 * calls {@code advance(2)} on it, and compares what remains against the two document-2 hits.
 */
@Test
public void testSkip() throws IOException {
    // Source hits as parallel arrays: document, start, end (the end is carried in the payload).
    Spans source = MockSpans.withEndInPayload(
            new int[] { 1, 1, 2, 2 },
            new int[] { 2, 4, 12, 14 },
            new int[] { 5, 7, 17, 15 });

    Spans actual = new SpansTagsPayload(source);
    actual.advance(2);

    // Only the hits from document 2 are expected after the skip.
    Spans expected = new MockSpans(
            new int[] { 2, 2 },
            new int[] { 12, 14 },
            new int[] { 17, 15 });
    TestUtil.assertEquals(expected, actual, true);
}

From source file:uk.co.flax.luwak.util.SpanExtractor.java

License:Apache License

/**
 * Feeds every span position available on the scorer's current document to a SpanCollector.
 *
 * @param scorer the scorer whose Spans are to be extracted
 * @param collector the SpanCollector that receives each position
 * @param errorOnNoSpans if true, throw an error if no Spans can be extracted
 *                       from the Scorer or any of its children
 * @throws IOException on error
 */
public static void collect(Scorer scorer, SpanCollector collector, boolean errorOnNoSpans) throws IOException {

    List<Spans> extracted = getSpans(scorer, errorOnNoSpans);
    int targetDoc = scorer.docID();

    for (Spans candidate : extracted) {
        int currentDoc = candidate.docID();
        if (currentDoc > targetDoc) {
            // already past the scorer's document: nothing to collect from this one
            continue;
        }
        // A lazily-advancing Scorer may leave some sub-spans behind the current document;
        // try to catch those up and skip them if they do not land exactly on it.
        if (currentDoc < targetDoc && candidate.advance(targetDoc) != targetDoc) {
            continue;
        }
        while (candidate.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
            candidate.collect(collector);
        }
    }

}