Example usage for org.apache.lucene.search.spans Spans advance

List of usage examples for org.apache.lucene.search.spans Spans advance

Introduction

In this page you can find the example usage for org.apache.lucene.search.spans Spans advance.

Prototype

public abstract int advance(int target) throws IOException;

Source Link

Document

Advances to the first document beyond the current one whose document number is greater than or equal to target, and returns that document number.

Usage

From source file:it.cnr.ilc.lc.clavius.search.Tester.java

/**
 * Runs a {@link SpanTermQuery} for {@code term} on the "content" field and, for each
 * of the top 10 hits, logs every span position together with the term-vector terms
 * that fall within three positions on either side of the span.
 *
 * <p>The index directory and reader are closed when the method returns. Hits are
 * visited in ascending document-id order (not score order) because a {@link Spans}
 * iterator may only be advanced forwards.
 *
 * <p>NOTE(review): only the first index leaf is inspected, and the top-level document
 * ids returned by the searcher are used directly as leaf document ids. That is only
 * correct for a single-segment index; hits the spans iterator cannot be positioned on
 * are skipped instead of reporting positions from a different document.
 *
 * @param term the term to look up in the "content" field
 */
private static void searchWithContext(String term) {
    logger.info("searchWithContext(" + term + ")");

    // try-with-resources: the directory and the reader hold file handles and must be closed
    try (Directory indexDirectory = FSDirectory.open(
            Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText"));
            DirectoryReader indexReader = DirectoryReader.open(indexDirectory)) {

        IndexSearcher searcher = new IndexSearcher(indexReader);
        IndexReader reader = searcher.getIndexReader();
        SpanQuery spanQuery = new SpanTermQuery(new Term("content", term));

        // An empty index has no leaves at all; treat that like "no spans".
        Spans spans = null;
        if (!reader.leaves().isEmpty()) {
            spans = spanQuery.createWeight(searcher, false)
                    .getSpans(reader.leaves().get(0), SpanWeight.Postings.POSITIONS);
        }

        ScoreDoc[] sc = searcher.search(spanQuery, 10).scoreDocs;
        logger.info("hits :" + sc.length);

        if (null != spans) {
            // scoreDocs are ordered by score; advance() needs strictly increasing targets
            int[] docIds = new int[sc.length];
            for (int k = 0; k < sc.length; k++) {
                docIds[k] = sc[k].doc;
            }
            java.util.Arrays.sort(docIds);

            for (int docId : docIds) {
                logger.info("docID: " + docId);

                // Only move forwards; never call advance() with a target at or behind
                // the current document.
                int newDocID = spans.docID();
                if (newDocID < docId) {
                    newDocID = spans.advance(docId);
                }
                logger.info("newDocID: " + newDocID);

                if (newDocID == Spans.NO_MORE_DOCS) {
                    break; // iterator exhausted, no later hit can match either
                }
                if (newDocID != docId) {
                    continue; // spans are positioned on another document; skip this hit
                }

                // Term vectors are per document: fetch them once, not once per span.
                Fields fields = reader.getTermVectors(docId);
                Terms terms = (fields == null) ? null : fields.terms("content");
                if (terms == null) {
                    logger.info("no term vectors for docID: " + docId);
                }

                int nextSpan;
                while ((nextSpan = spans.nextStartPosition()) != Spans.NO_MORE_POSITIONS) {
                    logger.info("nextSpan             : " + nextSpan);
                    logger.info("spans.startPosition(): " + spans.startPosition());
                    logger.info("spans.endPosition()  : " + spans.endPosition());
                    logger.info("spans.width()        : " + spans.width());

                    if (terms != null) {
                        logTermsInWindow(terms, spans.startPosition() - 3, spans.endPosition() + 3);
                    }
                }
            }
        } else {
            logger.info("no " + term + " found!");
        }
    } catch (IOException e) {
        // keep the cause so the stack trace is not lost
        logger.error("searchWithContext(" + term + ") failed: " + e.getMessage(), e);
    }
    logger.info("End.");
}

/**
 * Logs every term of a single document's term vector that occurs at a position in
 * the inclusive range {@code [start, end]}.
 *
 * @param terms the term vector of one document for one field
 * @param start first position of the window (inclusive, may be negative)
 * @param end last position of the window (inclusive)
 * @throws IOException if reading the term vector fails
 */
private static void logTermsInWindow(Terms terms, int start, int end) throws IOException {
    TermsEnum termsEnum = terms.iterator();
    PostingsEnum postingEnum = null;
    BytesRef text;
    while ((text = termsEnum.next()) != null) {
        // BytesRef holds UTF-8; decode explicitly instead of using the platform charset
        String s = text.utf8ToString();
        postingEnum = termsEnum.postings(postingEnum);
        if (postingEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
            continue;
        }
        int freq = postingEnum.freq();
        for (int i = 0; i < freq; i++) {
            int position = postingEnum.nextPosition();
            if (position == -1) {
                break; // positions were not stored for this term
            }
            if (position >= start && position <= end) {
                logger.info("pos: " + position + ", term: " + s + " offset: " + text.offset
                        + " length: " + text.length);
            }
        }
    }
}

From source file:nl.inl.blacklab.search.lucene.TestSpansTagsPayload.java

License:Apache License

/**
 * Advancing a {@code SpansTagsPayload} to document 2 must leave exactly the
 * spans of document 2, with end positions taken from the wrapped spans.
 */
@Test
public void testSkip() throws IOException {
    // Wrapped spans: two hits in doc 1 followed by two hits in doc 2.
    Spans source = MockSpans.withEndInPayload(
            new int[] { 1, 1, 2, 2 },
            new int[] { 2, 4, 12, 14 },
            new int[] { 5, 7, 17, 15 });

    Spans tags = new SpansTagsPayload(source);
    tags.advance(2);

    // Only the doc-2 hits should remain after the skip.
    Spans expected = new MockSpans(
            new int[] { 2, 2 },
            new int[] { 12, 14 },
            new int[] { 17, 15 });
    TestUtil.assertEquals(expected, tags, true);
}

From source file:uk.co.flax.luwak.util.SpanExtractor.java

License:Apache License

/**
 * Feeds every span position of the scorer's current document to a SpanCollector.
 *
 * <p>Each Spans object obtained from the scorer is brought onto the scorer's
 * current document if it is still behind it; Spans that are (or end up) on a
 * different document are ignored.
 *
 * @param scorer the scorer to extract Spans from
 * @param collector the SpanCollector that receives each span position
 * @param errorOnNoSpans if true, throw an error if no Spans can be extracted
 *                       from the Scorer or any of its children
 * @throws IOException on error
 */
public static void collect(Scorer scorer, SpanCollector collector, boolean errorOnNoSpans) throws IOException {

    final List<Spans> candidates = getSpans(scorer, errorOnNoSpans);
    final int target = scorer.docID();

    for (Spans candidate : candidates) {
        final int current = candidate.docID();

        if (current > target) {
            // already past the scorer's document: nothing to collect
            continue;
        }
        // a lazily-advancing Scorer may leave sub-spans behind the current document,
        // so catch up and make sure we actually landed on it
        if (current < target && candidate.advance(target) != target) {
            continue;
        }

        while (candidate.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
            candidate.collect(collector);
        }
    }

}