Example usage for org.apache.lucene.index Fields terms

List of usage examples for org.apache.lucene.index Fields terms

Introduction

On this page you can find example usage of the method org.apache.lucene.index.Fields.terms.

Prototype

public abstract Terms terms(String field) throws IOException;

Source Link

Document

Get the Terms for this field.

Usage

From source file:org.elasticsearch.action.termvector.GetTermVectorTests.java

License:Apache License

/**
 * Indexes one document through an analyzer using a randomly configured
 * {@code delimited_payload_filter} and verifies, via the term vectors API,
 * that every payload returned for each term matches the payload that was
 * originally attached to the corresponding token.
 */
@Test
public void testRandomPayloadWithDelimitedPayloadTokenFilter() throws ElasticsearchException, IOException {

    //create the test document
    // pick one of the three payload encodings supported by the filter
    int encoding = randomIntBetween(0, 2);
    String encodingString = "";
    if (encoding == 0) {
        encodingString = "float";
    }
    if (encoding == 1) {
        encodingString = "int";
    }
    if (encoding == 2) {
        encodingString = "identity";
    }
    // NOTE(review): "crate" looks like a typo for "create" in the helper name -- confirm
    String[] tokens = crateRandomTokens();
    Map<String, List<BytesRef>> payloads = createPayloads(tokens, encoding);
    String delimiter = createRandomDelimiter(tokens);
    // the document text embeds each token's payload behind the chosen delimiter
    String queryString = createString(tokens, payloads, encoding, delimiter.charAt(0));
    //create the mapping
    XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
            .startObject("properties").startObject("field").field("type", "string")
            .field("term_vector", "with_positions_offsets_payloads").field("analyzer", "payload_test")
            .endObject().endObject().endObject().endObject();
    // analyzer: whitespace tokenizer + delimited payload filter configured with
    // the random delimiter and encoding chosen above
    ElasticsearchAssertions.assertAcked(prepareCreate("test").addMapping("type1", mapping)
            .setSettings(ImmutableSettings.settingsBuilder()
                    .put("index.analysis.analyzer.payload_test.tokenizer", "whitespace")
                    .putArray("index.analysis.analyzer.payload_test.filter", "my_delimited_payload_filter")
                    .put("index.analysis.filter.my_delimited_payload_filter.delimiter", delimiter)
                    .put("index.analysis.filter.my_delimited_payload_filter.encoding", encodingString)
                    .put("index.analysis.filter.my_delimited_payload_filter.type",
                            "delimited_payload_filter")));
    ensureYellow();

    client().prepareIndex("test", "type1", Integer.toString(1))
            .setSource(XContentFactory.jsonBuilder().startObject().field("field", queryString).endObject())
            .execute().actionGet();
    refresh();
    // request everything: payloads, offsets and positions for all fields
    TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(1))
            .setPayloads(true).setOffsets(true).setPositions(true).setSelectedFields();
    TermVectorResponse response = resp.execute().actionGet();
    assertThat("doc id 1 doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    TermsEnum iterator = terms.iterator(null);
    while (iterator.next() != null) {
        String term = iterator.term().utf8ToString();
        DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        // every returned term must have been generated by the token helper
        List<BytesRef> curPayloads = payloads.get(term);
        assertThat(term, curPayloads, Matchers.notNullValue());
        assertNotNull(docsAndPositions);
        for (int k = 0; k < docsAndPositions.freq(); k++) {
            docsAndPositions.nextPosition();
            if (docsAndPositions.getPayload() != null) {
                String infoString = "\nterm: " + term + " has payload \n"
                        + docsAndPositions.getPayload().toString() + "\n but should have payload \n"
                        + curPayloads.get(k).toString();
                assertThat(infoString, docsAndPositions.getPayload(), equalTo(curPayloads.get(k)));
            } else {
                // a missing payload is only acceptable when the expected one is empty
                String infoString = "\nterm: " + term + " has no payload but should have payload \n"
                        + curPayloads.get(k).toString();
                assertThat(infoString, curPayloads.get(k).length, equalTo(0));
            }
        }
    }
    assertThat(iterator.next(), Matchers.nullValue());
}

From source file:org.elasticsearch.action.termvector.TermVectorResponse.java

License:Apache License

/**
 * Serializes one field's term vector into the builder: the field statistics
 * first, then every term under a {@code terms} object.
 *
 * @param builder   destination for the XContent output
 * @param spare     scratch buffer, presumably reused while converting term
 *                  bytes to chars in {@code buildTerm} -- confirm with callee
 * @param theFields term vectors of all returned fields
 * @param fieldIter iterator positioned on the field to serialize; advanced by one
 */
private void buildField(XContentBuilder builder, final CharsRef spare, Fields theFields,
        Iterator<String> fieldIter) throws IOException {
    String fieldName = fieldIter.next();
    builder.startObject(fieldName);
    Terms curTerms = theFields.terms(fieldName);
    // write field statistics
    buildFieldStatistics(builder, curTerms);
    builder.startObject(FieldStrings.TERMS);
    TermsEnum termIter = curTerms.iterator(null);
    // each buildTerm call advances termIter by exactly one term; curTerms.size()
    // is assumed exact for term vectors -- TODO confirm
    for (int i = 0; i < curTerms.size(); i++) {
        buildTerm(builder, spare, curTerms, termIter);
    }
    builder.endObject();
    builder.endObject();
}

From source file:org.elasticsearch.action.termvector.TermVectorUnitTests.java

License:Apache License

/**
 * Asserts that the response carries term vectors for exactly the two
 * standard test fields, {@code title} and {@code desc}.
 */
private void checkIfStandardTermVector(TermVectorResponse inResponse) throws IOException {

    Fields returnedFields = inResponse.getFields();
    Terms titleTerms = returnedFields.terms("title");
    assertThat(titleTerms, Matchers.notNullValue());
    Terms descTerms = returnedFields.terms("desc");
    assertThat(descTerms, Matchers.notNullValue());
    assertThat(returnedFields.size(), equalTo(2));
}

From source file:org.elasticsearch.action.termvector.TermVectorWriter.java

License:Apache License

/**
 * Writes the requested term vector information for every selected field of
 * the document into the response output.
 *
 * @param termVectorsByField term vectors of the document being processed
 * @param selectedFields     fields to include, or null to include all fields
 * @param flags              which pieces of information were requested
 * @param topLevelFields     index-level fields, consulted for term and field statistics
 */
void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Flag> flags,
        Fields topLevelFields) throws IOException {

    int numFieldsWritten = 0;
    // enum instances are declared outside the loops so Lucene can reuse them
    TermsEnum iterator = null;
    DocsAndPositionsEnum docsAndPosEnum = null;
    DocsEnum docsEnum = null;
    TermsEnum topLevelIterator = null;
    for (String field : termVectorsByField) {
        // skip fields the caller did not ask for
        if ((selectedFields != null) && (!selectedFields.contains(field))) {
            continue;
        }

        Terms fieldTermVector = termVectorsByField.terms(field);
        Terms topLevelTerms = topLevelFields.terms(field);

        topLevelIterator = topLevelTerms.iterator(topLevelIterator);
        // only emit positions/offsets/payloads when both requested AND stored
        boolean positions = flags.contains(Flag.Positions) && fieldTermVector.hasPositions();
        boolean offsets = flags.contains(Flag.Offsets) && fieldTermVector.hasOffsets();
        boolean payloads = flags.contains(Flag.Payloads) && fieldTermVector.hasPayloads();
        startField(field, fieldTermVector.size(), positions, offsets, payloads);
        if (flags.contains(Flag.FieldStatistics)) {
            writeFieldStatistics(topLevelTerms);
        }
        iterator = fieldTermVector.iterator(iterator);
        final boolean useDocsAndPos = positions || offsets || payloads;
        while (iterator.next() != null) { // iterate all terms of the
            // current field
            // get the doc frequency
            BytesRef term = iterator.term();
            // every term of the document must also exist in the index-level terms
            boolean foundTerm = topLevelIterator.seekExact(term);
            assert (foundTerm);
            startTerm(term);
            if (flags.contains(Flag.TermStatistics)) {
                writeTermStatistics(topLevelIterator);
            }
            if (useDocsAndPos) {
                // given we have pos or offsets
                docsAndPosEnum = writeTermWithDocsAndPos(iterator, docsAndPosEnum, positions, offsets,
                        payloads);
            } else {
                // if we do not have the positions stored, we need to
                // get the frequency from a DocsEnum.
                docsEnum = writeTermWithDocsOnly(iterator, docsEnum);
            }
        }
        numFieldsWritten++;
    }
    response.setTermVectorField(output);
    response.setHeader(writeHeader(numFieldsWritten, flags.contains(Flag.TermStatistics),
            flags.contains(Flag.FieldStatistics)));
}

From source file:org.elasticsearch.action.termvectors.AbstractTermVectorsTestCase.java

License:Apache License

/**
 * Compares an Elasticsearch term vectors response against equivalent
 * Lucene-built term vectors, term by term and position by position.
 *
 * Fields excluded by the request's field selection must come back null; for
 * every other field the terms, frequencies, positions, offsets and payloads
 * must match what Lucene produced, with -1 (or a null payload) expected
 * wherever the feature was not requested or not stored.
 */
protected void validateResponse(TermVectorsResponse esResponse, Fields luceneFields, TestConfig testConfig)
        throws IOException {
    assertThat(esResponse.getIndex(), equalTo(testConfig.doc.index));
    TestDoc testDoc = testConfig.doc;
    HashSet<String> selectedFields = testConfig.selectedFields == null ? null
            : new HashSet<>(Arrays.asList(testConfig.selectedFields));
    Fields esTermVectorFields = esResponse.getFields();
    for (TestFieldSetting field : testDoc.fieldSettings) {
        Terms esTerms = esTermVectorFields.terms(field.name);
        // a field that was not selected must not be returned at all
        if (selectedFields != null && !selectedFields.contains(field.name)) {
            assertNull(esTerms);
            continue;
        }

        assertNotNull(esTerms);

        Terms luceneTerms = luceneFields.terms(field.name);
        TermsEnum esTermEnum = esTerms.iterator();
        TermsEnum luceneTermEnum = luceneTerms.iterator();

        // both enums must advance in lockstep: same term count, same order
        while (esTermEnum.next() != null) {
            assertNotNull(luceneTermEnum.next());

            assertThat(esTermEnum.totalTermFreq(), equalTo(luceneTermEnum.totalTermFreq()));
            PostingsEnum esDocsPosEnum = esTermEnum.postings(null, PostingsEnum.POSITIONS);
            PostingsEnum luceneDocsPosEnum = luceneTermEnum.postings(null, PostingsEnum.POSITIONS);
            if (luceneDocsPosEnum == null) {
                // test we expect that...
                // no positions available is only legal when the field stored
                // neither offsets, payloads nor positions
                assertFalse(field.storedOffset);
                assertFalse(field.storedPayloads);
                assertFalse(field.storedPositions);
                continue;
            }

            String currentTerm = esTermEnum.term().utf8ToString();

            assertThat("Token mismatch for field: " + field.name, currentTerm,
                    equalTo(luceneTermEnum.term().utf8ToString()));

            esDocsPosEnum.nextDoc();
            luceneDocsPosEnum.nextDoc();

            int freq = esDocsPosEnum.freq();
            assertThat(freq, equalTo(luceneDocsPosEnum.freq()));
            for (int i = 0; i < freq; i++) {
                String failDesc = " (field:" + field.name + " term:" + currentTerm + ")";
                int lucenePos = luceneDocsPosEnum.nextPosition();
                int esPos = esDocsPosEnum.nextPosition();
                // each feature must match Lucene when stored AND requested,
                // and be absent (-1 / null payload) otherwise
                if (field.storedPositions && testConfig.requestPositions) {
                    assertThat("Position test failed" + failDesc, lucenePos, equalTo(esPos));
                } else {
                    assertThat("Missing position test failed" + failDesc, esPos, equalTo(-1));
                }
                if (field.storedOffset && testConfig.requestOffsets) {
                    assertThat("Offset test failed" + failDesc, luceneDocsPosEnum.startOffset(),
                            equalTo(esDocsPosEnum.startOffset()));
                    assertThat("Offset test failed" + failDesc, luceneDocsPosEnum.endOffset(),
                            equalTo(esDocsPosEnum.endOffset()));
                } else {
                    assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.startOffset(),
                            equalTo(-1));
                    assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.endOffset(), equalTo(-1));
                }
                if (field.storedPayloads && testConfig.requestPayloads) {
                    assertThat("Payload test failed" + failDesc, luceneDocsPosEnum.getPayload(),
                            equalTo(esDocsPosEnum.getPayload()));
                } else {
                    assertThat("Missing payload test failed" + failDesc, esDocsPosEnum.getPayload(),
                            equalTo(null));
                }
            }
        }
        assertNull("Es returned terms are done but lucene isn't", luceneTermEnum.next());
    }
}

From source file:org.elasticsearch.action.termvectors.AbstractTermVectorsTests.java

License:Apache License

/**
 * Compares an Elasticsearch term vectors response against equivalent
 * Lucene-built term vectors, term by term and position by position.
 *
 * Fields excluded by the request's field selection must come back null; for
 * every other field the terms, frequencies, positions, offsets and payloads
 * must match what Lucene produced, with -1 (or a null payload) expected
 * wherever the feature was not requested or not stored.
 *
 * NOTE(review): this variant uses the three-argument postings overload
 * (presumably an older Lucene API taking a reuse argument) -- confirm
 * against the Lucene version this file compiles with.
 */
protected void validateResponse(TermVectorsResponse esResponse, Fields luceneFields, TestConfig testConfig)
        throws IOException {
    assertThat(esResponse.getIndex(), equalTo(testConfig.doc.index));
    TestDoc testDoc = testConfig.doc;
    HashSet<String> selectedFields = testConfig.selectedFields == null ? null
            : new HashSet<>(Arrays.asList(testConfig.selectedFields));
    Fields esTermVectorFields = esResponse.getFields();
    for (TestFieldSetting field : testDoc.fieldSettings) {
        Terms esTerms = esTermVectorFields.terms(field.name);
        // a field that was not selected must not be returned at all
        if (selectedFields != null && !selectedFields.contains(field.name)) {
            assertNull(esTerms);
            continue;
        }

        assertNotNull(esTerms);

        Terms luceneTerms = luceneFields.terms(field.name);
        TermsEnum esTermEnum = esTerms.iterator();
        TermsEnum luceneTermEnum = luceneTerms.iterator();

        // both enums must advance in lockstep: same term count, same order
        while (esTermEnum.next() != null) {
            assertNotNull(luceneTermEnum.next());

            assertThat(esTermEnum.totalTermFreq(), equalTo(luceneTermEnum.totalTermFreq()));
            PostingsEnum esDocsPosEnum = esTermEnum.postings(null, null, PostingsEnum.POSITIONS);
            PostingsEnum luceneDocsPosEnum = luceneTermEnum.postings(null, null, PostingsEnum.POSITIONS);
            if (luceneDocsPosEnum == null) {
                // test we expect that...
                // no positions available is only legal when the field stored
                // neither offsets, payloads nor positions
                assertFalse(field.storedOffset);
                assertFalse(field.storedPayloads);
                assertFalse(field.storedPositions);
                continue;
            }

            String currentTerm = esTermEnum.term().utf8ToString();

            assertThat("Token mismatch for field: " + field.name, currentTerm,
                    equalTo(luceneTermEnum.term().utf8ToString()));

            esDocsPosEnum.nextDoc();
            luceneDocsPosEnum.nextDoc();

            int freq = esDocsPosEnum.freq();
            assertThat(freq, equalTo(luceneDocsPosEnum.freq()));
            for (int i = 0; i < freq; i++) {
                String failDesc = " (field:" + field.name + " term:" + currentTerm + ")";
                int lucenePos = luceneDocsPosEnum.nextPosition();
                int esPos = esDocsPosEnum.nextPosition();
                // each feature must match Lucene when stored AND requested,
                // and be absent (-1 / null payload) otherwise
                if (field.storedPositions && testConfig.requestPositions) {
                    assertThat("Position test failed" + failDesc, lucenePos, equalTo(esPos));
                } else {
                    assertThat("Missing position test failed" + failDesc, esPos, equalTo(-1));
                }
                if (field.storedOffset && testConfig.requestOffsets) {
                    assertThat("Offset test failed" + failDesc, luceneDocsPosEnum.startOffset(),
                            equalTo(esDocsPosEnum.startOffset()));
                    assertThat("Offset test failed" + failDesc, luceneDocsPosEnum.endOffset(),
                            equalTo(esDocsPosEnum.endOffset()));
                } else {
                    assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.startOffset(),
                            equalTo(-1));
                    assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.endOffset(), equalTo(-1));
                }
                if (field.storedPayloads && testConfig.requestPayloads) {
                    assertThat("Payload test failed" + failDesc, luceneDocsPosEnum.getPayload(),
                            equalTo(esDocsPosEnum.getPayload()));
                } else {
                    assertThat("Missing payload test failed" + failDesc, esDocsPosEnum.getPayload(),
                            equalTo(null));
                }
            }
        }
        assertNull("Es returned terms are done but lucene isn't", luceneTermEnum.next());
    }
}

From source file:org.elasticsearch.action.termvectors.dfs.DfsOnlyRequest.java

License:Apache License

/**
 * Builds a dfs-only search request whose query is a boolean "should" over
 * every term found in the given term vectors, restricted to the selected
 * fields when a selection is supplied.
 */
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields)
        throws IOException {
    super(indices);

    // collect every term of every (selected) field into one bool query
    final BoolQueryBuilder boolBuilder = boolQuery();
    for (String fieldName : termVectorsFields) {
        boolean excluded = selectedFields != null && !selectedFields.contains(fieldName);
        if (excluded) {
            continue;
        }
        Terms fieldTerms = termVectorsFields.terms(fieldName);
        TermsEnum termIter = fieldTerms.iterator();
        for (BytesRef term = termIter.next(); term != null; term = termIter.next()) {
            boolBuilder.should(QueryBuilders.termQuery(fieldName, term.utf8ToString()));
        }
    }
    // wrap the query into a search request against the given indices/types
    this.searchRequest = new SearchRequest(indices).types(types)
            .source(new SearchSourceBuilder().query(boolBuilder));
}

From source file:org.elasticsearch.action.termvectors.GetTermVectorsCheckDocFreqIT.java

License:Apache License

/**
 * Fetches term vectors for doc {@code i} with field statistics disabled and
 * verifies that the per-field statistics come back as -1 while term
 * statistics, positions, offsets and payloads are all present, then checks
 * the raw JSON rendering of the response.
 *
 * Fixes: removed a stray empty statement after the replaceFirst line,
 * removed a duplicated notNullValue assertion, replaced the lowercase long
 * suffix {@code 8l} with {@code 8L}, and passed ToXContent.EMPTY_PARAMS
 * instead of null for consistency with checkAllInfo in this file.
 *
 * @param numDocs     number of indexed docs (expected doc freq for each term)
 * @param values      expected terms in iteration order
 * @param freq        expected per-term frequency within doc i
 * @param pos         expected positions per term
 * @param startOffset expected start offsets per term
 * @param endOffset   expected end offsets per term
 * @param i           id of the document to request
 */
private void checkWithoutFieldStatistics(int numDocs, String[] values, int[] freq, int[][] pos,
        int[][] startOffset, int[][] endOffset, int i) throws IOException {
    TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(i))
            .setPayloads(true).setOffsets(true).setPositions(true).setTermStatistics(true)
            .setFieldStatistics(false).setSelectedFields();
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    // field statistics were not requested, so all three must be -1
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) -1));
    assertThat(terms.getDocCount(), Matchers.equalTo(-1));
    assertThat(terms.getSumDocFreq(), equalTo((long) -1));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        // "the" occurs twice per document, every other term once
        if (string.equals("the")) {
            assertThat("expected ttf of " + string, numDocs * 2, equalTo((int) iterator.totalTermFreq()));
        } else {
            assertThat("expected ttf of " + string, numDocs, equalTo((int) iterator.totalTermFreq()));
        }

        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        assertThat(iterator.docFreq(), equalTo(numDocs));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(iterator.next(), Matchers.nullValue());

    // render the response to JSON and compare against the expected string,
    // masking out the variable "took" timing field first
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    xBuilder.startObject();
    response.toXContent(xBuilder, ToXContent.EMPTY_PARAMS);
    xBuilder.endObject();
    String utf8 = xBuilder.bytes().toUtf8().replaceFirst("\"took\":\\d+,", "");
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i
            + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));

}

From source file:org.elasticsearch.action.termvectors.GetTermVectorsCheckDocFreqIT.java

License:Apache License

/**
 * Fetches term vectors for doc {@code i} with term statistics disabled and
 * verifies that field statistics are present while per-term statistics
 * (ttf, doc freq) come back as -1, then checks the raw JSON rendering of
 * the response.
 *
 * Fixes: removed a stray empty statement after the replaceFirst line,
 * removed a duplicated notNullValue assertion, replaced the lowercase long
 * suffix {@code 8l} with {@code 8L}, and passed ToXContent.EMPTY_PARAMS
 * instead of null for consistency with checkAllInfo in this file.
 *
 * @param numDocs     number of indexed docs (drives expected field statistics)
 * @param values      expected terms in iteration order
 * @param freq        expected per-term frequency within doc i
 * @param pos         expected positions per term
 * @param startOffset expected start offsets per term
 * @param endOffset   expected end offsets per term
 * @param i           id of the document to request
 */
private void checkWithoutTermStatistics(int numDocs, String[] values, int[] freq, int[][] pos,
        int[][] startOffset, int[][] endOffset, int i) throws IOException {
    TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(i))
            .setPayloads(true).setOffsets(true).setPositions(true).setTermStatistics(false)
            .setFieldStatistics(true).setSelectedFields();
    assertThat(resp.request().termStatistics(), equalTo(false));
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    // field statistics were requested: 9 tokens per doc, values.length unique terms
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs)));
    assertThat(terms.getDocCount(), Matchers.equalTo(numDocs));
    assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));

        // term statistics were not requested, so ttf and doc freq must be -1
        assertThat("expected ttf of " + string, -1, equalTo((int) iterator.totalTermFreq()));

        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        assertThat(iterator.docFreq(), equalTo(-1));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(iterator.next(), Matchers.nullValue());

    // render the response to JSON and compare against the expected string,
    // masking out the variable "took" timing field first
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    xBuilder.startObject();
    response.toXContent(xBuilder, ToXContent.EMPTY_PARAMS);
    xBuilder.endObject();
    String utf8 = xBuilder.bytes().toUtf8().replaceFirst("\"took\":\\d+,", "");
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i
            + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));

}

From source file:org.elasticsearch.action.termvectors.GetTermVectorsCheckDocFreqIT.java

License:Apache License

/**
 * Fetches term vectors for doc {@code i} with every option enabled and
 * verifies field statistics, term statistics, positions, offsets and
 * payloads, then checks the raw JSON rendering of the response.
 *
 * Fixes: removed a stray empty statement after the replaceFirst line,
 * removed a duplicated notNullValue assertion, and replaced the lowercase
 * long suffix {@code 8l} with {@code 8L}.
 *
 * @param numDocs     number of indexed docs (expected doc freq for each term)
 * @param values      expected terms in iteration order
 * @param freq        expected per-term frequency within doc i
 * @param pos         expected positions per term
 * @param startOffset expected start offsets per term
 * @param endOffset   expected end offsets per term
 * @param i           id of the document to request
 */
private void checkAllInfo(int numDocs, String[] values, int[] freq, int[][] pos, int[][] startOffset,
        int[][] endOffset, int i) throws IOException {
    TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(i))
            .setPayloads(true).setOffsets(true).setPositions(true).setFieldStatistics(true)
            .setTermStatistics(true).setSelectedFields();
    assertThat(resp.request().fieldStatistics(), equalTo(true));
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    // field statistics: 9 tokens per doc, values.length unique terms
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs)));
    assertThat(terms.getDocCount(), Matchers.equalTo(numDocs));
    assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        // "the" occurs twice per document, every other term once
        if (string.equals("the")) {
            assertThat("expected ttf of " + string, numDocs * 2, equalTo((int) iterator.totalTermFreq()));
        } else {
            assertThat("expected ttf of " + string, numDocs, equalTo((int) iterator.totalTermFreq()));
        }

        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        assertThat(iterator.docFreq(), equalTo(numDocs));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(iterator.next(), Matchers.nullValue());

    // render the response to JSON and compare against the expected string,
    // masking out the variable "took" timing field first
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    xBuilder.startObject();
    response.toXContent(xBuilder, ToXContent.EMPTY_PARAMS);
    xBuilder.endObject();
    String utf8 = xBuilder.bytes().toUtf8().replaceFirst("\"took\":\\d+,", "");
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i
            + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));
}