Example usage for org.apache.lucene.index Fields size

List of usage examples for org.apache.lucene.index Fields size

Introduction

In this page you can find the example usage for org.apache.lucene.index Fields size.

Prototype

public abstract int size();

Source Link

Document

Returns the number of fields or -1 if the number of distinct field names is unknown.

Usage

From source file:org.elasticsearch.action.termvectors.GetTermVectorsIT.java

License:Apache License

/**
 * Requests term vectors for ten fields, roughly half of which are mapped with
 * {@code "term_vector": "no"}. All ten are selected in the request, and the
 * response is still expected to contain every one of them — presumably the
 * non-stored ones are generated on the fly from the source (TODO confirm
 * against the term vectors API docs).
 */
@Test
public void testSimpleTermVectorsWithGenerate() throws IOException {
    String[] fieldNames = new String[10];
    for (int i = 0; i < fieldNames.length; i++) {
        fieldNames[i] = "field" + String.valueOf(i);
    }

    // Each field gets the same analyzer and the same nine-token sentence;
    // whether term vectors are stored is chosen at random per field.
    XContentBuilder mapping = jsonBuilder().startObject().startObject("type1").startObject("properties");
    XContentBuilder source = jsonBuilder().startObject();
    for (String field : fieldNames) {
        mapping.startObject(field).field("type", "string")
                .field("term_vector", randomBoolean() ? "with_positions_offsets_payloads" : "no")
                .field("analyzer", "tv_test").endObject();
        source.field(field, "the quick brown fox jumps over the lazy dog");
    }
    mapping.endObject().endObject().endObject();
    source.endObject();

    assertAcked(prepareCreate("test").addMapping("type1", mapping)
            .setSettings(settingsBuilder().put(indexSettings())
                    .put("index.analysis.analyzer.tv_test.tokenizer", "whitespace")
                    .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase")));

    ensureGreen();

    for (int i = 0; i < 10; i++) {
        client().prepareIndex("test", "type1", Integer.toString(i)).setSource(source).execute().actionGet();
        refresh();
    }

    for (int i = 0; i < 10; i++) {
        TermVectorsResponse response = client().prepareTermVectors("test", "type1", Integer.toString(i))
                .setPayloads(true).setOffsets(true).setPositions(true).setSelectedFields(fieldNames).execute()
                .actionGet();
        assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
        Fields fields = response.getFields();
        // Every selected field must be present, stored or generated.
        assertThat(fields.size(), equalTo(fieldNames.length));
        for (String fieldName : fieldNames) {
            // MemoryIndex does not support payloads
            checkBrownFoxTermVector(fields, fieldName, false);
        }
    }
}

From source file:org.elasticsearch.action.termvectors.GetTermVectorsIT.java

License:Apache License

/**
 * Asserts that {@code fieldsObject} holds term vectors for exactly the
 * expected fields: names starting with "non_existing" must be absent, every
 * other name must be present, and any field listed in
 * {@code perFieldAnalyzer} must have been analyzed by the overriding analyzer,
 * i.e. its whole value appears as the single term "some text here".
 */
private void checkAnalyzedFields(Fields fieldsObject, Set<String> fieldNames,
        Map<String, String> perFieldAnalyzer) throws IOException {
    Set<String> validFields = new HashSet<>();
    for (String fieldName : fieldNames) {
        boolean expectedMissing = fieldName.startsWith("non_existing");
        if (expectedMissing) {
            assertThat("Non existing field\"" + fieldName + "\" should not be returned!",
                    fieldsObject.terms(fieldName), nullValue());
        } else {
            Terms fieldTerms = fieldsObject.terms(fieldName);
            assertThat("Existing field " + fieldName + "should have been returned", fieldTerms, notNullValue());
            // check overridden by keyword analyzer ...
            if (perFieldAnalyzer.containsKey(fieldName)) {
                TermsEnum termsIter = fieldTerms.iterator();
                assertThat("Analyzer for " + fieldName + " should have been overridden!",
                        termsIter.next().utf8ToString(), equalTo("some text here"));
                assertThat(termsIter.next(), nullValue());
            }
            validFields.add(fieldName);
        }
    }
    // ensure no other fields are returned
    assertThat("More fields than expected are returned!", fieldsObject.size(), equalTo(validFields.size()));
}

From source file:org.elasticsearch.action.termvectors.GetTermVectorsIT.java

License:Apache License

/**
 * Compares the field- and term-level statistics carried by {@code fields}
 * against the expected values encoded as JSON in {@code xContentBuilder}.
 *
 * @param fields          term vector fields returned by a request
 * @param xContentBuilder JSON document holding the expected statistics
 * @param isEqual         passed through to {@code equalOrLessThanTo};
 *                        presumably exact-match when true, less-or-equal when
 *                        false — TODO confirm against that helper
 */
private void checkStats(Fields fields, XContentBuilder xContentBuilder, boolean isEqual) throws IOException {
    Map<String, Object> stats = JsonXContent.jsonXContent.createParser(xContentBuilder.bytes()).map();
    assertThat("number of fields expected:", fields.size(), equalTo(stats.size()));
    for (String fieldName : fields) {
        logger.info("Checking field statistics for field: {}", fieldName);
        Terms terms = fields.terms(fieldName);
        Map<String, Integer> fieldStatistics = getFieldStatistics(stats, fieldName);
        String msg = "field: " + fieldName + " ";
        // Field-level statistics: sum of doc frequencies, doc count, sum ttf.
        assertThat(msg + "sum_doc_freq:", (int) terms.getSumDocFreq(),
                equalOrLessThanTo(fieldStatistics.get("sum_doc_freq"), isEqual));
        assertThat(msg + "doc_count:", terms.getDocCount(),
                equalOrLessThanTo(fieldStatistics.get("doc_count"), isEqual));
        assertThat(msg + "sum_ttf:", (int) terms.getSumTotalTermFreq(),
                equalOrLessThanTo(fieldStatistics.get("sum_ttf"), isEqual));

        // Term-level statistics: doc_freq and total term frequency per term.
        final TermsEnum termsEnum = terms.iterator();
        BytesRef text;
        while ((text = termsEnum.next()) != null) {
            String term = text.utf8ToString();
            logger.info("Checking term statistics for term: ({}, {})", fieldName, term);
            Map<String, Integer> termStatistics = getTermStatistics(stats, fieldName, term);
            msg = "term: (" + fieldName + "," + term + ") ";
            assertThat(msg + "doc_freq:", termsEnum.docFreq(),
                    equalOrLessThanTo(termStatistics.get("doc_freq"), isEqual));
            assertThat(msg + "ttf:", (int) termsEnum.totalTermFreq(),
                    equalOrLessThanTo(termStatistics.get("ttf"), isEqual));
        }
    }
}

From source file:org.elasticsearch.action.termvectors.GetTermVectorsTests.java

License:Apache License

/**
 * Indexes ten documents with a single field storing term vectors with
 * positions, offsets and payloads, then fetches each document's term vectors
 * (through index or alias) and verifies them via checkBrownFoxTermVector,
 * payloads included.
 */
@Test
public void testSimpleTermVectors() throws ElasticsearchException, IOException {
    XContentBuilder mapping = jsonBuilder().startObject().startObject("type1").startObject("properties")
            .startObject("field").field("type", "string")
            .field("term_vector", "with_positions_offsets_payloads").field("analyzer", "tv_test").endObject()
            .endObject().endObject().endObject();
    assertAcked(prepareCreate("test").addMapping("type1", mapping).addAlias(new Alias("alias"))
            .setSettings(settingsBuilder().put(indexSettings())
                    .put("index.analysis.analyzer.tv_test.tokenizer", "whitespace")
                    .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase")));
    ensureYellow();
    for (int i = 0; i < 10; i++) {
        client().prepareIndex("test", "type1", Integer.toString(i))
                .setSource(jsonBuilder().startObject()
                        .field("field", "the quick brown fox jumps over the lazy dog")
                        // character offsets of each token:
                        // 0the3 4quick9 10brown15 16fox19 20jumps25 26over30
                        // 31the34 35lazy39 40dog43
                        .endObject())
                .execute().actionGet();
        refresh();
    }
    for (int i = 0; i < 10; i++) {
        TermVectorsRequestBuilder resp = client()
                .prepareTermVectors(indexOrAlias(), "type1", Integer.toString(i)).setPayloads(true)
                .setOffsets(true).setPositions(true).setSelectedFields();
        TermVectorsResponse response = resp.execute().actionGet();
        assertThat(response.getIndex(), equalTo("test"));
        assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
        Fields fields = response.getFields();
        // Only the single mapped field carries term vectors.
        assertThat(fields.size(), equalTo(1));
        checkBrownFoxTermVector(fields, "field", true);
    }
}

From source file:org.elasticsearch.action.termvectors.GetTermVectorsTests.java

License:Apache License

/**
 * Indexes the same nine-token sentence under one of seven randomly chosen
 * term vector storage configurations (none; vectors; plus positions, offsets,
 * payloads and their combinations), then requests term vectors with an
 * independently random combination of payloads/offsets/positions. Positions,
 * offsets and payloads must only come back when they were both stored and
 * requested; otherwise the postings enum reports -1 (or null payload).
 */
@Test
public void testRandomSingleTermVectors() throws ElasticsearchException, IOException {
    FieldType ft = new FieldType();
    // Pick one of seven storage configurations for the field's term vectors.
    int config = randomInt(6);
    boolean storePositions = false;
    boolean storeOffsets = false;
    boolean storePayloads = false;
    boolean storeTermVectors = false;
    switch (config) {
    case 0: {
        // do nothing
        break;
    }
    case 1: {
        storeTermVectors = true;
        break;
    }
    case 2: {
        storeTermVectors = true;
        storePositions = true;
        break;
    }
    case 3: {
        storeTermVectors = true;
        storeOffsets = true;
        break;
    }
    case 4: {
        storeTermVectors = true;
        storePositions = true;
        storeOffsets = true;
        break;
    }
    case 5: {
        storeTermVectors = true;
        storePositions = true;
        storePayloads = true;
        break;
    }
    case 6: {
        storeTermVectors = true;
        storePositions = true;
        storeOffsets = true;
        storePayloads = true;
        break;
    }
    }
    ft.setStoreTermVectors(storeTermVectors);
    ft.setStoreTermVectorOffsets(storeOffsets);
    ft.setStoreTermVectorPayloads(storePayloads);
    ft.setStoreTermVectorPositions(storePositions);

    // Translate the Lucene FieldType into the mapping's term_vector option.
    String optionString = AbstractFieldMapper.termVectorOptionsToString(ft);
    XContentBuilder mapping = jsonBuilder().startObject().startObject("type1").startObject("properties")
            .startObject("field").field("type", "string").field("term_vector", optionString)
            .field("analyzer", "tv_test").endObject().endObject().endObject().endObject();
    assertAcked(prepareCreate("test").addMapping("type1", mapping)
            .setSettings(settingsBuilder().put("index.analysis.analyzer.tv_test.tokenizer", "whitespace")
                    .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase")));
    ensureYellow();
    for (int i = 0; i < 10; i++) {
        client().prepareIndex("test", "type1", Integer.toString(i))
                .setSource(jsonBuilder().startObject()
                        .field("field", "the quick brown fox jumps over the lazy dog")
                        // character offsets of each token:
                        // 0the3 4quick9 10brown15 16fox19 20jumps25 26over30
                        // 31the34 35lazy39 40dog43
                        .endObject())
                .execute().actionGet();
        refresh();
    }
    // Expected per-term values, aligned by index: distinct terms in sorted
    // order, their frequency, positions and start/end offsets.
    String[] values = { "brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the" };
    int[] freq = { 1, 1, 1, 1, 1, 1, 1, 2 };
    int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } };
    int[][] startOffset = { { 10 }, { 40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } };
    int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } };

    // What we ask for is independent of what was stored.
    boolean isPayloadRequested = randomBoolean();
    boolean isOffsetRequested = randomBoolean();
    boolean isPositionsRequested = randomBoolean();
    String infoString = createInfoString(isPositionsRequested, isOffsetRequested, isPayloadRequested,
            optionString);
    for (int i = 0; i < 10; i++) {
        TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(i))
                .setPayloads(isPayloadRequested).setOffsets(isOffsetRequested)
                .setPositions(isPositionsRequested).setSelectedFields();
        TermVectorsResponse response = resp.execute().actionGet();
        assertThat(infoString + "doc id: " + i + " doesn't exists but should", response.isExists(),
                equalTo(true));
        Fields fields = response.getFields();
        // Without stored term vectors the response carries no fields at all.
        assertThat(fields.size(), equalTo(ft.storeTermVectors() ? 1 : 0));
        if (ft.storeTermVectors()) {
            Terms terms = fields.terms("field");
            assertThat(terms.size(), equalTo(8l));
            TermsEnum iterator = terms.iterator();
            for (int j = 0; j < values.length; j++) {
                String string = values[j];
                BytesRef next = iterator.next();
                assertThat(infoString, next, notNullValue());
                assertThat(infoString + "expected " + string, string, equalTo(next.utf8ToString()));
                // NOTE(review): duplicate of the notNullValue assertion above.
                assertThat(infoString, next, notNullValue());
                // do not test ttf or doc frequency, because here we have
                // many shards and do not know how documents are distributed
                PostingsEnum docsAndPositions = iterator.postings(null, null, PostingsEnum.ALL);
                // docs and pos only returns something if positions or
                // payloads or offsets are stored / requestd Otherwise use
                // DocsEnum?
                assertThat(infoString, docsAndPositions.nextDoc(), equalTo(0));
                assertThat(infoString, freq[j], equalTo(docsAndPositions.freq()));
                int[] termPos = pos[j];
                int[] termStartOffset = startOffset[j];
                int[] termEndOffset = endOffset[j];
                if (isPositionsRequested && storePositions) {
                    assertThat(infoString, termPos.length, equalTo(freq[j]));
                }
                if (isOffsetRequested && storeOffsets) {
                    assertThat(termStartOffset.length, equalTo(freq[j]));
                    assertThat(termEndOffset.length, equalTo(freq[j]));
                }
                for (int k = 0; k < freq[j]; k++) {
                    int nextPosition = docsAndPositions.nextPosition();
                    // only return something useful if requested and stored
                    if (isPositionsRequested && storePositions) {
                        assertThat(infoString + "positions for term: " + string, nextPosition,
                                equalTo(termPos[k]));
                    } else {
                        assertThat(infoString + "positions for term: ", nextPosition, equalTo(-1));
                    }
                    // only return something useful if requested and stored
                    if (isPayloadRequested && storePayloads) {
                        assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(),
                                equalTo(new BytesRef("word")));
                    } else {
                        assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(),
                                equalTo(null));
                    }
                    // only return something useful if requested and stored
                    if (isOffsetRequested && storeOffsets) {

                        assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(),
                                equalTo(termStartOffset[k]));
                        assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(),
                                equalTo(termEndOffset[k]));
                    } else {
                        assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(),
                                equalTo(-1));
                        assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(),
                                equalTo(-1));
                    }

                }
            }
            assertThat(iterator.next(), nullValue());
        }
    }
}

From source file:org.elasticsearch.action.termvectors.GetTermVectorsTests.java

License:Apache License

/**
 * Indexes one document analyzed with a delimited_payload_filter using a
 * random delimiter and a random payload encoding (float, int or identity),
 * then verifies that the payload returned for every term at every position
 * matches the payload generated for it; an empty expected payload corresponds
 * to no payload being returned at that position.
 */
@Test
public void testRandomPayloadWithDelimitedPayloadTokenFilter() throws ElasticsearchException, IOException {
    //create the test document
    int encoding = randomIntBetween(0, 2);
    String encodingString = "";
    if (encoding == 0) {
        encodingString = "float";
    }
    if (encoding == 1) {
        encodingString = "int";
    }
    if (encoding == 2) {
        encodingString = "identity";
    }
    // Random tokens plus per-token payloads, joined with a delimiter that is
    // guaranteed not to appear in the tokens themselves.
    String[] tokens = crateRandomTokens();
    Map<String, List<BytesRef>> payloads = createPayloads(tokens, encoding);
    String delimiter = createRandomDelimiter(tokens);
    String queryString = createString(tokens, payloads, encoding, delimiter.charAt(0));
    //create the mapping
    XContentBuilder mapping = jsonBuilder().startObject().startObject("type1").startObject("properties")
            .startObject("field").field("type", "string")
            .field("term_vector", "with_positions_offsets_payloads").field("analyzer", "payload_test")
            .endObject().endObject().endObject().endObject();
    assertAcked(prepareCreate("test").addMapping("type1", mapping).setSettings(settingsBuilder()
            .put(indexSettings()).put("index.analysis.analyzer.payload_test.tokenizer", "whitespace")
            .putArray("index.analysis.analyzer.payload_test.filter", "my_delimited_payload_filter")
            .put("index.analysis.filter.my_delimited_payload_filter.delimiter", delimiter)
            .put("index.analysis.filter.my_delimited_payload_filter.encoding", encodingString)
            .put("index.analysis.filter.my_delimited_payload_filter.type", "delimited_payload_filter")));
    ensureYellow();

    client().prepareIndex("test", "type1", Integer.toString(1))
            .setSource(jsonBuilder().startObject().field("field", queryString).endObject()).execute()
            .actionGet();
    refresh();
    TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(1))
            .setPayloads(true).setOffsets(true).setPositions(true).setSelectedFields();
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id 1 doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    TermsEnum iterator = terms.iterator();
    while (iterator.next() != null) {
        String term = iterator.term().utf8ToString();
        PostingsEnum docsAndPositions = iterator.postings(null, null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        List<BytesRef> curPayloads = payloads.get(term);
        assertThat(term, curPayloads, notNullValue());
        assertNotNull(docsAndPositions);
        for (int k = 0; k < docsAndPositions.freq(); k++) {
            docsAndPositions.nextPosition();
            if (docsAndPositions.getPayload() != null) {
                String infoString = "\nterm: " + term + " has payload \n"
                        + docsAndPositions.getPayload().toString() + "\n but should have payload \n"
                        + curPayloads.get(k).toString();
                assertThat(infoString, docsAndPositions.getPayload(), equalTo(curPayloads.get(k)));
            } else {
                // No payload returned: the expected payload must be empty.
                String infoString = "\nterm: " + term + " has no payload but should have payload \n"
                        + curPayloads.get(k).toString();
                assertThat(infoString, curPayloads.get(k).length, equalTo(0));
            }
        }
    }
    assertThat(iterator.next(), nullValue());
}

From source file:org.elasticsearch.action.termvectors.GetTermVectorsTests.java

License:Apache License

/**
 * Requests term vectors for ten fields, roughly half of which are mapped with
 * {@code "term_vector": "no"}. All ten are selected in the request, and the
 * response is still expected to contain every one of them — presumably the
 * non-stored ones are generated on the fly from the source (TODO confirm
 * against the term vectors API docs).
 */
@Test
public void testSimpleTermVectorsWithGenerate() throws ElasticsearchException, IOException {
    String[] fieldNames = new String[10];
    for (int i = 0; i < fieldNames.length; i++) {
        fieldNames[i] = "field" + String.valueOf(i);
    }

    // Each field gets the same analyzer and the same nine-token sentence;
    // whether term vectors are stored is chosen at random per field.
    XContentBuilder mapping = jsonBuilder().startObject().startObject("type1").startObject("properties");
    XContentBuilder source = jsonBuilder().startObject();
    for (String field : fieldNames) {
        mapping.startObject(field).field("type", "string")
                .field("term_vector", randomBoolean() ? "with_positions_offsets_payloads" : "no")
                .field("analyzer", "tv_test").endObject();
        source.field(field, "the quick brown fox jumps over the lazy dog");
    }
    mapping.endObject().endObject().endObject();
    source.endObject();

    assertAcked(prepareCreate("test").addMapping("type1", mapping)
            .setSettings(settingsBuilder().put(indexSettings())
                    .put("index.analysis.analyzer.tv_test.tokenizer", "whitespace")
                    .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase")));

    ensureGreen();

    for (int i = 0; i < 10; i++) {
        client().prepareIndex("test", "type1", Integer.toString(i)).setSource(source).execute().actionGet();
        refresh();
    }

    for (int i = 0; i < 10; i++) {
        TermVectorsResponse response = client().prepareTermVectors("test", "type1", Integer.toString(i))
                .setPayloads(true).setOffsets(true).setPositions(true).setSelectedFields(fieldNames).execute()
                .actionGet();
        assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
        Fields fields = response.getFields();
        // Every selected field must be present, stored or generated.
        assertThat(fields.size(), equalTo(fieldNames.length));
        for (String fieldName : fieldNames) {
            // MemoryIndex does not support payloads
            checkBrownFoxTermVector(fields, fieldName, false);
        }
    }
}

From source file:org.elasticsearch.action.termvectors.TermVectorsUnitTests.java

License:Apache License

/**
 * Asserts that the response carries term vectors for exactly the two
 * standard fields, "title" and "desc", and nothing else.
 */
private void checkIfStandardTermVector(TermVectorsResponse inResponse) throws IOException {
    Fields termVectorFields = inResponse.getFields();
    for (String expectedField : new String[] { "title", "desc" }) {
        assertThat(termVectorFields.terms(expectedField), Matchers.notNullValue());
    }
    assertThat(termVectorFields.size(), equalTo(2));
}

From source file:org.elasticsearch.bwcompat.BasicBackwardsCompatibilityIT.java

License:Apache License

/**
 * Verifies that term vectors fetched through an alias are reported against
 * the concrete index and contain the single mapped field with all eight
 * distinct terms.
 */
public void testGetTermVector() throws IOException {
    createIndexWithAlias();
    // One analyzed string field storing positions, offsets and payloads.
    assertAcked(client().admin().indices().preparePutMapping("test").setType("type1")
            .setSource("field", "type=string,term_vector=with_positions_offsets_payloads").get());
    ensureYellow("test");

    client().prepareIndex(indexOrAlias(), "type1", "1")
            .setSource("field", "the quick brown fox jumps over the lazy dog").get();
    refresh();

    TermVectorsResponse response = client().prepareTermVectors(indexOrAlias(), "type1", "1").get();
    assertThat(response.getIndex(), equalTo("test"));
    assertThat(response.isExists(), equalTo(true));

    Fields termVectorFields = response.getFields();
    assertThat(termVectorFields.size(), equalTo(1));
    // "the" occurs twice, so nine tokens yield eight distinct terms.
    assertThat(termVectorFields.terms("field").size(), equalTo(8L));
}

From source file:org.elasticsearch.bwcompat.BasicBackwardsCompatibilityTest.java

License:Apache License

/**
 * Verifies the legacy (singular) term vector API through an alias: the
 * response must report the concrete index and contain the single mapped
 * field with all eight distinct terms.
 */
@Test
public void testGetTermVector() throws IOException {
    createIndexWithAlias();
    // One analyzed string field storing positions, offsets and payloads.
    assertAcked(client().admin().indices().preparePutMapping("test").setType("type1")
            .setSource("field", "type=string,term_vector=with_positions_offsets_payloads").get());
    ensureYellow("test");

    client().prepareIndex(indexOrAlias(), "type1", "1")
            .setSource("field", "the quick brown fox jumps over the lazy dog").get();
    refresh();

    TermVectorResponse response = client().prepareTermVector(indexOrAlias(), "type1", "1").get();
    assertThat(response.getIndex(), equalTo("test"));
    assertThat(response.isExists(), equalTo(true));

    Fields termVectorFields = response.getFields();
    assertThat(termVectorFields.size(), equalTo(1));
    // "the" occurs twice, so nine tokens yield eight distinct terms.
    assertThat(termVectorFields.terms("field").size(), equalTo(8L));
}