Example usage for org.apache.lucene.index Fields size

List of usage examples for org.apache.lucene.index Fields size

Introduction

On this page you can find example usage for org.apache.lucene.index Fields size.

Prototype

public abstract int size();

Source Link

Document

Returns the number of fields or -1 if the number of distinct field names is unknown.

Usage

From source file:de.unihildesheim.iw.lucene.index.FilteredDirectoryReaderTest.java

License: Open Source License

/**
 * Tests a {@link Builder} with a single field given in negated mode: field
 * "f2" is excluded from the wrapped reader while every other index field
 * stays visible.
 *
 * @throws Exception if the test index cannot be created or a reader
 *                   operation fails
 */
@SuppressWarnings({ "ImplicitNumericConversion", "AnonymousInnerClassMayBeStatic" })
@Test
public void testBuilder_singleField_negate() throws Exception {
    try (TestMemIndex idx = new TestMemIndex(Index.ALL_FIELDS)) {
        final DirectoryReader reader = DirectoryReader.open(idx.dir);
        // The 'true' flag negates the given field set: f2 becomes hidden.
        final FilteredDirectoryReader fReader = new Builder(reader).fields(Collections.singleton("f2"), true) // f2 is hidden
                .build();

        new LeafReaderInstanceTest() {

            @Override
            void testHasDeletions() throws Exception {
                Assert.assertFalse("Reader has deletions.", fReader.hasDeletions());
            }

            @Override
            void testFieldCount() throws Exception {
                // One field (f2) is hidden, so expect one less than the full set.
                Assert.assertEquals("Field count mismatch.", idx.flds.size() - 1, fReader.getFields().size());
            }

            @Override
            void testFieldNames() throws Exception {
                Assert.assertFalse("Excluded field found.", fReader.getFields().contains("f2"));
            }

            @Override
            void testTotalTermFreq() throws Exception {
                // visible fields
                Assert.assertEquals("TotalTermFreq mismatch for visible field & term.", idx.docs,
                        fReader.totalTermFreq(new Term("f1", "field1")));
                Assert.assertEquals("TotalTermFreq mismatch for visible field & term.", idx.docs,
                        fReader.totalTermFreq(new Term("f3", "field3")));
                Assert.assertEquals("TotalTermFreq mismatch for visible field & missing term.", 0L,
                        fReader.totalTermFreq(new Term("f3", "foo")));
                // hidden fields must report zero frequency
                Assert.assertEquals("TotalTermFreq mismatch for hidden field & term.", 0L,
                        fReader.totalTermFreq(new Term("f2", "field2")));
            }

            @Override
            void testSumTotalTermFreq() throws Exception {
                // 18L is fixture-specific — defined by TestMemIndex content
                // (not visible in this file).
                Assert.assertEquals("SumTotalTermFreq mismatch for visible field.", 18L,
                        fReader.getSumTotalTermFreq("f1"));
                Assert.assertEquals("SumTotalTermFreq mismatch for visible field.", 18L,
                        fReader.getSumTotalTermFreq("f3"));
                Assert.assertEquals("SumTotalTermFreq mismatch for hidden field.", 0L,
                        fReader.getSumTotalTermFreq("f2"));
            }

            @Override
            void testDocCount() throws Exception {
                Assert.assertEquals("Doc count mismatch for visible field.", idx.docs,
                        fReader.getDocCount("f1"));
                Assert.assertEquals("Doc count mismatch for visible field.", idx.docs,
                        fReader.getDocCount("f3"));
                // check visibility of all docs without visible fields
                Assert.assertEquals("Document visible with hidden field.", 0L, fReader.getDocCount("f2"));
            }

            @Override
            void testDocFreq() throws Exception {
                Assert.assertEquals("Missing term from all documents.", idx.docs,
                        fReader.docFreq(new Term("f1", "field1")));
                Assert.assertEquals("Found term from hidden documents.", 0L,
                        fReader.docFreq(new Term("f2", "field2")));
                Assert.assertEquals("Missing term from all documents.", idx.docs,
                        fReader.docFreq(new Term("f3", "field3")));
            }

            @Override
            void testSumDocFreq() throws Exception {
                Assert.assertEquals("SumDocFreq mismatch for visible field.", 18L, fReader.getSumDocFreq("f1"));
                Assert.assertEquals("SumDocFreq mismatch for visible field.", 18L, fReader.getSumDocFreq("f3"));
                Assert.assertEquals("SumDocFreq has result for hidden field.", 0L, fReader.getSumDocFreq("f2"));
            }

            @Override
            void testTermVectors() throws Exception {
                for (int i = 0; i < idx.docs - 1; i++) {
                    final Fields f = fReader.getTermVectors(i);
                    // Two fields (f1, f3) remain per document after hiding f2.
                    Assert.assertEquals("Too much fields retrieved from TermVector.", 2L, f.size());
                    f.forEach(fld -> Assert.assertFalse("Hidden field found.", "f2".equals(fld)));
                }
            }

            @Override
            void testNumDocs() throws Exception {
                Assert.assertEquals("NumDocs mismatch.", idx.docs, fReader.numDocs());
            }

            @Override
            void testMaxDoc() throws Exception {
                Assert.assertEquals("MaxDoc mismatch.", idx.docs, fReader.maxDoc());
            }
        };
    }
}

From source file:de.unihildesheim.iw.lucene.index.FilteredDirectoryReaderTest.java

License: Open Source License

/**
 * Tests a {@link Builder} configured with multiple fields to include: only
 * "f1" and "f3" stay visible, "f2" is hidden.
 *
 * <p>NOTE: the original javadoc contained an embedded block-comment
 * watermark whose {@code *}{@code /} terminated the comment early; it has
 * been removed here.
 *
 * @throws Exception if the test index cannot be created or a reader
 *                   operation fails
 */
@SuppressWarnings({ "ImplicitNumericConversion", "AnonymousInnerClassMayBeStatic" })
@Test
public void testBuilder_multiFields() throws Exception {
    try (TestMemIndex idx = new TestMemIndex(Index.ALL_FIELDS)) {
        final DirectoryReader reader = DirectoryReader.open(idx.dir);

        // Non-negated field list: exactly these two fields remain visible.
        final FilteredDirectoryReader fReader = new Builder(reader).fields(Arrays.asList("f1", "f3")).build();

        Assert.assertEquals("Field count mismatch.", 2, fReader.getFields().size());

        new LeafReaderInstanceTest() {

            @Override
            void testHasDeletions() throws Exception {
                Assert.assertFalse("Reader has deletions.", fReader.hasDeletions());
            }

            @Override
            void testFieldCount() throws Exception {
                // Only the two explicitly included fields are exposed.
                Assert.assertEquals("Field count mismatch.", 2L, fReader.getFields().size());
            }

            @Override
            void testFieldNames() throws Exception {
                Assert.assertFalse("Hidden field found.", fReader.getFields().contains("f2"));
            }

            @Override
            void testTotalTermFreq() throws Exception {
                Assert.assertEquals("TotalTermFreq mismatch for visible field & term.", idx.docs,
                        fReader.totalTermFreq(new Term("f1", "field1")));
                Assert.assertEquals("TotalTermFreq mismatch for visible field & term.", idx.docs,
                        fReader.totalTermFreq(new Term("f3", "field3")));
                Assert.assertEquals("TotalTermFreq mismatch for visible field & missing term.", 0L,
                        fReader.totalTermFreq(new Term("f1", "foo")));
                // hidden field must report zero frequency
                Assert.assertEquals("TotalTermFreq mismatch for hidden field & term.", 0L,
                        fReader.totalTermFreq(new Term("f2", "field2")));
            }

            @Override
            void testSumTotalTermFreq() throws Exception {
                // 18L is fixture-specific — defined by TestMemIndex content
                // (not visible in this file).
                Assert.assertEquals("SumTotalTermFreq mismatch for visible field.", 18L,
                        fReader.getSumTotalTermFreq("f1"));
                Assert.assertEquals("SumTotalTermFreq mismatch for visible field.", 18L,
                        fReader.getSumTotalTermFreq("f3"));

                Assert.assertEquals("SumTotalTermFreq mismatch for hidden field.", 0L,
                        fReader.getSumTotalTermFreq("f2"));
            }

            @Override
            void testDocCount() throws Exception {
                Assert.assertEquals("Doc count mismatch for visible field.", idx.docs,
                        fReader.getDocCount("f1"));
                Assert.assertEquals("Doc count mismatch for visible field.", idx.docs,
                        fReader.getDocCount("f3"));

                Assert.assertEquals("Document visible with hidden field.", 0L, fReader.getDocCount("f2"));
            }

            @Override
            void testDocFreq() throws Exception {
                Assert.assertEquals("Missing term from all documents.", idx.docs,
                        fReader.docFreq(new Term("f1", "field1")));
                Assert.assertEquals("Missing term from all documents.", idx.docs,
                        fReader.docFreq(new Term("f3", "field3")));

                Assert.assertEquals("Found term from hidden documents.", 0L,
                        fReader.docFreq(new Term("f2", "field2")));
            }

            @Override
            void testSumDocFreq() throws Exception {
                Assert.assertEquals("SumDocFreq mismatch for visible field.", 18L, fReader.getSumDocFreq("f1"));
                Assert.assertEquals("SumDocFreq mismatch for visible field.", 18L, fReader.getSumDocFreq("f3"));

                Assert.assertEquals("SumDocFreq has result for hidden field.", 0L, fReader.getSumDocFreq("f2"));
            }

            @Override
            void testTermVectors() throws Exception {
                for (int i = 0; i < idx.docs - 1; i++) {
                    final Fields f = fReader.getTermVectors(i);
                    // Two fields (f1, f3) are expected per term vector.
                    Assert.assertEquals("Too much fields retrieved from TermVector.", 2L, f.size());
                    f.forEach(fld -> Assert.assertFalse("Hidden field found.", "f2".equals(fld)));
                }
            }

            @Override
            void testNumDocs() throws Exception {
                Assert.assertEquals("NumDocs mismatch.", idx.docs, fReader.numDocs());
            }

            @Override
            void testMaxDoc() throws Exception {
                Assert.assertEquals("MaxDoc mismatch.", idx.docs, fReader.maxDoc());
            }
        };
    }
}

From source file:de.unihildesheim.iw.lucene.index.FilteredDirectoryReaderTest.java

License: Open Source License

/**
 * Tests {@link Filter} usage in combination with a field restriction: a
 * query filter on field "f1" narrows the visible documents while the field
 * restriction leaves only "f2" visible.
 *
 * @throws Exception if the test index cannot be created or a reader
 *                   operation fails
 */
@SuppressWarnings({ "AnonymousInnerClassMayBeStatic", "ImplicitNumericConversion" })
@Test
public void testBuilder_filter_and_field() throws Exception {
    try (TestMemIndex idx = new TestMemIndex(Index.ALL_FIELDS)) {
        // Filter matches only the document containing this term in f1.
        final Query q = new TermQuery(new Term("f1", "document2field1"));
        final Filter f = new QueryWrapperFilter(q);
        final DirectoryReader reader = DirectoryReader.open(idx.dir);
        // Combine the document filter with a (non-negated) single-field
        // restriction: only f2 remains visible.
        final FilteredDirectoryReader fReader = new Builder(reader).queryFilter(f)
                .fields(Collections.singleton("f2")).build();

        new LeafReaderInstanceTest() {

            @Override
            void testHasDeletions() throws Exception {
                Assert.assertFalse("Reader has deletions.", fReader.hasDeletions());
            }

            @Override
            void testFieldCount() throws Exception {
                // Only the single included field should be exposed.
                Assert.assertEquals("Field count mismatch.", 1L, fReader.getFields().size());
            }

            @Override
            void testFieldNames() throws Exception {
                Assert.assertTrue("Visible field not found.", fReader.getFields().contains("f2"));
            }

            @Override
            void testTotalTermFreq() throws Exception {
                // Only one document passes the query filter.
                Assert.assertEquals("TotalTermFreq mismatch for visible term.", 1L,
                        fReader.totalTermFreq(new Term("f2", "field2")));
                Assert.assertEquals("TotalTermFreq mismatch for hidden term.", 0L,
                        fReader.totalTermFreq(new Term("f1", "field1")));
            }

            @Override
            void testSumTotalTermFreq() throws Exception {
                // 6L is fixture-specific — defined by TestMemIndex content
                // (not visible in this file).
                Assert.assertEquals("SumTotalTermFreq mismatch for visible terms.", 6L,
                        fReader.getSumTotalTermFreq("f2"));
            }

            @Override
            void testDocCount() throws Exception {
                Assert.assertEquals("Doc count mismatch.", 1L, fReader.getDocCount("f2"));
            }

            @SuppressWarnings("ObjectAllocationInLoop")
            @Override
            void testDocFreq() throws Exception {
                Assert.assertEquals("Missing term from visible document.", 1L,
                        fReader.docFreq(new Term("f2", "value")));
            }

            @Override
            void testSumDocFreq() throws Exception {
                Assert.assertEquals("SumDocFreq mismatch for visible term.", 6L, fReader.getSumDocFreq("f2"));
            }

            @Override
            void testTermVectors() throws Exception {
                boolean match = false;
                // Not every doc id yields a term vector (filtered docs don't),
                // so track whether at least one was found.
                for (int i = 0; i < fReader.maxDoc(); i++) {
                    final Fields fld = fReader.getTermVectors(i);
                    if (fld != null) {
                        match = true;
                        Assert.assertEquals("Too much fields retrieved from TermVector.", 1L, fld.size());
                    }
                }
                Assert.assertTrue("Fields not found.", match);
            }

            @Override
            void testNumDocs() throws Exception {
                // Only a single document matches the query filter.
                Assert.assertEquals("NumDocs mismatch.", 1L, fReader.numDocs());
            }

            @Override
            void testMaxDoc() throws Exception {
                Assert.assertEquals("MaxDoc mismatch.", 2L, fReader.maxDoc());
            }
        };
    }
}

From source file:de.unihildesheim.iw.lucene.index.FilteredDirectoryReaderTest.java

License: Open Source License

/**
 * Tests {@link Filter} usage in combination with field restriction using
 * both the same field: the excluded field ("f2") is the same field the
 * query-filter runs on.
 *
 * @throws Exception if the test index cannot be created or a reader
 *                   operation fails
 */
@SuppressWarnings({ "AnonymousInnerClassMayBeStatic", "ImplicitNumericConversion" })
@Test
public void testBuilder_filter_and_field_sameField() throws Exception {
    try (TestMemIndex idx = new TestMemIndex(Index.ALL_FIELDS)) {
        // Filter selects documents by a term in f2 — the field that is
        // subsequently hidden by the negated field restriction below.
        final Query q = new TermQuery(new Term("f2", "document2field2"));
        final Filter f = new QueryWrapperFilter(q);
        final DirectoryReader reader = DirectoryReader.open(idx.dir);
        final FilteredDirectoryReader fReader = new Builder(reader).queryFilter(f)
                .fields(Collections.singleton("f2"), true).build();

        new LeafReaderInstanceTest() {
            @Override
            void testHasDeletions() throws Exception {
                Assert.assertFalse("Reader has deletions.", fReader.hasDeletions());
            }

            @Override
            void testFieldCount() throws Exception {
                // One field (f2) is hidden, so expect one less than the full set.
                Assert.assertEquals("Field count mismatch.", idx.flds.size() - 1, fReader.getFields().size());
            }

            @Override
            void testFieldNames() throws Exception {
                Assert.assertFalse("Excluded field found.", fReader.getFields().contains("f2"));
            }

            @Override
            void testTotalTermFreq() throws Exception {
                // visible fields — only one document passes the filter
                Assert.assertEquals("TotalTermFreq mismatch for visible field & term.", 1L,
                        fReader.totalTermFreq(new Term("f1", "field1")));
                Assert.assertEquals("TotalTermFreq mismatch for visible field & term.", 1L,
                        fReader.totalTermFreq(new Term("f3", "field3")));
                Assert.assertEquals("TotalTermFreq mismatch for visible field & missing term.", 0L,
                        fReader.totalTermFreq(new Term("f3", "foo")));
                // hidden fields must report zero frequency
                Assert.assertEquals("TotalTermFreq mismatch for hidden field & term.", 0L,
                        fReader.totalTermFreq(new Term("f2", "field2")));
            }

            @Override
            void testSumTotalTermFreq() throws Exception {
                // 6L is fixture-specific — defined by TestMemIndex content
                // (not visible in this file).
                Assert.assertEquals("SumTotalTermFreq mismatch for visible field.", 6L,
                        fReader.getSumTotalTermFreq("f1"));
                Assert.assertEquals("SumTotalTermFreq mismatch for visible field.", 6L,
                        fReader.getSumTotalTermFreq("f3"));
                Assert.assertEquals("SumTotalTermFreq mismatch for hidden field.", 0L,
                        fReader.getSumTotalTermFreq("f2"));
            }

            @Override
            void testDocCount() throws Exception {
                Assert.assertEquals("Doc count mismatch for visible field.", 1L, fReader.getDocCount("f1"));
                Assert.assertEquals("Doc count mismatch for visible field.", 1L, fReader.getDocCount("f3"));
                // check visibility of all docs without visible fields
                Assert.assertEquals("Document visible with hidden field.", 0L, fReader.getDocCount("f2"));
            }

            @Override
            void testDocFreq() throws Exception {
                Assert.assertEquals("Missing term from all documents.", 1L,
                        fReader.docFreq(new Term("f1", "field1")));
                Assert.assertEquals("Found term from hidden documents.", 0L,
                        fReader.docFreq(new Term("f2", "field2")));
                Assert.assertEquals("Missing term from all documents.", 1L,
                        fReader.docFreq(new Term("f3", "field3")));
            }

            @Override
            void testSumDocFreq() throws Exception {
                Assert.assertEquals("SumDocFreq mismatch for visible field.", 6L, fReader.getSumDocFreq("f1"));
                Assert.assertEquals("SumDocFreq mismatch for visible field.", 6L, fReader.getSumDocFreq("f3"));
                Assert.assertEquals("SumDocFreq has result for hidden field.", 0L, fReader.getSumDocFreq("f2"));
            }

            @Override
            void testTermVectors() throws Exception {
                for (int i = 0; i < idx.docs - 1; i++) {
                    final Fields flds = fReader.getTermVectors(i);
                    // Two fields (f1, f3) remain per document after hiding f2.
                    Assert.assertEquals("Too much fields retrieved from TermVector.", 2L, flds.size());
                    flds.forEach(fld -> Assert.assertFalse("Hidden field found.", "f2".equals(fld)));
                }
            }

            @Override
            void testNumDocs() throws Exception {
                // Only a single document matches the query filter.
                Assert.assertEquals("NumDocs mismatch.", 1L, fReader.numDocs());
            }

            @Override
            void testMaxDoc() throws Exception {
                Assert.assertEquals("MaxDoc mismatch.", 2L, fReader.maxDoc());
            }
        };
    }
}

From source file:org.elasticsearch.action.termvector.GetTermVectorCheckDocFreqTests.java

License: Apache License

/**
 * Fetches the term vector for document {@code i} with field statistics
 * disabled and verifies that field-level aggregates are reported as
 * "unknown" (-1) while term statistics, positions, offsets and payloads are
 * returned as expected. Finally compares the full JSON rendering of the
 * response against a fixed expected string.
 *
 * @param numDocs     number of documents the caller indexed
 * @param values      expected terms in iteration order
 * @param freq        expected within-document frequency per term
 * @param pos         expected token positions per term
 * @param startOffset expected start offsets per term
 * @param endOffset   expected end offsets per term
 * @param i           id of the document whose term vector is checked
 * @throws IOException if building the JSON representation fails
 */
private void checkWithoutFieldStatistics(int numDocs, String[] values, int[] freq, int[][] pos,
        int[][] startOffset, int[][] endOffset, int i) throws IOException {
    TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i))
            .setPayloads(true).setOffsets(true).setPositions(true).setTermStatistics(true)
            .setFieldStatistics(false).setSelectedFields();
    TermVectorResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L)); // was '8l' — lowercase suffix misreads as 81
    // Field statistics were disabled, so Lucene's "unknown" sentinel (-1) is
    // expected for every field-level aggregate.
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo(-1L));
    assertThat(terms.getDocCount(), Matchers.equalTo(-1));
    assertThat(terms.getSumDocFreq(), equalTo(-1L));
    TermsEnum iterator = terms.iterator(null);
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        assertThat(next, Matchers.notNullValue());
        // "the" occurs twice per document; all other terms occur once.
        if (string.equals("the")) {
            assertThat("expected ttf of " + string, numDocs * 2, equalTo((int) iterator.totalTermFreq()));
        } else {
            assertThat("expected ttf of " + string, numDocs, equalTo((int) iterator.totalTermFreq()));
        }

        DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        assertThat(iterator.docFreq(), equalTo(numDocs));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    // Enum must be exhausted after all expected terms were consumed.
    assertThat(iterator.next(), Matchers.nullValue());

    // jsonBuilder() is static — call it on the class, not a throwaway instance.
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();

    response.toXContent(xBuilder, null);
    BytesStream bytesStream = xBuilder.bytesStream();
    String utf8 = bytesStream.bytes().toUtf8();
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i
            + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));

}

From source file:org.elasticsearch.action.termvector.GetTermVectorCheckDocFreqTests.java

License: Apache License

/**
 * Fetches the term vector for document {@code i} with term statistics
 * disabled and verifies that per-term statistics are reported as "unknown"
 * (-1) while field statistics, positions, offsets and payloads are returned
 * as expected. Finally compares the full JSON rendering of the response
 * against a fixed expected string.
 *
 * @param numDocs     number of documents the caller indexed
 * @param values      expected terms in iteration order
 * @param freq        expected within-document frequency per term
 * @param pos         expected token positions per term
 * @param startOffset expected start offsets per term
 * @param endOffset   expected end offsets per term
 * @param i           id of the document whose term vector is checked
 * @throws IOException if building the JSON representation fails
 */
private void checkWithoutTermStatistics(int numDocs, String[] values, int[] freq, int[][] pos,
        int[][] startOffset, int[][] endOffset, int i) throws IOException {
    TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i))
            .setPayloads(true).setOffsets(true).setPositions(true).setTermStatistics(false)
            .setFieldStatistics(true).setSelectedFields();
    assertThat(resp.request().termStatistics(), equalTo(false));
    TermVectorResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L)); // was '8l' — lowercase suffix misreads as 81
    // Field statistics are enabled: 9 tokens per doc, one doc-freq entry per
    // distinct term per doc.
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs)));
    assertThat(terms.getDocCount(), Matchers.equalTo(numDocs));
    assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length));
    TermsEnum iterator = terms.iterator(null);
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        assertThat(next, Matchers.notNullValue());

        // Term statistics were disabled, so the "unknown" sentinel -1 is expected.
        assertThat("expected ttf of " + string, -1, equalTo((int) iterator.totalTermFreq()));

        DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        assertThat(iterator.docFreq(), equalTo(-1));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    // Enum must be exhausted after all expected terms were consumed.
    assertThat(iterator.next(), Matchers.nullValue());

    // jsonBuilder() is static — call it on the class, not a throwaway instance.
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();

    response.toXContent(xBuilder, null);
    BytesStream bytesStream = xBuilder.bytesStream();
    String utf8 = bytesStream.bytes().toUtf8();
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i
            + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));

}

From source file:org.elasticsearch.action.termvector.GetTermVectorCheckDocFreqTests.java

License: Apache License

/**
 * Fetches the term vector for document {@code i} with both term and field
 * statistics enabled and verifies all reported statistics, positions,
 * offsets and payloads. Finally compares the full JSON rendering of the
 * response against a fixed expected string.
 *
 * @param numDocs     number of documents the caller indexed
 * @param values      expected terms in iteration order
 * @param freq        expected within-document frequency per term
 * @param pos         expected token positions per term
 * @param startOffset expected start offsets per term
 * @param endOffset   expected end offsets per term
 * @param i           id of the document whose term vector is checked
 * @throws IOException if building the JSON representation fails
 */
private void checkAllInfo(int numDocs, String[] values, int[] freq, int[][] pos, int[][] startOffset,
        int[][] endOffset, int i) throws IOException {
    TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i))
            .setPayloads(true).setOffsets(true).setPositions(true).setFieldStatistics(true)
            .setTermStatistics(true).setSelectedFields();
    assertThat(resp.request().fieldStatistics(), equalTo(true));
    TermVectorResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L)); // was '8l' — lowercase suffix misreads as 81
    // Field statistics are enabled: 9 tokens per doc, one doc-freq entry per
    // distinct term per doc.
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs)));
    assertThat(terms.getDocCount(), Matchers.equalTo(numDocs));
    assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length));
    TermsEnum iterator = terms.iterator(null);
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        assertThat(next, Matchers.notNullValue());
        // "the" occurs twice per document; all other terms occur once.
        if (string.equals("the")) {
            assertThat("expected ttf of " + string, numDocs * 2, equalTo((int) iterator.totalTermFreq()));
        } else {
            assertThat("expected ttf of " + string, numDocs, equalTo((int) iterator.totalTermFreq()));
        }

        DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        assertThat(iterator.docFreq(), equalTo(numDocs));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    // Enum must be exhausted after all expected terms were consumed.
    assertThat(iterator.next(), Matchers.nullValue());

    // jsonBuilder() is static — call it on the class, not a throwaway instance.
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();

    response.toXContent(xBuilder, null);
    BytesStream bytesStream = xBuilder.bytesStream();
    String utf8 = bytesStream.bytes().toUtf8();
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i
            + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));
}

From source file:org.elasticsearch.action.termvector.GetTermVectorTests.java

License: Apache License

@Test
public void testSimpleTermVectors() throws ElasticsearchException, IOException {
    // Index one field with full term-vector info (positions, offsets, payloads).
    // The analyzer chain is whitespace + type_as_payload + lowercase, so every
    // token carries its token type ("word") as payload.
    XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
            .startObject("properties").startObject("field").field("type", "string")
            .field("term_vector", "with_positions_offsets_payloads").field("analyzer", "tv_test").endObject()
            .endObject().endObject().endObject();
    ElasticsearchAssertions.assertAcked(prepareCreate("test").addMapping("type1", mapping)
            .setSettings(ImmutableSettings.settingsBuilder()
                    .put("index.analysis.analyzer.tv_test.tokenizer", "whitespace")
                    .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase")));
    ensureYellow();
    for (int i = 0; i < 10; i++) {
        client().prepareIndex("test", "type1", Integer.toString(i))
                .setSource(XContentFactory.jsonBuilder().startObject()
                        .field("field", "the quick brown fox jumps over the lazy dog")
                        // 0the3 4quick9 10brown15 16fox19 20jumps25 26over30
                        // 31the34 35lazy39 40dog43
                        .endObject())
                .execute().actionGet();
        refresh();
    }
    // Expected per-term statistics for the indexed sentence, in term iteration order.
    String[] values = { "brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the" };
    int[] freq = { 1, 1, 1, 1, 1, 1, 1, 2 };
    int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } };
    int[][] startOffset = { { 10 }, { 40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } };
    int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } };
    for (int i = 0; i < 10; i++) {
        TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i))
                .setPayloads(true).setOffsets(true).setPositions(true).setSelectedFields();
        TermVectorResponse response = resp.execute().actionGet();
        assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
        Fields fields = response.getFields();
        assertThat(fields.size(), equalTo(1));
        Terms terms = fields.terms("field");
        assertThat(terms.size(), equalTo(8L));
        TermsEnum iterator = terms.iterator(null);
        for (int j = 0; j < values.length; j++) {
            String string = values[j];
            BytesRef next = iterator.next();
            assertThat("expected " + string, next, Matchers.notNullValue());
            // assertThat(actual, matcher): the enum's term is the actual value under test.
            assertThat("expected " + string, next.utf8ToString(), equalTo(string));
            // do not test ttf or doc frequency, because here we have many
            // shards and do not know how documents are distributed
            DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);
            assertThat(docsAndPositions, Matchers.notNullValue());
            assertThat(docsAndPositions.nextDoc(), equalTo(0));
            assertThat(docsAndPositions.freq(), equalTo(freq[j]));
            int[] termPos = pos[j];
            int[] termStartOffset = startOffset[j];
            int[] termEndOffset = endOffset[j];
            // sanity-check the fixtures themselves: one entry per occurrence
            assertThat(termPos.length, equalTo(freq[j]));
            assertThat(termStartOffset.length, equalTo(freq[j]));
            assertThat(termEndOffset.length, equalTo(freq[j]));
            for (int k = 0; k < freq[j]; k++) {
                int nextPosition = docsAndPositions.nextPosition();
                assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
                assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
                assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
                // type_as_payload stores the token type "word" on every token
                assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
            }
        }
        // all expected terms consumed; the enum must now be exhausted
        assertThat(iterator.next(), Matchers.nullValue());
    }
}

From source file:org.elasticsearch.action.termvector.GetTermVectorTests.java

License:Apache License

@Test
public void testRandomSingleTermVectors() throws ElasticsearchException, IOException {
    // Pick one of 7 distinct term-vector storage configurations at random, then
    // request a random subset of (positions, offsets, payloads) and verify that
    // only what was both stored AND requested comes back.
    FieldType ft = new FieldType();
    int config = randomInt(6);
    boolean storePositions = false;
    boolean storeOffsets = false;
    boolean storePayloads = false;
    boolean storeTermVectors = false;
    // NOTE(review): the original switch had no break statements, so every case
    // fell through and all configs ended up storing everything — defeating the
    // randomization. Each case is now a distinct configuration, as the case
    // bodies clearly intended.
    switch (config) {
    case 0:
        // do nothing: no term vectors stored at all
        break;
    case 1:
        storeTermVectors = true;
        break;
    case 2:
        storeTermVectors = true;
        storePositions = true;
        break;
    case 3:
        storeTermVectors = true;
        storeOffsets = true;
        break;
    case 4:
        storeTermVectors = true;
        storePositions = true;
        storeOffsets = true;
        break;
    case 5:
        storeTermVectors = true;
        storePositions = true;
        storePayloads = true;
        break;
    case 6:
        storeTermVectors = true;
        storePositions = true;
        storeOffsets = true;
        storePayloads = true;
        break;
    }
    ft.setStoreTermVectors(storeTermVectors);
    ft.setStoreTermVectorOffsets(storeOffsets);
    ft.setStoreTermVectorPayloads(storePayloads);
    ft.setStoreTermVectorPositions(storePositions);

    String optionString = AbstractFieldMapper.termVectorOptionsToString(ft);
    XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
            .startObject("properties").startObject("field").field("type", "string")
            .field("term_vector", optionString).field("analyzer", "tv_test").endObject().endObject().endObject()
            .endObject();
    ElasticsearchAssertions.assertAcked(prepareCreate("test").addMapping("type1", mapping)
            .setSettings(ImmutableSettings.settingsBuilder()
                    .put("index.analysis.analyzer.tv_test.tokenizer", "whitespace")
                    .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase")));
    ensureYellow();
    for (int i = 0; i < 10; i++) {
        client().prepareIndex("test", "type1", Integer.toString(i))
                .setSource(XContentFactory.jsonBuilder().startObject()
                        .field("field", "the quick brown fox jumps over the lazy dog")
                        // 0the3 4quick9 10brown15 16fox19 20jumps25 26over30
                        // 31the34 35lazy39 40dog43
                        .endObject())
                .execute().actionGet();
        refresh();
    }
    // Expected per-term statistics for the indexed sentence, in term iteration order.
    String[] values = { "brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the" };
    int[] freq = { 1, 1, 1, 1, 1, 1, 1, 2 };
    int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } };
    int[][] startOffset = { { 10 }, { 40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } };
    int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } };

    boolean isPayloadRequested = randomBoolean();
    boolean isOffsetRequested = randomBoolean();
    boolean isPositionsRequested = randomBoolean();
    String infoString = createInfoString(isPositionsRequested, isOffsetRequested, isPayloadRequested,
            optionString);
    for (int i = 0; i < 10; i++) {
        TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i))
                .setPayloads(isPayloadRequested).setOffsets(isOffsetRequested)
                .setPositions(isPositionsRequested).setSelectedFields();
        TermVectorResponse response = resp.execute().actionGet();
        assertThat(infoString + "doc id: " + i + " doesn't exists but should", response.isExists(),
                equalTo(true));
        Fields fields = response.getFields();
        assertThat(fields.size(), equalTo(ft.storeTermVectors() ? 1 : 0));
        if (ft.storeTermVectors()) {
            Terms terms = fields.terms("field");
            assertThat(terms.size(), equalTo(8L));
            TermsEnum iterator = terms.iterator(null);
            for (int j = 0; j < values.length; j++) {
                String string = values[j];
                BytesRef next = iterator.next();
                assertThat(infoString, next, Matchers.notNullValue());
                assertThat(infoString + "expected " + string, string, equalTo(next.utf8ToString()));
                // do not test ttf or doc frequency, because here we have
                // many shards and do not know how documents are distributed
                DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);
                // docs and pos only returns something if positions or
                // payloads or offsets are stored / requested; otherwise use
                // DocsEnum?
                assertThat(infoString, docsAndPositions.nextDoc(), equalTo(0));
                // assertThat(actual, matcher): the enum's freq is the actual value
                assertThat(infoString, docsAndPositions.freq(), equalTo(freq[j]));
                int[] termPos = pos[j];
                int[] termStartOffset = startOffset[j];
                int[] termEndOffset = endOffset[j];
                if (isPositionsRequested && storePositions) {
                    assertThat(infoString, termPos.length, equalTo(freq[j]));
                }
                if (isOffsetRequested && storeOffsets) {
                    assertThat(termStartOffset.length, equalTo(freq[j]));
                    assertThat(termEndOffset.length, equalTo(freq[j]));
                }
                for (int k = 0; k < freq[j]; k++) {
                    int nextPosition = docsAndPositions.nextPosition();
                    // only return something useful if requested and stored
                    if (isPositionsRequested && storePositions) {
                        assertThat(infoString + "positions for term: " + string, nextPosition,
                                equalTo(termPos[k]));
                    } else {
                        assertThat(infoString + "positions for term: ", nextPosition, equalTo(-1));
                    }

                    // only return something useful if requested and stored
                    if (isPayloadRequested && storePayloads) {
                        assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(),
                                equalTo(new BytesRef("word")));
                    } else {
                        assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(),
                                equalTo(null));
                    }
                    // only return something useful if requested and stored
                    if (isOffsetRequested && storeOffsets) {

                        assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(),
                                equalTo(termStartOffset[k]));
                        assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(),
                                equalTo(termEndOffset[k]));
                    } else {
                        assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(),
                                equalTo(-1));
                        assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(),
                                equalTo(-1));
                    }

                }
            }
            // all expected terms consumed; the enum must now be exhausted
            assertThat(iterator.next(), Matchers.nullValue());
        }

    }
}

From source file:org.elasticsearch.action.termvector.GetTermVectorTests.java

License:Apache License

@Test
public void testRandomPayloadWithDelimitedPayloadTokenFilter() throws ElasticsearchException, IOException {
    // Index a document whose tokens carry randomly generated payloads encoded via
    // the delimited_payload_filter, then verify each term's payloads round-trip
    // through the term-vector API.

    // create the test document with a randomly chosen payload encoding
    int encoding = randomIntBetween(0, 2);
    String encodingString;
    switch (encoding) {
    case 0:
        encodingString = "float";
        break;
    case 1:
        encodingString = "int";
        break;
    default: // 2
        encodingString = "identity";
        break;
    }
    // NOTE(review): "crateRandomTokens" is a typo for "createRandomTokens"; the
    // helper is declared elsewhere in this class, so it is not renamed here.
    String[] tokens = crateRandomTokens();
    Map<String, List<BytesRef>> payloads = createPayloads(tokens, encoding);
    String delimiter = createRandomDelimiter(tokens);
    String queryString = createString(tokens, payloads, encoding, delimiter.charAt(0));
    // create the mapping
    XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
            .startObject("properties").startObject("field").field("type", "string")
            .field("term_vector", "with_positions_offsets_payloads").field("analyzer", "payload_test")
            .endObject().endObject().endObject().endObject();
    ElasticsearchAssertions.assertAcked(prepareCreate("test").addMapping("type1", mapping)
            .setSettings(ImmutableSettings.settingsBuilder()
                    .put("index.analysis.analyzer.payload_test.tokenizer", "whitespace")
                    .putArray("index.analysis.analyzer.payload_test.filter", "my_delimited_payload_filter")
                    .put("index.analysis.filter.my_delimited_payload_filter.delimiter", delimiter)
                    .put("index.analysis.filter.my_delimited_payload_filter.encoding", encodingString)
                    .put("index.analysis.filter.my_delimited_payload_filter.type",
                            "delimited_payload_filter")));
    ensureYellow();

    client().prepareIndex("test", "type1", Integer.toString(1))
            .setSource(XContentFactory.jsonBuilder().startObject().field("field", queryString).endObject())
            .execute().actionGet();
    refresh();
    TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(1))
            .setPayloads(true).setOffsets(true).setPositions(true).setSelectedFields();
    TermVectorResponse response = resp.execute().actionGet();
    assertThat("doc id 1 doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms, Matchers.notNullValue());
    TermsEnum iterator = terms.iterator(null);
    while (iterator.next() != null) {
        String term = iterator.term().utf8ToString();
        DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);
        // check for null BEFORE dereferencing the enum (the original asserted after use)
        assertNotNull(docsAndPositions);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        List<BytesRef> curPayloads = payloads.get(term);
        assertThat(term, curPayloads, Matchers.notNullValue());
        for (int k = 0; k < docsAndPositions.freq(); k++) {
            docsAndPositions.nextPosition();
            if (docsAndPositions.getPayload() != null) {
                String infoString = "\nterm: " + term + " has payload \n"
                        + docsAndPositions.getPayload().toString() + "\n but should have payload \n"
                        + curPayloads.get(k).toString();
                assertThat(infoString, docsAndPositions.getPayload(), equalTo(curPayloads.get(k)));
            } else {
                // a missing payload is only acceptable when the expected one is empty
                String infoString = "\nterm: " + term + " has no payload but should have payload \n"
                        + curPayloads.get(k).toString();
                assertThat(infoString, curPayloads.get(k).length, equalTo(0));
            }
        }
    }
}