Example usage for org.apache.lucene.util BytesRef utf8ToString

List of usage examples for org.apache.lucene.util BytesRef utf8ToString

Introduction

On this page you can find example usage for org.apache.lucene.util BytesRef utf8ToString.

Prototype

public String utf8ToString() 

Source Link

Document

Interprets the stored bytes as UTF-8 bytes, returning the resulting string.

Usage

From source file:org.elasticsearch.search.sort.FieldSortIT.java

License:Apache License

/**
 * Indexes between 200 and 300 documents, each with a unique random value in {@code dense_bytes} and,
 * rarely, the same value in {@code sparse_bytes}, then verifies that ascending sorts on both keyword
 * fields return ids and sort values in the iteration order of a {@link TreeMap} keyed by the
 * {@link BytesRef} form of those values.
 */
public void testRandomSorting() throws IOException, InterruptedException, ExecutionException {
    Random rnd = random();
    assertAcked(prepareCreate("test").addMapping("type",
            XContentFactory.jsonBuilder()
                    .startObject()
                    .startObject("type")
                    .startObject("properties")
                    .startObject("sparse_bytes").field("type", "keyword").endObject()
                    .startObject("dense_bytes").field("type", "keyword").endObject()
                    .endObject()
                    .endObject()
                    .endObject()));
    ensureGreen();

    // Sorted expectations: field value -> id of the document that holds it.
    TreeMap<BytesRef, String> expectedSparse = new TreeMap<>();
    TreeMap<BytesRef, String> expectedDense = new TreeMap<>();
    int numDocs = randomIntBetween(200, 300);
    IndexRequestBuilder[] requests = new IndexRequestBuilder[numDocs];
    for (int doc = 0; doc < numDocs; doc++) {
        String id = Integer.toString(doc);
        // Re-draw until the value has not been used by an earlier document.
        BytesRef value = new BytesRef(TestUtil.randomRealisticUnicodeString(rnd));
        while (expectedDense.containsKey(value)) {
            value = new BytesRef(TestUtil.randomRealisticUnicodeString(rnd));
        }
        expectedDense.put(value, id);
        XContentBuilder source = jsonBuilder().startObject().field("dense_bytes", value.utf8ToString());
        if (rarely()) {
            source.field("sparse_bytes", value.utf8ToString());
            expectedSparse.put(value, id);
        }
        source.endObject();
        requests[doc] = client().prepareIndex("test", "type", id).setSource(source);
    }
    indexRandom(true, requests);

    // Every document has dense_bytes, so the sort must cover all of them.
    int denseSize = between(1, expectedDense.size());
    SearchResponse denseResponse = client().prepareSearch("test")
            .setQuery(matchAllQuery())
            .setSize(denseSize)
            .addSort("dense_bytes", SortOrder.ASC)
            .execute().actionGet();
    assertNoFailures(denseResponse);
    assertThat(denseResponse.getHits().getTotalHits(), equalTo((long) numDocs));
    assertThat(denseResponse.getHits().hits().length, equalTo(denseSize));
    Iterator<Entry<BytesRef, String>> denseExpected = expectedDense.entrySet().iterator();
    for (int pos = 0; pos < denseSize; pos++) {
        assertThat(denseExpected.hasNext(), equalTo(true));
        Entry<BytesRef, String> expected = denseExpected.next();
        assertThat("pos: " + pos, denseResponse.getHits().getAt(pos).id(), equalTo(expected.getValue()));
        assertThat(denseResponse.getHits().getAt(pos).sortValues()[0].toString(),
                equalTo(expected.getKey().utf8ToString()));
    }

    if (expectedSparse.isEmpty()) {
        return;
    }
    // Only documents that carry sparse_bytes are kept by the post filter.
    int sparseSize = between(1, expectedSparse.size());
    SearchResponse sparseResponse = client().prepareSearch()
            .setQuery(matchAllQuery())
            .setPostFilter(QueryBuilders.existsQuery("sparse_bytes"))
            .setSize(sparseSize)
            .addSort("sparse_bytes", SortOrder.ASC)
            .execute().actionGet();
    assertNoFailures(sparseResponse);
    assertThat(sparseResponse.getHits().getTotalHits(), equalTo((long) expectedSparse.size()));
    assertThat(sparseResponse.getHits().hits().length, equalTo(sparseSize));
    Iterator<Entry<BytesRef, String>> sparseExpected = expectedSparse.entrySet().iterator();
    for (int pos = 0; pos < sparseSize; pos++) {
        assertThat(sparseExpected.hasNext(), equalTo(true));
        Entry<BytesRef, String> expected = sparseExpected.next();
        assertThat(sparseResponse.getHits().getAt(pos).id(), equalTo(expected.getValue()));
        assertThat(sparseResponse.getHits().getAt(pos).sortValues()[0].toString(),
                equalTo(expected.getKey().utf8ToString()));
    }
}

From source file:org.elasticsearch.search.sort.SimpleSortTests.java

License:Apache License

/**
 * Creates an index with a random number of shards (1-10) and no replicas, indexes {@code atLeast(200)}
 * documents with a unique random value in {@code dense_bytes} and, rarely, the same value in
 * {@code sparse_bytes}, then verifies that ascending sorts on both not-analyzed string fields return
 * ids and sort values in the iteration order of a {@link TreeMap} keyed by the {@link BytesRef} form
 * of those values.
 */
public void testRandomSorting()
        throws ElasticsearchException, IOException, InterruptedException, ExecutionException {
    int numberOfShards = between(1, 10);
    Random rnd = getRandom();
    prepareCreate("test")
            .setSettings(ImmutableSettings.builder()
                    .put("index.number_of_shards", numberOfShards)
                    .put("index.number_of_replicas", 0))
            .addMapping("type",
                    XContentFactory.jsonBuilder()
                            .startObject()
                            .startObject("type")
                            .startObject("properties")
                            .startObject("sparse_bytes")
                            .field("type", "string").field("index", "not_analyzed")
                            .endObject()
                            .startObject("dense_bytes")
                            .field("type", "string").field("index", "not_analyzed")
                            .endObject()
                            .endObject()
                            .endObject()
                            .endObject())
            .execute().actionGet();
    ensureGreen();

    // Sorted expectations: field value -> id of the document that holds it.
    TreeMap<BytesRef, String> expectedSparse = new TreeMap<BytesRef, String>();
    TreeMap<BytesRef, String> expectedDense = new TreeMap<BytesRef, String>();
    int numDocs = atLeast(200);
    IndexRequestBuilder[] requests = new IndexRequestBuilder[numDocs];
    for (int doc = 0; doc < numDocs; doc++) {
        String id = Integer.toString(doc);
        // Re-draw until the value has not been used by an earlier document.
        BytesRef value = new BytesRef(_TestUtil.randomRealisticUnicodeString(rnd));
        while (expectedDense.containsKey(value)) {
            value = new BytesRef(_TestUtil.randomRealisticUnicodeString(rnd));
        }
        expectedDense.put(value, id);
        XContentBuilder source = jsonBuilder().startObject().field("dense_bytes", value.utf8ToString());
        if (rarely()) {
            source.field("sparse_bytes", value.utf8ToString());
            expectedSparse.put(value, id);
        }
        source.endObject();
        requests[doc] = client().prepareIndex("test", "type", id).setSource(source);
    }
    indexRandom(true, requests);

    // Every document has dense_bytes, so the sort must cover all of them.
    int denseSize = between(1, expectedDense.size());
    SearchResponse denseResponse = client().prepareSearch("test")
            .setQuery(matchAllQuery())
            .setSize(denseSize)
            .addSort("dense_bytes", SortOrder.ASC)
            .execute().actionGet();
    assertNoFailures(denseResponse);
    assertThat(denseResponse.getHits().getTotalHits(), equalTo((long) numDocs));
    assertThat(denseResponse.getHits().hits().length, equalTo(denseSize));
    Iterator<Entry<BytesRef, String>> denseExpected = expectedDense.entrySet().iterator();
    for (int pos = 0; pos < denseSize; pos++) {
        assertThat(denseExpected.hasNext(), equalTo(true));
        Entry<BytesRef, String> expected = denseExpected.next();
        assertThat("pos: " + pos, denseResponse.getHits().getAt(pos).id(), equalTo(expected.getValue()));
        assertThat(denseResponse.getHits().getAt(pos).sortValues()[0].toString(),
                equalTo(expected.getKey().utf8ToString()));
    }

    if (expectedSparse.isEmpty()) {
        return;
    }
    // Only documents that carry sparse_bytes are kept by the post filter.
    int sparseSize = between(1, expectedSparse.size());
    SearchResponse sparseResponse = client().prepareSearch()
            .setQuery(matchAllQuery())
            .setPostFilter(FilterBuilders.existsFilter("sparse_bytes"))
            .setSize(sparseSize)
            .addSort("sparse_bytes", SortOrder.ASC)
            .execute().actionGet();
    assertNoFailures(sparseResponse);
    assertThat(sparseResponse.getHits().getTotalHits(), equalTo((long) expectedSparse.size()));
    assertThat(sparseResponse.getHits().hits().length, equalTo(sparseSize));
    Iterator<Entry<BytesRef, String>> sparseExpected = expectedSparse.entrySet().iterator();
    for (int pos = 0; pos < sparseSize; pos++) {
        assertThat(sparseExpected.hasNext(), equalTo(true));
        Entry<BytesRef, String> expected = sparseExpected.next();
        assertThat(sparseResponse.getHits().getAt(pos).id(), equalTo(expected.getValue()));
        assertThat(sparseResponse.getHits().getAt(pos).sortValues()[0].toString(),
                equalTo(expected.getKey().utf8ToString()));
    }
}

From source file:org.elasticsearch.search.suggest.CompletionTokenStreamTest.java

License:Apache License

/**
 * Wraps a whitespace tokenizer over {@code "mykeyword"} in a {@link CompletionTokenStream} (behind a
 * {@code ByteTermAttrToCharTermAttrFilter}) and checks that every token emitted by the chain reads
 * back as {@code "mykeyword"}.
 */
@Test
public void testSuggestTokenFilterProperlyDelegateInputStream() throws Exception {
    TokenStream source = new MockTokenizer(new StringReader("mykeyword"), MockTokenizer.WHITESPACE, true);
    BytesRef payload = new BytesRef("Surface keyword|friggin payload|10");
    // Delegates finite-string expansion to the suggester held by the enclosing test.
    CompletionTokenStream.ToFiniteStrings finiteStrings = new CompletionTokenStream.ToFiniteStrings() {
        @Override
        public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
            return suggester.toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream);
        }
    };
    TokenStream filtered = new ByteTermAttrToCharTermAttrFilter(
            new CompletionTokenStream(source, payload, finiteStrings));

    TermToBytesRefAttribute termAttribute = filtered.getAttribute(TermToBytesRefAttribute.class);
    // The BytesRef is fetched once up front and re-read after each fillBytesRef() call below.
    BytesRef termBytes = termAttribute.getBytesRef();
    assertNotNull(termBytes);
    filtered.reset();

    while (filtered.incrementToken()) {
        termAttribute.fillBytesRef();
        assertThat(termBytes.utf8ToString(), equalTo("mykeyword"));
    }
    filtered.end();
    filtered.close();
}

From source file:org.elasticsearch.termvectors.GetTermVectorCheckDocFreqTests.java

License:Apache License

/**
 * Fetches the term vector of document {@code i} with term statistics enabled and field statistics
 * disabled, then verifies the terms, their statistics, positions, offsets and payloads, and finally
 * the JSON rendering of the response.
 *
 * @param numDocs     number of indexed documents; used as the expected doc frequency of every term
 * @param values      expected terms, in the order the terms enum returns them
 * @param freq        expected in-document frequency of each term
 * @param pos         expected positions of each term
 * @param startOffset expected start offsets of each term
 * @param endOffset   expected end offsets of each term
 * @param i           id of the document to check
 */
private void checkWithoutFieldStatistics(int numDocs, String[] values, int[] freq, int[][] pos,
        int[][] startOffset, int[][] endOffset, int i) throws IOException {
    TermVectorRequestBuilder requestBuilder = client()
            .prepareTermVector("test", "type1", Integer.toString(i))
            .setPayloads(true).setOffsets(true).setPositions(true)
            .setTermStatistics(true).setFieldStatistics(false)
            .setSelectedFields();
    TermVectorResponse response = requestBuilder.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    // Field statistics were not requested, so each of them is expected to be -1.
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) -1));
    assertThat(terms.getDocCount(), Matchers.equalTo(-1));
    assertThat(terms.getSumDocFreq(), equalTo((long) -1));

    TermsEnum termsEnum = terms.iterator(null);
    for (int j = 0; j < values.length; j++) {
        String expectedTerm = values[j];
        BytesRef term = termsEnum.next();
        assertThat(term, Matchers.notNullValue());
        assertThat("expected " + expectedTerm, expectedTerm, equalTo(term.utf8ToString()));
        assertThat(term, Matchers.notNullValue());
        // "the" is the only term expected twice per document.
        int expectedTtf = expectedTerm.equals("the") ? numDocs * 2 : numDocs;
        assertThat("expected ttf of " + expectedTerm, expectedTtf, equalTo((int) termsEnum.totalTermFreq()));

        DocsAndPositionsEnum postings = termsEnum.docsAndPositions(null, null);
        assertThat(postings.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(postings.freq()));
        assertThat(termsEnum.docFreq(), equalTo(numDocs));
        int[] expectedPositions = pos[j];
        int[] expectedStarts = startOffset[j];
        int[] expectedEnds = endOffset[j];
        assertThat(expectedPositions.length, equalTo(freq[j]));
        assertThat(expectedStarts.length, equalTo(freq[j]));
        assertThat(expectedEnds.length, equalTo(freq[j]));
        for (int occurrence = 0; occurrence < freq[j]; occurrence++) {
            int actualPosition = postings.nextPosition();
            assertThat("term: " + expectedTerm, actualPosition, equalTo(expectedPositions[occurrence]));
            assertThat("term: " + expectedTerm, postings.startOffset(), equalTo(expectedStarts[occurrence]));
            assertThat("term: " + expectedTerm, postings.endOffset(), equalTo(expectedEnds[occurrence]));
            assertThat("term: " + expectedTerm, postings.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(termsEnum.next(), Matchers.nullValue());

    // Render the response and compare it with the literal JSON below.
    XContentBuilder builder = new XContentFactory().jsonBuilder();
    response.toXContent(builder, null);
    String actualJson = builder.bytesStream().bytes().toUtf8();
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i
            + "\",\"_version\":1,\"exists\":true,\"term_vectors\":{\"field\":{\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(actualJson, equalTo(expectedString));
}

From source file:org.elasticsearch.termvectors.GetTermVectorCheckDocFreqTests.java

License:Apache License

/**
 * Fetches the term vector of document {@code i} with field statistics enabled and term statistics
 * disabled, then verifies the field statistics, terms, positions, offsets and payloads, and finally
 * the JSON rendering of the response.
 *
 * @param numDocs     number of indexed documents; used to derive the expected field statistics
 * @param values      expected terms, in the order the terms enum returns them
 * @param freq        expected in-document frequency of each term
 * @param pos         expected positions of each term
 * @param startOffset expected start offsets of each term
 * @param endOffset   expected end offsets of each term
 * @param i           id of the document to check
 */
private void checkWithoutTermStatistics(int numDocs, String[] values, int[] freq, int[][] pos,
        int[][] startOffset, int[][] endOffset, int i) throws IOException {
    TermVectorRequestBuilder requestBuilder = client()
            .prepareTermVector("test", "type1", Integer.toString(i))
            .setPayloads(true).setOffsets(true).setPositions(true)
            .setTermStatistics(false).setFieldStatistics(true)
            .setSelectedFields();
    assertThat(requestBuilder.request().termStatistics(), equalTo(false));
    TermVectorResponse response = requestBuilder.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs)));
    assertThat(terms.getDocCount(), Matchers.equalTo(numDocs));
    assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length));

    TermsEnum termsEnum = terms.iterator(null);
    for (int j = 0; j < values.length; j++) {
        String expectedTerm = values[j];
        BytesRef term = termsEnum.next();
        assertThat(term, Matchers.notNullValue());
        assertThat("expected " + expectedTerm, expectedTerm, equalTo(term.utf8ToString()));
        assertThat(term, Matchers.notNullValue());

        // Term statistics were not requested, so ttf and doc freq are expected to be -1.
        assertThat("expected ttf of " + expectedTerm, -1, equalTo((int) termsEnum.totalTermFreq()));

        DocsAndPositionsEnum postings = termsEnum.docsAndPositions(null, null);
        assertThat(postings.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(postings.freq()));
        assertThat(termsEnum.docFreq(), equalTo(-1));
        int[] expectedPositions = pos[j];
        int[] expectedStarts = startOffset[j];
        int[] expectedEnds = endOffset[j];
        assertThat(expectedPositions.length, equalTo(freq[j]));
        assertThat(expectedStarts.length, equalTo(freq[j]));
        assertThat(expectedEnds.length, equalTo(freq[j]));
        for (int occurrence = 0; occurrence < freq[j]; occurrence++) {
            int actualPosition = postings.nextPosition();
            assertThat("term: " + expectedTerm, actualPosition, equalTo(expectedPositions[occurrence]));
            assertThat("term: " + expectedTerm, postings.startOffset(), equalTo(expectedStarts[occurrence]));
            assertThat("term: " + expectedTerm, postings.endOffset(), equalTo(expectedEnds[occurrence]));
            assertThat("term: " + expectedTerm, postings.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(termsEnum.next(), Matchers.nullValue());

    // Render the response and compare it with the literal JSON below.
    XContentBuilder builder = new XContentFactory().jsonBuilder();
    response.toXContent(builder, null);
    String actualJson = builder.bytesStream().bytes().toUtf8();
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i
            + "\",\"_version\":1,\"exists\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(actualJson, equalTo(expectedString));
}

From source file:org.elasticsearch.termvectors.GetTermVectorCheckDocFreqTests.java

License:Apache License

/**
 * Fetches the term vector of document {@code i} with both field and term statistics enabled, then
 * verifies the statistics, terms, positions, offsets and payloads, and finally the JSON rendering of
 * the response.
 *
 * @param numDocs     number of indexed documents; used to derive the expected statistics
 * @param values      expected terms, in the order the terms enum returns them
 * @param freq        expected in-document frequency of each term
 * @param pos         expected positions of each term
 * @param startOffset expected start offsets of each term
 * @param endOffset   expected end offsets of each term
 * @param i           id of the document to check
 */
private void checkAllInfo(int numDocs, String[] values, int[] freq, int[][] pos, int[][] startOffset,
        int[][] endOffset, int i) throws IOException {
    TermVectorRequestBuilder requestBuilder = client()
            .prepareTermVector("test", "type1", Integer.toString(i))
            .setPayloads(true).setOffsets(true).setPositions(true)
            .setFieldStatistics(true).setTermStatistics(true)
            .setSelectedFields();
    assertThat(requestBuilder.request().fieldStatistics(), equalTo(true));
    TermVectorResponse response = requestBuilder.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs)));
    assertThat(terms.getDocCount(), Matchers.equalTo(numDocs));
    assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length));

    TermsEnum termsEnum = terms.iterator(null);
    for (int j = 0; j < values.length; j++) {
        String expectedTerm = values[j];
        BytesRef term = termsEnum.next();
        assertThat(term, Matchers.notNullValue());
        assertThat("expected " + expectedTerm, expectedTerm, equalTo(term.utf8ToString()));
        assertThat(term, Matchers.notNullValue());
        // "the" is the only term expected twice per document.
        int expectedTtf = expectedTerm.equals("the") ? numDocs * 2 : numDocs;
        assertThat("expected ttf of " + expectedTerm, expectedTtf, equalTo((int) termsEnum.totalTermFreq()));

        DocsAndPositionsEnum postings = termsEnum.docsAndPositions(null, null);
        assertThat(postings.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(postings.freq()));
        assertThat(termsEnum.docFreq(), equalTo(numDocs));
        int[] expectedPositions = pos[j];
        int[] expectedStarts = startOffset[j];
        int[] expectedEnds = endOffset[j];
        assertThat(expectedPositions.length, equalTo(freq[j]));
        assertThat(expectedStarts.length, equalTo(freq[j]));
        assertThat(expectedEnds.length, equalTo(freq[j]));
        for (int occurrence = 0; occurrence < freq[j]; occurrence++) {
            int actualPosition = postings.nextPosition();
            assertThat("term: " + expectedTerm, actualPosition, equalTo(expectedPositions[occurrence]));
            assertThat("term: " + expectedTerm, postings.startOffset(), equalTo(expectedStarts[occurrence]));
            assertThat("term: " + expectedTerm, postings.endOffset(), equalTo(expectedEnds[occurrence]));
            assertThat("term: " + expectedTerm, postings.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(termsEnum.next(), Matchers.nullValue());

    // Render the response and compare it with the literal JSON below.
    XContentBuilder builder = new XContentFactory().jsonBuilder();
    response.toXContent(builder, null);
    String actualJson = builder.bytesStream().bytes().toUtf8();
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i
            + "\",\"_version\":1,\"exists\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(actualJson, equalTo(expectedString));
}

From source file:org.elasticsearch.termvectors.GetTermVectorTests.java

License:Apache License

/**
 * Indexes ten copies of "the quick brown fox jumps over the lazy dog" into a field that stores term
 * vectors with positions, offsets and payloads, then checks the terms, frequencies, positions,
 * offsets and payloads returned by the term vector API for each document.
 */
@Test
public void testSimpleTermVectors() throws ElasticSearchException, IOException {
    XContentBuilder mapping = XContentFactory.jsonBuilder()
            .startObject()
            .startObject("type1")
            .startObject("properties")
            .startObject("field")
            .field("type", "string")
            .field("term_vector", "with_positions_offsets_payloads")
            .field("analyzer", "tv_test")
            .endObject()
            .endObject()
            .endObject()
            .endObject();
    ElasticSearchAssertions.assertAcked(prepareCreate("test").addMapping("type1", mapping)
            .setSettings(ImmutableSettings.settingsBuilder()
                    .put("index.analysis.analyzer.tv_test.tokenizer", "whitespace")
                    .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase")));
    ensureYellow();
    for (int doc = 0; doc < 10; doc++) {
        // Character offsets of the tokens in the indexed sentence:
        // 0the3 4quick9 10brown15 16fox19 20jumps25 26over30 31the34 35lazy39 40dog43
        client().prepareIndex("test", "type1", Integer.toString(doc))
                .setSource(XContentFactory.jsonBuilder().startObject()
                        .field("field", "the quick brown fox jumps over the lazy dog").endObject())
                .execute().actionGet();
        refresh();
    }

    // Expected per-term data; all arrays are parallel to values.
    String[] values = { "brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the" };
    int[] freq = { 1, 1, 1, 1, 1, 1, 1, 2 };
    int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } };
    int[][] startOffset = { { 10 }, { 40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } };
    int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } };

    for (int i = 0; i < 10; i++) {
        TermVectorRequestBuilder requestBuilder = client()
                .prepareTermVector("test", "type1", Integer.toString(i))
                .setPayloads(true).setOffsets(true).setPositions(true)
                .setSelectedFields();
        TermVectorResponse response = requestBuilder.execute().actionGet();
        assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
        Fields fields = response.getFields();
        assertThat(fields.size(), equalTo(1));
        Terms terms = fields.terms("field");
        assertThat(terms.size(), equalTo(8L));
        TermsEnum termsEnum = terms.iterator(null);
        for (int j = 0; j < values.length; j++) {
            String expectedTerm = values[j];
            BytesRef term = termsEnum.next();
            assertThat(term, Matchers.notNullValue());
            assertThat("expected " + expectedTerm, expectedTerm, equalTo(term.utf8ToString()));
            assertThat(term, Matchers.notNullValue());
            // do not test ttf or doc frequency, because here we have many
            // shards and do not know how documents are distributed
            DocsAndPositionsEnum postings = termsEnum.docsAndPositions(null, null);
            assertThat(postings.nextDoc(), equalTo(0));
            assertThat(freq[j], equalTo(postings.freq()));
            int[] expectedPositions = pos[j];
            int[] expectedStarts = startOffset[j];
            int[] expectedEnds = endOffset[j];
            assertThat(expectedPositions.length, equalTo(freq[j]));
            assertThat(expectedStarts.length, equalTo(freq[j]));
            assertThat(expectedEnds.length, equalTo(freq[j]));
            for (int occurrence = 0; occurrence < freq[j]; occurrence++) {
                int actualPosition = postings.nextPosition();
                assertThat("term: " + expectedTerm, actualPosition, equalTo(expectedPositions[occurrence]));
                assertThat("term: " + expectedTerm, postings.startOffset(),
                        equalTo(expectedStarts[occurrence]));
                assertThat("term: " + expectedTerm, postings.endOffset(), equalTo(expectedEnds[occurrence]));
                assertThat("term: " + expectedTerm, postings.getPayload(), equalTo(new BytesRef("word")));
            }
        }
        assertThat(termsEnum.next(), Matchers.nullValue());
    }
}

From source file:org.elasticsearch.termvectors.GetTermVectorTests.java

License:Apache License

/**
 * Picks one of seven term-vector storage configurations at random, indexes ten copies of
 * "the quick brown fox jumps over the lazy dog" with it, and then requests term vectors with a random
 * combination of positions/offsets/payloads. Positions, offsets and payloads are expected to carry
 * real values only when they are both stored and requested; otherwise {@code -1} / {@code null} is
 * expected.
 */
@Test
public void testRandomSingleTermVectors() throws ElasticSearchException, IOException {
    FieldType ft = new FieldType();
    int config = randomInt(6);
    boolean storePositions = false;
    boolean storeOffsets = false;
    boolean storePayloads = false;
    boolean storeTermVectors = false;
    // Each case must end in break: without it every value of config falls through to case 6,
    // all flags end up true, and the other six configurations are never exercised.
    switch (config) {
    case 0: {
        // do nothing
        break;
    }
    case 1: {
        storeTermVectors = true;
        break;
    }
    case 2: {
        storeTermVectors = true;
        storePositions = true;
        break;
    }
    case 3: {
        storeTermVectors = true;
        storeOffsets = true;
        break;
    }
    case 4: {
        storeTermVectors = true;
        storePositions = true;
        storeOffsets = true;
        break;
    }
    case 5: {
        storeTermVectors = true;
        storePositions = true;
        storePayloads = true;
        break;
    }
    case 6: {
        storeTermVectors = true;
        storePositions = true;
        storeOffsets = true;
        storePayloads = true;
        break;
    }
    }
    ft.setStoreTermVectors(storeTermVectors);
    ft.setStoreTermVectorOffsets(storeOffsets);
    ft.setStoreTermVectorPayloads(storePayloads);
    ft.setStoreTermVectorPositions(storePositions);

    String optionString = AbstractFieldMapper.termVectorOptionsToString(ft);
    XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
            .startObject("properties").startObject("field").field("type", "string")
            .field("term_vector", optionString).field("analyzer", "tv_test").endObject().endObject().endObject()
            .endObject();
    ElasticSearchAssertions.assertAcked(prepareCreate("test").addMapping("type1", mapping)
            .setSettings(ImmutableSettings.settingsBuilder()
                    .put("index.analysis.analyzer.tv_test.tokenizer", "whitespace")
                    .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase")));
    ensureYellow();
    for (int i = 0; i < 10; i++) {
        client().prepareIndex("test", "type1", Integer.toString(i))
                .setSource(XContentFactory.jsonBuilder().startObject()
                        .field("field", "the quick brown fox jumps over the lazy dog")
                        // 0the3 4quick9 10brown15 16fox19 20jumps25 26over30
                        // 31the34 35lazy39 40dog43
                        .endObject())
                .execute().actionGet();
        refresh();
    }
    // Expected per-term data; all arrays are parallel to values.
    String[] values = { "brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the" };
    int[] freq = { 1, 1, 1, 1, 1, 1, 1, 2 };
    int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } };
    int[][] startOffset = { { 10 }, { 40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } };
    int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } };

    boolean isPayloadRequested = randomBoolean();
    boolean isOffsetRequested = randomBoolean();
    boolean isPositionsRequested = randomBoolean();
    // Prefix for assertion messages describing the requested and stored options.
    String infoString = createInfoString(isPositionsRequested, isOffsetRequested, isPayloadRequested,
            optionString);
    for (int i = 0; i < 10; i++) {
        TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i))
                .setPayloads(isPayloadRequested).setOffsets(isOffsetRequested)
                .setPositions(isPositionsRequested).setSelectedFields();
        TermVectorResponse response = resp.execute().actionGet();
        assertThat(infoString + "doc id: " + i + " doesn't exists but should", response.isExists(),
                equalTo(true));
        Fields fields = response.getFields();
        // Without stored term vectors the response is expected to contain no fields at all.
        assertThat(fields.size(), equalTo(ft.storeTermVectors() ? 1 : 0));
        if (ft.storeTermVectors()) {
            Terms terms = fields.terms("field");
            assertThat(terms.size(), equalTo(8L));
            TermsEnum iterator = terms.iterator(null);
            for (int j = 0; j < values.length; j++) {
                String string = values[j];
                BytesRef next = iterator.next();
                assertThat(infoString, next, Matchers.notNullValue());
                assertThat(infoString + "expected " + string, string, equalTo(next.utf8ToString()));
                assertThat(infoString, next, Matchers.notNullValue());
                // do not test ttf or doc frequency, because here we have
                // many shards and do not know how documents are distributed
                DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);
                // docs and pos only returns something if positions or
                // payloads or offsets are stored / requested. Otherwise use
                // DocsEnum?
                assertThat(infoString, docsAndPositions.nextDoc(), equalTo(0));
                assertThat(infoString, freq[j], equalTo(docsAndPositions.freq()));
                int[] termPos = pos[j];
                int[] termStartOffset = startOffset[j];
                int[] termEndOffset = endOffset[j];
                if (isPositionsRequested && storePositions) {
                    assertThat(infoString, termPos.length, equalTo(freq[j]));
                }
                if (isOffsetRequested && storeOffsets) {
                    assertThat(termStartOffset.length, equalTo(freq[j]));
                    assertThat(termEndOffset.length, equalTo(freq[j]));
                }
                for (int k = 0; k < freq[j]; k++) {
                    int nextPosition = docsAndPositions.nextPosition();
                    // only return something useful if requested and stored
                    if (isPositionsRequested && storePositions) {
                        assertThat(infoString + "positions for term: " + string, nextPosition,
                                equalTo(termPos[k]));
                    } else {
                        assertThat(infoString + "positions for term: ", nextPosition, equalTo(-1));
                    }

                    // only return something useful if requested and stored
                    if (isPayloadRequested && storePayloads) {
                        assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(),
                                equalTo(new BytesRef("word")));
                    } else {
                        assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(),
                                equalTo(null));
                    }
                    // only return something useful if requested and stored
                    if (isOffsetRequested && storeOffsets) {

                        assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(),
                                equalTo(termStartOffset[k]));
                        assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(),
                                equalTo(termEndOffset[k]));
                    } else {
                        assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(),
                                equalTo(-1));
                        assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(),
                                equalTo(-1));
                    }

                }
            }
            assertThat(iterator.next(), Matchers.nullValue());
        }

    }
}

From source file:org.elasticsearch.test.integration.search.sort.SimpleSortTests.java

License:Apache License

/**
 * Checks that sorting on not_analyzed string fields returns hits in the order of a
 * {@link TreeMap} keyed by the {@link BytesRef} form of each indexed value.
 *
 * <p>Every document gets a unique random unicode string in {@code dense_bytes}; a random subset
 * of documents also carries the same value in {@code sparse_bytes}. Both fields are then sorted
 * ascending and the returned ids and sort values are compared, position by position, against the
 * corresponding map.
 *
 * @throws ElasticSearchException declared by the original signature
 * @throws IOException if building the mapping or a document source fails
 * @throws InterruptedException declared by the original signature
 * @throws ExecutionException declared by the original signature
 */
public void testRandomSorting()
        throws ElasticSearchException, IOException, InterruptedException, ExecutionException {
    int numberOfShards = between(1, 10);
    Random random = getRandom();
    prepareCreate("test")
            .setSettings(randomSettingsBuilder().put("index.number_of_shards", numberOfShards)
                    .put("index.number_of_replicas", 0))
            .addMapping("type",
                    XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
                            .startObject("sparse_bytes").field("type", "string").field("index", "not_analyzed")
                            .endObject().startObject("dense_bytes").field("type", "string")
                            .field("index", "not_analyzed").endObject().endObject().endObject().endObject())
            .execute().actionGet();
    ensureGreen();

    // value -> doc id, iterated in the key order of the TreeMap; this is the expected hit order
    TreeMap<BytesRef, String> sparseBytes = new TreeMap<BytesRef, String>();
    TreeMap<BytesRef, String> denseBytes = new TreeMap<BytesRef, String>();
    int numDocs = atLeast(200);
    IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
    for (int i = 0; i < numDocs; i++) {
        String docId = Integer.toString(i);
        BytesRef ref;
        // re-draw until the value is unique, otherwise two docs would tie in the sort
        do {
            ref = new BytesRef(_TestUtil.randomRealisticUnicodeString(random));
        } while (denseBytes.containsKey(ref));
        denseBytes.put(ref, docId);
        XContentBuilder src = jsonBuilder().startObject().field("dense_bytes", ref.utf8ToString());
        if (rarely()) {
            src.field("sparse_bytes", ref.utf8ToString());
            sparseBytes.put(ref, docId);
        }
        src.endObject();
        builders[i] = client().prepareIndex("test", "type", docId).setSource(src);
    }
    indexRandom("test", true, builders);

    {
        int size = between(1, denseBytes.size());
        SearchResponse searchResponse = client().prepareSearch("test").setQuery(matchAllQuery()).setSize(size)
                .addSort("dense_bytes", SortOrder.ASC).execute().actionGet();
        assertNoFailures(searchResponse);
        assertThat(searchResponse.getHits().getTotalHits(), equalTo((long) numDocs));
        assertHitsFollowMapOrder(searchResponse, denseBytes, size);
    }
    if (!sparseBytes.isEmpty()) {
        int size = between(1, sparseBytes.size());
        // scoped to "test" like the dense query so unrelated indices cannot affect the hit count
        SearchResponse searchResponse = client().prepareSearch("test").setQuery(matchAllQuery())
                .setFilter(FilterBuilders.existsFilter("sparse_bytes")).setSize(size)
                .addSort("sparse_bytes", SortOrder.ASC).execute().actionGet();
        assertNoFailures(searchResponse);
        assertThat(searchResponse.getHits().getTotalHits(), equalTo((long) sparseBytes.size()));
        assertHitsFollowMapOrder(searchResponse, sparseBytes, size);
    }
}

/**
 * Asserts that the response holds exactly {@code size} hits and that hit {@code i} has the id and
 * first sort value of the {@code i}-th entry of {@code expected}.
 *
 * @param searchResponse response of a search sorted on the field the map was built from
 * @param expected indexed value mapped to document id, in expected hit order
 * @param size number of hits that were requested
 */
private void assertHitsFollowMapOrder(SearchResponse searchResponse, TreeMap<BytesRef, String> expected,
        int size) {
    assertThat(searchResponse.getHits().hits().length, equalTo(size));
    Iterator<Entry<BytesRef, String>> iterator = expected.entrySet().iterator();
    for (int i = 0; i < size; i++) {
        String where = "pos: " + i;
        assertThat(where, iterator.hasNext(), equalTo(true));
        Entry<BytesRef, String> next = iterator.next();
        assertThat(where, searchResponse.getHits().getAt(i).id(), equalTo(next.getValue()));
        assertThat(where, searchResponse.getHits().getAt(i).sortValues()[0].toString(),
                equalTo(next.getKey().utf8ToString()));
    }
}

From source file:org.elasticsearch.test.integration.termvectors.GetTermVectorTests.java

License:Apache License

/**
 * Indexes the same sentence into ten documents and checks the term vector returned for each one.
 *
 * <p>The field is mapped with {@code with_positions_offsets_payloads} and an analyzer made of the
 * whitespace tokenizer plus the {@code type_as_payload} and {@code lowercase} filters. For every
 * document the test requests payloads, offsets and positions and asserts, term by term, the
 * frequency, positions, start/end offsets and a payload equal to {@code "word"}.
 *
 * @throws ElasticSearchException declared by the original signature
 * @throws IOException if building the document source fails
 */
@Test
public void testSimpleTermVectors() throws ElasticSearchException, IOException {

    run(addMapping(prepareCreate("test"), "type1",
            new Object[] { "field", "type", "string", "term_vector", "with_positions_offsets_payloads",
                    "analyzer", "tv_test" })
                            .setSettings(ImmutableSettings.settingsBuilder()
                                    .put("index.analysis.analyzer.tv_test.tokenizer", "whitespace")
                                    .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload",
                                            "lowercase")));
    ensureYellow();
    for (int i = 0; i < 10; i++) {
        client().prepareIndex("test", "type1", Integer.toString(i))
                .setSource(XContentFactory.jsonBuilder().startObject()
                        .field("field", "the quick brown fox jumps over the lazy dog")
                        // 0the3 4quick9 10brown15 16fox19 20jumps25 26over30
                        // 31the34 35lazy39 40dog43
                        .endObject())
                .execute().actionGet();
        refresh();
    }
    // Expected terms in the order the TermsEnum is asserted to return them; the parallel arrays
    // below hold, per term, its frequency and the position/offsets of each occurrence.
    String[] values = { "brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the" };
    int[] freq = { 1, 1, 1, 1, 1, 1, 1, 2 };
    int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } };
    int[][] startOffset = { { 10 }, { 40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } };
    int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } };
    for (int i = 0; i < 10; i++) {
        TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i))
                .setPayloads(true).setOffsets(true).setPositions(true).setSelectedFields();
        TermVectorResponse response = resp.execute().actionGet();
        assertThat("doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true));
        Fields fields = response.getFields();
        assertThat(fields.size(), equalTo(1));
        Terms terms = fields.terms("field");
        assertThat(terms.size(), equalTo(8L));
        TermsEnum iterator = terms.iterator(null);
        for (int j = 0; j < values.length; j++) {
            String string = values[j];
            BytesRef next = iterator.next();
            assertThat(next, Matchers.notNullValue());
            // observed value first, expected value in the matcher, so failures read correctly
            assertThat("expected " + string, next.utf8ToString(), equalTo(string));
            // do not test ttf or doc frequency, because here we have many
            // shards and do not know how documents are distributed
            DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);
            assertThat(docsAndPositions.nextDoc(), equalTo(0));
            assertThat(docsAndPositions.freq(), equalTo(freq[j]));
            int[] termPos = pos[j];
            int[] termStartOffset = startOffset[j];
            int[] termEndOffset = endOffset[j];
            // guard against the expectation tables themselves being out of sync
            assertThat(termPos.length, equalTo(freq[j]));
            assertThat(termStartOffset.length, equalTo(freq[j]));
            assertThat(termEndOffset.length, equalTo(freq[j]));
            for (int k = 0; k < freq[j]; k++) {
                int nextPosition = docsAndPositions.nextPosition();
                assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
                assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
                assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
                // NOTE(review): "word" is presumably the token type stored by type_as_payload; confirm
                assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
            }
        }
        // the enum must be exhausted once all expected terms were seen
        assertThat(iterator.next(), Matchers.nullValue());
    }
}