Example usage for org.apache.lucene.index Fields terms

Introduction

This page collects example usages of the org.apache.lucene.index Fields.terms method, taken from open source projects.

Prototype

public abstract Terms terms(String field) throws IOException;

Document

Get the Terms for this field. This will return null if the field does not exist.
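
For orientation before the project examples, here is a minimal self-contained sketch of the call, assuming the Lucene 5.x API (FSDirectory.open taking a Path, and a no-argument TermsEnum iterator()). The class name, the index path "index", and the field name "body" are illustrative placeholders, not taken from any project below.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class FieldsTermsSketch {
    public static void main(String[] args) throws IOException {
        // "index" is a placeholder path to an existing Lucene index
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("index")))) {
            // merged view of the postings across all segments
            Fields fields = MultiFields.getFields(reader);
            if (fields == null) {
                return; // reader has no postings at all
            }
            // terms(field) returns null when the field does not exist
            Terms terms = fields.terms("body");
            if (terms == null) {
                return;
            }
            TermsEnum iterator = terms.iterator();
            for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
                // docFreq: number of documents containing the current term
                System.out.println(term.utf8ToString() + " df=" + iterator.docFreq());
            }
        }
    }
}

The examples below apply the same pattern to term-vector Fields obtained from Elasticsearch responses and index readers, where the null check on the returned Terms is equally important.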

Usage

From source file:org.elasticsearch.action.termvectors.GetTermVectorsTests.java

License:Apache License

@Test
public void testRandomPayloadWithDelimitedPayloadTokenFilter() throws ElasticsearchException, IOException {
    //create the test document
    int encoding = randomIntBetween(0, 2);
    String encodingString = "";
    if (encoding == 0) {
        encodingString = "float";
    } else if (encoding == 1) {
        encodingString = "int";
    } else if (encoding == 2) {
        encodingString = "identity";
    }
    String[] tokens = crateRandomTokens();
    Map<String, List<BytesRef>> payloads = createPayloads(tokens, encoding);
    String delimiter = createRandomDelimiter(tokens);
    String queryString = createString(tokens, payloads, encoding, delimiter.charAt(0));
    //create the mapping
    XContentBuilder mapping = jsonBuilder().startObject().startObject("type1").startObject("properties")
            .startObject("field").field("type", "string")
            .field("term_vector", "with_positions_offsets_payloads").field("analyzer", "payload_test")
            .endObject().endObject().endObject().endObject();
    assertAcked(prepareCreate("test").addMapping("type1", mapping).setSettings(settingsBuilder()
            .put(indexSettings()).put("index.analysis.analyzer.payload_test.tokenizer", "whitespace")
            .putArray("index.analysis.analyzer.payload_test.filter", "my_delimited_payload_filter")
            .put("index.analysis.filter.my_delimited_payload_filter.delimiter", delimiter)
            .put("index.analysis.filter.my_delimited_payload_filter.encoding", encodingString)
            .put("index.analysis.filter.my_delimited_payload_filter.type", "delimited_payload_filter")));
    ensureYellow();

    client().prepareIndex("test", "type1", Integer.toString(1))
            .setSource(jsonBuilder().startObject().field("field", queryString).endObject()).execute()
            .actionGet();
    refresh();
    TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(1))
            .setPayloads(true).setOffsets(true).setPositions(true).setSelectedFields();
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id 1 doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    TermsEnum iterator = terms.iterator();
    while (iterator.next() != null) {
        String term = iterator.term().utf8ToString();
        PostingsEnum docsAndPositions = iterator.postings(null, null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        List<BytesRef> curPayloads = payloads.get(term);
        assertThat(term, curPayloads, notNullValue());
        assertNotNull(docsAndPositions);
        for (int k = 0; k < docsAndPositions.freq(); k++) {
            docsAndPositions.nextPosition();
            if (docsAndPositions.getPayload() != null) {
                String infoString = "\nterm: " + term + " has payload \n"
                        + docsAndPositions.getPayload().toString() + "\n but should have payload \n"
                        + curPayloads.get(k).toString();
                assertThat(infoString, docsAndPositions.getPayload(), equalTo(curPayloads.get(k)));
            } else {
                String infoString = "\nterm: " + term + " has no payload but should have payload \n"
                        + curPayloads.get(k).toString();
                assertThat(infoString, curPayloads.get(k).length, equalTo(0));
            }
        }
    }
    assertThat(iterator.next(), nullValue());
}

From source file:org.elasticsearch.action.termvectors.GetTermVectorsTests.java

License:Apache License

private void checkBrownFoxTermVector(Fields fields, String fieldName, boolean withPayloads)
        throws ElasticsearchException, IOException {
    String[] values = { "brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the" };
    int[] freq = { 1, 1, 1, 1, 1, 1, 1, 2 };
    int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } };
    int[][] startOffset = { { 10 }, { 40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } };
    int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } };

    Terms terms = fields.terms(fieldName);
    assertThat(terms.size(), equalTo(8L));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        // do not test ttf or doc frequency, because here we have many
        // shards and do not know how documents are distributed
        PostingsEnum docsAndPositions = iterator.postings(null, null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            if (withPayloads) {
                assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
            }
        }
    }
    assertThat(iterator.next(), nullValue());
}

From source file:org.elasticsearch.action.termvectors.GetTermVectorsTests.java

License:Apache License

private void compareTermVectors(String fieldName, Fields fields0, Fields fields1) throws IOException {
    Terms terms0 = fields0.terms(fieldName);
    Terms terms1 = fields1.terms(fieldName);
    assertThat(terms0, notNullValue());
    assertThat(terms1, notNullValue());
    assertThat(terms0.size(), equalTo(terms1.size()));

    TermsEnum iter0 = terms0.iterator();
    TermsEnum iter1 = terms1.iterator();
    for (int i = 0; i < terms0.size(); i++) {
        BytesRef next0 = iter0.next();
        assertThat(next0, notNullValue());
        BytesRef next1 = iter1.next();
        assertThat(next1, notNullValue());

        // compare field value
        String string0 = next0.utf8ToString();
        String string1 = next1.utf8ToString();
        assertThat("expected: " + string0, string0, equalTo(string1));

        // compare df and ttf
        assertThat("term: " + string0, iter0.docFreq(), equalTo(iter1.docFreq()));
        assertThat("term: " + string0, iter0.totalTermFreq(), equalTo(iter1.totalTermFreq()));

        // compare freq and docs
        PostingsEnum docsAndPositions0 = iter0.postings(null, null, PostingsEnum.ALL);
        PostingsEnum docsAndPositions1 = iter1.postings(null, null, PostingsEnum.ALL);
        assertThat("term: " + string0, docsAndPositions0.nextDoc(), equalTo(docsAndPositions1.nextDoc()));
        assertThat("term: " + string0, docsAndPositions0.freq(), equalTo(docsAndPositions1.freq()));

        // compare position, start offsets and end offsets
        for (int j = 0; j < docsAndPositions0.freq(); j++) {
            assertThat("term: " + string0, docsAndPositions0.nextPosition(),
                    equalTo(docsAndPositions1.nextPosition()));
            assertThat("term: " + string0, docsAndPositions0.startOffset(),
                    equalTo(docsAndPositions1.startOffset()));
            assertThat("term: " + string0, docsAndPositions0.endOffset(),
                    equalTo(docsAndPositions1.endOffset()));
        }
    }
    assertThat(iter0.next(), nullValue());
    assertThat(iter1.next(), nullValue());
}

From source file:org.elasticsearch.action.termvectors.TermVectorsResponse.java

License:Apache License

private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields,
        Iterator<String> fieldIter) throws IOException {
    String fieldName = fieldIter.next();
    builder.startObject(fieldName);
    Terms curTerms = theFields.terms(fieldName);
    // write field statistics
    buildFieldStatistics(builder, curTerms);
    builder.startObject(FieldStrings.TERMS);
    TermsEnum termIter = curTerms.iterator();
    BoostAttribute boostAtt = termIter.attributes().addAttribute(BoostAttribute.class);
    for (int i = 0; i < curTerms.size(); i++) {
        buildTerm(builder, spare, curTerms, termIter, boostAtt);
    }
    builder.endObject();
    builder.endObject();
}

From source file:org.elasticsearch.action.termvectors.TermVectorsUnitTests.java

License:Apache License

private void checkIfStandardTermVector(TermVectorsResponse inResponse) throws IOException {

    Fields fields = inResponse.getFields();
    assertThat(fields.terms("title"), Matchers.notNullValue());
    assertThat(fields.terms("desc"), Matchers.notNullValue());
    assertThat(fields.size(), equalTo(2));
}

From source file:org.elasticsearch.action.termvectors.TermVectorsWriter.java

License:Apache License

void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Flag> flags,
        Fields topLevelFields, @Nullable AggregatedDfs dfs, @Nullable TermVectorsFilter termVectorsFilter)
        throws IOException {
    int numFieldsWritten = 0;
    PostingsEnum docsAndPosEnum = null;
    PostingsEnum docsEnum = null;
    boolean hasScores = termVectorsFilter != null;

    for (String field : termVectorsByField) {
        if ((selectedFields != null) && (!selectedFields.contains(field))) {
            continue;
        }

        Terms fieldTermVector = termVectorsByField.terms(field);
        Terms topLevelTerms = topLevelFields.terms(field);

        // if no terms found, take the retrieved term vector fields for stats
        if (topLevelTerms == null) {
            topLevelTerms = fieldTermVector;
        }

        TermsEnum topLevelIterator = topLevelTerms.iterator();
        boolean positions = flags.contains(Flag.Positions) && fieldTermVector.hasPositions();
        boolean offsets = flags.contains(Flag.Offsets) && fieldTermVector.hasOffsets();
        boolean payloads = flags.contains(Flag.Payloads) && fieldTermVector.hasPayloads();

        long termsSize = fieldTermVector.size();
        if (hasScores) {
            termsSize = Math.min(termsSize, termVectorsFilter.size(field));
        }
        startField(field, termsSize, positions, offsets, payloads);

        if (flags.contains(Flag.FieldStatistics)) {
            if (dfs != null) {
                writeFieldStatistics(dfs.fieldStatistics().get(field));
            } else {
                writeFieldStatistics(topLevelTerms);
            }
        }
        TermsEnum iterator = fieldTermVector.iterator();
        final boolean useDocsAndPos = positions || offsets || payloads;
        while (iterator.next() != null) { // iterate all terms of the current field
            BytesRef termBytesRef = iterator.term();
            Term term = new Term(field, termBytesRef);

            // with filtering we only keep the best terms
            if (hasScores && !termVectorsFilter.hasScoreTerm(term)) {
                continue;
            }

            startTerm(termBytesRef);
            if (flags.contains(Flag.TermStatistics)) {
                // get the doc frequency
                if (dfs != null) {
                    final TermStatistics statistics = dfs.termStatistics().get(term);
                    writeTermStatistics(
                            statistics == null ? new TermStatistics(termBytesRef, 0, 0) : statistics);
                } else {
                    boolean foundTerm = topLevelIterator.seekExact(termBytesRef);
                    if (foundTerm) {
                        writeTermStatistics(topLevelIterator);
                    } else {
                        writeTermStatistics(new TermStatistics(termBytesRef, 0, 0));
                    }
                }
            }
            if (useDocsAndPos) {
                // given we have pos or offsets
                docsAndPosEnum = writeTermWithDocsAndPos(iterator, docsAndPosEnum, positions, offsets,
                        payloads);
            } else {
                // if we do not have the positions stored, we need to
                // get the frequency from a PostingsEnum.
                docsEnum = writeTermWithDocsOnly(iterator, docsEnum);
            }
            if (hasScores) {
                writeScoreTerm(termVectorsFilter.getScoreTerm(term));
            }
        }
        numFieldsWritten++;
    }
    response.setTermVectorsField(output);
    response.setHeader(writeHeader(numFieldsWritten, flags.contains(Flag.TermStatistics),
            flags.contains(Flag.FieldStatistics), hasScores));
}

From source file:org.elasticsearch.action.termwalker.TransportTermwalkerAction.java

License:Apache License

@Override
protected ShardTermwalkerResponse shardOperation(ShardTermwalkerRequest request) throws ElasticSearchException {
    synchronized (mutex) {
        try {
            Map<String, Object> response = new HashMap<>();
            IndexService indexService = indicesService.indexServiceSafe(request.index());
            InternalIndexShard indexShard = (InternalIndexShard) indexService.shardSafe(request.shardId());
            Store store = indexShard.store();
            IndexReader reader = indexShard.searcher().reader();

            int termCount = 0;
            long totalCount = 0L;
            List<Map<String, Object>> termList = new ArrayList<>();
            Fields fields = MultiFields.getFields(reader);
            Terms terms = fields.terms("_all");

            Boolean includeDF = request.includeDF();
            Boolean includeTTF = request.includeTTF();

            logger.info("termwalker:" + " shard: " + request.shardId() + " df: " + includeDF + " ttf: "
                    + includeTTF);

            if (terms != null) {
                TermsEnum iterator = terms.iterator(null);

                for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
                    int df = iterator.docFreq();
                    long ttf = iterator.totalTermFreq();

                    termCount += 1;
                    totalCount += ttf;

                    if ((includeDF || includeTTF) && df > 1) {
                        Map<String, Object> tiMap = new HashMap<>();
                        tiMap.put("text", term.utf8ToString());
                        if (includeDF) {
                            tiMap.put("df", df);
                        }
                        if (includeTTF) {
                            tiMap.put("ttf", ttf);
                        }
                        termList.add(tiMap);
                    }
                }
            } else {
                logger.error("Terms for _all is null.");
            }
            response.put("terms", termList);
            response.put("num_docs", reader.numDocs());
            response.put("num_terms", termCount);
            response.put("total_terms", totalCount);

            return new ShardTermwalkerResponse(request.index(), request.shardId()).setResponse(response);
        } catch (IOException ex) {
            throw new ElasticSearchException(ex.getMessage(), ex);
        }
    }
}

From source file:org.elasticsearch.bwcompat.BasicBackwardsCompatibilityIT.java

License:Apache License

public void testGetTermVector() throws IOException {
    createIndexWithAlias();
    assertAcked(client().admin().indices().preparePutMapping("test").setType("type1")
            .setSource("field", "type=string,term_vector=with_positions_offsets_payloads").get());
    ensureYellow("test");

    client().prepareIndex(indexOrAlias(), "type1", "1")
            .setSource("field", "the quick brown fox jumps over the lazy dog").get();
    refresh();

    TermVectorsResponse termVectorsResponse = client().prepareTermVectors(indexOrAlias(), "type1", "1").get();
    assertThat(termVectorsResponse.getIndex(), equalTo("test"));
    assertThat(termVectorsResponse.isExists(), equalTo(true));
    Fields fields = termVectorsResponse.getFields();
    assertThat(fields.size(), equalTo(1));
    assertThat(fields.terms("field").size(), equalTo(8l));
}

From source file:org.elasticsearch.bwcompat.BasicBackwardsCompatibilityTest.java

License:Apache License

@Test
public void testGetTermVector() throws IOException {
    createIndexWithAlias();
    assertAcked(client().admin().indices().preparePutMapping("test").setType("type1")
            .setSource("field", "type=string,term_vector=with_positions_offsets_payloads").get());
    ensureYellow("test");

    client().prepareIndex(indexOrAlias(), "type1", "1")
            .setSource("field", "the quick brown fox jumps over the lazy dog").get();
    refresh();

    TermVectorResponse termVectorResponse = client().prepareTermVector(indexOrAlias(), "type1", "1").get();
    assertThat(termVectorResponse.getIndex(), equalTo("test"));
    assertThat(termVectorResponse.isExists(), equalTo(true));
    Fields fields = termVectorResponse.getFields();
    assertThat(fields.size(), equalTo(1));
    assertThat(fields.terms("field").size(), equalTo(8l));
}

From source file:org.elasticsearch.common.lucene.search.XMoreLikeThis.java

License:Apache License

/**
 * Return a query that will return docs like the passed Fields.
 *
 * @return a query that will return docs like the passed Fields.
 */
public Query like(Fields... likeFields) throws IOException {
    // get all field names
    Set<String> fieldNames = new HashSet<>();
    for (Fields fields : likeFields) {
        for (String fieldName : fields) {
            fieldNames.add(fieldName);
        }
    }
    // term selection is per field, then appended to a single boolean query
    BooleanQuery bq = new BooleanQuery();
    for (String fieldName : fieldNames) {
        Map<String, Int> termFreqMap = new HashMap<>();
        for (Fields fields : likeFields) {
            Terms vector = fields.terms(fieldName);
            if (vector != null) {
                addTermFrequencies(termFreqMap, vector, fieldName);
            }
        }
        addToQuery(createQueue(termFreqMap, fieldName), bq);
    }
    return bq;
}
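
As a closing illustration of how a caller might drive like(Fields...), here is a hedged sketch. Only the like(Fields...) signature is confirmed by the snippet above; the XMoreLikeThis constructor and the setAnalyzer/setFieldNames setters are assumed to mirror Lucene's MoreLikeThis, and the reader, analyzer, document id, and "body" field are placeholders.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.lucene.search.XMoreLikeThis;

public class XMoreLikeThisSketch {
    // builds a "more like this" query from the term vectors of a seed document
    static Query likeDocument(IndexReader reader, Analyzer analyzer, int docId) throws IOException {
        // assumption: XMoreLikeThis keeps MoreLikeThis's construction and setter API
        XMoreLikeThis mlt = new XMoreLikeThis(reader);
        mlt.setAnalyzer(analyzer);
        mlt.setFieldNames(new String[] { "body" }); // placeholder field name
        // getTermVectors returns the per-document Fields; requires term vectors to be indexed
        Fields termVectors = reader.getTermVectors(docId);
        return mlt.like(termVectors);
    }
}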