Usage examples for org.apache.lucene.index.Fields#terms(String)
public abstract Terms terms(String field) throws IOException;
From source file:org.elasticsearch.action.termvectors.GetTermVectorsTests.java
License:Apache License
/**
 * Indexes a document analyzed with a randomly-configured
 * delimited_payload_filter and verifies that the term vectors API returns
 * exactly the payloads that were encoded into the source text.
 *
 * <p>Randomized dimensions: the payload encoding (float/int/identity), the
 * token set, and the delimiter character.
 */
@Test
public void testRandomPayloadWithDelimitedPayloadTokenFilter() throws ElasticsearchException, IOException {
    // create the test document: pick a random payload encoding first
    int encoding = randomIntBetween(0, 2);
    String encodingString = "";
    if (encoding == 0) {
        encodingString = "float";
    }
    if (encoding == 1) {
        encodingString = "int";
    }
    if (encoding == 2) {
        encodingString = "identity";
    }
    // then build the tokens, their expected payloads, and the source string
    // that joins tokens and payloads with the random delimiter
    String[] tokens = crateRandomTokens();
    Map<String, List<BytesRef>> payloads = createPayloads(tokens, encoding);
    String delimiter = createRandomDelimiter(tokens);
    String queryString = createString(tokens, payloads, encoding, delimiter.charAt(0));
    // create the mapping: term vectors with positions, offsets and payloads,
    // analyzed by the custom "payload_test" analyzer configured below
    XContentBuilder mapping = jsonBuilder().startObject().startObject("type1").startObject("properties")
            .startObject("field").field("type", "string")
            .field("term_vector", "with_positions_offsets_payloads").field("analyzer", "payload_test")
            .endObject().endObject().endObject().endObject();
    assertAcked(prepareCreate("test").addMapping("type1", mapping).setSettings(settingsBuilder()
            .put(indexSettings()).put("index.analysis.analyzer.payload_test.tokenizer", "whitespace")
            .putArray("index.analysis.analyzer.payload_test.filter", "my_delimited_payload_filter")
            .put("index.analysis.filter.my_delimited_payload_filter.delimiter", delimiter)
            .put("index.analysis.filter.my_delimited_payload_filter.encoding", encodingString)
            .put("index.analysis.filter.my_delimited_payload_filter.type", "delimited_payload_filter")));
    ensureYellow();
    client().prepareIndex("test", "type1", Integer.toString(1))
            .setSource(jsonBuilder().startObject().field("field", queryString).endObject()).execute()
            .actionGet();
    refresh();
    // fetch the term vectors with payloads, offsets and positions enabled
    TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(1))
            .setPayloads(true).setOffsets(true).setPositions(true).setSelectedFields();
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id 1 doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    TermsEnum iterator = terms.iterator();
    while (iterator.next() != null) {
        String term = iterator.term().utf8ToString();
        PostingsEnum docsAndPositions = iterator.postings(null, null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        // every returned term must have been generated by this test
        List<BytesRef> curPayloads = payloads.get(term);
        assertThat(term, curPayloads, notNullValue());
        assertNotNull(docsAndPositions);
        // compare the payload at every position against the expected one
        for (int k = 0; k < docsAndPositions.freq(); k++) {
            docsAndPositions.nextPosition();
            if (docsAndPositions.getPayload() != null) {
                String infoString = "\nterm: " + term + " has payload \n"
                        + docsAndPositions.getPayload().toString() + "\n but should have payload \n"
                        + curPayloads.get(k).toString();
                assertThat(infoString, docsAndPositions.getPayload(), equalTo(curPayloads.get(k)));
            } else {
                // a missing payload is only acceptable if an empty one was expected
                String infoString = "\nterm: " + term + " has no payload but should have payload \n"
                        + curPayloads.get(k).toString();
                assertThat(infoString, curPayloads.get(k).length, equalTo(0));
            }
        }
    }
    // the enum must be exhausted — no unexpected extra terms
    assertThat(iterator.next(), nullValue());
}
From source file:org.elasticsearch.action.termvectors.GetTermVectorsTests.java
License:Apache License
private void checkBrownFoxTermVector(Fields fields, String fieldName, boolean withPayloads) throws ElasticsearchException, IOException { String[] values = { "brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the" }; int[] freq = { 1, 1, 1, 1, 1, 1, 1, 2 }; int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } }; int[][] startOffset = { { 10 }, { 40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } }; int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } }; Terms terms = fields.terms(fieldName); assertThat(terms.size(), equalTo(8l)); TermsEnum iterator = terms.iterator(); for (int j = 0; j < values.length; j++) { String string = values[j]; BytesRef next = iterator.next(); assertThat(next, notNullValue()); assertThat("expected " + string, string, equalTo(next.utf8ToString())); assertThat(next, notNullValue()); // do not test ttf or doc frequency, because here we have many // shards and do not know how documents are distributed PostingsEnum docsAndPositions = iterator.postings(null, null, PostingsEnum.ALL); assertThat(docsAndPositions.nextDoc(), equalTo(0)); assertThat(freq[j], equalTo(docsAndPositions.freq())); int[] termPos = pos[j]; int[] termStartOffset = startOffset[j]; int[] termEndOffset = endOffset[j]; assertThat(termPos.length, equalTo(freq[j])); assertThat(termStartOffset.length, equalTo(freq[j])); assertThat(termEndOffset.length, equalTo(freq[j])); for (int k = 0; k < freq[j]; k++) { int nextPosition = docsAndPositions.nextPosition(); assertThat("term: " + string, nextPosition, equalTo(termPos[k])); assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k])); assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k])); if (withPayloads) { assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word"))); }/* www. ja v a 2 s . c o m*/ } } assertThat(iterator.next(), nullValue()); }
From source file:org.elasticsearch.action.termvectors.GetTermVectorsTests.java
License:Apache License
private void compareTermVectors(String fieldName, Fields fields0, Fields fields1) throws IOException { Terms terms0 = fields0.terms(fieldName); Terms terms1 = fields1.terms(fieldName); assertThat(terms0, notNullValue());//from ww w. jav a2 s . com assertThat(terms1, notNullValue()); assertThat(terms0.size(), equalTo(terms1.size())); TermsEnum iter0 = terms0.iterator(); TermsEnum iter1 = terms1.iterator(); for (int i = 0; i < terms0.size(); i++) { BytesRef next0 = iter0.next(); assertThat(next0, notNullValue()); BytesRef next1 = iter1.next(); assertThat(next1, notNullValue()); // compare field value String string0 = next0.utf8ToString(); String string1 = next1.utf8ToString(); assertThat("expected: " + string0, string0, equalTo(string1)); // compare df and ttf assertThat("term: " + string0, iter0.docFreq(), equalTo(iter1.docFreq())); assertThat("term: " + string0, iter0.totalTermFreq(), equalTo(iter1.totalTermFreq())); // compare freq and docs PostingsEnum docsAndPositions0 = iter0.postings(null, null, PostingsEnum.ALL); PostingsEnum docsAndPositions1 = iter1.postings(null, null, PostingsEnum.ALL); assertThat("term: " + string0, docsAndPositions0.nextDoc(), equalTo(docsAndPositions1.nextDoc())); assertThat("term: " + string0, docsAndPositions0.freq(), equalTo(docsAndPositions1.freq())); // compare position, start offsets and end offsets for (int j = 0; j < docsAndPositions0.freq(); j++) { assertThat("term: " + string0, docsAndPositions0.nextPosition(), equalTo(docsAndPositions1.nextPosition())); assertThat("term: " + string0, docsAndPositions0.startOffset(), equalTo(docsAndPositions1.startOffset())); assertThat("term: " + string0, docsAndPositions0.endOffset(), equalTo(docsAndPositions1.endOffset())); } } assertThat(iter0.next(), nullValue()); assertThat(iter1.next(), nullValue()); }
From source file:org.elasticsearch.action.termvectors.TermVectorsResponse.java
License:Apache License
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException { String fieldName = fieldIter.next(); builder.startObject(fieldName);/* w w w. j ava 2 s . c om*/ Terms curTerms = theFields.terms(fieldName); // write field statistics buildFieldStatistics(builder, curTerms); builder.startObject(FieldStrings.TERMS); TermsEnum termIter = curTerms.iterator(); BoostAttribute boostAtt = termIter.attributes().addAttribute(BoostAttribute.class); for (int i = 0; i < curTerms.size(); i++) { buildTerm(builder, spare, curTerms, termIter, boostAtt); } builder.endObject(); builder.endObject(); }
From source file:org.elasticsearch.action.termvectors.TermVectorsUnitTests.java
License:Apache License
/**
 * Asserts that the response carries the standard two-field term vector:
 * both "title" and "desc" are present, and nothing else was returned.
 */
private void checkIfStandardTermVector(TermVectorsResponse inResponse) throws IOException {
    Fields returnedFields = inResponse.getFields();
    // both mapped fields must have a term vector ...
    assertThat(returnedFields.terms("title"), Matchers.notNullValue());
    assertThat(returnedFields.terms("desc"), Matchers.notNullValue());
    // ... and they are the only fields in the response
    assertThat(returnedFields.size(), equalTo(2));
}
From source file:org.elasticsearch.action.termvectors.TermVectorsWriter.java
License:Apache License
/**
 * Serializes the given term vectors into this writer's output.
 *
 * <p>For every selected field this writes the field header, optional field
 * statistics, and then each term with its optional term statistics,
 * positions/offsets/payloads and filter score, honouring the requested
 * {@code flags}.
 *
 * @param termVectorsByField the per-document term vectors to write
 * @param selectedFields     if non-null, only fields in this set are written
 * @param flags              which statistics/postings data to include
 * @param topLevelFields     index-level fields used for df/ttf statistics
 * @param dfs                if non-null, aggregated distributed frequencies
 *                           take precedence over the top-level statistics
 * @param termVectorsFilter  if non-null, only the best-scored terms are kept
 */
void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Flag> flags,
        Fields topLevelFields, @Nullable AggregatedDfs dfs,
        @Nullable TermVectorsFilter termVectorsFilter) throws IOException {
    int numFieldsWritten = 0;
    // postings enums are reused across terms/fields to avoid reallocation
    PostingsEnum docsAndPosEnum = null;
    PostingsEnum docsEnum = null;
    boolean hasScores = termVectorsFilter != null;
    for (String field : termVectorsByField) {
        if ((selectedFields != null) && (!selectedFields.contains(field))) {
            continue;
        }
        Terms fieldTermVector = termVectorsByField.terms(field);
        Terms topLevelTerms = topLevelFields.terms(field);
        // if no terms found, take the retrieved term vector fields for stats
        if (topLevelTerms == null) {
            topLevelTerms = fieldTermVector;
        }
        TermsEnum topLevelIterator = topLevelTerms.iterator();
        // only emit positions/offsets/payloads if requested AND stored
        boolean positions = flags.contains(Flag.Positions) && fieldTermVector.hasPositions();
        boolean offsets = flags.contains(Flag.Offsets) && fieldTermVector.hasOffsets();
        boolean payloads = flags.contains(Flag.Payloads) && fieldTermVector.hasPayloads();
        long termsSize = fieldTermVector.size();
        if (hasScores) {
            // filtering can only shrink the number of terms written
            termsSize = Math.min(termsSize, termVectorsFilter.size(field));
        }
        startField(field, termsSize, positions, offsets, payloads);
        if (flags.contains(Flag.FieldStatistics)) {
            if (dfs != null) {
                writeFieldStatistics(dfs.fieldStatistics().get(field));
            } else {
                writeFieldStatistics(topLevelTerms);
            }
        }
        TermsEnum iterator = fieldTermVector.iterator();
        final boolean useDocsAndPos = positions || offsets || payloads;
        while (iterator.next() != null) { // iterate all terms of the current field
            BytesRef termBytesRef = iterator.term();
            Term term = new Term(field, termBytesRef);
            // with filtering we only keep the best terms
            if (hasScores && !termVectorsFilter.hasScoreTerm(term)) {
                continue;
            }
            startTerm(termBytesRef);
            if (flags.contains(Flag.TermStatistics)) {
                // get the doc frequency
                if (dfs != null) {
                    final TermStatistics statistics = dfs.termStatistics().get(term);
                    // term may be absent from the aggregated stats — write zeros then
                    writeTermStatistics(
                            statistics == null ? new TermStatistics(termBytesRef, 0, 0) : statistics);
                } else {
                    boolean foundTerm = topLevelIterator.seekExact(termBytesRef);
                    if (foundTerm) {
                        writeTermStatistics(topLevelIterator);
                    } else {
                        writeTermStatistics(new TermStatistics(termBytesRef, 0, 0));
                    }
                }
            }
            if (useDocsAndPos) {
                // given we have pos or offsets
                docsAndPosEnum = writeTermWithDocsAndPos(iterator, docsAndPosEnum, positions, offsets,
                        payloads);
            } else {
                // if we do not have the positions stored, we need to
                // get the frequency from a PostingsEnum.
                docsEnum = writeTermWithDocsOnly(iterator, docsEnum);
            }
            if (hasScores) {
                writeScoreTerm(termVectorsFilter.getScoreTerm(term));
            }
        }
        numFieldsWritten++;
    }
    response.setTermVectorsField(output);
    response.setHeader(writeHeader(numFieldsWritten, flags.contains(Flag.TermStatistics),
            flags.contains(Flag.FieldStatistics), hasScores));
}
From source file:org.elasticsearch.action.termwalker.TransportTermwalkerAction.java
License:Apache License
/**
 * Walks every term of the {@code _all} field on the requested shard and
 * returns per-shard term statistics: the distinct and total term counts
 * plus, optionally, the document frequency (df) and total term frequency
 * (ttf) of each term occurring in more than one document.
 *
 * @param request carries the target index/shard and the includeDF/includeTTF flags
 * @return a shard response whose payload maps "terms", "num_docs",
 *         "num_terms" and "total_terms" to the collected values
 * @throws ElasticSearchException if reading the shard fails
 */
@Override
protected ShardTermwalkerResponse shardOperation(ShardTermwalkerRequest request) throws ElasticSearchException {
    // serialize shard walks; the walk below holds a searcher/reader open
    synchronized (mutex) {
        try {
            Map<String, Object> response = new HashMap<>();
            IndexService indexService = indicesService.indexServiceSafe(request.index());
            InternalIndexShard indexShard = (InternalIndexShard) indexService.shardSafe(request.shardId());
            IndexReader reader = indexShard.searcher().reader();
            // primitive counters: the boxed Integer/Long originals re-boxed
            // on every iteration of the term loop
            int termCount = 0;
            long totalCount = 0L;
            List<Map<String, Object>> termList = new ArrayList<>();
            Fields fields = MultiFields.getFields(reader);
            // getFields() may return null for an empty reader — guard before terms()
            Terms terms = fields == null ? null : fields.terms("_all");
            boolean includeDF = request.includeDF();
            boolean includeTTF = request.includeTTF();
            logger.info("termwalker:" + " shard: " + request.shardId() + " df: " + includeDF + " ttf: "
                    + includeTTF);
            if (terms != null) {
                TermsEnum iterator = terms.iterator(null);
                for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
                    int df = iterator.docFreq();
                    long ttf = iterator.totalTermFreq();
                    termCount++;
                    totalCount += ttf;
                    // only report terms that occur in more than one document
                    if ((includeDF || includeTTF) && df > 1) {
                        Map<String, Object> tiMap = new HashMap<>();
                        tiMap.put("text", term.utf8ToString());
                        if (includeDF) {
                            tiMap.put("df", df);
                        }
                        if (includeTTF) {
                            tiMap.put("ttf", ttf);
                        }
                        termList.add(tiMap);
                    }
                }
            } else {
                logger.error("Terms for _all is null.");
            }
            response.put("terms", termList);
            response.put("num_docs", reader.numDocs());
            response.put("num_terms", termCount);
            response.put("total_terms", totalCount);
            return new ShardTermwalkerResponse(request.index(), request.shardId()).setResponse(response);
        } catch (IOException ex) {
            // preserve the cause so the shard failure is debuggable upstream
            throw new ElasticSearchException(ex.getMessage(), ex);
        }
    }
}
From source file:org.elasticsearch.bwcompat.BasicBackwardsCompatibilityIT.java
License:Apache License
public void testGetTermVector() throws IOException { createIndexWithAlias();//from w w w .ja v a2 s .c o m assertAcked(client().admin().indices().preparePutMapping("test").setType("type1") .setSource("field", "type=string,term_vector=with_positions_offsets_payloads").get()); ensureYellow("test"); client().prepareIndex(indexOrAlias(), "type1", "1") .setSource("field", "the quick brown fox jumps over the lazy dog").get(); refresh(); TermVectorsResponse termVectorsResponse = client().prepareTermVectors(indexOrAlias(), "type1", "1").get(); assertThat(termVectorsResponse.getIndex(), equalTo("test")); assertThat(termVectorsResponse.isExists(), equalTo(true)); Fields fields = termVectorsResponse.getFields(); assertThat(fields.size(), equalTo(1)); assertThat(fields.terms("field").size(), equalTo(8l)); }
From source file:org.elasticsearch.bwcompat.BasicBackwardsCompatibilityTest.java
License:Apache License
/**
 * Backwards-compatibility check for the (singular) term vector API: indexes
 * a known sentence through an index alias and verifies the response contains
 * the term vector for the single mapped field with its 8 distinct terms.
 */
@Test
public void testGetTermVector() throws IOException {
    createIndexWithAlias();
    assertAcked(client().admin().indices().preparePutMapping("test").setType("type1")
            .setSource("field", "type=string,term_vector=with_positions_offsets_payloads").get());
    ensureYellow("test");
    client().prepareIndex(indexOrAlias(), "type1", "1")
            .setSource("field", "the quick brown fox jumps over the lazy dog").get();
    refresh();
    TermVectorResponse termVectorResponse = client().prepareTermVector(indexOrAlias(), "type1", "1").get();
    // the alias must resolve to the concrete index name in the response
    assertThat(termVectorResponse.getIndex(), equalTo("test"));
    assertThat(termVectorResponse.isExists(), equalTo(true));
    Fields fields = termVectorResponse.getFields();
    assertThat(fields.size(), equalTo(1));
    // the sentence has 8 distinct terms; 8L, not 8l (misreads as 81)
    assertThat(fields.terms("field").size(), equalTo(8L));
}
From source file:org.elasticsearch.common.lucene.search.XMoreLikeThis.java
License:Apache License
/** * Return a query that will return docs like the passed Fields. * * @return a query that will return docs like the passed Fields. *//*from www .j av a2s.c o m*/ public Query like(Fields... likeFields) throws IOException { // get all field names Set<String> fieldNames = new HashSet<>(); for (Fields fields : likeFields) { for (String fieldName : fields) { fieldNames.add(fieldName); } } // term selection is per field, then appended to a single boolean query BooleanQuery bq = new BooleanQuery(); for (String fieldName : fieldNames) { Map<String, Int> termFreqMap = new HashMap<>(); for (Fields fields : likeFields) { Terms vector = fields.terms(fieldName); if (vector != null) { addTermFrequencies(termFreqMap, vector, fieldName); } } addToQuery(createQueue(termFreqMap, fieldName), bq); } return bq; }