List of usage examples for org.apache.lucene.document FieldType storeTermVectors
boolean storeTermVectors
To view the source code for org.apache.lucene.document FieldType storeTermVectors, click the Source Link.
From source file:com.sindicetech.siren.solr.schema.ExtendedJsonField.java
License:Open Source License
/**
 * Creates the field after verifying that the supplied field type satisfies the
 * constraints of an ExtendedJsonField: it must be indexed, tokenised, omit norms,
 * index docs, frequencies and positions, and must not store term vectors.
 *
 * @throws SolrException with {@code SERVER_ERROR} for the first constraint that is violated
 */
@Override
protected IndexableField createField(final String name, final String val,
        final org.apache.lucene.document.FieldType type, final float boost) {
    // The checks run in a fixed order; only the first failing one is reported.
    final String violation;
    if (!type.indexed()) {
        violation = "ExtendedJsonField instances must be indexed: ";
    } else if (!type.tokenized()) {
        violation = "ExtendedJsonField instances must be tokenised: ";
    } else if (!type.omitNorms()) {
        violation = "ExtendedJsonField instances must omit norms: ";
    } else if (!type.indexOptions().equals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)) {
        violation = "ExtendedJsonField instances must not omit term frequencies and positions: ";
    } else if (type.storeTermVectors()) {
        violation = "ExtendedJsonField instances can not store term vectors: ";
    } else {
        violation = null;
    }

    if (violation != null) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, violation + name);
    }
    return super.createField(name, val, type, boost);
}
From source file:org.codelibs.elasticsearch.index.mapper.FieldMapper.java
License:Apache License
/**
 * Renders the term-vector settings of a field type as a mapping option string:
 * {@code "no"}, {@code "yes"}, {@code "with_offsets"}, or {@code "with_positions"}
 * optionally followed by {@code "_offsets"} and/or {@code "_payloads"}.
 *
 * @param fieldType the field type whose term-vector flags are inspected
 * @return the option string matching the flags
 */
public static String termVectorOptionsToString(FieldType fieldType) {
    if (!fieldType.storeTermVectors()) {
        return "no";
    }
    final boolean withOffsets = fieldType.storeTermVectorOffsets();
    if (!fieldType.storeTermVectorPositions()) {
        // Without positions only the offsets flag is taken into account.
        return withOffsets ? "with_offsets" : "yes";
    }
    String option = "with_positions";
    if (withOffsets) {
        option += "_offsets";
    }
    if (fieldType.storeTermVectorPayloads()) {
        option += "_payloads";
    }
    return option;
}
From source file:org.elasticsearch.action.termvector.GetTermVectorTests.java
License:Apache License
@Test public void testRandomSingleTermVectors() throws ElasticsearchException, IOException { FieldType ft = new FieldType(); int config = randomInt(6); boolean storePositions = false; boolean storeOffsets = false; boolean storePayloads = false; boolean storeTermVectors = false; switch (config) { case 0: {/* ww w. ja v a 2s . co m*/ // do nothing } case 1: { storeTermVectors = true; } case 2: { storeTermVectors = true; storePositions = true; } case 3: { storeTermVectors = true; storeOffsets = true; } case 4: { storeTermVectors = true; storePositions = true; storeOffsets = true; } case 5: { storeTermVectors = true; storePositions = true; storePayloads = true; } case 6: { storeTermVectors = true; storePositions = true; storeOffsets = true; storePayloads = true; } } ft.setStoreTermVectors(storeTermVectors); ft.setStoreTermVectorOffsets(storeOffsets); ft.setStoreTermVectorPayloads(storePayloads); ft.setStoreTermVectorPositions(storePositions); String optionString = AbstractFieldMapper.termVectorOptionsToString(ft); XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") .startObject("properties").startObject("field").field("type", "string") .field("term_vector", optionString).field("analyzer", "tv_test").endObject().endObject().endObject() .endObject(); ElasticsearchAssertions.assertAcked(prepareCreate("test").addMapping("type1", mapping) .setSettings(ImmutableSettings.settingsBuilder() .put("index.analysis.analyzer.tv_test.tokenizer", "whitespace") .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase"))); ensureYellow(); for (int i = 0; i < 10; i++) { client().prepareIndex("test", "type1", Integer.toString(i)) .setSource(XContentFactory.jsonBuilder().startObject() .field("field", "the quick brown fox jumps over the lazy dog") // 0the3 4quick9 10brown15 16fox19 20jumps25 26over30 // 31the34 35lazy39 40dog43 .endObject()) .execute().actionGet(); refresh(); } String[] values = { "brown", "dog", "fox", 
"jumps", "lazy", "over", "quick", "the" }; int[] freq = { 1, 1, 1, 1, 1, 1, 1, 2 }; int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } }; int[][] startOffset = { { 10 }, { 40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } }; int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } }; boolean isPayloadRequested = randomBoolean(); boolean isOffsetRequested = randomBoolean(); boolean isPositionsRequested = randomBoolean(); String infoString = createInfoString(isPositionsRequested, isOffsetRequested, isPayloadRequested, optionString); for (int i = 0; i < 10; i++) { TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i)) .setPayloads(isPayloadRequested).setOffsets(isOffsetRequested) .setPositions(isPositionsRequested).setSelectedFields(); TermVectorResponse response = resp.execute().actionGet(); assertThat(infoString + "doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true)); Fields fields = response.getFields(); assertThat(fields.size(), equalTo(ft.storeTermVectors() ? 1 : 0)); if (ft.storeTermVectors()) { Terms terms = fields.terms("field"); assertThat(terms.size(), equalTo(8l)); TermsEnum iterator = terms.iterator(null); for (int j = 0; j < values.length; j++) { String string = values[j]; BytesRef next = iterator.next(); assertThat(infoString, next, Matchers.notNullValue()); assertThat(infoString + "expected " + string, string, equalTo(next.utf8ToString())); assertThat(infoString, next, Matchers.notNullValue()); // do not test ttf or doc frequency, because here we have // many shards and do not know how documents are distributed DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null); // docs and pos only returns something if positions or // payloads or offsets are stored / requestd Otherwise use // DocsEnum? 
assertThat(infoString, docsAndPositions.nextDoc(), equalTo(0)); assertThat(infoString, freq[j], equalTo(docsAndPositions.freq())); int[] termPos = pos[j]; int[] termStartOffset = startOffset[j]; int[] termEndOffset = endOffset[j]; if (isPositionsRequested && storePositions) { assertThat(infoString, termPos.length, equalTo(freq[j])); } if (isOffsetRequested && storeOffsets) { assertThat(termStartOffset.length, equalTo(freq[j])); assertThat(termEndOffset.length, equalTo(freq[j])); } for (int k = 0; k < freq[j]; k++) { int nextPosition = docsAndPositions.nextPosition(); // only return something useful if requested and stored if (isPositionsRequested && storePositions) { assertThat(infoString + "positions for term: " + string, nextPosition, equalTo(termPos[k])); } else { assertThat(infoString + "positions for term: ", nextPosition, equalTo(-1)); } // only return something useful if requested and stored if (isPayloadRequested && storePayloads) { assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word"))); } else { assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(), equalTo(null)); } // only return something useful if requested and stored if (isOffsetRequested && storeOffsets) { assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k])); assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k])); } else { assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(), equalTo(-1)); assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(), equalTo(-1)); } } } assertThat(iterator.next(), Matchers.nullValue()); } } }
From source file:org.elasticsearch.action.termvectors.GetTermVectorsIT.java
License:Apache License
@Test public void testRandomSingleTermVectors() throws IOException { FieldType ft = new FieldType(); int config = randomInt(6); boolean storePositions = false; boolean storeOffsets = false; boolean storePayloads = false; boolean storeTermVectors = false; switch (config) { case 0: {/*from ww w . ja v a 2 s . co m*/ // do nothing break; } case 1: { storeTermVectors = true; break; } case 2: { storeTermVectors = true; storePositions = true; break; } case 3: { storeTermVectors = true; storeOffsets = true; break; } case 4: { storeTermVectors = true; storePositions = true; storeOffsets = true; break; } case 5: { storeTermVectors = true; storePositions = true; storePayloads = true; break; } case 6: { storeTermVectors = true; storePositions = true; storeOffsets = true; storePayloads = true; break; } } ft.setStoreTermVectors(storeTermVectors); ft.setStoreTermVectorOffsets(storeOffsets); ft.setStoreTermVectorPayloads(storePayloads); ft.setStoreTermVectorPositions(storePositions); String optionString = FieldMapper.termVectorOptionsToString(ft); XContentBuilder mapping = jsonBuilder().startObject().startObject("type1").startObject("properties") .startObject("field").field("type", "string").field("term_vector", optionString) .field("analyzer", "tv_test").endObject().endObject().endObject().endObject(); assertAcked(prepareCreate("test").addMapping("type1", mapping) .setSettings(settingsBuilder().put("index.analysis.analyzer.tv_test.tokenizer", "whitespace") .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase"))); ensureYellow(); for (int i = 0; i < 10; i++) { client().prepareIndex("test", "type1", Integer.toString(i)) .setSource(jsonBuilder().startObject() .field("field", "the quick brown fox jumps over the lazy dog") // 0the3 4quick9 10brown15 16fox19 20jumps25 26over30 // 31the34 35lazy39 40dog43 .endObject()) .execute().actionGet(); refresh(); } String[] values = { "brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the" }; int[] freq = { 1, 
1, 1, 1, 1, 1, 1, 2 }; int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } }; int[][] startOffset = { { 10 }, { 40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } }; int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } }; boolean isPayloadRequested = randomBoolean(); boolean isOffsetRequested = randomBoolean(); boolean isPositionsRequested = randomBoolean(); String infoString = createInfoString(isPositionsRequested, isOffsetRequested, isPayloadRequested, optionString); for (int i = 0; i < 10; i++) { TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(i)) .setPayloads(isPayloadRequested).setOffsets(isOffsetRequested) .setPositions(isPositionsRequested).setSelectedFields(); TermVectorsResponse response = resp.execute().actionGet(); assertThat(infoString + "doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true)); Fields fields = response.getFields(); assertThat(fields.size(), equalTo(ft.storeTermVectors() ? 1 : 0)); if (ft.storeTermVectors()) { Terms terms = fields.terms("field"); assertThat(terms.size(), equalTo(8l)); TermsEnum iterator = terms.iterator(); for (int j = 0; j < values.length; j++) { String string = values[j]; BytesRef next = iterator.next(); assertThat(infoString, next, notNullValue()); assertThat(infoString + "expected " + string, string, equalTo(next.utf8ToString())); assertThat(infoString, next, notNullValue()); // do not test ttf or doc frequency, because here we have // many shards and do not know how documents are distributed PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL); // docs and pos only returns something if positions or // payloads or offsets are stored / requestd Otherwise use // DocsEnum? 
assertThat(infoString, docsAndPositions.nextDoc(), equalTo(0)); assertThat(infoString, freq[j], equalTo(docsAndPositions.freq())); int[] termPos = pos[j]; int[] termStartOffset = startOffset[j]; int[] termEndOffset = endOffset[j]; if (isPositionsRequested && storePositions) { assertThat(infoString, termPos.length, equalTo(freq[j])); } if (isOffsetRequested && storeOffsets) { assertThat(termStartOffset.length, equalTo(freq[j])); assertThat(termEndOffset.length, equalTo(freq[j])); } for (int k = 0; k < freq[j]; k++) { int nextPosition = docsAndPositions.nextPosition(); // only return something useful if requested and stored if (isPositionsRequested && storePositions) { assertThat(infoString + "positions for term: " + string, nextPosition, equalTo(termPos[k])); } else { assertThat(infoString + "positions for term: ", nextPosition, equalTo(-1)); } // only return something useful if requested and stored if (isPayloadRequested && storePayloads) { assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word"))); } else { assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(), equalTo(null)); } // only return something useful if requested and stored if (isOffsetRequested && storeOffsets) { assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k])); assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k])); } else { assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(), equalTo(-1)); assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(), equalTo(-1)); } } } assertThat(iterator.next(), nullValue()); } } }
From source file:org.elasticsearch.action.termvectors.GetTermVectorsTests.java
License:Apache License
@Test public void testRandomSingleTermVectors() throws ElasticsearchException, IOException { FieldType ft = new FieldType(); int config = randomInt(6); boolean storePositions = false; boolean storeOffsets = false; boolean storePayloads = false; boolean storeTermVectors = false; switch (config) { case 0: {//from w w w . j a v a 2s . c o m // do nothing break; } case 1: { storeTermVectors = true; break; } case 2: { storeTermVectors = true; storePositions = true; break; } case 3: { storeTermVectors = true; storeOffsets = true; break; } case 4: { storeTermVectors = true; storePositions = true; storeOffsets = true; break; } case 5: { storeTermVectors = true; storePositions = true; storePayloads = true; break; } case 6: { storeTermVectors = true; storePositions = true; storeOffsets = true; storePayloads = true; break; } } ft.setStoreTermVectors(storeTermVectors); ft.setStoreTermVectorOffsets(storeOffsets); ft.setStoreTermVectorPayloads(storePayloads); ft.setStoreTermVectorPositions(storePositions); String optionString = AbstractFieldMapper.termVectorOptionsToString(ft); XContentBuilder mapping = jsonBuilder().startObject().startObject("type1").startObject("properties") .startObject("field").field("type", "string").field("term_vector", optionString) .field("analyzer", "tv_test").endObject().endObject().endObject().endObject(); assertAcked(prepareCreate("test").addMapping("type1", mapping) .setSettings(settingsBuilder().put("index.analysis.analyzer.tv_test.tokenizer", "whitespace") .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase"))); ensureYellow(); for (int i = 0; i < 10; i++) { client().prepareIndex("test", "type1", Integer.toString(i)) .setSource(jsonBuilder().startObject() .field("field", "the quick brown fox jumps over the lazy dog") // 0the3 4quick9 10brown15 16fox19 20jumps25 26over30 // 31the34 35lazy39 40dog43 .endObject()) .execute().actionGet(); refresh(); } String[] values = { "brown", "dog", "fox", "jumps", "lazy", "over", 
"quick", "the" }; int[] freq = { 1, 1, 1, 1, 1, 1, 1, 2 }; int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } }; int[][] startOffset = { { 10 }, { 40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } }; int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } }; boolean isPayloadRequested = randomBoolean(); boolean isOffsetRequested = randomBoolean(); boolean isPositionsRequested = randomBoolean(); String infoString = createInfoString(isPositionsRequested, isOffsetRequested, isPayloadRequested, optionString); for (int i = 0; i < 10; i++) { TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(i)) .setPayloads(isPayloadRequested).setOffsets(isOffsetRequested) .setPositions(isPositionsRequested).setSelectedFields(); TermVectorsResponse response = resp.execute().actionGet(); assertThat(infoString + "doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true)); Fields fields = response.getFields(); assertThat(fields.size(), equalTo(ft.storeTermVectors() ? 1 : 0)); if (ft.storeTermVectors()) { Terms terms = fields.terms("field"); assertThat(terms.size(), equalTo(8l)); TermsEnum iterator = terms.iterator(); for (int j = 0; j < values.length; j++) { String string = values[j]; BytesRef next = iterator.next(); assertThat(infoString, next, notNullValue()); assertThat(infoString + "expected " + string, string, equalTo(next.utf8ToString())); assertThat(infoString, next, notNullValue()); // do not test ttf or doc frequency, because here we have // many shards and do not know how documents are distributed PostingsEnum docsAndPositions = iterator.postings(null, null, PostingsEnum.ALL); // docs and pos only returns something if positions or // payloads or offsets are stored / requestd Otherwise use // DocsEnum? 
assertThat(infoString, docsAndPositions.nextDoc(), equalTo(0)); assertThat(infoString, freq[j], equalTo(docsAndPositions.freq())); int[] termPos = pos[j]; int[] termStartOffset = startOffset[j]; int[] termEndOffset = endOffset[j]; if (isPositionsRequested && storePositions) { assertThat(infoString, termPos.length, equalTo(freq[j])); } if (isOffsetRequested && storeOffsets) { assertThat(termStartOffset.length, equalTo(freq[j])); assertThat(termEndOffset.length, equalTo(freq[j])); } for (int k = 0; k < freq[j]; k++) { int nextPosition = docsAndPositions.nextPosition(); // only return something useful if requested and stored if (isPositionsRequested && storePositions) { assertThat(infoString + "positions for term: " + string, nextPosition, equalTo(termPos[k])); } else { assertThat(infoString + "positions for term: ", nextPosition, equalTo(-1)); } // only return something useful if requested and stored if (isPayloadRequested && storePayloads) { assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word"))); } else { assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(), equalTo(null)); } // only return something useful if requested and stored if (isOffsetRequested && storeOffsets) { assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k])); assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k])); } else { assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(), equalTo(-1)); assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(), equalTo(-1)); } } } assertThat(iterator.next(), nullValue()); } } }
From source file:org.elasticsearch.index.mapper.core.AbstractFieldMapper.java
License:Apache License
protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { builder.field("type", contentType()); if (includeDefaults || !names.name().equals(names.indexNameClean())) { builder.field("index_name", names.indexNameClean()); }// w w w . j ava 2 s . co m if (includeDefaults || boost != 1.0f) { builder.field("boost", boost); } FieldType defaultFieldType = defaultFieldType(); if (includeDefaults || fieldType.indexed() != defaultFieldType.indexed() || fieldType.tokenized() != defaultFieldType.tokenized()) { builder.field("index", indexTokenizeOptionToString(fieldType.indexed(), fieldType.tokenized())); } if (includeDefaults || fieldType.stored() != defaultFieldType.stored()) { builder.field("store", fieldType.stored()); } if (includeDefaults || hasDocValues() != Defaults.DOC_VALUES) { builder.field(TypeParsers.DOC_VALUES, docValues); } if (includeDefaults || fieldType.storeTermVectors() != defaultFieldType.storeTermVectors()) { builder.field("term_vector", termVectorOptionsToString(fieldType)); } if (includeDefaults || fieldType.omitNorms() != defaultFieldType.omitNorms() || normsLoading != null) { builder.startObject("norms"); if (includeDefaults || fieldType.omitNorms() != defaultFieldType.omitNorms()) { builder.field("enabled", !fieldType.omitNorms()); } if (normsLoading != null) { builder.field(Loading.KEY, normsLoading); } builder.endObject(); } if (includeDefaults || fieldType.indexOptions() != defaultFieldType.indexOptions()) { builder.field("index_options", indexOptionToString(fieldType.indexOptions())); } if (indexAnalyzer == null && searchAnalyzer == null) { if (includeDefaults) { builder.field("analyzer", "default"); } } else if (indexAnalyzer == null) { // searchAnalyzer != null if (includeDefaults || (!searchAnalyzer.name().startsWith("_") && !searchAnalyzer.name().equals("default"))) { builder.field("search_analyzer", searchAnalyzer.name()); } } else if (searchAnalyzer == null) { // indexAnalyzer != 
null if (includeDefaults || (!indexAnalyzer.name().startsWith("_") && !indexAnalyzer.name().equals("default"))) { builder.field("index_analyzer", indexAnalyzer.name()); } } else if (indexAnalyzer.name().equals(searchAnalyzer.name())) { // indexAnalyzer == searchAnalyzer if (includeDefaults || (!indexAnalyzer.name().startsWith("_") && !indexAnalyzer.name().equals("default"))) { builder.field("analyzer", indexAnalyzer.name()); } } else { // both are there but different if (includeDefaults || (!indexAnalyzer.name().startsWith("_") && !indexAnalyzer.name().equals("default"))) { builder.field("index_analyzer", indexAnalyzer.name()); } if (includeDefaults || (!searchAnalyzer.name().startsWith("_") && !searchAnalyzer.name().equals("default"))) { builder.field("search_analyzer", searchAnalyzer.name()); } } if (postingsFormat != null) { if (includeDefaults || !postingsFormat.name().equals(defaultPostingFormat())) { builder.field("postings_format", postingsFormat.name()); } } else if (includeDefaults) { String format = defaultPostingFormat(); if (format == null) { format = PostingsFormatService.DEFAULT_FORMAT; } builder.field("postings_format", format); } if (docValuesFormat != null) { if (includeDefaults || !docValuesFormat.name().equals(defaultDocValuesFormat())) { builder.field(DOC_VALUES_FORMAT, docValuesFormat.name()); } } else if (includeDefaults) { String format = defaultDocValuesFormat(); if (format == null) { format = DocValuesFormatService.DEFAULT_FORMAT; } builder.field(DOC_VALUES_FORMAT, format); } if (similarity() != null) { builder.field("similarity", similarity().name()); } else if (includeDefaults) { builder.field("similariry", SimilarityLookupService.DEFAULT_SIMILARITY); } if (customFieldDataSettings != null) { builder.field("fielddata", (Map) customFieldDataSettings.getAsMap()); } else if (includeDefaults) { builder.field("fielddata", (Map) fieldDataType.getSettings().getAsMap()); } multiFields.toXContent(builder, params); if (copyTo != null) { 
copyTo.toXContent(builder, params); } }
From source file:org.elasticsearch.termvectors.GetTermVectorTests.java
License:Apache License
@Test public void testRandomSingleTermVectors() throws ElasticSearchException, IOException { FieldType ft = new FieldType(); int config = randomInt(6); boolean storePositions = false; boolean storeOffsets = false; boolean storePayloads = false; boolean storeTermVectors = false; switch (config) { case 0: {/*from ww w. j a v a2 s . com*/ // do nothing } case 1: { storeTermVectors = true; } case 2: { storeTermVectors = true; storePositions = true; } case 3: { storeTermVectors = true; storeOffsets = true; } case 4: { storeTermVectors = true; storePositions = true; storeOffsets = true; } case 5: { storeTermVectors = true; storePositions = true; storePayloads = true; } case 6: { storeTermVectors = true; storePositions = true; storeOffsets = true; storePayloads = true; } } ft.setStoreTermVectors(storeTermVectors); ft.setStoreTermVectorOffsets(storeOffsets); ft.setStoreTermVectorPayloads(storePayloads); ft.setStoreTermVectorPositions(storePositions); String optionString = AbstractFieldMapper.termVectorOptionsToString(ft); XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") .startObject("properties").startObject("field").field("type", "string") .field("term_vector", optionString).field("analyzer", "tv_test").endObject().endObject().endObject() .endObject(); ElasticSearchAssertions.assertAcked(prepareCreate("test").addMapping("type1", mapping) .setSettings(ImmutableSettings.settingsBuilder() .put("index.analysis.analyzer.tv_test.tokenizer", "whitespace") .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase"))); ensureYellow(); for (int i = 0; i < 10; i++) { client().prepareIndex("test", "type1", Integer.toString(i)) .setSource(XContentFactory.jsonBuilder().startObject() .field("field", "the quick brown fox jumps over the lazy dog") // 0the3 4quick9 10brown15 16fox19 20jumps25 26over30 // 31the34 35lazy39 40dog43 .endObject()) .execute().actionGet(); refresh(); } String[] values = { "brown", "dog", "fox", 
"jumps", "lazy", "over", "quick", "the" }; int[] freq = { 1, 1, 1, 1, 1, 1, 1, 2 }; int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } }; int[][] startOffset = { { 10 }, { 40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } }; int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } }; boolean isPayloadRequested = randomBoolean(); boolean isOffsetRequested = randomBoolean(); boolean isPositionsRequested = randomBoolean(); String infoString = createInfoString(isPositionsRequested, isOffsetRequested, isPayloadRequested, optionString); for (int i = 0; i < 10; i++) { TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i)) .setPayloads(isPayloadRequested).setOffsets(isOffsetRequested) .setPositions(isPositionsRequested).setSelectedFields(); TermVectorResponse response = resp.execute().actionGet(); assertThat(infoString + "doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true)); Fields fields = response.getFields(); assertThat(fields.size(), equalTo(ft.storeTermVectors() ? 1 : 0)); if (ft.storeTermVectors()) { Terms terms = fields.terms("field"); assertThat(terms.size(), equalTo(8l)); TermsEnum iterator = terms.iterator(null); for (int j = 0; j < values.length; j++) { String string = values[j]; BytesRef next = iterator.next(); assertThat(infoString, next, Matchers.notNullValue()); assertThat(infoString + "expected " + string, string, equalTo(next.utf8ToString())); assertThat(infoString, next, Matchers.notNullValue()); // do not test ttf or doc frequency, because here we have // many shards and do not know how documents are distributed DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null); // docs and pos only returns something if positions or // payloads or offsets are stored / requestd Otherwise use // DocsEnum? 
assertThat(infoString, docsAndPositions.nextDoc(), equalTo(0)); assertThat(infoString, freq[j], equalTo(docsAndPositions.freq())); int[] termPos = pos[j]; int[] termStartOffset = startOffset[j]; int[] termEndOffset = endOffset[j]; if (isPositionsRequested && storePositions) { assertThat(infoString, termPos.length, equalTo(freq[j])); } if (isOffsetRequested && storeOffsets) { assertThat(termStartOffset.length, equalTo(freq[j])); assertThat(termEndOffset.length, equalTo(freq[j])); } for (int k = 0; k < freq[j]; k++) { int nextPosition = docsAndPositions.nextPosition(); // only return something useful if requested and stored if (isPositionsRequested && storePositions) { assertThat(infoString + "positions for term: " + string, nextPosition, equalTo(termPos[k])); } else { assertThat(infoString + "positions for term: ", nextPosition, equalTo(-1)); } // only return something useful if requested and stored if (isPayloadRequested && storePayloads) { assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word"))); } else { assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(), equalTo(null)); } // only return something useful if requested and stored if (isOffsetRequested && storeOffsets) { assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k])); assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k])); } else { assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(), equalTo(-1)); assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(), equalTo(-1)); } } } assertThat(iterator.next(), Matchers.nullValue()); } } }
From source file:org.elasticsearch.test.integration.termvectors.GetTermVectorTests.java
License:Apache License
@Test public void testRandomSingleTermVectors() throws ElasticSearchException, IOException { Random random = getRandom();//from w ww .j ava2 s . c om FieldType ft = new FieldType(); int config = random.nextInt(6); boolean storePositions = false; boolean storeOffsets = false; boolean storePayloads = false; boolean storeTermVectors = false; switch (config) { case 0: { // do nothing } case 1: { storeTermVectors = true; } case 2: { storeTermVectors = true; storePositions = true; } case 3: { storeTermVectors = true; storeOffsets = true; } case 4: { storeTermVectors = true; storePositions = true; storeOffsets = true; } case 5: { storeTermVectors = true; storePositions = true; storePayloads = true; } case 6: { storeTermVectors = true; storePositions = true; storeOffsets = true; storePayloads = true; } } ft.setStoreTermVectors(storeTermVectors); ft.setStoreTermVectorOffsets(storeOffsets); ft.setStoreTermVectorPayloads(storePayloads); ft.setStoreTermVectorPositions(storePositions); String optionString = AbstractFieldMapper.termVectorOptionsToString(ft); run(addMapping(prepareCreate("test"), "type1", new Object[] { "field", "type", "string", "term_vector", optionString, "analyzer", "tv_test" }) .setSettings(ImmutableSettings.settingsBuilder() .put("index.analysis.analyzer.tv_test.tokenizer", "whitespace").putArray( "index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase"))); ensureYellow(); for (int i = 0; i < 10; i++) { client().prepareIndex("test", "type1", Integer.toString(i)) .setSource(XContentFactory.jsonBuilder().startObject() .field("field", "the quick brown fox jumps over the lazy dog") // 0the3 4quick9 10brown15 16fox19 20jumps25 26over30 // 31the34 35lazy39 40dog43 .endObject()) .execute().actionGet(); refresh(); } String[] values = { "brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the" }; int[] freq = { 1, 1, 1, 1, 1, 1, 1, 2 }; int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } }; int[][] startOffset = { { 10 }, { 
40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } }; int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } }; boolean isPayloadRequested = random.nextBoolean(); boolean isOffsetRequested = random.nextBoolean(); boolean isPositionsRequested = random.nextBoolean(); String infoString = createInfoString(isPositionsRequested, isOffsetRequested, isPayloadRequested, optionString); for (int i = 0; i < 10; i++) { TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i)) .setPayloads(isPayloadRequested).setOffsets(isOffsetRequested) .setPositions(isPositionsRequested).setSelectedFields(); TermVectorResponse response = resp.execute().actionGet(); assertThat(infoString + "doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true)); Fields fields = response.getFields(); assertThat(fields.size(), equalTo(ft.storeTermVectors() ? 1 : 0)); if (ft.storeTermVectors()) { Terms terms = fields.terms("field"); assertThat(terms.size(), equalTo(8l)); TermsEnum iterator = terms.iterator(null); for (int j = 0; j < values.length; j++) { String string = values[j]; BytesRef next = iterator.next(); assertThat(infoString, next, Matchers.notNullValue()); assertThat(infoString + "expected " + string, string, equalTo(next.utf8ToString())); assertThat(infoString, next, Matchers.notNullValue()); // do not test ttf or doc frequency, because here we have // many shards and do not know how documents are distributed DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null); // docs and pos only returns something if positions or // payloads or offsets are stored / requestd Otherwise use // DocsEnum? 
assertThat(infoString, docsAndPositions.nextDoc(), equalTo(0)); assertThat(infoString, freq[j], equalTo(docsAndPositions.freq())); int[] termPos = pos[j]; int[] termStartOffset = startOffset[j]; int[] termEndOffset = endOffset[j]; if (isPositionsRequested && storePositions) { assertThat(infoString, termPos.length, equalTo(freq[j])); } if (isOffsetRequested && storeOffsets) { assertThat(termStartOffset.length, equalTo(freq[j])); assertThat(termEndOffset.length, equalTo(freq[j])); } for (int k = 0; k < freq[j]; k++) { int nextPosition = docsAndPositions.nextPosition(); // only return something useful if requested and stored if (isPositionsRequested && storePositions) { assertThat(infoString + "positions for term: " + string, nextPosition, equalTo(termPos[k])); } else { assertThat(infoString + "positions for term: ", nextPosition, equalTo(-1)); } // only return something useful if requested and stored if (isPayloadRequested && storePayloads) { assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word"))); } else { assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(), equalTo(null)); } // only return something useful if requested and stored if (isOffsetRequested && storeOffsets) { assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k])); assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k])); } else { assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(), equalTo(-1)); assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(), equalTo(-1)); } } } assertThat(iterator.next(), Matchers.nullValue()); } } }
From source file:org.hibernate.search.test.util.SerializationTestHelper.java
License:LGPL
/**
 * Asserts that {@code copy} reports the same settings as {@code original} for every
 * getter compared below, and that both render the same {@code toString()}.
 *
 * @param copy     the field type to verify
 * @param original the field type it is expected to match
 */
private static void assertFieldType(FieldType copy, FieldType original) {
    assertThat(copy.omitNorms()).isEqualTo(original.omitNorms());
    assertThat(copy.storeTermVectorOffsets()).isEqualTo(original.storeTermVectorOffsets());
    assertThat(copy.storeTermVectorPayloads()).isEqualTo(original.storeTermVectorPayloads());
    // Positions were previously never compared; the offsets assertion was duplicated instead.
    assertThat(copy.storeTermVectorPositions()).isEqualTo(original.storeTermVectorPositions());
    assertThat(copy.docValuesType()).isEqualTo(original.docValuesType());
    assertThat(copy.indexOptions()).isEqualTo(original.indexOptions());
    assertThat(copy.numericPrecisionStep()).isEqualTo(original.numericPrecisionStep());
    assertThat(copy.numericType()).isEqualTo(original.numericType());
    assertThat(copy.stored()).isEqualTo(original.stored());
    assertThat(copy.storeTermVectors()).isEqualTo(original.storeTermVectors());
    assertThat(copy.tokenized()).isEqualTo(original.tokenized());
    assertThat(copy.toString()).isEqualTo(original.toString());
}