List of usage examples for the org.apache.lucene.codecs.TermStats constructor
public TermStats(int docFreq, long totalTermFreq)
From source file:com.sindicetech.siren.index.codecs.siren10.Siren10PostingsWriter.java
License:Open Source License
/** * Default merge impl: append documents, nodes and positions, mapping around * deletes.//from ww w. ja va2s . c o m * <p> * Bypass the {@link org.apache.lucene.codecs.PostingsConsumer#merge(org.apache.lucene.index.MergeState, org.apache.lucene.index.FieldInfo.IndexOptions, org.apache.lucene.index.DocsEnum, org.apache.lucene.util.FixedBitSet)} * methods and work directly with the BlockWriters for maximum efficiency. * <p> * TODO - Optimisation: If document blocks match the block size, and no * document deleted, then it would be possible to copy block directly as byte * array, avoiding decoding and encoding. **/ @Override public TermStats merge(final MergeState mergeState, final IndexOptions indexOptions, final DocsEnum postings, final FixedBitSet visitedDocs) throws IOException { int df = 0; long totTF = 0; postingsEnum.setMergeState(mergeState); postingsEnum.reset((MappingMultiDocsAndPositionsEnum) postings); while (postingsEnum.nextDocument()) { final int doc = postingsEnum.doc(); visitedDocs.set(doc); this.startDoc(doc, -1); final int nodeFreq = postingsEnum.nodeFreqInDoc(); docWriter.writeNodeFreq(nodeFreq); while (postingsEnum.nextNode()) { final IntsRef node = postingsEnum.node(); nodWriter.write(node); final int termFreqInNode = postingsEnum.termFreqInNode(); nodWriter.writeTermFreq(termFreqInNode); // reset current position for delta computation posWriter.resetCurrentPosition(); while (postingsEnum.nextPosition()) { final int position = postingsEnum.pos(); posWriter.write(position); totTF++; } } df++; } return new TermStats(df, totTF); }
From source file:org.elasticsearch.search.suggest.completion.CompletionPostingsFormatTest.java
License:Apache License
/**
 * Writes a tiny two-term completion field ("foofightersgenerator" and
 * "generator", one doc each) to {@code foo.txt} in the given directory,
 * using the provider's {@link FieldsConsumer}.
 * <p>
 * Fix over the original: {@code output} and {@code consumer} are now closed
 * via try-with-resources, so they are released even when an intermediate
 * call throws (the original leaked both on any exception).
 *
 * @param dir      directory to create {@code foo.txt} in
 * @param provider supplies the consumer and encodes the suggestion payloads
 * @throws IOException on any write failure
 */
private void writeData(Directory dir, Completion090PostingsFormat.CompletionLookupProvider provider)
        throws IOException {
    // Closed in reverse declaration order: consumer first, then output —
    // the same order the original used on the happy path.
    try (IndexOutput output = dir.createOutput("foo.txt", IOContext.DEFAULT);
         FieldsConsumer consumer = provider.consumer(output)) {
        FieldInfo fieldInfo = new FieldInfo("foo", true, 1, false, true, true,
                IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, DocValuesType.SORTED,
                DocValuesType.BINARY, new HashMap<String, String>());
        TermsConsumer addField = consumer.addField(fieldInfo);

        PostingsConsumer postingsConsumer = addField.startTerm(new BytesRef("foofightersgenerator"));
        postingsConsumer.startDoc(0, 1);
        postingsConsumer.addPosition(256 - 2,
                provider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")),
                0, 1);
        postingsConsumer.finishDoc();
        addField.finishTerm(new BytesRef("foofightersgenerator"), new TermStats(1, 1));

        // NOTE(review): the original reuses the PostingsConsumer returned for
        // the first term and ignores the one returned here — preserved as-is.
        addField.startTerm(new BytesRef("generator"));
        postingsConsumer.startDoc(0, 1);
        postingsConsumer.addPosition(256 - 1,
                provider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")),
                0, 1);
        postingsConsumer.finishDoc();
        addField.finishTerm(new BytesRef("generator"), new TermStats(1, 1));

        addField.finish(1, 1, 1);
    }
}
From source file:org.elasticsearch.search.suggest.CompletionPostingsFormatTest.java
License:Apache License
/**
 * Round-trips two completion terms through the AnalyzingCompletionLookupProvider:
 * writes them to a RAMDirectory, reloads the data as a {@link Lookup}, and
 * verifies that the prefix "ge" suggests "Generator - Foo Fighters" with
 * payload "id:10".
 */
@Test
public void testCompletionPostingsFormat() throws IOException {
    AnalyzingCompletionLookupProvider lookupProvider =
            new AnalyzingCompletionLookupProvider(true, false, true, true);
    RAMDirectory directory = new RAMDirectory();

    // --- write phase: two terms, one document each ---
    IndexOutput out = directory.createOutput("foo.txt", IOContext.DEFAULT);
    FieldsConsumer fieldsConsumer = lookupProvider.consumer(out);
    FieldInfo fieldInfo = new FieldInfo("foo", true, 1, false, true, true,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, DocValuesType.SORTED,
            DocValuesType.BINARY, new HashMap<String, String>());
    TermsConsumer termsConsumer = fieldsConsumer.addField(fieldInfo);

    PostingsConsumer postings = termsConsumer.startTerm(new BytesRef("foofightersgenerator"));
    postings.startDoc(0, 1);
    postings.addPosition(256 - 2,
            lookupProvider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")),
            0, 1);
    postings.finishDoc();
    termsConsumer.finishTerm(new BytesRef("foofightersgenerator"), new TermStats(1, 1));

    termsConsumer.startTerm(new BytesRef("generator"));
    postings.startDoc(0, 1);
    postings.addPosition(256 - 1,
            lookupProvider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")),
            0, 1);
    postings.finishDoc();
    termsConsumer.finishTerm(new BytesRef("generator"), new TermStats(1, 1));

    termsConsumer.finish(1, 1, 1);
    fieldsConsumer.close();
    out.close();

    // --- read phase: load the lookup back and query it ---
    IndexInput in = directory.openInput("foo.txt", IOContext.DEFAULT);
    LookupFactory loaded = lookupProvider.load(in);
    PostingsFormatProvider postingsFormat =
            new PreBuiltPostingsFormatProvider(new ElasticSearch090PostingsFormat());
    NamedAnalyzer namedAnalyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT));
    Lookup lookup = loaded.getLookup(
            new CompletionFieldMapper(new Names("foo"), namedAnalyzer, namedAnalyzer, postingsFormat,
                    null, true, true, true, Integer.MAX_VALUE),
            new CompletionSuggestionContext(null));

    List<LookupResult> suggestions = lookup.lookup("ge", false, 10);
    assertThat(suggestions.get(0).key.toString(), equalTo("Generator - Foo Fighters"));
    assertThat(suggestions.get(0).payload.utf8ToString(), equalTo("id:10"));
    directory.close();
}
From source file:org.elasticsearch.test.integration.search.suggest.CompletionPostingsFormatTest.java
License:Apache License
@Test public void testCompletionPostingsFormat() throws IOException { AnalyzingCompletionLookupProvider provider = new AnalyzingCompletionLookupProvider(true, false, true, true); RAMDirectory dir = new RAMDirectory(); IndexOutput output = dir.createOutput("foo.txt", IOContext.DEFAULT); FieldsConsumer consumer = provider.consumer(output); FieldInfo fieldInfo = new FieldInfo("foo", true, 1, false, true, true, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, DocValuesType.SORTED, DocValuesType.BINARY, new HashMap<String, String>()); TermsConsumer addField = consumer.addField(fieldInfo); PostingsConsumer postingsConsumer = addField.startTerm(new BytesRef("foofightersgenerator")); postingsConsumer.startDoc(0, 1);// w ww .j a v a2 s. c o m postingsConsumer.addPosition(256 - 2, provider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")), 0, 1); postingsConsumer.finishDoc(); addField.finishTerm(new BytesRef("foofightersgenerator"), new TermStats(1, 1)); addField.startTerm(new BytesRef("generator")); postingsConsumer.startDoc(0, 1); postingsConsumer.addPosition(256 - 1, provider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")), 0, 1); postingsConsumer.finishDoc(); addField.finishTerm(new BytesRef("generator"), new TermStats(1, 1)); addField.finish(1, 1, 1); consumer.close(); output.close(); IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT); LookupFactory load = provider.load(input); PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new ElasticSearch090PostingsFormat()); NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT)); Lookup lookup = load.getLookup( new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true), false); List<LookupResult> result = lookup.lookup("ge", false, 10); assertThat(result.get(0).key.toString(), equalTo("Generator - Foo Fighters")); assertThat(result.get(0).payload.utf8ToString(), 
equalTo("id:10")); dir.close(); }
From source file:org.sindice.siren.index.codecs.siren10.Siren10PostingsWriter.java
License:Apache License
/** * Default merge impl: append documents, nodes and positions, mapping around * deletes.// www . ja v a 2 s . c o m * <p> * Bypass the {@link Siren10PostingsWriter} methods and work directly with * the BlockWriters for maximum efficiency. * <p> * TODO - Optimisation: If document blocks match the block size, and no * document deleted, then it would be possible to copy block directly as byte * array, avoiding decoding and encoding. **/ @Override public TermStats merge(final MergeState mergeState, final DocsEnum postings, final FixedBitSet visitedDocs) throws IOException { int df = 0; long totTF = 0; postingsEnum.setMergeState(mergeState); postingsEnum.reset((MappingMultiDocsAndPositionsEnum) postings); while (postingsEnum.nextDocument()) { final int doc = postingsEnum.doc(); visitedDocs.set(doc); this.startDoc(doc, -1); final int nodeFreq = postingsEnum.nodeFreqInDoc(); docWriter.writeNodeFreq(nodeFreq); while (postingsEnum.nextNode()) { final IntsRef node = postingsEnum.node(); nodWriter.write(node); final int termFreqInNode = postingsEnum.termFreqInNode(); nodWriter.writeTermFreq(termFreqInNode); // reset current position for delta computation posWriter.resetCurrentPosition(); while (postingsEnum.nextPosition()) { final int position = postingsEnum.pos(); posWriter.write(position); totTF++; } } df++; } return new TermStats(df, totTF); }