Example usage for org.apache.lucene.codecs TermStats TermStats

List of usage examples for org.apache.lucene.codecs TermStats TermStats

Introduction

On this page you can find example usages of the org.apache.lucene.codecs TermStats constructor, TermStats(int, long).

Prototype

public TermStats(int docFreq, long totalTermFreq) 

Source Link

Document

Sole constructor.

Usage

From source file:com.sindicetech.siren.index.codecs.siren10.Siren10PostingsWriter.java

License:Open Source License

/**
 * Default merge implementation: appends documents, nodes and positions from
 * the segments being merged, remapping document ids around deletions.
 * <p>
 * Bypasses the {@link org.apache.lucene.codecs.PostingsConsumer#merge(org.apache.lucene.index.MergeState, org.apache.lucene.index.FieldInfo.IndexOptions, org.apache.lucene.index.DocsEnum, org.apache.lucene.util.FixedBitSet)}
 * path and drives the BlockWriters directly for maximum efficiency.
 * <p>
 * TODO - Optimisation: if a document block matches the block size and no
 * document is deleted, the block could be copied verbatim as a byte array,
 * skipping the decode/encode round trip.
 *
 * @return the merged term statistics (document frequency and total term frequency)
 **/
@Override
public TermStats merge(final MergeState mergeState, final IndexOptions indexOptions, final DocsEnum postings,
        final FixedBitSet visitedDocs) throws IOException {
    int docFreq = 0;
    long totalTermFreq = 0;

    postingsEnum.setMergeState(mergeState);
    postingsEnum.reset((MappingMultiDocsAndPositionsEnum) postings);

    // Walk every surviving (non-deleted, remapped) document of the merged postings.
    while (postingsEnum.nextDocument()) {
        final int docId = postingsEnum.doc();
        visitedDocs.set(docId);

        this.startDoc(docId, -1);

        docWriter.writeNodeFreq(postingsEnum.nodeFreqInDoc());

        // Copy each node of the current document together with its term frequency.
        while (postingsEnum.nextNode()) {
            nodWriter.write(postingsEnum.node());
            nodWriter.writeTermFreq(postingsEnum.termFreqInNode());

            // Positions are delta-encoded per node: restart the delta base here.
            posWriter.resetCurrentPosition();

            while (postingsEnum.nextPosition()) {
                posWriter.write(postingsEnum.pos());
                totalTermFreq++;
            }
        }
        docFreq++;
    }

    return new TermStats(docFreq, totalTermFreq);
}

From source file:org.elasticsearch.search.suggest.completion.CompletionPostingsFormatTest.java

License:Apache License

/**
 * Writes a tiny two-term completion field to {@code foo.txt} in the given
 * directory via the provider's {@link FieldsConsumer}: the terms
 * "foofightersgenerator" and "generator", each with one document and one
 * position carrying a payload built from the surface form, a weight and an id.
 *
 * @param dir      directory to create {@code foo.txt} in
 * @param provider lookup provider supplying the consumer and payload encoding
 * @throws IOException if writing the postings fails
 */
private void writeData(Directory dir, Completion090PostingsFormat.CompletionLookupProvider provider)
        throws IOException {
    // try-with-resources closes the consumer before the output (reverse
    // declaration order), preserving the original close sequence while also
    // releasing both resources if writing fails midway (the original leaked
    // them on exception).
    try (IndexOutput output = dir.createOutput("foo.txt", IOContext.DEFAULT);
            FieldsConsumer consumer = provider.consumer(output)) {
        FieldInfo fieldInfo = new FieldInfo("foo", true, 1, false, true, true,
                IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, DocValuesType.SORTED, DocValuesType.BINARY,
                new HashMap<String, String>());
        TermsConsumer addField = consumer.addField(fieldInfo);

        PostingsConsumer postingsConsumer = addField.startTerm(new BytesRef("foofightersgenerator"));
        postingsConsumer.startDoc(0, 1);
        // NOTE(review): position appears to encode the weight as (256 - weight) — confirm.
        postingsConsumer.addPosition(256 - 2,
                provider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")), 0, 1);
        postingsConsumer.finishDoc();
        addField.finishTerm(new BytesRef("foofightersgenerator"), new TermStats(1, 1));
        // Fix: capture the consumer returned by startTerm instead of silently
        // reusing the one from the previous term (the original discarded it).
        postingsConsumer = addField.startTerm(new BytesRef("generator"));
        postingsConsumer.startDoc(0, 1);
        postingsConsumer.addPosition(256 - 1,
                provider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")), 0, 1);
        postingsConsumer.finishDoc();
        addField.finishTerm(new BytesRef("generator"), new TermStats(1, 1));
        addField.finish(1, 1, 1);
    }
}

From source file:org.elasticsearch.search.suggest.CompletionPostingsFormatTest.java

License:Apache License

/**
 * Round-trips two suggestion entries through the completion postings format:
 * writes the terms "foofightersgenerator" and "generator" with a payload,
 * loads the resulting lookup back, and verifies that the prefix "ge" resolves
 * to the stored surface form ("Generator - Foo Fighters") and payload ("id:10").
 */
@Test
public void testCompletionPostingsFormat() throws IOException {
    AnalyzingCompletionLookupProvider provider = new AnalyzingCompletionLookupProvider(true, false, true, true);
    RAMDirectory dir = new RAMDirectory();
    IndexOutput output = dir.createOutput("foo.txt", IOContext.DEFAULT);
    FieldsConsumer consumer = provider.consumer(output);
    FieldInfo fieldInfo = new FieldInfo("foo", true, 1, false, true, true,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, DocValuesType.SORTED, DocValuesType.BINARY,
            new HashMap<String, String>());
    TermsConsumer addField = consumer.addField(fieldInfo);

    PostingsConsumer postingsConsumer = addField.startTerm(new BytesRef("foofightersgenerator"));
    postingsConsumer.startDoc(0, 1);
    // NOTE(review): position appears to encode the weight as (256 - weight) — confirm.
    postingsConsumer.addPosition(256 - 2,
            provider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")), 0, 1);
    postingsConsumer.finishDoc();
    addField.finishTerm(new BytesRef("foofightersgenerator"), new TermStats(1, 1));
    // Fix: capture the consumer returned by startTerm instead of silently
    // reusing the one from the previous term (the original discarded it).
    postingsConsumer = addField.startTerm(new BytesRef("generator"));
    postingsConsumer.startDoc(0, 1);
    postingsConsumer.addPosition(256 - 1,
            provider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")), 0, 1);
    postingsConsumer.finishDoc();
    addField.finishTerm(new BytesRef("generator"), new TermStats(1, 1));
    addField.finish(1, 1, 1);
    consumer.close();
    output.close();

    // NOTE(review): 'input' is never closed; presumably load() reads it fully
    // into memory, so closing it afterwards would be safe — verify before changing.
    IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT);
    LookupFactory load = provider.load(input);
    PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new ElasticSearch090PostingsFormat());
    NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT));
    Lookup lookup = load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null,
            true, true, true, Integer.MAX_VALUE), new CompletionSuggestionContext(null));
    List<LookupResult> result = lookup.lookup("ge", false, 10);
    assertThat(result.get(0).key.toString(), equalTo("Generator - Foo Fighters"));
    assertThat(result.get(0).payload.utf8ToString(), equalTo("id:10"));
    dir.close();
}

From source file:org.elasticsearch.test.integration.search.suggest.CompletionPostingsFormatTest.java

License:Apache License

/**
 * Round-trips two suggestion entries through the completion postings format
 * (older Elasticsearch API variant): writes the terms "foofightersgenerator"
 * and "generator" with a payload, loads the lookup back, and verifies that
 * the prefix "ge" resolves to the stored surface form and payload.
 */
@Test
public void testCompletionPostingsFormat() throws IOException {
    AnalyzingCompletionLookupProvider provider = new AnalyzingCompletionLookupProvider(true, false, true, true);
    RAMDirectory dir = new RAMDirectory();
    IndexOutput output = dir.createOutput("foo.txt", IOContext.DEFAULT);
    FieldsConsumer consumer = provider.consumer(output);
    FieldInfo fieldInfo = new FieldInfo("foo", true, 1, false, true, true,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, DocValuesType.SORTED, DocValuesType.BINARY,
            new HashMap<String, String>());
    TermsConsumer addField = consumer.addField(fieldInfo);

    PostingsConsumer postingsConsumer = addField.startTerm(new BytesRef("foofightersgenerator"));
    postingsConsumer.startDoc(0, 1);
    // NOTE(review): position appears to encode the weight as (256 - weight) — confirm.
    postingsConsumer.addPosition(256 - 2,
            provider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")), 0, 1);
    postingsConsumer.finishDoc();
    addField.finishTerm(new BytesRef("foofightersgenerator"), new TermStats(1, 1));
    // Fix: capture the consumer returned by startTerm instead of silently
    // reusing the one from the previous term (the original discarded it).
    postingsConsumer = addField.startTerm(new BytesRef("generator"));
    postingsConsumer.startDoc(0, 1);
    postingsConsumer.addPosition(256 - 1,
            provider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")), 0, 1);
    postingsConsumer.finishDoc();
    addField.finishTerm(new BytesRef("generator"), new TermStats(1, 1));
    addField.finish(1, 1, 1);
    consumer.close();
    output.close();

    // NOTE(review): 'input' is never closed; presumably load() reads it fully
    // into memory, so closing it afterwards would be safe — verify before changing.
    IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT);
    LookupFactory load = provider.load(input);
    PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new ElasticSearch090PostingsFormat());
    NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT));
    Lookup lookup = load.getLookup(
            new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true),
            false);
    List<LookupResult> result = lookup.lookup("ge", false, 10);
    assertThat(result.get(0).key.toString(), equalTo("Generator - Foo Fighters"));
    assertThat(result.get(0).payload.utf8ToString(), equalTo("id:10"));
    dir.close();
}

From source file:org.sindice.siren.index.codecs.siren10.Siren10PostingsWriter.java

License:Apache License

/**
 * Default merge implementation: appends documents, nodes and positions from
 * the segments being merged, remapping document ids around deletions.
 * <p>
 * Bypasses the {@link Siren10PostingsWriter} methods and drives the
 * BlockWriters directly for maximum efficiency.
 * <p>
 * TODO - Optimisation: if a document block matches the block size and no
 * document is deleted, the block could be copied verbatim as a byte array,
 * skipping the decode/encode round trip.
 *
 * @return the merged term statistics (document frequency and total term frequency)
 **/
@Override
public TermStats merge(final MergeState mergeState, final DocsEnum postings, final FixedBitSet visitedDocs)
        throws IOException {
    int docFreq = 0;
    long totalTermFreq = 0;

    postingsEnum.setMergeState(mergeState);
    postingsEnum.reset((MappingMultiDocsAndPositionsEnum) postings);

    // Walk every surviving (non-deleted, remapped) document of the merged postings.
    while (postingsEnum.nextDocument()) {
        final int docId = postingsEnum.doc();
        visitedDocs.set(docId);

        this.startDoc(docId, -1);

        docWriter.writeNodeFreq(postingsEnum.nodeFreqInDoc());

        // Copy each node of the current document together with its term frequency.
        while (postingsEnum.nextNode()) {
            nodWriter.write(postingsEnum.node());
            nodWriter.writeTermFreq(postingsEnum.termFreqInNode());

            // Positions are delta-encoded per node: restart the delta base here.
            posWriter.resetCurrentPosition();

            while (postingsEnum.nextPosition()) {
                posWriter.write(postingsEnum.pos());
                totalTermFreq++;
            }
        }
        docFreq++;
    }

    return new TermStats(docFreq, totalTermFreq);
}