Example usage for org.apache.lucene.codecs FieldsConsumer FieldsConsumer

List of usage examples for org.apache.lucene.codecs FieldsConsumer FieldsConsumer

Introduction

On this page you can find example usages of org.apache.lucene.codecs FieldsConsumer FieldsConsumer.

Prototype

protected FieldsConsumer() 

Source Link

Document

Sole constructor.

Usage

From source file:com.rocana.lucene.codec.v1.RocanaBasePostingsFormatTestCase.java

License:Apache License

/**
 * Exercises the "inverted write" freedom of the FieldsConsumer API: a wrapping
 * PostingsFormat iterates the postings handed to write() more than once
 * (forward iteration, seekExact, seekCeil), tallies its own docFreq /
 * totalTermFreq stats for the "body" field, and finally verifies those tallies
 * against what the resulting IndexReader reports.
 */
@Override
public void testInvertedWrite() throws Exception {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    // Cap the token length so terms stay within IndexWriter's hard limit.
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);

    // Must be concurrent because thread(s) can be merging
    // while up to one thread flushes, and each of those
    // threads iterates over the map while the flushing
    // thread might be adding to it:
    final Map<String, TermFreqs> termFreqs = new ConcurrentHashMap<>();

    final AtomicLong sumDocFreq = new AtomicLong();
    final AtomicLong sumTotalTermFreq = new AtomicLong();

    // TODO: would be better to use / delegate to the current
    // Codec returned by getCodec()

    iwc.setCodec(new AssertingCodec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {

            // Unwrap per-field postings formats until we reach the concrete
            // format the current codec would actually use for this field.
            PostingsFormat p = getCodec().postingsFormat();
            if (p instanceof PerFieldPostingsFormat) {
                p = ((PerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            if (p instanceof RocanaPerFieldPostingsFormat) {
                p = ((RocanaPerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            final PostingsFormat defaultPostingsFormat = p;

            final Thread mainThread = Thread.currentThread();

            if (field.equals("body")) {

                // A PF that counts up some stats and then in
                // the end we verify the stats match what the
                // final IndexReader says, just to exercise the
                // new freedom of iterating the postings more
                // than once at flush/merge:

                return new PostingsFormat(defaultPostingsFormat.getName()) {

                    @Override
                    public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {

                        final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);

                        return new FieldsConsumer() {
                            @Override
                            public void write(Fields fields) throws IOException {
                                // Let the real consumer write first, then re-read
                                // the same Fields to gather stats:
                                fieldsConsumer.write(fields);

                                boolean isMerge = state.context.context == IOContext.Context.MERGE;

                                // We only use one thread for flushing
                                // in this test:
                                assert isMerge || Thread.currentThread() == mainThread;

                                // We iterate the provided TermsEnum
                                // twice, so we exercise this new freedom
                                // with the inverted API; if
                                // addOnSecondPass is true, we add up
                                // term stats on the 2nd iteration:
                                boolean addOnSecondPass = random().nextBoolean();

                                //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);

                                // Gather our own stats:
                                Terms terms = fields.terms("body");
                                assert terms != null;

                                TermsEnum termsEnum = terms.iterator();
                                PostingsEnum docs = null;
                                while (termsEnum.next() != null) {
                                    BytesRef term = termsEnum.term();
                                    // TODO: also sometimes ask for payloads/offsets?
                                    boolean noPositions = random().nextBoolean();
                                    if (noPositions) {
                                        docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                    } else {
                                        docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                    }
                                    int docFreq = 0;
                                    long totalTermFreq = 0;
                                    while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                        docFreq++;
                                        totalTermFreq += docs.freq();
                                        // Pull a random prefix of the positions to
                                        // exercise partial consumption:
                                        int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                        if (!noPositions) {
                                            for (int i = 0; i < limit; i++) {
                                                docs.nextPosition();
                                            }
                                        }
                                    }

                                    String termString = term.utf8ToString();

                                    // During merge we should only see terms
                                    // we had already seen during a
                                    // previous flush:
                                    assertTrue(isMerge == false || termFreqs.containsKey(termString));

                                    if (isMerge == false) {
                                        if (addOnSecondPass == false) {
                                            TermFreqs tf = termFreqs.get(termString);
                                            if (tf == null) {
                                                tf = new TermFreqs();
                                                termFreqs.put(termString, tf);
                                            }
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        } else if (termFreqs.containsKey(termString) == false) {
                                            // Add placeholder (2nd pass will
                                            // set its counts):
                                            termFreqs.put(termString, new TermFreqs());
                                        }
                                    }
                                }

                                // Also test seeking the TermsEnum:
                                for (String term : termFreqs.keySet()) {
                                    if (termsEnum.seekExact(new BytesRef(term))) {
                                        // TODO: also sometimes ask for payloads/offsets?
                                        boolean noPositions = random().nextBoolean();
                                        if (noPositions) {
                                            docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                        } else {
                                            docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                        }

                                        int docFreq = 0;
                                        long totalTermFreq = 0;
                                        while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                            docFreq++;
                                            totalTermFreq += docs.freq();
                                            int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                            if (!noPositions) {
                                                for (int i = 0; i < limit; i++) {
                                                    docs.nextPosition();
                                                }
                                            }
                                        }

                                        if (isMerge == false && addOnSecondPass) {
                                            // 2nd pass: fill in the counts we skipped
                                            // during the first iteration:
                                            TermFreqs tf = termFreqs.get(term);
                                            assert tf != null;
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        }

                                        //System.out.println("  term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
                                        assertTrue(docFreq <= termFreqs.get(term).docFreq);
                                        assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
                                    }
                                }

                                // Also test seekCeil
                                for (int iter = 0; iter < 10; iter++) {
                                    BytesRef term = new BytesRef(
                                            TestUtil.randomRealisticUnicodeString(random()));
                                    SeekStatus status = termsEnum.seekCeil(term);
                                    if (status == SeekStatus.NOT_FOUND) {
                                        assertTrue(term.compareTo(termsEnum.term()) < 0);
                                    }
                                }
                            }

                            @Override
                            public void close() throws IOException {
                                fieldsConsumer.close();
                            }
                        };
                    }

                    @Override
                    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
                        return defaultPostingsFormat.fieldsProducer(state);
                    }
                };
            } else {
                // Non-"body" fields use the default format unchanged.
                return defaultPostingsFormat;
            }
        }
    });

    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);

    // Index roughly atLeast(100) KB of line-file docs:
    LineFileDocs docs = new LineFileDocs(random());
    int bytesToIndex = atLeast(100) * 1024;
    int bytesIndexed = 0;
    while (bytesIndexed < bytesToIndex) {
        Document doc = docs.nextDoc();
        w.addDocument(doc);
        bytesIndexed += RamUsageTester.sizeOf(doc);
    }

    IndexReader r = w.getReader();
    w.close();

    // Verify the stats we tallied during writing against the reader:
    Terms terms = MultiFields.getTerms(r, "body");
    assertEquals(sumDocFreq.get(), terms.getSumDocFreq());
    assertEquals(sumTotalTermFreq.get(), terms.getSumTotalTermFreq());

    TermsEnum termsEnum = terms.iterator();
    long termCount = 0;
    boolean supportsOrds = true;
    while (termsEnum.next() != null) {
        BytesRef term = termsEnum.term();
        assertEquals(termFreqs.get(term.utf8ToString()).docFreq, termsEnum.docFreq());
        assertEquals(termFreqs.get(term.utf8ToString()).totalTermFreq, termsEnum.totalTermFreq());
        if (supportsOrds) {
            long ord;
            try {
                ord = termsEnum.ord();
            } catch (UnsupportedOperationException uoe) {
                // Not all postings formats implement ord(); stop checking
                // after the first unsupported call:
                supportsOrds = false;
                ord = -1;
            }
            if (ord != -1) {
                assertEquals(termCount, ord);
            }
        }
        termCount++;
    }
    assertEquals(termFreqs.size(), termCount);

    r.close();
    dir.close();
}

From source file:org.codelibs.elasticsearch.search.suggest.completion2x.AnalyzingCompletionLookupProvider.java

License:Apache License

/**
 * Opens a {@link FieldsConsumer} over {@code output} for the legacy
 * completion-2.x format: writes the codec header immediately; close()
 * appends the per-field offset table, a pointer to that table, and the
 * codec footer, then closes the output.
 *
 * <p>Writing postings through this consumer is not supported in this
 * build — write(Fields) fails fast for any non-null field terms.
 */
@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
    CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST);
    return new FieldsConsumer() {
        // Offset in `output` at which each field's data would start.
        private final Map<String, Long> fieldOffsets = new HashMap<>();

        @Override
        public void close() throws IOException {
            try {
                /*
                 * write the offsets per field such that we know where
                 * we need to load the FSTs from
                 */
                long pointer = output.getFilePointer();
                output.writeVInt(fieldOffsets.size());
                for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
                    output.writeString(entry.getKey());
                    output.writeVLong(entry.getValue());
                }
                // Trailing pointer lets a reader seek back to the offset table.
                output.writeLong(pointer);
                CodecUtil.writeFooter(output);
            } finally {
                IOUtils.close(output);
            }
        }

        @Override
        public void write(Fields fields) throws IOException {
            for (String field : fields) {
                Terms terms = fields.terms(field);
                if (terms == null) {
                    continue;
                }
                // Fail fast with an accurate message: this build does not
                // support writing completion-2.x postings. (The original code
                // threw a misleading "QueryBuilders" message after two dead
                // statements and carried ~50 lines of commented-out code.)
                throw new UnsupportedOperationException(
                        "writing completion 2.x postings is not supported");
            }
        }
    };
}

From source file:org.elasticsearch.index.codec.postingformat.Elasticsearch090RWPostingsFormat.java

License:Apache License

/**
 * Builds a {@link FieldsConsumer} that routes all fields except the UID
 * field straight through the wrapped delegate, and routes only the UID
 * field through the bloom-filtered path.
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    final PostingsFormat delegate = getDefaultWrapped();
    // Bloom-filtered consumer whose inner consumer is backed by the delegate.
    final BloomFilteredFieldsConsumer bloomConsumer = new BloomFilterPostingsFormat(delegate,
            BloomFilter.Factory.DEFAULT) {
        @Override
        public BloomFilteredFieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
            return new BloomFilteredFieldsConsumer(delegate.fieldsConsumer(state), state, delegate);
        }
    }.fieldsConsumer(state);
    return new FieldsConsumer() {

        @Override
        public void write(Fields fields) throws IOException {

            // Phase 1: every field except UID bypasses the bloom filter.
            Fields nonUidFields = new FilterLeafReader.FilterFields(fields) {
                @Override
                public Iterator<String> iterator() {
                    return Iterators.filter(this.in.iterator(), Predicates.not(UID_FIELD_FILTER));
                }
            };
            bloomConsumer.getDelegate().write(nonUidFields);

            // Phase 2: only go through bloom for the UID field.
            Fields uidOnlyFields = new FilterLeafReader.FilterFields(fields) {
                @Override
                public Iterator<String> iterator() {
                    return Iterators.singletonIterator(UidFieldMapper.NAME);
                }
            };
            bloomConsumer.write(uidOnlyFields);
        }

        @Override
        public void close() throws IOException {
            bloomConsumer.close();
        }
    };
}

From source file:org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat.java

License:Apache License

/**
 * Builds a {@link FieldsConsumer} that sends only the UID field through the
 * bloom-filtered consumer; every other field goes to the wrapped delegate.
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    final BloomFilteredFieldsConsumer bloomConsumer = bloomPostings.fieldsConsumer(state);
    return new FieldsConsumer() {

        @Override
        public TermsConsumer addField(FieldInfo field) throws IOException {
            // Only the UID field is routed through the bloom filter.
            boolean isUidField = UidFieldMapper.NAME.equals(field.name);
            return isUidField ? bloomConsumer.addField(field)
                    : bloomConsumer.getDelegate().addField(field);
        }

        @Override
        public void close() throws IOException {
            bloomConsumer.close();
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider.java

License:Apache License

/**
 * Opens a {@link FieldsConsumer} over {@code output}: writes the codec
 * header up front, builds one suggester FST per field via the old
 * TermsConsumer API, and on close() appends a per-field offset table plus
 * a trailing pointer to it so readers can locate each field's FST.
 */
@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
    CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST);
    return new FieldsConsumer() {
        // Offset in `output` where each field's FST was saved.
        private Map<FieldInfo, Long> fieldOffsets = new HashMap<FieldInfo, Long>();

        @Override
        public void close() throws IOException {
            try { /*
                   * write the offsets per field such that we know where
                   * we need to load the FSTs from
                   */
                long pointer = output.getFilePointer();
                output.writeVInt(fieldOffsets.size());
                for (Map.Entry<FieldInfo, Long> entry : fieldOffsets.entrySet()) {
                    output.writeString(entry.getKey().name);
                    output.writeVLong(entry.getValue());
                }
                // Trailing pointer lets a reader seek back to the offset table.
                output.writeLong(pointer);
                output.flush();
            } finally {
                IOUtils.close(output);
            }
        }

        @Override
        public TermsConsumer addField(final FieldInfo field) throws IOException {

            return new TermsConsumer() {
                // Accumulates analyzed forms / surface forms for this field's FST.
                final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                        maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
                final CompletionPostingsConsumer postingsConsumer = new CompletionPostingsConsumer(
                        AnalyzingCompletionLookupProvider.this, builder);

                @Override
                public PostingsConsumer startTerm(BytesRef text) throws IOException {
                    builder.startTerm(text);
                    return postingsConsumer;
                }

                @Override
                public Comparator<BytesRef> getComparator() throws IOException {
                    return BytesRef.getUTF8SortedAsUnicodeComparator();
                }

                @Override
                public void finishTerm(BytesRef text, TermStats stats) throws IOException {
                    builder.finishTerm(stats.docFreq); // use doc freq as a fallback weight
                }

                @Override
                public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
                    /*
                     * Here we are done processing the field and we can
                     * build the FST and write it to disk.
                     */
                    FST<Pair<Long, BytesRef>> build = builder.build();
                    assert build != null
                            || docCount == 0 : "the FST is null but docCount is != 0 actual value: [" + docCount
                                    + "]";
                    /*
                     * it's possible that the FST is null if we have 2 segments that get merged
                     * and all docs that have a value in this field are deleted. This will cause
                     * a consumer to be created but it doesn't consume any values causing the FSTBuilder
                     * to return null.
                     */
                    if (build != null) {
                        fieldOffsets.put(field, output.getFilePointer());
                        build.save(output);
                        /* write some more meta-info */
                        output.writeVInt(postingsConsumer.getMaxAnalyzedPathsForOneInput());
                        output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                        output.writeInt(maxGraphExpansions); // can be negative
                        // Pack the suggester flags into a single options vint:
                        int options = 0;
                        options |= preserveSep ? SERIALIZE_PRESERVE_SEPERATORS : 0;
                        options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
                        options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
                        output.writeVInt(options);
                        output.writeVInt(XAnalyzingSuggester.SEP_LABEL);
                        output.writeVInt(XAnalyzingSuggester.END_BYTE);
                        output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP);
                        output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER);
                    }
                }
            };
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProviderV1.java

License:Apache License

/**
 * V1 variant of the completion consumer: writes the codec header (older
 * CODEC_VERSION), builds one suggester FST per field, and on close()
 * appends the per-field offset table plus a trailing pointer to it.
 * Unlike the latest version, finish() does not write the SEP_LABEL /
 * END_BYTE / PAYLOAD_SEP / HOLE_CHARACTER trailer values.
 */
@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
    CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION);
    return new FieldsConsumer() {
        // Offset in `output` where each field's FST was saved.
        private Map<FieldInfo, Long> fieldOffsets = new HashMap<FieldInfo, Long>();

        @Override
        public void close() throws IOException {
            try { /*
                   * write the offsets per field such that we know where
                   * we need to load the FSTs from
                   */
                long pointer = output.getFilePointer();
                output.writeVInt(fieldOffsets.size());
                for (Map.Entry<FieldInfo, Long> entry : fieldOffsets.entrySet()) {
                    output.writeString(entry.getKey().name);
                    output.writeVLong(entry.getValue());
                }
                // Trailing pointer lets a reader seek back to the offset table.
                output.writeLong(pointer);
                output.flush();
            } finally {
                IOUtils.close(output);
            }
        }

        @Override
        public TermsConsumer addField(final FieldInfo field) throws IOException {

            return new TermsConsumer() {
                // Accumulates analyzed forms / surface forms for this field's FST.
                final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                        maxSurfaceFormsPerAnalyzedForm, hasPayloads, PAYLOAD_SEP);
                final CompletionPostingsConsumer postingsConsumer = new CompletionPostingsConsumer(
                        AnalyzingCompletionLookupProviderV1.this, builder);

                @Override
                public PostingsConsumer startTerm(BytesRef text) throws IOException {
                    builder.startTerm(text);
                    return postingsConsumer;
                }

                @Override
                public Comparator<BytesRef> getComparator() throws IOException {
                    return BytesRef.getUTF8SortedAsUnicodeComparator();
                }

                @Override
                public void finishTerm(BytesRef text, TermStats stats) throws IOException {
                    builder.finishTerm(stats.docFreq); // use doc freq as a fallback weight
                }

                @Override
                public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
                    /*
                     * Here we are done processing the field and we can
                     * build the FST and write it to disk.
                     */
                    FST<Pair<Long, BytesRef>> build = builder.build();
                    assert build != null
                            || docCount == 0 : "the FST is null but docCount is != 0 actual value: [" + docCount
                                    + "]";
                    /*
                     * it's possible that the FST is null if we have 2 segments that get merged
                     * and all docs that have a value in this field are deleted. This will cause
                     * a consumer to be created but it doesn't consume any values causing the FSTBuilder
                     * to return null.
                     */
                    if (build != null) {
                        fieldOffsets.put(field, output.getFilePointer());
                        build.save(output);
                        /* write some more meta-info */
                        output.writeVInt(postingsConsumer.getMaxAnalyzedPathsForOneInput());
                        output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                        output.writeInt(maxGraphExpansions); // can be negative
                        // Pack the suggester flags into a single options vint:
                        int options = 0;
                        options |= preserveSep ? SERIALIZE_PRESERVE_SEPERATORS : 0;
                        options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
                        options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
                        output.writeVInt(options);
                    }
                }
            };
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProvider.java

License:Apache License

/**
 * Opens a {@link FieldsConsumer} over {@code output}: writes the codec
 * header up front; write(Fields) walks each field's postings (payloads
 * carry surface form / payload / weight), builds a suggester FST per
 * field, saves it, and records its offset; close() appends the per-field
 * offset table, a trailing pointer to it, and the codec footer.
 */
@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
    CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST);
    return new FieldsConsumer() {
        // Offset in `output` where each field's FST was saved.
        private Map<String, Long> fieldOffsets = new HashMap<>();

        @Override
        public void close() throws IOException {
            try {
                /*
                 * write the offsets per field such that we know where
                 * we need to load the FSTs from
                 */
                long pointer = output.getFilePointer();
                output.writeVInt(fieldOffsets.size());
                for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
                    output.writeString(entry.getKey());
                    output.writeVLong(entry.getValue());
                }
                // Trailing pointer lets a reader seek back to the offset table.
                output.writeLong(pointer);
                CodecUtil.writeFooter(output);
            } finally {
                IOUtils.close(output);
            }
        }

        @Override
        public void write(Fields fields) throws IOException {
            for (String field : fields) {
                Terms terms = fields.terms(field);
                if (terms == null) {
                    continue;
                }
                TermsEnum termsEnum = terms.iterator();
                PostingsEnum docsEnum = null;
                // Reused scratch object the payload is parsed into:
                final SuggestPayload spare = new SuggestPayload();
                int maxAnalyzedPathsForOneInput = 0;
                final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                        maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
                int docCount = 0;
                while (true) {
                    BytesRef term = termsEnum.next();
                    if (term == null) {
                        break;
                    }
                    docsEnum = termsEnum.postings(null, docsEnum, PostingsEnum.PAYLOADS);
                    builder.startTerm(term);
                    int docFreq = 0;
                    while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        for (int i = 0; i < docsEnum.freq(); i++) {
                            final int position = docsEnum.nextPosition();
                            AnalyzingCompletionLookupProvider.this.parsePayload(docsEnum.getPayload(), spare);
                            builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
                            // multi fields have the same surface form so we sum up here
                            maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
                        }
                        docFreq++;
                        docCount = Math.max(docCount, docsEnum.docID() + 1);
                    }
                    builder.finishTerm(docFreq);
                }
                /*
                 * Here we are done processing the field and we can
                 * build the FST and write it to disk.
                 */
                FST<Pair<Long, BytesRef>> build = builder.build();
                assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: ["
                        + docCount + "]";
                /*
                 * it's possible that the FST is null if we have 2 segments that get merged
                 * and all docs that have a value in this field are deleted. This will cause
                 * a consumer to be created but it doesn't consume any values causing the FSTBuilder
                 * to return null.
                 */
                if (build != null) {
                    fieldOffsets.put(field, output.getFilePointer());
                    build.save(output);
                    /* write some more meta-info */
                    output.writeVInt(maxAnalyzedPathsForOneInput);
                    output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                    output.writeInt(maxGraphExpansions); // can be negative
                    // Pack the suggester flags into a single options vint:
                    int options = 0;
                    options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0;
                    options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
                    options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
                    output.writeVInt(options);
                    output.writeVInt(XAnalyzingSuggester.SEP_LABEL);
                    output.writeVInt(XAnalyzingSuggester.END_BYTE);
                    output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP);
                    output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER);
                }
            }
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProviderV1.java

License:Apache License

/**
 * Creates a {@link FieldsConsumer} that serializes completion-suggester data
 * to {@code output} in the legacy V1 layout: a codec header, then one
 * suggestion FST (plus per-field metadata) per field, and finally a trailer
 * mapping each field name to the file offset of its FST. The trailer's own
 * start offset is the last long in the file, written by {@code close()}.
 *
 * @param output the index output the returned consumer writes to and
 *               ultimately closes in its {@code close()}
 * @throws IOException if writing the codec header fails
 */
@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
    // TODO write index header?
    CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION);
    return new FieldsConsumer() {
        // Start offset of each field's serialized FST within 'output';
        // flushed as the trailer in close().
        private Map<String, Long> fieldOffsets = new HashMap<>();

        @Override
        public void close() throws IOException {
            try { /*
                   * write the offsets per field such that we know where
                   * we need to load the FSTs from
                   */
                long pointer = output.getFilePointer();
                output.writeVInt(fieldOffsets.size());
                for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
                    output.writeString(entry.getKey());
                    output.writeVLong(entry.getValue());
                }
                // Absolute start of the trailer; readers seek to it via the
                // final long of the file.
                output.writeLong(pointer);
            } finally {
                // Always release the underlying output, even if the trailer write failed.
                IOUtils.close(output);
            }
        }

        @Override
        public void write(Fields fields) throws IOException {
            for (String field : fields) {
                Terms terms = fields.terms(field);
                if (terms == null) {
                    // Field is known but has no terms in this segment.
                    continue;
                }
                TermsEnum termsEnum = terms.iterator();
                PostingsEnum docsEnum = null; // reused across terms
                final SuggestPayload spare = new SuggestPayload(); // scratch for decoded payloads
                int maxAnalyzedPathsForOneInput = 0;
                final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                        maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
                int docCount = 0; // highest docID seen, plus one
                while (true) {
                    BytesRef term = termsEnum.next();
                    if (term == null) {
                        break;
                    }
                    docsEnum = termsEnum.postings(null, docsEnum, PostingsEnum.PAYLOADS);
                    builder.startTerm(term);
                    int docFreq = 0;
                    while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        for (int i = 0; i < docsEnum.freq(); i++) {
                            final int position = docsEnum.nextPosition();
                            AnalyzingCompletionLookupProviderV1.this.parsePayload(docsEnum.getPayload(), spare);
                            builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
                            // multi fields have the same surface form so we sum up here
                            maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
                        }
                        docFreq++;
                        docCount = Math.max(docCount, docsEnum.docID() + 1);
                    }
                    builder.finishTerm(docFreq);
                }
                /*
                 * Here we are done processing the field and we can
                 * build the FST and write it to disk.
                 */
                FST<Pair<Long, BytesRef>> build = builder.build();
                assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: ["
                        + docCount + "]";
                /*
                 * it's possible that the FST is null if we have 2 segments that get merged
                 * and all docs that have a value in this field are deleted. This will cause
                 * a consumer to be created but it doesn't consume any values causing the FSTBuilder
                 * to return null.
                 */
                if (build != null) {
                    fieldOffsets.put(field, output.getFilePointer());
                    build.save(output);
                    /* write some more meta-info */
                    output.writeVInt(maxAnalyzedPathsForOneInput);
                    output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                    output.writeInt(maxGraphExpansions); // can be negative
                    int options = 0;
                    options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0;
                    options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
                    options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
                    output.writeVInt(options);
                    // NOTE: unlike the newer format (see AnalyzingCompletionLookupProvider),
                    // V1 does not serialize the SEP_LABEL/END_BYTE/PAYLOAD_SEP/HOLE_CHARACTER
                    // constants after the options int.
                }
            }
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion2x.AnalyzingCompletionLookupProvider.java

License:Apache License

@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
    CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST);
    return new FieldsConsumer() {
        private Map<String, Long> fieldOffsets = new HashMap<>();

        @Override/*w w  w  .  ja  v  a2  s . co  m*/
        public void close() throws IOException {
            try {
                /*
                 * write the offsets per field such that we know where
                 * we need to load the FSTs from
                 */
                long pointer = output.getFilePointer();
                output.writeVInt(fieldOffsets.size());
                for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
                    output.writeString(entry.getKey());
                    output.writeVLong(entry.getValue());
                }
                output.writeLong(pointer);
                CodecUtil.writeFooter(output);
            } finally {
                IOUtils.close(output);
            }
        }

        @Override
        public void write(Fields fields) throws IOException {
            for (String field : fields) {
                Terms terms = fields.terms(field);
                if (terms == null) {
                    continue;
                }
                TermsEnum termsEnum = terms.iterator();
                PostingsEnum docsEnum = null;
                final SuggestPayload spare = new SuggestPayload();
                int maxAnalyzedPathsForOneInput = 0;
                final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                        maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
                int docCount = 0;
                while (true) {
                    BytesRef term = termsEnum.next();
                    if (term == null) {
                        break;
                    }
                    docsEnum = termsEnum.postings(docsEnum, PostingsEnum.PAYLOADS);
                    builder.startTerm(term);
                    int docFreq = 0;
                    while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        for (int i = 0; i < docsEnum.freq(); i++) {
                            final int position = docsEnum.nextPosition();
                            AnalyzingCompletionLookupProvider.this.parsePayload(docsEnum.getPayload(), spare);
                            builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
                            // multi fields have the same surface form so we sum up here
                            maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
                        }
                        docFreq++;
                        docCount = Math.max(docCount, docsEnum.docID() + 1);
                    }
                    builder.finishTerm(docFreq);
                }
                /*
                 * Here we are done processing the field and we can
                 * buid the FST and write it to disk.
                 */
                FST<Pair<Long, BytesRef>> build = builder.build();
                assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: ["
                        + docCount + "]";
                /*
                 * it's possible that the FST is null if we have 2 segments that get merged
                 * and all docs that have a value in this field are deleted. This will cause
                 * a consumer to be created but it doesn't consume any values causing the FSTBuilder
                 * to return null.
                 */
                if (build != null) {
                    fieldOffsets.put(field, output.getFilePointer());
                    build.save(output);
                    /* write some more meta-info */
                    output.writeVInt(maxAnalyzedPathsForOneInput);
                    output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                    output.writeInt(maxGraphExpansions); // can be negative
                    int options = 0;
                    options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0;
                    options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
                    options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
                    output.writeVInt(options);
                    output.writeVInt(XAnalyzingSuggester.SEP_LABEL);
                    output.writeVInt(XAnalyzingSuggester.END_BYTE);
                    output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP);
                    output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER);
                }
            }
        }
    };
}