Example usage for org.apache.lucene.util.fst FST save

List of usage examples for org.apache.lucene.util.fst FST save

Introduction

In this page you can find the example usage for org.apache.lucene.util.fst FST save.

Prototype

public void save(final Path path) throws IOException 

Source Link

Document

Writes an automaton to a file.

Usage

From source file:org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider.java

License:Apache License

@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
    CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST);
    return new FieldsConsumer() {
        private Map<FieldInfo, Long> fieldOffsets = new HashMap<FieldInfo, Long>();

        @Override// www . j a v  a 2s  .  c om
        public void close() throws IOException {
            try { /*
                   * write the offsets per field such that we know where
                   * we need to load the FSTs from
                   */
                long pointer = output.getFilePointer();
                output.writeVInt(fieldOffsets.size());
                for (Map.Entry<FieldInfo, Long> entry : fieldOffsets.entrySet()) {
                    output.writeString(entry.getKey().name);
                    output.writeVLong(entry.getValue());
                }
                output.writeLong(pointer);
                output.flush();
            } finally {
                IOUtils.close(output);
            }
        }

        @Override
        public TermsConsumer addField(final FieldInfo field) throws IOException {

            return new TermsConsumer() {
                final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                        maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
                final CompletionPostingsConsumer postingsConsumer = new CompletionPostingsConsumer(
                        AnalyzingCompletionLookupProvider.this, builder);

                @Override
                public PostingsConsumer startTerm(BytesRef text) throws IOException {
                    builder.startTerm(text);
                    return postingsConsumer;
                }

                @Override
                public Comparator<BytesRef> getComparator() throws IOException {
                    return BytesRef.getUTF8SortedAsUnicodeComparator();
                }

                @Override
                public void finishTerm(BytesRef text, TermStats stats) throws IOException {
                    builder.finishTerm(stats.docFreq); // use  doc freq as a fallback
                }

                @Override
                public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
                    /*
                     * Here we are done processing the field and we can
                     * buid the FST and write it to disk.
                     */
                    FST<Pair<Long, BytesRef>> build = builder.build();
                    assert build != null
                            || docCount == 0 : "the FST is null but docCount is != 0 actual value: [" + docCount
                                    + "]";
                    /*
                     * it's possible that the FST is null if we have 2 segments that get merged
                     * and all docs that have a value in this field are deleted. This will cause
                     * a consumer to be created but it doesn't consume any values causing the FSTBuilder
                     * to return null.
                     */
                    if (build != null) {
                        fieldOffsets.put(field, output.getFilePointer());
                        build.save(output);
                        /* write some more meta-info */
                        output.writeVInt(postingsConsumer.getMaxAnalyzedPathsForOneInput());
                        output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                        output.writeInt(maxGraphExpansions); // can be negative
                        int options = 0;
                        options |= preserveSep ? SERIALIZE_PRESERVE_SEPERATORS : 0;
                        options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
                        options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
                        output.writeVInt(options);
                        output.writeVInt(XAnalyzingSuggester.SEP_LABEL);
                        output.writeVInt(XAnalyzingSuggester.END_BYTE);
                        output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP);
                        output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER);
                    }
                }
            };
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProviderV1.java

License:Apache License

@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
    CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION);
    return new FieldsConsumer() {
        private Map<FieldInfo, Long> fieldOffsets = new HashMap<FieldInfo, Long>();

        @Override//from  w w w . j  av  a  2 s  .  c  om
        public void close() throws IOException {
            try { /*
                   * write the offsets per field such that we know where
                   * we need to load the FSTs from
                   */
                long pointer = output.getFilePointer();
                output.writeVInt(fieldOffsets.size());
                for (Map.Entry<FieldInfo, Long> entry : fieldOffsets.entrySet()) {
                    output.writeString(entry.getKey().name);
                    output.writeVLong(entry.getValue());
                }
                output.writeLong(pointer);
                output.flush();
            } finally {
                IOUtils.close(output);
            }
        }

        @Override
        public TermsConsumer addField(final FieldInfo field) throws IOException {

            return new TermsConsumer() {
                final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                        maxSurfaceFormsPerAnalyzedForm, hasPayloads, PAYLOAD_SEP);
                final CompletionPostingsConsumer postingsConsumer = new CompletionPostingsConsumer(
                        AnalyzingCompletionLookupProviderV1.this, builder);

                @Override
                public PostingsConsumer startTerm(BytesRef text) throws IOException {
                    builder.startTerm(text);
                    return postingsConsumer;
                }

                @Override
                public Comparator<BytesRef> getComparator() throws IOException {
                    return BytesRef.getUTF8SortedAsUnicodeComparator();
                }

                @Override
                public void finishTerm(BytesRef text, TermStats stats) throws IOException {
                    builder.finishTerm(stats.docFreq); // use  doc freq as a fallback
                }

                @Override
                public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
                    /*
                     * Here we are done processing the field and we can
                     * buid the FST and write it to disk.
                     */
                    FST<Pair<Long, BytesRef>> build = builder.build();
                    assert build != null
                            || docCount == 0 : "the FST is null but docCount is != 0 actual value: [" + docCount
                                    + "]";
                    /*
                     * it's possible that the FST is null if we have 2 segments that get merged
                     * and all docs that have a value in this field are deleted. This will cause
                     * a consumer to be created but it doesn't consume any values causing the FSTBuilder
                     * to return null.
                     */
                    if (build != null) {
                        fieldOffsets.put(field, output.getFilePointer());
                        build.save(output);
                        /* write some more meta-info */
                        output.writeVInt(postingsConsumer.getMaxAnalyzedPathsForOneInput());
                        output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                        output.writeInt(maxGraphExpansions); // can be negative
                        int options = 0;
                        options |= preserveSep ? SERIALIZE_PRESERVE_SEPERATORS : 0;
                        options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
                        options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
                        output.writeVInt(options);
                    }
                }
            };
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProvider.java

License:Apache License

@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
    CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST);
    return new FieldsConsumer() {
        private Map<String, Long> fieldOffsets = new HashMap<>();

        @Override//from  w ww.  jav a  2s. c o  m
        public void close() throws IOException {
            try {
                /*
                 * write the offsets per field such that we know where
                 * we need to load the FSTs from
                 */
                long pointer = output.getFilePointer();
                output.writeVInt(fieldOffsets.size());
                for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
                    output.writeString(entry.getKey());
                    output.writeVLong(entry.getValue());
                }
                output.writeLong(pointer);
                CodecUtil.writeFooter(output);
            } finally {
                IOUtils.close(output);
            }
        }

        @Override
        public void write(Fields fields) throws IOException {
            for (String field : fields) {
                Terms terms = fields.terms(field);
                if (terms == null) {
                    continue;
                }
                TermsEnum termsEnum = terms.iterator();
                PostingsEnum docsEnum = null;
                final SuggestPayload spare = new SuggestPayload();
                int maxAnalyzedPathsForOneInput = 0;
                final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                        maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
                int docCount = 0;
                while (true) {
                    BytesRef term = termsEnum.next();
                    if (term == null) {
                        break;
                    }
                    docsEnum = termsEnum.postings(null, docsEnum, PostingsEnum.PAYLOADS);
                    builder.startTerm(term);
                    int docFreq = 0;
                    while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        for (int i = 0; i < docsEnum.freq(); i++) {
                            final int position = docsEnum.nextPosition();
                            AnalyzingCompletionLookupProvider.this.parsePayload(docsEnum.getPayload(), spare);
                            builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
                            // multi fields have the same surface form so we sum up here
                            maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
                        }
                        docFreq++;
                        docCount = Math.max(docCount, docsEnum.docID() + 1);
                    }
                    builder.finishTerm(docFreq);
                }
                /*
                 * Here we are done processing the field and we can
                 * buid the FST and write it to disk.
                 */
                FST<Pair<Long, BytesRef>> build = builder.build();
                assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: ["
                        + docCount + "]";
                /*
                 * it's possible that the FST is null if we have 2 segments that get merged
                 * and all docs that have a value in this field are deleted. This will cause
                 * a consumer to be created but it doesn't consume any values causing the FSTBuilder
                 * to return null.
                 */
                if (build != null) {
                    fieldOffsets.put(field, output.getFilePointer());
                    build.save(output);
                    /* write some more meta-info */
                    output.writeVInt(maxAnalyzedPathsForOneInput);
                    output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                    output.writeInt(maxGraphExpansions); // can be negative
                    int options = 0;
                    options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0;
                    options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
                    options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
                    output.writeVInt(options);
                    output.writeVInt(XAnalyzingSuggester.SEP_LABEL);
                    output.writeVInt(XAnalyzingSuggester.END_BYTE);
                    output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP);
                    output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER);
                }
            }
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProviderV1.java

License:Apache License

@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
    // TODO write index header?
    CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION);
    return new FieldsConsumer() {
        private Map<String, Long> fieldOffsets = new HashMap<>();

        @Override/*from  w  w  w  . ja v  a 2  s  .c  o  m*/
        public void close() throws IOException {
            try { /*
                   * write the offsets per field such that we know where
                   * we need to load the FSTs from
                   */
                long pointer = output.getFilePointer();
                output.writeVInt(fieldOffsets.size());
                for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
                    output.writeString(entry.getKey());
                    output.writeVLong(entry.getValue());
                }
                output.writeLong(pointer);
            } finally {
                IOUtils.close(output);
            }
        }

        @Override
        public void write(Fields fields) throws IOException {
            for (String field : fields) {
                Terms terms = fields.terms(field);
                if (terms == null) {
                    continue;
                }
                TermsEnum termsEnum = terms.iterator();
                PostingsEnum docsEnum = null;
                final SuggestPayload spare = new SuggestPayload();
                int maxAnalyzedPathsForOneInput = 0;
                final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                        maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
                int docCount = 0;
                while (true) {
                    BytesRef term = termsEnum.next();
                    if (term == null) {
                        break;
                    }
                    docsEnum = termsEnum.postings(null, docsEnum, PostingsEnum.PAYLOADS);
                    builder.startTerm(term);
                    int docFreq = 0;
                    while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        for (int i = 0; i < docsEnum.freq(); i++) {
                            final int position = docsEnum.nextPosition();
                            AnalyzingCompletionLookupProviderV1.this.parsePayload(docsEnum.getPayload(), spare);
                            builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
                            // multi fields have the same surface form so we sum up here
                            maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
                        }
                        docFreq++;
                        docCount = Math.max(docCount, docsEnum.docID() + 1);
                    }
                    builder.finishTerm(docFreq);
                }
                /*
                 * Here we are done processing the field and we can
                 * buid the FST and write it to disk.
                 */
                FST<Pair<Long, BytesRef>> build = builder.build();
                assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: ["
                        + docCount + "]";
                /*
                 * it's possible that the FST is null if we have 2 segments that get merged
                 * and all docs that have a value in this field are deleted. This will cause
                 * a consumer to be created but it doesn't consume any values causing the FSTBuilder
                 * to return null.
                 */
                if (build != null) {
                    fieldOffsets.put(field, output.getFilePointer());
                    build.save(output);
                    /* write some more meta-info */
                    output.writeVInt(maxAnalyzedPathsForOneInput);
                    output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                    output.writeInt(maxGraphExpansions); // can be negative
                    int options = 0;
                    options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0;
                    options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
                    options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
                    output.writeVInt(options);
                }
            }
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion2x.AnalyzingCompletionLookupProvider.java

License:Apache License

@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
    CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST);
    return new FieldsConsumer() {
        private Map<String, Long> fieldOffsets = new HashMap<>();

        @Override/*w ww  . ja  v  a2  s  .  c  om*/
        public void close() throws IOException {
            try {
                /*
                 * write the offsets per field such that we know where
                 * we need to load the FSTs from
                 */
                long pointer = output.getFilePointer();
                output.writeVInt(fieldOffsets.size());
                for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
                    output.writeString(entry.getKey());
                    output.writeVLong(entry.getValue());
                }
                output.writeLong(pointer);
                CodecUtil.writeFooter(output);
            } finally {
                IOUtils.close(output);
            }
        }

        @Override
        public void write(Fields fields) throws IOException {
            for (String field : fields) {
                Terms terms = fields.terms(field);
                if (terms == null) {
                    continue;
                }
                TermsEnum termsEnum = terms.iterator();
                PostingsEnum docsEnum = null;
                final SuggestPayload spare = new SuggestPayload();
                int maxAnalyzedPathsForOneInput = 0;
                final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(
                        maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
                int docCount = 0;
                while (true) {
                    BytesRef term = termsEnum.next();
                    if (term == null) {
                        break;
                    }
                    docsEnum = termsEnum.postings(docsEnum, PostingsEnum.PAYLOADS);
                    builder.startTerm(term);
                    int docFreq = 0;
                    while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        for (int i = 0; i < docsEnum.freq(); i++) {
                            final int position = docsEnum.nextPosition();
                            AnalyzingCompletionLookupProvider.this.parsePayload(docsEnum.getPayload(), spare);
                            builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
                            // multi fields have the same surface form so we sum up here
                            maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
                        }
                        docFreq++;
                        docCount = Math.max(docCount, docsEnum.docID() + 1);
                    }
                    builder.finishTerm(docFreq);
                }
                /*
                 * Here we are done processing the field and we can
                 * buid the FST and write it to disk.
                 */
                FST<Pair<Long, BytesRef>> build = builder.build();
                assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: ["
                        + docCount + "]";
                /*
                 * it's possible that the FST is null if we have 2 segments that get merged
                 * and all docs that have a value in this field are deleted. This will cause
                 * a consumer to be created but it doesn't consume any values causing the FSTBuilder
                 * to return null.
                 */
                if (build != null) {
                    fieldOffsets.put(field, output.getFilePointer());
                    build.save(output);
                    /* write some more meta-info */
                    output.writeVInt(maxAnalyzedPathsForOneInput);
                    output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
                    output.writeInt(maxGraphExpansions); // can be negative
                    int options = 0;
                    options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0;
                    options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
                    options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
                    output.writeVInt(options);
                    output.writeVInt(XAnalyzingSuggester.SEP_LABEL);
                    output.writeVInt(XAnalyzingSuggester.END_BYTE);
                    output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP);
                    output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER);
                }
            }
        }
    };
}