Example usage for org.apache.lucene.util.fst FST FST

List of usage examples for org.apache.lucene.util.fst FST FST

Introduction

In this page you can find the example usage for org.apache.lucene.util.fst FST FST.

Prototype

public FST(DataInput in, Outputs<T> outputs) throws IOException 

Source Link

Document

Load a previously saved FST.

Usage

From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.dict.TokenInfoDictionary.java

License:Apache License

private TokenInfoDictionary() throws IOException {
    super();/*ww w . j a va  2s .  co m*/
    InputStream is = null;
    FST<Long> fst = null;
    boolean success = false;
    try {
        is = getResource(FST_FILENAME_SUFFIX);
        is = new BufferedInputStream(is);
        fst = new FST<>(new InputStreamDataInput(is), PositiveIntOutputs.getSingleton());
        success = true;
    } finally {
        if (success) {
            IOUtils.close(is);
        } else {
            IOUtils.closeWhileHandlingException(is);
        }
    }
    // TODO: some way to configure?
    this.fst = new TokenInfoFST(fst, true);
}

From source file:com.rocana.lucene.codec.v1.RocanaFieldReader.java

License:Apache License

RocanaFieldReader(RocanaBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, BytesRef rootCode,
        long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, int longsSize,
        IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
    assert numTerms > 0;
    this.fieldInfo = fieldInfo;
    //DEBUG = RocanaBlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
    this.parent = parent;
    this.numTerms = numTerms;
    this.sumTotalTermFreq = sumTotalTermFreq;
    this.sumDocFreq = sumDocFreq;
    this.docCount = docCount;
    this.indexStartFP = indexStartFP;
    this.rootCode = rootCode;
    this.longsSize = longsSize;
    this.minTerm = minTerm;
    this.maxTerm = maxTerm;
    // if (DEBUG) {
    //   System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor);
    // }/*from w ww  . j a v a  2  s  .  com*/

    rootBlockFP = (new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length))
            .readVLong() >>> RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;

    if (indexIn != null) {
        final IndexInput clone = indexIn.clone();
        //System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
        clone.seek(indexStartFP);
        index = new FST<>(clone, ByteSequenceOutputs.getSingleton());

        /*
          if (false) {
          final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
          Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
          Util.toDot(index, w, false, false);
          System.out.println("FST INDEX: SAVED to " + dotFileName);
          w.close();
          }
        */
    } else {
        index = null;
    }
}

From source file:org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider.java

License:Apache License

@Override
public LookupFactory load(IndexInput input) throws IOException {
    long sizeInBytes = 0;
    int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST);
    final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<String, AnalyzingSuggestHolder>();
    input.seek(input.length() - 8);/*ww  w .ja  v  a 2s  . c  o m*/
    long metaPointer = input.readLong();
    input.seek(metaPointer);
    int numFields = input.readVInt();

    Map<Long, String> meta = new TreeMap<Long, String>();
    for (int i = 0; i < numFields; i++) {
        String name = input.readString();
        long offset = input.readVLong();
        meta.put(offset, name);
    }

    for (Map.Entry<Long, String> entry : meta.entrySet()) {
        input.seek(entry.getKey());
        FST<Pair<Long, BytesRef>> fst = new FST<Pair<Long, BytesRef>>(input, new PairOutputs<Long, BytesRef>(
                PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
        int maxAnalyzedPathsForOneInput = input.readVInt();
        int maxSurfaceFormsPerAnalyzedForm = input.readVInt();
        int maxGraphExpansions = input.readInt();
        int options = input.readVInt();
        boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPERATORS) != 0;
        boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0;
        boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;

        // first version did not include these three fields, so fall back to old default (before the analyzingsuggester
        // was updated in Lucene, so we cannot use the suggester defaults)
        int sepLabel, payloadSep, endByte, holeCharacter;
        switch (version) {
        case CODEC_VERSION_START:
            sepLabel = 0xFF;
            payloadSep = '\u001f';
            endByte = 0x0;
            holeCharacter = '\u001E';
            break;
        default:
            sepLabel = input.readVInt();
            endByte = input.readVInt();
            payloadSep = input.readVInt();
            holeCharacter = input.readVInt();
        }

        AnalyzingSuggestHolder holder = new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements,
                maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput,
                fst, sepLabel, payloadSep, endByte, holeCharacter);
        sizeInBytes += fst.sizeInBytes();
        lookupMap.put(entry.getValue(), holder);
    }
    final long ramBytesUsed = sizeInBytes;
    return new LookupFactory() {
        @Override
        public Lookup getLookup(FieldMapper<?> mapper, CompletionSuggestionContext suggestionContext) {
            AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.names().indexName());
            if (analyzingSuggestHolder == null) {
                return null;
            }
            int flags = analyzingSuggestHolder.preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0;

            XAnalyzingSuggester suggester;
            if (suggestionContext.isFuzzy()) {
                suggester = new XFuzzySuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(),
                        suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(),
                        suggestionContext.getFuzzyMinLength(), suggestionContext.isFuzzyUnicodeAware(),
                        analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
                        analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel,
                        analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte,
                        analyzingSuggestHolder.holeCharacter);

            } else {
                suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions,
                        analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst,
                        analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput,
                        analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep,
                        analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter);
            }
            return suggester;
        }

        @Override
        public CompletionStats stats(String... fields) {
            long sizeInBytes = 0;
            ObjectLongOpenHashMap<String> completionFields = null;
            if (fields != null && fields.length > 0) {
                completionFields = new ObjectLongOpenHashMap<String>(fields.length);
            }

            for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) {
                sizeInBytes += entry.getValue().fst.sizeInBytes();
                if (fields == null || fields.length == 0) {
                    continue;
                }
                for (String field : fields) {
                    // support for getting fields by regex as in fielddata
                    if (Regex.simpleMatch(field, entry.getKey())) {
                        long fstSize = entry.getValue().fst.sizeInBytes();
                        completionFields.addTo(field, fstSize);
                    }
                }
            }

            return new CompletionStats(sizeInBytes, completionFields);
        }

        @Override
        AnalyzingSuggestHolder getAnalyzingSuggestHolder(FieldMapper<?> mapper) {
            return lookupMap.get(mapper.names().indexName());
        }

        @Override
        public long ramBytesUsed() {
            return ramBytesUsed;
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProviderV1.java

License:Apache License

@Override
public LookupFactory load(IndexInput input) throws IOException {
    CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION, CODEC_VERSION);
    final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<String, AnalyzingSuggestHolder>();
    input.seek(input.length() - 8);//from www  . j  a  v  a  2  s  . co m
    long metaPointer = input.readLong();
    input.seek(metaPointer);
    int numFields = input.readVInt();

    Map<Long, String> meta = new TreeMap<Long, String>();
    for (int i = 0; i < numFields; i++) {
        String name = input.readString();
        long offset = input.readVLong();
        meta.put(offset, name);
    }
    long sizeInBytes = 0;
    for (Map.Entry<Long, String> entry : meta.entrySet()) {
        input.seek(entry.getKey());
        FST<Pair<Long, BytesRef>> fst = new FST<Pair<Long, BytesRef>>(input, new PairOutputs<Long, BytesRef>(
                PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
        int maxAnalyzedPathsForOneInput = input.readVInt();
        int maxSurfaceFormsPerAnalyzedForm = input.readVInt();
        int maxGraphExpansions = input.readInt();
        int options = input.readVInt();
        boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPERATORS) != 0;
        boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0;
        boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;
        sizeInBytes += fst.sizeInBytes();
        lookupMap.put(entry.getValue(),
                new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements,
                        maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads,
                        maxAnalyzedPathsForOneInput, fst));
    }
    final long ramBytesUsed = sizeInBytes;
    return new LookupFactory() {
        @Override
        public Lookup getLookup(FieldMapper<?> mapper, CompletionSuggestionContext suggestionContext) {
            AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.names().indexName());
            if (analyzingSuggestHolder == null) {
                return null;
            }
            int flags = analyzingSuggestHolder.preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0;

            XAnalyzingSuggester suggester;
            if (suggestionContext.isFuzzy()) {
                suggester = new XFuzzySuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(),
                        suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(),
                        suggestionContext.getFuzzyMinLength(), false, analyzingSuggestHolder.fst,
                        analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput,
                        SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);

            } else {
                suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions,
                        analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst,
                        analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput,
                        SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);
            }
            return suggester;
        }

        @Override
        public CompletionStats stats(String... fields) {
            long sizeInBytes = 0;
            ObjectLongOpenHashMap<String> completionFields = null;
            if (fields != null && fields.length > 0) {
                completionFields = new ObjectLongOpenHashMap<String>(fields.length);
            }

            for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) {
                sizeInBytes += entry.getValue().fst.sizeInBytes();
                if (fields == null || fields.length == 0) {
                    continue;
                }
                for (String field : fields) {
                    // support for getting fields by regex as in fielddata
                    if (Regex.simpleMatch(field, entry.getKey())) {
                        long fstSize = entry.getValue().fst.sizeInBytes();
                        completionFields.addTo(field, fstSize);
                    }
                }
            }

            return new CompletionStats(sizeInBytes, completionFields);
        }

        @Override
        AnalyzingSuggestHolder getAnalyzingSuggestHolder(FieldMapper<?> mapper) {
            return lookupMap.get(mapper.names().indexName());
        }

        @Override
        public long ramBytesUsed() {
            return ramBytesUsed;
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProvider.java

License:Apache License

@Override
public Completion090PostingsFormat.LookupFactory load(IndexInput input) throws IOException {
    long sizeInBytes = 0;
    int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST);
    if (version >= CODEC_VERSION_CHECKSUMS) {
        CodecUtil.checksumEntireFile(input);
    }/*from  w  ww .  j av a  2s . c  o m*/
    final long metaPointerPosition = input.length()
            - (version >= CODEC_VERSION_CHECKSUMS ? 8 + CodecUtil.footerLength() : 8);
    final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>();
    input.seek(metaPointerPosition);
    long metaPointer = input.readLong();
    input.seek(metaPointer);
    int numFields = input.readVInt();

    Map<Long, String> meta = new TreeMap<>();
    for (int i = 0; i < numFields; i++) {
        String name = input.readString();
        long offset = input.readVLong();
        meta.put(offset, name);
    }

    for (Map.Entry<Long, String> entry : meta.entrySet()) {
        input.seek(entry.getKey());
        FST<Pair<Long, BytesRef>> fst = new FST<>(input,
                new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
        int maxAnalyzedPathsForOneInput = input.readVInt();
        int maxSurfaceFormsPerAnalyzedForm = input.readVInt();
        int maxGraphExpansions = input.readInt();
        int options = input.readVInt();
        boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPARATORS) != 0;
        boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0;
        boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;

        // first version did not include these three fields, so fall back to old default (before the analyzingsuggester
        // was updated in Lucene, so we cannot use the suggester defaults)
        int sepLabel, payloadSep, endByte, holeCharacter;
        switch (version) {
        case CODEC_VERSION_START:
            sepLabel = 0xFF;
            payloadSep = '\u001f';
            endByte = 0x0;
            holeCharacter = '\u001E';
            break;
        default:
            sepLabel = input.readVInt();
            endByte = input.readVInt();
            payloadSep = input.readVInt();
            holeCharacter = input.readVInt();
        }

        AnalyzingSuggestHolder holder = new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements,
                maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput,
                fst, sepLabel, payloadSep, endByte, holeCharacter);
        sizeInBytes += fst.ramBytesUsed();
        lookupMap.put(entry.getValue(), holder);
    }
    final long ramBytesUsed = sizeInBytes;
    return new Completion090PostingsFormat.LookupFactory() {
        @Override
        public Lookup getLookup(OldCompletionFieldMapper.CompletionFieldType fieldType,
                CompletionSuggestionContext suggestionContext) {
            AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.names().indexName());
            if (analyzingSuggestHolder == null) {
                return null;
            }
            int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0;

            final XAnalyzingSuggester suggester;
            final Automaton queryPrefix = fieldType.requiresContext()
                    ? ContextMapping.ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(),
                            suggestionContext.getContextQueries())
                    : null;

            if (suggestionContext.isFuzzy()) {
                suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix,
                        fieldType.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(),
                        suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(),
                        suggestionContext.getFuzzyMinLength(), suggestionContext.isFuzzyUnicodeAware(),
                        analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
                        analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel,
                        analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte,
                        analyzingSuggestHolder.holeCharacter);
            } else {
                suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix,
                        fieldType.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions,
                        analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst,
                        analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput,
                        analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep,
                        analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter);
            }
            return suggester;
        }

        @Override
        public CompletionStats stats(String... fields) {
            long sizeInBytes = 0;
            ObjectLongHashMap<String> completionFields = null;
            if (fields != null && fields.length > 0) {
                completionFields = new ObjectLongHashMap<>(fields.length);
            }

            for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) {
                sizeInBytes += entry.getValue().fst.ramBytesUsed();
                if (fields == null || fields.length == 0) {
                    continue;
                }
                if (Regex.simpleMatch(fields, entry.getKey())) {
                    long fstSize = entry.getValue().fst.ramBytesUsed();
                    completionFields.addTo(entry.getKey(), fstSize);
                }
            }

            return new CompletionStats(sizeInBytes, completionFields);
        }

        @Override
        AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) {
            return lookupMap.get(fieldType.names().indexName());
        }

        @Override
        public long ramBytesUsed() {
            return ramBytesUsed;
        }

        @Override
        public Collection<Accountable> getChildResources() {
            return Accountables.namedAccountables("field", lookupMap);
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProviderV1.java

License:Apache License

@Override
public LookupFactory load(IndexInput input) throws IOException {
    CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION, CODEC_VERSION);
    final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>();
    input.seek(input.length() - 8);/*ww w .  ja v  a 2  s .c  o m*/
    long metaPointer = input.readLong();
    input.seek(metaPointer);
    int numFields = input.readVInt();

    Map<Long, String> meta = new TreeMap<>();
    for (int i = 0; i < numFields; i++) {
        String name = input.readString();
        long offset = input.readVLong();
        meta.put(offset, name);
    }
    long sizeInBytes = 0;
    for (Map.Entry<Long, String> entry : meta.entrySet()) {
        input.seek(entry.getKey());
        FST<Pair<Long, BytesRef>> fst = new FST<>(input,
                new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
        int maxAnalyzedPathsForOneInput = input.readVInt();
        int maxSurfaceFormsPerAnalyzedForm = input.readVInt();
        int maxGraphExpansions = input.readInt();
        int options = input.readVInt();
        boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPARATORS) != 0;
        boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0;
        boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;
        sizeInBytes += fst.ramBytesUsed();
        lookupMap.put(entry.getValue(),
                new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements,
                        maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads,
                        maxAnalyzedPathsForOneInput, fst));
    }
    final long ramBytesUsed = sizeInBytes;
    return new LookupFactory() {
        @Override
        public Lookup getLookup(OldCompletionFieldMapper.CompletionFieldType fieldType,
                CompletionSuggestionContext suggestionContext) {
            AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.names().indexName());
            if (analyzingSuggestHolder == null) {
                return null;
            }
            int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0;

            final Automaton queryPrefix = fieldType.requiresContext()
                    ? ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(),
                            suggestionContext.getContextQueries())
                    : null;

            XAnalyzingSuggester suggester;
            if (suggestionContext.isFuzzy()) {
                suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix,
                        fieldType.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(),
                        suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(),
                        suggestionContext.getFuzzyMinLength(), false, analyzingSuggestHolder.fst,
                        analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput,
                        SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER);
            } else {
                suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix,
                        fieldType.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions,
                        analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst,
                        analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput,
                        SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER);
            }
            return suggester;
        }

        @Override
        public CompletionStats stats(String... fields) {
            long sizeInBytes = 0;
            ObjectLongHashMap<String> completionFields = null;
            if (fields != null && fields.length > 0) {
                completionFields = new ObjectLongHashMap<>(fields.length);
            }

            for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) {
                sizeInBytes += entry.getValue().fst.ramBytesUsed();
                if (fields == null || fields.length == 0) {
                    continue;
                }
                for (String field : fields) {
                    // support for getting fields by regex as in fielddata
                    if (Regex.simpleMatch(field, entry.getKey())) {
                        long fstSize = entry.getValue().fst.ramBytesUsed();
                        completionFields.addTo(field, fstSize);
                    }
                }
            }

            return new CompletionStats(sizeInBytes, completionFields);
        }

        @Override
        AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) {
            return lookupMap.get(fieldType.names().indexName());
        }

        @Override
        public long ramBytesUsed() {
            return ramBytesUsed;
        }

        @Override
        public Collection<Accountable> getChildResources() {
            return Accountables.namedAccountables("field", lookupMap);
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion2x.AnalyzingCompletionLookupProvider.java

License:Apache License

@Override
public LookupFactory load(IndexInput input) throws IOException {
    long sizeInBytes = 0;
    int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST);
    if (version >= CODEC_VERSION_CHECKSUMS) {
        CodecUtil.checksumEntireFile(input);
    }/*ww  w.ja va  2s .  com*/
    final long metaPointerPosition = input.length()
            - (version >= CODEC_VERSION_CHECKSUMS ? 8 + CodecUtil.footerLength() : 8);
    final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>();
    input.seek(metaPointerPosition);
    long metaPointer = input.readLong();
    input.seek(metaPointer);
    int numFields = input.readVInt();

    Map<Long, String> meta = new TreeMap<>();
    for (int i = 0; i < numFields; i++) {
        String name = input.readString();
        long offset = input.readVLong();
        meta.put(offset, name);
    }

    for (Map.Entry<Long, String> entry : meta.entrySet()) {
        input.seek(entry.getKey());
        FST<Pair<Long, BytesRef>> fst = new FST<>(input,
                new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
        int maxAnalyzedPathsForOneInput = input.readVInt();
        int maxSurfaceFormsPerAnalyzedForm = input.readVInt();
        int maxGraphExpansions = input.readInt();
        int options = input.readVInt();
        boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPARATORS) != 0;
        boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0;
        boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;

        // first version did not include these three fields, so fall back to old default (before the analyzingsuggester
        // was updated in Lucene, so we cannot use the suggester defaults)
        int sepLabel, payloadSep, endByte, holeCharacter;
        switch (version) {
        case CODEC_VERSION_START:
            sepLabel = 0xFF;
            payloadSep = '\u001f';
            endByte = 0x0;
            holeCharacter = '\u001E';
            break;
        default:
            sepLabel = input.readVInt();
            endByte = input.readVInt();
            payloadSep = input.readVInt();
            holeCharacter = input.readVInt();
        }

        AnalyzingSuggestHolder holder = new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements,
                maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput,
                fst, sepLabel, payloadSep, endByte, holeCharacter);
        sizeInBytes += fst.ramBytesUsed();
        lookupMap.put(entry.getValue(), holder);
    }
    final long ramBytesUsed = sizeInBytes;
    return new LookupFactory() {
        @Override
        public Lookup getLookup(CompletionFieldMapper2x.CompletionFieldType fieldType,
                CompletionSuggestionContext suggestionContext) {
            AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.name());
            if (analyzingSuggestHolder == null) {
                return null;
            }
            int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0;

            final XAnalyzingSuggester suggester;
            final Automaton queryPrefix = fieldType.requiresContext()
                    ? ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(),
                            suggestionContext.getContextQueries())
                    : null;

            final FuzzyOptions fuzzyOptions = suggestionContext.getFuzzyOptions();
            if (fuzzyOptions != null) {
                suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix,
                        fieldType.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions, fuzzyOptions.getEditDistance(),
                        fuzzyOptions.isTranspositions(), fuzzyOptions.getFuzzyPrefixLength(),
                        fuzzyOptions.getFuzzyMinLength(), fuzzyOptions.isUnicodeAware(),
                        analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
                        analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel,
                        analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte,
                        analyzingSuggestHolder.holeCharacter);
            } else {
                suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix,
                        fieldType.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions,
                        analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst,
                        analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput,
                        analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep,
                        analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter);
            }
            return suggester;
        }

        @Override
        public CompletionStats stats(String... fields) {
            long sizeInBytes = 0;
            ObjectLongHashMap<String> completionFields = null;
            if (fields != null && fields.length > 0) {
                completionFields = new ObjectLongHashMap<>(fields.length);
            }

            for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) {
                sizeInBytes += entry.getValue().fst.ramBytesUsed();
                if (fields == null || fields.length == 0) {
                    continue;
                }
                if (Regex.simpleMatch(fields, entry.getKey())) {
                    long fstSize = entry.getValue().fst.ramBytesUsed();
                    completionFields.addTo(entry.getKey(), fstSize);
                }
            }

            return new CompletionStats(sizeInBytes, completionFields);
        }

        @Override
        AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) {
            return lookupMap.get(fieldType.name());
        }

        @Override
        public long ramBytesUsed() {
            return ramBytesUsed;
        }

        @Override
        public Collection<Accountable> getChildResources() {
            return Accountables.namedAccountables("field", lookupMap);
        }
    };
}