Example usage for org.apache.lucene.util.fst PositiveIntOutputs getSingleton

List of usage examples for org.apache.lucene.util.fst PositiveIntOutputs getSingleton

Introduction

In this page you can find the example usage for org.apache.lucene.util.fst PositiveIntOutputs getSingleton.

Prototype

public static PositiveIntOutputs getSingleton() 

Source Link

Usage

From source file:BuildFST.java

License:Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
public static void main(String[] args) throws IOException {

    boolean numeric = true;
    boolean negative = false;
    for (int i = 0; i < args.length; i++) {
        int j = args[i].lastIndexOf('/');
        if (j != -1) {
            try {
                negative |= Long.parseLong(args[i].substring(j + 1)) < 0;
            } catch (NumberFormatException nfe) {
                numeric = false;/*from w  w  w . j  a va  2 s .  com*/
                break;
            }
        }
    }

    Outputs outputs;
    if (numeric) {
        if (negative) {
            throw new RuntimeException("can only handle numeric outputs >= 0");
        }
        outputs = PositiveIntOutputs.getSingleton();
    } else {
        outputs = ByteSequenceOutputs.getSingleton();
    }

    Pair<?>[] inputs = new Pair[args.length];
    for (int i = 0; i < args.length; i++) {
        int j = args[i].lastIndexOf('/');
        String input;
        Object output;
        if (j == -1) {
            output = outputs.getNoOutput();
            input = args[i];
        } else {
            input = args[i].substring(0, j);
            String outputString = args[i].substring(j + 1);
            if (numeric) {
                output = Long.parseLong(outputString);
            } else {
                output = new BytesRef(outputString);
            }
        }
        inputs[i] = new Pair(new BytesRef(input), output);
    }
    Arrays.sort(inputs);

    FST<?> fst;
    if (numeric) {
        Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
        for (Pair pair : inputs) {
            IntsRefBuilder intsBuilder = new IntsRefBuilder();
            Util.toIntsRef(pair.input, intsBuilder);
            b.add(intsBuilder.get(), (Long) pair.output);
        }
        fst = b.finish();
    } else {
        Builder<BytesRef> b = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, outputs);
        for (Pair pair : inputs) {
            IntsRefBuilder intsBuilder = new IntsRefBuilder();
            Util.toIntsRef(pair.input, intsBuilder);
            b.add(intsBuilder.get(), (BytesRef) pair.output);
        }
        fst = b.finish();
    }
    Util.toDot(fst, new PrintWriter(System.out), true, true);
}

From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.dict.TokenInfoDictionary.java

License:Apache License

private TokenInfoDictionary() throws IOException {
    super();/*ww  w  . ja  v  a  2  s  .  c  om*/
    InputStream is = null;
    FST<Long> fst = null;
    boolean success = false;
    try {
        is = getResource(FST_FILENAME_SUFFIX);
        is = new BufferedInputStream(is);
        fst = new FST<>(new InputStreamDataInput(is), PositiveIntOutputs.getSingleton());
        success = true;
    } finally {
        if (success) {
            IOUtils.close(is);
        } else {
            IOUtils.closeWhileHandlingException(is);
        }
    }
    // TODO: some way to configure?
    this.fst = new TokenInfoFST(fst, true);
}

From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.dict.UserDictionary.java

License:Apache License

private UserDictionary(List<String[]> featureEntries) throws IOException {

    int wordId = CUSTOM_DICTIONARY_WORD_ID_OFFSET;
    // TODO: should we allow multiple segmentations per input 'phrase'?
    // the old treemap didn't support this either, and i'm not sure if it's needed/useful?

    Collections.sort(featureEntries, new Comparator<String[]>() {
        @Override//from  w w  w .j a  v  a  2  s  .c o  m
        public int compare(String[] left, String[] right) {
            return left[0].compareTo(right[0]);
        }
    });

    List<String> data = new ArrayList<>(featureEntries.size());
    List<int[]> segmentations = new ArrayList<>(featureEntries.size());

    PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
    Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, fstOutput);
    IntsRefBuilder scratch = new IntsRefBuilder();
    long ord = 0;

    for (String[] values : featureEntries) {
        String[] segmentation = values[1].replaceAll("  *", " ").split(" ");
        String[] readings = values[2].replaceAll("  *", " ").split(" ");
        String pos = values[3];

        if (segmentation.length != readings.length) {
            throw new RuntimeException("Illegal user dictionary entry " + values[0]
                    + " - the number of segmentations (" + segmentation.length + ")"
                    + " does not the match number of readings (" + readings.length + ")");
        }

        int[] wordIdAndLength = new int[segmentation.length + 1]; // wordId offset, length, length....
        wordIdAndLength[0] = wordId;
        for (int i = 0; i < segmentation.length; i++) {
            wordIdAndLength[i + 1] = segmentation[i].length();
            data.add(readings[i] + INTERNAL_SEPARATOR + pos);
            wordId++;
        }
        // add mapping to FST
        String token = values[0];
        scratch.grow(token.length());
        scratch.setLength(token.length());
        for (int i = 0; i < token.length(); i++) {
            scratch.setIntAt(i, (int) token.charAt(i));
        }
        fstBuilder.add(scratch.get(), ord);
        segmentations.add(wordIdAndLength);
        ord++;
    }
    this.fst = new TokenInfoFST(fstBuilder.finish(), false);
    this.data = data.toArray(new String[data.size()]);
    this.segmentations = segmentations.toArray(new int[segmentations.size()][]);
}

From source file:com.meizu.nlp.classification.BooleanPerceptronClassifier.java

License:Apache License

private void updateFST(SortedMap<String, Double> weights) throws IOException {
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
    BytesRefBuilder scratchBytes = new BytesRefBuilder();
    IntsRefBuilder scratchInts = new IntsRefBuilder();
    for (Map.Entry<String, Double> entry : weights.entrySet()) {
        scratchBytes.copyChars(entry.getKey());
        fstBuilder.add(Util.toIntsRef(scratchBytes.get(), scratchInts), entry.getValue().longValue());
    }//from w w  w.  java2s.c  o  m
    fst = fstBuilder.finish();
}

From source file:examples.fst.FstTest.java

public static void main(String[] args) throws IOException {
    // Input values (keys). These must be provided to Builder in Unicode sorted order!
    String inputValues[] = { "cat", "dog", "dogs" };
    long outputValues[] = { 5, 7, 12 };

    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
    BytesRefBuilder scratchBytes = new BytesRefBuilder();
    IntsRefBuilder scratchInts = new IntsRefBuilder();
    for (int i = 0; i < inputValues.length; i++) {
        scratchBytes.copyChars(inputValues[i]);
        builder.add(Util.toIntsRef(scratchBytes.toBytesRef(), scratchInts), outputValues[i]);
    }//from ww w .j  a  v a  2 s .com
    FST<Long> fst = builder.finish();

    Long value = Util.get(fst, new BytesRef("dog"));
    System.out.println(value); // 7

    // Only works because outputs are also in sorted order
    IntsRef key = Util.getByOutput(fst, 12);
    System.out.println(Util.toBytesRef(key, scratchBytes).utf8ToString()); // dogs

}

From source file:org.elasticsearch.index.fielddata.plain.FSTBytesIndexFieldData.java

License:Apache License

@Override
public FSTBytesAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
    AtomicReader reader = context.reader();

    Terms terms = reader.terms(getFieldNames().indexName());
    FSTBytesAtomicFieldData data = null;
    // TODO: Use an actual estimator to estimate before loading.
    NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker());
    if (terms == null) {
        data = FSTBytesAtomicFieldData.empty(reader.maxDoc());
        estimator.afterLoad(null, data.getMemorySizeInBytes());
        return data;
    }//www.ja  v a  2  s  .co m
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    org.apache.lucene.util.fst.Builder<Long> fstBuilder = new org.apache.lucene.util.fst.Builder<Long>(
            INPUT_TYPE.BYTE1, outputs);
    final IntsRef scratch = new IntsRef();

    final long numTerms;
    if (regex == null && frequency == null) {
        numTerms = terms.size();
    } else {
        numTerms = -1;
    }
    final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat(
            "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    OrdinalsBuilder builder = new OrdinalsBuilder(numTerms, reader.maxDoc(), acceptableTransientOverheadRatio);
    boolean success = false;
    try {

        // we don't store an ord 0 in the FST since we could have an empty string in there and FST don't support
        // empty strings twice. ie. them merge fails for long output.
        TermsEnum termsEnum = filter(terms, reader);
        DocsEnum docsEnum = null;
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
            final long termOrd = builder.nextOrdinal();
            assert termOrd > 0;
            fstBuilder.add(Util.toIntsRef(term, scratch), (long) termOrd);
            docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
            for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                builder.addDoc(docId);
            }
        }

        FST<Long> fst = fstBuilder.finish();

        final Ordinals ordinals = builder.build(fieldDataType.getSettings());

        data = new FSTBytesAtomicFieldData(fst, ordinals);
        success = true;
        return data;
    } finally {
        if (success) {
            estimator.afterLoad(null, data.getMemorySizeInBytes());
        }
        builder.close();
    }
}

From source file:org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider.java

License:Apache License

@Override
public LookupFactory load(IndexInput input) throws IOException {
    long sizeInBytes = 0;
    int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST);
    final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<String, AnalyzingSuggestHolder>();
    input.seek(input.length() - 8);//from w w w  .j a va2  s  .co m
    long metaPointer = input.readLong();
    input.seek(metaPointer);
    int numFields = input.readVInt();

    Map<Long, String> meta = new TreeMap<Long, String>();
    for (int i = 0; i < numFields; i++) {
        String name = input.readString();
        long offset = input.readVLong();
        meta.put(offset, name);
    }

    for (Map.Entry<Long, String> entry : meta.entrySet()) {
        input.seek(entry.getKey());
        FST<Pair<Long, BytesRef>> fst = new FST<Pair<Long, BytesRef>>(input, new PairOutputs<Long, BytesRef>(
                PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
        int maxAnalyzedPathsForOneInput = input.readVInt();
        int maxSurfaceFormsPerAnalyzedForm = input.readVInt();
        int maxGraphExpansions = input.readInt();
        int options = input.readVInt();
        boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPERATORS) != 0;
        boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0;
        boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;

        // first version did not include these three fields, so fall back to old default (before the analyzingsuggester
        // was updated in Lucene, so we cannot use the suggester defaults)
        int sepLabel, payloadSep, endByte, holeCharacter;
        switch (version) {
        case CODEC_VERSION_START:
            sepLabel = 0xFF;
            payloadSep = '\u001f';
            endByte = 0x0;
            holeCharacter = '\u001E';
            break;
        default:
            sepLabel = input.readVInt();
            endByte = input.readVInt();
            payloadSep = input.readVInt();
            holeCharacter = input.readVInt();
        }

        AnalyzingSuggestHolder holder = new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements,
                maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput,
                fst, sepLabel, payloadSep, endByte, holeCharacter);
        sizeInBytes += fst.sizeInBytes();
        lookupMap.put(entry.getValue(), holder);
    }
    final long ramBytesUsed = sizeInBytes;
    return new LookupFactory() {
        @Override
        public Lookup getLookup(FieldMapper<?> mapper, CompletionSuggestionContext suggestionContext) {
            AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.names().indexName());
            if (analyzingSuggestHolder == null) {
                return null;
            }
            int flags = analyzingSuggestHolder.preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0;

            XAnalyzingSuggester suggester;
            if (suggestionContext.isFuzzy()) {
                suggester = new XFuzzySuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(),
                        suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(),
                        suggestionContext.getFuzzyMinLength(), suggestionContext.isFuzzyUnicodeAware(),
                        analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
                        analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel,
                        analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte,
                        analyzingSuggestHolder.holeCharacter);

            } else {
                suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions,
                        analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst,
                        analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput,
                        analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep,
                        analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter);
            }
            return suggester;
        }

        @Override
        public CompletionStats stats(String... fields) {
            long sizeInBytes = 0;
            ObjectLongOpenHashMap<String> completionFields = null;
            if (fields != null && fields.length > 0) {
                completionFields = new ObjectLongOpenHashMap<String>(fields.length);
            }

            for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) {
                sizeInBytes += entry.getValue().fst.sizeInBytes();
                if (fields == null || fields.length == 0) {
                    continue;
                }
                for (String field : fields) {
                    // support for getting fields by regex as in fielddata
                    if (Regex.simpleMatch(field, entry.getKey())) {
                        long fstSize = entry.getValue().fst.sizeInBytes();
                        completionFields.addTo(field, fstSize);
                    }
                }
            }

            return new CompletionStats(sizeInBytes, completionFields);
        }

        @Override
        AnalyzingSuggestHolder getAnalyzingSuggestHolder(FieldMapper<?> mapper) {
            return lookupMap.get(mapper.names().indexName());
        }

        @Override
        public long ramBytesUsed() {
            return ramBytesUsed;
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProviderV1.java

License:Apache License

@Override
public LookupFactory load(IndexInput input) throws IOException {
    CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION, CODEC_VERSION);
    final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<String, AnalyzingSuggestHolder>();
    input.seek(input.length() - 8);// w  w  w. j  a  v a  2s .  c o  m
    long metaPointer = input.readLong();
    input.seek(metaPointer);
    int numFields = input.readVInt();

    Map<Long, String> meta = new TreeMap<Long, String>();
    for (int i = 0; i < numFields; i++) {
        String name = input.readString();
        long offset = input.readVLong();
        meta.put(offset, name);
    }
    long sizeInBytes = 0;
    for (Map.Entry<Long, String> entry : meta.entrySet()) {
        input.seek(entry.getKey());
        FST<Pair<Long, BytesRef>> fst = new FST<Pair<Long, BytesRef>>(input, new PairOutputs<Long, BytesRef>(
                PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
        int maxAnalyzedPathsForOneInput = input.readVInt();
        int maxSurfaceFormsPerAnalyzedForm = input.readVInt();
        int maxGraphExpansions = input.readInt();
        int options = input.readVInt();
        boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPERATORS) != 0;
        boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0;
        boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;
        sizeInBytes += fst.sizeInBytes();
        lookupMap.put(entry.getValue(),
                new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements,
                        maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads,
                        maxAnalyzedPathsForOneInput, fst));
    }
    final long ramBytesUsed = sizeInBytes;
    return new LookupFactory() {
        @Override
        public Lookup getLookup(FieldMapper<?> mapper, CompletionSuggestionContext suggestionContext) {
            AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.names().indexName());
            if (analyzingSuggestHolder == null) {
                return null;
            }
            int flags = analyzingSuggestHolder.preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0;

            XAnalyzingSuggester suggester;
            if (suggestionContext.isFuzzy()) {
                suggester = new XFuzzySuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(),
                        suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(),
                        suggestionContext.getFuzzyMinLength(), false, analyzingSuggestHolder.fst,
                        analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput,
                        SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);

            } else {
                suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions,
                        analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst,
                        analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput,
                        SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);
            }
            return suggester;
        }

        @Override
        public CompletionStats stats(String... fields) {
            long sizeInBytes = 0;
            ObjectLongOpenHashMap<String> completionFields = null;
            if (fields != null && fields.length > 0) {
                completionFields = new ObjectLongOpenHashMap<String>(fields.length);
            }

            for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) {
                sizeInBytes += entry.getValue().fst.sizeInBytes();
                if (fields == null || fields.length == 0) {
                    continue;
                }
                for (String field : fields) {
                    // support for getting fields by regex as in fielddata
                    if (Regex.simpleMatch(field, entry.getKey())) {
                        long fstSize = entry.getValue().fst.sizeInBytes();
                        completionFields.addTo(field, fstSize);
                    }
                }
            }

            return new CompletionStats(sizeInBytes, completionFields);
        }

        @Override
        AnalyzingSuggestHolder getAnalyzingSuggestHolder(FieldMapper<?> mapper) {
            return lookupMap.get(mapper.names().indexName());
        }

        @Override
        public long ramBytesUsed() {
            return ramBytesUsed;
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProvider.java

License:Apache License

@Override
public Completion090PostingsFormat.LookupFactory load(IndexInput input) throws IOException {
    long sizeInBytes = 0;
    int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST);
    if (version >= CODEC_VERSION_CHECKSUMS) {
        CodecUtil.checksumEntireFile(input);
    }//from   w  w  w .  j  a  va2 s  . com
    final long metaPointerPosition = input.length()
            - (version >= CODEC_VERSION_CHECKSUMS ? 8 + CodecUtil.footerLength() : 8);
    final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>();
    input.seek(metaPointerPosition);
    long metaPointer = input.readLong();
    input.seek(metaPointer);
    int numFields = input.readVInt();

    Map<Long, String> meta = new TreeMap<>();
    for (int i = 0; i < numFields; i++) {
        String name = input.readString();
        long offset = input.readVLong();
        meta.put(offset, name);
    }

    for (Map.Entry<Long, String> entry : meta.entrySet()) {
        input.seek(entry.getKey());
        FST<Pair<Long, BytesRef>> fst = new FST<>(input,
                new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
        int maxAnalyzedPathsForOneInput = input.readVInt();
        int maxSurfaceFormsPerAnalyzedForm = input.readVInt();
        int maxGraphExpansions = input.readInt();
        int options = input.readVInt();
        boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPARATORS) != 0;
        boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0;
        boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;

        // first version did not include these three fields, so fall back to old default (before the analyzingsuggester
        // was updated in Lucene, so we cannot use the suggester defaults)
        int sepLabel, payloadSep, endByte, holeCharacter;
        switch (version) {
        case CODEC_VERSION_START:
            sepLabel = 0xFF;
            payloadSep = '\u001f';
            endByte = 0x0;
            holeCharacter = '\u001E';
            break;
        default:
            sepLabel = input.readVInt();
            endByte = input.readVInt();
            payloadSep = input.readVInt();
            holeCharacter = input.readVInt();
        }

        AnalyzingSuggestHolder holder = new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements,
                maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput,
                fst, sepLabel, payloadSep, endByte, holeCharacter);
        sizeInBytes += fst.ramBytesUsed();
        lookupMap.put(entry.getValue(), holder);
    }
    final long ramBytesUsed = sizeInBytes;
    return new Completion090PostingsFormat.LookupFactory() {
        @Override
        public Lookup getLookup(OldCompletionFieldMapper.CompletionFieldType fieldType,
                CompletionSuggestionContext suggestionContext) {
            AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.names().indexName());
            if (analyzingSuggestHolder == null) {
                return null;
            }
            int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0;

            final XAnalyzingSuggester suggester;
            final Automaton queryPrefix = fieldType.requiresContext()
                    ? ContextMapping.ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(),
                            suggestionContext.getContextQueries())
                    : null;

            if (suggestionContext.isFuzzy()) {
                suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix,
                        fieldType.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(),
                        suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(),
                        suggestionContext.getFuzzyMinLength(), suggestionContext.isFuzzyUnicodeAware(),
                        analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
                        analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel,
                        analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte,
                        analyzingSuggestHolder.holeCharacter);
            } else {
                suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix,
                        fieldType.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions,
                        analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst,
                        analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput,
                        analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep,
                        analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter);
            }
            return suggester;
        }

        @Override
        public CompletionStats stats(String... fields) {
            long sizeInBytes = 0;
            ObjectLongHashMap<String> completionFields = null;
            if (fields != null && fields.length > 0) {
                completionFields = new ObjectLongHashMap<>(fields.length);
            }

            for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) {
                sizeInBytes += entry.getValue().fst.ramBytesUsed();
                if (fields == null || fields.length == 0) {
                    continue;
                }
                if (Regex.simpleMatch(fields, entry.getKey())) {
                    long fstSize = entry.getValue().fst.ramBytesUsed();
                    completionFields.addTo(entry.getKey(), fstSize);
                }
            }

            return new CompletionStats(sizeInBytes, completionFields);
        }

        @Override
        AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) {
            return lookupMap.get(fieldType.names().indexName());
        }

        @Override
        public long ramBytesUsed() {
            return ramBytesUsed;
        }

        @Override
        public Collection<Accountable> getChildResources() {
            return Accountables.namedAccountables("field", lookupMap);
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProviderV1.java

License:Apache License

@Override
public LookupFactory load(IndexInput input) throws IOException {
    CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION, CODEC_VERSION);
    final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>();
    input.seek(input.length() - 8);//from   w  w  w .  j  ava2 s  . c o m
    long metaPointer = input.readLong();
    input.seek(metaPointer);
    int numFields = input.readVInt();

    Map<Long, String> meta = new TreeMap<>();
    for (int i = 0; i < numFields; i++) {
        String name = input.readString();
        long offset = input.readVLong();
        meta.put(offset, name);
    }
    long sizeInBytes = 0;
    for (Map.Entry<Long, String> entry : meta.entrySet()) {
        input.seek(entry.getKey());
        FST<Pair<Long, BytesRef>> fst = new FST<>(input,
                new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
        int maxAnalyzedPathsForOneInput = input.readVInt();
        int maxSurfaceFormsPerAnalyzedForm = input.readVInt();
        int maxGraphExpansions = input.readInt();
        int options = input.readVInt();
        boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPARATORS) != 0;
        boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0;
        boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;
        sizeInBytes += fst.ramBytesUsed();
        lookupMap.put(entry.getValue(),
                new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements,
                        maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads,
                        maxAnalyzedPathsForOneInput, fst));
    }
    final long ramBytesUsed = sizeInBytes;
    return new LookupFactory() {
        @Override
        public Lookup getLookup(OldCompletionFieldMapper.CompletionFieldType fieldType,
                CompletionSuggestionContext suggestionContext) {
            AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.names().indexName());
            if (analyzingSuggestHolder == null) {
                return null;
            }
            int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0;

            final Automaton queryPrefix = fieldType.requiresContext()
                    ? ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(),
                            suggestionContext.getContextQueries())
                    : null;

            XAnalyzingSuggester suggester;
            if (suggestionContext.isFuzzy()) {
                suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix,
                        fieldType.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(),
                        suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(),
                        suggestionContext.getFuzzyMinLength(), false, analyzingSuggestHolder.fst,
                        analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput,
                        SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER);
            } else {
                suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix,
                        fieldType.searchAnalyzer(), flags,
                        analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm,
                        analyzingSuggestHolder.maxGraphExpansions,
                        analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst,
                        analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput,
                        SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER);
            }
            return suggester;
        }

        @Override
        public CompletionStats stats(String... fields) {
            long sizeInBytes = 0;
            ObjectLongHashMap<String> completionFields = null;
            if (fields != null && fields.length > 0) {
                completionFields = new ObjectLongHashMap<>(fields.length);
            }

            for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) {
                sizeInBytes += entry.getValue().fst.ramBytesUsed();
                if (fields == null || fields.length == 0) {
                    continue;
                }
                for (String field : fields) {
                    // support for getting fields by regex as in fielddata
                    if (Regex.simpleMatch(field, entry.getKey())) {
                        long fstSize = entry.getValue().fst.ramBytesUsed();
                        completionFields.addTo(field, fstSize);
                    }
                }
            }

            return new CompletionStats(sizeInBytes, completionFields);
        }

        @Override
        AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) {
            return lookupMap.get(fieldType.names().indexName());
        }

        @Override
        public long ramBytesUsed() {
            return ramBytesUsed;
        }

        @Override
        public Collection<Accountable> getChildResources() {
            return Accountables.namedAccountables("field", lookupMap);
        }
    };
}