Example usage for org.apache.lucene.util.fst ByteSequenceOutputs getSingleton

List of usage examples for org.apache.lucene.util.fst ByteSequenceOutputs getSingleton

Introduction

On this page you can find example usages of org.apache.lucene.util.fst.ByteSequenceOutputs.getSingleton().

Prototype

public static ByteSequenceOutputs getSingleton() 

Source Link

Usage

From source file:BuildFST.java

License:Apache License

/**
 * Command-line entry point: builds an FST from the arguments and prints it through
 * {@code Util.toDot} on standard output.
 *
 * <p>Every argument is read as {@code input} or {@code input/output}, split at the last
 * {@code '/'}. When every supplied output parses as a {@code long}, the FST is built with
 * {@code PositiveIntOutputs} (a negative value is rejected); otherwise the outputs are stored
 * as {@code BytesRef} using {@code ByteSequenceOutputs}. An argument without a {@code '/'}
 * receives the "no output" value of the chosen {@code Outputs}.
 *
 * @param args the inputs, each optionally followed by {@code /output}
 * @throws IOException declared for the FST building and dot-writing calls
 * @throws RuntimeException if all outputs are numeric and at least one of them is negative
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static void main(String[] args) throws IOException {

    // Pass 1: look at the outputs only, to choose between numeric and byte-sequence outputs.
    boolean allNumeric = true;
    boolean sawNegative = false;
    for (String arg : args) {
        final int slash = arg.lastIndexOf('/');
        if (slash == -1) {
            continue; // no output on this argument, nothing to classify
        }
        try {
            if (Long.parseLong(arg.substring(slash + 1)) < 0) {
                sawNegative = true;
            }
        } catch (NumberFormatException nfe) {
            // One non-numeric output is enough: everything is treated as bytes.
            allNumeric = false;
            break;
        }
    }

    final Outputs outputs;
    if (!allNumeric) {
        outputs = ByteSequenceOutputs.getSingleton();
    } else if (sawNegative) {
        throw new RuntimeException("can only handle numeric outputs >= 0");
    } else {
        outputs = PositiveIntOutputs.getSingleton();
    }

    // Pass 2: turn each argument into an (input, output) pair.
    final Pair<?>[] inputs = new Pair[args.length];
    int slot = 0;
    for (String arg : args) {
        final int slash = arg.lastIndexOf('/');
        final String input;
        final Object output;
        if (slash != -1) {
            input = arg.substring(0, slash);
            final String outputString = arg.substring(slash + 1);
            if (allNumeric) {
                output = Long.parseLong(outputString);
            } else {
                output = new BytesRef(outputString);
            }
        } else {
            output = outputs.getNoOutput();
            input = arg;
        }
        inputs[slot++] = new Pair(new BytesRef(input), output);
    }
    // The pairs are sorted before being handed to the builder.
    Arrays.sort(inputs);

    final FST<?> fst;
    if (allNumeric) {
        final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
        for (Pair entry : inputs) {
            final IntsRefBuilder scratch = new IntsRefBuilder();
            Util.toIntsRef(entry.input, scratch);
            builder.add(scratch.get(), (Long) entry.output);
        }
        fst = builder.finish();
    } else {
        final Builder<BytesRef> builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, outputs);
        for (Pair entry : inputs) {
            final IntsRefBuilder scratch = new IntsRefBuilder();
            Util.toIntsRef(entry.input, scratch);
            builder.add(scratch.get(), (BytesRef) entry.output);
        }
        fst = builder.finish();
    }

    final PrintWriter dotOut = new PrintWriter(System.out);
    Util.toDot(fst, dotOut, true, true);
}

From source file:com.rocana.lucene.codec.v1.RocanaFieldReader.java

License:Apache License

/**
 * Stores the per-field statistics and pointers handed in by the terms reader, decodes the
 * root block file pointer from {@code rootCode}, and loads the field's FST when an index
 * input is supplied.
 *
 * @param parent the owning terms reader
 * @param fieldInfo the field this reader serves
 * @param numTerms number of terms; asserted to be greater than zero
 * @param rootCode bytes whose leading vLong, shifted right by
 *        {@code RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS}, gives the root block pointer
 * @param sumTotalTermFreq stored as-is
 * @param sumDocFreq stored as-is
 * @param docCount stored as-is
 * @param indexStartFP position in {@code indexIn} at which the FST is read
 * @param longsSize stored as-is
 * @param indexIn input holding the FST, or {@code null} to leave {@code index} unset
 * @param minTerm stored as-is
 * @param maxTerm stored as-is
 * @throws IOException if cloning, seeking or reading the FST fails
 */
RocanaFieldReader(RocanaBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, BytesRef rootCode,
        long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, int longsSize,
        IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
    assert numTerms > 0;

    // Plain copies of the constructor arguments.
    this.parent = parent;
    this.fieldInfo = fieldInfo;
    this.numTerms = numTerms;
    this.rootCode = rootCode;
    this.sumTotalTermFreq = sumTotalTermFreq;
    this.sumDocFreq = sumDocFreq;
    this.docCount = docCount;
    this.indexStartFP = indexStartFP;
    this.longsSize = longsSize;
    this.minTerm = minTerm;
    this.maxTerm = maxTerm;

    // The root code starts with a vLong; its low flag bits are shifted out to get the pointer.
    final ByteArrayDataInput rootCodeIn = new ByteArrayDataInput(rootCode.bytes, rootCode.offset,
            rootCode.length);
    rootBlockFP = rootCodeIn.readVLong() >>> RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;

    if (indexIn == null) {
        index = null;
    } else {
        // Read from a clone so the caller's input is not repositioned by the seek below.
        final IndexInput indexClone = indexIn.clone();
        indexClone.seek(indexStartFP);
        index = new FST<>(indexClone, ByteSequenceOutputs.getSingleton());
    }
}

From source file:org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider.java

License:Apache License

/**
 * Loads every per-field completion FST from {@code input} and returns a {@link LookupFactory}
 * backed by them.
 *
 * <p>Read order, as performed below: the codec header is checked, a {@code long} meta pointer
 * is read from the last 8 bytes, and at that pointer a field count followed by (name, offset)
 * pairs is read. Fields are then visited in ascending offset order; at each offset an FST is
 * read, followed by its suggester settings.
 *
 * @param input the input to read from; it is repositioned by this method
 * @return a factory that builds suggesters from the loaded FSTs and reports their sizes
 * @throws IOException if reading from {@code input} fails
 */
@Override
public LookupFactory load(IndexInput input) throws IOException {
    long totalFstBytes = 0;
    final int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST);
    final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<String, AnalyzingSuggestHolder>();

    // The last 8 bytes hold the position of the field metadata.
    input.seek(input.length() - 8);
    final long metaPointer = input.readLong();
    input.seek(metaPointer);
    final int numFields = input.readVInt();

    // Keyed by offset so the FSTs are visited in ascending file order.
    final Map<Long, String> fieldsByOffset = new TreeMap<Long, String>();
    for (int remaining = numFields; remaining > 0; remaining--) {
        final String fieldName = input.readString();
        fieldsByOffset.put(input.readVLong(), fieldName);
    }

    for (Map.Entry<Long, String> fieldAt : fieldsByOffset.entrySet()) {
        input.seek(fieldAt.getKey());
        final PairOutputs<Long, BytesRef> pairOutputs = new PairOutputs<Long, BytesRef>(
                PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
        final FST<Pair<Long, BytesRef>> fst = new FST<Pair<Long, BytesRef>>(input, pairOutputs);

        // Settings follow the FST; the order of these reads is the on-disk order.
        final int maxAnalyzedPaths = input.readVInt();
        final int maxSurfaceForms = input.readVInt();
        final int graphExpansions = input.readInt();
        final int optionBits = input.readVInt();
        final boolean preserveSep = (optionBits & SERIALIZE_PRESERVE_SEPERATORS) != 0;
        final boolean hasPayloads = (optionBits & SERIALIZE_HAS_PAYLOADS) != 0;
        final boolean preservePositionIncrements = (optionBits & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;

        // The first codec version did not store the separator settings, so fixed values are
        // used for it; every later version reads them from the input.
        final int sepLabel;
        final int payloadSep;
        final int endByte;
        final int holeCharacter;
        if (version == CODEC_VERSION_START) {
            sepLabel = 0xFF;
            payloadSep = 0x1F;
            endByte = 0x0;
            holeCharacter = 0x1E;
        } else {
            sepLabel = input.readVInt();
            endByte = input.readVInt();
            payloadSep = input.readVInt();
            holeCharacter = input.readVInt();
        }

        final AnalyzingSuggestHolder loaded = new AnalyzingSuggestHolder(preserveSep,
                preservePositionIncrements, maxSurfaceForms, graphExpansions, hasPayloads, maxAnalyzedPaths,
                fst, sepLabel, payloadSep, endByte, holeCharacter);
        totalFstBytes += fst.sizeInBytes();
        lookupMap.put(fieldAt.getValue(), loaded);
    }

    final long ramBytesUsed = totalFstBytes;
    return new LookupFactory() {
        /** Builds a suggester for the mapper's field, or returns {@code null} if no FST was loaded for it. */
        @Override
        public Lookup getLookup(FieldMapper<?> mapper, CompletionSuggestionContext suggestionContext) {
            final AnalyzingSuggestHolder holder = lookupMap.get(mapper.names().indexName());
            if (holder == null) {
                return null;
            }
            final int flags = holder.preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0;

            if (!suggestionContext.isFuzzy()) {
                return new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
                        holder.maxSurfaceFormsPerAnalyzedForm, holder.maxGraphExpansions,
                        holder.preservePositionIncrements, holder.fst, holder.hasPayloads,
                        holder.maxAnalyzedPathsForOneInput, holder.sepLabel, holder.payloadSep,
                        holder.endByte, holder.holeCharacter);
            }
            return new XFuzzySuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
                    holder.maxSurfaceFormsPerAnalyzedForm, holder.maxGraphExpansions,
                    suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(),
                    suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(),
                    suggestionContext.isFuzzyUnicodeAware(), holder.fst, holder.hasPayloads,
                    holder.maxAnalyzedPathsForOneInput, holder.sepLabel, holder.payloadSep,
                    holder.endByte, holder.holeCharacter);
        }

        /**
         * Sums the size of all loaded FSTs; when {@code fields} is non-empty, also records the
         * size under each pattern in {@code fields} that matches a loaded field name.
         */
        @Override
        public CompletionStats stats(String... fields) {
            final boolean perField = fields != null && fields.length > 0;
            final ObjectLongOpenHashMap<String> completionFields = perField
                    ? new ObjectLongOpenHashMap<String>(fields.length)
                    : null;

            long total = 0;
            for (Map.Entry<String, AnalyzingSuggestHolder> loaded : lookupMap.entrySet()) {
                total += loaded.getValue().fst.sizeInBytes();
                if (perField) {
                    for (String pattern : fields) {
                        // patterns are matched with Regex.simpleMatch, as for fielddata
                        if (Regex.simpleMatch(pattern, loaded.getKey())) {
                            completionFields.addTo(pattern, loaded.getValue().fst.sizeInBytes());
                        }
                    }
                }
            }
            return new CompletionStats(total, completionFields);
        }

        @Override
        AnalyzingSuggestHolder getAnalyzingSuggestHolder(FieldMapper<?> mapper) {
            return lookupMap.get(mapper.names().indexName());
        }

        @Override
        public long ramBytesUsed() {
            return ramBytesUsed;
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProviderV1.java

License:Apache License

/**
 * Loads every per-field completion FST from {@code input} and returns a {@link LookupFactory}
 * backed by them.
 *
 * <p>Read order, as performed below: the codec header is checked against the single
 * {@code CODEC_VERSION}, a {@code long} meta pointer is read from the last 8 bytes, and at
 * that pointer a field count followed by (name, offset) pairs is read. Fields are then visited
 * in ascending offset order; at each offset an FST is read, followed by its suggester settings.
 *
 * @param input the input to read from; it is repositioned by this method
 * @return a factory that builds suggesters from the loaded FSTs and reports their sizes
 * @throws IOException if reading from {@code input} fails
 */
@Override
public LookupFactory load(IndexInput input) throws IOException {
    CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION, CODEC_VERSION);
    final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<String, AnalyzingSuggestHolder>();

    // The last 8 bytes hold the position of the field metadata.
    input.seek(input.length() - 8);
    final long metaPointer = input.readLong();
    input.seek(metaPointer);
    final int numFields = input.readVInt();

    // Keyed by offset so the FSTs are visited in ascending file order.
    final Map<Long, String> fieldsByOffset = new TreeMap<Long, String>();
    for (int remaining = numFields; remaining > 0; remaining--) {
        final String fieldName = input.readString();
        fieldsByOffset.put(input.readVLong(), fieldName);
    }

    long totalFstBytes = 0;
    for (Map.Entry<Long, String> fieldAt : fieldsByOffset.entrySet()) {
        input.seek(fieldAt.getKey());
        final PairOutputs<Long, BytesRef> pairOutputs = new PairOutputs<Long, BytesRef>(
                PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
        final FST<Pair<Long, BytesRef>> fst = new FST<Pair<Long, BytesRef>>(input, pairOutputs);

        // Settings follow the FST; the order of these reads is the on-disk order.
        final int maxAnalyzedPaths = input.readVInt();
        final int maxSurfaceForms = input.readVInt();
        final int graphExpansions = input.readInt();
        final int optionBits = input.readVInt();
        final boolean preserveSep = (optionBits & SERIALIZE_PRESERVE_SEPERATORS) != 0;
        final boolean hasPayloads = (optionBits & SERIALIZE_HAS_PAYLOADS) != 0;
        final boolean preservePositionIncrements = (optionBits & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;

        totalFstBytes += fst.sizeInBytes();
        final AnalyzingSuggestHolder loaded = new AnalyzingSuggestHolder(preserveSep,
                preservePositionIncrements, maxSurfaceForms, graphExpansions, hasPayloads, maxAnalyzedPaths,
                fst);
        lookupMap.put(fieldAt.getValue(), loaded);
    }

    final long ramBytesUsed = totalFstBytes;
    return new LookupFactory() {
        /** Builds a suggester for the mapper's field, or returns {@code null} if no FST was loaded for it. */
        @Override
        public Lookup getLookup(FieldMapper<?> mapper, CompletionSuggestionContext suggestionContext) {
            final AnalyzingSuggestHolder holder = lookupMap.get(mapper.names().indexName());
            if (holder == null) {
                return null;
            }
            final int flags = holder.preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0;

            // This format stores no separator settings, so the class constants are passed.
            if (!suggestionContext.isFuzzy()) {
                return new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
                        holder.maxSurfaceFormsPerAnalyzedForm, holder.maxGraphExpansions,
                        holder.preservePositionIncrements, holder.fst, holder.hasPayloads,
                        holder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE,
                        XAnalyzingSuggester.HOLE_CHARACTER);
            }
            return new XFuzzySuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
                    holder.maxSurfaceFormsPerAnalyzedForm, holder.maxGraphExpansions,
                    suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(),
                    suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), false,
                    holder.fst, holder.hasPayloads, holder.maxAnalyzedPathsForOneInput, SEP_LABEL,
                    PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);
        }

        /**
         * Sums the size of all loaded FSTs; when {@code fields} is non-empty, also records the
         * size under each pattern in {@code fields} that matches a loaded field name.
         */
        @Override
        public CompletionStats stats(String... fields) {
            final boolean perField = fields != null && fields.length > 0;
            final ObjectLongOpenHashMap<String> completionFields = perField
                    ? new ObjectLongOpenHashMap<String>(fields.length)
                    : null;

            long total = 0;
            for (Map.Entry<String, AnalyzingSuggestHolder> loaded : lookupMap.entrySet()) {
                total += loaded.getValue().fst.sizeInBytes();
                if (perField) {
                    for (String pattern : fields) {
                        // patterns are matched with Regex.simpleMatch, as for fielddata
                        if (Regex.simpleMatch(pattern, loaded.getKey())) {
                            completionFields.addTo(pattern, loaded.getValue().fst.sizeInBytes());
                        }
                    }
                }
            }
            return new CompletionStats(total, completionFields);
        }

        @Override
        AnalyzingSuggestHolder getAnalyzingSuggestHolder(FieldMapper<?> mapper) {
            return lookupMap.get(mapper.names().indexName());
        }

        @Override
        public long ramBytesUsed() {
            return ramBytesUsed;
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProvider.java

License:Apache License

/**
 * Loads every per-field completion FST from {@code input} and returns a
 * {@link Completion090PostingsFormat.LookupFactory} backed by them.
 *
 * <p>Read order, as performed below: the codec header is checked; for versions at or above
 * {@code CODEC_VERSION_CHECKSUMS} the whole file is checksummed and the meta pointer sits in
 * front of the codec footer, otherwise it occupies the last 8 bytes. At the meta pointer a
 * field count followed by (name, offset) pairs is read. Fields are then visited in ascending
 * offset order; at each offset an FST is read, followed by its suggester settings.
 *
 * @param input the input to read from; it is repositioned by this method
 * @return a factory that builds suggesters from the loaded FSTs and reports their sizes
 * @throws IOException if reading or checksum verification fails
 */
@Override
public Completion090PostingsFormat.LookupFactory load(IndexInput input) throws IOException {
    long totalFstBytes = 0;
    final int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST);
    final boolean hasChecksums = version >= CODEC_VERSION_CHECKSUMS;
    if (hasChecksums) {
        CodecUtil.checksumEntireFile(input);
    }

    // The 8-byte meta pointer is the last thing before the footer (if the version has one).
    final long fileLength = input.length();
    final int trailerLength = hasChecksums ? 8 + CodecUtil.footerLength() : 8;
    final long metaPointerPosition = fileLength - trailerLength;
    final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>();
    input.seek(metaPointerPosition);
    final long metaPointer = input.readLong();
    input.seek(metaPointer);
    final int numFields = input.readVInt();

    // Keyed by offset so the FSTs are visited in ascending file order.
    final Map<Long, String> fieldsByOffset = new TreeMap<>();
    for (int remaining = numFields; remaining > 0; remaining--) {
        final String fieldName = input.readString();
        fieldsByOffset.put(input.readVLong(), fieldName);
    }

    for (Map.Entry<Long, String> fieldAt : fieldsByOffset.entrySet()) {
        input.seek(fieldAt.getKey());
        final FST<Pair<Long, BytesRef>> fst = new FST<>(input,
                new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));

        // Settings follow the FST; the order of these reads is the on-disk order.
        final int maxAnalyzedPaths = input.readVInt();
        final int maxSurfaceForms = input.readVInt();
        final int graphExpansions = input.readInt();
        final int optionBits = input.readVInt();
        final boolean preserveSep = (optionBits & SERIALIZE_PRESERVE_SEPARATORS) != 0;
        final boolean hasPayloads = (optionBits & SERIALIZE_HAS_PAYLOADS) != 0;
        final boolean preservePositionIncrements = (optionBits & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;

        // The first codec version did not store the separator settings, so fixed values are
        // used for it; every later version reads them from the input.
        final int sepLabel;
        final int payloadSep;
        final int endByte;
        final int holeCharacter;
        if (version == CODEC_VERSION_START) {
            sepLabel = 0xFF;
            payloadSep = 0x1F;
            endByte = 0x0;
            holeCharacter = 0x1E;
        } else {
            sepLabel = input.readVInt();
            endByte = input.readVInt();
            payloadSep = input.readVInt();
            holeCharacter = input.readVInt();
        }

        final AnalyzingSuggestHolder loaded = new AnalyzingSuggestHolder(preserveSep,
                preservePositionIncrements, maxSurfaceForms, graphExpansions, hasPayloads, maxAnalyzedPaths,
                fst, sepLabel, payloadSep, endByte, holeCharacter);
        totalFstBytes += fst.ramBytesUsed();
        lookupMap.put(fieldAt.getValue(), loaded);
    }

    final long ramBytesUsed = totalFstBytes;
    return new Completion090PostingsFormat.LookupFactory() {
        /** Builds a suggester for the field type, or returns {@code null} if no FST was loaded for it. */
        @Override
        public Lookup getLookup(OldCompletionFieldMapper.CompletionFieldType fieldType,
                CompletionSuggestionContext suggestionContext) {
            final AnalyzingSuggestHolder holder = lookupMap.get(fieldType.names().indexName());
            if (holder == null) {
                return null;
            }
            final int flags = holder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0;

            // A query prefix automaton is only built for field types that require context.
            final Automaton queryPrefix;
            if (fieldType.requiresContext()) {
                queryPrefix = ContextMapping.ContextQuery.toAutomaton(holder.getPreserveSeparator(),
                        suggestionContext.getContextQueries());
            } else {
                queryPrefix = null;
            }

            if (!suggestionContext.isFuzzy()) {
                return new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix,
                        fieldType.searchAnalyzer(), flags, holder.maxSurfaceFormsPerAnalyzedForm,
                        holder.maxGraphExpansions, holder.preservePositionIncrements, holder.fst,
                        holder.hasPayloads, holder.maxAnalyzedPathsForOneInput, holder.sepLabel,
                        holder.payloadSep, holder.endByte, holder.holeCharacter);
            }
            return new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(),
                    flags, holder.maxSurfaceFormsPerAnalyzedForm, holder.maxGraphExpansions,
                    suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(),
                    suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(),
                    suggestionContext.isFuzzyUnicodeAware(), holder.fst, holder.hasPayloads,
                    holder.maxAnalyzedPathsForOneInput, holder.sepLabel, holder.payloadSep,
                    holder.endByte, holder.holeCharacter);
        }

        /**
         * Sums the RAM usage of all loaded FSTs; when {@code fields} is non-empty, also records
         * the usage of each loaded field whose name matches {@code fields}.
         */
        @Override
        public CompletionStats stats(String... fields) {
            final boolean perField = fields != null && fields.length > 0;
            final ObjectLongHashMap<String> completionFields = perField
                    ? new ObjectLongHashMap<String>(fields.length)
                    : null;

            long total = 0;
            for (Map.Entry<String, AnalyzingSuggestHolder> loaded : lookupMap.entrySet()) {
                total += loaded.getValue().fst.ramBytesUsed();
                if (perField && Regex.simpleMatch(fields, loaded.getKey())) {
                    completionFields.addTo(loaded.getKey(), loaded.getValue().fst.ramBytesUsed());
                }
            }
            return new CompletionStats(total, completionFields);
        }

        @Override
        AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) {
            return lookupMap.get(fieldType.names().indexName());
        }

        @Override
        public long ramBytesUsed() {
            return ramBytesUsed;
        }

        @Override
        public Collection<Accountable> getChildResources() {
            return Accountables.namedAccountables("field", lookupMap);
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProviderV1.java

License:Apache License

/**
 * Loads every per-field completion FST from {@code input} and returns a {@link LookupFactory}
 * backed by them.
 *
 * <p>Read order, as performed below: the codec header is checked against the single
 * {@code CODEC_VERSION}, a {@code long} meta pointer is read from the last 8 bytes, and at
 * that pointer a field count followed by (name, offset) pairs is read. Fields are then visited
 * in ascending offset order; at each offset an FST is read, followed by its suggester settings.
 *
 * @param input the input to read from; it is repositioned by this method
 * @return a factory that builds suggesters from the loaded FSTs and reports their sizes
 * @throws IOException if reading from {@code input} fails
 */
@Override
public LookupFactory load(IndexInput input) throws IOException {
    CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION, CODEC_VERSION);
    final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>();

    // The last 8 bytes hold the position of the field metadata.
    input.seek(input.length() - 8);
    final long metaPointer = input.readLong();
    input.seek(metaPointer);
    final int numFields = input.readVInt();

    // Keyed by offset so the FSTs are visited in ascending file order.
    final Map<Long, String> fieldsByOffset = new TreeMap<>();
    for (int remaining = numFields; remaining > 0; remaining--) {
        final String fieldName = input.readString();
        fieldsByOffset.put(input.readVLong(), fieldName);
    }

    long totalFstBytes = 0;
    for (Map.Entry<Long, String> fieldAt : fieldsByOffset.entrySet()) {
        input.seek(fieldAt.getKey());
        final FST<Pair<Long, BytesRef>> fst = new FST<>(input,
                new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));

        // Settings follow the FST; the order of these reads is the on-disk order.
        final int maxAnalyzedPaths = input.readVInt();
        final int maxSurfaceForms = input.readVInt();
        final int graphExpansions = input.readInt();
        final int optionBits = input.readVInt();
        final boolean preserveSep = (optionBits & SERIALIZE_PRESERVE_SEPARATORS) != 0;
        final boolean hasPayloads = (optionBits & SERIALIZE_HAS_PAYLOADS) != 0;
        final boolean preservePositionIncrements = (optionBits & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;

        totalFstBytes += fst.ramBytesUsed();
        final AnalyzingSuggestHolder loaded = new AnalyzingSuggestHolder(preserveSep,
                preservePositionIncrements, maxSurfaceForms, graphExpansions, hasPayloads, maxAnalyzedPaths,
                fst);
        lookupMap.put(fieldAt.getValue(), loaded);
    }

    final long ramBytesUsed = totalFstBytes;
    return new LookupFactory() {
        /** Builds a suggester for the field type, or returns {@code null} if no FST was loaded for it. */
        @Override
        public Lookup getLookup(OldCompletionFieldMapper.CompletionFieldType fieldType,
                CompletionSuggestionContext suggestionContext) {
            final AnalyzingSuggestHolder holder = lookupMap.get(fieldType.names().indexName());
            if (holder == null) {
                return null;
            }
            final int flags = holder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0;

            // A query prefix automaton is only built for field types that require context.
            final Automaton queryPrefix;
            if (fieldType.requiresContext()) {
                queryPrefix = ContextQuery.toAutomaton(holder.getPreserveSeparator(),
                        suggestionContext.getContextQueries());
            } else {
                queryPrefix = null;
            }

            // This format stores no separator settings, so the class constants are passed.
            if (!suggestionContext.isFuzzy()) {
                return new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix,
                        fieldType.searchAnalyzer(), flags, holder.maxSurfaceFormsPerAnalyzedForm,
                        holder.maxGraphExpansions, holder.preservePositionIncrements, holder.fst,
                        holder.hasPayloads, holder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP,
                        END_BYTE, HOLE_CHARACTER);
            }
            return new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(),
                    flags, holder.maxSurfaceFormsPerAnalyzedForm, holder.maxGraphExpansions,
                    suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(),
                    suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), false,
                    holder.fst, holder.hasPayloads, holder.maxAnalyzedPathsForOneInput, SEP_LABEL,
                    PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER);
        }

        /**
         * Sums the RAM usage of all loaded FSTs; when {@code fields} is non-empty, also records
         * the usage under each pattern in {@code fields} that matches a loaded field name.
         */
        @Override
        public CompletionStats stats(String... fields) {
            final boolean perField = fields != null && fields.length > 0;
            final ObjectLongHashMap<String> completionFields = perField
                    ? new ObjectLongHashMap<String>(fields.length)
                    : null;

            long total = 0;
            for (Map.Entry<String, AnalyzingSuggestHolder> loaded : lookupMap.entrySet()) {
                total += loaded.getValue().fst.ramBytesUsed();
                if (perField) {
                    for (String pattern : fields) {
                        // patterns are matched with Regex.simpleMatch, as for fielddata
                        if (Regex.simpleMatch(pattern, loaded.getKey())) {
                            completionFields.addTo(pattern, loaded.getValue().fst.ramBytesUsed());
                        }
                    }
                }
            }
            return new CompletionStats(total, completionFields);
        }

        @Override
        AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) {
            return lookupMap.get(fieldType.names().indexName());
        }

        @Override
        public long ramBytesUsed() {
            return ramBytesUsed;
        }

        @Override
        public Collection<Accountable> getChildResources() {
            return Accountables.namedAccountables("field", lookupMap);
        }
    };
}

From source file:org.elasticsearch.search.suggest.completion2x.AnalyzingCompletionLookupProvider.java

License:Apache License

/**
 * Loads every per-field completion FST from {@code input} and returns a {@link LookupFactory}
 * backed by them.
 *
 * <p>Read order, as performed below: the codec header is checked; for versions at or above
 * {@code CODEC_VERSION_CHECKSUMS} the whole file is checksummed and the meta pointer sits in
 * front of the codec footer, otherwise it occupies the last 8 bytes. At the meta pointer a
 * field count followed by (name, offset) pairs is read. Fields are then visited in ascending
 * offset order; at each offset an FST is read, followed by its suggester settings.
 *
 * @param input the input to read from; it is repositioned by this method
 * @return a factory that builds suggesters from the loaded FSTs and reports their sizes
 * @throws IOException if reading or checksum verification fails
 */
@Override
public LookupFactory load(IndexInput input) throws IOException {
    long totalFstBytes = 0;
    final int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST);
    final boolean hasChecksums = version >= CODEC_VERSION_CHECKSUMS;
    if (hasChecksums) {
        CodecUtil.checksumEntireFile(input);
    }

    // The 8-byte meta pointer is the last thing before the footer (if the version has one).
    final long fileLength = input.length();
    final int trailerLength = hasChecksums ? 8 + CodecUtil.footerLength() : 8;
    final long metaPointerPosition = fileLength - trailerLength;
    final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>();
    input.seek(metaPointerPosition);
    final long metaPointer = input.readLong();
    input.seek(metaPointer);
    final int numFields = input.readVInt();

    // Keyed by offset so the FSTs are visited in ascending file order.
    final Map<Long, String> fieldsByOffset = new TreeMap<>();
    for (int remaining = numFields; remaining > 0; remaining--) {
        final String fieldName = input.readString();
        fieldsByOffset.put(input.readVLong(), fieldName);
    }

    for (Map.Entry<Long, String> fieldAt : fieldsByOffset.entrySet()) {
        input.seek(fieldAt.getKey());
        final FST<Pair<Long, BytesRef>> fst = new FST<>(input,
                new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));

        // Settings follow the FST; the order of these reads is the on-disk order.
        final int maxAnalyzedPaths = input.readVInt();
        final int maxSurfaceForms = input.readVInt();
        final int graphExpansions = input.readInt();
        final int optionBits = input.readVInt();
        final boolean preserveSep = (optionBits & SERIALIZE_PRESERVE_SEPARATORS) != 0;
        final boolean hasPayloads = (optionBits & SERIALIZE_HAS_PAYLOADS) != 0;
        final boolean preservePositionIncrements = (optionBits & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;

        // The first codec version did not store the separator settings, so fixed values are
        // used for it; every later version reads them from the input.
        final int sepLabel;
        final int payloadSep;
        final int endByte;
        final int holeCharacter;
        if (version == CODEC_VERSION_START) {
            sepLabel = 0xFF;
            payloadSep = 0x1F;
            endByte = 0x0;
            holeCharacter = 0x1E;
        } else {
            sepLabel = input.readVInt();
            endByte = input.readVInt();
            payloadSep = input.readVInt();
            holeCharacter = input.readVInt();
        }

        final AnalyzingSuggestHolder loaded = new AnalyzingSuggestHolder(preserveSep,
                preservePositionIncrements, maxSurfaceForms, graphExpansions, hasPayloads, maxAnalyzedPaths,
                fst, sepLabel, payloadSep, endByte, holeCharacter);
        totalFstBytes += fst.ramBytesUsed();
        lookupMap.put(fieldAt.getValue(), loaded);
    }

    final long ramBytesUsed = totalFstBytes;
    return new LookupFactory() {
        /** Builds a suggester for the field type, or returns {@code null} if no FST was loaded for it. */
        @Override
        public Lookup getLookup(CompletionFieldMapper2x.CompletionFieldType fieldType,
                CompletionSuggestionContext suggestionContext) {
            final AnalyzingSuggestHolder holder = lookupMap.get(fieldType.name());
            if (holder == null) {
                return null;
            }
            final int flags = holder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0;

            // A query prefix automaton is only built for field types that require context.
            final Automaton queryPrefix;
            if (fieldType.requiresContext()) {
                queryPrefix = ContextQuery.toAutomaton(holder.getPreserveSeparator(),
                        suggestionContext.getContextQueries());
            } else {
                queryPrefix = null;
            }

            // Fuzzy options being present is what selects the fuzzy suggester.
            final FuzzyOptions fuzzyOptions = suggestionContext.getFuzzyOptions();
            if (fuzzyOptions == null) {
                return new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix,
                        fieldType.searchAnalyzer(), flags, holder.maxSurfaceFormsPerAnalyzedForm,
                        holder.maxGraphExpansions, holder.preservePositionIncrements, holder.fst,
                        holder.hasPayloads, holder.maxAnalyzedPathsForOneInput, holder.sepLabel,
                        holder.payloadSep, holder.endByte, holder.holeCharacter);
            }
            return new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(),
                    flags, holder.maxSurfaceFormsPerAnalyzedForm, holder.maxGraphExpansions,
                    fuzzyOptions.getEditDistance(), fuzzyOptions.isTranspositions(),
                    fuzzyOptions.getFuzzyPrefixLength(), fuzzyOptions.getFuzzyMinLength(),
                    fuzzyOptions.isUnicodeAware(), holder.fst, holder.hasPayloads,
                    holder.maxAnalyzedPathsForOneInput, holder.sepLabel, holder.payloadSep,
                    holder.endByte, holder.holeCharacter);
        }

        /**
         * Sums the RAM usage of all loaded FSTs; when {@code fields} is non-empty, also records
         * the usage of each loaded field whose name matches {@code fields}.
         */
        @Override
        public CompletionStats stats(String... fields) {
            final boolean perField = fields != null && fields.length > 0;
            final ObjectLongHashMap<String> completionFields = perField
                    ? new ObjectLongHashMap<String>(fields.length)
                    : null;

            long total = 0;
            for (Map.Entry<String, AnalyzingSuggestHolder> loaded : lookupMap.entrySet()) {
                total += loaded.getValue().fst.ramBytesUsed();
                if (perField && Regex.simpleMatch(fields, loaded.getKey())) {
                    completionFields.addTo(loaded.getKey(), loaded.getValue().fst.ramBytesUsed());
                }
            }
            return new CompletionStats(total, completionFields);
        }

        @Override
        AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) {
            return lookupMap.get(fieldType.name());
        }

        @Override
        public long ramBytesUsed() {
            return ramBytesUsed;
        }

        @Override
        public Collection<Accountable> getChildResources() {
            return Accountables.namedAccountables("field", lookupMap);
        }
    };
}