List of usage examples for org.apache.lucene.util.fst PositiveIntOutputs getSingleton
public static PositiveIntOutputs getSingleton()
From source file:BuildFST.java
License:Apache License
/**
 * Builds an FST from the command-line arguments and writes it to standard output via
 * {@code Util.toDot}.
 *
 * <p>Each argument is either {@code input} or {@code input/output}; the text after the last
 * {@code '/'} is the output. If every supplied output parses as a {@code long}, the FST uses
 * {@link PositiveIntOutputs}; otherwise outputs are stored as byte sequences.
 *
 * @param args the entries to add to the FST
 * @throws IOException if building or printing the FST fails
 * @throws RuntimeException if all outputs are numeric and at least one is negative
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static void main(String[] args) throws IOException {
  // Pass 1: decide the output type. Scanning stops at the first non-numeric output.
  boolean numeric = true;
  boolean negative = false;
  for (String arg : args) {
    int slash = arg.lastIndexOf('/');
    if (slash == -1) {
      continue;
    }
    try {
      if (Long.parseLong(arg.substring(slash + 1)) < 0) {
        negative = true;
      }
    } catch (NumberFormatException nfe) {
      numeric = false;
      break;
    }
  }

  if (numeric && negative) {
    throw new RuntimeException("can only handle numeric outputs >= 0");
  }

  Outputs outputs;
  if (numeric) {
    outputs = PositiveIntOutputs.getSingleton();
  } else {
    outputs = ByteSequenceOutputs.getSingleton();
  }

  // Pass 2: split every argument into its (input, output) pair.
  Pair<?>[] inputs = new Pair[args.length];
  for (int i = 0; i < args.length; i++) {
    String arg = args[i];
    int slash = arg.lastIndexOf('/');
    String input;
    Object output;
    if (slash != -1) {
      input = arg.substring(0, slash);
      String outputText = arg.substring(slash + 1);
      if (numeric) {
        output = Long.parseLong(outputText);
      } else {
        output = new BytesRef(outputText);
      }
    } else {
      // No explicit output: use the output type's "no output" value.
      input = arg;
      output = outputs.getNoOutput();
    }
    inputs[i] = new Pair(new BytesRef(input), output);
  }

  // Entries are sorted before being handed to the builder.
  Arrays.sort(inputs);

  FST<?> fst;
  if (numeric) {
    Builder<Long> longBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
    for (Pair entry : inputs) {
      IntsRefBuilder scratch = new IntsRefBuilder();
      Util.toIntsRef(entry.input, scratch);
      longBuilder.add(scratch.get(), (Long) entry.output);
    }
    fst = longBuilder.finish();
  } else {
    Builder<BytesRef> bytesBuilder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, outputs);
    for (Pair entry : inputs) {
      IntsRefBuilder scratch = new IntsRefBuilder();
      Util.toIntsRef(entry.input, scratch);
      bytesBuilder.add(scratch.get(), (BytesRef) entry.output);
    }
    fst = bytesBuilder.finish();
  }

  Util.toDot(fst, new PrintWriter(System.out), true, true);
}
From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.dict.TokenInfoDictionary.java
License:Apache License
/**
 * Loads the FST stored in the resource named by {@code FST_FILENAME_SUFFIX} and wraps it in a
 * {@link TokenInfoFST}.
 *
 * @throws IOException if the resource cannot be opened, read, or closed
 */
private TokenInfoDictionary() throws IOException {
  super();
  final FST<Long> loaded;
  // try-with-resources closes the stream on every path. If loading fails, a failure while
  // closing is attached as a suppressed exception instead of masking the original error.
  try (InputStream in = new BufferedInputStream(getResource(FST_FILENAME_SUFFIX))) {
    loaded = new FST<>(new InputStreamDataInput(in), PositiveIntOutputs.getSingleton());
  }
  // TODO: some way to configure?
  this.fst = new TokenInfoFST(loaded, true);
}
From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.dict.UserDictionary.java
License:Apache License
private UserDictionary(List<String[]> featureEntries) throws IOException { int wordId = CUSTOM_DICTIONARY_WORD_ID_OFFSET; // TODO: should we allow multiple segmentations per input 'phrase'? // the old treemap didn't support this either, and i'm not sure if it's needed/useful? Collections.sort(featureEntries, new Comparator<String[]>() { @Override//from w w w .j a v a 2 s .c o m public int compare(String[] left, String[] right) { return left[0].compareTo(right[0]); } }); List<String> data = new ArrayList<>(featureEntries.size()); List<int[]> segmentations = new ArrayList<>(featureEntries.size()); PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton(); Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, fstOutput); IntsRefBuilder scratch = new IntsRefBuilder(); long ord = 0; for (String[] values : featureEntries) { String[] segmentation = values[1].replaceAll(" *", " ").split(" "); String[] readings = values[2].replaceAll(" *", " ").split(" "); String pos = values[3]; if (segmentation.length != readings.length) { throw new RuntimeException("Illegal user dictionary entry " + values[0] + " - the number of segmentations (" + segmentation.length + ")" + " does not the match number of readings (" + readings.length + ")"); } int[] wordIdAndLength = new int[segmentation.length + 1]; // wordId offset, length, length.... 
wordIdAndLength[0] = wordId; for (int i = 0; i < segmentation.length; i++) { wordIdAndLength[i + 1] = segmentation[i].length(); data.add(readings[i] + INTERNAL_SEPARATOR + pos); wordId++; } // add mapping to FST String token = values[0]; scratch.grow(token.length()); scratch.setLength(token.length()); for (int i = 0; i < token.length(); i++) { scratch.setIntAt(i, (int) token.charAt(i)); } fstBuilder.add(scratch.get(), ord); segmentations.add(wordIdAndLength); ord++; } this.fst = new TokenInfoFST(fstBuilder.finish(), false); this.data = data.toArray(new String[data.size()]); this.segmentations = segmentations.toArray(new int[segmentations.size()][]); }
From source file:com.meizu.nlp.classification.BooleanPerceptronClassifier.java
License:Apache License
private void updateFST(SortedMap<String, Double> weights) throws IOException { PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs); BytesRefBuilder scratchBytes = new BytesRefBuilder(); IntsRefBuilder scratchInts = new IntsRefBuilder(); for (Map.Entry<String, Double> entry : weights.entrySet()) { scratchBytes.copyChars(entry.getKey()); fstBuilder.add(Util.toIntsRef(scratchBytes.get(), scratchInts), entry.getValue().longValue()); }//from w w w. java2s.c o m fst = fstBuilder.finish(); }
From source file:examples.fst.FstTest.java
public static void main(String[] args) throws IOException { // Input values (keys). These must be provided to Builder in Unicode sorted order! String inputValues[] = { "cat", "dog", "dogs" }; long outputValues[] = { 5, 7, 12 }; PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs); BytesRefBuilder scratchBytes = new BytesRefBuilder(); IntsRefBuilder scratchInts = new IntsRefBuilder(); for (int i = 0; i < inputValues.length; i++) { scratchBytes.copyChars(inputValues[i]); builder.add(Util.toIntsRef(scratchBytes.toBytesRef(), scratchInts), outputValues[i]); }//from ww w .j a v a 2 s .com FST<Long> fst = builder.finish(); Long value = Util.get(fst, new BytesRef("dog")); System.out.println(value); // 7 // Only works because outputs are also in sorted order IntsRef key = Util.getByOutput(fst, 12); System.out.println(Util.toBytesRef(key, scratchBytes).utf8ToString()); // dogs }
From source file:org.elasticsearch.index.fielddata.plain.FSTBytesIndexFieldData.java
License:Apache License
@Override public FSTBytesAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception { AtomicReader reader = context.reader(); Terms terms = reader.terms(getFieldNames().indexName()); FSTBytesAtomicFieldData data = null; // TODO: Use an actual estimator to estimate before loading. NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker()); if (terms == null) { data = FSTBytesAtomicFieldData.empty(reader.maxDoc()); estimator.afterLoad(null, data.getMemorySizeInBytes()); return data; }//www.ja v a 2 s .co m PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); org.apache.lucene.util.fst.Builder<Long> fstBuilder = new org.apache.lucene.util.fst.Builder<Long>( INPUT_TYPE.BYTE1, outputs); final IntsRef scratch = new IntsRef(); final long numTerms; if (regex == null && frequency == null) { numTerms = terms.size(); } else { numTerms = -1; } final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat( "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO); OrdinalsBuilder builder = new OrdinalsBuilder(numTerms, reader.maxDoc(), acceptableTransientOverheadRatio); boolean success = false; try { // we don't store an ord 0 in the FST since we could have an empty string in there and FST don't support // empty strings twice. ie. them merge fails for long output. 
TermsEnum termsEnum = filter(terms, reader); DocsEnum docsEnum = null; for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) { final long termOrd = builder.nextOrdinal(); assert termOrd > 0; fstBuilder.add(Util.toIntsRef(term, scratch), (long) termOrd); docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE); for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) { builder.addDoc(docId); } } FST<Long> fst = fstBuilder.finish(); final Ordinals ordinals = builder.build(fieldDataType.getSettings()); data = new FSTBytesAtomicFieldData(fst, ordinals); success = true; return data; } finally { if (success) { estimator.afterLoad(null, data.getMemorySizeInBytes()); } builder.close(); } }
From source file:org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider.java
License:Apache License
@Override public LookupFactory load(IndexInput input) throws IOException { long sizeInBytes = 0; int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST); final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<String, AnalyzingSuggestHolder>(); input.seek(input.length() - 8);//from w w w .j a va2 s .co m long metaPointer = input.readLong(); input.seek(metaPointer); int numFields = input.readVInt(); Map<Long, String> meta = new TreeMap<Long, String>(); for (int i = 0; i < numFields; i++) { String name = input.readString(); long offset = input.readVLong(); meta.put(offset, name); } for (Map.Entry<Long, String> entry : meta.entrySet()) { input.seek(entry.getKey()); FST<Pair<Long, BytesRef>> fst = new FST<Pair<Long, BytesRef>>(input, new PairOutputs<Long, BytesRef>( PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); int maxAnalyzedPathsForOneInput = input.readVInt(); int maxSurfaceFormsPerAnalyzedForm = input.readVInt(); int maxGraphExpansions = input.readInt(); int options = input.readVInt(); boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPERATORS) != 0; boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0; boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0; // first version did not include these three fields, so fall back to old default (before the analyzingsuggester // was updated in Lucene, so we cannot use the suggester defaults) int sepLabel, payloadSep, endByte, holeCharacter; switch (version) { case CODEC_VERSION_START: sepLabel = 0xFF; payloadSep = '\u001f'; endByte = 0x0; holeCharacter = '\u001E'; break; default: sepLabel = input.readVInt(); endByte = input.readVInt(); payloadSep = input.readVInt(); holeCharacter = input.readVInt(); } AnalyzingSuggestHolder holder = new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput, fst, 
sepLabel, payloadSep, endByte, holeCharacter); sizeInBytes += fst.sizeInBytes(); lookupMap.put(entry.getValue(), holder); } final long ramBytesUsed = sizeInBytes; return new LookupFactory() { @Override public Lookup getLookup(FieldMapper<?> mapper, CompletionSuggestionContext suggestionContext) { AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.names().indexName()); if (analyzingSuggestHolder == null) { return null; } int flags = analyzingSuggestHolder.preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0; XAnalyzingSuggester suggester; if (suggestionContext.isFuzzy()) { suggester = new XFuzzySuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), suggestionContext.isFuzzyUnicodeAware(), analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter); } else { suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter); } return suggester; } @Override public CompletionStats stats(String... 
fields) { long sizeInBytes = 0; ObjectLongOpenHashMap<String> completionFields = null; if (fields != null && fields.length > 0) { completionFields = new ObjectLongOpenHashMap<String>(fields.length); } for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) { sizeInBytes += entry.getValue().fst.sizeInBytes(); if (fields == null || fields.length == 0) { continue; } for (String field : fields) { // support for getting fields by regex as in fielddata if (Regex.simpleMatch(field, entry.getKey())) { long fstSize = entry.getValue().fst.sizeInBytes(); completionFields.addTo(field, fstSize); } } } return new CompletionStats(sizeInBytes, completionFields); } @Override AnalyzingSuggestHolder getAnalyzingSuggestHolder(FieldMapper<?> mapper) { return lookupMap.get(mapper.names().indexName()); } @Override public long ramBytesUsed() { return ramBytesUsed; } }; }
From source file:org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProviderV1.java
License:Apache License
@Override public LookupFactory load(IndexInput input) throws IOException { CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION, CODEC_VERSION); final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<String, AnalyzingSuggestHolder>(); input.seek(input.length() - 8);// w w w. j a v a 2s . c o m long metaPointer = input.readLong(); input.seek(metaPointer); int numFields = input.readVInt(); Map<Long, String> meta = new TreeMap<Long, String>(); for (int i = 0; i < numFields; i++) { String name = input.readString(); long offset = input.readVLong(); meta.put(offset, name); } long sizeInBytes = 0; for (Map.Entry<Long, String> entry : meta.entrySet()) { input.seek(entry.getKey()); FST<Pair<Long, BytesRef>> fst = new FST<Pair<Long, BytesRef>>(input, new PairOutputs<Long, BytesRef>( PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); int maxAnalyzedPathsForOneInput = input.readVInt(); int maxSurfaceFormsPerAnalyzedForm = input.readVInt(); int maxGraphExpansions = input.readInt(); int options = input.readVInt(); boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPERATORS) != 0; boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0; boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0; sizeInBytes += fst.sizeInBytes(); lookupMap.put(entry.getValue(), new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput, fst)); } final long ramBytesUsed = sizeInBytes; return new LookupFactory() { @Override public Lookup getLookup(FieldMapper<?> mapper, CompletionSuggestionContext suggestionContext) { AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.names().indexName()); if (analyzingSuggestHolder == null) { return null; } int flags = analyzingSuggestHolder.preserveSep ? 
XAnalyzingSuggester.PRESERVE_SEP : 0; XAnalyzingSuggester suggester; if (suggestionContext.isFuzzy()) { suggester = new XFuzzySuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), false, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); } else { suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); } return suggester; } @Override public CompletionStats stats(String... 
fields) { long sizeInBytes = 0; ObjectLongOpenHashMap<String> completionFields = null; if (fields != null && fields.length > 0) { completionFields = new ObjectLongOpenHashMap<String>(fields.length); } for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) { sizeInBytes += entry.getValue().fst.sizeInBytes(); if (fields == null || fields.length == 0) { continue; } for (String field : fields) { // support for getting fields by regex as in fielddata if (Regex.simpleMatch(field, entry.getKey())) { long fstSize = entry.getValue().fst.sizeInBytes(); completionFields.addTo(field, fstSize); } } } return new CompletionStats(sizeInBytes, completionFields); } @Override AnalyzingSuggestHolder getAnalyzingSuggestHolder(FieldMapper<?> mapper) { return lookupMap.get(mapper.names().indexName()); } @Override public long ramBytesUsed() { return ramBytesUsed; } }; }
From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProvider.java
License:Apache License
@Override public Completion090PostingsFormat.LookupFactory load(IndexInput input) throws IOException { long sizeInBytes = 0; int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST); if (version >= CODEC_VERSION_CHECKSUMS) { CodecUtil.checksumEntireFile(input); }//from w w w . j a va2 s . com final long metaPointerPosition = input.length() - (version >= CODEC_VERSION_CHECKSUMS ? 8 + CodecUtil.footerLength() : 8); final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>(); input.seek(metaPointerPosition); long metaPointer = input.readLong(); input.seek(metaPointer); int numFields = input.readVInt(); Map<Long, String> meta = new TreeMap<>(); for (int i = 0; i < numFields; i++) { String name = input.readString(); long offset = input.readVLong(); meta.put(offset, name); } for (Map.Entry<Long, String> entry : meta.entrySet()) { input.seek(entry.getKey()); FST<Pair<Long, BytesRef>> fst = new FST<>(input, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); int maxAnalyzedPathsForOneInput = input.readVInt(); int maxSurfaceFormsPerAnalyzedForm = input.readVInt(); int maxGraphExpansions = input.readInt(); int options = input.readVInt(); boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPARATORS) != 0; boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0; boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0; // first version did not include these three fields, so fall back to old default (before the analyzingsuggester // was updated in Lucene, so we cannot use the suggester defaults) int sepLabel, payloadSep, endByte, holeCharacter; switch (version) { case CODEC_VERSION_START: sepLabel = 0xFF; payloadSep = '\u001f'; endByte = 0x0; holeCharacter = '\u001E'; break; default: sepLabel = input.readVInt(); endByte = input.readVInt(); payloadSep = input.readVInt(); holeCharacter = input.readVInt(); } AnalyzingSuggestHolder holder = new 
AnalyzingSuggestHolder(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput, fst, sepLabel, payloadSep, endByte, holeCharacter); sizeInBytes += fst.ramBytesUsed(); lookupMap.put(entry.getValue(), holder); } final long ramBytesUsed = sizeInBytes; return new Completion090PostingsFormat.LookupFactory() { @Override public Lookup getLookup(OldCompletionFieldMapper.CompletionFieldType fieldType, CompletionSuggestionContext suggestionContext) { AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.names().indexName()); if (analyzingSuggestHolder == null) { return null; } int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0; final XAnalyzingSuggester suggester; final Automaton queryPrefix = fieldType.requiresContext() ? ContextMapping.ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(), suggestionContext.getContextQueries()) : null; if (suggestionContext.isFuzzy()) { suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), suggestionContext.isFuzzyUnicodeAware(), analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter); } else { suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, 
analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter); } return suggester; } @Override public CompletionStats stats(String... fields) { long sizeInBytes = 0; ObjectLongHashMap<String> completionFields = null; if (fields != null && fields.length > 0) { completionFields = new ObjectLongHashMap<>(fields.length); } for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) { sizeInBytes += entry.getValue().fst.ramBytesUsed(); if (fields == null || fields.length == 0) { continue; } if (Regex.simpleMatch(fields, entry.getKey())) { long fstSize = entry.getValue().fst.ramBytesUsed(); completionFields.addTo(entry.getKey(), fstSize); } } return new CompletionStats(sizeInBytes, completionFields); } @Override AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) { return lookupMap.get(fieldType.names().indexName()); } @Override public long ramBytesUsed() { return ramBytesUsed; } @Override public Collection<Accountable> getChildResources() { return Accountables.namedAccountables("field", lookupMap); } }; }
From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProviderV1.java
License:Apache License
@Override public LookupFactory load(IndexInput input) throws IOException { CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION, CODEC_VERSION); final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>(); input.seek(input.length() - 8);//from w w w . j ava2 s . c o m long metaPointer = input.readLong(); input.seek(metaPointer); int numFields = input.readVInt(); Map<Long, String> meta = new TreeMap<>(); for (int i = 0; i < numFields; i++) { String name = input.readString(); long offset = input.readVLong(); meta.put(offset, name); } long sizeInBytes = 0; for (Map.Entry<Long, String> entry : meta.entrySet()) { input.seek(entry.getKey()); FST<Pair<Long, BytesRef>> fst = new FST<>(input, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); int maxAnalyzedPathsForOneInput = input.readVInt(); int maxSurfaceFormsPerAnalyzedForm = input.readVInt(); int maxGraphExpansions = input.readInt(); int options = input.readVInt(); boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPARATORS) != 0; boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0; boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0; sizeInBytes += fst.ramBytesUsed(); lookupMap.put(entry.getValue(), new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput, fst)); } final long ramBytesUsed = sizeInBytes; return new LookupFactory() { @Override public Lookup getLookup(OldCompletionFieldMapper.CompletionFieldType fieldType, CompletionSuggestionContext suggestionContext) { AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.names().indexName()); if (analyzingSuggestHolder == null) { return null; } int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0; final Automaton queryPrefix = fieldType.requiresContext() ? 
ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(), suggestionContext.getContextQueries()) : null; XAnalyzingSuggester suggester; if (suggestionContext.isFuzzy()) { suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), false, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); } else { suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); } return suggester; } @Override public CompletionStats stats(String... 
fields) { long sizeInBytes = 0; ObjectLongHashMap<String> completionFields = null; if (fields != null && fields.length > 0) { completionFields = new ObjectLongHashMap<>(fields.length); } for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) { sizeInBytes += entry.getValue().fst.ramBytesUsed(); if (fields == null || fields.length == 0) { continue; } for (String field : fields) { // support for getting fields by regex as in fielddata if (Regex.simpleMatch(field, entry.getKey())) { long fstSize = entry.getValue().fst.ramBytesUsed(); completionFields.addTo(field, fstSize); } } } return new CompletionStats(sizeInBytes, completionFields); } @Override AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) { return lookupMap.get(fieldType.names().indexName()); } @Override public long ramBytesUsed() { return ramBytesUsed; } @Override public Collection<Accountable> getChildResources() { return Accountables.namedAccountables("field", lookupMap); } }; }