List of usage examples for org.apache.lucene.util.fst.ByteSequenceOutputs#getSingleton()
public static ByteSequenceOutputs getSingleton()
From source file:BuildFST.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" })
public static void main(String[] args) throws IOException {
boolean numeric = true;
boolean negative = false;
for (int i = 0; i < args.length; i++) {
int j = args[i].lastIndexOf('/');
if (j != -1) {
try {
negative |= Long.parseLong(args[i].substring(j + 1)) < 0;
} catch (NumberFormatException nfe) {
numeric = false;//from w ww . jav a 2 s.c o m
break;
}
}
}
Outputs outputs;
if (numeric) {
if (negative) {
throw new RuntimeException("can only handle numeric outputs >= 0");
}
outputs = PositiveIntOutputs.getSingleton();
} else {
outputs = ByteSequenceOutputs.getSingleton();
}
Pair<?>[] inputs = new Pair[args.length];
for (int i = 0; i < args.length; i++) {
int j = args[i].lastIndexOf('/');
String input;
Object output;
if (j == -1) {
output = outputs.getNoOutput();
input = args[i];
} else {
input = args[i].substring(0, j);
String outputString = args[i].substring(j + 1);
if (numeric) {
output = Long.parseLong(outputString);
} else {
output = new BytesRef(outputString);
}
}
inputs[i] = new Pair(new BytesRef(input), output);
}
Arrays.sort(inputs);
FST<?> fst;
if (numeric) {
Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
for (Pair pair : inputs) {
IntsRefBuilder intsBuilder = new IntsRefBuilder();
Util.toIntsRef(pair.input, intsBuilder);
b.add(intsBuilder.get(), (Long) pair.output);
}
fst = b.finish();
} else {
Builder<BytesRef> b = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, outputs);
for (Pair pair : inputs) {
IntsRefBuilder intsBuilder = new IntsRefBuilder();
Util.toIntsRef(pair.input, intsBuilder);
b.add(intsBuilder.get(), (BytesRef) pair.output);
}
fst = b.finish();
}
Util.toDot(fst, new PrintWriter(System.out), true, true);
}
From source file:com.rocana.lucene.codec.v1.RocanaFieldReader.java
License:Apache License
RocanaFieldReader(RocanaBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException { assert numTerms > 0; this.fieldInfo = fieldInfo; //DEBUG = RocanaBlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id"); this.parent = parent; this.numTerms = numTerms; this.sumTotalTermFreq = sumTotalTermFreq; this.sumDocFreq = sumDocFreq; this.docCount = docCount; this.indexStartFP = indexStartFP; this.rootCode = rootCode; this.longsSize = longsSize; this.minTerm = minTerm; this.maxTerm = maxTerm; // if (DEBUG) { // System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor); // }//from w ww . j ava 2s . c om rootBlockFP = (new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length)) .readVLong() >>> RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS; if (indexIn != null) { final IndexInput clone = indexIn.clone(); //System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name); clone.seek(indexStartFP); index = new FST<>(clone, ByteSequenceOutputs.getSingleton()); /* if (false) { final String dotFileName = segment + "_" + fieldInfo.name + ".dot"; Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName)); Util.toDot(index, w, false, false); System.out.println("FST INDEX: SAVED to " + dotFileName); w.close(); } */ } else { index = null; } }
From source file:org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider.java
License:Apache License
@Override public LookupFactory load(IndexInput input) throws IOException { long sizeInBytes = 0; int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST); final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<String, AnalyzingSuggestHolder>(); input.seek(input.length() - 8);//from w ww .j a v a2 s . c o m long metaPointer = input.readLong(); input.seek(metaPointer); int numFields = input.readVInt(); Map<Long, String> meta = new TreeMap<Long, String>(); for (int i = 0; i < numFields; i++) { String name = input.readString(); long offset = input.readVLong(); meta.put(offset, name); } for (Map.Entry<Long, String> entry : meta.entrySet()) { input.seek(entry.getKey()); FST<Pair<Long, BytesRef>> fst = new FST<Pair<Long, BytesRef>>(input, new PairOutputs<Long, BytesRef>( PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); int maxAnalyzedPathsForOneInput = input.readVInt(); int maxSurfaceFormsPerAnalyzedForm = input.readVInt(); int maxGraphExpansions = input.readInt(); int options = input.readVInt(); boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPERATORS) != 0; boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0; boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0; // first version did not include these three fields, so fall back to old default (before the analyzingsuggester // was updated in Lucene, so we cannot use the suggester defaults) int sepLabel, payloadSep, endByte, holeCharacter; switch (version) { case CODEC_VERSION_START: sepLabel = 0xFF; payloadSep = '\u001f'; endByte = 0x0; holeCharacter = '\u001E'; break; default: sepLabel = input.readVInt(); endByte = input.readVInt(); payloadSep = input.readVInt(); holeCharacter = input.readVInt(); } AnalyzingSuggestHolder holder = new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput, 
fst, sepLabel, payloadSep, endByte, holeCharacter); sizeInBytes += fst.sizeInBytes(); lookupMap.put(entry.getValue(), holder); } final long ramBytesUsed = sizeInBytes; return new LookupFactory() { @Override public Lookup getLookup(FieldMapper<?> mapper, CompletionSuggestionContext suggestionContext) { AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.names().indexName()); if (analyzingSuggestHolder == null) { return null; } int flags = analyzingSuggestHolder.preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0; XAnalyzingSuggester suggester; if (suggestionContext.isFuzzy()) { suggester = new XFuzzySuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), suggestionContext.isFuzzyUnicodeAware(), analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter); } else { suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter); } return suggester; } @Override public CompletionStats stats(String... 
fields) { long sizeInBytes = 0; ObjectLongOpenHashMap<String> completionFields = null; if (fields != null && fields.length > 0) { completionFields = new ObjectLongOpenHashMap<String>(fields.length); } for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) { sizeInBytes += entry.getValue().fst.sizeInBytes(); if (fields == null || fields.length == 0) { continue; } for (String field : fields) { // support for getting fields by regex as in fielddata if (Regex.simpleMatch(field, entry.getKey())) { long fstSize = entry.getValue().fst.sizeInBytes(); completionFields.addTo(field, fstSize); } } } return new CompletionStats(sizeInBytes, completionFields); } @Override AnalyzingSuggestHolder getAnalyzingSuggestHolder(FieldMapper<?> mapper) { return lookupMap.get(mapper.names().indexName()); } @Override public long ramBytesUsed() { return ramBytesUsed; } }; }
From source file:org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProviderV1.java
License:Apache License
@Override public LookupFactory load(IndexInput input) throws IOException { CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION, CODEC_VERSION); final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<String, AnalyzingSuggestHolder>(); input.seek(input.length() - 8);//from w w w. ja va2s . c o m long metaPointer = input.readLong(); input.seek(metaPointer); int numFields = input.readVInt(); Map<Long, String> meta = new TreeMap<Long, String>(); for (int i = 0; i < numFields; i++) { String name = input.readString(); long offset = input.readVLong(); meta.put(offset, name); } long sizeInBytes = 0; for (Map.Entry<Long, String> entry : meta.entrySet()) { input.seek(entry.getKey()); FST<Pair<Long, BytesRef>> fst = new FST<Pair<Long, BytesRef>>(input, new PairOutputs<Long, BytesRef>( PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); int maxAnalyzedPathsForOneInput = input.readVInt(); int maxSurfaceFormsPerAnalyzedForm = input.readVInt(); int maxGraphExpansions = input.readInt(); int options = input.readVInt(); boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPERATORS) != 0; boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0; boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0; sizeInBytes += fst.sizeInBytes(); lookupMap.put(entry.getValue(), new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput, fst)); } final long ramBytesUsed = sizeInBytes; return new LookupFactory() { @Override public Lookup getLookup(FieldMapper<?> mapper, CompletionSuggestionContext suggestionContext) { AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.names().indexName()); if (analyzingSuggestHolder == null) { return null; } int flags = analyzingSuggestHolder.preserveSep ? 
XAnalyzingSuggester.PRESERVE_SEP : 0; XAnalyzingSuggester suggester; if (suggestionContext.isFuzzy()) { suggester = new XFuzzySuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), false, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); } else { suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); } return suggester; } @Override public CompletionStats stats(String... 
fields) { long sizeInBytes = 0; ObjectLongOpenHashMap<String> completionFields = null; if (fields != null && fields.length > 0) { completionFields = new ObjectLongOpenHashMap<String>(fields.length); } for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) { sizeInBytes += entry.getValue().fst.sizeInBytes(); if (fields == null || fields.length == 0) { continue; } for (String field : fields) { // support for getting fields by regex as in fielddata if (Regex.simpleMatch(field, entry.getKey())) { long fstSize = entry.getValue().fst.sizeInBytes(); completionFields.addTo(field, fstSize); } } } return new CompletionStats(sizeInBytes, completionFields); } @Override AnalyzingSuggestHolder getAnalyzingSuggestHolder(FieldMapper<?> mapper) { return lookupMap.get(mapper.names().indexName()); } @Override public long ramBytesUsed() { return ramBytesUsed; } }; }
From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProvider.java
License:Apache License
@Override public Completion090PostingsFormat.LookupFactory load(IndexInput input) throws IOException { long sizeInBytes = 0; int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST); if (version >= CODEC_VERSION_CHECKSUMS) { CodecUtil.checksumEntireFile(input); }//from w ww. ja v a 2 s . c o m final long metaPointerPosition = input.length() - (version >= CODEC_VERSION_CHECKSUMS ? 8 + CodecUtil.footerLength() : 8); final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>(); input.seek(metaPointerPosition); long metaPointer = input.readLong(); input.seek(metaPointer); int numFields = input.readVInt(); Map<Long, String> meta = new TreeMap<>(); for (int i = 0; i < numFields; i++) { String name = input.readString(); long offset = input.readVLong(); meta.put(offset, name); } for (Map.Entry<Long, String> entry : meta.entrySet()) { input.seek(entry.getKey()); FST<Pair<Long, BytesRef>> fst = new FST<>(input, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); int maxAnalyzedPathsForOneInput = input.readVInt(); int maxSurfaceFormsPerAnalyzedForm = input.readVInt(); int maxGraphExpansions = input.readInt(); int options = input.readVInt(); boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPARATORS) != 0; boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0; boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0; // first version did not include these three fields, so fall back to old default (before the analyzingsuggester // was updated in Lucene, so we cannot use the suggester defaults) int sepLabel, payloadSep, endByte, holeCharacter; switch (version) { case CODEC_VERSION_START: sepLabel = 0xFF; payloadSep = '\u001f'; endByte = 0x0; holeCharacter = '\u001E'; break; default: sepLabel = input.readVInt(); endByte = input.readVInt(); payloadSep = input.readVInt(); holeCharacter = input.readVInt(); } AnalyzingSuggestHolder holder = new 
AnalyzingSuggestHolder(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput, fst, sepLabel, payloadSep, endByte, holeCharacter); sizeInBytes += fst.ramBytesUsed(); lookupMap.put(entry.getValue(), holder); } final long ramBytesUsed = sizeInBytes; return new Completion090PostingsFormat.LookupFactory() { @Override public Lookup getLookup(OldCompletionFieldMapper.CompletionFieldType fieldType, CompletionSuggestionContext suggestionContext) { AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.names().indexName()); if (analyzingSuggestHolder == null) { return null; } int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0; final XAnalyzingSuggester suggester; final Automaton queryPrefix = fieldType.requiresContext() ? ContextMapping.ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(), suggestionContext.getContextQueries()) : null; if (suggestionContext.isFuzzy()) { suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), suggestionContext.isFuzzyUnicodeAware(), analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter); } else { suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, 
analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter); } return suggester; } @Override public CompletionStats stats(String... fields) { long sizeInBytes = 0; ObjectLongHashMap<String> completionFields = null; if (fields != null && fields.length > 0) { completionFields = new ObjectLongHashMap<>(fields.length); } for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) { sizeInBytes += entry.getValue().fst.ramBytesUsed(); if (fields == null || fields.length == 0) { continue; } if (Regex.simpleMatch(fields, entry.getKey())) { long fstSize = entry.getValue().fst.ramBytesUsed(); completionFields.addTo(entry.getKey(), fstSize); } } return new CompletionStats(sizeInBytes, completionFields); } @Override AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) { return lookupMap.get(fieldType.names().indexName()); } @Override public long ramBytesUsed() { return ramBytesUsed; } @Override public Collection<Accountable> getChildResources() { return Accountables.namedAccountables("field", lookupMap); } }; }
From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProviderV1.java
License:Apache License
@Override public LookupFactory load(IndexInput input) throws IOException { CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION, CODEC_VERSION); final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>(); input.seek(input.length() - 8);//from w ww.j av a2s . c o m long metaPointer = input.readLong(); input.seek(metaPointer); int numFields = input.readVInt(); Map<Long, String> meta = new TreeMap<>(); for (int i = 0; i < numFields; i++) { String name = input.readString(); long offset = input.readVLong(); meta.put(offset, name); } long sizeInBytes = 0; for (Map.Entry<Long, String> entry : meta.entrySet()) { input.seek(entry.getKey()); FST<Pair<Long, BytesRef>> fst = new FST<>(input, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); int maxAnalyzedPathsForOneInput = input.readVInt(); int maxSurfaceFormsPerAnalyzedForm = input.readVInt(); int maxGraphExpansions = input.readInt(); int options = input.readVInt(); boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPARATORS) != 0; boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0; boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0; sizeInBytes += fst.ramBytesUsed(); lookupMap.put(entry.getValue(), new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput, fst)); } final long ramBytesUsed = sizeInBytes; return new LookupFactory() { @Override public Lookup getLookup(OldCompletionFieldMapper.CompletionFieldType fieldType, CompletionSuggestionContext suggestionContext) { AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.names().indexName()); if (analyzingSuggestHolder == null) { return null; } int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0; final Automaton queryPrefix = fieldType.requiresContext() ? 
ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(), suggestionContext.getContextQueries()) : null; XAnalyzingSuggester suggester; if (suggestionContext.isFuzzy()) { suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), false, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); } else { suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); } return suggester; } @Override public CompletionStats stats(String... 
fields) { long sizeInBytes = 0; ObjectLongHashMap<String> completionFields = null; if (fields != null && fields.length > 0) { completionFields = new ObjectLongHashMap<>(fields.length); } for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) { sizeInBytes += entry.getValue().fst.ramBytesUsed(); if (fields == null || fields.length == 0) { continue; } for (String field : fields) { // support for getting fields by regex as in fielddata if (Regex.simpleMatch(field, entry.getKey())) { long fstSize = entry.getValue().fst.ramBytesUsed(); completionFields.addTo(field, fstSize); } } } return new CompletionStats(sizeInBytes, completionFields); } @Override AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) { return lookupMap.get(fieldType.names().indexName()); } @Override public long ramBytesUsed() { return ramBytesUsed; } @Override public Collection<Accountable> getChildResources() { return Accountables.namedAccountables("field", lookupMap); } }; }
From source file:org.elasticsearch.search.suggest.completion2x.AnalyzingCompletionLookupProvider.java
License:Apache License
@Override public LookupFactory load(IndexInput input) throws IOException { long sizeInBytes = 0; int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST); if (version >= CODEC_VERSION_CHECKSUMS) { CodecUtil.checksumEntireFile(input); }/* w w w. j a v a 2s. com*/ final long metaPointerPosition = input.length() - (version >= CODEC_VERSION_CHECKSUMS ? 8 + CodecUtil.footerLength() : 8); final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>(); input.seek(metaPointerPosition); long metaPointer = input.readLong(); input.seek(metaPointer); int numFields = input.readVInt(); Map<Long, String> meta = new TreeMap<>(); for (int i = 0; i < numFields; i++) { String name = input.readString(); long offset = input.readVLong(); meta.put(offset, name); } for (Map.Entry<Long, String> entry : meta.entrySet()) { input.seek(entry.getKey()); FST<Pair<Long, BytesRef>> fst = new FST<>(input, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); int maxAnalyzedPathsForOneInput = input.readVInt(); int maxSurfaceFormsPerAnalyzedForm = input.readVInt(); int maxGraphExpansions = input.readInt(); int options = input.readVInt(); boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPARATORS) != 0; boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0; boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0; // first version did not include these three fields, so fall back to old default (before the analyzingsuggester // was updated in Lucene, so we cannot use the suggester defaults) int sepLabel, payloadSep, endByte, holeCharacter; switch (version) { case CODEC_VERSION_START: sepLabel = 0xFF; payloadSep = '\u001f'; endByte = 0x0; holeCharacter = '\u001E'; break; default: sepLabel = input.readVInt(); endByte = input.readVInt(); payloadSep = input.readVInt(); holeCharacter = input.readVInt(); } AnalyzingSuggestHolder holder = new AnalyzingSuggestHolder(preserveSep, 
preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput, fst, sepLabel, payloadSep, endByte, holeCharacter); sizeInBytes += fst.ramBytesUsed(); lookupMap.put(entry.getValue(), holder); } final long ramBytesUsed = sizeInBytes; return new LookupFactory() { @Override public Lookup getLookup(CompletionFieldMapper2x.CompletionFieldType fieldType, CompletionSuggestionContext suggestionContext) { AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.name()); if (analyzingSuggestHolder == null) { return null; } int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0; final XAnalyzingSuggester suggester; final Automaton queryPrefix = fieldType.requiresContext() ? ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(), suggestionContext.getContextQueries()) : null; final FuzzyOptions fuzzyOptions = suggestionContext.getFuzzyOptions(); if (fuzzyOptions != null) { suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, fuzzyOptions.getEditDistance(), fuzzyOptions.isTranspositions(), fuzzyOptions.getFuzzyPrefixLength(), fuzzyOptions.getFuzzyMinLength(), fuzzyOptions.isUnicodeAware(), analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter); } else { suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, 
analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter); } return suggester; } @Override public CompletionStats stats(String... fields) { long sizeInBytes = 0; ObjectLongHashMap<String> completionFields = null; if (fields != null && fields.length > 0) { completionFields = new ObjectLongHashMap<>(fields.length); } for (Map.Entry<String, AnalyzingSuggestHolder> entry : lookupMap.entrySet()) { sizeInBytes += entry.getValue().fst.ramBytesUsed(); if (fields == null || fields.length == 0) { continue; } if (Regex.simpleMatch(fields, entry.getKey())) { long fstSize = entry.getValue().fst.ramBytesUsed(); completionFields.addTo(entry.getKey(), fstSize); } } return new CompletionStats(sizeInBytes, completionFields); } @Override AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) { return lookupMap.get(fieldType.name()); } @Override public long ramBytesUsed() { return ramBytesUsed; } @Override public Collection<Accountable> getChildResources() { return Accountables.namedAccountables("field", lookupMap); } }; }