List of usage examples for org.apache.lucene.util.fst FST save
public void save(final Path path) throws IOException
From source file:org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider.java
License:Apache License
@Override public FieldsConsumer consumer(final IndexOutput output) throws IOException { CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST); return new FieldsConsumer() { private Map<FieldInfo, Long> fieldOffsets = new HashMap<FieldInfo, Long>(); @Override// www . j a v a 2s . c om public void close() throws IOException { try { /* * write the offsets per field such that we know where * we need to load the FSTs from */ long pointer = output.getFilePointer(); output.writeVInt(fieldOffsets.size()); for (Map.Entry<FieldInfo, Long> entry : fieldOffsets.entrySet()) { output.writeString(entry.getKey().name); output.writeVLong(entry.getValue()); } output.writeLong(pointer); output.flush(); } finally { IOUtils.close(output); } } @Override public TermsConsumer addField(final FieldInfo field) throws IOException { return new TermsConsumer() { final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder( maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP); final CompletionPostingsConsumer postingsConsumer = new CompletionPostingsConsumer( AnalyzingCompletionLookupProvider.this, builder); @Override public PostingsConsumer startTerm(BytesRef text) throws IOException { builder.startTerm(text); return postingsConsumer; } @Override public Comparator<BytesRef> getComparator() throws IOException { return BytesRef.getUTF8SortedAsUnicodeComparator(); } @Override public void finishTerm(BytesRef text, TermStats stats) throws IOException { builder.finishTerm(stats.docFreq); // use doc freq as a fallback } @Override public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException { /* * Here we are done processing the field and we can * buid the FST and write it to disk. */ FST<Pair<Long, BytesRef>> build = builder.build(); assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: [" + docCount + "]"; /* * it's possible that the FST is null if we have 2 segments that get merged * and all docs that have a value in this field are deleted. This will cause * a consumer to be created but it doesn't consume any values causing the FSTBuilder * to return null. */ if (build != null) { fieldOffsets.put(field, output.getFilePointer()); build.save(output); /* write some more meta-info */ output.writeVInt(postingsConsumer.getMaxAnalyzedPathsForOneInput()); output.writeVInt(maxSurfaceFormsPerAnalyzedForm); output.writeInt(maxGraphExpansions); // can be negative int options = 0; options |= preserveSep ? SERIALIZE_PRESERVE_SEPERATORS : 0; options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0; options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0; output.writeVInt(options); output.writeVInt(XAnalyzingSuggester.SEP_LABEL); output.writeVInt(XAnalyzingSuggester.END_BYTE); output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP); output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER); } } }; } }; }
From source file:org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProviderV1.java
License:Apache License
@Override public FieldsConsumer consumer(final IndexOutput output) throws IOException { CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION); return new FieldsConsumer() { private Map<FieldInfo, Long> fieldOffsets = new HashMap<FieldInfo, Long>(); @Override//from w w w . j av a 2 s . c om public void close() throws IOException { try { /* * write the offsets per field such that we know where * we need to load the FSTs from */ long pointer = output.getFilePointer(); output.writeVInt(fieldOffsets.size()); for (Map.Entry<FieldInfo, Long> entry : fieldOffsets.entrySet()) { output.writeString(entry.getKey().name); output.writeVLong(entry.getValue()); } output.writeLong(pointer); output.flush(); } finally { IOUtils.close(output); } } @Override public TermsConsumer addField(final FieldInfo field) throws IOException { return new TermsConsumer() { final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder( maxSurfaceFormsPerAnalyzedForm, hasPayloads, PAYLOAD_SEP); final CompletionPostingsConsumer postingsConsumer = new CompletionPostingsConsumer( AnalyzingCompletionLookupProviderV1.this, builder); @Override public PostingsConsumer startTerm(BytesRef text) throws IOException { builder.startTerm(text); return postingsConsumer; } @Override public Comparator<BytesRef> getComparator() throws IOException { return BytesRef.getUTF8SortedAsUnicodeComparator(); } @Override public void finishTerm(BytesRef text, TermStats stats) throws IOException { builder.finishTerm(stats.docFreq); // use doc freq as a fallback } @Override public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException { /* * Here we are done processing the field and we can * buid the FST and write it to disk. */ FST<Pair<Long, BytesRef>> build = builder.build(); assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: [" + docCount + "]"; /* * it's possible that the FST is null if we have 2 segments that get merged * and all docs that have a value in this field are deleted. This will cause * a consumer to be created but it doesn't consume any values causing the FSTBuilder * to return null. */ if (build != null) { fieldOffsets.put(field, output.getFilePointer()); build.save(output); /* write some more meta-info */ output.writeVInt(postingsConsumer.getMaxAnalyzedPathsForOneInput()); output.writeVInt(maxSurfaceFormsPerAnalyzedForm); output.writeInt(maxGraphExpansions); // can be negative int options = 0; options |= preserveSep ? SERIALIZE_PRESERVE_SEPERATORS : 0; options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0; options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0; output.writeVInt(options); } } }; } }; }
From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProvider.java
License:Apache License
@Override public FieldsConsumer consumer(final IndexOutput output) throws IOException { CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST); return new FieldsConsumer() { private Map<String, Long> fieldOffsets = new HashMap<>(); @Override//from w ww. jav a 2s. c o m public void close() throws IOException { try { /* * write the offsets per field such that we know where * we need to load the FSTs from */ long pointer = output.getFilePointer(); output.writeVInt(fieldOffsets.size()); for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) { output.writeString(entry.getKey()); output.writeVLong(entry.getValue()); } output.writeLong(pointer); CodecUtil.writeFooter(output); } finally { IOUtils.close(output); } } @Override public void write(Fields fields) throws IOException { for (String field : fields) { Terms terms = fields.terms(field); if (terms == null) { continue; } TermsEnum termsEnum = terms.iterator(); PostingsEnum docsEnum = null; final SuggestPayload spare = new SuggestPayload(); int maxAnalyzedPathsForOneInput = 0; final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder( maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP); int docCount = 0; while (true) { BytesRef term = termsEnum.next(); if (term == null) { break; } docsEnum = termsEnum.postings(null, docsEnum, PostingsEnum.PAYLOADS); builder.startTerm(term); int docFreq = 0; while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { for (int i = 0; i < docsEnum.freq(); i++) { final int position = docsEnum.nextPosition(); AnalyzingCompletionLookupProvider.this.parsePayload(docsEnum.getPayload(), spare); builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight); // multi fields have the same surface form so we sum up here maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1); } docFreq++; docCount = Math.max(docCount, docsEnum.docID() + 1); } builder.finishTerm(docFreq); } /* * Here we are done processing the field and we can * buid the FST and write it to disk. */ FST<Pair<Long, BytesRef>> build = builder.build(); assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: [" + docCount + "]"; /* * it's possible that the FST is null if we have 2 segments that get merged * and all docs that have a value in this field are deleted. This will cause * a consumer to be created but it doesn't consume any values causing the FSTBuilder * to return null. */ if (build != null) { fieldOffsets.put(field, output.getFilePointer()); build.save(output); /* write some more meta-info */ output.writeVInt(maxAnalyzedPathsForOneInput); output.writeVInt(maxSurfaceFormsPerAnalyzedForm); output.writeInt(maxGraphExpansions); // can be negative int options = 0; options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0; options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0; options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0; output.writeVInt(options); output.writeVInt(XAnalyzingSuggester.SEP_LABEL); output.writeVInt(XAnalyzingSuggester.END_BYTE); output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP); output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER); } } } }; }
From source file:org.elasticsearch.search.suggest.completion.old.AnalyzingCompletionLookupProviderV1.java
License:Apache License
@Override public FieldsConsumer consumer(final IndexOutput output) throws IOException { // TODO write index header? CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION); return new FieldsConsumer() { private Map<String, Long> fieldOffsets = new HashMap<>(); @Override/*from w w w . ja v a 2 s .c o m*/ public void close() throws IOException { try { /* * write the offsets per field such that we know where * we need to load the FSTs from */ long pointer = output.getFilePointer(); output.writeVInt(fieldOffsets.size()); for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) { output.writeString(entry.getKey()); output.writeVLong(entry.getValue()); } output.writeLong(pointer); } finally { IOUtils.close(output); } } @Override public void write(Fields fields) throws IOException { for (String field : fields) { Terms terms = fields.terms(field); if (terms == null) { continue; } TermsEnum termsEnum = terms.iterator(); PostingsEnum docsEnum = null; final SuggestPayload spare = new SuggestPayload(); int maxAnalyzedPathsForOneInput = 0; final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder( maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP); int docCount = 0; while (true) { BytesRef term = termsEnum.next(); if (term == null) { break; } docsEnum = termsEnum.postings(null, docsEnum, PostingsEnum.PAYLOADS); builder.startTerm(term); int docFreq = 0; while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { for (int i = 0; i < docsEnum.freq(); i++) { final int position = docsEnum.nextPosition(); AnalyzingCompletionLookupProviderV1.this.parsePayload(docsEnum.getPayload(), spare); builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight); // multi fields have the same surface form so we sum up here maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1); } docFreq++; docCount = Math.max(docCount, docsEnum.docID() + 1); } builder.finishTerm(docFreq); } /* * Here we are done processing the field and we can * buid the FST and write it to disk. */ FST<Pair<Long, BytesRef>> build = builder.build(); assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: [" + docCount + "]"; /* * it's possible that the FST is null if we have 2 segments that get merged * and all docs that have a value in this field are deleted. This will cause * a consumer to be created but it doesn't consume any values causing the FSTBuilder * to return null. */ if (build != null) { fieldOffsets.put(field, output.getFilePointer()); build.save(output); /* write some more meta-info */ output.writeVInt(maxAnalyzedPathsForOneInput); output.writeVInt(maxSurfaceFormsPerAnalyzedForm); output.writeInt(maxGraphExpansions); // can be negative int options = 0; options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0; options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0; options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0; output.writeVInt(options); } } } }; }
From source file:org.elasticsearch.search.suggest.completion2x.AnalyzingCompletionLookupProvider.java
License:Apache License
@Override public FieldsConsumer consumer(final IndexOutput output) throws IOException { CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST); return new FieldsConsumer() { private Map<String, Long> fieldOffsets = new HashMap<>(); @Override/*w ww . ja v a2 s . c om*/ public void close() throws IOException { try { /* * write the offsets per field such that we know where * we need to load the FSTs from */ long pointer = output.getFilePointer(); output.writeVInt(fieldOffsets.size()); for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) { output.writeString(entry.getKey()); output.writeVLong(entry.getValue()); } output.writeLong(pointer); CodecUtil.writeFooter(output); } finally { IOUtils.close(output); } } @Override public void write(Fields fields) throws IOException { for (String field : fields) { Terms terms = fields.terms(field); if (terms == null) { continue; } TermsEnum termsEnum = terms.iterator(); PostingsEnum docsEnum = null; final SuggestPayload spare = new SuggestPayload(); int maxAnalyzedPathsForOneInput = 0; final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder( maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP); int docCount = 0; while (true) { BytesRef term = termsEnum.next(); if (term == null) { break; } docsEnum = termsEnum.postings(docsEnum, PostingsEnum.PAYLOADS); builder.startTerm(term); int docFreq = 0; while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { for (int i = 0; i < docsEnum.freq(); i++) { final int position = docsEnum.nextPosition(); AnalyzingCompletionLookupProvider.this.parsePayload(docsEnum.getPayload(), spare); builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight); // multi fields have the same surface form so we sum up here maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1); } docFreq++; docCount = Math.max(docCount, docsEnum.docID() + 1); } builder.finishTerm(docFreq); } /* * Here we are done processing the field and we can * buid the FST and write it to disk. */ FST<Pair<Long, BytesRef>> build = builder.build(); assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: [" + docCount + "]"; /* * it's possible that the FST is null if we have 2 segments that get merged * and all docs that have a value in this field are deleted. This will cause * a consumer to be created but it doesn't consume any values causing the FSTBuilder * to return null. */ if (build != null) { fieldOffsets.put(field, output.getFilePointer()); build.save(output); /* write some more meta-info */ output.writeVInt(maxAnalyzedPathsForOneInput); output.writeVInt(maxSurfaceFormsPerAnalyzedForm); output.writeInt(maxGraphExpansions); // can be negative int options = 0; options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0; options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0; options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0; output.writeVInt(options); output.writeVInt(XAnalyzingSuggester.SEP_LABEL); output.writeVInt(XAnalyzingSuggester.END_BYTE); output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP); output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER); } } } }; }