Java tutorial
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.codelibs.elasticsearch.search.suggest.completion2x;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.codelibs.elasticsearch.search.suggest.completion2x.Completion090PostingsFormat.CompletionLookupProvider;
import org.codelibs.elasticsearch.search.suggest.completion2x.Completion090PostingsFormat.LookupFactory;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * Completion lookup provider registered under the codec name {@value #CODEC_NAME}.
 *
 * <p>In this file the {@code XAnalyzingSuggester}-based implementation is disabled:
 * the constructor, {@code write(Fields)} on the returned consumer,
 * {@link #load(IndexInput)} and {@link #toFiniteStrings(TokenStream)} all throw
 * {@link UnsupportedOperationException}.
 */
public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider {

    // for serialization
    /** Option bit: separators are preserved. */
    public static final int SERIALIZE_PRESERVE_SEPARATORS = 1;
    /** Option bit: payloads are present. */
    public static final int SERIALIZE_HAS_PAYLOADS = 2;
    /** Option bit: position increments are preserved. */
    public static final int SERIALIZE_PRESERVE_POSITION_INCREMENTS = 4;

    // NOTE(review): not referenced by any live code in this file; presumably these were the
    // limits handed to the disabled suggester/builder code. Confirm before removing.
    private static final int MAX_SURFACE_FORMS_PER_ANALYZED_FORM = 256;
    private static final int MAX_GRAPH_EXPANSIONS = -1;

    /** Codec name written into the header by {@link #consumer(IndexOutput)}. */
    public static final String CODEC_NAME = "analyzing";
    public static final int CODEC_VERSION_START = 1;
    public static final int CODEC_VERSION_SERIALIZED_LABELS = 2;
    public static final int CODEC_VERSION_CHECKSUMS = 3;
    /** Codec version written into the header by {@link #consumer(IndexOutput)}. */
    public static final int CODEC_VERSION_LATEST = CODEC_VERSION_CHECKSUMS;

    /** Message shared by every operation that is stubbed out in this class. */
    private static final String UNSUPPORTED_MESSAGE = "QueryBuilders does not support this operation.";

    private final boolean preserveSep;
    private final boolean preservePositionIncrements;
    private final boolean hasPayloads;

    /**
     * Always fails: the suggester prototype this provider used to build is not available here.
     *
     * <p>The flags are stored first, but because the constructor never completes normally
     * no instance is ever handed back to the caller.
     *
     * @param preserveSep                whether separators are preserved
     * @param preservePositionIncrements whether position increments are preserved
     * @param hasPayloads                whether payloads are present
     * @throws UnsupportedOperationException always
     */
    public AnalyzingCompletionLookupProvider(boolean preserveSep, boolean preservePositionIncrements, boolean hasPayloads) {
        this.preserveSep = preserveSep;
        this.preservePositionIncrements = preservePositionIncrements;
        this.hasPayloads = hasPayloads;
        // The disabled code constructed an XAnalyzingSuggester prototype from these flags.
        throw new UnsupportedOperationException(UNSUPPORTED_MESSAGE);
    }

    /**
     * @return the provider name, identical to {@link #CODEC_NAME}
     */
    @Override
    public String getName() {
        return CODEC_NAME;
    }

    public boolean getPreserveSep() {
        return preserveSep;
    }

    public boolean getPreservePositionsIncrements() {
        return preservePositionIncrements;
    }

    public boolean hasPayloads() {
        return hasPayloads;
    }

    /**
     * Writes the codec header ({@link #CODEC_NAME}, {@link #CODEC_VERSION_LATEST}) to
     * {@code output} and returns a consumer bound to that output.
     *
     * @param output the index output the header is written to; the returned consumer
     *               closes it in {@code close()}
     * @return a consumer whose {@code write} is unsupported and whose {@code close}
     *         writes the field-offset table and codec footer
     * @throws IOException if writing the header fails
     */
    @Override
    public FieldsConsumer consumer(final IndexOutput output) throws IOException {
        CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST);
        return new FieldsConsumer() {

            // Never populated by live code: write(Fields) throws before it could record an offset.
            private final Map<String, Long> fieldOffsets = new HashMap<>();

            /**
             * Writes the per-field offset table, a pointer to the start of that table and
             * the codec footer, then closes the output (also when writing fails).
             */
            @Override
            public void close() throws IOException {
                try {
                    /*
                     * write the offsets per field such that we know where
                     * we need to load the FSTs from
                     */
                    final long pointer = output.getFilePointer();
                    output.writeVInt(fieldOffsets.size());
                    for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
                        output.writeString(entry.getKey());
                        output.writeVLong(entry.getValue());
                    }
                    output.writeLong(pointer);
                    CodecUtil.writeFooter(output);
                } finally {
                    IOUtils.close(output);
                }
            }

            /**
             * Unsupported. Fields whose {@code Terms} are {@code null} are skipped; the first
             * field that has terms triggers the exception. If no field has terms the method
             * returns without writing anything.
             *
             * <p>The disabled implementation built one FST per field, recorded its start
             * offset in {@code fieldOffsets}, saved the FST and then appended:
             * maxAnalyzedPathsForOneInput (VInt), maxSurfaceFormsPerAnalyzedForm (VInt),
             * maxGraphExpansions (Int, may be negative), the {@code SERIALIZE_*} option
             * bits (VInt), and the SEP_LABEL, END_BYTE, PAYLOAD_SEP and HOLE_CHARACTER
             * labels (one VInt each). A field produced no entry when its FST was null.
             *
             * @throws UnsupportedOperationException for the first field that has terms
             */
            @Override
            public void write(Fields fields) throws IOException {
                for (String field : fields) {
                    final Terms terms = fields.terms(field);
                    if (terms == null) {
                        continue;
                    }
                    throw new UnsupportedOperationException(UNSUPPORTED_MESSAGE);
                }
            }
        };
    }

    /**
     * Unsupported in this class.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public LookupFactory load(IndexInput input) throws IOException {
        throw new UnsupportedOperationException(UNSUPPORTED_MESSAGE);
    }

    /**
     * Immutable holder for one FST together with the settings and label values passed
     * to its constructor. Memory accounting is delegated to the FST when one is present.
     */
    static class AnalyzingSuggestHolder implements Accountable {
        final boolean preserveSep;
        final boolean preservePositionIncrements;
        final int maxSurfaceFormsPerAnalyzedForm;
        final int maxGraphExpansions;
        final boolean hasPayloads;
        final int maxAnalyzedPathsForOneInput;
        final FST<Pair<Long, BytesRef>> fst;
        final int sepLabel;
        final int payloadSep;
        final int endByte;
        final int holeCharacter;

        public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements,
                int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean hasPayloads,
                int maxAnalyzedPathsForOneInput, FST<Pair<Long, BytesRef>> fst, int sepLabel,
                int payloadSep, int endByte, int holeCharacter) {
            this.preserveSep = preserveSep;
            this.preservePositionIncrements = preservePositionIncrements;
            this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
            this.maxGraphExpansions = maxGraphExpansions;
            this.hasPayloads = hasPayloads;
            this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput;
            this.fst = fst;
            this.sepLabel = sepLabel;
            this.payloadSep = payloadSep;
            this.endByte = endByte;
            this.holeCharacter = holeCharacter;
        }

        public boolean getPreserveSeparator() {
            return preserveSep;
        }

        public boolean getPreservePositionIncrements() {
            return preservePositionIncrements;
        }

        public boolean hasPayloads() {
            return hasPayloads;
        }

        /**
         * @return the FST's reported RAM usage, or {@code 0} when there is no FST
         */
        @Override
        public long ramBytesUsed() {
            if (fst == null) {
                return 0;
            }
            return fst.ramBytesUsed();
        }

        /**
         * @return a single child resource named {@code "fst"}, or an empty collection
         *         when there is no FST
         */
        @Override
        public Collection<Accountable> getChildResources() {
            if (fst == null) {
                return Collections.emptyList();
            }
            return Collections.singleton(Accountables.namedAccountable("fst", fst));
        }
    }

    /**
     * Unsupported in this class; the disabled code delegated to the suggester prototype.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
        throw new UnsupportedOperationException(UNSUPPORTED_MESSAGE);
    }
}