sh.isaac.provider.query.lucene.indexers.SemanticIndexer.java Source code

Java tutorial

Introduction

Here is the source code for sh.isaac.provider.query.lucene.indexers.SemanticIndexer.java

Source

/* 
 * Licensed under the Apache License, Version 2.0 (the "License");
 *
 * You may not use this file except in compliance with the License.
 *
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributions from 2013-2017 where performed either by US government 
 * employees, or under US Veterans Health Administration contracts. 
 *
 * US Veterans Health Administration contributions by government employees
 * are work of the U.S. Government and are not subject to copyright
 * protection in the United States. Portions contributed by government 
 * employees are USGovWork (17USC 105). Not subject to copyright. 
 * 
 * Contribution by contractors to the US Veterans Health Administration
 * during this period are contractually contributed under the
 * Apache License, Version 2.0.
 *
 * See: https://www.usa.gov/government-works
 * 
 * Contributions prior to 2013:
 *
 * Copyright (C) International Health Terminology Standards Development Organisation.
 * Licensed under the Apache License, Version 2.0.
 *
 */

package sh.isaac.provider.query.lucene.indexers;

//~--- JDK imports ------------------------------------------------------------

import java.io.IOException;

import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.Future;

import javax.inject.Inject;

//~--- non-JDK imports --------------------------------------------------------

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.mahout.math.set.OpenIntHashSet;

import sh.isaac.api.chronicle.VersionType;
import sh.isaac.api.index.SearchResult;
import sh.isaac.api.logic.LogicNode;
import sh.isaac.model.semantic.types.DynamicLongImpl;
import sh.isaac.model.semantic.types.DynamicNidImpl;
import sh.isaac.model.semantic.types.DynamicStringImpl;
import sh.isaac.provider.query.lucene.LuceneIndexer;
import sh.isaac.provider.query.lucene.PerFieldAnalyzer;
import sh.isaac.api.chronicle.Chronology;
import sh.isaac.api.component.semantic.version.ComponentNidVersion;
import sh.isaac.api.component.semantic.version.LogicGraphVersion;
import sh.isaac.api.component.semantic.version.LongVersion;
import sh.isaac.api.component.semantic.version.StringVersion;
import sh.isaac.api.component.semantic.SemanticChronology;
import sh.isaac.api.component.semantic.version.DynamicVersion;
import sh.isaac.api.component.semantic.version.dynamic.DynamicData;
import sh.isaac.api.component.semantic.version.dynamic.types.DynamicArray;
import sh.isaac.api.component.semantic.version.dynamic.types.DynamicBoolean;
import sh.isaac.api.component.semantic.version.dynamic.types.DynamicByteArray;
import sh.isaac.api.component.semantic.version.dynamic.types.DynamicDouble;
import sh.isaac.api.component.semantic.version.dynamic.types.DynamicFloat;
import sh.isaac.api.component.semantic.version.dynamic.types.DynamicInteger;
import sh.isaac.api.component.semantic.version.dynamic.types.DynamicLong;
import sh.isaac.api.component.semantic.version.dynamic.types.DynamicNid;
import sh.isaac.api.component.semantic.version.dynamic.types.DynamicPolymorphic;
import sh.isaac.api.component.semantic.version.dynamic.types.DynamicSequence;
import sh.isaac.api.component.semantic.version.dynamic.types.DynamicString;
import sh.isaac.api.component.semantic.version.dynamic.types.DynamicUUID;
import sh.isaac.api.tree.TreeNodeVisitData;

//~--- classes ----------------------------------------------------------------

/**
 * This class provides indexing for all String, Nid, Long and Logic Graph sememe types.
 *
 * Additionally, this class provides flexible indexing of all DynamicVersion data types.
 *
 * @author kec
 * @author <a href="mailto:daniel.armbrust.list@gmail.com">Dan Armbrust</a>
 * 
 * TODO much of this functionality has been replaced by the single assemblage indexer. 
 * Need to see what aspects of the Dynamic data types need to be migrated. 
 */
//@Service(name = "sememe indexer")
//@RunLevel(value = 2)
public class SemanticIndexer extends LuceneIndexer {
    /** Logger used by this indexer for debug, warning and error output. */
    private static final Logger LOG = LogManager.getLogger();

    /** Name of the index; handed to the superclass constructor when this indexer is created. */
    public static final String INDEX_NAME = "semantics";

    /**
     * Base name of the Lucene field(s) that hold indexed column data. Column specific fields are named by
     * appending {@code "_" + columnNumber}; exact-match (whitespace analyzed) variants additionally append
     * {@link PerFieldAnalyzer#WHITE_SPACE_FIELD_MARKER}.
     */
    private static final String COLUMN_FIELD_DATA = "colData";

    //~--- fields --------------------------------------------------------------
    // TODO persist dataStoreId.
    // Until the TODO above is addressed, this is a random UUID generated each time an indexer instance is created.
    private final UUID dataStoreId = UUID.randomUUID();

    /**
     * Returns the identifier of this data store.
     *
     * @return the UUID that was randomly generated when this instance was constructed
     */
    @Override
    public UUID getDataStoreId() {
        return dataStoreId;
    }

    /**
     * Injected indexer configuration. It is consulted (via {@code whatColumnsToIndex}) to find out which data
     * columns of a dynamic semantic are indexed for a given assemblage.
     */
    @Inject
    private SemanticIndexerConfiguration lric;

    //~--- constructors --------------------------------------------------------

    /**
     * Instantiates a new semantic indexer backed by the index named {@link #INDEX_NAME}.
     *
     * The constructor is private; per the inline note it exists for HK2.
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    private SemanticIndexer() throws IOException {
        // For HK2
        super(INDEX_NAME);
    }

    //~--- methods -------------------------------------------------------------

    /**
     * Finds semantics whose indexed data contains the given nid.
     *
     * In the current implementation only semantics of type {@link VersionType#COMPONENT_NID} or
     * {@link VersionType#LOGIC_GRAPH} can match. Only nids are supported, not sequences.
     *
     * For a component nid semantic, the match is made against the attached component nid value only - not against
     * the assemblage concept, nor the referenced component nid (those can be found directly via the standard
     * semantic APIs). For a logic graph semantic, any concept involved in the graph matches, except for the root
     * concept.
     *
     * @param nid the id reference to search for
     * @param assemblageConceptNids the assemblages to include in the search
     * @param searchColumns (optional) limit the search to the specified columns of attached data. May ONLY be
     * provided when exactly one assemblageConceptNids value is provided; it may not be provided with zero, or
     * with more than one, assemblageConceptNids values.
     * @param sizeLimit the maximum size of the result list
     * @param targetGeneration target generation that must be included in the search, or Long.MIN_VALUE if there
     * is no need to wait for a target generation. Long.MAX_VALUE can be passed in to force this query to wait
     * until any in-progress indexing operations are completed - and then use the latest index.
     * @return a List of {@code SearchResult} that contains the nid of the component that matched, and the score
     * of that match relative to other matches. Scores are pointless for exact id matches - they will all be the
     * same.
     */
    public List<SearchResult> query(int nid, int[] assemblageConceptNids, Integer[] searchColumns, int sizeLimit,
            Long targetGeneration) {
        // The nid is matched as an exact term against the whitespace-marked variant of each column field.
        final QueryWrapperForColumnHandling nidTermBuilder = new QueryWrapperForColumnHandling() {
            @Override
            Query buildQuery(String columnName) {
                final Term nidTerm = new Term(columnName + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER, nid + "");

                return new TermQuery(nidTerm);
            }
        };
        final Query columnQuery = nidTermBuilder.buildColumnHandlingQuery(assemblageConceptNids, searchColumns);

        return search(restrictToSemantic(columnQuery, assemblageConceptNids), sizeLimit, targetGeneration, null);
    }

    /**
     * Convenience method for searching the indexed column data as text.
     *
     * The query string is wrapped into a {@link DynamicStringImpl} and handed to
     * {@link #query(DynamicData, boolean, int[], Integer[], int, Long)} with {@code null} for the search columns,
     * so no column restriction is applied.
     *
     * @param queryString the query string
     * @param prefixSearch the prefix search flag, passed through unchanged
     * @param assemblageConceptNids the assemblages to include in the search. Null is a wildcard.
     * @param sizeLimit the size limit
     * @param targetGeneration the target generation
     * @return the list of search results
     */
    @Override
    public final List<SearchResult> query(String queryString, boolean prefixSearch, int[] assemblageConceptNids,
            int sizeLimit, Long targetGeneration) {
        final DynamicStringImpl textQuery = new DynamicStringImpl(queryString);

        return query(textQuery, prefixSearch, assemblageConceptNids, null, sizeLimit, targetGeneration);
    }

    /**
     * Searches the indexed column data for values matching the supplied query data object.
     *
     * The kind of Lucene query that is built depends on the runtime type of {@code queryData}:
     * <ul>
     * <li>{@link DynamicString} - a tokenized text query (optionally a prefix query)</li>
     * <li>{@link DynamicBoolean}, {@link DynamicNid}, {@link DynamicUUID} - an exact term match against the
     * whitespace-marked field</li>
     * <li>{@link DynamicDouble}, {@link DynamicFloat}, {@link DynamicInteger}, {@link DynamicLong},
     * {@link DynamicSequence} - an exact numeric match. Negative long / integer values look like nids, so they
     * are additionally matched against the field a nid would have been indexed under.</li>
     * </ul>
     *
     * @param queryData - The query data object (string, int, etc)
     * @param prefixSearch only applicable when the queryData type is string, where it is passed through to the
     * tokenized string query. Ignored for all other data types.
     * @param assemblageConceptNids (optional) limit the search to the specified assemblage
     * @param searchColumns (optional) limit the search to the specified columns of attached data. May ONLY be
     * provided when exactly one assemblageConceptNids value is provided; it may not be provided with zero, or
     * with more than one, assemblageConceptNids values.
     * @param sizeLimit the size limit
     * @param targetGeneration (optional) wait for an index to build, or null to not wait
     * @return the list of search results
     * @throws RuntimeException if {@code queryData} is a byte array, polymorphic, array or otherwise unsupported
     * type, or if search columns are supplied without exactly one assemblage
     */

    // TODO fix this limitation on the column restriction...
    public final List<SearchResult> query(final DynamicData queryData, final boolean prefixSearch,
            int[] assemblageConceptNids, Integer[] searchColumns, int sizeLimit, Long targetGeneration) {
        final Query q;

        if (queryData instanceof DynamicString) {
            q = new QueryWrapperForColumnHandling() {
                @Override
                Query buildQuery(String columnName) {
                    // This is the only query type that needs tokenizing, etc.
                    String queryString = ((DynamicString) queryData).getDataString();

                    // '-' signs are operators to lucene... but we want to allow nid lookups.  So escape any leading
                    // hyphens and any hyphens that are preceded by whitespace.  This way, we don't mess up UUID
                    // handling (lucene handles UUIDs ok, because the - sign is only treated special at the
                    // beginning, or when preceded by a space).
                    if (queryString.startsWith("-")) {
                        queryString = "\\" + queryString;
                    }

                    queryString = queryString.replaceAll("\\s-", " \\\\-");
                    // Log4j parameterized messages do not use MessageFormat quoting; a single quote is literal.
                    LOG.debug("Modified search string is: '{}'", queryString);
                    return buildTokenizedStringQuery(queryString, columnName, prefixSearch);
                }
            }.buildColumnHandlingQuery(assemblageConceptNids, searchColumns);
        } else if ((queryData instanceof DynamicBoolean) || (queryData instanceof DynamicNid)
                || (queryData instanceof DynamicUUID)) {
            // These types are indexed untokenized under the whitespace-marked field; match the exact term.
            q = new QueryWrapperForColumnHandling() {
                @Override
                Query buildQuery(String columnName) {
                    return new TermQuery(new Term(columnName + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER,
                            queryData.getDataObject().toString()));
                }
            }.buildColumnHandlingQuery(assemblageConceptNids, searchColumns);
        } else if ((queryData instanceof DynamicDouble) || (queryData instanceof DynamicFloat)
                || (queryData instanceof DynamicInteger) || (queryData instanceof DynamicLong)
                || (queryData instanceof DynamicSequence)) {
            q = new QueryWrapperForColumnHandling() {
                @Override
                Query buildQuery(String columnName) {
                    // Same object as lower and upper bound -> exact value match.
                    final Query numericQuery = buildNumericQuery(queryData, queryData, columnName);
                    final boolean looksLikeNid =
                            ((queryData instanceof DynamicLong) && (((DynamicLong) queryData).getDataLong() < 0))
                                    || ((queryData instanceof DynamicInteger)
                                            && (((DynamicInteger) queryData).getDataInteger() < 0));

                    if (!looksLikeNid) {
                        return numericQuery;
                    }

                    // Looks like a nid... wrap in an or clause that also matches the exact term, in case the
                    // value was indexed as a nid rather than as a numeric.  Nids are indexed under the
                    // whitespace-marked field name, so that is the field the term must target.
                    final BooleanQuery.Builder wrapper = new BooleanQuery.Builder();

                    wrapper.add(new TermQuery(new Term(columnName + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER,
                            queryData.getDataObject().toString())), Occur.SHOULD);
                    wrapper.add(numericQuery, Occur.SHOULD);
                    return wrapper.build();
                }
            }.buildColumnHandlingQuery(assemblageConceptNids, searchColumns);
        } else if (queryData instanceof DynamicByteArray) {
            throw new RuntimeException("DynamicSememeByteArray isn't indexed");
        } else if (queryData instanceof DynamicPolymorphic) {
            throw new RuntimeException("This should have been impossible (polymorphic?)");
        } else if (queryData instanceof DynamicArray) {
            throw new RuntimeException("DynamicSememeArray isn't a searchable type");
        } else {
            LOG.error("This should have been impossible (no match on col type)");
            throw new RuntimeException("unexpected error, see logs");
        }

        return search(restrictToSemantic(q, assemblageConceptNids), sizeLimit, targetGeneration, null);
    }

    /**
     * Adds the searchable fields for every version of the supplied semantic chronology to the Lucene document.
     *
     * The assemblage nid is always added. After that, each version contributes its data via
     * {@link #handleType(Document, DynamicData, int)}:
     * <ul>
     * <li>dynamic versions contribute the columns that the indexer configuration says to index</li>
     * <li>string, long and component nid versions contribute their single value</li>
     * <li>logic graph versions contribute every concept referenced by the nodes of the expression</li>
     * </ul>
     * Because all versions are indexed, identical field name / value pairs may be produced more than once; those
     * duplicates are removed before returning.
     *
     * @param chronicle the chronicle, which must be a {@link SemanticChronology}
     * @param doc the document to add the fields to
     */
    @Override
    protected void addFields(Chronology chronicle, Document doc) {
        final SemanticChronology semanticChronology = (SemanticChronology) chronicle;

        doc.add(new TextField(FIELD_SEMANTIC_ASSEMBLAGE_SEQUENCE, semanticChronology.getAssemblageNid() + "",
                Field.Store.NO));

        for (final Object sv : semanticChronology.getVersionList()) {
            if (sv instanceof DynamicVersion) {
                final DynamicVersion dsv = (DynamicVersion) sv;
                final Integer[] columns = this.lric.whatColumnsToIndex(dsv.getAssemblageNid());

                if (columns != null) {
                    final int dataColCount = dsv.getData().length;

                    for (final int col : columns) {
                        // A configured column may not exist in this version's data; handleType ignores null.
                        final DynamicData dataCol = (col >= dataColCount) ? null : dsv.getData(col);

                        // Only pass in a column number if we were asked to index more than one column for this sememe
                        handleType(doc, dataCol, (columns.length > 1) ? col : -1);
                    }
                }
            }

            // TODO enhance the index configuration to allow us to configure Static sememes as indexed, or not indexed
            // static sememe types are never more than 1 column, always pass -1
            else if (sv instanceof StringVersion) {
                final StringVersion ssv = (StringVersion) sv;

                handleType(doc, new DynamicStringImpl(ssv.getString()), -1);
                incrementIndexedItemCount("Sememe String");
            } else if (sv instanceof LongVersion) {
                final LongVersion lsv = (LongVersion) sv;

                handleType(doc, new DynamicLongImpl(lsv.getLongValue()), -1);
                incrementIndexedItemCount("Sememe Long");
            } else if (sv instanceof ComponentNidVersion) {
                final ComponentNidVersion csv = (ComponentNidVersion) sv;

                handleType(doc, new DynamicNidImpl(csv.getComponentNid()), -1);
                incrementIndexedItemCount("Sememe Component Nid");
            } else if (sv instanceof LogicGraphVersion) {
                final LogicGraphVersion lgsv = (LogicGraphVersion) sv;
                final OpenIntHashSet css = new OpenIntHashSet();

                // Collect every concept referenced anywhere in the expression, then index each one as a nid.
                lgsv.getLogicalExpression().processDepthFirst((LogicNode logicNode, TreeNodeVisitData data) -> {
                    logicNode.addConceptsReferencedByNode(css);
                });
                css.forEachKey(sequence -> {
                    handleType(doc, new DynamicNidImpl(sequence), -1);
                    return true;
                });
            } else {
                LOG.error(
                        "Unexpected type handed to addFields in Sememe Indexer: " + semanticChronology.toString());
            }
        }

        // Due to indexing all of the versions, we may have added duplicate field name/value combinations to the document.
        // Remove the dupes.
        final Iterator<IndexableField> it = doc.iterator();
        final HashSet<String> uniqueFields = new HashSet<>();

        while (it.hasNext()) {
            final IndexableField field = it.next();
            final String text = field.stringValue();

            // Point fields (IntPoint, LongPoint, FloatPoint, DoublePoint) carry their value as packed bytes, so
            // stringValue() is null for them.  Keying on stringValue() alone would make every point field with
            // the same name look identical and silently drop all but the first numeric value.  Fall back to the
            // binary value for those; the leading kind marker keeps text keys and binary keys from colliding.
            final String key = (text != null) ? "s:" + field.name() + "::" + text
                    : "b:" + field.name() + "::" + field.binaryValue();

            if (!uniqueFields.add(key)) {
                it.remove();
            }
        }
    }

    /**
     * Decides whether the supplied chronicle is one that this indexer handles.
     *
     * @param chronicle the chronicle to check
     * @return true if the chronicle is a {@link SemanticChronology} whose version type is dynamic, string, long,
     * component nid or logic graph; false otherwise
     */
    @Override
    protected boolean indexChronicle(Chronology chronicle) {
        if (!(chronicle instanceof SemanticChronology)) {
            return false;
        }

        final VersionType versionType = ((SemanticChronology) chronicle).getVersionType();

        return (versionType == VersionType.DYNAMIC)
                || (versionType == VersionType.STRING)
                || (versionType == VersionType.LONG)
                || (versionType == VersionType.COMPONENT_NID)
                || (versionType == VersionType.LOGIC_GRAPH);
    }

    /**
     * Builds an inclusive numeric range query against the point fields of the given column.
     *
     * Passing the same object as both bounds produces an exact value match. A {@code null} bound leaves that
     * side of the range open.
     *
     * Because a value may have been indexed with a different numeric type than the one being queried, several
     * range queries may be OR'ed together: a double query is accompanied by a float query when a bound fits in a
     * float, and a long query is accompanied by an int query when a bound fits in an int. Bounds that do not fit
     * the narrower type are clamped to the limits of that type.
     *
     * @param queryDataLower the lower bound (inclusive), or null for no lower bound
     * @param queryDataUpper the upper bound (inclusive), or null for no upper bound
     * @param columnName the name of the field to query
     * @return the query
     * @throws RuntimeException if neither bound is a supported numeric type, or if one of the bounds does not
     * carry a numeric value
     */
    private Query buildNumericQuery(DynamicData queryDataLower, DynamicData queryDataUpper, String columnName) {
        // Convert both to the same type (if they differ) - go largest data type to smallest, so we don't lose precision
        // Also - if they pass in longs that would fit in an int, also generate an int query.
        // likewise, with Double - if they pass in a double, that would fit in a float, also generate a float query.
        try {
            final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
            boolean fitsInFloat = false;
            boolean fitsInInt = false;

            if ((queryDataLower instanceof DynamicDouble) || (queryDataUpper instanceof DynamicDouble)) {
                final Double upperVal = (queryDataUpper == null) ? null
                        : ((queryDataUpper instanceof DynamicDouble)
                                ? ((DynamicDouble) queryDataUpper).getDataDouble()
                                : ((Number) queryDataUpper.getDataObject()).doubleValue());
                final Double lowerVal = (queryDataLower == null) ? null
                        : ((queryDataLower instanceof DynamicDouble)
                                ? ((DynamicDouble) queryDataLower).getDataDouble()
                                : ((Number) queryDataLower.getDataObject()).doubleValue());

                // The point range APIs take primitives; an open end is expressed with the infinities.
                bqBuilder.add(DoublePoint.newRangeQuery(columnName,
                        (lowerVal == null) ? Double.NEGATIVE_INFINITY : lowerVal,
                        (upperVal == null) ? Double.POSITIVE_INFINITY : upperVal), Occur.SHOULD);

                // Float.MIN_VALUE is the smallest POSITIVE float; the most negative finite float is
                // -Float.MAX_VALUE, which is the correct lower limit for "fits in a float".
                if (((upperVal != null) && (upperVal <= Float.MAX_VALUE) && (upperVal >= -Float.MAX_VALUE))
                        || ((lowerVal != null) && (lowerVal <= Float.MAX_VALUE)
                                && (lowerVal >= -Float.MAX_VALUE))) {
                    fitsInFloat = true;
                }
            }

            if (fitsInFloat || (queryDataLower instanceof DynamicFloat)
                    || (queryDataUpper instanceof DynamicFloat)) {
                final Float upperVal = (queryDataUpper == null) ? null
                        : ((queryDataUpper instanceof DynamicFloat)
                                ? ((DynamicFloat) queryDataUpper).getDataFloat()
                                : ((fitsInFloat
                                        && (((Number) queryDataUpper.getDataObject()).doubleValue() > Float.MAX_VALUE))
                                                ? Float.MAX_VALUE
                                                : ((Number) queryDataUpper.getDataObject()).floatValue()));
                final Float lowerVal = (queryDataLower == null) ? null
                        : ((queryDataLower instanceof DynamicFloat)
                                ? ((DynamicFloat) queryDataLower).getDataFloat()
                                : ((fitsInFloat
                                        && (((Number) queryDataLower.getDataObject()).doubleValue() < -Float.MAX_VALUE))
                                                ? -Float.MAX_VALUE
                                                : ((Number) queryDataLower.getDataObject()).floatValue()));

                bqBuilder.add(FloatPoint.newRangeQuery(columnName,
                        (lowerVal == null) ? Float.NEGATIVE_INFINITY : lowerVal,
                        (upperVal == null) ? Float.POSITIVE_INFINITY : upperVal), Occur.SHOULD);
            }

            if ((queryDataLower instanceof DynamicLong) || (queryDataUpper instanceof DynamicLong)) {
                final Long upperVal = (queryDataUpper == null) ? null
                        : ((queryDataUpper instanceof DynamicLong) ? ((DynamicLong) queryDataUpper).getDataLong()
                                : ((Number) queryDataUpper.getDataObject()).longValue());
                final Long lowerVal = (queryDataLower == null) ? null
                        : ((queryDataLower instanceof DynamicLong) ? ((DynamicLong) queryDataLower).getDataLong()
                                : ((Number) queryDataLower.getDataObject()).longValue());

                // For integral points an open end is expressed with the type's MIN_VALUE / MAX_VALUE.
                bqBuilder.add(LongPoint.newRangeQuery(columnName,
                        (lowerVal == null) ? Long.MIN_VALUE : lowerVal,
                        (upperVal == null) ? Long.MAX_VALUE : upperVal), Occur.SHOULD);

                if (((upperVal != null) && (upperVal <= Integer.MAX_VALUE) && (upperVal >= Integer.MIN_VALUE))
                        || ((lowerVal != null) && (lowerVal <= Integer.MAX_VALUE)
                                && (lowerVal >= Integer.MIN_VALUE))) {
                    fitsInInt = true;
                }
            }

            if (fitsInInt || (queryDataLower instanceof DynamicInteger)
                    || (queryDataUpper instanceof DynamicInteger) || (queryDataLower instanceof DynamicSequence)
                    || (queryDataUpper instanceof DynamicSequence)) {
                final Integer upperVal = (queryDataUpper == null) ? null
                        : ((queryDataUpper instanceof DynamicInteger)
                                ? ((DynamicInteger) queryDataUpper).getDataInteger()
                                : ((queryDataUpper instanceof DynamicSequence)
                                        ? ((DynamicSequence) queryDataUpper).getDataSequence()
                                        : ((fitsInInt && (((Number) queryDataUpper.getDataObject())
                                                .longValue() > Integer.MAX_VALUE)) ? Integer.MAX_VALUE
                                                        : ((Number) queryDataUpper.getDataObject()).intValue())));
                final Integer lowerVal = (queryDataLower == null) ? null
                        : ((queryDataLower instanceof DynamicInteger)
                                ? ((DynamicInteger) queryDataLower).getDataInteger()
                                : ((queryDataLower instanceof DynamicSequence)
                                        ? ((DynamicSequence) queryDataLower).getDataSequence()
                                        : ((fitsInInt && (((Number) queryDataLower.getDataObject())
                                                .longValue() < Integer.MIN_VALUE)) ? Integer.MIN_VALUE
                                                        : ((Number) queryDataLower.getDataObject()).intValue())));

                bqBuilder.add(IntPoint.newRangeQuery(columnName,
                        (lowerVal == null) ? Integer.MIN_VALUE : lowerVal,
                        (upperVal == null) ? Integer.MAX_VALUE : upperVal), Occur.SHOULD);
            }

            final BooleanQuery bq = bqBuilder.build();

            if (bq.clauses().isEmpty()) {
                throw new RuntimeException("Not a numeric data type - can't perform a range query");
            }

            // Wrap the OR'ed alternatives in a MUST clause, so that at least one of them is required to match.
            final BooleanQuery.Builder must = new BooleanQuery.Builder();

            must.add(bq, Occur.MUST);
            return must.build();
        } catch (final ClassCastException e) {
            // A bound whose data object is not a Number ends up here; keep the cause for diagnosis.
            throw new RuntimeException(
                    "One of the values is not a numeric data type - can't perform a range query", e);
        }
    }

    /**
     * Adds the Lucene field(s) for a single data value to the document, choosing the field type from the runtime
     * type of the value.
     *
     * Every indexable value is added under the general column field. When {@code colNumber} is not negative, it
     * is added a second time under a field name that includes the column number. Arrays are handled by indexing
     * each element with the same column number. A {@code null} value is ignored; byte arrays and polymorphic
     * values are not indexed and are only logged.
     *
     * @param doc the document to add the fields to
     * @param dataCol the data value to index, may be null
     * @param colNumber the column number to use for the column specific field, or a negative number to skip the
     * column specific field
     */
    private void handleType(Document doc, DynamicData dataCol, int colNumber) {
        // Not the greatest design for diskspace / performance... but if we want to be able to support searching across
        // all fields / all sememes - and also support searching per-field within a single sememe, we need to double index
        // all of the data.  Once with a standard field name, and once with a field name that includes the column number.
        // at search time, restricting to certain field matches is only allowed if they are also restricting to an assemblage,
        // so we can compute the correct field number list at search time.
        // Note, we optimize by only doing the double indexing in cases where the sememe has more than one column to begin with.
        // At query time, we construct the query appropriately to handle this optimization.
        // the cheaper option from a disk space perspective (maybe, depending on the data) would be to create a document per
        // column.  The queries would be trivial to write then, but we would be duplicating the component nid and assemblage nid
        // in each document, which is also expensive.  It also doesn't fit the model in OTF, of a document per component.
        // We also duplicate again, on string fields by indexing with the white space analyzer, in addition to the normal one.
        if (dataCol == null) {
            return;
        }

        final boolean indexPerColumn = colNumber >= 0;
        final String perColumnField = COLUMN_FIELD_DATA + "_" + colNumber;
        final String exactField = COLUMN_FIELD_DATA + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER;
        final String exactPerColumnField = perColumnField + PerFieldAnalyzer.WHITE_SPACE_FIELD_MARKER;

        if (dataCol instanceof DynamicBoolean) {
            final String value = String.valueOf(((DynamicBoolean) dataCol).getDataBoolean());

            doc.add(new StringField(exactField, value, Store.NO));

            if (indexPerColumn) {
                doc.add(new StringField(exactPerColumnField, value, Store.NO));
            }

            incrementIndexedItemCount("Dynamic Boolean");
        } else if (dataCol instanceof DynamicByteArray) {
            // Plain log message (not MessageFormat), so the apostrophe must not be doubled.
            LOG.warn("Sememe Indexer configured to index a field that isn't indexable (byte array)");
        } else if (dataCol instanceof DynamicDouble) {
            final double value = ((DynamicDouble) dataCol).getDataDouble();

            doc.add(new DoublePoint(COLUMN_FIELD_DATA, value));

            if (indexPerColumn) {
                doc.add(new DoublePoint(perColumnField, value));
            }

            incrementIndexedItemCount("Dynamic Double");
        } else if (dataCol instanceof DynamicFloat) {
            final float value = ((DynamicFloat) dataCol).getDataFloat();

            doc.add(new FloatPoint(COLUMN_FIELD_DATA, value));

            if (indexPerColumn) {
                doc.add(new FloatPoint(perColumnField, value));
            }

            incrementIndexedItemCount("Dynamic Float");
        } else if (dataCol instanceof DynamicInteger) {
            final int value = ((DynamicInteger) dataCol).getDataInteger();

            doc.add(new IntPoint(COLUMN_FIELD_DATA, value));

            if (indexPerColumn) {
                doc.add(new IntPoint(perColumnField, value));
            }

            incrementIndexedItemCount("Dynamic Integer");
        } else if (dataCol instanceof DynamicSequence) {
            final int value = ((DynamicSequence) dataCol).getDataSequence();

            doc.add(new IntPoint(COLUMN_FIELD_DATA, value));

            if (indexPerColumn) {
                doc.add(new IntPoint(perColumnField, value));
            }

            incrementIndexedItemCount("Dynamic Sequence");
        } else if (dataCol instanceof DynamicLong) {
            final long value = ((DynamicLong) dataCol).getDataLong();

            doc.add(new LongPoint(COLUMN_FIELD_DATA, value));

            if (indexPerColumn) {
                doc.add(new LongPoint(perColumnField, value));
            }

            incrementIndexedItemCount("Dynamic Long");
        } else if (dataCol instanceof DynamicNid) {
            // No need for ranges on a nid, and no need for tokenization - so an untokenized StringField is used
            // (rather than a TextField).
            final String value = String.valueOf(((DynamicNid) dataCol).getDataNid());

            doc.add(new StringField(exactField, value, Store.NO));

            if (indexPerColumn) {
                doc.add(new StringField(exactPerColumnField, value, Store.NO));
            }

            incrementIndexedItemCount("Dynamic Nid");
        } else if (dataCol instanceof DynamicPolymorphic) {
            LOG.error("This should have been impossible (polymorphic?)");
        } else if (dataCol instanceof DynamicString) {
            final String value = ((DynamicString) dataCol).getDataString();

            doc.add(new TextField(COLUMN_FIELD_DATA, value, Store.NO));

            if (indexPerColumn) {
                doc.add(new TextField(perColumnField, value, Store.NO));
            }

            // yes, indexed 4 different times - twice with the standard analyzer, twice with the whitespace analyzer.
            doc.add(new TextField(exactField, value, Store.NO));

            if (indexPerColumn) {
                doc.add(new TextField(exactPerColumnField, value, Store.NO));
            }

            incrementIndexedItemCount("Dynamic String");
        } else if (dataCol instanceof DynamicUUID) {
            // Use the whitespace analyzer on UUIDs
            final String value = ((DynamicUUID) dataCol).getDataUUID().toString();

            doc.add(new StringField(exactField, value, Store.NO));

            if (indexPerColumn) {
                doc.add(new StringField(exactPerColumnField, value, Store.NO));
            }

            incrementIndexedItemCount("Dynamic UUID");
        } else if (dataCol instanceof DynamicArray) {
            // Every element of the array is indexed as if it were a value of the same column.
            for (final DynamicData nestedData : ((DynamicArray) dataCol).getDataArray()) {
                handleType(doc, nestedData, colNumber);
            }
        } else {
            LOG.error("This should have been impossible (no match on col type) {}", dataCol);
        }
    }

    //~--- inner classes -------------------------------------------------------

    /**
     * Template for building a query that is aimed either at the general column data field, or at one or more
     * column specific fields. Subclasses supply the query for a single field name via
     * {@link #buildQuery(String)}; {@link #buildColumnHandlingQuery(int[], Integer[])} decides which field
     * name(s) to ask for.
     */
    private abstract class QueryWrapperForColumnHandling {
        /**
         * Builds the query for one field.
         *
         * @param columnName the name of the field the query should target
         * @return the query
         */
        abstract Query buildQuery(String columnName);

        /**
         * Builds the query against the appropriate field(s).
         *
         * Without search columns, the general column data field is queried. With search columns, exactly one
         * assemblage must be supplied; if fewer than two columns are indexed for that assemblage, no column
         * specific fields exist, so the general field is queried. Otherwise the per-column queries for each
         * requested column are OR'ed together.
         *
         * @param assemblageConcepteNids the assemblage concept nids of the search
         * @param searchColumns the columns to restrict the search to, may be null or empty
         * @return the query
         */
        protected Query buildColumnHandlingQuery(int[] assemblageConcepteNids, Integer[] searchColumns) {
            final boolean columnsRequested = (searchColumns != null) && (searchColumns.length > 0);

            if (!columnsRequested) {
                return buildQuery(COLUMN_FIELD_DATA);
            }

            // If they provide a search column - then they MUST provide one and only one assemblageConcepteNids
            if ((assemblageConcepteNids == null) || (assemblageConcepteNids.length != 1)) {
                throw new RuntimeException(
                        "If a list of search columns is provided, then the sememeConceptSequence variable must contain 1 (and only 1) sememe");
            }

            final Integer[] indexedColumns =
                    SemanticIndexer.this.lric.whatColumnsToIndex(assemblageConcepteNids[0]);

            // If only 1 column was indexed from a sememe, we don't create field specific columns.
            if ((indexedColumns == null) || (indexedColumns.length < 2)) {
                return buildQuery(COLUMN_FIELD_DATA);
            }

            // Specific columns were requested AND more than 1 column is indexed: do a column specific search.
            final BooleanQuery.Builder perColumnQueries = new BooleanQuery.Builder();

            for (final int column : searchColumns) {
                perColumnQueries.add(buildQuery(COLUMN_FIELD_DATA + "_" + column), Occur.SHOULD);
            }

            return perColumnQueries.build();
        }
    }

    /**
     * Not supported by this indexer.
     *
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override
    public Future<Void> sync() {
        throw new UnsupportedOperationException();
    }
}