org.apache.lucene.server.handlers.AddDocumentHandler.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.server.handlers.AddDocumentHandler.java

Source

package org.apache.lucene.server.handlers;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CopyOnWriteArrayList;

import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.server.Constants;
import org.apache.lucene.server.FieldDef;
import org.apache.lucene.server.FinishRequest;
import org.apache.lucene.server.GlobalState;
import org.apache.lucene.server.IndexState;
import org.apache.lucene.server.params.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;

import net.minidev.json.JSONArray;
import net.minidev.json.JSONObject;
import net.minidev.json.JSONValue;

/** Handles {@code addDocument}, by delegating the single
 *  document to {@link BulkAddDocumentHandler}. */
public class AddDocumentHandler extends Handler {

    /** Type describing one document: a single {@code fields} struct
     *  whose entries are the name/value pairs to index. */
    public static final StructType DOCUMENT_TYPE = new StructType(
            new Param("fields",
                    "Fields to index into one document",
                    new StructType(
                            new Param("anyFieldName",
                                    "A name/value pair for this document.  Multiple name/values can be specified, but each field name must already have been registered via @registerFields.  The type of the value must match how the field was registered.",
                                    new AnyType()))));

    /** Parameter type for this handler.  Starts out with only
     *  {@code indexName}; the constructor then copies every
     *  parameter of {@link #DOCUMENT_TYPE} into it. */
    final StructType TYPE = new StructType(new Param("indexName", "Index name", new StringType()));

    /** Sole constructor.
     *
     *  @param state global state, passed through to the {@link Handler} superclass */
    public AddDocumentHandler(GlobalState state) {
        super(state);
        // Extend this handler's parameter type with the document parameters:
        TYPE.params.putAll(DOCUMENT_TYPE.params);
    }

    /** Returns this handler's parameter type, {@link #TYPE}. */
    @Override
    public StructType getType() {
        return TYPE;
    }

    /** Returns the short description text for this handler. */
    @Override
    public String getTopDoc() {
        return "Adds one document to the index.  Returns the index generation (indexGen) that contains this added document.";
    }

    /** {@link Field} subclass that takes its value as a plain
     *  {@code Object} and assigns it directly to {@code fieldsData}. */
    static class MyField extends Field {
        /** Creates the field.
         *
         *  @param name field name
         *  @param ft field type
         *  @param value value to assign as this field's data */
        public MyField(String name, FieldType ft, Object value) {
            super(name, ft);
            // This constructor does not itself check the value's type; it is assigned as-is:
            fieldsData = value;
        }
    }

    /** Validates one parsed value against the field's registered type and
     *  adds the resulting Lucene fields to the document: facet fields, doc
     *  values fields, a points field and the stored/indexed field, each
     *  only when the field definition asks for it.
     *
     *  @param fd definition of the field the value belongs to; must not be null
     *  @param doc document the new fields are added to
     *  @param o native value as produced by {@link #getNativeValue}; must not be null
     *  @param boost boost set on the stored/indexed field
     *  @throws IllegalArgumentException if the value does not fit the field definition
     */
    private static void parseOneNativeValue(FieldDef fd, Document doc, Object o, float boost) {

        assert o != null;
        assert fd != null;

        // Type checks (and boolean/date_time conversion) only apply when the
        // value is actually stored, indexed or written as doc values:
        if (fd.fieldType.stored() || fd.fieldType.indexOptions() != IndexOptions.NONE
                || fd.fieldType.docValuesType() != DocValuesType.NONE) {
            o = normalizeValue(fd, o);
        }

        addFacetFields(fd, doc, o);

        if (fd.highlighted) {
            assert o instanceof String;
            if (fd.multiValued && (((String) o).indexOf(Constants.INFORMATION_SEP) != -1)) {
                // TODO: we could remove this restriction if it
                // ever matters ... we can highlight multi-valued
                // fields at search time without stealing a
                // character:
                fail(fd.name,
                        "multiValued and highlighted fields cannot contain INFORMATION_SEPARATOR (U+001F) character: this character is used internally when highlighting multi-valued fields");
            }
        }

        // Separately index doc values:
        addDocValuesFields(fd, doc, o);

        // maybe add separate points field:
        if (fd.usePoints) {
            addPointsField(fd, doc, o);
        }

        if (fd.fieldType.stored() || fd.fieldType.indexOptions() != IndexOptions.NONE) {
            // We use fieldTypeNoDV because we separately added (above) the doc values field:
            Field f = new MyField(fd.name, fd.fieldTypeNoDV, o);
            f.setBoost(boost);
            doc.add(f);
        }
    }

    /** Checks that the value has the Java type expected for the field's
     *  value type and returns the value to index: booleans become
     *  Integer 0/1, date_time strings become msec since epoch (Long),
     *  everything else is returned unchanged. */
    private static Object normalizeValue(FieldDef fd, Object o) {
        switch (fd.valueType) {
        case TEXT:
        case ATOM:
            if (!(o instanceof String)) {
                fail(fd.name, "expected String value but got " + o);
            }
            return o;
        case BOOLEAN:
            if (!(o instanceof Boolean)) {
                fail(fd.name, "expected Boolean value but got " + o.getClass());
            }
            // Turn boolean -> int now; compare by value, not by reference:
            return Boolean.TRUE.equals(o) ? Integer.valueOf(1) : Integer.valueOf(0);
        case FLOAT:
        case DOUBLE:
            if (!(o instanceof Number)) {
                fail(fd.name, "for float or double field, expected Number value but got " + o);
            }
            return o;
        case LAT_LON:
            if (!(o instanceof double[])) {
                fail(fd.name, "for latlon field, expected [lat, lon] double array " + o);
            }
            return o;
        case INT:
        case LONG:
            // int or long
            if (!(o instanceof Integer) && !(o instanceof Long)) {
                fail(fd.name, "for int or long field, expected Integer or Long value but got " + o
                        + " of class=" + o.getClass());
            }
            return o;
        case DATE_TIME:
            // turn date time into msec since epoch:
            if (!(o instanceof String)) {
                fail(fd.name,
                        "for date_time field, expected String but got " + o + " of class=" + o.getClass());
            }
            String s = (String) o;
            FieldDef.DateTimeParser parser = fd.getDateTimeParser();
            parser.position.setIndex(0);
            // A parse position keeps its error index until it is explicitly
            // reset; clear it so an earlier failed parse on this (possibly
            // reused) parser cannot make a valid value fail:
            parser.position.setErrorIndex(-1);
            Date date = parser.parser.parse(s, parser.position);
            // Reject both outright parse errors and trailing unparsed text:
            if (parser.position.getErrorIndex() != -1 || parser.position.getIndex() != s.length()) {
                fail(fd.name,
                        "could not parse \"" + o + "\" as date_time with format \"" + fd.dateTimeFormat + "\"");
            }
            return Long.valueOf(date.getTime());
        default:
            throw new AssertionError();
        }
    }

    /** Adds the facet field(s) for the value, according to the field's
     *  {@code faceted} setting (flat, hierarchy or sortedSetDocValues);
     *  any other setting adds nothing. */
    @SuppressWarnings("unchecked")
    private static void addFacetFields(FieldDef fd, Document doc, Object o) {
        if (fd.faceted.equals("flat")) {

            if (o instanceof List) {
                fail(fd.name, "value should be String when facet=flat; got JSONArray");
            }

            doc.add(new FacetField(fd.name, o.toString()));
        } else if (fd.faceted.equals("hierarchy")) {
            if (o instanceof List) {
                if (fd.multiValued) {
                    // One facet path per inner list:
                    List<List<String>> values = (List<List<String>>) o;
                    for (List<String> sub : values) {
                        doc.add(new FacetField(fd.name, sub.toArray(new String[sub.size()])));
                    }
                } else {
                    // The whole list is a single facet path:
                    List<String> values = (List<String>) o;
                    doc.add(new FacetField(fd.name, values.toArray(new String[values.size()])));
                }
            } else {
                doc.add(new FacetField(fd.name, o.toString()));
            }
        } else if (fd.faceted.equals("sortedSetDocValues")) {
            if (o instanceof List) {
                fail(fd.name, "value should be String when facet=sortedSetDocValues; got JSONArray");
            }
            doc.add(new SortedSetDocValuesFacetField(fd.name, o.toString()));
        }
    }

    /** Adds the doc values field matching the field's doc values type
     *  and value type; adds nothing when the field has no doc values. */
    @SuppressWarnings("unchecked")
    private static void addDocValuesFields(FieldDef fd, Document doc, Object o) {
        DocValuesType dvType = fd.fieldType.docValuesType();
        if (dvType == DocValuesType.BINARY || dvType == DocValuesType.SORTED) {
            if (!(o instanceof String)) {
                fail(fd.name, "expected String but got: " + o);
            }
            BytesRef br = new BytesRef((String) o);
            if (dvType == DocValuesType.BINARY) {
                doc.add(new BinaryDocValuesField(fd.name, br));
            } else {
                doc.add(new SortedDocValuesField(fd.name, br));
            }
        } else if (dvType == DocValuesType.SORTED_SET) {
            if (o instanceof List) {
                List<String> values = (List<String>) o;
                for (String _o : values) {
                    doc.add(new SortedSetDocValuesField(fd.name, new BytesRef(_o)));
                }
            } else {
                doc.add(new SortedSetDocValuesField(fd.name, new BytesRef((String) o)));
            }
        } else if (fd.valueType == FieldDef.FieldValueType.LAT_LON && dvType == DocValuesType.SORTED_NUMERIC) {
            double[] latLon = (double[]) o;
            doc.add(new LatLonDocValuesField(fd.name, latLon[0], latLon[1]));
        } else if (dvType == DocValuesType.NUMERIC || dvType == DocValuesType.SORTED_NUMERIC) {
            // Multi-valued fields use SortedNumericDocValuesField, single-valued
            // fields the per-type single-valued doc values field:
            if (fd.valueType == FieldDef.FieldValueType.FLOAT) {
                if (fd.multiValued) {
                    doc.add(new SortedNumericDocValuesField(fd.name,
                            NumericUtils.floatToSortableInt(((Number) o).floatValue())));
                } else {
                    doc.add(new FloatDocValuesField(fd.name, ((Number) o).floatValue()));
                }
            } else if (fd.valueType == FieldDef.FieldValueType.DOUBLE) {
                if (fd.multiValued) {
                    doc.add(new SortedNumericDocValuesField(fd.name,
                            NumericUtils.doubleToSortableLong(((Number) o).doubleValue())));
                } else {
                    doc.add(new DoubleDocValuesField(fd.name, ((Number) o).doubleValue()));
                }
            } else if (fd.valueType == FieldDef.FieldValueType.INT) {
                if (fd.multiValued) {
                    doc.add(new SortedNumericDocValuesField(fd.name, ((Number) o).intValue()));
                } else {
                    doc.add(new NumericDocValuesField(fd.name, ((Number) o).intValue()));
                }
            } else if (fd.valueType == FieldDef.FieldValueType.LONG
                    || fd.valueType == FieldDef.FieldValueType.DATE_TIME) {
                if (fd.multiValued) {
                    doc.add(new SortedNumericDocValuesField(fd.name, ((Number) o).longValue()));
                } else {
                    doc.add(new NumericDocValuesField(fd.name, ((Number) o).longValue()));
                }
            } else {
                assert fd.valueType == FieldDef.FieldValueType.BOOLEAN;
                if (fd.multiValued) {
                    doc.add(new SortedNumericDocValuesField(fd.name, ((Integer) o).intValue()));
                } else {
                    doc.add(new NumericDocValuesField(fd.name, ((Integer) o).intValue()));
                }
            }
        }
    }

    /** Adds the points field for the value.  DATE_TIME values have
     *  already been converted to msec since epoch and are indexed as
     *  long points, mirroring how their doc values are written. */
    private static void addPointsField(FieldDef fd, Document doc, Object o) {
        if (fd.valueType == FieldDef.FieldValueType.INT) {
            doc.add(new IntPoint(fd.name, ((Number) o).intValue()));
        } else if (fd.valueType == FieldDef.FieldValueType.LONG
                || fd.valueType == FieldDef.FieldValueType.DATE_TIME) {
            doc.add(new LongPoint(fd.name, ((Number) o).longValue()));
        } else if (fd.valueType == FieldDef.FieldValueType.FLOAT) {
            doc.add(new FloatPoint(fd.name, ((Number) o).floatValue()));
        } else if (fd.valueType == FieldDef.FieldValueType.DOUBLE) {
            doc.add(new DoublePoint(fd.name, ((Number) o).doubleValue()));
        } else if (fd.valueType == FieldDef.FieldValueType.LAT_LON) {
            double[] latLon = (double[]) o;
            doc.add(new LatLonPoint(fd.name, latLon[0], latLon[1]));
        } else {
            throw new AssertionError();
        }
    }

    /** Used by plugins to process a document after it was
     *  created from the JSON request. */
    public interface PostHandle {
        // nocommit need test coverage:
        /** Invoke the handler, non-streaming.
         *
         *  @param state index state passed through from the caller
         *  @param r the request (NOTE(review): the call site is not in this file; confirm what it carries)
         *  @param doc document to process
         *  @throws IOException if the handler fails on I/O */
        public void invoke(IndexState state, Request r, Document doc) throws IOException;

        /** Invoke the handler, streaming.  {@link AddDocumentHandler#parseDocument}
         *  calls this for each top-level key other than {@code fields},
         *  trying registered handlers in turn until one returns true; if
         *  none does, parsing fails with an IllegalArgumentException.
         *
         *  @param state index state passed through from the caller
         *  @param fieldName the top-level key that was just read
         *  @param p the streaming parser the key was read from
         *  @param doc document being built
         *  @return true if this handler handled the key, false to let the next handler try
         *  @throws IOException if reading from the parser fails */
        public boolean invoke(IndexState state, String fieldName, JsonParser p, Document doc) throws IOException;
    }

    /** Registered {@link PostHandle}s; {@link #parseDocument} iterates
     *  over them in the order they were added. */
    static final List<PostHandle> postHandlers = new CopyOnWriteArrayList<PostHandle>();

    /** Record a new {@link PostHandle}.
     *
     *  @param handler handler to append to {@link #postHandlers} */
    public static void addPostHandle(PostHandle handler) {
        postHandlers.add(handler);
    }

    /* Disabled helper, kept for reference: parses the string value to the appropriate type. */
    /*
    public static Object fixType(FieldDef fd, String value) {
      Object o;
      if (fd.valueType.equals("int")) {
        o = Integer.valueOf(Integer.parseInt(value));
      } else if (fd.valueType.equals("long")) {
        o = Long.valueOf(Long.parseLong(value));
      } else if (fd.valueType.equals("float")) {
        o = Float.valueOf(Float.parseFloat(value));
      } else if (fd.valueType.equals("double")) {
        o = Double.valueOf(Double.parseDouble(value));
      } else {
        o = value;
      }
      return o;
    }
    */

    /** Rejects a field value by throwing an exception whose message
     *  is prefixed with the field's name.
     *
     *  @param fieldName name of the offending field
     *  @param message description of what is wrong with the value
     *  @throws IllegalArgumentException always */
    static void fail(String fieldName, String message) {
        final StringBuilder detail = new StringBuilder("field=");
        detail.append(fieldName).append(": ").append(message);
        throw new IllegalArgumentException(detail.toString());
    }

    /** Reads the {@code fields} object, which should look like
     *  {field1: ..., field2: ..., ...}, and adds every field's
     *  value(s) to the document.
     *
     *  @param state index whose registered fields are looked up by name
     *  @param doc document the parsed fields are added to
     *  @param p parser whose next token must start the object
     *  @throws IllegalArgumentException if the next token is not the start of an object
     *  @throws IOException if reading from the parser fails */
    public static void parseFields(IndexState state, Document doc, JsonParser p) throws IOException {
        if (p.nextToken() != JsonToken.START_OBJECT) {
            throw new IllegalArgumentException("fields should be an object");
        }
        // Each iteration starts on a field name; parseOneField consumes that field's value:
        for (JsonToken next = p.nextToken(); next != JsonToken.END_OBJECT; next = p.nextToken()) {
            assert next == JsonToken.FIELD_NAME;
            parseOneField(p, state, doc, p.getText());
        }
    }

    /** Parse a Document using Jackson's streaming parser
     *  API.  The document should look like {fields: {..., ...}};
     *  any other top-level key is offered to the registered
     *  {@link PostHandle}s and rejected if none of them handles it.
     *
     *  @param state index whose registered fields are looked up by name
     *  @param p parser positioned just before the document
     *  @return the parsed document, or null if the next token is the
     *          end of an array (no more documents)
     *  @throws IllegalArgumentException if the next token is neither the start of an
     *          object nor the end of an array, or if a top-level key is not recognized
     *  @throws IOException if reading from the parser fails */
    public static Document parseDocument(IndexState state, JsonParser p) throws IOException {
        JsonToken token = p.nextToken();
        if (token == JsonToken.END_ARRAY) {
            // nocommit hackish.. caller should tell us this means "end"?
            return null;
        } else if (token != JsonToken.START_OBJECT) {
            throw new IllegalArgumentException("expected JSON Object");
        }

        final Document doc = new Document();
        while (true) {
            token = p.nextToken();
            if (token == JsonToken.END_OBJECT) {
                break;
            }
            assert token == JsonToken.FIELD_NAME : token;

            String fieldName = p.getText();
            if (fieldName.equals("fields")) {
                parseFields(state, doc, p);
            } else {
                // Let a plugin handle it:
                boolean handled = false;
                for (PostHandle postHandle : postHandlers) {
                    if (postHandle.invoke(state, fieldName, p, doc)) {
                        handled = true;
                        break;
                    }
                }

                if (!handled) {
                    // Report the key captured above: a handler may have advanced
                    // the parser before declining, so p.getText() could by now
                    // refer to a different token.
                    throw new IllegalArgumentException("unrecognized field " + fieldName);
                }
            }

            // nocommit need test that same field name can't
            // appear more than once?  app must put all values for
            // a given field into an array (for a multi-valued
            // field)
        }

        return doc;
    }

    /** Looks up the named field's definition and parses its value
     *  from the stream; the value is an array in the multi-valued
     *  case, or an object of the appropriate type in the
     *  single-valued case. */
    private static void parseOneField(JsonParser p, IndexState state, Document doc, String name)
            throws IOException {
        final FieldDef fieldDef = state.getField(name);
        parseOneValue(fieldDef, p, doc);
    }

    /** Parses the current json token into the corresponding
     *  java object: String, Long, Double or Boolean for scalar
     *  tokens; a {@code List<String>} or {@code List<List<String>>}
     *  for an array on a hierarchy-faceted field; a
     *  {@code double[2]} of [lat, lon] for a latlon field.
     *
     *  @param fd definition of the field being parsed
     *  @param token the current token of {@code p}
     *  @param p parser to read any further tokens of the value from
     *  @return the parsed value, never null
     *  @throws IllegalArgumentException if the token cannot be turned into a value for this field
     *  @throws IOException if reading from the parser fails */
    private static Object getNativeValue(FieldDef fd, JsonToken token, JsonParser p) throws IOException {
        if (token == JsonToken.VALUE_STRING) {
            return p.getText();
        } else if (token == JsonToken.VALUE_NUMBER_INT) {
            return Long.valueOf(p.getLongValue());
        } else if (token == JsonToken.VALUE_NUMBER_FLOAT) {
            return Double.valueOf(p.getDoubleValue());
        } else if (token == JsonToken.VALUE_TRUE) {
            return Boolean.TRUE;
        } else if (token == JsonToken.VALUE_FALSE) {
            return Boolean.FALSE;
        } else if (fd.faceted.equals("hierarchy") && token == JsonToken.START_ARRAY) {
            if (fd.multiValued) {
                return readHierarchyPaths(fd, p);
            } else {
                return readHierarchyPath(fd, p);
            }
        } else if (fd.valueType == FieldDef.FieldValueType.LAT_LON) {
            return readLatLon(fd, token, p);
        }

        String message;
        if (token == JsonToken.VALUE_NULL) {
            message = "null field value not supported; just omit this field from the document instead";
        } else {
            message = "value in inner object field value should be string, int/long, float/double or boolean; got "
                    + token;
        }

        fail(fd.name, message);

        // fail always throws; this only satisfies the compiler's flow analysis:
        throw new AssertionError();
    }

    /** Reads the rest of an array of strings (one facet path) whose
     *  START_ARRAY token has already been consumed. */
    private static List<String> readHierarchyPath(FieldDef fd, JsonParser p) throws IOException {
        List<String> path = new ArrayList<>();
        for (JsonToken t = p.nextToken(); t != JsonToken.END_ARRAY; t = p.nextToken()) {
            if (t == JsonToken.START_ARRAY) {
                fail(fd.name, "expected array of strings, but saw array inside array");
            } else if (t != JsonToken.VALUE_STRING) {
                fail(fd.name, "expected array of strings, but saw " + t + " inside array");
            }
            path.add(p.getText());
        }
        return path;
    }

    /** Reads the rest of a multi-valued hierarchy array whose
     *  START_ARRAY token has already been consumed.  Each element is
     *  either an array of strings (one path) or a bare string (a
     *  path with a single component). */
    private static List<List<String>> readHierarchyPaths(FieldDef fd, JsonParser p) throws IOException {
        List<List<String>> paths = new ArrayList<>();
        for (JsonToken t = p.nextToken(); t != JsonToken.END_ARRAY; t = p.nextToken()) {
            if (t == JsonToken.START_ARRAY) {
                List<String> sub = new ArrayList<>();
                paths.add(sub);
                while (true) {
                    JsonToken inner = p.nextToken();
                    if (inner == JsonToken.VALUE_STRING) {
                        sub.add(p.getText());
                    } else if (inner == JsonToken.END_ARRAY) {
                        break;
                    } else {
                        fail(fd.name, "expected array of strings or array of array of strings, but saw "
                                + inner + " inside inner array");
                    }
                }
            } else if (t == JsonToken.VALUE_STRING) {
                List<String> sub = new ArrayList<>();
                paths.add(sub);
                sub.add(p.getText());
            } else {
                fail(fd.name, "expected array of strings, but saw " + t + " inside array");
            }
        }
        return paths;
    }

    /** Reads a [lat, lon] array; {@code token} is the current token
     *  and must be the START_ARRAY.  Both coordinates may be written
     *  as floating point or integer JSON numbers. */
    private static double[] readLatLon(FieldDef fd, JsonToken token, JsonParser p) throws IOException {
        if (token != JsonToken.START_ARRAY) {
            fail(fd.name, "latlon field must be [lat, lon] value; got " + token);
        }
        double[] latLon = new double[2];
        for (int i = 0; i < latLon.length; i++) {
            JsonToken t = p.nextToken();
            // Integer literals such as 40 are valid coordinates too:
            if (t != JsonToken.VALUE_NUMBER_FLOAT && t != JsonToken.VALUE_NUMBER_INT) {
                fail(fd.name, "latlon field must be [lat, lon] value; got " + t);
            }
            latLon[i] = p.getDoubleValue();
        }
        JsonToken end = p.nextToken();
        if (end != JsonToken.END_ARRAY) {
            fail(fd.name, "latlon field must be [lat, lon] value; got " + end);
        }
        return latLon;
    }

    /** Parse one value for a field, which is either an
     *  object matching the type of the field, or a {boost:
     *  ..., value: ...}.  An array on a multi-valued field is
     *  parsed element by element through recursive calls.
     *
     *  @return false if the token read was the end of an array
     *          (no value was parsed), true otherwise */
    private static boolean parseOneValue(FieldDef fd, JsonParser p, Document doc) throws IOException {

        final JsonToken first = p.nextToken();

        // End of the enclosing multi-valued array: nothing left to parse.
        if (first == JsonToken.END_ARRAY) {
            assert fd.multiValued;
            return false;
        }

        final Object value;
        float boost = 1.0f;

        if (first == JsonToken.START_ARRAY) {
            final boolean arrayIsOneValue = "hierarchy".equals(fd.faceted)
                    || fd.valueType == FieldDef.FieldValueType.LAT_LON;
            if (!arrayIsOneValue) {
                if (!fd.multiValued) {
                    fail(fd.name, "expected single value, not array, since this field is not multiValued");
                }
                // Consume elements until the recursive call reports the array's end:
                boolean more = true;
                while (more) {
                    more = parseOneValue(fd, p, doc);
                }
                return true;
            }
            value = getNativeValue(fd, first, p);
        } else if (fd.fieldType.indexOptions() != IndexOptions.NONE && first == JsonToken.START_OBJECT) {
            // Parse a {boost: X, value: Y}
            Object inner = null;
            for (JsonToken t = p.nextToken(); t != JsonToken.END_OBJECT; t = p.nextToken()) {
                assert t == JsonToken.FIELD_NAME;
                final String key = p.getText();
                switch (key) {
                case "boost":
                    final JsonToken boostToken = p.nextToken();
                    if (boostToken == JsonToken.VALUE_NUMBER_INT || boostToken == JsonToken.VALUE_NUMBER_FLOAT) {
                        boost = p.getFloatValue();
                    } else {
                        fail(fd.name, "boost in inner object field value must have float or int value; got: "
                                + boostToken);
                    }
                    break;
                case "value":
                    inner = getNativeValue(fd, p.nextToken(), p);
                    break;
                default:
                    fail(fd.name, "unrecognized json key \"" + key
                            + "\" in inner object field value; must be boost or value");
                    break;
                }
            }
            if (inner == null) {
                fail(fd.name, "missing 'value' key");
            }
            value = inner;
        } else {
            // Parse a native value:
            value = getNativeValue(fd, first, p);
        }

        parseOneNativeValue(fd, doc, value, boost);
        return true;
    }

    /** Turns the request's {@code fields} value back into a JSON
     *  string and returns a {@link FinishRequest} that, when
     *  finished, streams that string to the {@code bulkAddDocument}
     *  handler.  If the bulk result reports errors, the first
     *  error's exception text is rethrown as an
     *  IllegalArgumentException. */
    @Override
    public FinishRequest handle(final IndexState indexState, final Request r, Map<String, List<String>> params)
            throws Exception {

        indexState.verifyStarted(r);

        // NOTE: somewhat wasteful since we re-serialize to
        // string only to re-parse the JSON, but this allows
        // single-source (bulk) for parsing, and apps that care
        // about performance will use bulk APIs:

        final JSONObject rawParams = r.getRawParams();
        final String bulkRequestString = rawParams.get("fields").toString() + '\n';
        rawParams.clear();

        return new FinishRequest() {
            @Override
            public String finish() throws Exception {
                final String result = globalState.getHandler("bulkAddDocument").handleStreamed(
                        new StringReader(bulkRequestString),
                        Collections.singletonMap("indexName", Collections.singletonList(indexState.name)));

                // Cheap substring test first, so the result is only parsed when it may hold errors:
                if (!result.contains("errors")) {
                    return result;
                }

                final JSONObject response = (JSONObject) JSONValue.parseStrict(result);
                if (response.containsKey("errors")) {
                    final JSONArray errors = (JSONArray) response.get("errors");
                    final JSONObject firstError = (JSONObject) errors.get(0);
                    throw new IllegalArgumentException((String) firstError.get("exception"));
                }
                return result;
            }
        };
    }
}