org.apache.solr.schema.JsonPreAnalyzedParser.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.solr.schema.JsonPreAnalyzedParser.java

Source

package org.apache.solr.schema;

import java.io.IOException;
import java.io.Reader;
import java.util.LinkedHashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeSource.State;
import org.apache.lucene.util.BytesRef;
import org.noggit.JSONUtil;
import org.noggit.JSONWriter;
import org.noggit.ObjectBuilder;
import org.apache.solr.common.util.Base64;
import org.apache.solr.schema.PreAnalyzedField.ParseResult;
import org.apache.solr.schema.PreAnalyzedField.PreAnalyzedParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

public class JsonPreAnalyzedParser implements PreAnalyzedParser {
    private static final Logger LOG = LoggerFactory.getLogger(JsonPreAnalyzedParser.class);

    public static final String VERSION = "1";

    public static final String VERSION_KEY = "v";
    public static final String STRING_KEY = "str";
    public static final String BINARY_KEY = "bin";
    public static final String TOKENS_KEY = "tokens";
    public static final String TOKEN_KEY = "t";
    public static final String OFFSET_START_KEY = "s";
    public static final String OFFSET_END_KEY = "e";
    public static final String POSINCR_KEY = "i";
    public static final String PAYLOAD_KEY = "p";
    public static final String TYPE_KEY = "y";
    public static final String FLAGS_KEY = "f";

    @SuppressWarnings("unchecked")
    @Override
    public ParseResult parse(Reader reader, AttributeSource parent) throws IOException {
        ParseResult res = new ParseResult();
        StringBuilder sb = new StringBuilder();
        char[] buf = new char[128];
        int cnt;
        while ((cnt = reader.read(buf)) > 0) {
            sb.append(buf, 0, cnt);
        }
        String val = sb.toString();
        // empty string - accept even without version number
        if (val.length() == 0) {
            return res;
        }
        Object o = ObjectBuilder.fromJSON(val);
        if (!(o instanceof Map)) {
            throw new IOException("Invalid JSON type " + o.getClass().getName() + ", expected Map");
        }
        Map<String, Object> map = (Map<String, Object>) o;
        // check version
        String version = (String) map.get(VERSION_KEY);
        if (version == null) {
            throw new IOException("Missing VERSION key");
        }
        if (!VERSION.equals(version)) {
            throw new IOException("Unknown VERSION '" + version + "', expected " + VERSION);
        }
        if (map.containsKey(STRING_KEY) && map.containsKey(BINARY_KEY)) {
            throw new IOException("Field cannot have both stringValue and binaryValue");
        }
        res.str = (String) map.get(STRING_KEY);
        String bin = (String) map.get(BINARY_KEY);
        if (bin != null) {
            byte[] data = Base64.base64ToByteArray(bin);
            res.bin = data;
        }
        List<Object> tokens = (List<Object>) map.get(TOKENS_KEY);
        if (tokens == null) {
            return res;
        }
        int tokenStart = 0;
        int tokenEnd = 0;
        parent.clearAttributes();
        for (Object ot : tokens) {
            tokenStart = tokenEnd + 1; // automatic increment by 1 separator
            Map<String, Object> tok = (Map<String, Object>) ot;
            boolean hasOffsetStart = false;
            boolean hasOffsetEnd = false;
            int len = -1;
            for (Entry<String, Object> e : tok.entrySet()) {
                String key = e.getKey();
                if (key.equals(TOKEN_KEY)) {
                    CharTermAttribute catt = parent.addAttribute(CharTermAttribute.class);
                    String str = String.valueOf(e.getValue());
                    catt.append(str);
                    len = str.length();
                } else if (key.equals(OFFSET_START_KEY)) {
                    Object obj = e.getValue();
                    hasOffsetStart = true;
                    if (obj instanceof Number) {
                        tokenStart = ((Number) obj).intValue();
                    } else {
                        try {
                            tokenStart = Integer.parseInt(String.valueOf(obj));
                        } catch (NumberFormatException nfe) {
                            LOG.warn("Invalid " + OFFSET_START_KEY + " attribute, skipped: '" + obj + "'");
                            hasOffsetStart = false;
                        }
                    }
                } else if (key.equals(OFFSET_END_KEY)) {
                    hasOffsetEnd = true;
                    Object obj = e.getValue();
                    if (obj instanceof Number) {
                        tokenEnd = ((Number) obj).intValue();
                    } else {
                        try {
                            tokenEnd = Integer.parseInt(String.valueOf(obj));
                        } catch (NumberFormatException nfe) {
                            LOG.warn("Invalid " + OFFSET_END_KEY + " attribute, skipped: '" + obj + "'");
                            hasOffsetEnd = false;
                        }
                    }
                } else if (key.equals(POSINCR_KEY)) {
                    Object obj = e.getValue();
                    int posIncr = 1;
                    if (obj instanceof Number) {
                        posIncr = ((Number) obj).intValue();
                    } else {
                        try {
                            posIncr = Integer.parseInt(String.valueOf(obj));
                        } catch (NumberFormatException nfe) {
                            LOG.warn("Invalid " + POSINCR_KEY + " attribute, skipped: '" + obj + "'");
                        }
                    }
                    PositionIncrementAttribute patt = parent.addAttribute(PositionIncrementAttribute.class);
                    patt.setPositionIncrement(posIncr);
                } else if (key.equals(PAYLOAD_KEY)) {
                    String str = String.valueOf(e.getValue());
                    if (str.length() > 0) {
                        byte[] data = Base64.base64ToByteArray(str);
                        PayloadAttribute p = parent.addAttribute(PayloadAttribute.class);
                        if (data != null && data.length > 0) {
                            p.setPayload(new BytesRef(data));
                        }
                    }
                } else if (key.equals(FLAGS_KEY)) {
                    try {
                        int f = Integer.parseInt(String.valueOf(e.getValue()), 16);
                        FlagsAttribute flags = parent.addAttribute(FlagsAttribute.class);
                        flags.setFlags(f);
                    } catch (NumberFormatException nfe) {
                        LOG.warn("Invalid " + FLAGS_KEY + " attribute, skipped: '" + e.getValue() + "'");
                    }
                } else if (key.equals(TYPE_KEY)) {
                    TypeAttribute tattr = parent.addAttribute(TypeAttribute.class);
                    tattr.setType(String.valueOf(e.getValue()));
                } else {
                    LOG.warn("Unknown attribute, skipped: " + e.getKey() + "=" + e.getValue());
                }
            }
            // handle offset attr
            OffsetAttribute offset = parent.addAttribute(OffsetAttribute.class);
            if (!hasOffsetEnd && len > -1) {
                tokenEnd = tokenStart + len;
            }
            offset.setOffset(tokenStart, tokenEnd);
            if (!hasOffsetStart) {
                tokenStart = tokenEnd + 1;
            }
            // capture state and add to result
            State state = parent.captureState();
            res.states.add(state.clone());
            // reset for reuse
            parent.clearAttributes();
        }
        return res;
    }

    @Override
    public String toFormattedString(Field f) throws IOException {
        Map<String, Object> map = new LinkedHashMap<String, Object>();
        map.put(VERSION_KEY, VERSION);
        if (f.fieldType().stored()) {
            String stringValue = f.stringValue();
            if (stringValue != null) {
                map.put(STRING_KEY, stringValue);
            }
            BytesRef binaryValue = f.binaryValue();
            if (binaryValue != null) {
                map.put(BINARY_KEY,
                        Base64.byteArrayToBase64(binaryValue.bytes, binaryValue.offset, binaryValue.length));
            }
        }
        TokenStream ts = f.tokenStreamValue();
        if (ts != null) {
            List<Map<String, Object>> tokens = new LinkedList<Map<String, Object>>();
            while (ts.incrementToken()) {
                Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
                String cTerm = null;
                String tTerm = null;
                Map<String, Object> tok = new TreeMap<String, Object>();
                while (it.hasNext()) {
                    Class<? extends Attribute> cl = it.next();
                    if (!ts.hasAttribute(cl)) {
                        continue;
                    }
                    Attribute att = ts.getAttribute(cl);
                    if (cl.isAssignableFrom(CharTermAttribute.class)) {
                        CharTermAttribute catt = (CharTermAttribute) att;
                        cTerm = new String(catt.buffer(), 0, catt.length());
                    } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
                        TermToBytesRefAttribute tatt = (TermToBytesRefAttribute) att;
                        tTerm = tatt.getBytesRef().utf8ToString();
                    } else {
                        if (cl.isAssignableFrom(FlagsAttribute.class)) {
                            tok.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute) att).getFlags()));
                        } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
                            tok.put(OFFSET_START_KEY, ((OffsetAttribute) att).startOffset());
                            tok.put(OFFSET_END_KEY, ((OffsetAttribute) att).endOffset());
                        } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
                            BytesRef p = ((PayloadAttribute) att).getPayload();
                            if (p != null && p.length > 0) {
                                tok.put(PAYLOAD_KEY, Base64.byteArrayToBase64(p.bytes, p.offset, p.length));
                            }
                        } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
                            tok.put(POSINCR_KEY, ((PositionIncrementAttribute) att).getPositionIncrement());
                        } else if (cl.isAssignableFrom(TypeAttribute.class)) {
                            tok.put(TYPE_KEY, ((TypeAttribute) att).type());
                        } else {
                            tok.put(cl.getName(), att.toString());
                        }
                    }
                }
                String term = null;
                if (cTerm != null) {
                    term = cTerm;
                } else {
                    term = tTerm;
                }
                if (term != null && term.length() > 0) {
                    tok.put(TOKEN_KEY, term);
                }
                tokens.add(tok);
            }
            map.put(TOKENS_KEY, tokens);
        }
        return JSONUtil.toJSON(map, -1);
    }

}