org.openrdf.sail.solr.SolrIndex.java Source code

Java tutorial

Introduction

Here is the source code for org.openrdf.sail.solr.SolrIndex.java

Source

/* 
 * Licensed to Aduna under one or more contributor license agreements.  
 * See the NOTICE.txt file distributed with this work for additional 
 * information regarding copyright ownership. 
 *
 * Aduna licenses this file to you under the terms of the Aduna BSD 
 * License (the "License"); you may not use this file except in compliance 
 * with the License. See the LICENSE.txt file distributed with this work 
 * for the full License.
 *
 * Unless required by applicable law or agreed to in writing, software 
 * distributed under the License is distributed on an "AS IS" BASIS, 
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
 * implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */
package org.openrdf.sail.solr;

import java.io.IOException;
import java.text.ParseException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SpatialParams;
import org.openrdf.model.Resource;
import org.openrdf.model.URI;
import org.openrdf.model.vocabulary.GEOF;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.algebra.Var;
import org.openrdf.sail.SailException;
import org.openrdf.sail.lucene.AbstractSearchIndex;
import org.openrdf.sail.lucene.BulkUpdater;
import org.openrdf.sail.lucene.DocumentDistance;
import org.openrdf.sail.lucene.DocumentResult;
import org.openrdf.sail.lucene.DocumentScore;
import org.openrdf.sail.lucene.LuceneSail;
import org.openrdf.sail.lucene.SearchDocument;
import org.openrdf.sail.lucene.SearchFields;
import org.openrdf.sail.lucene.SearchQuery;
import org.openrdf.sail.lucene.util.GeoUnits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.collect.Iterables;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.context.SpatialContextFactory;
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
import com.spatial4j.core.shape.SpatialRelation;

/**
 * @see LuceneSail
 */
public class SolrIndex extends AbstractSearchIndex {

    /** Name of the configuration property that {@link #initialize(Properties)} reads to locate the Solr server. */
    public static final String SERVER_KEY = "server";

    /** Alias under which the value of {@code geodist()} is requested in the field list of distance queries. */
    public static final String DISTANCE_FIELD = "_dist";

    private final Logger logger = LoggerFactory.getLogger(getClass());

    // client used for all index access; assigned in initialize() and reset to null in shutDown()
    private SolrClient client;

    // resolves a property name to the spatial context to use for it; assigned in initialize()
    private Function<? super String, ? extends SpatialContext> geoContextMapper;

    /**
     * Initializes the index from the given configuration: sets up the spatial
     * context mapper and creates the Solr client for the server named by the
     * {@link #SERVER_KEY} property.
     * <p>
     * The part of the server value before the first ':' is treated as a scheme
     * and selects the client factory class
     * {@code org.openrdf.sail.solr.client.<scheme>.Factory}.
     *
     * @param parameters
     *        the configuration properties
     * @throws SailException
     *         if the server property is missing or carries no scheme
     * @throws Exception
     *         if the superclass initialization fails or the client factory
     *         cannot be loaded or instantiated
     */
    @Override
    public void initialize(Properties parameters) throws Exception {
        super.initialize(parameters);

        // Properties is formally a Map<Object,Object>; go through a wildcard
        // map to treat it as the Map<String,String> it effectively is
        Map<?, ?> rawParameters = parameters;
        this.geoContextMapper = createSpatialContextMapper((Map<String, String>) rawParameters);

        String server = parameters.getProperty(SERVER_KEY);
        if (server == null) {
            throw new SailException("Missing " + SERVER_KEY + " parameter");
        }
        int schemeEnd = server.indexOf(':');
        if (schemeEnd == -1) {
            throw new SailException("Missing scheme in " + SERVER_KEY + " parameter: " + server);
        }

        // the scheme names the package that holds the client factory
        String factoryClassName = "org.openrdf.sail.solr.client." + server.substring(0, schemeEnd) + ".Factory";
        SolrClientFactory factory = (SolrClientFactory) Class.forName(factoryClassName).newInstance();
        client = factory.create(server);
    }

    /**
     * Creates the function that supplies the spatial context for a property.
     * This implementation builds a single context from the given parameters
     * and returns it for every property.
     *
     * @param parameters
     *        the configuration handed to the spatial context factory
     * @return a function that maps any property name to the same context
     */
    protected Function<? super String, ? extends SpatialContext> createSpatialContextMapper(
            Map<String, String> parameters) {
        // this should really be based on the schema
        SpatialContext sharedContext = SpatialContextFactory.makeSpatialContext(parameters,
                Thread.currentThread().getContextClassLoader());
        return Functions.constant(sharedContext);
    }

    /**
     * Returns the Solr client used by this index.
     *
     * @return the current client; null before {@link #initialize(Properties)}
     *         has assigned one or after {@link #shutDown()} has released it
     */
    public SolrClient getClient() {
        return this.client;
    }

    /**
     * Looks up the spatial context for the given property through the mapper
     * created during initialization.
     *
     * @param property
     *        the property name handed to the mapper
     * @return the spatial context the mapper yields for that property
     */
    @Override
    protected SpatialContext getSpatialContext(String property) {
        SpatialContext context = geoContextMapper.apply(property);
        return context;
    }

    /**
     * Closes the Solr client, if there is one, and forgets it. Calling this
     * method when no client is set does nothing.
     *
     * @throws IOException
     *         if closing the client fails
     */
    @Override
    public void shutDown() throws IOException {
        if (client == null) {
            // nothing to release
            return;
        }
        client.close();
        client = null;
    }

    // //////////////////////////////// Methods for updating the index

    /**
     * Fetches the document with the given ID (combination of resource and
     * context) through the "/get" request handler.
     *
     * @param id
     *        the document ID
     * @return the matching document, or null when the response carries no
     *         "doc" entry
     * @throws IOException
     *         if the request fails; a SolrServerException is wrapped
     */
    @Override
    protected SearchDocument getDocument(String id) throws IOException {
        SolrQuery lookup = new SolrQuery();
        lookup.setRequestHandler("/get");
        lookup.set(SearchFields.ID_FIELD_NAME, id);

        Object found;
        try {
            found = client.query(lookup).getResponse().get("doc");
        } catch (SolrServerException e) {
            throw new IOException(e);
        }

        if (found == null) {
            return null;
        }
        return new SolrSearchDocument((SolrDocument) found);
    }

    /**
     * Returns all documents whose URI field holds the given resource ID.
     *
     * @param resourceId
     *        the resource ID to match
     * @return a lazily transformed view of the hits as search documents
     * @throws IOException
     *         if the search fails; a SolrServerException is wrapped
     */
    @Override
    protected Iterable<? extends SearchDocument> getDocuments(String resourceId) throws IOException {
        String resourceFilter = termQuery(SearchFields.URI_FIELD_NAME, resourceId);

        SolrDocumentList hits;
        try {
            hits = getDocuments(new SolrQuery(resourceFilter));
        } catch (SolrServerException e) {
            throw new IOException(e);
        }

        Function<SolrDocument, SearchDocument> toSearchDocument = new Function<SolrDocument, SearchDocument>() {

            @Override
            public SearchDocument apply(SolrDocument hit) {
                return new SolrSearchDocument(hit);
            }
        };
        return Iterables.transform(hits, toSearchDocument);
    }

    /**
     * Creates a new, Solr-backed search document for the given identifiers.
     *
     * @param id
     *        the document ID
     * @param resourceId
     *        the ID of the resource the document describes
     * @param context
     *        the context ID
     * @return the newly created document
     */
    @Override
    protected SearchDocument newDocument(String id, String resourceId, String context) {
        SearchDocument created = new SolrSearchDocument(id, resourceId, context);
        return created;
    }

    /**
     * Creates a new search document holding the same field entries as the
     * given one. The entries are transferred with {@code putAll}, so the field
     * values themselves are not cloned.
     *
     * @param doc
     *        the document to copy; must be a {@code SolrSearchDocument}
     * @return a new document backed by a fresh {@code SolrDocument}
     */
    @Override
    protected SearchDocument copyDocument(SearchDocument doc) {
        SolrDocument source = ((SolrSearchDocument) doc).getDocument();
        SolrDocument copy = new SolrDocument();
        copy.putAll(source);
        SearchDocument wrapped = new SolrSearchDocument(copy);
        return wrapped;
    }

    /**
     * Sends the given document to Solr.
     *
     * @param doc
     *        the document to add; must be a {@code SolrSearchDocument}
     * @throws IOException
     *         if the request fails; a SolrServerException is wrapped
     */
    @Override
    protected void addDocument(SearchDocument doc) throws IOException {
        SolrSearchDocument solrDoc = (SolrSearchDocument) doc;
        try {
            client.add(ClientUtils.toSolrInputDocument(solrDoc.getDocument()));
        } catch (SolrServerException cause) {
            throw new IOException(cause);
        }
    }

    /**
     * Updates a document by delegating to {@link #addDocument(SearchDocument)}.
     *
     * @param doc
     *        the document to store
     * @throws IOException
     *         if adding the document fails
     */
    @Override
    protected void updateDocument(SearchDocument doc) throws IOException {
        // an update is carried out as a plain add
        this.addDocument(doc);
    }

    /**
     * Deletes the given document from Solr by its ID.
     *
     * @param doc
     *        the document to delete
     * @throws IOException
     *         if the request fails; a SolrServerException is wrapped
     */
    @Override
    protected void deleteDocument(SearchDocument doc) throws IOException {
        String documentId = doc.getId();
        try {
            client.deleteById(documentId);
        } catch (SolrServerException cause) {
            throw new IOException(cause);
        }
    }

    /**
     * Creates a bulk updater that works on this index's Solr client.
     *
     * @return a new {@code SolrBulkUpdater}
     */
    @Override
    protected BulkUpdater newBulkUpdate() {
        BulkUpdater updater = new SolrBulkUpdater(client);
        return updater;
    }

    /**
     * Builds a query clause of the form {@code field:"value"}.
     * <p>
     * Backslashes and double quotes in the value are escaped with a backslash
     * so that the value cannot end the quoted phrase early or change the
     * structure of the query.
     *
     * @param field
     *        the field name, used as is
     * @param value
     *        the value to match; a null value is rendered as the text "null"
     * @return the query clause
     */
    static String termQuery(String field, String value) {
        // escape the backslash first so the escapes added for quotes stay intact
        String escaped = String.valueOf(value).replace("\\", "\\\\").replace("\"", "\\\"");
        return field + ":\"" + escaped + "\"";
    }

    /**
     * Runs the given query through {@link #search(SolrQuery)} and returns the
     * list of matching documents from the response.
     *
     * @param query
     *        the query to run
     * @return the documents found
     * @throws SolrServerException
     *         if Solr reports a failure
     * @throws IOException
     *         if communication with Solr fails
     */
    private SolrDocumentList getDocuments(SolrQuery query) throws SolrServerException, IOException {
        QueryResponse response = search(query);
        return response.getResults();
    }

    /**
     * Returns the document for the given resource and context combination, or
     * null when no such document exists yet.
     *
     * @param subject
     *        the resource the document describes
     * @param context
     *        the context of the document
     * @return the document, or null if there is none
     * @throws IOException
     *         if the lookup fails
     */
    public SearchDocument getDocument(Resource subject, Resource context) throws IOException {
        // the document ID is formed from the resource ID and the context ID
        String documentId = SearchFields.formIdString(SearchFields.getResourceID(subject),
                SearchFields.getContextID(context));
        return getDocument(documentId);
    }

    /**
     * Returns the documents that represent the given resource (empty when no
     * such document exists yet). Each document represents the set of
     * statements with that resource as subject that are stored in one
     * specific context.
     *
     * @param subject
     *        the resource to look up
     * @return the documents for the resource
     * @throws IOException
     *         if the lookup fails
     */
    public Iterable<? extends SearchDocument> getDocuments(Resource subject) throws IOException {
        return getDocuments(SearchFields.getResourceID(subject));
    }

    /**
     * Selects the property fields from the given set of field names.
     *
     * @param fields
     *        the field names to filter
     * @return a new set holding only the names accepted by
     *         {@code SearchFields.isPropertyField}
     */
    public static Set<String> getPropertyFields(Set<String> fields) {
        Set<String> propertyFields = new HashSet<String>(fields.size());
        for (String candidate : fields) {
            if (!SearchFields.isPropertyField(candidate)) {
                continue;
            }
            propertyFields.add(candidate);
        }
        return propertyFields;
    }

    /**
     * Marks the start of a transaction. This implementation does nothing.
     */
    @Override
    public void begin() throws IOException {
        // intentionally empty
    }

    /**
     * Commits pending changes on the Solr client.
     *
     * @throws IOException
     *         if the commit fails; a SolrServerException is wrapped
     */
    @Override
    public void commit() throws IOException {
        try {
            client.commit();
        } catch (SolrServerException cause) {
            throw new IOException(cause);
        }
    }

    /**
     * Rolls back pending changes on the Solr client.
     *
     * @throws IOException
     *         if the rollback fails; a SolrServerException is wrapped
     */
    @Override
    public void rollback() throws IOException {
        try {
            client.rollback();
        } catch (SolrServerException cause) {
            throw new IOException(cause);
        }
    }

    /**
     * Marks the start of a read operation. This implementation does nothing.
     */
    @Override
    public void beginReading() throws IOException {
        // intentionally empty
    }

    /**
     * Marks the end of a read operation. This implementation does nothing.
     */
    @Override
    public void endReading() throws IOException {
        // intentionally empty
    }

    // //////////////////////////////// Methods for querying the index

    /**
     * Wraps the passed query string in a search query whose default field is
     * chosen from the given property. To be removed, no longer used.
     *
     * @param query
     *        the query string
     * @param propertyURI
     *        the property to search in, or null to search the text field
     * @return the resulting search query
     * @throws MalformedQueryException
     *         declared by the overridden method
     */
    @Override
    @Deprecated
    protected SearchQuery parseQuery(String query, URI propertyURI) throws MalformedQueryException {
        SolrQuery solrQuery = new SolrQuery(query);
        // prepareQuery configures the instance it is given and hands it back
        SolrQuery prepared = prepareQuery(propertyURI, solrQuery);
        return new SolrSearchQuery(prepared, this);
    }

    /**
     * Runs the given query string against the index and returns the scored
     * hits.
     *
     * @param subject
     *        if not null, only documents of this resource are searched
     * @param query
     *        the query string
     * @param propertyURI
     *        the property to search in, or null to search the text field
     * @param highlight
     *        whether highlighted snippets are requested
     * @return a lazily transformed view of the hits, each paired with the
     *         highlighting Solr returned for it (if any)
     * @throws MalformedQueryException
     *         declared by the overridden method
     * @throws IOException
     *         if the search fails; a SolrServerException is wrapped
     */
    @Override
    protected Iterable<? extends DocumentScore> query(Resource subject, String query, URI propertyURI,
            boolean highlight) throws MalformedQueryException, IOException {
        SolrQuery solrQuery = prepareQuery(propertyURI, new SolrQuery(query));

        if (highlight) {
            solrQuery.setHighlight(true);
            // highlight the searched property, or every field when none is given
            String highlightField;
            if (propertyURI != null) {
                highlightField = SearchFields.getPropertyField(propertyURI);
            } else {
                highlightField = "*";
            }
            solrQuery.addHighlightField(highlightField);
            solrQuery.setHighlightSimplePre(SearchFields.HIGHLIGHTER_PRE_TAG);
            solrQuery.setHighlightSimplePost(SearchFields.HIGHLIGHTER_POST_TAG);
            solrQuery.setHighlightSnippets(2);
        }

        // all fields are fetched when highlighting, otherwise only the URI
        solrQuery.addField(solrQuery.getHighlight() ? "*" : SearchFields.URI_FIELD_NAME);
        solrQuery.addField("score");

        QueryResponse response;
        try {
            response = (subject != null) ? search(subject, solrQuery) : search(solrQuery);
        } catch (SolrServerException e) {
            throw new IOException(e);
        }

        final Map<String, Map<String, List<String>>> highlighting = response.getHighlighting();
        Function<SolrDocument, DocumentScore> toScore = new Function<SolrDocument, DocumentScore>() {

            @Override
            public DocumentScore apply(SolrDocument document) {
                SolrSearchDocument doc = new SolrSearchDocument(document);
                Map<String, List<String>> snippets = null;
                if (highlighting != null) {
                    snippets = highlighting.get(doc.getId());
                }
                return new SolrDocumentScore(doc, snippets);
            }
        };
        return Iterables.transform(response.getResults(), toScore);
    }

    // /**
    // * Parses an id-string used for a context filed (a serialized resource)
    // back to a resource.
    // * <b>CAN RETURN NULL</b>
    // * Inverse method of {@link #getResourceID(Resource)}
    // * @param idString
    // * @return null if the passed idString was the {@link #CONTEXT_NULL}
    // constant
    // */
    // private Resource getContextResource(String idString) {
    // if (CONTEXT_NULL.equals(idString))
    // return null;
    // else
    // return getResource(idString);
    // }

    /**
     * Evaluates the given query only for the given resource.
     * <p>
     * The query text of the passed {@code SolrQuery} is rewritten in place:
     * the original text is put in parentheses and combined with a clause on
     * the URI field. The parentheses make the restriction apply to the whole
     * original expression; without them a query such as {@code a OR b} would
     * only have its last clause restricted. If the original query text is null
     * or blank, the resource clause becomes the whole query.
     *
     * @param resource
     *        the resource whose documents are searched
     * @param query
     *        the query to restrict and run; its query text is modified
     * @return the response of the restricted query
     * @throws SolrServerException
     *         if Solr reports a failure
     * @throws IOException
     *         if communication with Solr fails
     */
    public QueryResponse search(Resource resource, SolrQuery query) throws SolrServerException, IOException {
        String idQuery = termQuery(SearchFields.URI_FIELD_NAME, SearchFields.getResourceID(resource));
        String original = query.getQuery();
        if (original == null || original.trim().isEmpty()) {
            // nothing to combine with; avoid producing "null AND ..." or " AND ..."
            query.setQuery(idQuery);
        } else {
            // group the original query so AND binds to all of it
            query.setQuery("(" + original + ") AND " + idQuery);
        }
        return search(query);
    }

    /**
     * Finds the documents whose geo property lies within the given distance
     * of a point, using Solr's {@code geofilt} filter.
     *
     * @param geoProperty
     *        the property holding the geometry
     * @param p
     *        the centre point; sent to Solr as "y,x"
     * @param units
     *        the unit of {@code distance}; also passed on to each result
     * @param distance
     *        the search radius, converted to kilometres for Solr
     * @param distanceVar
     *        not used by this implementation
     * @param contextVar
     *        if not null, restricts the search by context: to the variable's
     *        value when it has one, otherwise to documents that do not carry
     *        the ID produced for a null context
     * @return a lazily transformed view of the hits
     * @throws MalformedQueryException
     *         declared by the overridden method
     * @throws IOException
     *         if the search fails; a SolrServerException is wrapped
     */
    @Override
    protected Iterable<? extends DocumentDistance> geoQuery(URI geoProperty, Point p, final URI units,
            double distance, String distanceVar, Var contextVar) throws MalformedQueryException, IOException {
        double radiusKm = GeoUnits.toKilometres(distance, units);

        StringBuilder queryText = new StringBuilder();
        if (contextVar != null) {
            Resource ctx = (Resource) contextVar.getValue();
            String contextTerm = termQuery(SearchFields.CONTEXT_FIELD_NAME, SearchFields.getContextID(ctx));
            if (ctx == null) {
                // no context value: negate the clause
                queryText.append("-");
            }
            queryText.append(contextTerm).append(" AND ");
        }
        queryText.append("{!geofilt score=recipDistance}");

        SolrQuery geoFilter = new SolrQuery(queryText.toString());
        geoFilter.set(SpatialParams.FIELD, SearchFields.getPropertyField(geoProperty));
        geoFilter.set(SpatialParams.POINT, p.getY() + "," + p.getX());
        geoFilter.set(SpatialParams.DISTANCE, Double.toString(radiusKm));

        geoFilter.addField(SearchFields.URI_FIELD_NAME);
        // ':' is part of the fl parameter syntax so the full property field
        // name cannot be used; a wildcard plus the local part of the property
        // URI is used instead
        geoFilter.addField("*" + geoProperty.getLocalName());
        // always include the distance - needed for sanity checking
        geoFilter.addField(DISTANCE_FIELD + ":geodist()");
        // the context is only fetched when it still has to be determined
        if (contextVar != null && !contextVar.hasValue()) {
            geoFilter.addField(SearchFields.CONTEXT_FIELD_NAME);
        }

        QueryResponse response;
        try {
            response = search(geoFilter);
        } catch (SolrServerException e) {
            throw new IOException(e);
        }

        Function<SolrDocument, DocumentDistance> toDistance = new Function<SolrDocument, DocumentDistance>() {

            @Override
            public DocumentDistance apply(SolrDocument document) {
                return new SolrDocumentDistance(new SolrSearchDocument(document), units);
            }
        };
        return Iterables.transform(response.getResults(), toDistance);
    }

    /**
     * Finds the documents whose geo property stands in the given spatial
     * relation to a shape.
     *
     * @param relation
     *        the relation function, as a string
     * @param geoProperty
     *        the property holding the geometry; used as the default field
     * @param shape
     *        the query shape; its WKT text is taken via {@link #toWkt(Shape)}
     * @param contextVar
     *        if not null, restricts the search by context: to the variable's
     *        value when it has one, otherwise to documents that do not carry
     *        the ID produced for a null context
     * @return a lazily transformed view of the hits, or null when the relation
     *         is not one this index supports
     * @throws MalformedQueryException
     *         declared by the overridden method
     * @throws IOException
     *         if the search fails; a SolrServerException is wrapped
     */
    @Override
    protected Iterable<? extends DocumentResult> geoRelationQuery(String relation, URI geoProperty, Shape shape,
            Var contextVar) throws MalformedQueryException, IOException {
        String spatialOp = toSpatialOp(relation);
        if (spatialOp == null) {
            // unsupported relation
            return null;
        }

        String spatialClause = "\"" + spatialOp + "(" + toWkt(shape) + ")\"";
        StringBuilder queryText = new StringBuilder();
        if (contextVar != null) {
            Resource ctx = (Resource) contextVar.getValue();
            String contextTerm = termQuery(SearchFields.CONTEXT_FIELD_NAME, SearchFields.getContextID(ctx));
            if (ctx == null) {
                // no context value: negate the clause
                queryText.append("-");
            }
            queryText.append(contextTerm).append(" AND ");
        }
        queryText.append(spatialClause);

        SolrQuery relationQuery = new SolrQuery(queryText.toString());
        relationQuery.set(CommonParams.DF, SearchFields.getPropertyField(geoProperty));
        relationQuery.addField(SearchFields.URI_FIELD_NAME);
        // ':' is part of the fl parameter syntax so the full property field
        // name cannot be used; a wildcard plus the local part of the property
        // URI is used instead
        relationQuery.addField("*" + geoProperty.getLocalName());
        // the context is only fetched when it still has to be determined
        if (contextVar != null && !contextVar.hasValue()) {
            relationQuery.addField(SearchFields.CONTEXT_FIELD_NAME);
        }

        QueryResponse response;
        try {
            response = search(relationQuery);
        } catch (SolrServerException e) {
            throw new IOException(e);
        }

        Function<SolrDocument, DocumentResult> toResult = new Function<SolrDocument, DocumentResult>() {

            @Override
            public DocumentResult apply(SolrDocument document) {
                return new SolrDocumentResult(new SolrSearchDocument(document));
            }
        };
        return Iterables.transform(response.getResults(), toResult);
    }

    /**
     * Translates a relation function into the name of the matching spatial
     * operation used in the query string.
     *
     * @param relation
     *        the relation function, as a string
     * @return "Intersects", "IsDisjointTo" or "IsWithin", or null if the
     *         relation is none of the three handled here
     */
    private String toSpatialOp(String relation) {
        String operation = null;
        if (GEOF.SF_INTERSECTS.stringValue().equals(relation)) {
            operation = "Intersects";
        } else if (GEOF.SF_DISJOINT.stringValue().equals(relation)) {
            operation = "IsDisjointTo";
        } else if (GEOF.EH_COVERED_BY.stringValue().equals(relation)) {
            operation = "IsWithin";
        }
        return operation;
    }

    /**
     * Parses the shape as the superclass does and wraps the result so that the
     * original text stays available for {@link #toWkt(Shape)}.
     *
     * @param property
     *        the property the shape belongs to
     * @param value
     *        the textual form of the shape
     * @return a wrapper around the parsed shape; a point wrapper when the
     *         parsed shape is a point
     * @throws ParseException
     *         if the superclass cannot parse the value
     */
    @Override
    protected Shape parseQueryShape(String property, String value) throws ParseException {
        Shape parsed = super.parseQueryShape(property, value);
        // workaround to preserve WKT string
        if (parsed instanceof Point) {
            return new WktPoint((Point) parsed, value);
        }
        return new WktShape<Shape>(parsed, value);
    }

    /**
     * Returns the text stored with a shape produced by
     * {@link #parseQueryShape(String, String)}.
     *
     * @param s
     *        the shape; must be one of the wrappers created by this class,
     *        otherwise the cast fails
     * @return the stored text
     */
    protected String toWkt(Shape s) {
        WktShape<?> wrapper = (WktShape<?>) s;
        return wrapper.wkt;
    }

    /**
     * Shape wrapper that keeps the text a shape was created from next to the
     * shape itself. All {@link Shape} operations are forwarded to the wrapped
     * shape.
     * <p>
     * Equality and hash code are those of the wrapped shape; the stored text
     * takes no part in either.
     *
     * @param <S>
     *        the type of the wrapped shape
     */
    private static class WktShape<S extends Shape> implements Shape {
        /** The wrapped shape that does the actual work. */
        final S s;
        /** The text handed in together with the shape. */
        final String wkt;

        WktShape(S s, String wkt) {
            this.s = s;
            this.wkt = wkt;
        }

        @Override
        public SpatialRelation relate(Shape other) {
            return s.relate(other);
        }

        @Override
        public Rectangle getBoundingBox() {
            return s.getBoundingBox();
        }

        @Override
        public boolean hasArea() {
            return s.hasArea();
        }

        @Override
        public double getArea(SpatialContext ctx) {
            return s.getArea(ctx);
        }

        @Override
        public Point getCenter() {
            return s.getCenter();
        }

        @Override
        public Shape getBuffered(double distance, SpatialContext ctx) {
            return s.getBuffered(distance, ctx);
        }

        @Override
        public boolean isEmpty() {
            return s.isEmpty();
        }

        /**
         * Compares the wrapped shape with the other object. If the other
         * object is itself a wrapper, its wrapped shape is used for the
         * comparison, so a wrapper equals itself and any wrapper around an
         * equal shape.
         */
        @Override
        public boolean equals(Object other) {
            if (this == other) {
                return true;
            }
            // unwrap so the delegate is never compared against a wrapper it
            // may not recognise
            Object target = (other instanceof WktShape<?>) ? ((WktShape<?>) other).s : other;
            return s.equals(target);
        }

        /**
         * Returns the hash code of the wrapped shape, in line with
         * {@link #equals(Object)}.
         */
        @Override
        public int hashCode() {
            return s.hashCode();
        }
    }

    /**
     * Point variant of the shape wrapper: forwards the {@link Point}
     * operations to the wrapped point.
     */
    private static class WktPoint extends WktShape<Point> implements Point {

        WktPoint(Point p, String wkt) {
            super(p, wkt);
        }

        /** Returns the x coordinate of the wrapped point. */
        @Override
        public double getX() {
            return this.s.getX();
        }

        /** Returns the y coordinate of the wrapped point. */
        @Override
        public double getY() {
            return this.s.getY();
        }

        /** Resets the wrapped point to the given coordinates. */
        @Override
        public void reset(double x, double y) {
            this.s.reset(x, y);
        }
    }

    /**
     * Evaluates the given query and returns Solr's response.
     * <p>
     * The number of rows requested is {@code maxDocs} when that is positive.
     * Otherwise the query is first run with zero rows to learn the number of
     * hits, and then run again asking for that many rows (at least one, at
     * most {@code Integer.MAX_VALUE}). The row setting of the passed query is
     * modified.
     *
     * @param query
     *        the query to run
     * @return the response of the query
     * @throws SolrServerException
     *         if Solr reports a failure
     * @throws IOException
     *         if communication with Solr fails
     */
    public QueryResponse search(SolrQuery query) throws SolrServerException, IOException {
        int rows = maxDocs;
        if (rows <= 0) {
            // no limit configured: count the hits first
            long hitCount = client.query(query.setRows(0)).getResults().getNumFound();
            int capped = (int) Math.min(hitCount, Integer.MAX_VALUE);
            rows = Math.max(capped, 1);
        }
        return client.query(query.setRows(rows));
    }

    /**
     * Sets the default search field of the query based on the given property.
     *
     * @param propertyURI
     *        the property to search in, or null to search the text field
     * @param query
     *        the query to configure
     * @return the same query instance that was passed in
     */
    private SolrQuery prepareQuery(URI propertyURI, SolrQuery query) {
        if (propertyURI != null) {
            // a property is given: make its field the default field
            query.set(CommonParams.DF, SearchFields.getPropertyField(propertyURI));
        } else {
            // no property given: fall back to the general text field
            query.set(CommonParams.DF, SearchFields.TEXT_FIELD_NAME);
        }
        return query;
    }

    /**
     * Deletes the documents of the given contexts from the index, issuing one
     * delete-by-query per context.
     *
     * @param contexts
     *        the contexts to clear; individual entries may be null
     * @throws IOException
     *         if a delete request fails; a SolrServerException is wrapped
     */
    @Override
    public synchronized void clearContexts(Resource... contexts) throws IOException {
        logger.debug("deleting contexts: {}", Arrays.toString(contexts));

        for (Resource context : contexts) {
            // attention: context can be NULL!
            String contextQuery = termQuery(SearchFields.CONTEXT_FIELD_NAME, SearchFields.getContextID(context));
            try {
                client.deleteByQuery(contextQuery);
            } catch (SolrServerException cause) {
                throw new IOException(cause);
            }
        }
    }

    /**
     * Deletes every document from the index with a match-all delete query.
     *
     * @throws IOException
     *         if the delete request fails; a SolrServerException is wrapped
     */
    @Override
    public synchronized void clear() throws IOException {
        final String matchAll = "*:*";
        try {
            client.deleteByQuery(matchAll);
        } catch (SolrServerException cause) {
            throw new IOException(cause);
        }
    }
}