org.arastreju.sge.index.ArastrejuIndex.java Source code

Introduction

Here is the source code for org.arastreju.sge.index.ArastrejuIndex.java
Source

/*
 * Copyright (C) 2013 lichtflut Forschungs- und Entwicklungsgesellschaft mbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.arastreju.sge.index;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
import org.arastreju.sge.ConversationContext;
import org.arastreju.sge.inferencing.Inferencer;
import org.arastreju.sge.model.Statement;
import org.arastreju.sge.model.nodes.ResourceNode;
import org.arastreju.sge.naming.QualifiedName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

/**
 * <p>
 *  Indexer and searcher implementation using Lucene.
 * </p>
 *
 * <p>
 *  All operations work on the {@link ContextIndex} that the {@link IndexProvider}
 *  returns for the primary context of the conversation context given at construction.
 * </p>
 *
 * <p>
 *    Created Feb 01, 2013
 * </p>
 *
 * @author Timo Buhrmester
 */
public class ArastrejuIndex implements IndexUpdator, IndexSearcher {

    private static final Logger LOGGER = LoggerFactory.getLogger(ArastrejuIndex.class);

    /** Upper bound of documents written to the log by {@link #dump()}. */
    private static final int MAX_DUMPED_DOCUMENTS = 100;

    private final List<Inferencer> inferencers = new ArrayList<Inferencer>();

    private final ConversationContext conversationContext;

    private final IndexProvider provider;

    // ----------------------------------------------------

    /**
     * Create a new index facade.
     * @param cc The conversation context whose primary context selects the index to use.
     * @param provider The provider of the context specific indexes.
     */
    public ArastrejuIndex(ConversationContext cc, IndexProvider provider) {
        this.conversationContext = cc;
        this.provider = provider;
    }

    // ----------------------------------------------------

    /**
     * Add one or more soft inferencers. They are consulted for each asserted
     * statement when a node is indexed.
     * @param inferencer The inferencer(s).
     * @return This.
     */
    public ArastrejuIndex add(Inferencer... inferencer) {
        Collections.addAll(inferencers, inferencer);
        return this;
    }

    // ----------------------------------------------------

    /**
     * Index this node with all its statements, regarding the current primary context.
     * If the node already has been indexed, it will be updated.
     * @param node The node to index.
     * @throws IllegalStateException If the index writer reports an I/O error.
     */
    @Override
    public void index(ResourceNode node) {
        LOGGER.debug("Indexing ({})", node);

        Document doc = createDocument(node);
        ContextIndex index = currentIndex();
        try {
            // updateDocument() creates the document if no document matches the term yet.
            Term key = new Term(IndexFields.QUALIFIED_NAME, normalizeQN(node.toURI()));
            index.getWriter().updateDocument(key, doc);
            // NOTE: no commit is issued here; to be revised when transactions enter the play.
        } catch (IOException e) {
            String msg = "caught IOException while indexing resource " + node.toURI();
            LOGGER.error(msg, e);
            throw new IllegalStateException(msg, e);
        }
    }

    /**
     * Remove the resource identified by the qualified name from the index.
     * @param qn The qualified name.
     * @throws IllegalStateException If the index writer reports an I/O error.
     */
    @Override
    public void remove(QualifiedName qn) {
        LOGGER.debug("remove({})", qn);
        ContextIndex index = currentIndex();
        try {
            Term key = new Term(IndexFields.QUALIFIED_NAME, normalizeQN(qn.toURI()));
            index.getWriter().deleteDocuments(key);
            // NOTE: no commit is issued here (see index()).
        } catch (IOException e) {
            LOGGER.error("Could not remove node '{}' from index due to {}", qn, e.getMessage());
            throw new IllegalStateException("Could not remove node.", e);
        }
    }

    /**
     * Run the given query string against the index of the current primary context
     * and collect all hits.
     * @param query The query in Lucene query parser syntax.
     * @return The search result wrapping the qualified names of all hits.
     * @throws IllegalStateException If the query can not be parsed or the search fails with an I/O error.
     */
    @Override
    public IndexSearchResult search(String query) {
        LOGGER.debug("search({})", query);
        org.apache.lucene.search.IndexSearcher searcher = currentIndex().getSearcher();

        /* default field is 'qn' as this is the only field common to all resources.
         * (not that we're going to need a default field, anyway.) */
        QueryParser qp = new QueryParser(Version.LUCENE_35, IndexFields.QUALIFIED_NAME,
                new LowercaseWhitespaceAnalyzer(Version.LUCENE_35));
        qp.setAllowLeadingWildcard(true); //such queries should be avoided where possible nevertheless

        List<QualifiedName> resultList;
        try {
            /* a collector is used because we want all results, not just the top n */
            AllHitsCollector collector = new AllHitsCollector();
            searcher.search(qp.parse(query), collector);

            resultList = collector.getList();
        } catch (IOException e) {
            LOGGER.error("Caught IOException while processing query '" + query + "'", e);
            throw new IllegalStateException("Could not perform search.", e);
        } catch (ParseException e) {
            LOGGER.error("Caught ParseException while processing query '" + query + "'", e);
            throw new IllegalStateException("Could not perform search.", e);
        }

        return new FixedIndexSearchResult(resultList);
    }

    // ----------------------------------------------------

    /**
     * Write the documents of the current index with all their fields to the log (level INFO).
     * At most {@value #MAX_DUMPED_DOCUMENTS} documents are dumped.
     * @throws RuntimeException If reading the index fails with an I/O error.
     */
    public void dump() {
        org.apache.lucene.search.IndexSearcher searcher = currentIndex().getSearcher();
        IndexReader reader = searcher.getIndexReader();

        try {
            TopDocs top = searcher.search(new MatchAllDocsQuery(), MAX_DUMPED_DOCUMENTS);
            // totalHits counts ALL matches, but scoreDocs only holds the requested top n,
            // so the loop has to be bounded by the array length.
            for (int i = 0; i < top.scoreDocs.length; i++) {
                int docId = top.scoreDocs[i].doc;
                Document doc = reader.document(docId);
                LOGGER.info("---Document--- id: {}", docId);
                List<Fieldable> fields = doc.getFields();
                for (Fieldable f : fields) {
                    LOGGER.info("\tField: name='{}', val='{}'", f.name(), f.stringValue());
                }
            }
        } catch (IOException e) {
            String msg = "caught IOException while dumping index";
            LOGGER.error(msg, e);
            throw new RuntimeException(msg, e);
        }
    }

    /**
     * Release the index of the current primary context and close its reader and writer.
     * No more calls to this object after close().
     * @throws RuntimeException If closing the reader or the writer fails with an I/O error.
     */
    public void close() {
        ContextIndex index = currentIndex();
        provider.release(conversationContext.getPrimaryContext());
        try {
            try {
                index.getReader().close();
            } finally {
                // always attempt to close the writer, even if closing the reader failed
                index.getWriter().close();
            }
        } catch (IOException e) {
            String msg = "caught IOException while closing reader/writer";
            LOGGER.error(msg, e);
            throw new RuntimeException(msg, e);
        }
    }

    /**
     * Delete all documents from the index of the current primary context.
     * @throws RuntimeException If the index writer reports an I/O error.
     */
    public void clear() {
        ContextIndex index = currentIndex();
        try {
            index.getWriter().deleteAll();
            // NOTE: no commit is issued here (see index()).
        } catch (IOException e) {
            String msg = "caught IOException while clearing index";
            LOGGER.error(msg, e);
            throw new RuntimeException(msg, e);
        }
    }

    // ----------------------------------------------------

    /** Look up the index belonging to the current primary context of the conversation. */
    private ContextIndex currentIndex() {
        return provider.forContext(conversationContext.getPrimaryContext());
    }

    /**
     * Build the Lucene document for a node: the qualified name field plus the fields
     * for all asserted statements and for all statements the inferencers derive from them.
     */
    private Document createDocument(ResourceNode node) {
        Document doc = new Document();
        doc.add(new Field(IndexFields.QUALIFIED_NAME, node.toURI(), Store.YES, Index.ANALYZED));

        Set<Statement> asserted = node.getAssociations();
        Set<Statement> inferred = new HashSet<Statement>();
        for (Statement stmt : asserted) {
            for (Inferencer inferencer : inferencers) {
                inferencer.addInferenced(stmt, inferred);
            }
            addFields(doc, stmt);
        }
        for (Statement stmt : inferred) {
            addFields(doc, stmt);
        }
        return doc;
    }

    /**
     * Add the predicate specific field and, unless an equal one is already present,
     * the generic relation/value field of the statement to the document.
     */
    private void addFields(Document doc, Statement stmt) {
        doc.add(makeField(stmt));
        Field generic = makeGenField(stmt);
        if (!findValue(doc, generic.name(), generic.stringValue())) {
            doc.add(generic);
        }
    }

    /**
     * Create the generic field of a statement: named {@link IndexFields#RESOURCE_RELATION}
     * for resource objects and {@link IndexFields#RESOURCE_VALUE} for value objects.
     */
    private Field makeGenField(Statement stmt) {
        String name = stmt.getObject().isResourceNode()
                ? IndexFields.RESOURCE_RELATION
                : IndexFields.RESOURCE_VALUE;
        return new Field(name, objectAsString(stmt), Store.YES, Index.ANALYZED);
    }

    /**
     * Create the field named after the statement's predicate URI.
     * This replicates the behaviour of the old neo index, for now.
     * TODO: Should probably use different sorts of fields (like NumericField)
     * where applicable to leverage more of lucenes functionality
     */
    private Field makeField(Statement stmt) {
        return new Field(stmt.getPredicate().toURI(), objectAsString(stmt), Store.YES, Index.ANALYZED);
    }

    /** The string to index for a statement's object: the URI of a resource or the string value of a value node. */
    private String objectAsString(Statement stmt) {
        if (stmt.getObject().isResourceNode()) {
            return stmt.getObject().asResource().toURI();
        }
        return stmt.getObject().asValue().getStringValue();
    }

    /** Check if the document already contains a field of the given name with exactly the given value. */
    private boolean findValue(Document doc, String fieldName, String val) {
        String[] vals = doc.getValues(fieldName);
        for (String v : vals) {
            if (v.equals(val)) {
                return true;
            }
        }

        return false;
    }

    /* this is applied whenever we search for a qn.
     * XXX do we actually want case-insensitive search on URI?
     * LuceneQueryBuilder.normalizeValue() sort of enforces/suggests this.
     * Locale.ROOT keeps the result independent of the JVM's default locale
     * (e.g. a Turkish default locale would lower-case 'I' to a dotless 'i'). */
    private String normalizeQN(String qn) {
        return qn.toLowerCase(Locale.ROOT);
    }

}