Java tutorial
/*
 * Licensed to Aduna under one or more contributor license agreements.
 * See the NOTICE.txt file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Aduna licenses this file to you under the terms of the Aduna BSD
 * License (the "License"); you may not use this file except in compliance
 * with the License. See the LICENSE.txt file distributed with this work
 * for the full License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */
package org.openrdf.sail.lucene;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.openrdf.model.BNode;
import org.openrdf.model.IRI;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.Value;
import org.openrdf.model.impl.SimpleBNode;
import org.openrdf.model.impl.SimpleIRI;
import org.openrdf.sail.Sail;
import org.openrdf.sail.SailException;

/**
 * A LuceneIndex is a one-stop-shop abstraction of a Lucene index. It takes
 * care of proper synchronization of IndexReaders, IndexWriters and
 * IndexSearchers in a way that is suitable for a LuceneSail.
 *
 * @see LuceneSail
 */
public class LuceneIndex {

    /**
     * A utility FieldSelector that only selects the URI field to be loaded.
     * Useful when locating matching Resources in a LuceneIndex and the other
     * Document fields are not required.
     */
    private static FieldSelector URI_FIELD_SELECTOR = new FieldSelector() {

        private static final long serialVersionUID = 4302925811117170860L;

        public FieldSelectorResult accept(String fieldName) {
            return fieldName.equals(URI_FIELD_NAME) ? FieldSelectorResult.LOAD : FieldSelectorResult.NO_LOAD;
        }
    };
    /**
     * The name of the Document field holding the document identifier. This
     * consists of the Resource identifier (URI or BNodeID) and the Context ID
     * (the format is "resourceId|contextId").
     */
    public static final String ID_FIELD_NAME = "id";

    /**
     * The name of the Document field holding the Resource identifier. The
     * value stored in this field is either a URI or a BNode ID.
     */
    public static final String URI_FIELD_NAME = "uri";

    /**
     * The name of the Document field that holds multiple text values of a
     * Resource. The field is called "text", as it contains all text, but was
     * called "ALL" during the discussion. For each statement-literal of the
     * resource, the object literal is stored in a field using the
     * predicate-literal and additionally in a TEXT_FIELD_NAME-literal field.
     * The reasons are given in the documentation of
     * {@link #addProperty(String, String, Document)}
     */
    public static final String TEXT_FIELD_NAME = "text";

    /**
     * The name of the Document field holding the context identifier(s).
     */
    public static final String CONTEXT_FIELD_NAME = "context";

    /**
     * The null context.
     */
    public static final String CONTEXT_NULL = "null";

    /**
     * String used to prefix BNode IDs with so that we can distinguish BNode
     * fields from URI fields in Documents. The prefix is chosen so that it is
     * invalid as a (part of a) URI scheme.
     */
    public static final String BNODE_ID_PREFIX = "!";

    private static final List<String> REJECTED_DATATYPES = new ArrayList<String>();

    static {
        REJECTED_DATATYPES.add("http://www.w3.org/2001/XMLSchema#float");
    }

    static {
        // do NOT set this to Integer.MAX_VALUE, because this breaks fuzzy queries
        BooleanQuery.setMaxClauseCount(1024 * 1024);
    }

    private final Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * The Directory that holds the Lucene index files.
     */
    private final Directory directory;

    /**
     * The Analyzer used to tokenize strings and queries.
     */
    private final Analyzer analyzer;

    private final Analyzer queryAnalyzer;

    /**
     * The IndexWriter that can be used to alter the index' contents. Created
     * lazily.
     */
    private IndexWriter indexWriter;

    /**
     * This holds IndexReader and IndexSearcher.
     */
    protected ReaderMonitor currentMonitor;

    /**
     * Keeps a list of old monitors that are still iterating but not closed
     * (open iterators); they will all be closed on shutdown. Items are
     * removed from the list by ReaderMonitor.endReading() when closing.
     */
    protected final Collection<ReaderMonitor> oldmonitors = new LinkedList<ReaderMonitor>();

    /**
     * Creates a new LuceneIndex.
     *
     * @param directory
     *        The Directory in which an index can be found and/or in which
     *        index files are written.
     * @param analyzer
     *        The Analyzer that will be used for tokenizing strings to index
     *        and queries.
     * @throws IOException
     *         When the Directory could not be unlocked.
     */
    public LuceneIndex(Directory directory, Analyzer analyzer) throws IOException {
        this.directory = directory;
        this.analyzer = analyzer;
        this.queryAnalyzer = new StandardAnalyzer(Version.LUCENE_35);

        // get rid of any locks that may have been left by previous (crashed)
        // sessions
        if (IndexWriter.isLocked(directory)) {
            logger.info("unlocking directory {}", directory);
            IndexWriter.unlock(directory);
        }

        // do some initialization for new indices
        if (!IndexReader.indexExists(directory)) {
            logger.info("creating new Lucene index in directory {}", directory);
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_35, analyzer);
            indexWriterConfig.setOpenMode(OpenMode.CREATE);
            IndexWriter writer = new IndexWriter(directory, indexWriterConfig);
            writer.close();
        }
    }

    // //////////////////////////////// Setters and getters

    public Directory getDirectory() {
        return directory;
    }

    public Analyzer getAnalyzer() {
        return analyzer;
    }

    // //////////////////////////////// Methods for controlled index access

    // For quick'n'easy access, the IndexReader is returned directly; result
    // LuceneQueryIterators use the more elaborate ReaderMonitor directly, to
    // be able to close the reader when they are done.

    public IndexReader getIndexReader() throws IOException {
        return getCurrentMonitor().getIndexReader();
    }

    public IndexSearcher getIndexSearcher() throws IOException {
        return getCurrentMonitor().getIndexSearcher();
    }

    /**
     * The current monitor holds an instance of IndexReader and IndexSearcher.
     * It is used to keep track of readers.
     */
    public ReaderMonitor getCurrentMonitor() {
        if (currentMonitor == null)
            currentMonitor = new ReaderMonitor(this, directory);
        return currentMonitor;
    }

    public IndexWriter getIndexWriter() throws IOException {
        if (indexWriter == null) {
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_35, analyzer);
            indexWriter = new IndexWriter(directory, indexWriterConfig);
        }
        return indexWriter;
    }

    public void shutDown() throws IOException {
        // try-finally setup ensures that closing of an instance is not skipped
        // when an earlier instance resulted in an IOException
        // FIXME: is there a more elegant way to ensure this?

        // This closes the old monitors, which hold the IndexReader and
        // IndexSearcher; each monitor closes its IndexReader and IndexSearcher.
        if (currentMonitor != null) {
            currentMonitor.doClose();
            currentMonitor = null;
        }
        if (oldmonitors.size() > 0) {
            logger.warn(
                    "LuceneSail: On shutdown {} IndexReaders were not closed. This is due to non-closed Query Iterators, which must be closed!",
                    oldmonitors.size());
        }
        for (ReaderMonitor monitor : oldmonitors) {
            monitor.doClose();
        }
        oldmonitors.clear();

        try {
            if (indexWriter != null) {
                indexWriter.close();
            }
        } finally {
            indexWriter = null;
        }
    }
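    // A minimal usage sketch (not part of the original class): opening and
    // closing the index. The directory path is an assumption for
    // illustration; FSDirectory and StandardAnalyzer are standard Lucene 3.5
    // classes.
    //
    //   Directory dir = FSDirectory.open(new File("/path/to/index"));
    //   LuceneIndex index = new LuceneIndex(dir, new StandardAnalyzer(Version.LUCENE_35));
    //   try {
    //       // ... add, remove and search statements ...
    //   } finally {
    //       index.shutDown();
    //   }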
    // //////////////////////////////// Methods for updating the index

    /**
     * Indexes the specified Statement.
     */
    public synchronized void addStatement(Statement statement) throws IOException {
        // determine stuff to store
        Value object = statement.getObject();
        if (!(object instanceof Literal)) {
            return;
        }

        String field = statement.getPredicate().toString();
        String text = ((Literal) object).getLabel();
        String context = getContextID(statement.getContext());
        boolean updated = false;
        IndexWriter writer = null;

        // fetch the Document representing this Resource
        String resourceId = getResourceID(statement.getSubject());
        String contextId = getContextID(statement.getContext());

        String id = formIdString(resourceId, contextId);
        Term idTerm = new Term(ID_FIELD_NAME, id);
        Document document = getDocument(idTerm);

        if (document == null) {
            // there is no such Document: create one now
            document = new Document();
            addID(id, document);
            addResourceID(resourceId, document);
            // add context
            addContext(context, document);
            addProperty(field, text, document);

            // add it to the index
            writer = getIndexWriter();
            writer.addDocument(document);
            updated = true;
        } else {
            // update this Document when this triple has not been stored already
            if (!hasProperty(field, text, document)) {
                // create a copy of the old document; updating the retrieved
                // Document instance works ok for stored properties but indexed
                // data gets lost when doing an IndexWriter.updateDocument with it
                Document newDocument = new Document();

                // add all existing fields (including id, uri, context, and text)
                for (Object oldFieldObject : document.getFields()) {
                    Field oldField = (Field) oldFieldObject;
                    newDocument.add(oldField);
                }

                // add the new triple to the cloned document
                addProperty(field, text, newDocument);

                // update the index with the cloned document
                writer = getIndexWriter();
                writer.updateDocument(idTerm, newDocument);
                updated = true;
            }
        }

        if (updated) {
            // make sure that these updates are visible for new
            // IndexReaders/Searchers
            writer.commit();

            // the old IndexReaders/Searchers are now outdated
            invalidateReaders();
        }
    }

    /**
     * Returns whether the provided literal is accepted by the LuceneIndex to
     * be indexed. For instance, it does not make much sense to index
     * xsd:float.
     *
     * @param literal
     *        the literal to be accepted
     * @return true if the given literal will be indexed by this LuceneIndex
     */
    public boolean accept(Literal literal) {
        // we reject null literals
        if (literal == null)
            return false;

        // we reject literals that are in the list of rejected data types
        if ((literal.getDatatype() != null)
                && (REJECTED_DATATYPES.contains(literal.getDatatype().stringValue())))
            return false;

        return true;
    }

    /**
     * Adds the "context" value to the document.
     *
     * @param context
     *        the context, or null for the null context
     * @param document
     *        the document
     */
    private void addContext(String context, Document document) {
        if (context != null) {
            document.add(new Field(CONTEXT_FIELD_NAME, context, Field.Store.YES,
                    Field.Index.NOT_ANALYZED_NO_NORMS));
        }
    }

    /**
     * Returns the String ID corresponding with the specified Resource. The id
     * string is either the URI or a bnode prefixed with a "!".
     */
    private String getResourceID(Resource resource) {
        if (resource instanceof IRI) {
            return resource.toString();
        } else if (resource instanceof BNode) {
            return BNODE_ID_PREFIX + ((BNode) resource).getID();
        } else {
            throw new IllegalArgumentException("Unknown Resource type: " + resource);
        }
    }
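    // Illustration with assumed example values: a subject
    // http://example.org/s in the null context gets the document id
    // "http://example.org/s|null" (see formIdString below), while a blank
    // node _:b1 in context http://example.org/g gets
    // "!b1|http://example.org/g".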
    /**
     * Gets the ID for a context. The context can be null; in that case the
     * "null" string is returned.
     *
     * @param resource
     *        the context
     * @return a string
     */
    private String getContextID(Resource resource) {
        if (resource == null)
            return CONTEXT_NULL;
        else
            return getResourceID(resource);
    }

    /**
     * Returns a Document representing the specified document ID (combination
     * of resource and context), or null when no such Document exists yet.
     */
    private Document getDocument(Term idTerm) throws IOException {
        IndexReader reader = getIndexReader();
        TermDocs termDocs = reader.termDocs(idTerm);

        try {
            if (termDocs.next()) {
                // return the Document and make sure there are no others
                int docNr = termDocs.doc();
                if (termDocs.next()) {
                    throw new RuntimeException("Multiple Documents for resource " + idTerm.text());
                }
                return reader.document(docNr);
            } else {
                // no such Document
                return null;
            }
        } finally {
            termDocs.close();
        }
    }

    private String formIdString(String resourceId, String contextId) {
        StringBuilder idBuilder = new StringBuilder(resourceId);
        idBuilder.append("|");
        idBuilder.append(contextId);
        return idBuilder.toString();
    }

    private Term formIdTerm(String resourceId, String contextId) {
        return new Term(ID_FIELD_NAME, formIdString(resourceId, contextId));
    }

    /**
     * Returns a list of Documents representing the specified Resource (empty
     * when no such Document exists yet). Each document represents a set of
     * statements with the specified Resource as a subject, which are stored
     * in a specific context.
     */
    private List<Document> getDocuments(Term uriTerm) throws IOException {
        List<Document> result = new LinkedList<Document>();

        IndexReader reader = getIndexReader();
        TermDocs termDocs = reader.termDocs(uriTerm);

        try {
            while (termDocs.next()) {
                int docNr = termDocs.doc();
                result.add(reader.document(docNr));
            }
        } finally {
            termDocs.close();
        }

        return result;
    }

    /**
     * Returns a Document representing the specified Resource & Context
     * combination, or null when no such Document exists yet.
     */
    public Document getDocument(Resource subject, Resource context) throws IOException {
        // fetch the Document representing this Resource
        String resourceId = getResourceID(subject);
        String contextId = getContextID(context);
        Term idTerm = formIdTerm(resourceId, contextId);
        return getDocument(idTerm);
    }

    /**
     * Returns a list of Documents representing the specified Resource (empty
     * when no such Document exists yet). Each document represents a set of
     * statements with the specified Resource as a subject, which are stored
     * in a specific context.
     */
    public List<Document> getDocuments(Resource subject) throws IOException {
        String resourceId = getResourceID(subject);
        Term uriTerm = new Term(URI_FIELD_NAME, resourceId);
        return getDocuments(uriTerm);
    }

    /**
     * Checks whether a field occurs with a specified value in a Document.
     */
    private boolean hasProperty(String fieldName, String value, Document document) {
        Field[] fields = document.getFields(fieldName);
        if (fields != null) {
            for (Field field : fields) {
                if (value.equals(field.stringValue())) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Determines whether the specified field name is a property field name.
     */
    private boolean isPropertyField(String fieldName) {
        return !ID_FIELD_NAME.equals(fieldName) && !URI_FIELD_NAME.equals(fieldName)
                && !TEXT_FIELD_NAME.equals(fieldName) && !CONTEXT_FIELD_NAME.equals(fieldName);
    }
    /**
     * Determines the number of properties stored in a Document.
     */
    private int numberOfPropertyFields(Document document) {
        // count the properties that are NOT id nor context nor text
        int propsize = 0;
        for (Object o : document.getFields()) {
            Field f = (Field) o;
            if (isPropertyField(f.name()))
                propsize++;
        }
        return propsize;
    }

    /**
     * Filters the given list of fields, retaining all property fields.
     */
    public Fieldable[] getPropertyFields(List<Fieldable> fields) {
        List<Fieldable> result = new ArrayList<Fieldable>();
        for (Fieldable field : fields) {
            if (isPropertyField(field.name()))
                result.add(field);
        }
        return result.toArray(new Fieldable[result.size()]);
    }

    /**
     * Stores and indexes an ID in a Document.
     */
    private void addID(String id, Document document) {
        document.add(new Field(ID_FIELD_NAME, id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
    }

    /**
     * Stores and indexes the resource ID in a Document.
     */
    private void addResourceID(String resourceId, Document document) {
        document.add(new Field(URI_FIELD_NAME, resourceId, Field.Store.YES,
                Field.Index.NOT_ANALYZED_NO_NORMS));
    }

    private String getLiteralPropertyValueAsString(Statement statement) {
        Value object = statement.getObject();
        if (!(object instanceof Literal)) {
            return null;
        }
        return ((Literal) object).getLabel();
    }

    /**
     * Checks whether the passed statement should be added (is it indexed? is
     * it stored?) and adds it as a predicate to the passed document. No
     * checks whether the predicate was already there.
     *
     * @param statement
     *        the statement to add
     * @param document
     *        the document to add to
     */
    private void addProperty(Statement statement, Document document) {
        String text = getLiteralPropertyValueAsString(statement);
        if (text == null)
            return;
        String field = statement.getPredicate().toString();
        addProperty(field, text, document);
    }

    /**
     * Stores and indexes a property in a Document. We don't have to
     * recalculate the concatenated text: just add another TEXT field and
     * Lucene will take care of this. Additional advantage: Lucene may be able
     * to handle the individual strings in a way that may affect e.g. phrase
     * and proximity searches (concatenation basically means loss of
     * information). NOTE: The TEXT_FIELD_NAME has to be stored, see in
     * LuceneSail.
     *
     * @see LuceneSail
     */
    private void addProperty(String predicate, String text, Document document) {
        // store this predicate
        document.add(new Field(predicate, text, Field.Store.YES, Field.Index.ANALYZED));

        // and in TEXT_FIELD_NAME
        document.add(new Field(TEXT_FIELD_NAME, text, Field.Store.YES, Field.Index.ANALYZED));
    }

    /**
     * Invalidates the readers and frees them if possible (readers that are
     * still open by a {@link LuceneQueryConnection} will not be closed).
     * Synchronized on oldmonitors because it manipulates them.
     *
     * @throws IOException
     */
    private void invalidateReaders() throws IOException {
        synchronized (oldmonitors) {
            // Move current monitor to old monitors and set null
            if (currentMonitor != null)
                // we do NOT close it directly as it may be used by an open
                // result iterator, hence moving it to the list of oldmonitors
                // where it is handled as other older monitors
                oldmonitors.add(currentMonitor);
            currentMonitor = null;

            // close all monitors if possible
            for (Iterator<ReaderMonitor> i = oldmonitors.iterator(); i.hasNext();) {
                ReaderMonitor monitor = i.next();
                if (monitor.closeWhenPossible()) {
                    i.remove();
                }
            }

            // check if all readers were closed
            if (oldmonitors.isEmpty()) {
                logger.debug("Deleting unused files from Lucene index");

                // clean up unused files (marked as 'deletable' in Luke Filewalker)
                getIndexWriter().deleteUnusedFiles();

                // logIndexStats();
            }
        }
    }

    private void logIndexStats() {
        try {
            IndexReader reader = null;
            try {
                reader = getIndexReader();

                Document doc;
                int totalFields = 0;
                Set<String> ids = new HashSet<String>();
                String[] idArray;
                int count = 0;
                for (int i = 0; i < reader.maxDoc(); i++) {
                    if (reader.isDeleted(i))
                        continue;
                    doc = reader.document(i);
                    totalFields += doc.getFields().size();
                    count++;
                    idArray = doc.getValues("id");
                    for (String id : idArray)
                        ids.add(id);
                }

                logger.info("Total documents in the index: " + reader.numDocs()
                        + ", number of deletable documents in the index: " + reader.numDeletedDocs()
                        + ", valid documents: " + count + ", total fields in all documents: " + totalFields
                        + ", average number of fields per document: "
                        + ((double) totalFields) / reader.numDocs());
                logger.info("Distinct ids in the index: " + ids.size());
            } finally {
                if (currentMonitor != null) {
                    currentMonitor.closeWhenPossible();
                    currentMonitor = null;
                }
            }
        } catch (IOException e) {
            logger.warn(e.getMessage(), e);
        }
    }

    public synchronized void removeStatement(Statement statement) throws IOException {
        Value object = statement.getObject();
        if (!(object instanceof Literal)) {
            return;
        }

        IndexWriter writer = null;
        boolean updated = false;

        // fetch the Document representing this Resource
        String resourceId = getResourceID(statement.getSubject());
        String contextId = getContextID(statement.getContext());
        String id = formIdString(resourceId, contextId);
        Term idTerm = new Term(ID_FIELD_NAME, id);

        Document document = getDocument(idTerm);
        if (document != null) {
            // determine the values used in the index for this triple
            String fieldName = statement.getPredicate().toString();
            String text = ((Literal) object).getLabel();

            // see if this triple occurs in this Document
            if (hasProperty(fieldName, text, document)) {
                // if the Document only has one predicate field, we can remove
                // the document
                int nrProperties = numberOfPropertyFields(document);
                if (nrProperties == 0) {
                    logger.info("encountered document with zero properties, should have been deleted: {}",
                            resourceId);
                } else if (nrProperties == 1) {
                    writer = getIndexWriter();
                    writer.deleteDocuments(idTerm);
                    updated = true;
                } else {
                    // there are more triples encoded in this Document: remove
                    // the document and add a new Document without this triple
                    Document newDocument = new Document();
                    addID(id, newDocument);
                    addResourceID(resourceId, newDocument);
                    addContext(contextId, newDocument);

                    for (Object oldFieldObject : document.getFields()) {
                        Field oldField = (Field) oldFieldObject;
                        String oldFieldName = oldField.name();
                        String oldValue = oldField.stringValue();

                        if (isPropertyField(oldFieldName)
                                && !(fieldName.equals(oldFieldName) && text.equals(oldValue))) {
                            addProperty(oldFieldName, oldValue, newDocument);
                        }
                    }

                    writer = getIndexWriter();
                    writer.updateDocument(idTerm, newDocument);
                    updated = true;
                }
            }
        }

        if (updated) {
            // make sure that these updates are visible for new
            // IndexReaders/Searchers
            writer.commit();

            // the old IndexReaders/Searchers are now outdated
            invalidateReaders();
        }
    }

    /**
     * Commits any changes done to the LuceneIndex since the last commit. The
     * semantics is synchronous to SailConnection.commit(), i.e. the
     * LuceneIndex should be committed/rolled back whenever the
     * LuceneSailConnection is committed/rolled back.
     */
    public void commit() throws IOException {
        // FIXME: implement
    }

    public void rollback() throws IOException {
        // FIXME: implement
    }

    // //////////////////////////////// Methods for querying the index

    /**
     * Returns the Resource corresponding with the specified Document number.
     * Note that all of Lucene's restrictions of using document numbers apply.
     */
    public Resource getResource(int documentNumber) throws IOException {
        Document document = getIndexSearcher().doc(documentNumber, URI_FIELD_SELECTOR);
        return document == null ? null : getResource(document);
    }

    /**
     * Returns the Resource corresponding with the specified Document.
     */
    public Resource getResource(Document document) {
        String idString = document.get(URI_FIELD_NAME);
        return getResource(idString);
    }

    /**
     * Parses an id-string (a serialized resource) back to a resource. Inverse
     * method of {@link #getResourceID(Resource)}.
     *
     * @param idString
     */
    private Resource getResource(String idString) {
        if (idString.startsWith(BNODE_ID_PREFIX)) {
            return new SimpleBNode(idString.substring(BNODE_ID_PREFIX.length()));
        } else {
            return new SimpleIRI(idString);
        }
    }

    private String getContextID(Document document) {
        return document.get(CONTEXT_FIELD_NAME);
    }

    // /**
    //  * Parses an id-string used for a context field (a serialized resource)
    //  * back to a resource. <b>CAN RETURN NULL</b>
    //  * Inverse method of {@link #getResourceID(Resource)}
    //  * @param idString
    //  * @return null if the passed idString was the {@link #CONTEXT_NULL}
    //  *         constant
    //  */
    // private Resource getContextResource(String idString) {
    //     if (CONTEXT_NULL.equals(idString))
    //         return null;
    //     else
    //         return getResource(idString);
    // }

    /**
     * Evaluates the given query and returns the results as a TopDocs instance.
     */
    public TopDocs search(String query) throws ParseException, IOException {
        return search(getQueryParser(null).parse(query));
    }

    /**
     * Evaluates the given query only for the given resource.
     */
    public TopDocs search(Resource resource, Query query) throws ParseException, IOException {
        // rewrite the query
        TermQuery idQuery = new TermQuery(new Term(URI_FIELD_NAME, getResourceID(resource)));
        BooleanQuery combinedQuery = new BooleanQuery();
        combinedQuery.add(idQuery, Occur.MUST);
        combinedQuery.add(query, Occur.MUST);
        int nDocs = Math.max(getIndexReader().numDocs(), 1);
        TopDocs hits = getIndexSearcher().search(combinedQuery, nDocs);

        // Now this is ok
        // if (hits.totalHits > 1)
        //     logger.info("More than one Lucene doc was found with {} == {}",
        //             ID_FIELD_NAME, getID(resource));

        return hits;
    }

    /**
     * Parses the passed query.
     *
     * @param query
     *        string
     * @return the parsed query
     * @throws ParseException
     *         when the parsing breaks
     */
    public Query parseQuery(String query, IRI propertyURI) throws ParseException {
        return getQueryParser(propertyURI).parse(query);
    }
    /**
     * Evaluates the given query and returns the results as a TopDocs instance.
     */
    public TopDocs search(Query query) throws IOException {
        int nDocs = Math.max(getIndexReader().numDocs(), 1);
        return getIndexSearcher().search(query, nDocs);
    }

    /**
     * Gets the score for a particular Resource and query. Returns a value < 0
     * when the Resource does not match the query.
     */
    public float getScore(Resource resource, String query, IRI propertyURI)
            throws ParseException, IOException {
        return getScore(resource, getQueryParser(propertyURI).parse(query));
    }

    /**
     * Gets the score for a particular Resource and query. Returns a value < 0
     * when the Resource does not match the query.
     */
    public float getScore(Resource resource, Query query) throws IOException {
        // rewrite the query
        TermQuery idQuery = new TermQuery(new Term(URI_FIELD_NAME, getResourceID(resource)));
        BooleanQuery combinedQuery = new BooleanQuery();
        combinedQuery.add(idQuery, Occur.MUST);
        combinedQuery.add(query, Occur.MUST);
        IndexSearcher searcher = getIndexSearcher();

        // fetch the score when the URI matches the original query
        TopDocs docs = searcher.search(combinedQuery, null, 1);
        if (docs.totalHits == 0) {
            return -1f;
        } else {
            return docs.scoreDocs[0].score;
        }
    }

    private QueryParser getQueryParser(IRI propertyURI) {
        // check out which query parser to use, based on the given property URI
        if (propertyURI == null)
            // if we have no property given, we create a default query parser
            // which has the TEXT_FIELD_NAME as the default field
            return new QueryParser(Version.LUCENE_35, TEXT_FIELD_NAME, this.queryAnalyzer);
        else
            // otherwise we create a query parser that has the given property
            // as the default field
            return new QueryParser(Version.LUCENE_35, propertyURI.toString(), this.queryAnalyzer);
    }
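    // A hedged querying sketch using the methods above; the query strings and
    // the 'index' and 'resource' variables are illustrative assumptions:
    //
    //   TopDocs hits = index.search("lucene AND sail"); // default field: TEXT_FIELD_NAME
    //   Query q = index.parseQuery("lucene", null);     // null property URI -> "text" field
    //   float score = index.getScore(resource, q);      // < 0 if the resource does not match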
    /**
     * Adds many statements at the same time, removes many statements at the
     * same time. Ordering by resource has to be done inside this method. The
     * passed added/removed sets are disjoint: no statement can be in both.
     *
     * @param added
     *        all added statements, can have multiple subjects
     * @param removed
     *        all removed statements, can have multiple subjects
     */
    public synchronized void addRemoveStatements(Collection<Statement> added, Collection<Statement> removed)
            throws Exception {
        // Buffer per resource
        MapOfListMaps<Resource, String, Statement> rsAdded = new MapOfListMaps<Resource, String, Statement>();
        MapOfListMaps<Resource, String, Statement> rsRemoved = new MapOfListMaps<Resource, String, Statement>();

        HashSet<Resource> resources = new HashSet<Resource>();
        for (Statement s : added) {
            rsAdded.add(s.getSubject(), getContextID(s.getContext()), s);
            resources.add(s.getSubject());
        }
        for (Statement s : removed) {
            rsRemoved.add(s.getSubject(), getContextID(s.getContext()), s);
            resources.add(s.getSubject());
        }

        logger.debug("Removing " + removed.size() + " statements, adding " + added.size() + " statements");

        IndexWriter writer = getIndexWriter();

        // for each resource, add/remove
        for (Resource resource : resources) {
            Map<String, List<Statement>> stmtsToRemove = rsRemoved.get(resource);
            Map<String, List<Statement>> stmtsToAdd = rsAdded.get(resource);

            Set<String> contextsToUpdate = new HashSet<String>(stmtsToAdd.keySet());
            contextsToUpdate.addAll(stmtsToRemove.keySet());

            Map<String, Document> docsByContext = new HashMap<String, Document>();

            // is the resource in the store?
            // fetch the Document representing this Resource
            String resourceId = getResourceID(resource);
            Term uriTerm = new Term(URI_FIELD_NAME, resourceId);
            List<Document> documents = getDocuments(uriTerm);

            for (Document doc : documents) {
                docsByContext.put(this.getContextID(doc), doc);
            }

            for (String contextId : contextsToUpdate) {
                String id = formIdString(resourceId, contextId);
                Term idTerm = new Term(ID_FIELD_NAME, id);

                Document document = docsByContext.get(contextId);
                if (document == null) {
                    // there are no such Documents: create one now
                    document = new Document();
                    addID(id, document);
                    addResourceID(resourceId, document);
                    addContext(contextId, document);
                    // add all statements, remember the contexts
                    // HashSet<Resource> contextsToAdd = new HashSet<Resource>();
                    List<Statement> list = stmtsToAdd.get(contextId);
                    if (list != null) {
                        for (Statement s : list) {
                            addProperty(s, document);
                        }
                    }

                    // add it to the index
                    writer.addDocument(document);

                    // THERE SHOULD BE NO DELETED TRIPLES ON A NEWLY ADDED RESOURCE
                    if (stmtsToRemove.containsKey(contextId))
                        logger.info(
                                "Statements are marked to be removed that should not be in the store, for resource {} and context {}. Nothing done.",
                                resource, contextId);
                } else {
                    // update the Document

                    // create a copy of the old document; updating the
                    // retrieved Document instance works ok for stored
                    // properties but indexed data gets lost when doing an
                    // IndexWriter.updateDocument with it
                    Document newDocument = new Document();

                    // buffer the removed literal statements
                    ListMap<String, String> removedOfResource = null;
                    {
                        List<Statement> removedStatements = stmtsToRemove.get(contextId);
                        if (removedStatements != null && !removedStatements.isEmpty()) {
                            removedOfResource = new ListMap<String, String>();
                            for (Statement r : removedStatements) {
                                if (r.getObject() instanceof Literal) {
                                    // remove value from both property field
                                    // and the corresponding text field
                                    String label = ((Literal) r.getObject()).getLabel();
                                    removedOfResource.put(r.getPredicate().toString(), label);
                                    removedOfResource.put(TEXT_FIELD_NAME, label);
                                }
                            }
                        }
                    }

                    // add all existing fields (including id, uri, context,
                    // and text) but without adding the removed ones; keep the
                    // predicate/value pairs to ensure that the statement
                    // cannot be added twice
                    SetMap<String, String> copiedProperties = new SetMap<String, String>();
                    for (Object oldFieldObject : document.getFields()) {
                        Field oldField = (Field) oldFieldObject;

                        // do not copy removed statements to the new version
                        // of the document
                        if (removedOfResource != null) {
                            // which fields were removed?
                            List<String> objectsRemoved = removedOfResource.get(oldField.name());
                            if ((objectsRemoved != null) && (objectsRemoved.contains(oldField.stringValue())))
                                continue;
                        }

                        newDocument.add(oldField);
                        copiedProperties.put(oldField.name(), oldField.stringValue());
                    }

                    // add all statements to this document, except for those
                    // which are already there
                    {
                        List<Statement> addedToResource = stmtsToAdd.get(contextId);
                        String val;
                        if (addedToResource != null && !addedToResource.isEmpty()) {
                            for (Statement s : addedToResource) {
                                val = getLiteralPropertyValueAsString(s);
                                if (val != null) {
                                    if (!copiedProperties.containsKeyValuePair(
                                            s.getPredicate().stringValue(), val)) {
                                        addProperty(s, newDocument);
                                    }
                                }
                            }
                        }
                    }

                    // update the index with the cloned document, if it
                    // contains any meaningful non-system properties
                    int nrProperties = numberOfPropertyFields(newDocument);
                    if (nrProperties > 0) {
                        writer.updateDocument(idTerm, newDocument);
                    } else {
                        writer.deleteDocuments(idTerm);
                    }
                }
            }
        }

        // make sure that these updates are visible for new
        // IndexReaders/Searchers
        writer.commit();

        // the old IndexReaders/Searchers are now outdated
        invalidateReaders();
    }

    /**
     * @param contexts
     * @param sail
     *        the underlying native sail from which to read the missing
     *        triples after deletion
     * @throws SailException
     */
    public synchronized void clearContexts(Resource[] contexts, Sail sail) throws IOException, SailException {
        // logger.warn("Clearing contexts operation did not change the index: contexts are not indexed at the moment");

        logger.debug("deleting contexts: {}", Arrays.toString(contexts));

        // these resources have to be read from the underlying rdf store and
        // their triples have to be added to the luceneindex after deletion of
        // documents
        // HashSet<Resource> resourcesToUpdate = new HashSet<Resource>();

        // remove all contexts passed
        for (Resource context : contexts) {
            // attention: context can be NULL!
            String contextString = getContextID(context);
            Term contextTerm = new Term(CONTEXT_FIELD_NAME, contextString);
            // IndexReader reader = getIndexReader();

            // now check all documents, and remember the URI of the resources
            // that were in multiple contexts
            // TermDocs termDocs = reader.termDocs(contextTerm);
            // try {
            //     while (termDocs.next()) {
            //         Document document = reader.document(termDocs.doc());
            //         // does this document have any other contexts?
            //         Field[] fields = document.getFields(CONTEXT_FIELD_NAME);
            //         for (Field f : fields) {
            //             if (!contextString.equals(f.stringValue())
            //                     && !f.stringValue().equals("null"))
            //             // there is another context
            //             {
            //                 logger.debug("test new contexts: {}", f.stringValue());
            //                 // is it in the also contexts (lucky us if it is)
            //                 Resource otherContextOfDocument =
            //                         getContextResource(f.stringValue()); // can return null
            //                 boolean isAlsoDeleted = false;
            //                 for (Resource c : contexts) {
            //                     if (c == null) {
            //                         if (otherContextOfDocument == null)
            //                             isAlsoDeleted = true;
            //                     } else if (c.equals(otherContextOfDocument))
            //                         isAlsoDeleted = true;
            //                 }
            //                 // the otherContextOfDocument is now either
            //                 // marked for deletion or not
            //                 if (!isAlsoDeleted) {
            //                     // get ID of document
            //                     Resource r = getResource(document);
            //                     resourcesToUpdate.add(r);
            //                 }
            //             }
            //         }
            //     }
            // } finally {
            //     termDocs.close();
            // }

            // now delete all documents from the deleted context
            getIndexWriter().deleteDocuments(contextTerm);
        }

        // now add those again, that had other contexts also.
        // SailConnection con = sail.getConnection();
        // try {
        //     // for each resource, add all
        //     for (Resource resource : resourcesToUpdate) {
        //         logger.debug("re-adding resource {}", resource);
        //         ArrayList<Statement> toAdd = new ArrayList<Statement>();
        //         CloseableIteration<? extends Statement, SailException> it =
        //                 con.getStatements(resource, null, null, false);
        //         while (it.hasNext()) {
        //             Statement s = it.next();
        //             toAdd.add(s);
        //         }
        //         addDocument(resource, toAdd);
        //     }
        // } finally {
        //     con.close();
        // }

        getIndexWriter().commit();
        invalidateReaders();
    }

    /**
     * Adds a complete Lucene Document based on these statements. Does not
     * search for an existing document with the same subject id (assume the
     * existing document was deleted).
     *
     * @param statements
     *        the statements that make up the resource
     * @throws IOException
     */
    public synchronized void addDocuments(Resource subject, List<Statement> statements) throws IOException {
        String resourceId = getResourceID(subject);

        SetMap<String, Statement> stmtsByContextId = new SetMap<String, Statement>();

        String contextId;
        for (Statement statement : statements) {
            contextId = getContextID(statement.getContext());
            stmtsByContextId.put(contextId, statement);
        }

        IndexWriter writer = getIndexWriter();
        for (Entry<String, Set<Statement>> entry : stmtsByContextId.entrySet()) {
            // create a new document
            Document document = new Document();

            String id = formIdString(resourceId, entry.getKey());
            addID(id, document);
            addResourceID(resourceId, document);
            addContext(entry.getKey(), document);

            for (Statement stmt : entry.getValue()) {
                // determine stuff to store
                addProperty(stmt, document);
            }
            // add it to the index
            writer.addDocument(document);
        }
    }

    /**
     * Clears the index of all statements.
     */
    public synchronized void clear() throws IOException {
        // clear
        // the old IndexReaders/Searchers are now outdated
        invalidateReaders();

        if (indexWriter != null)
            indexWriter.close();

        // create new writer
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_35, analyzer);
        indexWriterConfig.setOpenMode(OpenMode.CREATE);
        indexWriter = new IndexWriter(directory, indexWriterConfig);
        indexWriter.close();
        indexWriter = null;
    }
}
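To tie the pieces together, here is a small, self-contained sketch of how an application could drive the class directly. It is an illustration under stated assumptions, not part of the original sources: in practice the index is created and fed by a LuceneSail, the in-memory RAMDirectory would typically be an FSDirectory pointing at persistent storage, and the class name LuceneIndexDemo is made up for the example.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

import org.openrdf.sail.lucene.LuceneIndex;

public class LuceneIndexDemo {

    public static void main(String[] args) throws Exception {
        // An in-memory directory keeps the demo self-contained; a real
        // deployment would pass an FSDirectory instead.
        LuceneIndex index = new LuceneIndex(new RAMDirectory(),
                new StandardAnalyzer(Version.LUCENE_35));
        try {
            // Statements would normally arrive here via addStatement(...) or
            // addRemoveStatements(...), called by the owning LuceneSail.

            // Free-text search over the catch-all "text" field; on this
            // still-empty demo index it simply reports zero hits.
            TopDocs hits = index.search("example");
            System.out.println("matches: " + hits.totalHits);
        } finally {
            index.shutDown();
        }
    }
}

Note that search(String) can also throw ParseException for malformed query syntax, which the demo's throws Exception clause covers.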