org.geotoolkit.lucene.index.AbstractIndexer.java Source code

Java tutorial

Introduction

Here is the source code for org.geotoolkit.lucene.index.AbstractIndexer.java

Source

/*
 *    Geotoolkit - An Open Source Java GIS Toolkit
 *    http://www.geotoolkit.org
 *
 *    (C) 2007 - 2009, Geomatys
 *
 *    This library is free software; you can redistribute it and/or
 *    modify it under the terms of the GNU Lesser General Public
 *    License as published by the Free Software Foundation; either
 *    version 3 of the License, or (at your option) any later version.
 *
 *    This library is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *    Lesser General Public License for more details.
 */

package org.geotoolkit.lucene.index;

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.sql.SQLException;
import java.util.*;
import java.util.logging.Level;

// JTS dependencies
import com.vividsolutions.jts.geom.*;
import java.nio.file.Files;
import java.nio.file.Path;

// Apache Lucene dependencies
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.sis.util.ArgumentChecks;
import org.geotoolkit.geometry.jts.JTS;

// Geotoolkit dependencies
import org.geotoolkit.index.tree.StoreIndexException;
import org.geotoolkit.index.tree.TreeElementMapper;
import org.geotoolkit.index.tree.manager.NamedEnvelope;
import org.geotoolkit.index.tree.manager.SQLRtreeManager;
import org.geotoolkit.io.wkb.WKBUtils;
import org.geotoolkit.index.IndexingException;
import org.geotoolkit.lucene.LuceneUtils;
import org.geotoolkit.lucene.filter.LuceneOGCFilter;

import org.geotoolkit.nio.IOUtilities;
import org.geotoolkit.util.collection.CloseableIterator;
import org.opengis.geometry.MismatchedReferenceSystemException;

// Types dependencies
import org.opengis.referencing.crs.CoordinateReferenceSystem;
import org.opengis.referencing.operation.TransformException;
import org.opengis.util.FactoryException;

/**
 * An abstract Lucene indexer used to create and write a Lucene index.
 *
 * @author Mehdi Sidhoum
 * @author Guilhem Legal
 * @param <E> The object's type to insert in a document.
 * @module
 */
public abstract class AbstractIndexer<E> extends IndexLucene {

    /** Log prefix for a corrupted-index failure on one document (not referenced in this class). */
    protected static final String CORRUPTED_SINGLE_MSG = "CorruptIndexException while indexing document: ";
    /**
     * Log message for a corrupted-index failure while indexing several documents
     * (not referenced in this class). Worded like {@link #LOCK_MULTI_MSG}; it used to be
     * a copy of the single-document message.
     */
    protected static final String CORRUPTED_MULTI_MSG = "CorruptIndexException while indexing documents.";
    /** Log prefix for a lock failure on one document (not referenced in this class). */
    protected static final String LOCK_SINGLE_MSG = "LockObtainException while indexing document: ";
    /** Log message for a lock failure while indexing several documents (not referenced in this class). */
    protected static final String LOCK_MULTI_MSG = "LockObtainException while indexing documents.";
    /** Log prefix for an I/O failure while indexing; the indexing methods of this class log with it. */
    protected static final String IO_SINGLE_MSG = "IOException while indexing document: ";

    /**
     * Set by the constructor: {@code true} when a new index directory had to be created,
     * {@code false} when an existing one was reused. Exposed through {@link #needCreation()}.
     */
    private boolean needCreation;

    /**
     * Global stop flag polled by the index creation loops.
     * Declared {@code volatile} because it is raised through the static
     * {@code stopIndexation} methods and read from inside running indexing loops,
     * which may well be executing on a different thread.
     */
    protected static volatile boolean stopIndexing = false;

    /**
     * Identifiers of the services whose indexation must stop. The index creation loops
     * compare it with {@link #getServiceID()}.
     * NOTE(review): this is a plain {@link ArrayList} shared through static methods; it is
     * not synchronized.
     */
    protected static final List<String> indexationToStop = new ArrayList<>();

    /**
     * Map of field name / number type. Filled by {@code addNumericField} and written to the
     * {@code numericFields.properties} file by {@code storeNumericFieldsFile}.
     */
    private final Map<String, String> numericFields = new HashMap<>();

    /**
     * Builds an indexer working on the most recent index directory found under
     * {@code configDirectory}, creating a new one when none is found.
     * <p>
     * Every child of {@code configDirectory} is inspected: the text following the last
     * {@code '-'} of its name is parsed as a {@code long} timestamp and the child with the
     * greatest strictly positive timestamp wins. Children whose suffix is not a number are
     * reported at WARNING level and skipped. When an existing directory is selected, the
     * other index directories of {@code indexID} are deleted. The R-Tree is then obtained
     * for the selected directory.
     *
     * @param indexID identifier used as prefix of the index directory name; must not be null.
     * @param configDirectory directory containing (or receiving) the index directories; must not be null.
     * @param analyzer analyzer handed to the parent constructor.
     * @throws RuntimeException wrapping any {@link IOException} raised while scanning,
     *         creating or cleaning the directories.
     */
    public AbstractIndexer(final String indexID, final Path configDirectory, final Analyzer analyzer) {
        super(analyzer);
        ArgumentChecks.ensureNonNull("indexID", indexID);
        ArgumentChecks.ensureNonNull("configDirectory", configDirectory);
        try {
            // look for the child directory carrying the greatest timestamp suffix
            Path latestDirectory = null;
            if (Files.exists(configDirectory) && Files.isDirectory(configDirectory)) {
                long latestStamp = 0;
                try (final DirectoryStream<Path> children = Files.newDirectoryStream(configDirectory)) {
                    for (final Path child : children) {
                        final String childName = child.getFileName().toString();
                        final String stampText = childName.substring(childName.lastIndexOf('-') + 1);
                        long stamp;
                        try {
                            stamp = Long.parseLong(stampText);
                        } catch (NumberFormatException ex) {
                            LOGGER.log(Level.WARNING, "Unable to parse the timestamp:{0}", stampText);
                            continue;
                        }
                        if (stamp > latestStamp) {
                            latestStamp = stamp;
                            latestDirectory = child;
                        }
                    }
                }
            }

            needCreation = (latestDirectory == null);

            final Path indexDirectory;
            if (needCreation) {
                indexDirectory = configDirectory.resolve(indexID + "index-" + System.currentTimeMillis());
                Files.createDirectories(indexDirectory);
            } else {
                indexDirectory = latestDirectory;
                LOGGER.finer("Index already created.");
                deleteOldIndexDir(configDirectory, indexID, indexDirectory.getFileName().toString());
            }

            // the file directory must be set before the tree is read
            setFileDirectory(indexDirectory);
            rTree = SQLRtreeManager.get(indexDirectory, this);
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        }
    }

    /**
     * Builds an indexer without an explicit analyzer by delegating to
     * {@link #AbstractIndexer(String, Path, Analyzer)} with a {@code null} analyzer.
     * NOTE(review): which analyzer the parent class falls back to for {@code null} is not
     * visible from this class (a keyword analyzer, according to the previous documentation)
     * - confirm.
     *
     * @param indexID identifier used as prefix of the index directory name.
     * @param configDirectory directory containing (or receiving) the index directories.
     */
    public AbstractIndexer(final String indexID, final Path configDirectory) {
        this(indexID, configDirectory, (Analyzer) null);
    }

    /**
     * Tells whether the constructor had to create a new index directory.
     *
     * @return {@code true} if no existing index directory was found at construction time,
     *         {@code false} if an existing one was reused.
     */
    public boolean needCreation() {
        return this.needCreation;
    }

    /**
     * Deletes every index directory of the given service found in {@code configDirectory},
     * except the one named {@code currentDirName}.
     *
     * @param configDirectory directory whose children are inspected.
     * @param serviceID service identifier handed to {@code isIndexDir} to recognize index directories.
     * @param currentDirName file name of the directory to keep.
     * @throws IOException if the directory listing or a deletion fails.
     */
    private void deleteOldIndexDir(final Path configDirectory, final String serviceID, final String currentDirName)
            throws IOException {
        try (final DirectoryStream<Path> children = Files.newDirectoryStream(configDirectory)) {
            for (final Path child : children) {
                if (!isIndexDir(child, serviceID)) {
                    continue;
                }
                final boolean isCurrent = child.getFileName().toString().equals(currentDirName);
                if (isCurrent) {
                    continue;
                }
                IOUtilities.deleteRecursively(child);
            }
        }
    }

    /**
     * Presumably returns the identifiers of all the entries that can be indexed.
     * NOTE(review): not called from this class; confirm the contract against implementations.
     *
     * @return a collection of identifiers.
     * @throws IndexingException declared for implementations; conditions not visible here.
     */
    protected abstract Collection<String> getAllIdentifiers() throws IndexingException;

    /**
     * Supplies the identifiers walked by {@link #createIndex()} when
     * {@link #useEntryIterator()} returns {@code false}. Each identifier is then resolved
     * with {@link #getEntry(String)}. If the returned iterator is a
     * {@code CloseableIterator}, {@link #createIndex()} closes it once the walk is complete.
     *
     * @return an iterator over the identifiers to index.
     * @throws IndexingException declared for implementations; conditions not visible here.
     */
    protected abstract Iterator<String> getIdentifierIterator() throws IndexingException;

    /**
     * Supplies the entries walked by {@link #createIndex()} when
     * {@link #useEntryIterator()} returns {@code true}. If the returned iterator is a
     * {@code CloseableIterator}, {@link #createIndex()} closes it once the walk is complete.
     *
     * @return an iterator over the entries to index.
     * @throws IndexingException declared for implementations; conditions not visible here.
     */
    protected abstract Iterator<E> getEntryIterator() throws IndexingException;

    /**
     * Selects the source used by {@link #createIndex()}.
     *
     * @return {@code true} to index from {@link #getEntryIterator()}, {@code false} to index
     *         from {@link #getIdentifierIterator()} combined with {@link #getEntry(String)}.
     */
    protected abstract boolean useEntryIterator();

    /**
     * Loads the entry matching an identifier. In {@link #createIndex()}, an
     * {@link IndexingException} thrown here is logged and the identifier is skipped.
     *
     * @param identifier identifier obtained from {@link #getIdentifierIterator()}.
     * @return the entry to index.
     * @throws IndexingException declared for implementations; conditions not visible here.
     */
    protected abstract E getEntry(final String identifier) throws IndexingException;

    /**
     * Creates the index from the specified list of objects.
     * <p>
     * The R-Tree is reset, then every object is indexed in list order. If a stop has been
     * requested (global flag, or this service listed in {@code indexationToStop}) the
     * writer is closed, the index directory is deleted and the method returns immediately.
     * On normal completion the numeric fields file is written to the index directory.
     * <p>
     * {@link IOException}, {@code StoreIndexException} and {@link SQLException} are logged
     * at WARNING level and not propagated.
     *
     * @param toIndex objects to index.
     * @throws IndexingException if the indexation of one object fails.
     */
    public void createIndex(final List<E> toIndex) throws IndexingException {
        LOGGER.log(logLevel, "Creating lucene index for please wait...");

        final long time = System.currentTimeMillis();
        int nbEntries = 0;
        try {
            final IndexWriterConfig conf = new IndexWriterConfig(analyzer);
            // try-with-resources: the writer is released even when indexing a document fails.
            // It used to stay open (leaked) on any exception. Closing an already closed
            // writer (stop path below) has no effect.
            try (IndexWriter writer = new IndexWriter(
                    LuceneUtils.getAppropriateDirectory(getFileDirectory()), conf)) {
                final String serviceID = getServiceID();

                resetTree();
                nbEntries = toIndex.size();
                for (E entry : toIndex) {
                    if (stopIndexing || indexationToStop.contains(serviceID)) {
                        LOGGER.info("Index creation stopped after " + (System.currentTimeMillis() - time)
                                + " ms for service:" + serviceID);
                        stopIndexation(writer, serviceID);
                        return;
                    }
                    indexDocument(writer, entry);
                }
            }

            // the writer is closed at this point:
            // we store the numeric fields in a properties file in the index directory
            storeNumericFieldsFile();

        } catch (IOException | StoreIndexException | SQLException ex) {
            LOGGER.log(Level.WARNING, IO_SINGLE_MSG, ex);
        }
        LOGGER.log(logLevel, "Index creation process in " + (System.currentTimeMillis() - time) + " ms\n"
                + " documents indexed: " + nbEntries);
    }

    /**
     * Creates the index by pulling the objects one at a time, either from
     * {@link #getEntryIterator()} or from {@link #getIdentifierIterator()} combined with
     * {@link #getEntry(String)}, depending on {@link #useEntryIterator()}.
     * <p>
     * The R-Tree is reset before indexing. If a stop has been requested (global flag, or
     * this service listed in {@code indexationToStop}) the writer is closed, the index
     * directory is deleted and the method returns immediately. On normal completion the
     * numeric fields file is written to the index directory.
     * <p>
     * In identifier mode, an {@link IndexingException} raised for one identifier is logged
     * and that identifier is skipped. In entry mode it is propagated.
     *
     * @throws IndexingException if an entry cannot be indexed (entry mode), or wrapping any
     *         {@link IOException}, {@code StoreIndexException} or {@link SQLException}.
     */
    public void createIndex() throws IndexingException {
        LOGGER.log(logLevel, "(light memory) Creating lucene index please wait...");

        final long time = System.currentTimeMillis();
        int nbEntries = 0;
        try {
            final IndexWriterConfig conf = new IndexWriterConfig(analyzer);
            // try-with-resources: the writer is released even when indexing fails.
            // It used to stay open (leaked) on any exception. Closing an already closed
            // writer (stop path below) has no effect.
            try (IndexWriter writer = new IndexWriter(
                    LuceneUtils.getAppropriateDirectory(getFileDirectory()), conf)) {
                final String serviceID = getServiceID();

                resetTree();
                LOGGER.log(logLevel, "starting indexing...");

                if (useEntryIterator()) {
                    final Iterator<E> entries = getEntryIterator();
                    try {
                        while (entries.hasNext()) {
                            if (stopIndexing || indexationToStop.contains(serviceID)) {
                                LOGGER.info("Index creation stopped after " + (System.currentTimeMillis() - time)
                                        + " ms for service:" + serviceID);
                                stopIndexation(writer, serviceID);
                                return;
                            }
                            indexDocument(writer, entries.next());
                            nbEntries++;
                        }
                    } finally {
                        // also reached on the stop path and on failure, where the iterator
                        // used to be left open
                        closeSourceIterator(entries);
                    }
                } else {
                    final Iterator<String> identifiers = getIdentifierIterator();
                    try {
                        while (identifiers.hasNext()) {
                            final String identifier = identifiers.next();
                            if (stopIndexing || indexationToStop.contains(serviceID)) {
                                LOGGER.info("Index creation stopped after " + (System.currentTimeMillis() - time)
                                        + " ms for service:" + serviceID);
                                stopIndexation(writer, serviceID);
                                return;
                            }
                            try {
                                final E entry = getEntry(identifier);
                                indexDocument(writer, entry);
                                nbEntries++;
                            } catch (IndexingException ex) {
                                // best effort: one failing entry must not abort the whole indexation
                                LOGGER.log(Level.WARNING, "Metadata IO exeption while indexing metadata: " + identifier
                                        + " " + ex.getMessage() + "\nmove to next metadata...", ex);
                            }
                        }
                    } finally {
                        closeSourceIterator(identifiers);
                    }
                }
            }

            // the writer is closed at this point:
            // we store the numeric fields in a properties file in the index directory
            storeNumericFieldsFile();

        } catch (IOException | StoreIndexException | SQLException ex) {
            LOGGER.log(Level.SEVERE, IO_SINGLE_MSG + "{0}", ex.getMessage());
            throw new IndexingException("IOException while indexing documents:" + ex.getMessage(), ex);
        }
        LOGGER.log(logLevel, "Index creation process in " + (System.currentTimeMillis() - time)
                + " ms\n documents indexed: " + nbEntries + ".");
    }

    /**
     * Closes the given iterator when it is a {@code CloseableIterator}; does nothing otherwise.
     */
    private static void closeSourceIterator(final Iterator<?> iterator) throws IOException {
        if (iterator instanceof CloseableIterator) {
            ((CloseableIterator<?>) iterator).close();
        }
    }

    /**
     * Builds the Lucene document of the specified object and adds it through the given
     * writer. The writer is neither committed nor closed here, which makes this variant
     * suitable for indexing many objects in a row.
     *
     * @param writer A Lucene index writer, left open by this method.
     * @param meta The object to index.
     * @throws IndexingException if the document cannot be built.
     * @throws IOException if the writer fails.
     */
    public void indexDocument(final IndexWriter writer, final E meta) throws IndexingException, IOException {
        // the document number is the writer's document count before the addition
        final Document document = createDocument(meta, writer.maxDoc());
        writer.addDocument(document);
        LOGGER.log(Level.FINER, "Metadata: {0} indexed", getIdentifier(meta));
    }

    /**
     * Adds a single object to the index, using a writer opened and closed by this method,
     * then flushes the R-Tree and its element mapper when an R-Tree is present.
     * <p>
     * Failures are logged at WARNING level and not propagated.
     *
     * @param meta The object to index.
     */
    public void indexDocument(final E meta) {
        try {
            final IndexWriterConfig config = new IndexWriterConfig(analyzer);
            // try-with-resources: the writer used to stay open (leaked) when the document
            // could not be built or added.
            try (IndexWriter writer = new IndexWriter(
                    LuceneUtils.getAppropriateDirectory(getFileDirectory()), config)) {
                // the document number is the writer's document count before the addition
                final int docId = writer.maxDoc();
                writer.addDocument(createDocument(meta, docId));
                LOGGER.log(Level.FINER, "Metadata: {0} indexed", getIdentifier(meta));
            }

            // as before, the tree is flushed once the writer has been closed
            if (rTree != null) {
                rTree.getTreeElementMapper().flush();
                rTree.flush();
            }

        } catch (IndexingException | StoreIndexException ex) {
            LOGGER.log(Level.WARNING, "Error while indexing single document", ex);
        } catch (IOException ex) {
            LOGGER.log(Level.WARNING, IO_SINGLE_MSG + ex.getMessage(), ex);
        }
    }

    /**
     * Returns the identifier of the specified object. Within this class the value is only
     * used in the "indexed" log messages of the {@code indexDocument} methods.
     *
     * @param metadata the object being indexed.
     * @return the identifier of {@code metadata}.
     */
    protected abstract String getIdentifier(E metadata);

    /**
     * Raises the global stop flag. Every index creation loop of this class checks the flag
     * before handling its next object and aborts when it is set.
     */
    public static void stopIndexation() {
        AbstractIndexer.stopIndexing = true;
    }

    /**
     * Aborts a running index creation: closes the writer, deletes the index directory,
     * removes the service from the list of indexations to stop and lowers the global stop
     * flag once that list is empty.
     *
     * @param writer the writer of the aborted indexation.
     * @param serviceID the service identifier of this index.
     * @throws IOException if the writer cannot be closed or the directory cannot be deleted.
     */
    private void stopIndexation(final IndexWriter writer, final String serviceID) throws IOException {
        writer.close();
        IOUtilities.deleteRecursively(getFileDirectory());

        // removes one occurrence of the id; no effect when the id is not listed
        indexationToStop.remove(serviceID);

        if (indexationToStop.isEmpty()) {
            stopIndexing = false;
        }
    }

    /**
     * Writes the registered numeric fields (field name / number type) to the
     * {@code numericFields.properties} file of the index directory. An I/O failure is
     * logged at WARNING level and not propagated.
     */
    protected void storeNumericFieldsFile() {
        final Properties numericProps = new Properties();
        numericProps.putAll(numericFields);

        final Path target = getFileDirectory().resolve("numericFields.properties");
        try {
            IOUtilities.storeProperties(numericProps, target, null);
        } catch (IOException ex) {
            LOGGER.log(Level.WARNING, "Unable to store the numeric fields properties file.", ex);
        }
    }

    /**
     * Registers a numeric field, unless a field with the same name is already registered
     * (the first registration wins).
     *
     * @param fieldName name of the numeric field.
     * @param numberType character describing the number type; stored as a string.
     */
    protected void addNumericField(final String fieldName, final Character numberType) {
        // values are never null in this map, so a present key means "already registered"
        if (numericFields.containsKey(fieldName)) {
            return;
        }
        numericFields.put(fieldName, numberType.toString());
    }

    /**
     * Removes from the index the document whose {@code "id"} term equals the identifier,
     * and removes the matching envelope from the R-Tree when one is registered.
     * <p>
     * Failures are logged at WARNING level and not propagated.
     *
     * @param identifier identifier of the document to remove.
     */
    public void removeDocument(final String identifier) {
        try {
            final Directory dir = LuceneUtils.getAppropriateDirectory(getFileDirectory());
            final TermQuery query = new TermQuery(new Term("id", identifier));
            LOGGER.log(logLevel, "Term query:{0}", query);

            // same null guard as indexDocument(E): skip the R-Tree part when there is no tree
            if (rTree != null) {
                // look for the tree identifier of the envelope registered under this name
                final NamedEnvelope env = new NamedEnvelope(getTreeCrs(), identifier);
                final TreeElementMapper<NamedEnvelope> mapper = rTree.getTreeElementMapper();
                final int treeID = mapper.getTreeIdentifier(env);
                // -1 is treated as "no envelope registered for this identifier"
                if (treeID != -1) {
                    final NamedEnvelope realEnv = mapper.getObjectFromTreeIdentifier(treeID);
                    if (rTree.remove(realEnv)) {
                        // remove from mapper
                        mapper.setTreeIdentifier(null, treeID);
                        mapper.flush();
                        rTree.flush();
                    } else {
                        LOGGER.log(Level.WARNING, "unable to remove envelope for:{0}", identifier);
                    }
                }
            }

            final IndexWriterConfig config = new IndexWriterConfig(analyzer);
            // try-with-resources: the writer used to stay open (leaked) when the deletion
            // or the commit failed.
            try (IndexWriter writer = new IndexWriter(dir, config)) {
                writer.deleteDocuments(query);
                LOGGER.log(logLevel, "Metadata: {0} removed from the index", identifier);
                writer.commit();
            }

        } catch (CorruptIndexException ex) {
            LOGGER.log(Level.WARNING, "CorruptIndexException while removing document: " + ex.getMessage(), ex);
        } catch (IOException ex) {
            LOGGER.log(Level.WARNING, "IOException while removing document: " + ex.getMessage(), ex);
        } catch (StoreIndexException ex) {
            LOGGER.log(Level.WARNING, "StoreIndexException while removing document: " + ex.getMessage(), ex);
        }
    }

    /**
     * Makes a Lucene document from the specified object.
     *
     * @param object an object to index.
     * @param docId document number; the {@code indexDocument} methods of this class pass the
     *        value of {@code writer.maxDoc()} read just before the document is added.
     * @return A Lucene document.
     * @throws IndexingException declared for implementations; conditions not visible here.
     */
    protected abstract Document createDocument(E object, int docId) throws IndexingException;

    /**
     * Adds to the document a geometric field built from one or more bounding boxes.
     * <p>
     * The coordinate lists are turned into polygons by {@code LuceneUtils.getPolygons}.
     * No polygon: nothing is added. A single polygon is used as is. Several polygons are
     * grouped in a geometry collection tagged with {@code crs}. The geometry is then handed
     * to {@link #addGeometry(Document, Geometry, CoordinateReferenceSystem)} together with
     * the CRS returned by {@code getTreeCrs()}.
     *
     * @param doc The Lucene document being built.
     * @param minx a list of minimal X coordinates.
     * @param maxx a list of maximal X coordinates.
     * @param miny a list of minimal Y coordinates.
     * @param maxy a list of maximal Y coordinates.
     * @param crs coordinate reference system of the boxes.
     */
    protected void addBoundingBox(final Document doc, final List<Double> minx, final List<Double> maxx,
            final List<Double> miny, final List<Double> maxy, final CoordinateReferenceSystem crs) {
        final Polygon[] boxes = LuceneUtils.getPolygons(minx, maxx, miny, maxy, crs);
        if (boxes.length == 0) {
            return;
        }

        final Geometry shape;
        if (boxes.length > 1) {
            shape = LuceneUtils.GF.createGeometryCollection(boxes);
            JTS.setCRS(shape, crs);
        } else {
            shape = boxes[0];
        }
        addGeometry(doc, shape, getTreeCrs());
    }

    /**
     * Registers a JTS geometry for the document: its named envelope (named after the
     * document {@code "id"} field) is inserted in the R-Tree, and the geometry is stored
     * in the document as a WKB field.
     * <p>
     * A failure while computing or inserting the envelope is logged at WARNING level; the
     * stored field is added to the document in every case.
     *
     * @param doc The Lucene document being built.
     * @param geom A JTS geometry.
     * @param crs coordinate reference system handed to {@code LuceneUtils.getNamedEnvelope}.
     * @return the named envelope, or {@code null} if it could not be computed.
     */
    public NamedEnvelope addGeometry(final Document doc, final Geometry geom, final CoordinateReferenceSystem crs) {
        NamedEnvelope envelope = null;
        try {
            envelope = LuceneUtils.getNamedEnvelope(doc.get("id"), geom, crs);
            rTree.insert(envelope);
            // persist the mapper first, then the tree itself
            rTree.getTreeElementMapper().flush();
            rTree.flush();
        } catch (StoreIndexException | IOException | TransformException | FactoryException
                | MismatchedReferenceSystemException ex) {
            LOGGER.log(Level.WARNING, "Unable to insert envelope in R-Tree.", ex);
        }

        final StoredField geometryField =
                new StoredField(LuceneOGCFilter.GEOMETRY_FIELD_NAME, WKBUtils.toWKBwithSRID(geom));
        doc.add(geometryField);
        return envelope;
    }

    /**
     * Frees the resources by delegating to the parent implementation; this class adds no
     * cleanup of its own.
     */
    @Override
    public void destroy() {
        super.destroy();
    }

    /**
     * Raises the global stop flag and registers the given service identifiers in the list
     * of indexations to stop.
     * NOTE(review): the flag is raised even when {@code ids} is null or empty, and the
     * index creation loops abort as soon as the flag is set, whatever their service id.
     *
     * @param ids identifiers of the services to stop; may be {@code null}.
     */
    public static void stopIndexation(final List<String> ids) {
        stopIndexing = true;
        if (ids == null) {
            return;
        }
        indexationToStop.addAll(ids);
    }

    /**
     * Extracts the service ID from the name of the index directory: everything located
     * before the first occurrence of {@code "index"} in that name.
     *
     * @return the service ID of this index, or "" if the directory name does not contain
     *         {@code "index"} (or starts with it).
     */
    protected String getServiceID() {
        final String name = getFileDirectory().getFileName().toString();
        final int marker = name.indexOf("index");
        return (marker >= 0) ? name.substring(0, marker) : "";
    }
}