org.fcrepo.migration.foxml.DirectoryScanningIDResolver.java Source code

Java tutorial

Introduction

Here is the source code for org.fcrepo.migration.foxml.DirectoryScanningIDResolver.java

Source

/*
 * Copyright 2015 DuraSpace, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fcrepo.migration.foxml;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;

import java.io.File;
import java.io.IOException;

import static org.slf4j.LoggerFactory.getLogger;

/**
 * An InternalIDResolver implementation that generates an index of
 * datastream ids (filenames) to file paths for the contents of a
 * datastream directory.  The directory is expected to contain just
 * other directories and/or FOXML files.  The FOXML files are expected
 * to have a filename that is reversibly mapped from a fedora internal
 * id for that datastream version.
 * @author mdurbin
 */
public abstract class DirectoryScanningIDResolver implements InternalIDResolver {

    private static final Logger LOGGER = getLogger(InternalIDResolver.class);

    /**
     * A lucene IndexSearcher over an index maintained by this class.
     * For every file found in the datastream directory a document exists
     * in this index that contains an "id" field and a "path" field.  The
     * id field is the internal id, the path field is the full path to the
     * file containing that datastream content.
     */
    private IndexSearcher searcher;

    /**
     * directory scanning ID resolver
     * @param cachedIndexDir the index directory.  If it exists, the old cache will be used, if it doesn't a new
     *                 cache will be built at that location.  If it is null, a new cache will be built in
     *                 the temp file space that will be deleted upon application shutdown.
     * @param dsRoot the datastream root
     * @throws IOException IO exception
     */
    public DirectoryScanningIDResolver(final File cachedIndexDir, final File dsRoot) throws IOException {
        final File indexDir;
        if (cachedIndexDir == null) {
            final File temp = File.createTempFile("tempfile", "basedir");
            temp.delete();
            temp.mkdir();
            indexDir = new File(temp, "index");
            LOGGER.info("No index directory specified.  Creating temporary index at \"" + indexDir.getAbsolutePath()
                    + "\".");
            Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
                @Override
                public void run() {
                    try {
                        LOGGER.info("Deleting index directory at \"" + indexDir.getAbsolutePath() + "\"...");
                        FileUtils.deleteDirectory(indexDir);
                    } catch (IOException e) {
                        LOGGER.error("Unable to delete index directory at \"" + indexDir.getAbsolutePath() + "\"!",
                                e);
                        e.printStackTrace();
                    }
                }
            }));
        } else {
            indexDir = cachedIndexDir;
        }
        final Directory dir = FSDirectory.open(indexDir);
        if (indexDir.exists()) {
            LOGGER.warn("Index exists at \"" + indexDir.getPath() + "\" and will be used.  "
                    + "To clear index, simply delete this directory and re-run the application.");
        } else {
            final Analyzer analyzer = new StandardAnalyzer();
            final IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            final IndexWriter writer = new IndexWriter(dir, iwc);
            LOGGER.info("Builidng an index of all the datastreams in \"" + dsRoot.getPath() + "\"...");
            indexDatastreams(writer, dsRoot);

            writer.commit();
            writer.close();
        }

        final IndexReader reader = DirectoryReader.open(FSDirectory.open(indexDir));
        searcher = new IndexSearcher(reader);
    }

    @Override
    public CachedContent resolveInternalID(final String id) {
        try {
            final TopDocs result = searcher.search(new TermQuery(new Term("id", id)), 2);
            if (result.totalHits == 1) {
                return new FileCachedContent(new File(searcher.doc(result.scoreDocs[0].doc).get("path")));
            } else if (result.totalHits < 1) {
                throw new RuntimeException("Unable to resolve internal ID \"" + id + "\"!");
            } else {
                throw new IllegalStateException(result.totalHits + " files matched the internal id \"" + id
                        + "\".  (" + searcher.doc(result.scoreDocs[0].doc).get("path") + ", "
                        + searcher.doc(result.scoreDocs[1].doc).get("path") + "...)");
            }
        } catch (final IOException e) {
            throw new RuntimeException(e);
        }
    }

    private void indexDatastreams(final IndexWriter writer, final File f) throws IOException {
        if (f.isDirectory()) {
            for (final File child : f.listFiles()) {
                indexDatastreams(writer, child);
            }
        } else {
            final Document doc = new Document();
            doc.add(new StringField("path", f.getPath(), Field.Store.YES));
            doc.add(new StringField("id", getInternalIdForFile(f), Field.Store.YES));
            LOGGER.trace("Added \"" + getInternalIdForFile(f) + "\"");
            writer.addDocument(doc);
        }
    }

    /**
     * Determines the internal id for the given file.
     *
     * @param f file to check for
     * @return string containing internal id for the file
     */
    protected abstract String getInternalIdForFile(File f);

}