org.sindice.siren.util.SirenTestCase.java Source code

Introduction

Here is the source code for org.sindice.siren.util.SirenTestCase.java, the abstract base class that SIREn unit tests extend on top of Lucene's LuceneTestCase.

Source

/**
 * Copyright 2014 National University of Ireland, Galway.
 *
 * This file is part of the SIREn project. Project and contact information:
 *
 *  https://github.com/rdelbru/SIREn
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.sindice.siren.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Random;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.ReaderClosedListener;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.AssertingIndexSearcher;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NamedThreadFactory;
import org.apache.lucene.util._TestUtil;
import org.sindice.siren.analysis.AnyURIAnalyzer;
import org.sindice.siren.analysis.AnyURIAnalyzer.URINormalisation;
import org.sindice.siren.analysis.JsonAnalyzer;
import org.sindice.siren.analysis.MockSirenAnalyzer;
import org.sindice.siren.analysis.MockSirenDocument;
import org.sindice.siren.analysis.MockSirenReader;
import org.sindice.siren.analysis.TupleAnalyzer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public abstract class SirenTestCase extends LuceneTestCase {

    protected static final Logger logger = LoggerFactory.getLogger(SirenTestCase.class);

    public static final String DEFAULT_TEST_FIELD = "content";

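    /**
     * Creates a {@link TupleAnalyzer} built from a {@link StandardAnalyzer}
     * and an {@link AnyURIAnalyzer} configured for full URI normalisation.
     */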
    public static Analyzer newTupleAnalyzer() {
        final AnyURIAnalyzer uriAnalyzer = new AnyURIAnalyzer(TEST_VERSION_CURRENT);
        uriAnalyzer.setUriNormalisation(URINormalisation.FULL);
        final TupleAnalyzer analyzer = new TupleAnalyzer(TEST_VERSION_CURRENT,
                new StandardAnalyzer(TEST_VERSION_CURRENT), uriAnalyzer);
        return analyzer;
    }

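    /**
     * Creates a {@link JsonAnalyzer} built from an {@link AnyURIAnalyzer}
     * (full URI normalisation) as the field analyzer and a
     * {@link StandardAnalyzer} as the literal analyzer.
     */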
    public static Analyzer newJsonAnalyzer() {
        final AnyURIAnalyzer fieldAnalyzer = new AnyURIAnalyzer(TEST_VERSION_CURRENT);
        fieldAnalyzer.setUriNormalisation(URINormalisation.FULL);
        final Analyzer literalAnalyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);
        final JsonAnalyzer analyzer = new JsonAnalyzer(TEST_VERSION_CURRENT, fieldAnalyzer, literalAnalyzer);
        return analyzer;
    }

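    /** Creates a {@link MockSirenAnalyzer} for tests based on {@link MockSirenDocument}s. */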
    public static Analyzer newMockAnalyzer() {
        return new MockSirenAnalyzer();
    }

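    /**
     * Base field type for test fields: indexed, tokenized, not stored, with
     * norms enabled, and with documents, frequencies and positions indexed.
     */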
    private static FieldType newFieldType() {
        final FieldType ft = new FieldType();
        ft.setStored(false);
        ft.setOmitNorms(false);
        ft.setIndexed(true);
        ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
        ft.setTokenized(true);
        return ft;
    }

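    /** Same as {@link #newFieldType()}, but with the field value stored. */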
    protected static FieldType newStoredFieldType() {
        final FieldType ft = newFieldType();
        ft.setStored(true);
        return ft;
    }

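    /** Same as {@link #newStoredFieldType()}, but with norms omitted. */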
    private FieldType newStoredNoNormFieldType() {
        final FieldType ft = newStoredFieldType();
        ft.setOmitNorms(true);
        return ft;
    }

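    /**
     * Creates a {@link RandomIndexWriter} with a default configuration based
     * on the given analyzer and codec, using a log merge policy and the
     * {@link DefaultSimilarity}.
     */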
    protected static RandomIndexWriter newRandomIndexWriter(final Directory dir, final Analyzer analyzer,
            final Codec codec) throws IOException {
        return newRandomIndexWriter(dir, analyzer, codec, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                .setCodec(codec).setMergePolicy(newLogMergePolicy()).setSimilarity(new DefaultSimilarity()));
    }

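    /**
     * Creates a {@link RandomIndexWriter} from the given configuration; the
     * analyzer and codec are expected to be already set on the configuration.
     */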
    protected static RandomIndexWriter newRandomIndexWriter(final Directory dir, final Analyzer analyzer,
            final Codec codec, final IndexWriterConfig config) throws IOException {
        final RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
        writer.setDoRandomForceMergeAssert(true);
        return writer;
    }

    protected static IndexReader newIndexReader(final RandomIndexWriter writer) throws IOException {
        // By default, wrap the reader into a SlowCompositeReaderWrapper, as
        // most of the tests require an atomic reader
        return SlowCompositeReaderWrapper.wrap(writer.getReader());
    }

    /**
     * Create a new searcher over the reader. This searcher might randomly use
     * threads.
     * <p>
     * Overrides the original {@link LuceneTestCase#newSearcher(IndexReader)}
     * implementation in order to avoid getting an {@link AssertingIndexSearcher},
     * which is incompatible with SIREn.
     */
    public static IndexSearcher newSearcher(final IndexReader r) throws IOException {
        final Random random = random();
        if (usually()) {
            // unlike the original implementation, we do not wrap the reader,
            // so as to avoid getting an AssertingAtomicReader
            return random.nextBoolean() ? new IndexSearcher(r) : new IndexSearcher(r.getContext());
        } else {
            int threads = 0;
            final ThreadPoolExecutor ex;
            if (random.nextBoolean()) {
                ex = null;
            } else {
                threads = _TestUtil.nextInt(random, 1, 8);
                ex = new ThreadPoolExecutor(threads, threads, 0L, TimeUnit.MILLISECONDS,
                        new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("LuceneTestCase"));
            }
            if (ex != null) {
                if (VERBOSE) {
                    System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads");
                }
                r.addReaderClosedListener(new ReaderClosedListener() {
                    @Override
                    public void onClose(final IndexReader reader) {
                        _TestUtil.shutdownExecutorService(ex);
                    }
                });
            }
            final IndexSearcher ret = random.nextBoolean() ? new IndexSearcher(r, ex)
                    : new IndexSearcher(r.getContext(), ex);
            return ret;
        }
    }

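    /**
     * Adds a single document with the given data in the
     * {@link #DEFAULT_TEST_FIELD} field, then commits.
     */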
    protected static void addDocument(final RandomIndexWriter writer, final String data) throws IOException {
        final Document doc = new Document();
        doc.add(new Field(DEFAULT_TEST_FIELD, data, newStoredFieldType()));
        writer.addDocument(doc);
        writer.commit();
    }

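    /**
     * Same as {@link #addDocument(RandomIndexWriter, String)}, but with norms
     * omitted on the field.
     */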
    protected void addDocumentNoNorms(final RandomIndexWriter writer, final String data) throws IOException {
        final Document doc = new Document();
        doc.add(new Field(DEFAULT_TEST_FIELD, data, this.newStoredNoNormFieldType()));
        writer.addDocument(doc);
        writer.commit();
    }

    /**
     * Atomically adds a block of documents with sequentially
     * assigned document IDs.
     * <br>
     * See also {@link IndexWriter#addDocuments(Iterable)}
     */
    protected static void addDocuments(final RandomIndexWriter writer, final String[] data) throws IOException {
        final ArrayList<Document> docs = new ArrayList<Document>();

        for (final String entry : data) {
            final Document doc = new Document();
            doc.add(new Field(DEFAULT_TEST_FIELD, entry, newStoredFieldType()));
            docs.add(doc);
        }
        writer.addDocuments(docs);
        writer.commit();
    }

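    /**
     * Atomically adds a block of {@link MockSirenDocument}s, each one wrapped
     * into a {@link MockSirenReader}, then commits.
     */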
    protected static void addDocuments(final RandomIndexWriter writer, final MockSirenDocument... sdocs)
            throws IOException {
        final ArrayList<Document> docs = new ArrayList<Document>(sdocs.length);
        for (final MockSirenDocument sdoc : sdocs) {
            final Document doc = new Document();
            doc.add(new Field(DEFAULT_TEST_FIELD, new MockSirenReader(sdoc), newFieldType()));
            docs.add(doc);
        }
        writer.addDocuments(docs);
        writer.commit();
    }

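    /** Deletes all documents from the index, then commits. */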
    protected void deleteAll(final RandomIndexWriter writer) throws IOException {
        writer.deleteAll();
        writer.commit();
    }

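    /** Forces the index to be merged down to a single segment. */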
    protected void forceMerge(final RandomIndexWriter writer) throws IOException {
        writer.forceMerge(1);
    }

}
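
Usage

The following is a minimal, hypothetical usage sketch (not part of the original file) showing how a concrete test class might combine the helpers above: it indexes a single JSON document, opens a reader and a searcher, and checks the document count. The class name, test name, sample data and the use of the default codec are illustrative assumptions only; real SIREn tests would typically pass a SIREn-specific codec and run SIREn queries.

import java.io.IOException;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.junit.Test;

import org.sindice.siren.util.SirenTestCase;

public class ExampleSirenTest extends SirenTestCase {

    @Test
    public void testAddAndSearchSingleDocument() throws IOException {
        // newDirectory() is inherited from LuceneTestCase
        final Directory dir = newDirectory();

        // The default codec keeps this sketch self-contained; real SIREn tests
        // would configure a SIREn-specific codec here.
        final RandomIndexWriter writer =
                newRandomIndexWriter(dir, newJsonAnalyzer(), Codec.getDefault());

        addDocument(writer, "{ \"name\" : \"value\" }");

        final IndexReader reader = newIndexReader(writer);
        final IndexSearcher searcher = newSearcher(reader);

        // One document was added and committed
        assertEquals(1, reader.numDocs());
        final TopDocs hits = searcher.search(new MatchAllDocsQuery(), 10);
        assertEquals(1, hits.totalHits);

        reader.close();
        writer.close();
        dir.close();
    }

}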