com.soebes.supose.core.lucene.LuceneTest.java Source code

Introduction

Here is the source code for com.soebes.supose.core.lucene.LuceneTest.java
Source

/**
 * The (Su)bversion Re(po)sitory (S)earch (E)ngine (SupoSE for short).
 *
 * Copyright (c) 2007-2011 by SoftwareEntwicklung Beratung Schulung (SoEBeS)
 * Copyright (c) 2007-2011 by Karl Heinz Marbaise
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 *
 * The License can viewed online under http://www.gnu.org/licenses/gpl.html
 * If you have any questions about the Software or about the license
 * just write an email to license@soebes.de
 */
package com.soebes.supose.core.lucene;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;

import java.io.IOException;
import java.util.Iterator;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import com.soebes.supose.core.FieldNames;
import com.soebes.supose.core.search.CustomQueryParser;
import com.soebes.supose.core.search.NumberUtils;
import com.soebes.supose.core.utility.AnalyzerFactory;

/**
 * Runs wildcard and revision-range queries through {@link CustomQueryParser}
 * against a small in-memory Lucene index.
 *
 * <p>
 * The index is built once in {@link #beforeClass()} and holds four documents
 * (revisions 1 to 4). Each document has an untokenized revision, a tokenized
 * contents and an untokenized filename field. Three of the four filenames end
 * in <code>.doc</code>, two of them live below <code>/trunk/</code>.
 * </p>
 */
public class LuceneTest {
    /** Maximum number of hits requested from every search. */
    private static final int MAX_HITS = 10;

    /** Value handed to {@link IndexWriter#setMaxFieldLength(int)}. */
    private static final int MAX_FIELD_LENGTH = 25000;

    /**
     * Opt-in switch for the diagnostic output of
     * {@link #printOut(Query, String, TopDocs)}. It is off unless the JVM is
     * started with <code>-Dsupose.test.debug=true</code>.
     */
    private static final boolean DEBUG = Boolean.getBoolean("supose.test.debug");

    // The index is kept in memory for the lifetime of the test class.
    private final Directory directory = new RAMDirectory();
    // Searcher on the index; created at the end of beforeClass().
    private IndexSearcher isearcher;

    /**
     * Adds a stored field which is indexed as a single term (not analyzed).
     *
     * @param doc the document to add the field to.
     * @param fieldName the name of the field.
     * @param value the value of the field.
     */
    private void addUnTokenizedField(Document doc, String fieldName, String value) {
        doc.add(new Field(fieldName, value, Field.Store.YES, Field.Index.NOT_ANALYZED));
    }

    /**
     * Adds a stored field whose value is run through the analyzer.
     *
     * @param doc the document to add the field to.
     * @param fieldName the name of the field.
     * @param value the value of the field.
     */
    private void addTokenizedField(Document doc, String fieldName, String value) {
        doc.add(new Field(fieldName, value, Field.Store.YES, Field.Index.ANALYZED));
    }

    /**
     * Builds one test document (revision, contents, filename) and adds it to
     * the given writer.
     *
     * @param iwriter the writer which receives the document.
     * @param revision the revision number; stored in padded form.
     * @param contents the text for the contents field.
     * @param filename the path for the filename field.
     * @throws CorruptIndexException if the writer reports a corrupt index.
     * @throws IOException if the writer fails to add the document.
     */
    private void addDocument(IndexWriter iwriter, int revision, String contents, String filename)
            throws CorruptIndexException, IOException {
        Document doc = new Document();
        addUnTokenizedField(doc, FieldNames.REVISION.getValue(), NumberUtils.pad(revision));
        addTokenizedField(doc, FieldNames.CONTENTS.getValue(), contents);
        addUnTokenizedField(doc, FieldNames.FILENAME.getValue(), filename);
        iwriter.addDocument(doc);
    }

    /**
     * Creates the in-memory index with four documents and opens the searcher
     * used by all tests.
     *
     * @throws CorruptIndexException if the index is reported as corrupt.
     * @throws LockObtainFailedException if the index lock can not be obtained.
     * @throws IOException if writing or opening the index fails.
     */
    @BeforeClass
    public void beforeClass() throws CorruptIndexException, LockObtainFailedException, IOException {
        Analyzer analyzer = AnalyzerFactory.createInstance();

        // 'true' creates a new index instead of appending to an existing one.
        IndexWriter iwriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            // The explicit limit replaces the UNLIMITED given to the constructor.
            iwriter.setMaxFieldLength(MAX_FIELD_LENGTH);

            addDocument(iwriter, 1, "This is the text to be indexed.", "/trunk/doc/testXML.doc");
            addDocument(iwriter, 2, "This is different text.", "/tags/docs/XYZabc.java");
            addDocument(iwriter, 3, "This is more different text.", "/tags/docs/SCMPlan.doc");
            addDocument(iwriter, 4, "This is the third text.", "/trunk/subdir/elviraXML.doc");

            iwriter.optimize();
        } finally {
            // Close the writer even if indexing failed.
            iwriter.close();
        }

        isearcher = new IndexSearcher(directory);
    }

    /**
     * Loads the stored document of every hit and, if {@link #DEBUG} is set,
     * prints the query, the hits and their fields to standard output.
     *
     * @param query the query which produced the result.
     * @param msg a label for the output (usually the name of the test).
     * @param result the hits returned by the search.
     * @throws CorruptIndexException if the index is reported as corrupt.
     * @throws IOException if a stored document can not be read.
     */
    private void printOut(Query query, String msg, TopDocs result) throws CorruptIndexException, IOException {
        if (DEBUG) {
            System.out.println("Message: " + msg);
            System.out.println(" -> Query: " + query.toString());
        }
        for (int i = 0; i < result.scoreDocs.length; i++) {
            // The document is loaded regardless of DEBUG, so a read failure
            // still propagates to the calling test.
            Document hit = isearcher.doc(result.scoreDocs[i].doc);
            if (!DEBUG) {
                continue;
            }
            System.out.println(" -> Document[" + i + "]");
            for (Iterator<?> fields = hit.getFields().iterator(); fields.hasNext();) {
                Field field = (Field) fields.next();
                System.out.println("   --> Field: " + field.name() + " v:" + field.stringValue());
            }
        }
    }

    /**
     * Parses the given query string, runs it against the test index and hands
     * the result to {@link #printOut(Query, String, TopDocs)}.
     *
     * @param parser the (already configured) parser to use.
     * @param queryString the query in query parser syntax.
     * @param label a label for the diagnostic output.
     * @return the hits of the search (at most {@link #MAX_HITS}).
     * @throws ParseException if the query string can not be parsed.
     * @throws IOException if searching or reading the index fails.
     */
    private TopDocs search(QueryParser parser, String queryString, String label) throws ParseException, IOException {
        Query query = parser.parse(queryString);
        TopDocs result = isearcher.search(query, null, MAX_HITS);
        printOut(query, label, result);
        return result;
    }

    /**
     * Closes the searcher and the in-memory directory.
     *
     * @throws IOException if closing one of them fails.
     */
    @AfterClass
    public void afterClass() throws IOException {
        try {
            if (isearcher != null) {
                isearcher.close();
            }
        } finally {
            // Release the directory even if closing the searcher failed.
            directory.close();
        }
    }

    @Test
    public void testSingleAsterik() throws ParseException, IOException {
        // All filenames ending in ".doc".
        QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS, AnalyzerFactory.createInstance());
        TopDocs result = search(parser, "+filename:/*.doc", "testSingleAsterik");
        assertEquals(result.totalHits, 3);
    }

    @Test
    public void testSingleAsterikWithPrefix() throws ParseException, IOException {
        // Only the ".doc" files below "/trunk/".
        QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS, AnalyzerFactory.createInstance());
        TopDocs result = search(parser, "+filename:/trunk/*.doc", "testSingleAsterikWithPrefix");
        assertEquals(result.totalHits, 2);
    }

    @Test
    public void testMultipleAsterik() throws ParseException, IOException {
        // Two wildcards within a single term.
        QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS, AnalyzerFactory.createInstance());
        TopDocs result = search(parser, "+filename:/*te*.doc", "testMultipleAsterik");
        assertEquals(result.totalHits, 1);
    }

    @Test
    public void testMultipleAsterikUppercase() throws ParseException, IOException {
        QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS, AnalyzerFactory.createInstance());
        // Keep the upper case letters of the wildcard term as they are.
        parser.setLowercaseExpandedTerms(false);
        TopDocs result = search(parser, "+filename:/*SCM*.doc", "testMultipleAsterikUppercase");
        assertEquals(result.totalHits, 1, "Expected to get exactly one element.");
    }

    // NOTE(review): presumably disabled because the filename field is indexed
    // NOT_ANALYZED, so the lower case pattern can not match "SCMPlan.doc" - confirm.
    @Test(enabled = false)
    public void testMultipleAsterikLowerCase() throws ParseException, IOException {
        QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS, AnalyzerFactory.createInstance());
        parser.setLowercaseExpandedTerms(false);
        TopDocs result = search(parser, "+filename:/*scm*.doc", "testMultipleAsterikLowerCase");
        assertEquals(result.totalHits, 1, "Expected to get exactly one element.");
    }

    @Test
    public void testSingleAsterikRestrictionToRevisionRange() throws ParseException, IOException {
        // ".doc" files restricted to the revisions 1 to 3.
        QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS, AnalyzerFactory.createInstance());
        parser.setLowercaseExpandedTerms(true);
        TopDocs result = search(parser, "+filename:/*.doc +revision:[1 TO 3]",
                "testSingleAsterikRestrictionToRevisionRange");
        assertEquals(result.totalHits, 2, "Expected to get two elements.");
    }

    @Test
    public void testSingleAsterikRestrictionToDifferentRevisionRange() throws ParseException, IOException {
        // ".doc" files restricted to the revisions 1 to 2.
        QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS, AnalyzerFactory.createInstance());
        parser.setLowercaseExpandedTerms(true);
        TopDocs result = search(parser, "+filename:/*.doc +revision:[1 TO 2]",
                "testSingleAsterikRestrictionToDifferentRevisionRange");
        assertEquals(result.totalHits, 1, "Expected to get exactly one element.");
    }

    @Test
    public void testSingleRevision() throws ParseException, IOException {
        // Exactly one document has been indexed with revision 1.
        QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS, AnalyzerFactory.createInstance());
        parser.setLowercaseExpandedTerms(true);
        TopDocs result = search(parser, "+revision:1", "testSingleRevision");
        assertEquals(result.totalHits, 1, "Expected to get exactly one element.");
    }

}