com.redhat.satellite.search.index.ngram.tests.NGramTestSetup.java Source code

Java tutorial

Introduction

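Here is the source code for com.redhat.satellite.search.index.ngram.tests.NGramTestSetup.java, a JUnit base class that builds two in-memory Lucene indexes (standard and n-gram analyzed) from a fixed list of sample packages.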

Source

/**
 * Copyright (c) 2008--2015 Red Hat, Inc.
 *
 * This software is licensed to you under the GNU General Public License,
 * version 2 (GPLv2). There is NO WARRANTY for this software, express or
 * implied, including the implied warranties of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. You should have received a copy of GPLv2
 * along with this software; if not, see
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
 *
 * Red Hat trademarks are not licensed under GPLv2. No permission is
 * granted to use or replicate Red Hat trademarks that are incorporated
 * in this software or its documentation.
 */
package com.redhat.satellite.search.index.ngram.tests;

import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import com.redhat.satellite.search.index.ngram.NGramAnalyzer;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

import junit.framework.TestCase;

public class NGramTestSetup extends TestCase {
    private static Logger log = Logger.getLogger(NGramTestSetup.class);

    protected RAMDirectory ngramDir;
    protected RAMDirectory stanDir;

    protected double score_threshold = .10;
    protected int min_ngram = 1;
    protected int max_ngram = 5;

    protected List<Map<String, String>> items = new LinkedList<Map<String, String>>();

    public NGramTestSetup() {
        super();
    }

    protected void addItem(String name, String description, String filename) {
        Map<String, String> item = new HashMap<String, String>();
        item.put("name", name);
        item.put("description", description);
        item.put("filename", filename);
        items.add(item);
    }

    protected void initItems() {
        addItem("spellingbee", "spelling application", "spellingbee-1.0.rpm");
        addItem("aspelling", "another spelling program alternate spell", "aspelling-1.0.rpm");
        addItem("aspell", "another spelling program", "aspell-0.3.4.rpm");
        addItem("spell", "spelling program", "spell-4943.rpm");
        addItem("slelp", "application with spelling error", "slelp1-43.rpm");
        addItem("libvirt", "virtualization library", "virt-pkg-1.rpm");
        addItem("virt-manager", "blah blah application", "virt-pkg-1.rpm");
        addItem("virtualfactory", "virtual factory something", "virtfact-04.rpm");
        addItem("newFactory", "factory test application", "newFactory-1.9.rpm");
        addItem("gtk+-devel", "development library for gtk", "gtk+-devel-10.rpm");
        addItem("gtk+", "runtime library", "gtk+-30.rpm");
        addItem("gtk-doc", "documentation for gtk", "gtk-doc-393.rpm");
        addItem("authconfig-gtk", "authentication related gtk", "authconfig-gtk-039.rpm");
        addItem("mtr-gtk", "blah blah mtr gtk", "mtr-gtk-039.rpm");
        addItem("ghostscript-gtk", "printting support application gtk", "ghostscript-gtk-30.rpm");
        addItem("gnome-bluetooth-libs", "library for bluetooth support", "gnome-bluetooth-libs-3.4.rpm");
        addItem("scim-bridge-gtk", "blah blah scim gtk", "scim-bridge-gtk-494.rpm");
        addItem("kernel", "linux kernel package", "kernel-094.rpm");
        addItem("kernel-hugemem",
                "This package includes an SMP version of "
                        + "the Linux kernel which supports systems with 16 Gigabytes " + "of memory or more.",
                "kernel-hugemem-2.6.9-84.EL.i686");
        addItem("kernel-hugemem-devel", "This package provides kernel "
                + "headers +and makefiles sufficient to build modules against " + "the hugemem kernel package.",
                "kernel-hugemem-devel-2.6.9-84.EL.i686");
    }

    /**
     * Creates an index in RAM
     * */
    public void setUp() throws Exception {
        super.setUp();
        initItems();
        this.stanDir = new RAMDirectory();
        IndexWriter stanWriter = new IndexWriter(this.stanDir, new StandardAnalyzer(), true);

        this.ngramDir = new RAMDirectory();
        IndexWriter ngramWriter = new IndexWriter(this.ngramDir, new NGramAnalyzer(min_ngram, max_ngram), true);

        for (Map<String, String> item : items) {
            String name = item.get("name");
            String descp = item.get("description");
            Document doc = new Document();
            doc.add(new Field("name", name, Field.Store.YES, Field.Index.TOKENIZED));
            doc.add(new Field("description", descp, Field.Store.YES, Field.Index.TOKENIZED));
            stanWriter.addDocument(doc);
            ngramWriter.addDocument(doc);
        }
        stanWriter.close();
        ngramWriter.close();
    }

    public Hits performSearch(Directory dir, Analyzer alyz, String query) throws Exception {
        QueryParser parser = new QueryParser("name", alyz);
        IndexSearcher searcher = new IndexSearcher(dir);
        Query q = parser.parse(query);
        Hits hits = searcher.search(q);
        return hits;
    }

    protected int thresholdHits(Hits hits) throws IOException {
        /** We could consider doing thresholding as a relative thing...
         * instead of checking against an absolute value, we grab top score
         * then filter based on difference from that...
         */
        int counter = 0;
        for (int i = 0; i < hits.length(); i++) {
            if (hits.score(i) >= score_threshold) {
                counter++;
            } else {
                break;
            }
        }
        return counter;
    }

    protected void displayHits(Hits hits) throws IOException {
        for (int i = 0; i < hits.length(); i++) {
            Document doc = hits.doc(i);
            String name = doc.get("name");
            String description = doc.get("description");
            log.info("Hit<" + i + "> Score< " + hits.score(i) + ">  name = <" + name + "> description = <"
                    + description + ">");
        }
    }

}