byrne.mitre.main.NameMatcher.java Source code

Java tutorial

Introduction

Here is the source code for byrne.mitre.main.NameMatcher.java

Source

package byrne.mitre.main;

/**
 * Copyright 2011 David Byrne
 *    
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

import byrne.mitre.MitreQuery;
import byrne.mitre.NGramAnalyzer;
import byrne.mitre.NameEntry;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.StringReader;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

/**
 * Command-line driver that builds an in-memory Lucene index from
 * {@code index.txt} and then runs every line of {@code queries.txt} against it,
 * one {@link MitreQuery} task per line, on a fixed-size thread pool. The tasks
 * are handed the shared {@code results.txt} writer.
 *
 * <p>All file names are relative to the working directory. Files are read and
 * written with the platform default charset (that is what {@link FileReader}
 * and {@link FileWriter} use).
 * NOTE(review): if the name lists contain non-ASCII characters, confirm the
 * platform charset matches the files' encoding.
 */
public class NameMatcher {

    /**
     * Program entry point: loads the index, runs all queries and prints the
     * elapsed wall-clock time in whole minutes.
     *
     * <p>I/O failures and interruption are reported on standard output; they
     * are not rethrown.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        try {

            long startTime = System.currentTimeMillis();

            System.out.println("Loading Index...");
            // NOTE(review): 2 and 4 are presumably the min/max n-gram sizes;
            // confirm against NGramAnalyzer.
            final Analyzer analyzer = new NGramAnalyzer(2, 4);

            final Directory index = new RAMDirectory();
            final IndexWriter writer = new IndexWriter(index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                loadIndex("index.txt", writer);
            } finally {
                // Close even when loading fails so the writer is never left open.
                writer.close();
            }

            System.out.println("Running queries...");
            runQueries(index, analyzer);

            long endTime = System.currentTimeMillis();
            System.out.println("Total run time: " + (endTime - startTime) / 60000 + " minutes");

        } catch (IOException e) {
            System.out.println(e);
        } catch (InterruptedException e) {
            // Restore the interrupt status so code further up the stack can see it.
            Thread.currentThread().interrupt();
            System.out.println(e);
        }

    }

    /**
     * Opens {@code queries.txt}, {@code results.txt} and a searcher over
     * {@code index}, runs all queries, and closes the three resources again,
     * whether or not the run succeeded.
     *
     * @param index    directory holding the already-built index
     * @param analyzer analyzer passed on to every query task
     * @throws IOException          if a file or the index cannot be opened, read or closed
     * @throws InterruptedException if the calling thread is interrupted while
     *                              waiting for the query tasks to finish
     */
    private static void runQueries(Directory index, Analyzer analyzer)
            throws IOException, InterruptedException {

        final BufferedReader bufferedReader = new BufferedReader(new FileReader("queries.txt"));
        try {
            final BufferedWriter out = new BufferedWriter(new FileWriter("results.txt"));
            try {
                final IndexSearcher searcher = new IndexSearcher(index, true);
                try {
                    submitQueries(bufferedReader, analyzer, searcher, out);
                } finally {
                    searcher.close();
                }
            } finally {
                // Closing also flushes whatever is still buffered for results.txt.
                out.close();
            }
        } finally {
            bufferedReader.close();
        }
    }

    /**
     * Submits one {@link MitreQuery} per input line to a pool with one thread
     * per available processor and blocks until all submitted tasks have finished.
     *
     * @param queries  source of query lines, one {@link NameEntry} per line
     * @param analyzer analyzer passed to every task
     * @param searcher searcher shared by all tasks
     * @param out      results writer shared by all tasks
     * @throws IOException          if reading a query line fails
     * @throws InterruptedException if interrupted while waiting for the pool
     */
    private static void submitQueries(BufferedReader queries, Analyzer analyzer,
            IndexSearcher searcher, BufferedWriter out)
            throws IOException, InterruptedException {

        final int threadCount = Runtime.getRuntime().availableProcessors();
        System.out.println("Total threads: " + threadCount);

        final ExecutorService executor = Executors.newFixedThreadPool(threadCount);
        try {
            String line;
            while ((line = queries.readLine()) != null) {
                final NameEntry entry = new NameEntry(line);
                executor.execute(new MitreQuery(entry, analyzer, searcher, out));
            }
            executor.shutdown();
            executor.awaitTermination(Long.MAX_VALUE, TimeUnit.MINUTES);
        } finally {
            // Has no effect once the pool has terminated normally. After a
            // failure or interrupt it stops the workers, so they neither keep
            // the JVM alive nor outlive the searcher/writer the caller is
            // about to close (cancellation of running tasks is best effort).
            executor.shutdownNow();
        }
    }

    /**
     * Reads {@code filename} line by line and adds one document per line to
     * {@code writer}. Each document carries the entry's id and full name as
     * stored, un-analyzed fields, plus an {@code ngrams} field fed from the
     * full name with term vectors enabled.
     *
     * <p>The writer is not closed here; that is left to the caller.
     *
     * @param filename path of the file with one {@link NameEntry} per line
     * @param writer   open index writer that receives the documents
     * @throws IOException if the file cannot be read or a document cannot be added
     */
    private static void loadIndex(String filename, IndexWriter writer) throws IOException {

        final BufferedReader bufferedReader = new BufferedReader(new FileReader(filename));
        try {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                final NameEntry entry = new NameEntry(line);
                final Document doc = new Document();
                doc.add(new Field("id", entry.getID(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.add(new Field("name", entry.getFullName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.add(new Field("ngrams", new StringReader(entry.getFullName()), Field.TermVector.YES));
                writer.addDocument(doc);
            }
        } finally {
            // Release the file handle even if parsing or indexing a line fails.
            bufferedReader.close();
        }
    }
}