search.handler.Find.java Source code

Introduction

Here is the source code for search.handler.Find.java
Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package search.handler;

import calliope.core.constants.Database;
import calliope.core.constants.JSONKeys;
import calliope.core.database.Connection;
import calliope.core.database.Connector;
import edu.luc.nmerge.mvd.MVD;
import edu.luc.nmerge.mvd.MVDFile;
import java.io.File;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import search.JettyServer;
import search.exception.SearchException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.json.simple.JSONObject;
import org.json.simple.JSONValue;

/**
 * Find things in the indices generated by BuildIndex
 * @author desmond
 */
public class Find {
    static final int MAX_PRECIS_TOKENS = 20;
    static QueryParser parser;
    static int HITS_PER_PAGE = 20;
    static HashMap<String, IndexSearcher> searchers;

    // we need to let this time-consuming load to happen only once
    static void initIndexSearchers() throws Exception {
        searchers = new HashMap<String, IndexSearcher>();
        File indexDir = new File(JettyServer.indexRoot);
        File[] languageDirs = indexDir.listFiles();
        for (int i = 0; i < languageDirs.length; i++) {
            IndexReader reader = DirectoryReader.open(FSDirectory.open(languageDirs[i]));
            IndexSearcher searcher = new IndexSearcher(reader);
            searchers.put(languageDirs[i].getName(), searcher);
        }
    }

    /**
    * Get the last component of the version id, its short name
    * @param vid the full version id
    * @return a simple version name
    */
    private String shortName(String vid) {
        int index = vid.lastIndexOf("/");
        if (index != -1)
            return vid.substring(index + 1);
        else
            return vid;
    }

    /**
     * Get the group name or path leading tot he final short name
     * @param vid the full version id
     * @return a path delimited by slashes and ending with one or ""
     */
    private String group(String vid) {
        int index = vid.lastIndexOf("/");
        if (index != -1)
            return vid.substring(0, index + 1);
        else
            return "";
    }

    /**
     * Make a short sample of the text ending in ...
     * @param text the text possibly long
     * @return a short sample of the first few words
     */
    private String sample(String text) {
        int nTokens = 0;
        StringBuilder sb = new StringBuilder();
        StringTokenizer st = new StringTokenizer(text, " \n\t");
        while (st.hasMoreTokens()) {
            sb.append(st.nextToken());
            sb.append(" ");
            if (nTokens >= MAX_PRECIS_TOKENS)
                break;
            nTokens++;
        }
        sb.append(" ...");
        return sb.toString();
    }

    String digestCortex(String docid, String[] vid) throws SearchException {
        try {
            Connection conn = Connector.getConnection();
            StringBuilder doc = new StringBuilder();
            String bson = conn.getFromDb(Database.CORTEX, docid);
            JSONObject jDoc = (JSONObject) JSONValue.parse(bson);
            String format = (String) jDoc.get(JSONKeys.FORMAT);
            String body = (String) jDoc.get(JSONKeys.BODY);
            doc.append("{ ");
            if (format != null && format.startsWith("MVD") && vid != null && vid.length > 0) {
                MVD mvd = MVDFile.internalise(body);
                String encoding = mvd.getEncoding();
                int version = mvd.getVersionByNameAndGroup(shortName(vid[0]), group(vid[0]));
                if (version != 0) {
                    byte[] data = mvd.getVersion(version);
                    body = new String(data, encoding);
                }
            }
            body = sample(body);
            doc.append("\"digest\": \"");
            doc.append(body);
            doc.append("\"");
            if (jDoc.containsKey(JSONKeys.TITLE)) {
                doc.append(", \"title\": \"");
                doc.append((String) jDoc.get(JSONKeys.TITLE));
                doc.append("\"");
            }
            doc.append(", \"docid\": \"");
            doc.append(docid);
            doc.append("\"");
            if (vid != null) {
                doc.append(", \"vids\": [ ");
                for (int j = 0; j < vid.length; j++) {
                    doc.append("\"");
                    doc.append(vid[j]);
                    doc.append("\"");
                    if (j < vid.length - 1)
                        doc.append(", ");
                }
                doc.append(" ]");
            }
            doc.append(" }");
            return doc.toString();
        } catch (Exception e) {
            throw new SearchException(e);
        }
    }

    /**
     * We add any kosher metadata fields - those indexed in the first place
     * @param docid the document identifier
     * @return the digest - a JSON document
     * @throws SearchException 
     */
    String digestMetadata(String docid) throws SearchException {
        try {
            Connection conn = Connector.getConnection();
            StringBuilder doc = new StringBuilder();
            String bson = conn.getFromDb(Database.METADATA, docid);
            JSONObject jDoc = (JSONObject) JSONValue.parse(bson);
            doc.append("{ ");
            doc.append(", \"docid\": \"");
            doc.append(docid);
            doc.append("\"");
            Iterator<String> iter = BuildIndex.metadataKeys.iterator();
            while (iter.hasNext()) {
                String key = iter.next();
                if (jDoc.containsKey(key)) {
                    doc.append(", \"");
                    doc.append(key);
                    doc.append("\": \"");
                    doc.append((String) jDoc.get(key));
                    doc.append("\"");
                }
            }
            return doc.toString();
        } catch (Exception e) {
            throw new SearchException(e);
        }
    }

    /**
     * Digest an annotation
     * @param docid the annotation docid
     * @return a JSON digest of the annotation body and docid
     * @throws SearchException 
     */
    String digestAnnotation(String docid) throws SearchException {
        try {
            Connection conn = Connector.getConnection();
            StringBuilder doc = new StringBuilder();
            String bson = conn.getFromDb(Database.ANNOTATIONS, docid);
            JSONObject jDoc = (JSONObject) JSONValue.parse(bson);
            String body = (String) jDoc.get(JSONKeys.BODY);
            doc.append("{ ");
            doc.append(", \"docid\": \"");
            doc.append(docid);
            doc.append("\"");
            body = sample(body);
            doc.append("\"digest\": \"");
            doc.append(body);
            doc.append("\"");
            doc.append(" }");
            return doc.toString();
        } catch (Exception e) {
            throw new SearchException(e);
        }
    }

    /**
     * Get a digest of the document
     * @param docid the document identifier
     * @param vid an array of matching vids in this docid or null
     * @param database the database from which to retrieve the document
     * @return a JSON document being a basic digest of the contents or null
     */
    private String getDigest(String docid, String[] vid, String database) throws SearchException {
        if (database.equals(Database.CORTEX))
            return digestCortex(docid, vid);
        else if (database.equals(Database.METADATA))
            return digestMetadata(docid);
        else if (database.equals(Database.ANNOTATIONS))
            return digestAnnotation(docid);
        else
            return null;
    }

    /**
      * Search for a query string in the index
      * @param line the query as text
      * @param language the language name of the index e.g. "english"
      * @param start the hit-number to start from
      * @return a JSON document containing the docids and vids of matching docs
      * @throws SearchException 
      */
    public String search(String line, String language, int start) throws SearchException {
        try {
            if (searchers == null)
                initIndexSearchers();
            Analyzer analyzer = new StandardAnalyzer();
            parser = new QueryParser(JSONKeys.CONTENT, analyzer);
            Query query = parser.parse(line);
            StringBuilder sb = new StringBuilder();
            IndexSearcher searcher = searchers.get(language);
            if (searcher != null) {
                TopDocs results = searcher.search(query, 5 * HITS_PER_PAGE);
                ScoreDoc[] hits = results.scoreDocs;
                MVDHit[] mvdHits = MVDHit.build(searcher, hits);
                int numTotalHits = mvdHits.length;
                int end = Math.min(numTotalHits, start + HITS_PER_PAGE);
                sb.append("[ ");
                for (int i = start; i < end; i++) {
                    String digest = getDigest(mvdHits[i].docid, mvdHits[i].vids, mvdHits[i].database);
                    if (sb.length() > 2)
                        sb.append(",\n");
                    if (digest != null)
                        sb.append(digest);
                }
                sb.append(" ]");
                return sb.toString();
            } else
                throw new Exception("Unknown language " + language);
        } catch (Exception e) {
            throw new SearchException(e);
        }
    }
}