Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package search.handler; import calliope.core.constants.Database; import calliope.core.constants.JSONKeys; import calliope.core.database.Connection; import calliope.core.database.Connector; import edu.luc.nmerge.mvd.MVD; import edu.luc.nmerge.mvd.MVDFile; import java.io.File; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.FSDirectory; import search.JettyServer; import search.exception.SearchException; import java.util.HashMap; import java.util.Iterator; import java.util.StringTokenizer; import org.json.simple.JSONObject; import org.json.simple.JSONValue; /** * Find things in the indices generated by BuildIndex * @author desmond */ public class Find { static final int MAX_PRECIS_TOKENS = 20; static QueryParser parser; static int HITS_PER_PAGE = 20; static HashMap<String, IndexSearcher> searchers; // we need to let this time-consuming load to happen only once static void initIndexSearchers() throws Exception { searchers = new HashMap<String, IndexSearcher>(); File indexDir = new File(JettyServer.indexRoot); File[] languageDirs = indexDir.listFiles(); for (int i = 0; i < languageDirs.length; i++) { IndexReader reader = DirectoryReader.open(FSDirectory.open(languageDirs[i])); IndexSearcher searcher = new IndexSearcher(reader); searchers.put(languageDirs[i].getName(), searcher); } } /** * Get the last component of the version id, its short name * @param vid the full version id * @return a simple version name */ private String shortName(String vid) { int index = vid.lastIndexOf("/"); if (index != -1) return vid.substring(index + 1); else return vid; } /** * Get the group name or path leading tot he final short name * @param vid the full version id * @return a path delimited by slashes and ending with one or "" */ private String group(String vid) { int index = vid.lastIndexOf("/"); if (index != -1) return vid.substring(0, index + 1); else return ""; } /** * Make a short sample of the text ending in ... * @param text the text possibly long * @return a short sample of the first few words */ private String sample(String text) { int nTokens = 0; StringBuilder sb = new StringBuilder(); StringTokenizer st = new StringTokenizer(text, " \n\t"); while (st.hasMoreTokens()) { sb.append(st.nextToken()); sb.append(" "); if (nTokens >= MAX_PRECIS_TOKENS) break; nTokens++; } sb.append(" ..."); return sb.toString(); } String digestCortex(String docid, String[] vid) throws SearchException { try { Connection conn = Connector.getConnection(); StringBuilder doc = new StringBuilder(); String bson = conn.getFromDb(Database.CORTEX, docid); JSONObject jDoc = (JSONObject) JSONValue.parse(bson); String format = (String) jDoc.get(JSONKeys.FORMAT); String body = (String) jDoc.get(JSONKeys.BODY); doc.append("{ "); if (format != null && format.startsWith("MVD") && vid != null && vid.length > 0) { MVD mvd = MVDFile.internalise(body); String encoding = mvd.getEncoding(); int version = mvd.getVersionByNameAndGroup(shortName(vid[0]), group(vid[0])); if (version != 0) { byte[] data = mvd.getVersion(version); body = new String(data, encoding); } } body = sample(body); doc.append("\"digest\": \""); doc.append(body); doc.append("\""); if (jDoc.containsKey(JSONKeys.TITLE)) { doc.append(", \"title\": \""); doc.append((String) jDoc.get(JSONKeys.TITLE)); doc.append("\""); } doc.append(", \"docid\": \""); doc.append(docid); doc.append("\""); if (vid != null) { doc.append(", \"vids\": [ "); for (int j = 0; j < vid.length; j++) { doc.append("\""); doc.append(vid[j]); doc.append("\""); if (j < vid.length - 1) doc.append(", "); } doc.append(" ]"); } doc.append(" }"); return doc.toString(); } catch (Exception e) { throw new SearchException(e); } } /** * We add any kosher metadata fields - those indexed in the first place * @param docid the document identifier * @return the digest - a JSON document * @throws SearchException */ String digestMetadata(String docid) throws SearchException { try { Connection conn = Connector.getConnection(); StringBuilder doc = new StringBuilder(); String bson = conn.getFromDb(Database.METADATA, docid); JSONObject jDoc = (JSONObject) JSONValue.parse(bson); doc.append("{ "); doc.append(", \"docid\": \""); doc.append(docid); doc.append("\""); Iterator<String> iter = BuildIndex.metadataKeys.iterator(); while (iter.hasNext()) { String key = iter.next(); if (jDoc.containsKey(key)) { doc.append(", \""); doc.append(key); doc.append("\": \""); doc.append((String) jDoc.get(key)); doc.append("\""); } } return doc.toString(); } catch (Exception e) { throw new SearchException(e); } } /** * Digest an annotation * @param docid the annotation docid * @return a JSON digest of the annotation body and docid * @throws SearchException */ String digestAnnotation(String docid) throws SearchException { try { Connection conn = Connector.getConnection(); StringBuilder doc = new StringBuilder(); String bson = conn.getFromDb(Database.ANNOTATIONS, docid); JSONObject jDoc = (JSONObject) JSONValue.parse(bson); String body = (String) jDoc.get(JSONKeys.BODY); doc.append("{ "); doc.append(", \"docid\": \""); doc.append(docid); doc.append("\""); body = sample(body); doc.append("\"digest\": \""); doc.append(body); doc.append("\""); doc.append(" }"); return doc.toString(); } catch (Exception e) { throw new SearchException(e); } } /** * Get a digest of the document * @param docid the document identifier * @param vid an array of matching vids in this docid or null * @param database the database from which to retrieve the document * @return a JSON document being a basic digest of the contents or null */ private String getDigest(String docid, String[] vid, String database) throws SearchException { if (database.equals(Database.CORTEX)) return digestCortex(docid, vid); else if (database.equals(Database.METADATA)) return digestMetadata(docid); else if (database.equals(Database.ANNOTATIONS)) return digestAnnotation(docid); else return null; } /** * Search for a query string in the index * @param line the query as text * @param language the language name of the index e.g. "english" * @param start the hit-number to start from * @return a JSON document containing the docids and vids of matching docs * @throws SearchException */ public String search(String line, String language, int start) throws SearchException { try { if (searchers == null) initIndexSearchers(); Analyzer analyzer = new StandardAnalyzer(); parser = new QueryParser(JSONKeys.CONTENT, analyzer); Query query = parser.parse(line); StringBuilder sb = new StringBuilder(); IndexSearcher searcher = searchers.get(language); if (searcher != null) { TopDocs results = searcher.search(query, 5 * HITS_PER_PAGE); ScoreDoc[] hits = results.scoreDocs; MVDHit[] mvdHits = MVDHit.build(searcher, hits); int numTotalHits = mvdHits.length; int end = Math.min(numTotalHits, start + HITS_PER_PAGE); sb.append("[ "); for (int i = start; i < end; i++) { String digest = getDigest(mvdHits[i].docid, mvdHits[i].vids, mvdHits[i].database); if (sb.length() > 2) sb.append(",\n"); if (digest != null) sb.append(digest); } sb.append(" ]"); return sb.toString(); } else throw new Exception("Unknown language " + language); } catch (Exception e) { throw new SearchException(e); } } }