Example usage for org.apache.lucene.search IndexSearcher explain

List of usage examples for org.apache.lucene.search IndexSearcher explain

Introduction

On this page you can find example usage of org.apache.lucene.search IndexSearcher.explain.

Prototype

protected Explanation explain(Weight weight, int doc) throws IOException 

Source Link

Document

Expert: low-level implementation method. Returns an Explanation that describes how doc scored against weight.

Usage

From source file:org.getopt.luke.Luke.java

License:Apache License

/**
 * Pop up a modal dialog explaining the selected result.
 * @param sTable Thinlet table widget containing selected search result.
 */
public void explainResult(Object sTable) {
    Object row = getSelectedItem(sTable);
    if (row == null)
        return;
    // Lucene document id was attached to the row when the result table was built.
    final Integer docid = (Integer) getProperty(row, "docid");
    if (docid == null)
        return;
    if (ir == null) {
        showStatus(MSG_NOINDEX);
        return;
    }
    // The Query that produced this result set is attached to the table widget itself.
    final Query q = (Query) getProperty(sTable, "query");
    if (q == null)
        return;
    Thread t = new Thread() {
        public void run() {
            try {
                // Fresh searcher over the already-open reader, configured with
                // the similarity selected in the search options tab.
                IndexSearcher is = new IndexSearcher(ir);
                Similarity sim = createSimilarity(find("srchOptTabs"));
                is.setSimilarity(sim);
                Explanation expl = is.explain(q, docid.intValue());
                // Build the explanation dialog from its XML layout and populate
                // the tree widget with the Explanation hierarchy.
                Object dialog = addComponent(null, "/xml/explain.xml", null, null);
                Object eTree = find(dialog, "eTree");
                addNode(eTree, expl);
                //setBoolean(eTree, "expand", true);
                add(dialog);
            } catch (Exception e) {
                e.printStackTrace();
                errorMsg(e.getMessage());
            }
        }
    };
    // For slow (e.g. remote) indexes run in a background thread; otherwise run
    // inline on the calling thread — t.run() does NOT start a new thread.
    if (slowAccess) {
        t.start();
    } else {
        t.run();
    }
}

From source file:org.modmine.web.ModMineSearch.java

License:GNU General Public License

/**
 * Perform a keyword search over all document metadata fields with Lucene.
 *
 * @param searchString string to search for
 * @return map of document IDs with their respective scores; empty on parse or I/O error
 */
public static Map<Integer, Float> runLuceneSearch(String searchString) {
    LinkedHashMap<Integer, Float> matches = new LinkedHashMap<Integer, Float>();
    String queryString = parseQueryString(prepareQueryString(searchString));

    long time = System.currentTimeMillis();

    IndexSearcher searcher = null;
    IndexReader rewriteReader = null;
    try {
        searcher = new IndexSearcher(ram);
        Analyzer analyzer = new WhitespaceAnalyzer();
        org.apache.lucene.search.Query query;

        // pass entire list of field names to the multi-field parser
        // => search through all fields
        String[] fieldNamesArray = new String[fieldNames.size()];
        fieldNames.toArray(fieldNamesArray);
        QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30, fieldNamesArray, analyzer,
                fieldBoosts);
        query = queryParser.parse(queryString);

        // required to expand search terms (wildcard/prefix queries).
        // FIX: keep a handle on the reader so it can be closed — the original
        // leaked an IndexReader on every search.
        rewriteReader = IndexReader.open(ram);
        query = query.rewrite(rewriteReader);
        LOG.debug("Actual query: " + query);

        TopDocs topDocs = searcher.search(query, 500);

        time = System.currentTimeMillis() - time;
        LOG.info("Found " + topDocs.totalHits + " document(s) that matched query '" + queryString + "' in "
                + time + " milliseconds:");

        for (int i = 0; (i < MAX_HITS && i < topDocs.totalHits); i++) {
            Document doc = searcher.doc(topDocs.scoreDocs[i].doc);
            String name = doc.get("name");

            // show how score was calculated
            if (i < 2) {
                LOG.debug("Score for " + name + ": " + searcher.explain(query, topDocs.scoreDocs[i].doc));
            }

            // FIX: Float.valueOf instead of deprecated new Float(...)
            matches.put(Integer.parseInt(name), Float.valueOf(topDocs.scoreDocs[i].score));
        }
    } catch (ParseException e) {
        // just return an empty list
        LOG.info("Exception caught, returning no results", e);
    } catch (IOException e) {
        // just return an empty list
        LOG.info("Exception caught, returning no results", e);
    } finally {
        // FIX: release index resources opened for this search.
        try {
            if (rewriteReader != null) {
                rewriteReader.close();
            }
        } catch (IOException e) {
            LOG.info("Failed to close rewrite reader", e);
        }
        try {
            if (searcher != null) {
                searcher.close();
            }
        } catch (IOException e) {
            LOG.info("Failed to close searcher", e);
        }
    }
    return matches;
}

From source file:org.mskcc.pathdb.lucene.LuceneResults.java

License:Open Source License

/**
 * Collects one pager-window of Lucene search results: cpath ids, scores,
 * highlighted fragments, family-tree counts, and data-source information
 * for each hit in the window.
 *
 * @param pager                pagination window (start/end indices into hits)
 * @param query                the executed Lucene query, used for score explanations
 * @param indexSearcher        searcher the query was run against
 * @param hits                 Lucene hits to page through
 * @param term                 raw search term; when non-null a highlighter is built from it
 * @param globalFilterSettings optional global data-source filter; may be null
 * @param debug                when true, a scoring Explanation is stored per hit
 * @throws IOException    on index access failure
 * @throws ParseException on highlighter query parse failure
 * @throws DaoException   on database access failure
 */
public LuceneResults(Pager pager, Query query, IndexSearcher indexSearcher, Hits hits, String term,
        GlobalFilterSettings globalFilterSettings, boolean debug)
        throws IOException, ParseException, DaoException {
    numHits = hits.length();
    int size = pager.getEndIndex() - pager.getStartIndex();

    // init private variables
    // NOTE(review): explanationMap is written below when debug is true but is
    // not created here, unlike the other collections — presumably it has a
    // field initializer; verify against the rest of the class.
    cpathIds = new long[size];
    fragments = new ArrayList<List<String>>();
    numDescendentsList = new ArrayList<Integer>();
    numParentsList = new ArrayList<Integer>();
    numParentPathwaysList = new ArrayList<Integer>();
    numParentInteractionsList = new ArrayList<Integer>();
    dataSourceMap = new HashMap<Long, Set<String>>();
    scores = new HashMap<Long, Float>();
    globalDataSources = new HashSet<String>();

    // Resolve the selected snapshot ids into their data sources' master terms;
    // snapshots or databases that no longer resolve are silently skipped.
    if (globalFilterSettings != null) {
        DaoExternalDbSnapshot daoSnapShot = new DaoExternalDbSnapshot();
        for (Long snapshotId : globalFilterSettings.getSnapshotIdSet()) {
            ExternalDatabaseSnapshotRecord snapShotRecord = daoSnapShot.getDatabaseSnapshot(snapshotId);
            if (snapShotRecord == null)
                continue;
            ExternalDatabaseRecord externalDatabaseRecord = snapShotRecord.getExternalDatabase();
            if (externalDatabaseRecord == null)
                continue;
            globalDataSources.add(externalDatabaseRecord.getMasterTerm());
        }
    }

    DaoExternalDb dao = new DaoExternalDb();
    int index = 0;
    Highlighter highLighter = null;

    if (term != null) {
        //term = reformatTerm(term);
        highLighter = createHighlighter(term);
    }

    // Walk only the pager's window of hits.
    for (int i = pager.getStartIndex(); i < pager.getEndIndex(); i++) {
        Document doc = hits.doc(i);

        Field field = doc.getField(LuceneConfig.FIELD_CPATH_ID);
        if (field != null) {
            cpathIds[index++] = Long.parseLong(field.stringValue());
            scores.put(Long.parseLong(field.stringValue()), new Float(hits.score(i)));

            // In debug mode, record how this hit's score was computed.
            if (debug) {
                explanationMap.put(Long.parseLong(field.stringValue()),
                        indexSearcher.explain(query, hits.id(i)));
            }
        }

        if (highLighter != null) {
            extractFragment(doc, highLighter, term);
        }

        extractNumFamilyTree(doc, LuceneConfig.FIELD_NUM_DESCENDENTS, numDescendentsList);
        extractNumFamilyTree(doc, LuceneConfig.FIELD_NUM_PARENTS, numParentsList);
        extractNumFamilyTree(doc, LuceneConfig.FIELD_NUM_PARENT_PATHWAYS, numParentPathwaysList);
        extractNumFamilyTree(doc, LuceneConfig.FIELD_NUM_PARENT_INTERACTIONS, numParentInteractionsList);
        extractDataSourceMap(doc, dao);
    }
}

From source file:org.ohdsi.usagi.tests.TestLucene.java

License:Apache License

/**
 * Scratch test: builds a two-document index with term vectors, dumps each
 * document's term vector, then explains and runs a simple query on field "F".
 *
 * @param args unused
 * @throws IOException    on index read/write failure
 * @throws ParseException if the hard-coded query fails to parse
 */
public static void main(String[] args) throws IOException, ParseException {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    // Field type that stores term vectors (positions/payloads/offsets disabled).
    FieldType textVectorField = new FieldType();
    textVectorField.setIndexed(true);
    textVectorField.setTokenized(true);
    textVectorField.setStoreTermVectors(true);
    textVectorField.setStoreTermVectorPositions(false);
    textVectorField.setStoreTermVectorPayloads(false);
    textVectorField.setStoreTermVectorOffsets(false);
    textVectorField.setStored(true);
    textVectorField.freeze();

    // Start from a clean index directory.
    File indexFolder = new File(folder);
    if (indexFolder.exists())
        DirectoryUtilities.deleteDir(indexFolder);

    Directory dir = FSDirectory.open(indexFolder);
    try {
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
        iwc.setOpenMode(OpenMode.CREATE);
        iwc.setRAMBufferSizeMB(256.0);
        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            Document doc = new Document();
            doc.add(new Field("F", "word1 word2 w3 word4", textVectorField));
            writer.addDocument(doc);
            doc = new Document();
            doc.add(new Field("F", "word1 word2 w3", textVectorField));
            writer.addDocument(doc);
        } finally {
            // FIX: close the writer even if addDocument throws.
            writer.close();
        }

        // FIX: reuse the already-open Directory instead of opening a second
        // FSDirectory on the same folder, and close the reader when done —
        // the original leaked both the reader and the directories.
        IndexReader reader = DirectoryReader.open(dir);
        try {
            // Dump the term vector of each document.
            for (int i = 0; i < reader.numDocs(); i++) {
                TermsEnum termsEnum = reader.getTermVector(i, "F").iterator(null);
                BytesRef text;
                while ((text = termsEnum.next()) != null) {
                    System.out.print(text.utf8ToString() + ",");
                }
                System.out.println();
            }
            IndexSearcher searcher = new IndexSearcher(reader);

            QueryParser parser = new QueryParser(Version.LUCENE_4_9, "F", analyzer);
            Query query = parser.parse("word1");

            // Explain the score of both documents, then run the query.
            Explanation explanation = searcher.explain(query, 0);
            print(explanation);
            System.out.println();
            explanation = searcher.explain(query, 1);
            print(explanation);
            System.out.println();

            TopDocs topDocs = searcher.search(query, 99);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                System.out.println(scoreDoc.score + "\t" + reader.document(scoreDoc.doc).get("F"));
            }
        } finally {
            reader.close();
        }
    } finally {
        dir.close();
    }
}

From source file:org.talend.dataquality.standardization.index.Explainer.java

License:Open Source License

/**
 * Command-line tool: parses a query expression, runs it against an index, and
 * prints Lucene's scoring explanation for each of the top 10 matches.
 *
 * @param args args[0] = index directory, args[1] = query expression
 * @throws Exception on index access or query parse failure
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: Explainer <index dir> <query>");
        System.exit(1);
    }
    final String indexPath = args[0];
    final String expression = args[1];

    Directory indexStore = FSDirectory.open(new File(indexPath));
    QueryParser queryParser = new QueryParser(Version.LUCENE_30, "contents", new SimpleAnalyzer());
    Query parsedQuery = queryParser.parse(expression);
    System.out.println("Query: " + expression);

    IndexSearcher indexSearcher = new IndexSearcher(indexStore);
    TopDocs hits = indexSearcher.search(parsedQuery, 10);
    for (ScoreDoc hit : hits.scoreDocs) {
        // Explain how this document's score was computed for the query.
        Explanation scoreExplanation = indexSearcher.explain(parsedQuery, hit.doc);
        System.out.println("----------");
        Document matchedDoc = indexSearcher.doc(hit.doc);
        System.out.println(matchedDoc.get("title"));
        System.out.println(scoreExplanation.toString());
    }
    indexSearcher.close();
    indexStore.close();
}

From source file:practica1_2.SearchFiles.java

License:Apache License

/**
 * This demonstrates a typical paging search scenario, where the search engine presents 
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits./*from  w  w  w.  j a v  a  2s  . c o m*/
 * 
 * When the query is executed for the first time, then only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, then the query
 * is executed another time and all hits are collected.
 * 
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {

        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            System.out.println(searcher.explain(query, hits[i].doc));

            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }

        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}

From source file:practica1_2.SearchFiles_P2.java

License:Apache License

/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, then only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, then the query
 * is executed another time and all hits are collected.
 *
 * @param in          reader for interactive user commands
 * @param searcher    searcher to run the query against
 * @param query       query to execute and explain
 * @param hitsPerPage page size
 * @param raw         when true, print raw doc/score lines instead of paths
 * @param interactive when false, print the first page and return
 * @throws IOException on index access or console read failure
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {
    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {

        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            // FIX: readLine() returns null at end of stream — treat as "no"
            // instead of throwing NullPointerException.
            if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            // Show how each hit's score was computed.
            System.out.println(searcher.explain(query, hits[i].doc));

            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }

        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                // FIX: null (EOF) quits cleanly instead of NPE.
                if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    // FIX: reject non-numeric input instead of crashing with
                    // NumberFormatException.
                    int page;
                    try {
                        page = Integer.parseInt(line.trim());
                    } catch (NumberFormatException e) {
                        System.out.println("No such page");
                        continue;
                    }
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}

From source file:sisTradicional.SearchFiles.java

License:Apache License

/**
   * This demonstrates a typical paging search scenario, where the search engine presents 
   * pages of size n to the user. The user can then go to the next page if interested in
   * the next hits./*from w w  w .ja v a 2s .  co m*/
   * 
   * When the query is executed for the first time, then only enough results are collected
   * to fill 5 result pages. If the user wants to page beyond this limit, then the query
   * is executed another time and all hits are collected.
   * 
   */
public static void doPagingSearch(IndexSearcher searcher, Query query, String id, PrintWriter writer)
        throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 9999);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;

    for (int i = start; i < 50; i++) {

        Document doc = searcher.doc(hits[i].doc);
        Path path = Paths.get(doc.get("path"));
        //System.out.printf("%s\t%s\n",id, path.getFileName());
        writer.printf("%s\t%s\n", id, path.getFileName());
        String modified = doc.get("modified");
        if (path != null) {
            System.out.println((i + 1) + ". " + path);
            System.out.println("  modified: " + new Date(Long.parseLong(modified)).toString());
            System.out.println(searcher.explain(query, hits[i].doc));
        } else {
            System.out.println((i + 1) + ". " + "No path for this document");
        }

    }
}