Example usage for org.apache.lucene.search IndexSearcher doc

List of usage examples for org.apache.lucene.search IndexSearcher doc

Introduction

On this page you can find example usages of the org.apache.lucene.search IndexSearcher doc method.

Prototype

public Document doc(int docID) throws IOException 

Source Link

Document

Sugar for .getIndexReader().document(docID)

Usage

From source file:ead.editor.model.ModelIndex.java

License:Open Source License

/**
 * Gets the list of nodes that match a query parsed by {@code getQueryAllParser()}.
 *
 * <p>Hits are returned in the order produced by the collector's {@code topDocs()}, and at most
 * {@code MAX_SEARCH_HITS} are collected. Each hit's stored {@code editorIdFieldName} value is
 * parsed as an integer and looked up in {@code nodesById}; ids missing from the map yield
 * {@code null} entries in the result.
 *
 * <p>Any failure while parsing or searching is logged and the nodes gathered so far (possibly
 * none) are returned; this method does not propagate exceptions.
 *
 * @param queryText the query string to parse
 * @param nodesById lookup table from editor id to node
 * @return the matching nodes; never {@code null}
 */
public List<DependencyNode> searchAll(String queryText, Map<Integer, DependencyNode> nodesById) {

    ArrayList<DependencyNode> nodes = new ArrayList<DependencyNode>();
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(searchIndex);
        Query query = getQueryAllParser().parse(queryText);
        searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_SEARCH_HITS, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (ScoreDoc hit : hits) {
            String nodeId = searcher.doc(hit.doc).get(editorIdFieldName);
            nodes.add(nodesById.get(Integer.parseInt(nodeId)));
        }
    } catch (Exception e) {
        logger.error("Error parsing or looking up query '{}' in index", queryText, e);
    } finally {
        // Release index resources on every path, including parse/search failures.
        try {
            if (searcher != null) {
                searcher.close();
            }
        } catch (Exception e) {
            logger.error("Error closing searcher after query '{}'", queryText, e);
        }
        // The reader was opened here, so it is closed here as well; in Lucene 3.x a searcher
        // built from an existing reader does not close that reader itself.
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (Exception e) {
            logger.error("Error closing index reader after query '{}'", queryText, e);
        }
    }

    return nodes;
}

From source file:ead.editor.model.ModelIndex.java

License:Open Source License

/**
 * Gets the list of nodes that match a query against a single field.
 *
 * <p>The query text is parsed with a {@code QueryParser} for {@code Version.LUCENE_35} using
 * {@code searchAnalyzer} and {@code field} as the default field. Hits are returned in the order
 * produced by the collector's {@code topDocs()}, and at most {@code MAX_SEARCH_HITS} are
 * collected. Each hit's stored {@code editorIdFieldName} value is parsed as an integer and
 * looked up in {@code nodesById}; ids missing from the map yield {@code null} entries.
 *
 * <p>Any failure while parsing or searching is logged and the nodes gathered so far (possibly
 * none) are returned; this method does not propagate exceptions.
 *
 * @param field     default field for the query parser
 * @param queryText the query string to parse
 * @param nodesById lookup table from editor id to node
 * @return the matching nodes; never {@code null}
 */
public List<DependencyNode> search(String field, String queryText, Map<Integer, DependencyNode> nodesById) {

    ArrayList<DependencyNode> nodes = new ArrayList<DependencyNode>();
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(searchIndex);
        Query query = new QueryParser(Version.LUCENE_35, field, searchAnalyzer).parse(queryText);
        searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_SEARCH_HITS, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (ScoreDoc hit : hits) {
            String nodeId = searcher.doc(hit.doc).get(editorIdFieldName);
            nodes.add(nodesById.get(Integer.parseInt(nodeId)));
        }
    } catch (Exception e) {
        logger.error("Error parsing or looking up query '{}' in index", queryText, e);
    } finally {
        // Release index resources on every path, including parse/search failures.
        try {
            if (searcher != null) {
                searcher.close();
            }
        } catch (Exception e) {
            logger.error("Error closing searcher after query '{}'", queryText, e);
        }
        // In Lucene 3.5 a searcher built from an existing reader does not close that reader,
        // so the reader opened above must be closed explicitly.
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (Exception e) {
            logger.error("Error closing index reader after query '{}'", queryText, e);
        }
    }

    return nodes;
}

From source file:edu.albany.ir.example.SearchFiles.java

License:Apache License

/**
 * Runs a paged search and appends every displayed hit to a results file.
 *
 * <p>The query is executed once, collecting at most {@code 5 * hitsPerPage} hits; pages beyond
 * the collected hits are shown empty because no further hits are fetched. For each hit on the
 * current page that has a stored "path" field, the text after the last backslash of the path is
 * written, together with {@code q_id}, the zero-based rank and the score, as one line appended
 * to {@code out_f} and echoed to standard output. Hits without a path print their "url" and
 * "title" fields to standard output only.
 *
 * <p>In interactive mode the user is prompted after each page to go to the previous/next page,
 * jump to a page number, or quit. End of input is treated as quit, and input that is not a valid
 * page number prints "No such page" and prompts again.
 *
 * @param in          source of the user's paging commands (only read in interactive mode)
 * @param out_f       path of the results file; opened in append mode
 * @param searcher    searcher used to run the query and load documents
 * @param query       query to execute
 * @param hitsPerPage number of hits per page
 * @param raw         ignored; output always uses the result-file format
 * @param interactive whether to prompt for paging after each page
 * @param q_id        query identifier written at the start of each result line
 * @throws IOException if searching, reading input, or writing the results file fails
 */
public static void doPagingSearch(BufferedReader in, String out_f, IndexSearcher searcher, Query query,
        int hitsPerPage, boolean raw, boolean interactive, String q_id) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;
    BufferedWriter out = new BufferedWriter(new FileWriter(out_f, true));

    try {
        int numTotalHits = results.totalHits;
        System.out.println(numTotalHits + " total matching documents");

        int start = 0;

        while (true) {
            int end = Math.min(hits.length, start + hitsPerPage);

            for (int i = start; i < end; i++) {
                Document doc = searcher.doc(hits[i].doc);
                float score = hits[i].score;
                String path = doc.get("path");
                if (path != null) {
                    path = path.substring(path.lastIndexOf("\\") + 1);
                    String record = q_id + " 0  " + path + "  " + i + " " + score + "  CSI550";
                    out.write(record + "\n");
                    System.out.println(record);
                } else {
                    String url = doc.get("url");
                    if (url != null) {
                        System.out.println(i + ". " + url);
                        System.out.println("   - " + doc.get("title"));
                    } else {
                        System.out.println(i + ". " + "No path nor URL for this document");
                    }
                }
            }
            // Push this page to disk but keep the writer open: later pages write to it too.
            out.flush();

            if (!interactive || end == 0) {
                break;
            }

            if (numTotalHits >= end) {
                boolean quit = false;
                while (true) {
                    System.out.print("Press ");
                    if (start - hitsPerPage >= 0) {
                        System.out.print("(p)revious page, ");
                    }
                    if (start + hitsPerPage < numTotalHits) {
                        System.out.print("(n)ext page, ");
                    }
                    System.out.println("(q)uit or enter number to jump to a page.");

                    String line = in.readLine();
                    // A null line means end of input; treat it like an explicit quit.
                    if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
                        quit = true;
                        break;
                    }
                    if (line.charAt(0) == 'p') {
                        start = Math.max(0, start - hitsPerPage);
                        break;
                    } else if (line.charAt(0) == 'n') {
                        if (start + hitsPerPage < numTotalHits) {
                            start += hitsPerPage;
                        }
                        break;
                    } else {
                        int page;
                        try {
                            page = Integer.parseInt(line);
                        } catch (NumberFormatException e) {
                            System.out.println("No such page");
                            continue;
                        }
                        // long arithmetic avoids int overflow for very large page numbers
                        long pageStart = (long) (page - 1) * hitsPerPage;
                        // page numbers are 1-based; reject 0 and negatives so start stays >= 0
                        if (page >= 1 && pageStart < numTotalHits) {
                            start = (int) pageStart;
                            break;
                        } else {
                            System.out.println("No such page");
                        }
                    }
                }
                if (quit) {
                    break;
                }
            }
        }
    } finally {
        // Close exactly once, whether paging ended normally or an exception escaped.
        out.close();
    }
}

From source file:edu.cmu.cs.in.search.HoopLuceneSearch.java

License:Apache License

/**
 * This demonstrates a typical paging search scenario, where the search engine presents 
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits./*from   w ww  .ja  v a2 s  . c  o m*/
 * 
 * When the query is executed for the first time, then only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, then the query
 * is executed another time and all hits are collected.
 * 
 */
public void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw,
        boolean interactive) throws IOException {
    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;

    debug(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            debug("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            debug("Collect more (y/n) ?");
            String line = in.readLine();

            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                debug("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");

            if (path != null) {
                debug((i + 1) + ". " + path);
                String title = doc.get("title");

                if (title != null) {
                    debug("   Title: " + doc.get("title"));
                }
            } else {
                debug((i + 1) + ". " + "No path for this document");
            }
        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;

            while (true) {
                System.out.print("Press ");

                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }

                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }

                debug("(q)uit or enter number to jump to a page.");

                String line = in.readLine();

                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }

                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }

                    break;
                } else {
                    int page = Integer.parseInt(line);

                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        debug("No such page");
                    }
                }
            }

            if (quit)
                break;

            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}

From source file:edu.cmu.geolocator.resource.gazindexing.CollaborativeIndex.GazInfoIndexerAllCountries.java

License:Apache License

/**
 * Command-line entry point with two modes selected by {@code argv[0]}.
 *
 * <p>{@code -write} clears the index under {@code GlobalParam.getGazIndex() + "/InfoIndex"} and
 * re-indexes {@code allCountries.txt} from {@code GlobalParam.getGeoNames()}. {@code -read}
 * reads numeric ids from standard input, one per line, and prints the first document whose
 * "ID" field equals that id, together with the lookup time in milliseconds.
 *
 * @param argv command-line arguments; {@code argv[0]} is the mode
 * @throws Exception if no argument is given, or if indexing or searching fails
 */
public static void main(String argv[]) throws Exception {

    if (argv.length < 1) {
        throw new Exception("Command line argument number wrong");
    }

    GazInfoIndexerAllCountries gi = new GazInfoIndexerAllCountries();

    final String mode = argv[0];

    if (mode.equals("-write")) {
        String gazetteerFile = GlobalParam.getGeoNames() + "/allCountries.txt";
        String indexDir = GlobalParam.getGazIndex() + "/InfoIndex";
        BufferedReader source = GetReader.getUTF8FileReader(gazetteerFile);
        IndexWriter writer = GetWriter.getIndexWriter(indexDir, 1400);
        // Start from an empty index before re-indexing the gazetteer.
        writer.deleteAll();
        gi.indexGazatteer(source, writer);
        writer.close();
        source.close();
    }

    if (mode.equals("-read")) {
        System.out.println("input id. Output basic information. For debugging.");
        // NOTE(review): this reads from "/GazIndex/" while -write targets "/InfoIndex" - confirm.
        String indexDir = GlobalParam.getGazIndex() + "/GazIndex/";
        IndexSearcher searcher = GetReader.getIndexSearcher(indexDir, "disk");
        BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in, "utf-8"));

        for (String line = stdin.readLine(); line != null; line = stdin.readLine()) {
            long id;
            try {
                id = Long.parseLong(line);
            } catch (Exception e) {
                System.err.println("number wrong.");
                continue;
            }

            // Exact match expressed as an inclusive range [id, id].
            Query idQuery = NumericRangeQuery.newLongRange("ID", id, id, true, true);

            long startedAt = System.currentTimeMillis();
            TopDocs found = searcher.search(idQuery, 1);
            if (found == null || found.scoreDocs.length == 0) {
                System.err.println("Not found.");
                continue;
            }
            Document match = searcher.doc(found.scoreDocs[0].doc);
            long finishedAt = System.currentTimeMillis();

            System.out.println(match);
            System.out.println(match.get("ID"));
            System.out.println(match.get("ORIGIN"));
            System.out.println(match.get("LONGTITUDE") + " " + match.get("LATITUDE"));
            System.out.println("lookup time: " + (finishedAt - startedAt));
        }
    }
}

From source file:edu.cmu.geolocator.resource.gazindexing.CollaborativeIndex.GazStringIndexerAllCountries.java

License:Apache License

/**
 * Command-line entry point with two modes selected by {@code argv[0]}.
 *
 * <p>{@code -write} clears the index under {@code GlobalParam.getGazIndex() + "/StringIndex/"}
 * and re-indexes {@code allCountries.txt} from {@code GlobalParam.getGeoNames()}. {@code -read}
 * reads numeric ids from standard input, one per line, and prints the first document whose
 * "ID" field equals that id.
 *
 * @param argv command-line arguments; {@code argv[0]} is the mode
 * @throws Exception if no argument is given, or if indexing or searching fails
 */
public static void main(String argv[]) throws Exception {

    if (argv.length < 1)
        throw new Exception("Command line argument number wrong");

    GazStringIndexerAllCountries gi = new GazStringIndexerAllCountries();

    String mode = argv[0];

    if (mode.equals("-write")) {
        String argv1 = GlobalParam.getGeoNames() + "/allCountries.txt";
        String argv2 = GlobalParam.getGazIndex() + "/StringIndex/";

        BufferedReader br = GetReader.getUTF8FileReader(argv1);
        IndexWriter iw = GetWriter.getIndexWriter(argv2, 1600);
        // Start from an empty index before re-indexing the gazetteer.
        iw.deleteAll();
        gi.indexGazatteer(br, iw);
        iw.close();
        br.close();
    }
    if (mode.equals("-read")) {
        System.out.println("input id. Output basic information. For debugging.");
        String argv1 = GlobalParam.getGazIndex() + "/StringIndex";
        IndexSearcher is = GetReader.getIndexSearcher(argv1, "disk");
        BufferedReader r = new BufferedReader(new InputStreamReader(System.in, "utf-8"));
        String line;
        while ((line = r.readLine()) != null) {

            long id;
            try {
                id = Long.parseLong(line);
            } catch (Exception e) {
                System.err.println("number wrong.");
                continue;
            }

            // Exact match expressed as an inclusive range [id, id].
            Query q = NumericRangeQuery.newLongRange("ID", id, id, true, true);

            // The lookup was previously timed into locals that were never read; that dead
            // bookkeeping has been dropped.
            TopDocs docs = is.search(q, 1);
            if (docs == null) {
                System.err.println("Not found.");
                continue;
            }
            if (docs.scoreDocs.length == 0) {
                System.err.println("Not found.");
                continue;
            }
            ScoreDoc sd = docs.scoreDocs[0];
            Document d = is.doc(sd.doc);
            System.out.println(d);
            System.out.println(d.get("ID"));
            System.out.println(d.get("LOWERED_ORIGIN"));
        }
    }
}

From source file:edu.cmu.geolocator.resource.gazindexing.CollaborativeIndex.GazStringIndexerAltNames.java

License:Apache License

/**
 * Command-line entry point with two modes selected by {@code argv[0]}.
 *
 * <p>{@code -write} indexes {@code alternateNames.txt} from {@code GlobalParam.getGeoNames()}
 * followed by {@code res/geonames/addition.txt} into the index under
 * {@code GlobalParam.getGazIndex() + "/StringIndex/"}. {@code -read} reads one term per line
 * from standard input, looks it up in the "LOWERED-NO-WS" field (up to 100 hits) and prints
 * each matching document, the total hit count and the search time in milliseconds.
 *
 * @param argv command-line arguments; {@code argv[0]} is the mode
 * @throws Exception if no argument is given, or if indexing or searching fails
 */
public static void main(String argv[]) throws Exception {

    if (argv.length < 1)
        throw new Exception("Command line argument number wrong");

    GazStringIndexerAltNames stringIndex = new GazStringIndexerAltNames();

    String mode = argv[0];
    if (mode.equals("-write")) {
        String argv1 = GlobalParam.getGeoNames() + "/alternateNames.txt";
        String argv2 = GlobalParam.getGazIndex() + "/StringIndex/";
        BufferedReader br = GetReader.getUTF8FileReader(argv1);

        // add the list that adds the items into the gazetteer.
        String argv3 = "res/geonames/addition.txt";
        BufferedReader br2 = GetReader.getUTF8FileReader(argv3);

        IndexWriter iw = GetWriter.getIndexWriter(argv2, 1024 + 512);

        stringIndex.indexAlterNames(br, iw);

        stringIndex.indexAlterNames(br2, iw);

        iw.close();
        br.close();
        // The additions reader was previously left open.
        br2.close();
    }
    if (mode.equals("-read")) {
        System.out.println("input id. Output basic information. For debugging.");
        String argv1 = GlobalParam.getGazIndex() + "/StringIndex";
        IndexSearcher is = GetReader.getIndexSearcher(argv1, "disk");
        BufferedReader r = new BufferedReader(new InputStreamReader(System.in, "utf-8"));
        String line;
        while ((line = r.readLine()) != null) {

            // The search is unfiltered, so the TermFilter that used to be built here per line
            // (and never used) has been removed.
            Query q = new TermQuery(new Term("LOWERED-NO-WS", line));
            long start = System.currentTimeMillis();
            TopDocs docs = is.search(q, 100);
            long end = System.currentTimeMillis();

            if (docs == null) {
                System.err.println("Not found.");
                continue;
            }
            if (docs.scoreDocs.length == 0) {
                System.err.println("Not found.");
                continue;
            }
            for (ScoreDoc sd : docs.scoreDocs) {
                Document d = is.doc(sd.doc);
                System.out.println(d);
                System.out.println(d.get("ID"));
                System.out.println(d.get("LOWERED_ORIGIN"));
                System.out.println(d.get("LANG"));
            }
            System.out.println(docs.totalHits);
            System.out.println("lookup time: " + (end - start));

        }
    }
}

From source file:edu.cmu.geoparser.resource.gazindexing.CollaborativeIndex.GazInfoIndexerAllCountries.java

License:Apache License

/**
 * Command-line entry point that rebuilds the gazetteer info index.
 *
 * <p>The mode is hard-coded to {@code -write}, and the input file and index directory are
 * hard-coded as well; the values passed on the command line are not used, but exactly three
 * arguments are still required. The {@code -read} branch (look up documents by numeric "ID"
 * read from standard input) is kept but is not reachable while the mode is fixed.
 *
 * @param argv command-line arguments; only their count is checked
 * @throws Exception if the argument count is not 3, or if indexing fails
 */
public static void main(String argv[]) throws Exception {

    GazInfoIndexerAllCountries gi = new GazInfoIndexerAllCountries();

    // NOTE(review): the mode is forced here, as in the original; argv[0] is ignored.
    // Held in a local instead of being written into argv, which threw
    // ArrayIndexOutOfBoundsException when no arguments were supplied.
    String mode = "-write";

    if (mode.equals("-write")) {
        if (argv.length != 3)
            throw new Exception("Command line argument number wrong");
        String gazetteerFile = "GeoNames/allCountries.txt";
        String indexDir = "GazIndex/InfoIndex";
        BufferedReader br = GetReader.getUTF8FileReader(gazetteerFile);
        IndexWriter iw = GetWriter.getIndexWriter(indexDir, 1400);
        // Start from an empty index before re-indexing the gazetteer.
        iw.deleteAll();
        gi.indexGazatteer(br, iw);
        iw.close();
        br.close();
    }
    if (mode.equals("-read")) {
        System.out.println("input id. Output basic information. For debugging.");
        String indexDir = "/Users/Indri/Eclipse_workspace/GazIndex/";
        IndexSearcher is = GetReader.getIndexSearcher(indexDir, "disk");
        BufferedReader r = new BufferedReader(new InputStreamReader(System.in, "utf-8"));
        String line;
        while ((line = r.readLine()) != null) {

            long id;
            try {
                id = Long.parseLong(line);
            } catch (Exception e) {
                System.err.println("number wrong.");
                continue;
            }

            // Exact match expressed as an inclusive range [id, id].
            Query q = NumericRangeQuery.newLongRange("ID", id, id, true, true);

            long start = System.currentTimeMillis();
            TopDocs docs = is.search(q, 1);
            if (docs == null) {
                System.err.println("Not found.");
                continue;
            }
            if (docs.scoreDocs.length == 0) {
                System.err.println("Not found.");
                continue;
            }
            ScoreDoc sd = docs.scoreDocs[0];
            Document d = is.doc(sd.doc);
            long end = System.currentTimeMillis();
            System.out.println(d);
            System.out.println(d.get("ID"));
            System.out.println(d.get("ORIGIN"));
            System.out.println(d.get("LONGTITUDE") + " " + d.get("LATITUDE"));
            System.out.println("lookup time: " + (end - start));
        }
    }
}

From source file:edu.cmu.geoparser.resource.gazindexing.CollaborativeIndex.GazStringIndexerAllCountries.java

License:Apache License

/**
 * Command-line entry point that looks up documents in the string index by numeric id.
 *
 * <p>The mode is hard-coded to {@code -read} and the index directory is hard-coded as well, so
 * command-line arguments are not needed. Ids are read from standard input, one per line, and
 * the first document whose "ID" field equals the id is printed. The {@code -write} branch
 * (rebuild the index from {@code GeoNames/allCountries.txt}) is kept but is not reachable while
 * the mode is fixed.
 *
 * @param argv command-line arguments; unused in the hard-coded read mode
 * @throws Exception if searching or reading input fails
 */
public static void main(String argv[]) throws Exception {

    GazStringIndexerAllCountries gi = new GazStringIndexerAllCountries();

    // NOTE(review): the mode is forced here, as in the original; argv[0] is ignored.
    // Held in a local instead of being written into argv, which threw
    // ArrayIndexOutOfBoundsException when fewer than two arguments were supplied.
    String mode = "-read";

    if (mode.equals("-write")) {
        if (argv.length != 3)
            throw new Exception("Command line argument number wrong");
        String gazetteerFile = "GeoNames/allCountries.txt";
        String indexDir = "GazIndex/StringIndex/";

        BufferedReader br = GetReader.getUTF8FileReader(gazetteerFile);
        IndexWriter iw = GetWriter.getIndexWriter(indexDir, 1600);
        // Start from an empty index before re-indexing the gazetteer.
        iw.deleteAll();
        gi.indexGazatteer(br, iw);
        iw.close();
        br.close();
    }
    if (mode.equals("-read")) {
        System.out.println("input id. Output basic information. For debugging.");
        String indexDir = "GazIndex/StringIndex";
        IndexSearcher is = GetReader.getIndexSearcher(indexDir, "disk");
        BufferedReader r = new BufferedReader(new InputStreamReader(System.in, "utf-8"));
        String line;
        while ((line = r.readLine()) != null) {

            long id;
            try {
                id = Long.parseLong(line);
            } catch (Exception e) {
                System.err.println("number wrong.");
                continue;
            }

            // Exact match expressed as an inclusive range [id, id].
            Query q = NumericRangeQuery.newLongRange("ID", id, id, true, true);

            // The lookup was previously timed into locals that were never read; that dead
            // bookkeeping has been dropped.
            TopDocs docs = is.search(q, 1);
            if (docs == null) {
                System.err.println("Not found.");
                continue;
            }
            if (docs.scoreDocs.length == 0) {
                System.err.println("Not found.");
                continue;
            }
            ScoreDoc sd = docs.scoreDocs[0];
            Document d = is.doc(sd.doc);
            System.out.println(d);
            System.out.println(d.get("ID"));
            System.out.println(d.get("LOWERED_ORIGIN"));
        }
    }
}

From source file:edu.cmu.geoparser.resource.gazindexing.CollaborativeIndex.GazStringIndexerAltNames.java

License:Apache License

/**
 * Command-line entry point that looks up alternate-name documents by term.
 *
 * <p>The mode is hard-coded to {@code -read} and the index directory is hard-coded as well, so
 * command-line arguments are not needed. One term per line is read from standard input and
 * looked up in the "LOWERED-NO-WS" field (up to 100 hits); each matching document, the total
 * hit count and the search time in milliseconds are printed. The {@code -write} branch (index
 * {@code GeoNames/alternateNames.txt}) is kept but is not reachable while the mode is fixed.
 *
 * @param argv command-line arguments; unused in the hard-coded read mode
 * @throws Exception if searching or reading input fails
 */
public static void main(String argv[]) throws Exception {

    GazStringIndexerAltNames stringIndex = new GazStringIndexerAltNames();

    // NOTE(review): the mode is forced here, as in the original; argv[0] is ignored.
    // Held in a local instead of being written into argv, which threw
    // ArrayIndexOutOfBoundsException when fewer than two arguments were supplied.
    String mode = "-read";

    if (mode.equals("-write")) {
        if (argv.length != 3)
            throw new Exception("Command line argument number wrong");
        String namesFile = "GeoNames/alternateNames.txt";
        String indexDir = "GazIndex/StringIndex/";
        BufferedReader br = GetReader.getUTF8FileReader(namesFile);
        IndexWriter iw = GetWriter.getIndexWriter(indexDir, 1024 + 512);
        stringIndex.indexAlterNames(br, iw);
        iw.close();
        br.close();
    }
    if (mode.equals("-read")) {
        System.out.println("input id. Output basic information. For debugging.");
        String indexDir = "GazIndex/StringIndex";
        IndexSearcher is = GetReader.getIndexSearcher(indexDir, "disk");
        BufferedReader r = new BufferedReader(new InputStreamReader(System.in, "utf-8"));
        String line;
        while ((line = r.readLine()) != null) {

            // The search is unfiltered, so the TermFilter that used to be built here per line
            // (and never used) has been removed.
            Query q = new TermQuery(new Term("LOWERED-NO-WS", line));
            long start = System.currentTimeMillis();
            TopDocs docs = is.search(q, 100);
            long end = System.currentTimeMillis();

            if (docs == null) {
                System.err.println("Not found.");
                continue;
            }
            if (docs.scoreDocs.length == 0) {
                System.err.println("Not found.");
                continue;
            }
            for (ScoreDoc sd : docs.scoreDocs) {
                Document d = is.doc(sd.doc);
                System.out.println(d);
                System.out.println(d.get("ID"));
                System.out.println(d.get("LOWERED_ORIGIN"));
                System.out.println(d.get("LANG"));
            }
            System.out.println(docs.totalHits);
            System.out.println("lookup time: " + (end - start));

        }
    }
}