Example usage for org.apache.lucene.search IndexSearcher doc

List of usage examples for org.apache.lucene.search IndexSearcher doc

Introduction

In this page you can find the example usage for org.apache.lucene.search IndexSearcher doc.

Prototype

public Document doc(int docID) throws IOException 

Source Link

Document

Sugar for .getIndexReader().document(docID)

Usage

From source file:buscador.SearchFiles.java

License:Apache License

/**
 * This demonstrates a typical paging search scenario, where the search engine presents 
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits./*from ww  w.ja  v  a2  s . c  om*/
 * 
 * When the query is executed for the first time, then only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, then the query
 * is executed another time and all hits are collected.
 * 
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
            String modified = doc.get("modified");
            if (modified != null) {
                System.out.println(modified + ": " + new Date(new Long(modified)));
            } else {
                System.out.println((i + 1) + ". " + "No last modified for this document");
            }

        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}

From source file:bzh.terrevirtuelle.navisu.gazetteer.impl.lucene.GeoNameResolver.java

License:Apache License

private void getMatchingCandidates(IndexSearcher searcher, HashMap<String, List<Location>> allCandidates,
        String name, ScoreDoc[] hits) {
    List<Location> topHits = new ArrayList<Location>();

    for (int i = 0; i < hits.length; ++i) {
        Location tmpLocObj = new Location();

        int docId = hits[i].doc;
        Document d;/*from w  w w.  ja v a  2s  .c  o  m*/
        try {
            d = searcher.doc(docId);
            tmpLocObj.setName(d.get(FIELD_NAME_NAME));
            tmpLocObj.setLongitude(d.get(FIELD_NAME_LONGITUDE));
            tmpLocObj.setLatitude(d.get(FIELD_NAME_LATITUDE));
            //If alternate names are empty put name as actual name
            //This covers missing data and equals weight for later computation
            if (d.get(FIELD_NAME_ALTERNATE_NAMES).isEmpty()) {
                tmpLocObj.setAlternateNames(d.get(FIELD_NAME_NAME));
            } else {
                tmpLocObj.setAlternateNames(d.get(FIELD_NAME_ALTERNATE_NAMES));
            }
            tmpLocObj.setCountryCode(d.get(FIELD_NAME_COUNTRY_CODE));
            tmpLocObj.setAdmin1Code(d.get(FIELD_NAME_ADMIN1_CODE));
            tmpLocObj.setAdmin2Code(d.get(FIELD_NAME_ADMIN2_CODE));
            tmpLocObj.setFeatureCode(d.get(FIELD_NAME_FEATURE_CODE));

        } catch (IOException e) {
            e.printStackTrace();
        }
        topHits.add(tmpLocObj);
    }
    //Picking hitsPerPage number of locations from feature code sorted list 
    allCandidates.put(name, pickTopSortedByCode(topHits, hitsPerPage));
}

From source file:ca.mcgill.cs.creco.logic.search.CategorySearch.java

License:Apache License

@Override
public List<Category> queryCategories(String pQueryString) {
    List<Category> searchResult = new ArrayList<Category>();
    try {/*from  w ww.j a va  2s.  c o  m*/
        DirectoryReader reader = DirectoryReader.open(aDirectory);
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector results = TopScoreDocCollector.create(MAX_NUM_RESULTS, true);

        // Search category names
        Query categoryNameQuery = new QueryParser(VERSION, CATEGORY_NAME, aAnalyzer).parse(pQueryString);
        searcher.search(categoryNameQuery, results);

        // Search flattened text (only product names for now)
        Query flattenedTextQuery = new QueryParser(VERSION, FLATTENED_TEXT, aAnalyzer).parse(pQueryString);
        searcher.search(flattenedTextQuery, results);

        for (ScoreDoc scoredResult : results.topDocs().scoreDocs) {
            Document doc = searcher.doc(scoredResult.doc);
            Category resultCategory = aDataStore.getCategory(doc.get(CATEGORY_ID));

            if (!searchResult.contains(resultCategory) && resultCategory.getNumberOfProducts() > 0) {
                searchResult.add(resultCategory);
            }
        }
    } catch (IOException e) {
        LOG.error(e.getMessage());
    } catch (ParseException e) {
        LOG.error(e.getMessage());
    }
    return searchResult;
}

From source file:ca.pgon.freenetknowledge.search.impl.LuceneSearchEngine.java

License:Apache License

@Override
public List<SearchResultEntry> searchTerm(String term) {
    IndexSearcher indexSearcher = null;
    try {/*w  w  w .  j  a v a 2  s  . c  o  m*/
        // Init the needed components
        IndexReader indexReader = IndexReader.open(directory);
        indexSearcher = new IndexSearcher(indexReader);
        QueryParser queryParser = new QueryParser(LUCENE_VERSION, INDEX_CONTENT, analyzer);

        // Create the query
        Query query;
        query = queryParser.parse(term);

        // Get the search result
        TopDocs topDocs = indexSearcher.search(query, LUCENE_MAX_HITS);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;

        // Change them to urls
        Map<String, SearchResultEntry> alreadyIn = new HashMap<>();
        List<SearchResultEntry> results = new ArrayList<>();
        for (ScoreDoc sd : scoreDocs) {
            // Get the url
            Document document = indexSearcher.doc(sd.doc);
            String urlId = document.get(INDEX_FOR_URL);

            SearchResultEntry sre;
            if (alreadyIn.containsKey(urlId)) {
                sre = alreadyIn.get(urlId);
            } else {
                sre = new SearchResultEntry();
                UrlEntity ue = urlDAO.get(Integer.valueOf(urlId));
                sre.urlEntity = ue;

                if (ue == null) {
                    continue;
                }
                if (ue.isError()) {
                    continue;
                }

                alreadyIn.put(urlId, sre);
                results.add(sre);
            }

            // Add the description
            String fullDescription = document.get(INDEX_CONTENT);
            String partialDescription = SearchTools.getPartAround(fullDescription,
                    SearchTools.findWordPosition(fullDescription, term), LUCENE_MAX_DESCRIPTION_CARACTERS);
            partialDescription = partialDescription.replace('\n', ' ');
            partialDescription = partialDescription.replace('\r', ' ');
            if (!sre.description.contains(partialDescription)) {
                sre.description.add(partialDescription);
            }
        }

        return results;
    } catch (ParseException e) {
        logger.log(Level.SEVERE, "Error while parsing the search term", e);
    } catch (IOException e) {
        logger.log(Level.SEVERE, "Error while searching", e);
    } finally {
        if (indexSearcher != null) {
            try {
                indexSearcher.close();
            } catch (IOException e) {
            }
        }
    }

    return null;
}

From source file:calliope.search.AeseSearch.java

License:Open Source License

/**
 * Search the index for the given expression
 * @param expr the expression to be parsed
 * @param langCode the language of the expression and index
 * @param profile the hit profile (where to start from etc)
 * @return the result docs/*from  www . j a  va 2  s .c om*/
 */
public static String searchIndex(String expr, String langCode, HitProfile profile) {
    StringBuilder sb = new StringBuilder();
    try {
        Analyzer analyzer = AeseSearch.createAnalyzer(langCode);
        DirectoryReader reader = DirectoryReader.open(AeseSearch.index);
        if (reader != null) {
            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser qp = new QueryParser(Version.LUCENE_45, "text", analyzer);
            Query q = qp.parse(expr);
            TopDocs hits = searcher.search(q, AeseSearch.maxHits);
            ScoreDoc[] docs = hits.scoreDocs;
            for (int j = profile.from; j < profile.to && j < docs.length; j++) {
                Document doc = searcher.doc(docs[j].doc);
                String vid = doc.get(LuceneFields.VID);
                String docID = doc.get(LuceneFields.DOCID);
                Highlighter h = new Highlighter(new QueryScorer(q));
                String text = getCorTexVersion(docID, vid);
                sb.append(formatDocID(docID));
                sb.append(" ");
                sb.append(formatVersionID(vid));
                sb.append(" ");
                String frag = h.getBestFragment(analyzer, "text", text);
                sb.append("<span class=\"found\">");
                sb.append(frag);
                sb.append("</span>\n");
            }
            profile.numHits = docs.length;
        }
        reader.close();
    } catch (Exception e) {
        sb.append(e.getMessage());
    }
    return sb.toString();
}

From source file:cc.pp.analyzer.ik.demo.IKAnalyzerDemo.java

License:Apache License

public static void main(String[] args) {

    //Lucene Document??
    String fieldName = "text";
    ////from   w ww . ja  v  a  2 s.  co m
    String text = "IK Analyzer???????";

    //IKAnalyzer?
    Analyzer analyzer = new IKAnalyzer(Version.LUCENE_48, true);

    Directory directory = null;
    IndexWriter iwriter = null;
    DirectoryReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        //
        directory = new RAMDirectory();

        //?IndexWriterConfig
        IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_48, analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);
        //
        Document doc = new Document();
        doc.add(new LongField("ID", 1000, Field.Store.YES));
        doc.add(new TextField(fieldName, text, Field.Store.YES));
        iwriter.addDocument(doc);
        iwriter.close();

        //?**********************************
        //?
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        //         String keyword = "";
        //QueryParser?Query
        QueryParser qp = new QueryParser(Version.LUCENE_48, fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        //?5?
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("" + topDocs.totalHits);
        //
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < topDocs.totalHits; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("" + targetDoc.toString());
        }

    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:cc.twittertools.search.local.RunQueries.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(//  ww w  .  j a v a2  s  . c o m
            OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
            .create(NUM_RESULTS_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("file containing topics in TREC format").create(QUERIES_OPTION));
    options.addOption(OptionBuilder.withArgName("similarity").hasArg()
            .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(QUERIES_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(RunQueries.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;

    String topicsFile = cmdline.getOptionValue(QUERIES_OPTION);

    int numResults = 1000;
    try {
        if (cmdline.hasOption(NUM_RESULTS_OPTION)) {
            numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION));
        }
    } catch (NumberFormatException e) {
        System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION));
        System.exit(-1);
    }

    String similarity = "LM";
    if (cmdline.hasOption(SIMILARITY_OPTION)) {
        similarity = cmdline.getOptionValue(SIMILARITY_OPTION);
    }

    boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    IndexSearcher searcher = new IndexSearcher(reader);

    if (similarity.equalsIgnoreCase("BM25")) {
        searcher.setSimilarity(new BM25Similarity());
    } else if (similarity.equalsIgnoreCase("LM")) {
        searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
    }

    QueryParser p = new QueryParser(Version.LUCENE_43, StatusField.TEXT.name, IndexStatuses.ANALYZER);

    TrecTopicSet topics = TrecTopicSet.fromFile(new File(topicsFile));
    for (TrecTopic topic : topics) {
        Query query = p.parse(topic.getQuery());
        Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, topic.getQueryTweetTime(),
                true, true);

        TopDocs rs = searcher.search(query, filter, numResults);

        int i = 1;
        for (ScoreDoc scoreDoc : rs.scoreDocs) {
            Document hit = searcher.doc(scoreDoc.doc);
            out.println(String.format("%s Q0 %s %d %f %s", topic.getId(),
                    hit.getField(StatusField.ID.name).numericValue(), i, scoreDoc.score, runtag));
            if (verbose) {
                out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " "));
            }
            i++;
        }
    }
    reader.close();
    out.close();
}

From source file:cc.twittertools.search.local.SearchStatuses.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(//from   w  ww. j av a  2  s  .c  o m
            OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("query id").create(QID_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("query text").create(QUERY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("maxid").create(MAX_ID_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
            .create(NUM_RESULTS_OPTION));
    options.addOption(OptionBuilder.withArgName("similarity").hasArg()
            .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(QUERY_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(SearchStatuses.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    String qid = cmdline.hasOption(QID_OPTION) ? cmdline.getOptionValue(QID_OPTION) : DEFAULT_QID;
    String queryText = cmdline.hasOption(QUERY_OPTION) ? cmdline.getOptionValue(QUERY_OPTION) : DEFAULT_Q;
    String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;
    long maxId = cmdline.hasOption(MAX_ID_OPTION) ? Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION))
            : DEFAULT_MAX_ID;
    int numResults = cmdline.hasOption(NUM_RESULTS_OPTION)
            ? Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION))
            : DEFAULT_NUM_RESULTS;
    boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    String similarity = "LM";
    if (cmdline.hasOption(SIMILARITY_OPTION)) {
        similarity = cmdline.getOptionValue(SIMILARITY_OPTION);
    }

    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    IndexSearcher searcher = new IndexSearcher(reader);

    if (similarity.equalsIgnoreCase("BM25")) {
        searcher.setSimilarity(new BM25Similarity());
    } else if (similarity.equalsIgnoreCase("LM")) {
        searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
    }

    QueryParser p = new QueryParser(Version.LUCENE_43, IndexStatuses.StatusField.TEXT.name,
            IndexStatuses.ANALYZER);
    Query query = p.parse(queryText);
    Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, maxId, true, true);

    TopDocs rs = searcher.search(query, filter, numResults);

    int i = 1;
    for (ScoreDoc scoreDoc : rs.scoreDocs) {
        Document hit = searcher.doc(scoreDoc.doc);

        out.println(String.format("%s Q0 %s %d %f %s", qid, hit.getField(StatusField.ID.name).numericValue(), i,
                scoreDoc.score, runtag));
        if (verbose) {
            out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " "));
        }
        i++;
    }

    reader.close();
    out.close();
}

From source file:ccc.plugins.search.lucene.CapturingHandler.java

License:Open Source License

/**
 * Determine the CCC resource id for a search hit.
 *
 * @param searcher The lucene index searcher.
 * @param docId The lucene document id.//from   w ww . j a va  2 s  .  co  m
 *
 * @return The CCC resource id.
 *
 * @throws IOException If an error occurs while reading the index.
 */
protected UUID lookupResourceId(final IndexSearcher searcher, final int docId) throws IOException {
    return UUID.fromString(searcher.doc(docId).getField("id").stringValue());
}

From source file:ch.admin.isb.hermes5.business.search.SearchEngine.java

License:Apache License

private SearchResult buildSearchResult(IndexSearcher isearcher, HighlighterWrapper highlighter, int id)
        throws IOException, InvalidTokenOffsetsException {
    Document hitDoc = isearcher.doc(id);
    String presentationName = hitDoc.getField("presentationName").stringValue();
    SearchResultType type = SearchResultType.valueOf(hitDoc.getField("type").stringValue());
    String name = hitDoc.getField("name").stringValue();
    String fragment = highlighter.getHighlightedText(id, "content");
    return new SearchResult(presentationName, name, fragment, type);
}