Example usage for org.apache.lucene.search IndexSearcher doc

List of usage examples for org.apache.lucene.search IndexSearcher doc

Introduction

In this page you can find the example usage for org.apache.lucene.search IndexSearcher doc.

Prototype

public Document doc(int docID) throws IOException 

Source Link

Document

Sugar for .getIndexReader().document(docID)

Usage

From source file:coreservlets.consolesearch.java

License:Apache License

/**
 * Demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, the query
 * is executed another time and all hits are collected.
 *
 * @param in          console reader used for the interactive paging commands
 * @param searcher    searcher to run the query against
 * @param query       the parsed user query
 * @param hitsPerPage page size n
 * @param raw         if true, print raw doc ids and scores instead of stored fields
 * @param interactive if false, print the first page and return
 * @throws IOException if the searcher or the console reader fails
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;
    System.out.println("hitsperpage is :" + hitsPerPage);
    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        System.out.println("now hits length is:" + hits.length);
        System.out.println("now end is:" + end);
        if (end > hits.length) {
            // User paged past the collected window: offer to re-run the query
            // collecting every hit.
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            // readLine() returns null at end-of-stream; treat it like "n".
            if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String url = doc.get("url");
            if (url != null) {
                System.out.println((i + 1) + ". " + url);
                String title = doc.get("title");
                if (title != null) {
                    System.out.println(" Title: " + title);
                    String origstr = doc.getField("contents").stringValue();
                    System.out.println(origstr);
                }
            } else {
                System.out.println((i + 1) + ". " + "No url for this document");
            }

        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                // Null (end-of-stream) quits just like an empty line or 'q'.
                if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page;
                    try {
                        page = Integer.parseInt(line.trim());
                    } catch (NumberFormatException nfe) {
                        // Previously a non-numeric entry crashed the whole search.
                        System.out.println("No such page");
                        continue;
                    }
                    // page < 1 would make start negative and later index hits[-1].
                    if (page >= 1 && (page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}

From source file:cs571.proj1.SearchFiles.java

License:Apache License

/**
 * Demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, the query
 * is executed another time and all hits are collected.
 *
 * @param in          console reader used for the interactive paging commands
 * @param searcher    searcher to run the query against
 * @param query       the parsed user query
 * @param hitsPerPage page size n
 * @param raw         if true, print raw doc ids and scores instead of stored fields
 * @param interactive if false, print the first page and return
 * @throws IOException if the searcher or the console reader fails
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            // User paged past the collected window: offer to re-run the query
            // collecting every hit.
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            // readLine() returns null at end-of-stream; treat it like "n".
            if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String docID = doc.get("docID");
            if (docID != null) {
                System.out.println((i + 1) + " " + docID);
            } else {
                System.out.println((i + 1) + ". " + "No docID for this document");
            }

        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                // Null (end-of-stream) quits just like an empty line or 'q'.
                if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page;
                    try {
                        page = Integer.parseInt(line.trim());
                    } catch (NumberFormatException nfe) {
                        // Previously a non-numeric entry crashed the whole search.
                        System.out.println("No such page");
                        continue;
                    }
                    // page < 1 would make start negative and later index hits[-1].
                    if (page >= 1 && (page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}

From source file:csdn.lucene.first.version.Searcher.java

License:Apache License

/**
 * Runs the query string {@code q} against the index at {@code indexDir} over the
 * "contents" field, collecting the top 10 hits into the instance lists
 * {@code hit_ids} (field "fieldname") and {@code hit_paths} (field "fullpath").
 *
 * @param indexDir path to the Lucene index directory
 * @param q        raw query string, parsed with the instance analyzer
 * @return the accumulated {@code hit_ids} list (also mutated as a side effect)
 */
protected ArrayList<String> search(String indexDir, String q) {
    // Lucene 5.5: FSDirectory.open takes a java.nio.file.Path, not a File.
    java.nio.file.Path pathA = Paths.get(indexDir);
    // try-with-resources closes the directory and reader even when parse()
    // or search() throws; the original leaked both on any exception.
    try (FSDirectory dir = FSDirectory.open(pathA);
            DirectoryReader dReader = DirectoryReader.open(dir)) {
        IndexSearcher is = new IndexSearcher(dReader);

        QueryParser parser = new QueryParser("contents", analyzer);
        Query query = parser.parse(q);
        long start = System.currentTimeMillis();
        // search(): finds the top n hits; TopDocs holds the returned hits.
        TopDocs hits = is.search(query, 10);
        long end = System.currentTimeMillis();

        System.err.println("Found " + hits.totalHits + " document(s) (in " + (end - start)
                + " milliseconds) that matched query '" + q + "':");

        // scoreDocs: the top hits for the query.
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
            Document doc = is.doc(scoreDoc.doc);
            hit_ids.add(doc.get("fieldname"));
            hit_paths.add(doc.get("fullpath"));
        }
    } catch (Exception e) {
        // Best-effort: log and return whatever was collected so far.
        e.printStackTrace();
    }
    return hit_ids;
}

From source file:cz.muni.fi.japanesedictionary.engine.FragmentListAsyncTask.java

License:Open Source License

/**
 * Loads translations for the given expression from the JMdict Lucene index.
 *
 * params[0] is the expression to translate (null means "first run": return the
 * last 10 stored translations instead of searching); params[1] selects where
 * the expression must match ("end", "beginning", "middle", or anything else
 * for an exact/deinflected match). Matching documents are converted to
 * {@code Translation} objects, published incrementally via
 * {@code publishProgress}, and collected into the returned list.
 *
 * @return the list of translations, null on error or when nothing matched,
 *         or the last-10 history list on first run
 */
@Override
protected List<Translation> doInBackground(String... params) {
    String expression = params[0];
    String part = params[1];

    // Dictionary location and per-language display settings come from prefs.
    SharedPreferences settings = mContext.getSharedPreferences(ParserService.DICTIONARY_PREFERENCES, 0);
    String pathToDictionary = settings.getString(Const.PREF_JMDICT_PATH, null);
    SharedPreferences sharedPrefs = PreferenceManager.getDefaultSharedPreferences(mContext);
    final boolean englishBool = sharedPrefs.getBoolean("language_english", false);
    final boolean frenchBool = sharedPrefs.getBoolean("language_french", false);
    final boolean dutchBool = sharedPrefs.getBoolean("language_dutch", false);
    final boolean germanBool = sharedPrefs.getBoolean("language_german", false);
    final boolean russianBool = sharedPrefs.getBoolean("language_russian", false);
    final boolean searchOnlyFavorised = sharedPrefs.getBoolean("search_only_favorite", false);
    final boolean searchDeinflected = sharedPrefs.getBoolean("search_deinflected", false);

    final List<Translation> translations = new ArrayList<>();

    if (expression == null) {
        // first run: no expression yet, show the 10 most recent translations
        Log.i(LOG_TAG, "First run - last 10 translations ");
        GlossaryReaderContract database = new GlossaryReaderContract(mContext);
        List<Translation> translationsTemp = database.getLastTranslations(10);
        database.close();
        return translationsTemp;
    }

    if (pathToDictionary == null) {
        Log.e(LOG_TAG, "No path to jmdict dictionary");
        return null;
    }
    File file = new File(pathToDictionary);
    if (!file.exists() || !file.canRead()) {
        Log.e(LOG_TAG, "Can't read jmdict dictionary directory");
        return null;
    }

    if (expression.length() < 1) {
        Log.w(LOG_TAG, "No expression to translate");
        return null;
    }
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_36);

    IndexReader reader;
    try {
        final String search;
        final String hiragana;
        // onlyReb: the input was romaji, so search only the reading field.
        boolean onlyReb = false;

        if (Pattern.matches("\\p{Latin}*", expression)) {
            // only romaji
            onlyReb = true;
            Log.i(LOG_TAG, "Only latin letters, converting to hiragana. ");
            expression = TranscriptionConverter.kunreiToHepburn(expression);
            expression = RomanizationEnum.Hepburn.toHiragana(expression);
        }
        // Keep the un-spaced hiragana form for the deconjugator below.
        hiragana = expression;

        expression = insertSpaces(expression);

        // Build a phrase query anchored with the "lucenematch" sentinel token
        // depending on where the expression must occur in the indexed text.
        switch (part) {
        case "end":
            search = "\"" + expression + "lucenematch\"";
            break;
        case "beginning":
            search = "\"lucenematch " + expression + "\"";
            break;
        case "middle":
            search = "\"" + expression + "\"";
            break;
        default:
            if (searchDeinflected) {
                // OR together the exact form with every deconjugated candidate,
                // each constrained to the part-of-speech its conjugation implies.
                StringBuilder sb = new StringBuilder("\"lucenematch " + expression + "lucenematch\"");
                for (Predicate predicate : Deconjugator.deconjugate(hiragana)) {
                    if (predicate.isSuru()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND (pos:vs OR pos:vs-c OR pos:vs-s OR pos:vs-i))");
                    } else if (predicate.isKuru()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND pos:vk)");
                    } else if (predicate.isIku()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND pos:v5k-s)");
                    } else if (predicate.isIAdjective()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND pos:adj-i)");
                    } else
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND (pos:v1 OR pos:v2 OR pos:v5 OR pos:vz OR pos:vi OR pos:vn OR pos:vr))");
                }
                search = sb.toString();
            } else {
                search = "\"lucenematch " + expression + "lucenematch\"";
            }
        }
        Log.i(LOG_TAG, " Searching for: " + search);

        Query q;
        if (onlyReb) {
            // Romaji input: match the hiragana reading field only.
            q = (new QueryParser(Version.LUCENE_36, "index_japanese_reb", analyzer)).parse(search);
        } else {
            StandardQueryParser parser = new StandardQueryParser(analyzer);
            q = parser.parse(search, "japanese");
        }

        Directory dir = FSDirectory.open(file);
        reader = IndexReader.open(dir);
        final IndexSearcher searcher = new IndexSearcher(reader);
        // Streaming collector: converts each hit to a Translation and publishes
        // it immediately. Termination (max hits reached, task cancelled) is
        // signalled by throwing IOException out of collect().
        Collector collector = new Collector() {
            int max = 1000; // hard cap on published results
            int count = 0;
            private int docBase; // segment offset; docID + docBase = global id

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }

            @Override
            public void collect(int docID) throws IOException {
                Document d = searcher.doc(docID + docBase);
                Translation translation = new Translation();
                String prioritized = d.get("prioritized");
                if (searchOnlyFavorised && prioritized == null) {
                    return;
                }
                if (prioritized != null) {
                    //is prioritized
                    translation.setPrioritized(true);
                }

                String ruby = d.get("ruby");

                if (ruby != null && ruby.length() > 0) {
                    translation.setRuby(ruby);
                }

                String japanese_keb = d.get("japanese_keb");
                if (japanese_keb != null && japanese_keb.length() != 0) {
                    translation.parseJapaneseKeb(japanese_keb);
                }

                String japanese_reb = d.get("japanese_reb");
                if (japanese_reb != null && japanese_reb.length() != 0) {
                    translation.parseJapaneseReb(japanese_reb);
                }

                String english = d.get("english");
                if (english != null && english.length() != 0) {
                    translation.parseEnglish(english);
                }

                String french = d.get("french");
                if (french != null && french.length() != 0) {
                    translation.parseFrench(french);
                }

                String dutch = d.get("dutch");
                if (dutch != null && dutch.length() != 0) {
                    translation.parseDutch(dutch);
                }

                String german = d.get("german");
                if (german != null && german.length() != 0) {
                    translation.parseGerman(german);
                }

                String russian = d.get("russian");
                if (russian != null && russian.length() != 0) {
                    translation.parseRussian(russian);
                }

                // Only keep hits that have a sense in at least one enabled language.
                if ((englishBool && translation.getEnglishSense() != null)
                        || (dutchBool && translation.getDutchSense() != null)
                        || (germanBool && translation.getGermanSense() != null)
                        || (frenchBool && translation.getFrenchSense() != null)
                        || (russianBool && translation.getRussianSense() != null)) {

                    count++;
                    if (count < max) {
                        if (!FragmentListAsyncTask.this.isCancelled()) {
                            FragmentListAsyncTask.this.publishProgress(translation);
                            translations.add(translation);
                        } else {
                            translations.clear();
                            throw new IOException("Loader canceled");
                        }
                    } else {
                        throw new IOException("Max exceeded");
                    }
                }
            }

            @Override
            public void setNextReader(IndexReader reader, int docBas) throws IOException {
                docBase = docBas;
            }

            @Override
            public void setScorer(Scorer arg0) throws IOException {
            }

        };

        searcher.search(q, collector);
        // NOTE(review): reader is only closed on this success path; the
        // collector's "Max exceeded"/"Loader canceled" IOExceptions skip this
        // close — consider a try/finally. Confirm before changing behavior.
        reader.close();
    } catch (IOException ex) {
        // Expected termination path (cap reached or cancel), plus real I/O errors.
        Log.e(LOG_TAG, "IO Exception:  " + ex.toString());
        return translations;
    } catch (Exception ex) {
        Log.e(LOG_TAG, "Exception: " + ex.toString());
        return null;
    }

    return translations.isEmpty() ? null : translations;
}

From source file:de.anycook.db.lucene.FulltextIndex.java

License:Open Source License

/**
 * Runs a full-text query over the "description" and "steps" fields and
 * returns the titles of the matching recipe documents (at most 1000 hits),
 * in result order.
 *
 * @param q raw query string
 * @return titles of matching recipes; empty on parse error or corrupt index
 * @throws IOException if reading the index fails
 */
public Set<String> search(String q) throws IOException {
    final String[] searchFields = { "description", "steps" };
    final Set<String> titles = new LinkedHashSet<>();
    logger.debug(String.format("searching for %s", q));

    try (IndexReader indexReader = DirectoryReader.open(index)) {
        final int maxHits = 1000;
        final IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        final Query parsedQuery = new MultiFieldQueryParser(searchFields, analyzer).parse(q);
        final TopScoreDocCollector topCollector = TopScoreDocCollector.create(maxHits, null);
        indexSearcher.search(parsedQuery, topCollector);

        for (final ScoreDoc scored : topCollector.topDocs().scoreDocs) {
            titles.add(indexSearcher.doc(scored.doc).get("title"));
        }
    } catch (CorruptIndexException | ParseException e) {
        // Degrade to an empty result set; the caller gets no partial failures.
        logger.error(e);
    }

    logger.debug(String.format("found %d results", titles.size()));
    return titles;
}

From source file:de.berlios.jhelpdesk.utils.LuceneIndexer.java

License:Open Source License

/**
 * Searches the help-desk article index and maps each hit back to an Article.
 *
 * @param searchQuery raw query string, parsed with the instance parser
 * @return matching articles, at most DEFAULT_SEARCH_RESULT_LIMIT
 * @throws RuntimeException wrapping any parse/IO failure (original cause kept)
 */
public List<Article> search(String searchQuery) {
    try {
        Query query = parser.parse(searchQuery);
        Directory directory = FSDirectory.open(new File(indexDirectory));
        IndexSearcher indexSearcher = new IndexSearcher(directory);
        try {
            TopDocs res = indexSearcher.search(query, DEFAULT_SEARCH_RESULT_LIMIT);
            List<Article> result = new ArrayList<Article>(res.scoreDocs.length);
            for (ScoreDoc scoreDoc : res.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                result.add(documentToArticle(document));
            }
            return result;
        } finally {
            // Close even when search()/doc() throws; the original leaked the
            // searcher (and its underlying reader) on the exception path.
            indexSearcher.close();
        }
    } catch (Exception ex) {
        log.error(ex.getMessage(), ex);
        throw new RuntimeException(ex);
    }
}

From source file:de.cosmocode.lucene.fragments.query.AbstractLuceneQueryTestFragment.java

License:Apache License

/**
 * Searches for the given query and returns a list that has at most "max" items.
 * @param query the query to search for
 * @param max the maximum number of items in the returned list
 * @return a list with the names of the found documents
 * @throws IOException if lucene throws an exception
 */
protected List<String> search(final Query query, final int max) throws IOException {
    final List<String> docList = new LinkedList<String>();
    final IndexSearcher searcher = new IndexSearcher(IndexHelper.DIRECTORY);
    try {
        final TopDocs docs = searcher.search(query, max);
        // scoreDocs is empty when nothing matched, so no totalHits guard needed.
        for (final ScoreDoc doc : docs.scoreDocs) {
            docList.add(searcher.doc(doc.doc).get("name"));
        }
    } finally {
        // The original never closed the searcher, leaking its reader.
        searcher.close();
    }
    return docList;
}

From source file:de.elbe5.cms.search.SearchBean.java

License:Open Source License

/**
 * Searches the content index ("contentindex") for the pattern in
 * {@code result} and fills {@code result.getResults()} with typed search-data
 * entries (site/page/file), scored relative to the best hit.
 *
 * Failures are swallowed deliberately (best-effort search); the result list
 * is simply left with whatever was collected.
 *
 * @param result carries the pattern, field names and max-result limit in,
 *               and receives the hits out
 */
public void searchContent(ContentSearchResultData result) {
    result.getResults().clear();
    String[] fieldNames = result.getFieldNames();
    try {
        String indexPath = ApplicationPath.getAppPath() + "contentindex";
        ensureDirectory(indexPath);
        // try-with-resources: the original leaked the reader whenever
        // parse()/search() or a hit conversion threw.
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer();
            MultiFieldQueryParser parser = new MultiFieldQueryParser(fieldNames, analyzer);
            String pattern = result.getPattern().trim();
            if (pattern.length() == 0) {
                return; // nothing to search for
            }
            Query query = parser.parse(pattern);
            TopDocs topDocs = searcher.search(query, result.getMaxSearchResults());
            float maxScore = topDocs.getMaxScore();
            for (ScoreDoc hit : topDocs.scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                ContentSearchData data;
                String type = doc.get("type");
                switch (type) {
                case SiteSearchData.TYPE:
                    data = new SiteSearchData();
                    break;
                case PageSearchData.TYPE:
                    data = new PageSearchData();
                    break;
                case FileSearchData.TYPE:
                    data = new FileSearchData();
                    break;
                default:
                    // Unknown type: skip the hit instead of NPE-ing on a null
                    // data (the original only had an assert here).
                    continue;
                }
                data.setDoc(doc);
                // Normalize scores only when the best score exceeds 1.
                data.setScore(maxScore <= 1f ? hit.score : hit.score / maxScore);
                data.evaluateDoc();
                data.setContexts(query, analyzer);
                result.getResults().add(data);
            }
        }
    } catch (Exception ignore) {
        // Deliberate best-effort swallow, matching the original contract.
    }
}

From source file:de.elbe5.cms.search.SearchBean.java

License:Open Source License

/**
 * Searches the user index ("userindex") for the pattern in {@code result} and
 * fills {@code result.getResults()} with {@code UserSearchData} entries,
 * scored relative to the best hit.
 *
 * Failures are swallowed deliberately (best-effort search); the result list
 * is simply left with whatever was collected.
 *
 * @param result carries the pattern, field names and max-result limit in,
 *               and receives the hits out
 */
public void searchUsers(UserSearchResultData result) {
    result.getResults().clear();
    String[] fieldNames = result.getFieldNames();
    try {
        String indexPath = ApplicationPath.getAppPath() + "userindex";
        ensureDirectory(indexPath);
        // try-with-resources: the original leaked the reader whenever
        // parse()/search() or a hit conversion threw.
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer();
            MultiFieldQueryParser parser = new MultiFieldQueryParser(fieldNames, analyzer);
            String pattern = result.getPattern().trim();
            if (pattern.length() == 0) {
                return; // nothing to search for
            }
            Query query = parser.parse(pattern);
            TopDocs topDocs = searcher.search(query, result.getMaxSearchResults());
            float maxScore = topDocs.getMaxScore();
            for (ScoreDoc hit : topDocs.scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                UserSearchData data = new UserSearchData();
                data.setDoc(doc);
                // Normalize scores only when the best score exceeds 1.
                data.setScore(maxScore <= 1f ? hit.score : hit.score / maxScore);
                data.evaluateDoc();
                data.setContexts(query, analyzer);
                result.getResults().add(data);
            }
        }
    } catch (Exception ignore) {
        // Deliberate best-effort swallow, matching the original contract.
    }
}

From source file:de.hsmannheim.ss15.alr.searchengine.DefaultLuceneController.java

/**
 * Runs {@code queryString} against the "contents" field of the index at
 * {@code indexDir}, returning every matching stored document. For each hit the
 * on-disk file (stored field "path") is re-read, its first three header lines
 * dropped, and a query highlight is attached as an extra "highlight" field
 * when one can be computed.
 *
 * @param queryString raw user query
 * @return all matching documents, possibly augmented with a highlight field
 * @throws IOException    if the index or a hit's file cannot be read
 * @throws ParseException if the query string does not parse
 */
public List<StoredDocument> doSearch(String queryString) throws IOException, ParseException {
    String field = "contents";

    // try-with-resources: the original leaked the reader on any exception.
    // (The original also opened an unused BufferedReader on System.in and
    // carried dead `queries`/`raw`/`hitsPerPage` locals — removed.)
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexDir)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();

        QueryParser parser = new QueryParser(field, analyzer);
        Query query = parser.parse(queryString);

        Highlighter highlighter = new Highlighter(new QueryScorer(query));

        // First pass counts the hits so the second pass can fetch all of them.
        TotalHitCountCollector collector = new TotalHitCountCollector();
        searcher.search(query, collector);
        TopDocs topDocs = searcher.search(query, Math.max(1, collector.getTotalHits()));

        List<StoredDocument> results = new ArrayList<>();
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            StoredDocument doc = searcher.doc(scoreDoc.doc);
            try {
                File file = new File(doc.get("path"));
                StringBuilder content = new StringBuilder();
                // try-with-resources: the original never closed this reader.
                try (BufferedReader docReader = new BufferedReader(
                        new InputStreamReader(Files.newInputStream(file.toPath()), StandardCharsets.UTF_8))) {
                    List<String> lines = new ArrayList<>();
                    while (docReader.ready()) {
                        lines.add(docReader.readLine());
                    }
                    // Drop the three header lines; guard short files (the
                    // original threw IndexOutOfBoundsException on them).
                    for (int i = 0; i < 3 && !lines.isEmpty(); i++) {
                        lines.remove(0);
                    }
                    for (String s : lines) {
                        content.append(s); // StringBuilder instead of O(n^2) concat
                    }
                }
                String highLight = highlighter.getBestFragment(analyzer, null, content.toString());
                if (highLight == null) {
                    LOGGER.warn("No Highlight found");
                } else {
                    doc.add(new TextField("highlight", highLight, Field.Store.YES));
                }
            } catch (InvalidTokenOffsetsException ex) {
                LOGGER.warn("No Highlight found");
            }

            results.add(doc);
        }
        return results;
    }
}