Example usage for org.apache.lucene.search IndexSearcher search

Introduction

On this page you can find example usages of the search method of org.apache.lucene.search.IndexSearcher.

Prototype

public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager)
        throws IOException

Source Link

Document

Lower-level search API.

Usage

From source file:com.rubenlaguna.en4j.searchlucene.NoteFinderLuceneImpl.java

License:Open Source License

/**
 * Finds the notes matching {@code searchText}, treating every word as a prefix.
 *
 * <p>The text is trimmed, each whitespace run is replaced by {@code "* "} and a
 * trailing {@code "*"} is appended when the text ends in a word character. The
 * result is parsed against the {@code "all"} field with AND as the default
 * operator. The shared {@code reader} field is reopened first, and the previous
 * instance is closed if a new one was returned.
 *
 * <p>Parse and I/O failures are reported and swallowed; whatever matches were
 * collected up to that point are returned.
 *
 * @param searchText the raw text typed by the user; must not be {@code null}
 * @return the matching notes, or an empty collection when the text is blank
 */
public Collection<Note> find(String searchText) {
    final String trimmed = searchText.trim();
    if ("".equals(trimmed)) {
        return Collections.<Note>emptyList();
    }
    long start = System.currentTimeMillis();

    // "foo bar" becomes "foo* bar*" so that every word is a prefix query.
    searchText = trimmed.replaceAll("\\s+", "* ");
    if (Pattern.matches(".*\\w$", searchText)) {
        searchText = searchText + "*";
    }

    LOG.info("search text:" + searchText);
    final Collection<Note> toReturn = new ArrayList<Note>();

    try {
        // Pick up index changes made since the last search.
        IndexReader newReader = reader.reopen();
        if (newReader != reader) {
            reader.close();
        }
        reader = newReader;
        LOG.info("using index version: " + reader.getVersion());
        final IndexSearcher searcher = new IndexSearcher(reader);
        try {
            final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
            QueryParser parser = new CustomQueryParser("all", analyzer);
            parser.setDefaultOperator(QueryParser.Operator.AND);

            Query query = parser.parse(searchText);
            LOG.info("query =" + query.toString());

            // Scores are not needed: every hit is resolved to a Note as it is collected.
            Collector collector = new Collector() {

                private int docBase = 0;

                @Override
                public void setScorer(Scorer scorer) throws IOException {
                }

                @Override
                public void collect(int doc) throws IOException {
                    // doc is relative to the current segment; add docBase to address it globally.
                    int scoreId = doc + docBase;
                    Document document = searcher.doc(scoreId);
                    final String stringValue = document.getField("id").stringValue();
                    int docId = Integer.parseInt(stringValue);
                    LOG.fine("doc id " + stringValue + " matches the search.");
                    toReturn.add(nr.get(docId, false));
                }

                @Override
                public void setNextReader(IndexReader reader, int docBase) throws IOException {
                    this.docBase = docBase;
                }

                @Override
                public boolean acceptsDocsOutOfOrder() {
                    return true;
                }
            };
            searcher.search(query, collector);
        } finally {
            // Close the searcher even when parsing or searching fails.
            searcher.close();
        }
    } catch (ParseException ex) {
        Exceptions.printStackTrace(ex);
    } catch (CorruptIndexException ex) {
        Exceptions.printStackTrace(ex);
    } catch (IOException ex) {
        Exceptions.printStackTrace(ex);
    } catch (IllegalStateException ex) {
        LOG.info("caught " + ex.getMessage() + ". Most likely the app is shutting down");
    }
    long delta = System.currentTimeMillis() - start;
    Installer.mbean.sampleSearchTime(delta);
    LOG.info("find took " + delta / 1000.0 + " secs. " + toReturn.size() + " results found");
    return toReturn;
}

From source file:com.search.lucene.demo.facet.SimpleFacetsExample.java

License:Apache License

/**
 * Runs a match-all query and counts facets only, without collecting the matching documents.
 *
 * <p>Both the index reader and the taxonomy reader are closed when the method
 * exits, including when the search or the facet counting throws.
 *
 * @return the top 10 children of the "Author" dimension followed by the top 10
 *         children of the "Publish Date" dimension
 * @throws IOException if the index or taxonomy cannot be opened or read
 */
private List<FacetResult> facetsOnly() throws IOException {
    try (DirectoryReader indexReader = DirectoryReader.open(indexDir);
            TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir)) {
        IndexSearcher searcher = new IndexSearcher(indexReader);
        FacetsCollector fc = new FacetsCollector();

        // MatchAllDocsQuery is for "browsing" (counts facets
        // for all non-deleted docs in the index); normally
        // you'd use a "normal" query:
        searcher.search(new MatchAllDocsQuery(), fc);

        // Count both "Publish Date" and "Author" dimensions
        Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);

        // Retrieve results
        List<FacetResult> results = new ArrayList<>();
        results.add(facets.getTopChildren(10, "Author"));
        results.add(facets.getTopChildren(10, "Publish Date"));
        return results;
    }
}

From source file:com.searchcode.app.service.CodeSearcher.java

License:Open Source License

/**
 * Looks up a single indexed file by its code id.
 *
 * <p>Only used as a fallback if getByRepoFileName fails for some reason due to what
 * appears to be a lucene index bug. This should always work as the path used is sha1
 * and should be unique for anything the current codebase can deal with.
 *
 * <p>The index reader is closed on every exit path. Any exception is logged and
 * results in whatever was built so far being returned.
 *
 * @param codeId the code id to search for; it is escaped before being parsed
 * @return the matching result, or {@code null} when nothing matches or the lookup fails
 *         before a result is created
 */
public CodeResult getByCodeId(String codeId) {
    CodeResult codeResult = null;

    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);

        Query query = parser.parse(Values.CODEID + ":" + QueryParser.escape(codeId));
        Singleton.getLogger().info("Query to get by " + Values.CODEID + ":" + QueryParser.escape(codeId));

        TopDocs results = searcher.search(query, 1);
        ScoreDoc[] hits = results.scoreDocs;

        if (hits.length != 0) {
            Document doc = searcher.doc(hits[0].doc);

            String filepath = doc.get(Values.PATH);

            // A file that cannot be read is reported and returned with no code lines.
            List<String> code = new ArrayList<>();
            try {
                code = Singleton.getHelpers()
                        .readFileLinesGuessEncoding(filepath,
                                Singleton.getHelpers().tryParseInt(
                                        Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH,
                                                Values.DEFAULTMAXFILELINEDEPTH),
                                        Values.DEFAULTMAXFILELINEDEPTH));
            } catch (Exception ex) {
                Singleton.getLogger().info("Indexed file appears to binary: " + filepath);
            }

            codeResult = new CodeResult(code, null);
            codeResult.setFilePath(filepath);
            codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
            codeResult.setFileName(doc.get(Values.FILENAME));
            codeResult.setLanguageName(doc.get(Values.LANGUAGENAME));
            codeResult.setMd5hash(doc.get(Values.MD5HASH));
            codeResult.setCodeLines(doc.get(Values.CODELINES));
            codeResult.setDocumentId(hits[0].doc);
            codeResult.setRepoName(doc.get(Values.REPONAME));
            codeResult.setRepoLocation(doc.get(Values.REPOLOCATION));
            codeResult.setCodeOwner(doc.get(Values.CODEOWNER));
            codeResult.setCodeId(doc.get(Values.CODEID));
        }
    } catch (Exception ex) {
        LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return codeResult;
}

From source file:com.searchcode.app.service.CodeSearcher.java

License:Open Source License

/**
 * Builds summary statistics for one repository from the index.
 *
 * <p>Every document whose repository name matches is visited. Lines of code are
 * summed overall and per language (underscores in language names are shown as
 * spaces), skipping languages for which {@code languageCostIgnore} returns true.
 * The per-language list is sorted by line count, largest first. The index reader
 * is closed on every exit path; on failure the error is logged and the statistics
 * gathered so far are returned.
 *
 * @param repoName the repository name, inserted into the query as-is
 * @return the statistics for the repository; totals are zero and lists empty when
 *         nothing matched or the lookup failed early
 */
public ProjectStats getProjectStats(String repoName) {
    int totalCodeLines = 0;
    int totalFiles = 0;
    List<CodeFacetLanguage> codeFacetLanguages = new ArrayList<>();
    List<CodeFacetOwner> repoFacetOwners = new ArrayList<>();
    List<CodeFacetLanguage> codeByLines = new ArrayList<>();
    SearchcodeLib searchcodeLib = Singleton.getSearchCodeLib();

    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        // NOTE(review): repoName is not passed through QueryParser.escape here - confirm
        // that callers only supply names free of query syntax characters.
        Query query = parser.parse(Values.REPONAME + ":" + repoName);

        TopDocs results = searcher.search(query, Integer.MAX_VALUE);

        Map<String, Integer> linesCount = new HashMap<>();

        // Iterate the returned hits rather than totalHits so the array is never overrun.
        for (ScoreDoc hit : results.scoreDocs) {
            Document doc = searcher.doc(hit.doc);

            if (!searchcodeLib.languageCostIgnore(doc.get(Values.LANGUAGENAME))) {
                int lines = Singleton.getHelpers().tryParseInt(doc.get(Values.CODELINES), "0");
                totalCodeLines += lines;
                String languageName = doc.get(Values.LANGUAGENAME).replace("_", " ");
                linesCount.merge(languageName, lines, Integer::sum);
            }
        }

        for (Map.Entry<String, Integer> entry : linesCount.entrySet()) {
            codeByLines.add(new CodeFacetLanguage(entry.getKey(), entry.getValue()));
        }
        // Descending by count; Integer.compare avoids the overflow of a subtraction comparator.
        codeByLines.sort((a, b) -> Integer.compare(b.getCount(), a.getCount()));

        totalFiles = results.totalHits;
        codeFacetLanguages = this.getLanguageFacetResults(searcher, reader, query);
        repoFacetOwners = this.getOwnerFacetResults(searcher, reader, query);
    } catch (Exception ex) {
        LOGGER.severe("CodeSearcher getProjectStats caught a " + ex.getClass() + "\n with message: "
                + ex.getMessage());
    }

    return new ProjectStats(totalCodeLines, totalFiles, codeFacetLanguages, codeByLines, repoFacetOwners);
}

From source file:com.searchcode.app.service.CodeSearcher.java

License:Open Source License

/**
 * Returns one page of indexed file paths for a repository.
 *
 * <p>Due to very large repositories (500,000 files) this needs to support
 * paging. Each page holds at most 1000 paths. The index reader is closed on
 * every exit path; on failure the error is logged and the paths gathered so far
 * are returned.
 *
 * TODO maybe convert to hash so lookups are faster
 *
 * @param repoName the repository name, inserted into the query as-is
 * @param page     zero-based page number
 * @return the paths on the requested page; empty when the page is past the end
 */
public List<String> getRepoDocuments(String repoName, int page) {
    final int REPOPAGELIMIT = 1000;
    List<String> fileLocations = new ArrayList<>(REPOPAGELIMIT);
    int start = REPOPAGELIMIT * page;

    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(Values.REPONAME + ":" + repoName);

        TopDocs results = searcher.search(query, Integer.MAX_VALUE);
        ScoreDoc[] hits = results.scoreDocs;
        // Bound by the hits actually returned so the array is never overrun.
        int end = Math.min(hits.length, REPOPAGELIMIT * (page + 1));

        for (int i = start; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            fileLocations.add(doc.get(Values.PATH));
        }
    } catch (Exception ex) {
        LOGGER.severe("CodeSearcher getRepoDocuments caught a " + ex.getClass() + " on page " + page
                + "\n with message: " + ex.getMessage());
    }

    return fileLocations;
}

From source file:com.searchcode.app.service.CodeSearcher.java

License:Open Source License

/**
 * Only really used internally but does the heavy lifting of actually converting the index document on disk to the
 * format used internally including reading the file from disk.
 *
 * <p>At most {@code 20 * PAGELIMIT} hits are fetched, so pages beyond that window
 * yield no code results rather than failing. The caller owns {@code reader} and
 * {@code searcher}; neither is closed here.
 *
 * @param reader   the reader backing {@code searcher}, passed on to the facet helpers
 * @param searcher the searcher to run the query with
 * @param query    the query to execute
 * @param page     zero-based page number
 * @return the requested page of results together with paging and facet information
 * @throws IOException if the index cannot be read
 */
public SearchResult doPagingSearch(IndexReader reader, IndexSearcher searcher, Query query, int page)
        throws IOException {
    TopDocs results = searcher.search(query, 20 * this.PAGELIMIT); // 20 pages worth of documents
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    int start = this.PAGELIMIT * page;
    // totalHits can exceed the number of hits fetched above, so bound the page by hits.length
    // to avoid running off the end of the array for deep pages.
    int end = Math.min(hits.length, this.PAGELIMIT * (page + 1));
    int noPages = numTotalHits / this.PAGELIMIT;

    // NOTE(review): values above 20 are clamped to 19, not 20 - confirm this is what
    // calculatePages expects.
    if (noPages > 20) {
        noPages = 19;
    }

    List<Integer> pages = this.calculatePages(numTotalHits, noPages);

    List<CodeResult> codeResults = new ArrayList<>();

    for (int i = start; i < end; i++) {
        Document doc = searcher.doc(hits[i].doc);

        String filepath = doc.get(Values.PATH);

        if (filepath != null) {
            // This line is occasionally useful for debugging ranking, but not useful enough to have as log info
            //System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);

            List<String> code = new ArrayList<>();
            try {
                // This should probably be limited by however deep we are meant to look into the file
                // or the value we use here whichever is less
                code = Singleton.getHelpers()
                        .readFileLinesGuessEncoding(filepath,
                                Singleton.getHelpers().tryParseInt(
                                        Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH,
                                                Values.DEFAULTMAXFILELINEDEPTH),
                                        Values.DEFAULTMAXFILELINEDEPTH));
            } catch (Exception ex) {
                LOGGER.warning("Indexed file appears to binary or missing: " + filepath);
            }

            CodeResult cr = new CodeResult(code, null);
            cr.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
            cr.setFileName(doc.get(Values.FILENAME));
            cr.setLanguageName(doc.get(Values.LANGUAGENAME));
            cr.setMd5hash(doc.get(Values.MD5HASH));
            cr.setCodeLines(doc.get(Values.CODELINES));
            cr.setDocumentId(hits[i].doc);
            cr.setRepoLocation(doc.get(Values.REPOLOCATION));
            cr.setRepoName(doc.get(Values.REPONAME));
            cr.setCodeOwner(doc.get(Values.CODEOWNER));
            cr.setCodeId(doc.get(Values.CODEID));

            codeResults.add(cr);
        } else {
            LOGGER.warning((i + 1) + ". " + "No path for this document");
        }
    }

    List<CodeFacetLanguage> codeFacetLanguages = this.getLanguageFacetResults(searcher, reader, query);
    List<CodeFacetRepo> repoFacetLanguages = this.getRepoFacetResults(searcher, reader, query);
    List<CodeFacetOwner> repoFacetOwner = this.getOwnerFacetResults(searcher, reader, query);

    return new SearchResult(numTotalHits, page, query.toString(), codeResults, pages, codeFacetLanguages,
            repoFacetLanguages, repoFacetOwner);
}

From source file:com.searchcode.app.service.TimeCodeSearcher.java

/**
 * Attempts to find a unique file given the repository name and the path/filename however
 * it seems to randomly not find things for some files. No idea of the root cause at this point and have implemented
 * a work around where we get the file by getById which is no ideal. The bug appears to be due to some issue
 * inside lucene itself as using raw queries to pull back the file results in no matches, and yet it does appear
 * when not limiting to the repo/* ww w .ja  v a2 s .  c  o  m*/
 * TODO investigate the lucene issue that occurs here mentioned above
 * TODO needs to use the revision number here as well to get the right value
 */
public CodeResult getByRepoFileName(String repo, String fileName) {
    CodeResult codeResult = null;

    try {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)));
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);

        // TODO I have a feeling this may not be unique if there are to files in the same directory with different case... something to investigate
        Query query = parser
                .parse(Values.FILELOCATIONFILENAME + ":" + QueryParser.escape(repo + "/" + fileName));
        Singleton.getLogger().info("Query to get by filename = " + Values.FILELOCATIONFILENAME + ":"
                + QueryParser.escape(repo + "/" + fileName));

        TopDocs results = searcher.search(query, 1);
        ScoreDoc[] hits = results.scoreDocs;

        if (hits.length != 0) {
            Document doc = searcher.doc(hits[0].doc);

            String filepath = doc.get(Values.PATH);

            List<String> code = new ArrayList<>();
            try {
                code = Files.readAllLines(Paths.get(filepath), StandardCharsets.UTF_8);
                code = Singleton.getHelpers()
                        .readFileLines(filepath,
                                Singleton.getHelpers().tryParseInt(
                                        Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH,
                                                Values.DEFAULTMAXFILELINEDEPTH),
                                        Values.DEFAULTMAXFILELINEDEPTH));
            } catch (Exception ex) {
                Singleton.getLogger().info("Indexed file appears to binary: " + filepath);
            }

            codeResult = new CodeResult(code, null);
            codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
            codeResult.setFileName(doc.get(Values.FILENAME));
            codeResult.setLanguageName(doc.get(Values.LANGUAGENAME));
            codeResult.setMd5hash(doc.get(Values.MD5HASH));
            codeResult.setCodeLines(doc.get(Values.CODELINES));
            codeResult.setDocumentId(hits[0].doc);
            codeResult.setRepoName(doc.get(Values.REPONAME));
            codeResult.setRepoLocation(doc.get(Values.REPOLOCATION));
            codeResult.setCodeOwner(doc.get(Values.CODEOWNER));
        }

        reader.close();

    } catch (Exception ex) {
        LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return codeResult;
}

From source file:com.searchcode.app.service.TimeCodeSearcher.java

/**
 * Lists the file location/filename value of every indexed document belonging to a repository.
 *
 * <p>The index reader is closed on every exit path; on failure the error is
 * logged and the locations gathered so far are returned.
 *
 * @param repoName the repository name, inserted into the query as-is
 * @return the locations of all matching documents, possibly empty
 */
public List<String> getRepoDocuments(String repoName) {
    List<String> fileLocations = new ArrayList<>();
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(Values.REPONAME + ":" + repoName);

        TopDocs results = searcher.search(query, Integer.MAX_VALUE);

        for (ScoreDoc hit : results.scoreDocs) {
            Document doc = searcher.doc(hit.doc);
            fileLocations.add(doc.get(Values.FILELOCATIONFILENAME));
        }
    } catch (Exception ex) {
        LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return fileLocations;
}

From source file:com.searchcode.app.service.TimeCodeSearcher.java

/**
 * Only really used internally but does the heavy lifting of actually converting the index document on disk to the
 * format used internally, including fetching the file content for the indexed revision through the git service.
 *
 * <p>At most {@code 20 * PAGELIMIT} hits are fetched, so pages beyond that window
 * yield no code results rather than failing. The caller owns {@code reader} and
 * {@code searcher}; neither is closed here.
 *
 * @param reader   the reader backing {@code searcher}, passed on to the facet helpers
 * @param searcher the searcher to run the query with
 * @param query    the query to execute
 * @param page     zero-based page number
 * @return the requested page of results together with paging and facet information
 * @throws IOException if the index cannot be read
 */
public SearchResult doPagingSearch(IndexReader reader, IndexSearcher searcher, Query query, int page)
        throws IOException {
    TopDocs results = searcher.search(query, 20 * this.PAGELIMIT); // 20 pages worth of documents
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    int start = this.PAGELIMIT * page;
    // totalHits can exceed the number of hits fetched above, so bound the page by hits.length
    // to avoid running off the end of the array for deep pages.
    int end = Math.min(hits.length, this.PAGELIMIT * (page + 1));
    int noPages = Math.min(numTotalHits / this.PAGELIMIT, 20);

    List<Integer> pages = new ArrayList<>();
    for (int i = 0; i < noPages; i++) {
        pages.add(i);
    }

    List<CodeResult> codeResults = new ArrayList<>();

    for (int i = start; i < end; i++) {
        Document doc = searcher.doc(hits[i].doc);

        String filepath = doc.get(Values.PATH);

        if (filepath != null) {
            // This line is occasionally useful for debugging ranking, but not useful enough to have as log info
            //System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);

            CodeResult cr = new CodeResult(null, null);
            cr.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
            cr.setFileName(doc.get(Values.FILENAME));
            cr.setLanguageName(doc.get(Values.LANGUAGENAME));
            cr.setMd5hash(doc.get(Values.MD5HASH));
            cr.setCodeLines(doc.get(Values.CODELINES));
            cr.setDocumentId(hits[i].doc);
            cr.setRepoLocation(doc.get(Values.REPOLOCATION));
            cr.setRepoName(doc.get(Values.REPONAME));
            cr.setCodeOwner(doc.get(Values.CODEOWNER));
            cr.setRevision(doc.get(Values.REVISION));
            cr.setYearMonthDay(doc.get(Values.DATEYEARMONTHDAY));
            cr.setMessage(doc.get(Values.MESSAGE));
            cr.setDeleted(doc.get(Values.DELETED));

            try {
                // This should probably be limited by however deep we are meant to look into the file
                // or the value we use here whichever is less
                String repoLoc = "./repo/" + cr.getRepoName() + "/.git";
                cr.setCode(Arrays.asList(gitService
                        .fetchFileRevision(repoLoc, cr.getRevision(), cr.getCodePath()).split("\\r?\\n")));
            } catch (Exception ex) {
                // The result is still returned, just without any code attached.
                LOGGER.warning("Indexed file appears to binary or missing: " + filepath);
            }

            codeResults.add(cr);
        } else {
            LOGGER.warning((i + 1) + ". " + "No path for this document");
        }
    }

    List<CodeFacetLanguage> codeFacetLanguages = this.getLanguageFacetResults(searcher, reader, query);
    List<CodeFacetRepo> repoFacetLanguages = this.getRepoFacetResults(searcher, reader, query);
    List<CodeFacetOwner> repoFacetOwner = this.getOwnerFacetResults(searcher, reader, query);
    List<CodeFacetYearMonthDay> repoFacetYearMonthDay = this.getYearMonthDayFacetResults(searcher, reader,
            query);
    List<CodeFacetYearMonth> repoFacetYearMonth = this.getYearMonthFacetResults(searcher, reader, query);
    List<CodeFacetYear> repoFacetYear = this.getYearFacetResults(searcher, reader, query);
    List<CodeFacetRevision> repoFacetRevision = this.getRevisionFacetResults(searcher, reader, query);
    List<CodeFacetDeleted> repoFacetDeleted = this.getDeletedFacetResults(searcher, reader, query);

    SearchResult searchResult = new SearchResult(numTotalHits, page, query.toString(), codeResults, pages,
            codeFacetLanguages, repoFacetLanguages, repoFacetOwner);

    searchResult.setRepoFacetYearMonthDay(repoFacetYearMonthDay);
    searchResult.setRepoFacetYearMonth(repoFacetYearMonth);
    searchResult.setRepoFacetYear(repoFacetYear);
    searchResult.setRepoFacetRevision(repoFacetRevision);
    searchResult.setRepoFacetDeleted(repoFacetDeleted);

    return searchResult;
}

From source file:com.searchlocal.lucene.ContentSearcher.java

License:Open Source License

/** 
 * ?/*  ww w.  ja  va 2  s .c  o m*/
 * 
 * @param param ?
 * @return int 
 */
public static int getCount(SearchParam param) throws IOException {
    // ?
    String indexPath = param.getIndexPath();
    FSDirectory fsd = SimpleFSDirectory.open(new File(indexPath));
    int count = 0;
    try {
        // ?
        Analyzer analyzer = new PaodingAnalyzer();
        QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer);

        // 
        Query query = parser.parse(param.getKeyWord());

        TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);
        IndexSearcher is = new IndexSearcher(fsd, true);
        is.search(query, collector);
        ScoreDoc[] scoreDoc = collector.topDocs().scoreDocs;
        count = scoreDoc.length;
    } catch (ParseException e) {
        e.printStackTrace();
    }
    return count;
}