Usage examples for the search method of org.apache.lucene.search.IndexSearcher
public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager) throws IOException
From source file:com.rubenlaguna.en4j.searchlucene.NoteFinderLuceneImpl.java
License:Open Source License
public Collection<Note> find(String searchText) { if ("".equals(searchText.trim())) { return Collections.EMPTY_LIST; }// w w w .j a v a2 s .c o m long start = System.currentTimeMillis(); searchText = searchText.trim(); String patternStr = "\\s+"; String replaceStr = "* "; Pattern pattern = Pattern.compile(patternStr); Matcher matcher = pattern.matcher(searchText); searchText = matcher.replaceAll(replaceStr); if (Pattern.matches(".*\\w$", searchText)) { searchText = searchText + "*"; } LOG.info("search text:" + searchText); final Collection<Note> toReturn = new ArrayList<Note>(); try { IndexReader newReader = reader.reopen(); if (newReader != reader) { reader.close(); } reader = newReader; LOG.info("using index version: " + reader.getVersion()); final IndexSearcher searcher = new IndexSearcher(reader); final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29); QueryParser parser = new CustomQueryParser("all", analyzer); parser.setDefaultOperator(QueryParser.Operator.AND); Query query = parser.parse(searchText); LOG.info("query =" + query.toString()); //search the query Collector collector = new Collector() { private int docBase = 0; @Override public void setScorer(Scorer scorer) throws IOException { } @Override public void collect(int doc) throws IOException { int scoreId = doc + docBase; Document document = searcher.doc(scoreId); final String stringValue = document.getField("id").stringValue(); int docId = Integer.parseInt(stringValue); LOG.fine("doc id " + stringValue + " matches the search."); toReturn.add(nr.get(docId, false)); } @Override public void setNextReader(IndexReader reader, int docBase) throws IOException { this.docBase = docBase; } @Override public boolean acceptsDocsOutOfOrder() { return true; } }; searcher.search(query, collector); searcher.close(); } catch (ParseException ex) { Exceptions.printStackTrace(ex); } catch (CorruptIndexException ex) { Exceptions.printStackTrace(ex); } catch (IOException ex) { Exceptions.printStackTrace(ex); } 
catch (IllegalStateException ex) { LOG.info("caught " + ex.getMessage() + ". Most likely the app is shutting down"); } long delta = System.currentTimeMillis() - start; Installer.mbean.sampleSearchTime(delta); LOG.info("find took " + delta / 1000.0 + " secs. " + toReturn.size() + " results found"); return toReturn; }
From source file:com.search.lucene.demo.facet.SimpleFacetsExample.java
License:Apache License
/** User runs a query and counts facets only without collecting the matching documents.*/ private List<FacetResult> facetsOnly() throws IOException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); FacetsCollector fc = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query: searcher.search(new MatchAllDocsQuery(), fc); // Retrieve results List<FacetResult> results = new ArrayList<>(); // Count both "Publish Date" and "Author" dimensions Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc); results.add(facets.getTopChildren(10, "Author")); results.add(facets.getTopChildren(10, "Publish Date")); indexReader.close();// w w w. j a va2s . c o m taxoReader.close(); return results; }
From source file:com.searchcode.app.service.CodeSearcher.java
License:Open Source License
/**
 * Only used as fallback if getByRepoFileName fails for some reason due to what appears to be a lucene index bug
 * this should always work as the path used is sha1 and should be unique for anything the current codebase can
 * deal with
 *
 * @param codeId the code id to look up; it is escaped before being placed in the query
 * @return the first matching document as a CodeResult, or {@code null} when nothing matches or
 *         the lookup fails (failures are logged, not rethrown)
 */
public CodeResult getByCodeId(String codeId) {
    CodeResult codeResult = null;

    // try-with-resources: the reader used to stay open whenever parsing or searching threw.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);

        Query query = parser.parse(Values.CODEID + ":" + QueryParser.escape(codeId));
        Singleton.getLogger().info("Query to get by " + Values.CODEID + ":" + QueryParser.escape(codeId));

        TopDocs results = searcher.search(query, 1);
        ScoreDoc[] hits = results.scoreDocs;

        if (hits.length != 0) {
            Document doc = searcher.doc(hits[0].doc);

            String filepath = doc.get(Values.PATH);

            // Best effort: when the file cannot be read the result carries an empty code list.
            List<String> code = new ArrayList<>();
            try {
                code = Singleton.getHelpers().readFileLinesGuessEncoding(filepath,
                        Singleton.getHelpers().tryParseInt(
                                Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH,
                                        Values.DEFAULTMAXFILELINEDEPTH),
                                Values.DEFAULTMAXFILELINEDEPTH));
            } catch (Exception ex) {
                Singleton.getLogger().info("Indexed file appears to binary: " + filepath);
            }

            codeResult = new CodeResult(code, null);
            codeResult.setFilePath(filepath);
            codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
            codeResult.setFileName(doc.get(Values.FILENAME));
            codeResult.setLanguageName(doc.get(Values.LANGUAGENAME));
            codeResult.setMd5hash(doc.get(Values.MD5HASH));
            codeResult.setCodeLines(doc.get(Values.CODELINES));
            codeResult.setDocumentId(hits[0].doc);
            codeResult.setRepoName(doc.get(Values.REPONAME));
            codeResult.setRepoLocation(doc.get(Values.REPOLOCATION));
            codeResult.setCodeOwner(doc.get(Values.CODEOWNER));
            codeResult.setCodeId(doc.get(Values.CODEID));
        }
    } catch (Exception ex) {
        LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return codeResult;
}
From source file:com.searchcode.app.service.CodeSearcher.java
License:Open Source License
public ProjectStats getProjectStats(String repoName) { int totalCodeLines = 0; int totalFiles = 0; List<CodeFacetLanguage> codeFacetLanguages = new ArrayList<>(); List<CodeFacetOwner> repoFacetOwners = new ArrayList<>(); List<CodeFacetLanguage> codeByLines = new ArrayList<>(); SearchcodeLib searchcodeLib = Singleton.getSearchCodeLib(); try {// w w w . j a v a2s . c om IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new CodeAnalyzer(); QueryParser parser = new QueryParser(CODEFIELD, analyzer); Query query = parser.parse(Values.REPONAME + ":" + repoName); TopDocs results = searcher.search(query, Integer.MAX_VALUE); ScoreDoc[] hits = results.scoreDocs; Map<String, Integer> linesCount = new HashMap<>(); for (int i = 0; i < results.totalHits; i++) { Document doc = searcher.doc(hits[i].doc); if (!searchcodeLib.languageCostIgnore(doc.get(Values.LANGUAGENAME))) { int lines = Singleton.getHelpers().tryParseInt(doc.get(Values.CODELINES), "0"); totalCodeLines += lines; String languageName = doc.get(Values.LANGUAGENAME).replace("_", " "); if (linesCount.containsKey(languageName)) { linesCount.put(languageName, linesCount.get(languageName) + lines); } else { linesCount.put(languageName, lines); } } } for (String key : linesCount.keySet()) { codeByLines.add(new CodeFacetLanguage(key, linesCount.get(key))); } codeByLines.sort((a, b) -> b.getCount() - a.getCount()); totalFiles = results.totalHits; codeFacetLanguages = this.getLanguageFacetResults(searcher, reader, query); repoFacetOwners = this.getOwnerFacetResults(searcher, reader, query); reader.close(); } catch (Exception ex) { LOGGER.severe("CodeSearcher getProjectStats caught a " + ex.getClass() + "\n with message: " + ex.getMessage()); } return new ProjectStats(totalCodeLines, totalFiles, codeFacetLanguages, codeByLines, repoFacetOwners); }
From source file:com.searchcode.app.service.CodeSearcher.java
License:Open Source License
/**
 * Due to very large repositories (500,000 files) this needs to support
 * paging. Also need to consider the fact that is a list of strings
 * TODO maybe convert to hash so lookups are faster
 *
 * @param repoName the repository name; it is placed in the query unescaped
 * @param page     the zero-based page index; each page holds up to 1000 entries
 * @return the stored paths of the documents on the requested page; empty when the page is past
 *         the last hit or the lookup fails (failures are logged, not rethrown)
 */
public List<String> getRepoDocuments(String repoName, int page) {
    int REPOPAGELIMIT = 1000;
    List<String> fileLocations = new ArrayList<>(REPOPAGELIMIT);
    int start = REPOPAGELIMIT * page;

    // try-with-resources: the reader used to stay open whenever anything below threw.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(Values.REPONAME + ":" + repoName);

        TopDocs results = searcher.search(query, Integer.MAX_VALUE);
        ScoreDoc[] hits = results.scoreDocs;

        // Clamp to the hits actually returned as well as to the reported total, so the window
        // can never run past the end of the array.
        int end = Math.min(Math.min(results.totalHits, hits.length), REPOPAGELIMIT * (page + 1));

        for (int i = start; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            fileLocations.add(doc.get(Values.PATH));
        }
    } catch (Exception ex) {
        LOGGER.severe("CodeSearcher getRepoDocuments caught a " + ex.getClass() + " on page " + page
                + "\n with message: " + ex.getMessage());
    }

    return fileLocations;
}
From source file:com.searchcode.app.service.CodeSearcher.java
License:Open Source License
/** * Only really used internally but does the heavy lifting of actually converting the index document on disk to the * format used internally including reading the file from disk. *//*from w ww . j a v a 2 s .c o m*/ public SearchResult doPagingSearch(IndexReader reader, IndexSearcher searcher, Query query, int page) throws IOException { TopDocs results = searcher.search(query, 20 * this.PAGELIMIT); // 20 pages worth of documents ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; int start = this.PAGELIMIT * page; int end = Math.min(numTotalHits, (this.PAGELIMIT * (page + 1))); int noPages = numTotalHits / this.PAGELIMIT; if (noPages > 20) { noPages = 19; } List<Integer> pages = this.calculatePages(numTotalHits, noPages); List<CodeResult> codeResults = new ArrayList<>(); for (int i = start; i < end; i++) { Document doc = searcher.doc(hits[i].doc); String filepath = doc.get(Values.PATH); if (filepath != null) { // This line is occasionally useful for debugging ranking, but not useful enough to have as log info //System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); List<String> code = new ArrayList<>(); try { // This should probably be limited by however deep we are meant to look into the file // or the value we use here whichever is less code = Singleton.getHelpers() .readFileLinesGuessEncoding(filepath, Singleton.getHelpers().tryParseInt( Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH, Values.DEFAULTMAXFILELINEDEPTH), Values.DEFAULTMAXFILELINEDEPTH)); } catch (Exception ex) { LOGGER.warning("Indexed file appears to binary or missing: " + filepath); } CodeResult cr = new CodeResult(code, null); cr.setCodePath(doc.get(Values.FILELOCATIONFILENAME)); cr.setFileName(doc.get(Values.FILENAME)); cr.setLanguageName(doc.get(Values.LANGUAGENAME)); cr.setMd5hash(doc.get(Values.MD5HASH)); cr.setCodeLines(doc.get(Values.CODELINES)); cr.setDocumentId(hits[i].doc); cr.setRepoLocation(doc.get(Values.REPOLOCATION)); 
cr.setRepoName(doc.get(Values.REPONAME)); cr.setCodeOwner(doc.get(Values.CODEOWNER)); cr.setCodeId(doc.get(Values.CODEID)); codeResults.add(cr); } else { LOGGER.warning((i + 1) + ". " + "No path for this document"); } } List<CodeFacetLanguage> codeFacetLanguages = this.getLanguageFacetResults(searcher, reader, query); List<CodeFacetRepo> repoFacetLanguages = this.getRepoFacetResults(searcher, reader, query); List<CodeFacetOwner> repoFacetOwner = this.getOwnerFacetResults(searcher, reader, query); return new SearchResult(numTotalHits, page, query.toString(), codeResults, pages, codeFacetLanguages, repoFacetLanguages, repoFacetOwner); }
From source file:com.searchcode.app.service.TimeCodeSearcher.java
/** * Attempts to find a unique file given the repository name and the path/filename however * it seems to randomly not find things for some files. No idea of the root cause at this point and have implemented * a work around where we get the file by getById which is no ideal. The bug appears to be due to some issue * inside lucene itself as using raw queries to pull back the file results in no matches, and yet it does appear * when not limiting to the repo/* ww w .ja v a2 s . c o m*/ * TODO investigate the lucene issue that occurs here mentioned above * TODO needs to use the revision number here as well to get the right value */ public CodeResult getByRepoFileName(String repo, String fileName) { CodeResult codeResult = null; try { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new CodeAnalyzer(); QueryParser parser = new QueryParser(CODEFIELD, analyzer); // TODO I have a feeling this may not be unique if there are to files in the same directory with different case... 
something to investigate Query query = parser .parse(Values.FILELOCATIONFILENAME + ":" + QueryParser.escape(repo + "/" + fileName)); Singleton.getLogger().info("Query to get by filename = " + Values.FILELOCATIONFILENAME + ":" + QueryParser.escape(repo + "/" + fileName)); TopDocs results = searcher.search(query, 1); ScoreDoc[] hits = results.scoreDocs; if (hits.length != 0) { Document doc = searcher.doc(hits[0].doc); String filepath = doc.get(Values.PATH); List<String> code = new ArrayList<>(); try { code = Files.readAllLines(Paths.get(filepath), StandardCharsets.UTF_8); code = Singleton.getHelpers() .readFileLines(filepath, Singleton.getHelpers().tryParseInt( Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH, Values.DEFAULTMAXFILELINEDEPTH), Values.DEFAULTMAXFILELINEDEPTH)); } catch (Exception ex) { Singleton.getLogger().info("Indexed file appears to binary: " + filepath); } codeResult = new CodeResult(code, null); codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME)); codeResult.setFileName(doc.get(Values.FILENAME)); codeResult.setLanguageName(doc.get(Values.LANGUAGENAME)); codeResult.setMd5hash(doc.get(Values.MD5HASH)); codeResult.setCodeLines(doc.get(Values.CODELINES)); codeResult.setDocumentId(hits[0].doc); codeResult.setRepoName(doc.get(Values.REPONAME)); codeResult.setRepoLocation(doc.get(Values.REPOLOCATION)); codeResult.setCodeOwner(doc.get(Values.CODEOWNER)); } reader.close(); } catch (Exception ex) { LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage()); } return codeResult; }
From source file:com.searchcode.app.service.TimeCodeSearcher.java
public List<String> getRepoDocuments(String repoName) { List<String> fileLocations = new ArrayList<>(); try {// w w w . j a v a 2 s. c o m IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new CodeAnalyzer(); QueryParser parser = new QueryParser(CODEFIELD, analyzer); Query query = parser.parse(Values.REPONAME + ":" + repoName); TopDocs results = searcher.search(query, Integer.MAX_VALUE); ScoreDoc[] hits = results.scoreDocs; for (int i = 0; i < hits.length; i++) { Document doc = searcher.doc(hits[i].doc); fileLocations.add(doc.get(Values.FILELOCATIONFILENAME)); } reader.close(); } catch (Exception ex) { LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage()); } return fileLocations; }
From source file:com.searchcode.app.service.TimeCodeSearcher.java
/** * Only really used internally but does the heavy lifting of actually converting the index document on disk to the * format used internally including reading the file from disk. *//*from w ww . j a v a 2 s . c o m*/ public SearchResult doPagingSearch(IndexReader reader, IndexSearcher searcher, Query query, int page) throws IOException { TopDocs results = searcher.search(query, 20 * this.PAGELIMIT); // 20 pages worth of documents ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; int start = this.PAGELIMIT * page; int end = Math.min(numTotalHits, (this.PAGELIMIT * (page + 1))); int noPages = numTotalHits / this.PAGELIMIT; if (noPages > 20) { noPages = 20; } List<Integer> pages = new ArrayList<>(); for (int i = 0; i < noPages; i++) { pages.add(i); } List<CodeResult> codeResults = new ArrayList<>(); for (int i = start; i < end; i++) { Document doc = searcher.doc(hits[i].doc); String filepath = doc.get(Values.PATH); if (filepath != null) { // This line is occasionally useful for debugging ranking, but not useful enough to have as log info //System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); CodeResult cr = new CodeResult(null, null); cr.setCodePath(doc.get(Values.FILELOCATIONFILENAME)); cr.setFileName(doc.get(Values.FILENAME)); cr.setLanguageName(doc.get(Values.LANGUAGENAME)); cr.setMd5hash(doc.get(Values.MD5HASH)); cr.setCodeLines(doc.get(Values.CODELINES)); cr.setDocumentId(hits[i].doc); cr.setRepoLocation(doc.get(Values.REPOLOCATION)); cr.setRepoName(doc.get(Values.REPONAME)); cr.setCodeOwner(doc.get(Values.CODEOWNER)); cr.setRevision(doc.get(Values.REVISION)); cr.setYearMonthDay(doc.get(Values.DATEYEARMONTHDAY)); cr.setMessage(doc.get(Values.MESSAGE)); cr.setDeleted(doc.get(Values.DELETED)); try { // This should probably be limited by however deep we are meant to look into the file // or the value we use here whichever is less String repoLoc = "./repo/" + cr.getRepoName() + "/.git"; cr.setCode(Arrays.asList(gitService 
.fetchFileRevision(repoLoc, cr.getRevision(), cr.getCodePath()).split("\\r?\\n"))); } catch (Exception ex) { LOGGER.warning("Indexed file appears to binary or missing: " + filepath); } codeResults.add(cr); } else { LOGGER.warning((i + 1) + ". " + "No path for this document"); } } List<CodeFacetLanguage> codeFacetLanguages = this.getLanguageFacetResults(searcher, reader, query); List<CodeFacetRepo> repoFacetLanguages = this.getRepoFacetResults(searcher, reader, query); List<CodeFacetOwner> repoFacetOwner = this.getOwnerFacetResults(searcher, reader, query); List<CodeFacetYearMonthDay> repoFacetYearMonthDay = this.getYearMonthDayFacetResults(searcher, reader, query); List<CodeFacetYearMonth> repoFacetYearMonth = this.getYearMonthFacetResults(searcher, reader, query); List<CodeFacetYear> repoFacetYear = this.getYearFacetResults(searcher, reader, query); List<CodeFacetRevision> repoFacetRevision = this.getRevisionFacetResults(searcher, reader, query); List<CodeFacetDeleted> repoFacetDeleted = this.getDeletedFacetResults(searcher, reader, query); SearchResult searchResult = new SearchResult(numTotalHits, page, query.toString(), codeResults, pages, codeFacetLanguages, repoFacetLanguages, repoFacetOwner); searchResult.setRepoFacetYearMonthDay(repoFacetYearMonthDay); searchResult.setRepoFacetYearMonth(repoFacetYearMonth); searchResult.setRepoFacetYear(repoFacetYear); searchResult.setRepoFacetRevision(repoFacetRevision); searchResult.setRepoFacetDeleted(repoFacetDeleted); return searchResult; }
From source file:com.searchlocal.lucene.ContentSearcher.java
License:Open Source License
/** * ?/* ww w. ja va 2 s .c o m*/ * * @param param ? * @return int */ public static int getCount(SearchParam param) throws IOException { // ? String indexPath = param.getIndexPath(); FSDirectory fsd = SimpleFSDirectory.open(new File(indexPath)); int count = 0; try { // ? Analyzer analyzer = new PaodingAnalyzer(); QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer); // Query query = parser.parse(param.getKeyWord()); TopScoreDocCollector collector = TopScoreDocCollector.create(100, true); IndexSearcher is = new IndexSearcher(fsd, true); is.search(query, collector); ScoreDoc[] scoreDoc = collector.topDocs().scoreDocs; count = scoreDoc.length; } catch (ParseException e) { e.printStackTrace(); } return count; }