List of usage examples for org.apache.lucene.index.DirectoryReader.open
public static DirectoryReader open(final IndexCommit commit) throws IOException
From source file:com.search.lucene.demo.facet.SimpleFacetsExample.java
License:Apache License
/** User runs a query and counts facets only without collecting the matching documents.*/ private List<FacetResult> facetsOnly() throws IOException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); FacetsCollector fc = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query: searcher.search(new MatchAllDocsQuery(), fc); // Retrieve results List<FacetResult> results = new ArrayList<>(); // Count both "Publish Date" and "Author" dimensions Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc); results.add(facets.getTopChildren(10, "Author")); results.add(facets.getTopChildren(10, "Publish Date")); indexReader.close();/* w w w . j a v a 2 s. c o m*/ taxoReader.close(); return results; }
From source file:com.searchcode.app.service.CodeSearcher.java
License:Open Source License
/** * Returns the total number of documents that are present in the index at this time *///from ww w . ja v a2s.c o m public int getTotalNumberDocumentsIndexed() { int numDocs = 0; try { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH))); numDocs = reader.numDocs(); reader.close(); } catch (Exception ex) { LOGGER.info(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage()); } return numDocs; }
From source file:com.searchcode.app.service.CodeSearcher.java
License:Open Source License
/** * Given a query and what page of results we are on return the matching results for that search *///from w ww .j a v a2 s.c om public SearchResult search(String queryString, int page) { SearchResult searchResult = new SearchResult(); statsService.incrementSearchCount(); try { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new CodeAnalyzer(); QueryParser parser = new QueryParser(CODEFIELD, analyzer); Query query = parser.parse(queryString); LOGGER.info("Searching for: " + query.toString(CODEFIELD)); LOGGER.searchLog(query.toString(CODEFIELD) + " " + page); searchResult = this.doPagingSearch(reader, searcher, query, page); reader.close(); } catch (Exception ex) { LOGGER.warning(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage()); } return searchResult; }
From source file:com.searchcode.app.service.CodeSearcher.java
License:Open Source License
/**
 * Only used as fallback if getByRepoFileName fails for some reason due to what appears to be a
 * lucene index bug. This should always work as the path used is sha1 and should be unique for
 * anything the current codebase can deal with.
 *
 * @param codeId value looked up in the {@code Values.CODEID} field; escaped before parsing
 * @return the first matching document as a {@code CodeResult}, or {@code null} when nothing
 *         matches or the lookup fails (the failure is logged as severe, not rethrown)
 */
public CodeResult getByCodeId(String codeId) {
    CodeResult codeResult = null;

    // try-with-resources closes the reader on every path, including exceptions.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);

        // Built once so the parsed query and the logged query cannot drift apart.
        String queryString = Values.CODEID + ":" + QueryParser.escape(codeId);
        Query query = parser.parse(queryString);
        Singleton.getLogger().info("Query to get by " + queryString);

        TopDocs results = searcher.search(query, 1);
        ScoreDoc[] hits = results.scoreDocs;

        if (hits.length != 0) {
            Document doc = searcher.doc(hits[0].doc);
            String filepath = doc.get(Values.PATH);

            // Stays empty when the file cannot be read; the result is still returned.
            List<String> code = new ArrayList<>();
            try {
                code = Singleton.getHelpers().readFileLinesGuessEncoding(
                        filepath,
                        Singleton.getHelpers().tryParseInt(
                                Properties.getProperties().getProperty(
                                        Values.MAXFILELINEDEPTH, Values.DEFAULTMAXFILELINEDEPTH),
                                Values.DEFAULTMAXFILELINEDEPTH));
            } catch (Exception ex) {
                Singleton.getLogger().info("Indexed file appears to binary: " + filepath);
            }

            codeResult = new CodeResult(code, null);
            codeResult.setFilePath(filepath);
            codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
            codeResult.setFileName(doc.get(Values.FILENAME));
            codeResult.setLanguageName(doc.get(Values.LANGUAGENAME));
            codeResult.setMd5hash(doc.get(Values.MD5HASH));
            codeResult.setCodeLines(doc.get(Values.CODELINES));
            codeResult.setDocumentId(hits[0].doc);
            codeResult.setRepoName(doc.get(Values.REPONAME));
            codeResult.setRepoLocation(doc.get(Values.REPOLOCATION));
            codeResult.setCodeOwner(doc.get(Values.CODEOWNER));
            codeResult.setCodeId(doc.get(Values.CODEID));
        }
    } catch (Exception ex) {
        LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return codeResult;
}
From source file:com.searchcode.app.service.CodeSearcher.java
License:Open Source License
public ProjectStats getProjectStats(String repoName) { int totalCodeLines = 0; int totalFiles = 0; List<CodeFacetLanguage> codeFacetLanguages = new ArrayList<>(); List<CodeFacetOwner> repoFacetOwners = new ArrayList<>(); List<CodeFacetLanguage> codeByLines = new ArrayList<>(); SearchcodeLib searchcodeLib = Singleton.getSearchCodeLib(); try {//from w w w . ja va2s . co m IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new CodeAnalyzer(); QueryParser parser = new QueryParser(CODEFIELD, analyzer); Query query = parser.parse(Values.REPONAME + ":" + repoName); TopDocs results = searcher.search(query, Integer.MAX_VALUE); ScoreDoc[] hits = results.scoreDocs; Map<String, Integer> linesCount = new HashMap<>(); for (int i = 0; i < results.totalHits; i++) { Document doc = searcher.doc(hits[i].doc); if (!searchcodeLib.languageCostIgnore(doc.get(Values.LANGUAGENAME))) { int lines = Singleton.getHelpers().tryParseInt(doc.get(Values.CODELINES), "0"); totalCodeLines += lines; String languageName = doc.get(Values.LANGUAGENAME).replace("_", " "); if (linesCount.containsKey(languageName)) { linesCount.put(languageName, linesCount.get(languageName) + lines); } else { linesCount.put(languageName, lines); } } } for (String key : linesCount.keySet()) { codeByLines.add(new CodeFacetLanguage(key, linesCount.get(key))); } codeByLines.sort((a, b) -> b.getCount() - a.getCount()); totalFiles = results.totalHits; codeFacetLanguages = this.getLanguageFacetResults(searcher, reader, query); repoFacetOwners = this.getOwnerFacetResults(searcher, reader, query); reader.close(); } catch (Exception ex) { LOGGER.severe("CodeSearcher getProjectStats caught a " + ex.getClass() + "\n with message: " + ex.getMessage()); } return new ProjectStats(totalCodeLines, totalFiles, codeFacetLanguages, codeByLines, repoFacetOwners); }
From source file:com.searchcode.app.service.CodeSearcher.java
License:Open Source License
/**
 * Due to very large repositories (500,000 files) this needs to support paging. Also need to
 * consider the fact that this is a list of strings.
 * TODO maybe convert to hash so lookups are faster
 *
 * @param repoName repository name matched against the {@code Values.REPONAME} field
 * @param page     zero-based page number; each page covers up to 1000 hits
 * @return the {@code Values.PATH} value of each hit on the requested page; empty when the page
 *         is past the last hit, and whatever was collected so far if the lookup fails (the
 *         failure is logged as severe, not rethrown)
 */
public List<String> getRepoDocuments(String repoName, int page) {
    final int repoPageLimit = 1000;
    List<String> fileLocations = new ArrayList<>(repoPageLimit);
    int start = repoPageLimit * page;

    // try-with-resources closes the reader even when the search or a document load throws.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(Values.REPONAME + ":" + repoName);

        TopDocs results = searcher.search(query, Integer.MAX_VALUE);
        ScoreDoc[] hits = results.scoreDocs;

        // Bound the page by the array that is actually indexed below.
        int end = Math.min(hits.length, repoPageLimit * (page + 1));
        for (int i = start; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            fileLocations.add(doc.get(Values.PATH));
        }
    } catch (Exception ex) {
        LOGGER.severe("CodeSearcher getRepoDocuments caught a " + ex.getClass() + " on page " + page
                + "\n with message: " + ex.getMessage());
    }

    return fileLocations;
}
From source file:com.searchcode.app.service.IndexService.java
License:Open Source License
/**
 * Returns the total number of documents that are present in the index at this time.
 *
 * @return the reader's {@code numDocs()} value, or 0 if an {@code IOException} occurs while
 *         opening, reading or closing the index (logged at info level, not rethrown)
 */
public int getTotalNumberDocumentsIndexed() {
    int numDocs = 0;

    // try-with-resources guarantees the reader is closed even if numDocs() throws.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(this.INDEX_LOCATION))) {
        numDocs = reader.numDocs();
    } catch (IOException ex) {
        this.logger.info(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return numDocs;
}
From source file:com.searchcode.app.service.TimeCodeSearcher.java
/**
 * Given a query and what page of results we are on, return the matching results for that search.
 *
 * <p>The search counter is incremented before the index is touched, so failed searches are
 * counted as well.
 *
 * @param queryString query text, parsed against {@code CODEFIELD} using a {@code CodeAnalyzer}
 * @param page        page of results, passed through to {@code doPagingSearch}
 * @return the result of {@code doPagingSearch}, or a freshly constructed {@code SearchResult}
 *         if parsing or searching fails (the failure is logged as a warning, not rethrown)
 */
public SearchResult search(String queryString, int page) {
    SearchResult searchResult = new SearchResult();
    statsService.incrementSearchCount();

    // try-with-resources closes the reader even when parsing or the paged search throws.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(queryString);

        LOGGER.info("Searching for: " + query.toString(CODEFIELD));

        searchResult = this.doPagingSearch(reader, searcher, query, page);
    } catch (Exception ex) {
        LOGGER.warning(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return searchResult;
}
From source file:com.searchcode.app.service.TimeCodeSearcher.java
/** * Attempts to find a unique file given the repository name and the path/filename however * it seems to randomly not find things for some files. No idea of the root cause at this point and have implemented * a work around where we get the file by getById which is no ideal. The bug appears to be due to some issue * inside lucene itself as using raw queries to pull back the file results in no matches, and yet it does appear * when not limiting to the repo/*from www .j ava 2s . c o m*/ * TODO investigate the lucene issue that occurs here mentioned above * TODO needs to use the revision number here as well to get the right value */ public CodeResult getByRepoFileName(String repo, String fileName) { CodeResult codeResult = null; try { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new CodeAnalyzer(); QueryParser parser = new QueryParser(CODEFIELD, analyzer); // TODO I have a feeling this may not be unique if there are to files in the same directory with different case... 
something to investigate Query query = parser .parse(Values.FILELOCATIONFILENAME + ":" + QueryParser.escape(repo + "/" + fileName)); Singleton.getLogger().info("Query to get by filename = " + Values.FILELOCATIONFILENAME + ":" + QueryParser.escape(repo + "/" + fileName)); TopDocs results = searcher.search(query, 1); ScoreDoc[] hits = results.scoreDocs; if (hits.length != 0) { Document doc = searcher.doc(hits[0].doc); String filepath = doc.get(Values.PATH); List<String> code = new ArrayList<>(); try { code = Files.readAllLines(Paths.get(filepath), StandardCharsets.UTF_8); code = Singleton.getHelpers() .readFileLines(filepath, Singleton.getHelpers().tryParseInt( Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH, Values.DEFAULTMAXFILELINEDEPTH), Values.DEFAULTMAXFILELINEDEPTH)); } catch (Exception ex) { Singleton.getLogger().info("Indexed file appears to binary: " + filepath); } codeResult = new CodeResult(code, null); codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME)); codeResult.setFileName(doc.get(Values.FILENAME)); codeResult.setLanguageName(doc.get(Values.LANGUAGENAME)); codeResult.setMd5hash(doc.get(Values.MD5HASH)); codeResult.setCodeLines(doc.get(Values.CODELINES)); codeResult.setDocumentId(hits[0].doc); codeResult.setRepoName(doc.get(Values.REPONAME)); codeResult.setRepoLocation(doc.get(Values.REPOLOCATION)); codeResult.setCodeOwner(doc.get(Values.CODEOWNER)); } reader.close(); } catch (Exception ex) { LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage()); } return codeResult; }
From source file:com.searchcode.app.service.TimeCodeSearcher.java
/**
 * Only used as fallback if getByRepoFileName fails for some reason due to what appears to be a
 * lucene index bug. Using this is problematic because if the index is updated while this method
 * is called it will possibly return the incorrect result. We could add a shared lock between
 * them both but that's hardly ideal, especially since when it is called the index could already
 * be updated.
 *
 * @param documentId index document id handed to {@code reader.document}
 * @return the document as a {@code CodeResult}, or {@code null} if loading it fails (the
 *         failure is logged as a warning, not rethrown)
 */
public CodeResult getById(int documentId) {
    CodeResult codeResult = null;

    // try-with-resources closes the reader even when reader.document() rejects the id.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        Document doc = reader.document(documentId);
        String filepath = doc.get(Values.PATH);

        // Stays empty when the file cannot be read as UTF-8; the result is still returned.
        List<String> code = new ArrayList<>();
        try {
            code = Files.readAllLines(Paths.get(filepath), StandardCharsets.UTF_8);
        } catch (Exception ex) {
            LOGGER.warning("Indexed file appears to binary: " + filepath);
        }

        codeResult = new CodeResult(code, null);
        codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
        codeResult.setFileName(doc.get(Values.FILENAME));
        codeResult.setLanguageName(doc.get(Values.LANGUAGENAME));
        codeResult.setMd5hash(doc.get(Values.MD5HASH));
        codeResult.setCodeLines(doc.get(Values.CODELINES));
        codeResult.setDocumentId(documentId);
        codeResult.setRepoName(doc.get(Values.REPONAME));
        codeResult.setRepoLocation(doc.get(Values.REPOLOCATION));
        codeResult.setCodeOwner(doc.get(Values.CODEOWNER));
    } catch (Exception ex) {
        LOGGER.warning(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return codeResult;
}