Example usage for org.apache.lucene.index DirectoryReader open

Introduction

In this page you can find the example usage for org.apache.lucene.index DirectoryReader open.

Prototype

public static DirectoryReader open(final IndexCommit commit) throws IOException

Source Link

Document

Expert: returns an IndexReader reading the index in the given IndexCommit .

Usage

From source file:com.search.lucene.demo.facet.SimpleFacetsExample.java

License:Apache License

/** User runs a query and counts facets only without collecting the matching documents.*/
private List<FacetResult> facetsOnly() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

    FacetsCollector fc = new FacetsCollector();

    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query:
    searcher.search(new MatchAllDocsQuery(), fc);

    // Retrieve results
    List<FacetResult> results = new ArrayList<>();

    // Count both "Publish Date" and "Author" dimensions
    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);

    results.add(facets.getTopChildren(10, "Author"));
    results.add(facets.getTopChildren(10, "Publish Date"));

    indexReader.close();/* w w w . j  a v  a  2  s.  c o  m*/
    taxoReader.close();

    return results;
}

From source file:com.searchcode.app.service.CodeSearcher.java

License:Open Source License

/**
 * Returns the total number of documents that are present in the index at this time
 *///from   ww  w  . ja  v  a2s.c  o  m
public int getTotalNumberDocumentsIndexed() {
    int numDocs = 0;
    try {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)));

        numDocs = reader.numDocs();
        reader.close();
    } catch (Exception ex) {
        LOGGER.info(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return numDocs;
}

From source file:com.searchcode.app.service.CodeSearcher.java

License:Open Source License

/**
 * Given a query and what page of results we are on return the matching results for that search
 *///from  w ww  .j  a  v a2 s.c om
public SearchResult search(String queryString, int page) {
    SearchResult searchResult = new SearchResult();
    statsService.incrementSearchCount();

    try {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)));
        IndexSearcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new CodeAnalyzer();

        QueryParser parser = new QueryParser(CODEFIELD, analyzer);

        Query query = parser.parse(queryString);
        LOGGER.info("Searching for: " + query.toString(CODEFIELD));
        LOGGER.searchLog(query.toString(CODEFIELD) + " " + page);

        searchResult = this.doPagingSearch(reader, searcher, query, page);
        reader.close();
    } catch (Exception ex) {
        LOGGER.warning(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return searchResult;
}

From source file:com.searchcode.app.service.CodeSearcher.java

License:Open Source License

/**
 * Only used as fallback if getByRepoFileName fails for some reason due to what appears to be a lucene index bug
 * this should always work as the path used is sha1 and should be unique for anything the current codebase can
 * deal with//from w  w w. j a  va2s  .  c om
 */
public CodeResult getByCodeId(String codeId) {
    CodeResult codeResult = null;

    try {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)));
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);

        Query query = parser.parse(Values.CODEID + ":" + QueryParser.escape(codeId));
        Singleton.getLogger().info("Query to get by " + Values.CODEID + ":" + QueryParser.escape(codeId));

        TopDocs results = searcher.search(query, 1);
        ScoreDoc[] hits = results.scoreDocs;

        if (hits.length != 0) {
            Document doc = searcher.doc(hits[0].doc);

            String filepath = doc.get(Values.PATH);

            List<String> code = new ArrayList<>();
            try {
                code = Singleton.getHelpers()
                        .readFileLinesGuessEncoding(filepath,
                                Singleton.getHelpers().tryParseInt(
                                        Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH,
                                                Values.DEFAULTMAXFILELINEDEPTH),
                                        Values.DEFAULTMAXFILELINEDEPTH));
            } catch (Exception ex) {
                Singleton.getLogger().info("Indexed file appears to binary: " + filepath);
            }

            codeResult = new CodeResult(code, null);
            codeResult.setFilePath(filepath);
            codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
            codeResult.setFileName(doc.get(Values.FILENAME));
            codeResult.setLanguageName(doc.get(Values.LANGUAGENAME));
            codeResult.setMd5hash(doc.get(Values.MD5HASH));
            codeResult.setCodeLines(doc.get(Values.CODELINES));
            codeResult.setDocumentId(hits[0].doc);
            codeResult.setRepoName(doc.get(Values.REPONAME));
            codeResult.setRepoLocation(doc.get(Values.REPOLOCATION));
            codeResult.setCodeOwner(doc.get(Values.CODEOWNER));
            codeResult.setCodeId(doc.get(Values.CODEID));
        }

        reader.close();
    } catch (Exception ex) {
        LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return codeResult;
}

From source file:com.searchcode.app.service.CodeSearcher.java

License:Open Source License

public ProjectStats getProjectStats(String repoName) {
    int totalCodeLines = 0;
    int totalFiles = 0;
    List<CodeFacetLanguage> codeFacetLanguages = new ArrayList<>();
    List<CodeFacetOwner> repoFacetOwners = new ArrayList<>();
    List<CodeFacetLanguage> codeByLines = new ArrayList<>();
    SearchcodeLib searchcodeLib = Singleton.getSearchCodeLib();

    try {//from w  w w  . ja va2s . co m
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)));
        IndexSearcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(Values.REPONAME + ":" + repoName);

        TopDocs results = searcher.search(query, Integer.MAX_VALUE);
        ScoreDoc[] hits = results.scoreDocs;

        Map<String, Integer> linesCount = new HashMap<>();

        for (int i = 0; i < results.totalHits; i++) {
            Document doc = searcher.doc(hits[i].doc);

            if (!searchcodeLib.languageCostIgnore(doc.get(Values.LANGUAGENAME))) {
                int lines = Singleton.getHelpers().tryParseInt(doc.get(Values.CODELINES), "0");
                totalCodeLines += lines;
                String languageName = doc.get(Values.LANGUAGENAME).replace("_", " ");

                if (linesCount.containsKey(languageName)) {
                    linesCount.put(languageName, linesCount.get(languageName) + lines);
                } else {
                    linesCount.put(languageName, lines);
                }
            }
        }

        for (String key : linesCount.keySet()) {
            codeByLines.add(new CodeFacetLanguage(key, linesCount.get(key)));
        }
        codeByLines.sort((a, b) -> b.getCount() - a.getCount());

        totalFiles = results.totalHits;
        codeFacetLanguages = this.getLanguageFacetResults(searcher, reader, query);
        repoFacetOwners = this.getOwnerFacetResults(searcher, reader, query);

        reader.close();
    } catch (Exception ex) {
        LOGGER.severe("CodeSearcher getProjectStats caught a " + ex.getClass() + "\n with message: "
                + ex.getMessage());
    }

    return new ProjectStats(totalCodeLines, totalFiles, codeFacetLanguages, codeByLines, repoFacetOwners);
}

From source file:com.searchcode.app.service.CodeSearcher.java

License:Open Source License

/**
 * Due to very large repositories (500,000 files) this needs to support
 * paging. Also need to consider the fact that is a list of strings
 * TODO maybe convert to hash so lookups are faster
 *//*from w  ww .ja  v a  2s  .c o  m*/
public List<String> getRepoDocuments(String repoName, int page) {
    int REPOPAGELIMIT = 1000;
    List<String> fileLocations = new ArrayList<>(REPOPAGELIMIT);
    int start = REPOPAGELIMIT * page;

    try {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)));
        IndexSearcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(Values.REPONAME + ":" + repoName);

        TopDocs results = searcher.search(query, Integer.MAX_VALUE);
        int end = Math.min(results.totalHits, (REPOPAGELIMIT * (page + 1)));
        ScoreDoc[] hits = results.scoreDocs;

        for (int i = start; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            fileLocations.add(doc.get(Values.PATH));
        }

        reader.close();
    } catch (Exception ex) {
        LOGGER.severe("CodeSearcher getRepoDocuments caught a " + ex.getClass() + " on page " + page
                + "\n with message: " + ex.getMessage());
    }

    return fileLocations;
}

From source file:com.searchcode.app.service.IndexService.java

License:Open Source License

/**
 * Returns the total number of documents that are present in the index at this time
 *//*  w  w w.  j  a  v a  2s .c  om*/
public int getTotalNumberDocumentsIndexed() {
    int numDocs = 0;
    try {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(this.INDEX_LOCATION));
        numDocs = reader.numDocs();
        reader.close();
    } catch (IOException ex) {
        this.logger.info(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return numDocs;
}

From source file:com.searchcode.app.service.TimeCodeSearcher.java

/**
 * Given a query and what page of results we are on return the matching results for that search
 *//*from w  w w . j  a  v  a2 s  . com*/
public SearchResult search(String queryString, int page) {
    SearchResult searchResult = new SearchResult();
    statsService.incrementSearchCount();

    try {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)));
        IndexSearcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new CodeAnalyzer();

        QueryParser parser = new QueryParser(CODEFIELD, analyzer);

        Query query = parser.parse(queryString);
        LOGGER.info("Searching for: " + query.toString(CODEFIELD));

        searchResult = this.doPagingSearch(reader, searcher, query, page);
        reader.close();
    } catch (Exception ex) {
        LOGGER.warning(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return searchResult;
}

From source file:com.searchcode.app.service.TimeCodeSearcher.java

/**
 * Attempts to find a unique file given the repository name and the path/filename however
 * it seems to randomly not find things for some files. No idea of the root cause at this point and have implemented
 * a work around where we get the file by getById which is no ideal. The bug appears to be due to some issue
 * inside lucene itself as using raw queries to pull back the file results in no matches, and yet it does appear
 * when not limiting to the repo/*from  www  .j  ava  2s  . c o m*/
 * TODO investigate the lucene issue that occurs here mentioned above
 * TODO needs to use the revision number here as well to get the right value
 */
public CodeResult getByRepoFileName(String repo, String fileName) {
    CodeResult codeResult = null;

    try {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)));
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);

        // TODO I have a feeling this may not be unique if there are to files in the same directory with different case... something to investigate
        Query query = parser
                .parse(Values.FILELOCATIONFILENAME + ":" + QueryParser.escape(repo + "/" + fileName));
        Singleton.getLogger().info("Query to get by filename = " + Values.FILELOCATIONFILENAME + ":"
                + QueryParser.escape(repo + "/" + fileName));

        TopDocs results = searcher.search(query, 1);
        ScoreDoc[] hits = results.scoreDocs;

        if (hits.length != 0) {
            Document doc = searcher.doc(hits[0].doc);

            String filepath = doc.get(Values.PATH);

            List<String> code = new ArrayList<>();
            try {
                code = Files.readAllLines(Paths.get(filepath), StandardCharsets.UTF_8);
                code = Singleton.getHelpers()
                        .readFileLines(filepath,
                                Singleton.getHelpers().tryParseInt(
                                        Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH,
                                                Values.DEFAULTMAXFILELINEDEPTH),
                                        Values.DEFAULTMAXFILELINEDEPTH));
            } catch (Exception ex) {
                Singleton.getLogger().info("Indexed file appears to binary: " + filepath);
            }

            codeResult = new CodeResult(code, null);
            codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
            codeResult.setFileName(doc.get(Values.FILENAME));
            codeResult.setLanguageName(doc.get(Values.LANGUAGENAME));
            codeResult.setMd5hash(doc.get(Values.MD5HASH));
            codeResult.setCodeLines(doc.get(Values.CODELINES));
            codeResult.setDocumentId(hits[0].doc);
            codeResult.setRepoName(doc.get(Values.REPONAME));
            codeResult.setRepoLocation(doc.get(Values.REPOLOCATION));
            codeResult.setCodeOwner(doc.get(Values.CODEOWNER));
        }

        reader.close();

    } catch (Exception ex) {
        LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return codeResult;
}

From source file:com.searchcode.app.service.TimeCodeSearcher.java

/**
 * Only used as fallback if getByRepoFileName fails for some reason due to what appears to be a lucene index bug
 * Using this is problematic because if the index is updated while this method is called it will possibly
 * return the incorrect result. We could add a shared lock between them both but that's hardly ideal especially
 * since when its called the index could already be updated
 *//*from w  ww.  ja  v  a 2 s . c o m*/
public CodeResult getById(int documentId) {
    CodeResult codeResult = null;

    try {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)));
        Document doc = reader.document(documentId);

        String filepath = doc.get(Values.PATH);

        List<String> code = new ArrayList<>();
        try {
            code = Files.readAllLines(Paths.get(filepath), StandardCharsets.UTF_8);
        } catch (Exception ex) {
            LOGGER.warning("Indexed file appears to binary: " + filepath);
        }

        codeResult = new CodeResult(code, null);
        codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
        codeResult.setFileName(doc.get(Values.FILENAME));
        codeResult.setLanguageName(doc.get(Values.LANGUAGENAME));
        codeResult.setMd5hash(doc.get(Values.MD5HASH));
        codeResult.setCodeLines(doc.get(Values.CODELINES));
        codeResult.setDocumentId(documentId);
        codeResult.setRepoName(doc.get(Values.REPONAME));
        codeResult.setRepoLocation(doc.get(Values.REPOLOCATION));
        codeResult.setCodeOwner(doc.get(Values.CODEOWNER));

        reader.close();
    } catch (Exception ex) {
        LOGGER.warning(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return codeResult;
}