Example usage for org.apache.lucene.search IndexSearcher doc

List of usage examples for org.apache.lucene.search IndexSearcher doc

Introduction

On this page you can find example usages of org.apache.lucene.search.IndexSearcher.doc.

Prototype

public Document doc(int docID) throws IOException 

Source Link

Document

Sugar for .getIndexReader().document(docID)

Usage

From source file:com.redsqirl.SimpleSearcher.java

License:Open Source License

/**
 * Searches the "contents" field of the index stored in {@code indexDir} and collects the
 * stored "filename" value of every hit.
 *
 * @param indexDir directory that holds the Lucene index
 * @param queryStr query text, parsed with {@link QueryParser}
 * @param maxHits  maximum number of hits to retrieve
 * @return the "filename" value of each hit, in the order the hits were returned
 * @throws Exception if the index cannot be opened or read, or the query cannot be parsed
 */
public List<String> searchIndex(File indexDir, String queryStr, int maxHits) throws Exception {
    Directory directory = FSDirectory.open(indexDir);
    try {
        IndexSearcher searcher = new IndexSearcher(directory);
        try {
            QueryParser parser = new QueryParser(Version.LUCENE_30, "contents", new SimpleAnalyzer());
            Query query = parser.parse(queryStr);

            TopDocs topDocs = searcher.search(query, maxHits);
            ScoreDoc[] hits = topDocs.scoreDocs;

            List<String> list = new ArrayList<String>(hits.length);
            for (ScoreDoc hit : hits) {
                Document d = searcher.doc(hit.doc);
                String filename = d.get("filename");
                logger.info(filename);
                list.add(filename);
            }

            logger.info("Found " + hits.length);
            return list;
        } finally {
            // The searcher was created from the directory here, so it is released here as well,
            // even when parsing or searching throws.
            searcher.close();
        }
    } finally {
        directory.close();
    }
}

From source file:com.ricky.codelab.lucene.LuceneIndexAndSearchDemo.java

License:Apache License

/**
 * /*from   w ww .  ja  v  a2 s  .  c o m*/
 * ???
 * @param args
 */
public static void main(String[] args) {
    //Lucene Document??
    String fieldName = "text";
    //
    String text = "IK Analyzer???????";

    //IKAnalyzer?
    Analyzer analyzer = new IKAnalyzer(true);

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        //
        directory = new RAMDirectory();

        //?IndexWriterConfig
        IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);
        //
        Document doc = new Document();
        doc.add(new StringField("ID", "10000", Field.Store.YES));
        doc.add(new TextField(fieldName, text, Field.Store.YES));
        iwriter.addDocument(doc);
        iwriter.close();

        //?**********************************
        //?   
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        //QueryParser?Query
        QueryParser qp = new QueryParser(fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        //?5?
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("" + topDocs.totalHits);
        //
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < topDocs.totalHits; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("" + targetDoc.toString());
        }

    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:com.rubenlaguna.en4j.searchlucene.NoteFinderLuceneImpl.java

License:Open Source License

/**
 * Finds the notes matching {@code searchText} using prefix matching on every word.
 *
 * <p>The text is trimmed, each run of whitespace is replaced by {@code "* "}, and a trailing
 * {@code "*"} is appended when the text ends in a word character. The result is parsed against
 * the "all" field with AND as the default operator. For every matching document the stored
 * "id" field is parsed as an int and the note is looked up through {@code nr.get(id, false)}.
 *
 * <p>Parse and I/O failures are reported through {@code Exceptions.printStackTrace} and an
 * {@link IllegalStateException} is logged; in those cases whatever was collected so far is
 * returned.
 *
 * @param searchText the raw text typed by the user
 * @return the matching notes; an empty list when {@code searchText} is blank
 */
public Collection<Note> find(String searchText) {
    if ("".equals(searchText.trim())) {
        // Typed empty list instead of the raw Collections.EMPTY_LIST constant
        return Collections.<Note>emptyList();
    }
    long start = System.currentTimeMillis();
    searchText = searchText.trim();
    String patternStr = "\\s+";
    String replaceStr = "* ";
    Pattern pattern = Pattern.compile(patternStr);
    Matcher matcher = pattern.matcher(searchText);
    // Turn every word but the last into a prefix query: "foo bar" -> "foo* bar"
    searchText = matcher.replaceAll(replaceStr);
    if (Pattern.matches(".*\\w$", searchText)) {
        // ...and the last word too, unless the text already ends in a non-word character
        searchText = searchText + "*";
    }

    LOG.info("search text:" + searchText);
    final Collection<Note> toReturn = new ArrayList<Note>();

    try {
        // Pick up index changes made since the last search; reopen() hands back the same
        // instance when nothing changed, so only close the old reader when it was replaced.
        IndexReader newReader = reader.reopen();
        if (newReader != reader) {
            reader.close();
        }
        reader = newReader;
        LOG.info("using index version: " + reader.getVersion());
        final IndexSearcher searcher = new IndexSearcher(reader);

        final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
        QueryParser parser = new CustomQueryParser("all", analyzer);
        parser.setDefaultOperator(QueryParser.Operator.AND);

        Query query = parser.parse(searchText);
        LOG.info("query =" + query.toString());

        // Collects every hit (no scoring, no ordering) and resolves it to a Note
        Collector collector = new Collector() {

            // Offset of the current sub-reader's documents within the whole index
            private int docBase = 0;

            @Override
            public void setScorer(Scorer scorer) throws IOException {
                // Scores are not used
            }

            @Override
            public void collect(int doc) throws IOException {
                // doc is relative to the current sub-reader; add docBase to address the index
                int scoreId = doc + docBase;
                Document document = searcher.doc(scoreId);
                final String stringValue = document.getField("id").stringValue();
                int docId = Integer.parseInt(stringValue);
                LOG.fine("doc id " + stringValue + " matches the search.");
                toReturn.add(nr.get(docId, false));
            }

            @Override
            public void setNextReader(IndexReader reader, int docBase) throws IOException {
                this.docBase = docBase;
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }
        };
        searcher.search(query, collector);
        searcher.close();
    } catch (ParseException ex) {
        Exceptions.printStackTrace(ex);
    } catch (CorruptIndexException ex) {
        Exceptions.printStackTrace(ex);
    } catch (IOException ex) {
        Exceptions.printStackTrace(ex);
    } catch (IllegalStateException ex) {
        LOG.info("caught " + ex.getMessage() + ". Most likely the app is shutting down");
    }
    long delta = System.currentTimeMillis() - start;
    Installer.mbean.sampleSearchTime(delta);
    LOG.info("find took " + delta / 1000.0 + " secs. " + toReturn.size() + " results found");
    return toReturn;
}

From source file:com.searchcode.app.service.CodeSearcher.java

License:Open Source License

/**
 * Looks up a single indexed file by its code id.
 *
 * <p>Only used as a fallback if getByRepoFileName fails for some reason due to what appears to
 * be a Lucene index bug. This should always work as the path used is sha1 and should be unique
 * for anything the current codebase can deal with.
 *
 * @param codeId the code id to search for; it is escaped before being parsed as a query
 * @return the populated result, or {@code null} when nothing matches or an error occurs
 */
public CodeResult getByCodeId(String codeId) {
    CodeResult codeResult = null;

    // try-with-resources: the reader is closed even when parsing, searching or reading throws
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);

        String queryText = Values.CODEID + ":" + QueryParser.escape(codeId);
        Query query = parser.parse(queryText);
        Singleton.getLogger().info("Query to get by " + queryText);

        TopDocs results = searcher.search(query, 1);
        ScoreDoc[] hits = results.scoreDocs;

        if (hits.length != 0) {
            Document doc = searcher.doc(hits[0].doc);

            String filepath = doc.get(Values.PATH);

            // Stays empty when the file cannot be read
            List<String> code = new ArrayList<>();
            try {
                code = Singleton.getHelpers()
                        .readFileLinesGuessEncoding(filepath,
                                Singleton.getHelpers().tryParseInt(
                                        Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH,
                                                Values.DEFAULTMAXFILELINEDEPTH),
                                        Values.DEFAULTMAXFILELINEDEPTH));
            } catch (Exception ex) {
                Singleton.getLogger().info("Indexed file appears to binary: " + filepath);
            }

            codeResult = new CodeResult(code, null);
            codeResult.setFilePath(filepath);
            codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
            codeResult.setFileName(doc.get(Values.FILENAME));
            codeResult.setLanguageName(doc.get(Values.LANGUAGENAME));
            codeResult.setMd5hash(doc.get(Values.MD5HASH));
            codeResult.setCodeLines(doc.get(Values.CODELINES));
            codeResult.setDocumentId(hits[0].doc);
            codeResult.setRepoName(doc.get(Values.REPONAME));
            codeResult.setRepoLocation(doc.get(Values.REPOLOCATION));
            codeResult.setCodeOwner(doc.get(Values.CODEOWNER));
            codeResult.setCodeId(doc.get(Values.CODEID));
        }
    } catch (Exception ex) {
        LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return codeResult;
}

From source file:com.searchcode.app.service.CodeSearcher.java

License:Open Source License

/**
 * Aggregates statistics for every indexed file of one repository: total line count, file
 * count, lines per language (sorted descending) and the language and owner facets.
 *
 * <p>Any exception is logged and the statistics gathered up to that point are returned.
 *
 * @param repoName repository name; NOTE(review): it is concatenated into the query without
 *                 {@code QueryParser.escape} - confirm callers never pass query syntax
 * @return the project statistics, never {@code null}
 */
public ProjectStats getProjectStats(String repoName) {
    int totalCodeLines = 0;
    int totalFiles = 0;
    List<CodeFacetLanguage> codeFacetLanguages = new ArrayList<>();
    List<CodeFacetOwner> repoFacetOwners = new ArrayList<>();
    List<CodeFacetLanguage> codeByLines = new ArrayList<>();
    SearchcodeLib searchcodeLib = Singleton.getSearchCodeLib();

    // try-with-resources: the reader is closed even when the search or a facet lookup throws
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(Values.REPONAME + ":" + repoName);

        TopDocs results = searcher.search(query, Integer.MAX_VALUE);

        Map<String, Integer> linesCount = new HashMap<>();

        // Walk the hits that were actually returned rather than counting up to totalHits
        for (ScoreDoc hit : results.scoreDocs) {
            Document doc = searcher.doc(hit.doc);

            if (!searchcodeLib.languageCostIgnore(doc.get(Values.LANGUAGENAME))) {
                int lines = Singleton.getHelpers().tryParseInt(doc.get(Values.CODELINES), "0");
                totalCodeLines += lines;
                String languageName = doc.get(Values.LANGUAGENAME).replace("_", " ");

                linesCount.merge(languageName, lines, Integer::sum);
            }
        }

        for (Map.Entry<String, Integer> entry : linesCount.entrySet()) {
            codeByLines.add(new CodeFacetLanguage(entry.getKey(), entry.getValue()));
        }
        // Largest line count first; Integer.compare avoids comparing by subtraction
        codeByLines.sort((a, b) -> Integer.compare(b.getCount(), a.getCount()));

        totalFiles = results.totalHits;
        codeFacetLanguages = this.getLanguageFacetResults(searcher, reader, query);
        repoFacetOwners = this.getOwnerFacetResults(searcher, reader, query);
    } catch (Exception ex) {
        LOGGER.severe("CodeSearcher getProjectStats caught a " + ex.getClass() + "\n with message: "
                + ex.getMessage());
    }

    return new ProjectStats(totalCodeLines, totalFiles, codeFacetLanguages, codeByLines, repoFacetOwners);
}

From source file:com.searchcode.app.service.CodeSearcher.java

License:Open Source License

/**
 * Returns one page of the indexed file paths that belong to a repository.
 *
 * <p>Due to very large repositories (500,000 files) this needs to support paging. Also need to
 * consider the fact that the result is a list of strings.
 * TODO maybe convert to hash so lookups are faster
 *
 * @param repoName repository name; NOTE(review): it is concatenated into the query without
 *                 {@code QueryParser.escape} - confirm callers never pass query syntax
 * @param page     zero-based page number; each page holds up to 1000 entries
 * @return the {@code Values.PATH} value of each document on the page; empty when the page is
 *         past the last hit or an error occurs
 */
public List<String> getRepoDocuments(String repoName, int page) {
    final int repoPageLimit = 1000;
    List<String> fileLocations = new ArrayList<>(repoPageLimit);
    int start = repoPageLimit * page;

    // try-with-resources: the reader is closed even when parsing or searching throws
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(Values.REPONAME + ":" + repoName);

        TopDocs results = searcher.search(query, Integer.MAX_VALUE);
        ScoreDoc[] hits = results.scoreDocs;
        // Never read past the hits that were actually returned
        int end = Math.min(hits.length, Math.min(results.totalHits, repoPageLimit * (page + 1)));

        for (int i = start; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            fileLocations.add(doc.get(Values.PATH));
        }
    } catch (Exception ex) {
        LOGGER.severe("CodeSearcher getRepoDocuments caught a " + ex.getClass() + " on page " + page
                + "\n with message: " + ex.getMessage());
    }

    return fileLocations;
}

From source file:com.searchcode.app.service.CodeSearcher.java

License:Open Source License

/**
 * Only really used internally but does the heavy lifting of actually converting the index
 * document on disk to the format used internally, including reading the file from disk.
 *
 * @param reader   reader over the index, passed through to the facet lookups
 * @param searcher searcher used to run the query and load the documents
 * @param query    the query to run
 * @param page     zero-based page number; each page holds up to {@code PAGELIMIT} results
 * @return the results of the requested page together with the paging and facet information
 * @throws IOException if searching the index or loading a document fails
 */
public SearchResult doPagingSearch(IndexReader reader, IndexSearcher searcher, Query query, int page)
        throws IOException {
    TopDocs results = searcher.search(query, 20 * this.PAGELIMIT); // 20 pages worth of documents
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    int start = this.PAGELIMIT * page;
    // totalHits counts every match, but at most 20 pages of hits were fetched above, so the end
    // of the page must also be bounded by the number of hits actually returned.
    int end = Math.min(hits.length, Math.min(numTotalHits, (this.PAGELIMIT * (page + 1))));
    int noPages = numTotalHits / this.PAGELIMIT;

    // NOTE(review): a count above 20 is clamped to 19 while exactly 20 is left as is - confirm
    // this is what calculatePages expects
    if (noPages > 20) {
        noPages = 19;
    }

    List<Integer> pages = this.calculatePages(numTotalHits, noPages);

    List<CodeResult> codeResults = new ArrayList<>();

    for (int i = start; i < end; i++) {
        Document doc = searcher.doc(hits[i].doc);

        String filepath = doc.get(Values.PATH);

        if (filepath != null) {
            // This line is occasionally useful for debugging ranking, but not useful enough to have as log info
            //System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);

            // Stays empty when the file cannot be read
            List<String> code = new ArrayList<>();
            try {
                // This should probably be limited by however deep we are meant to look into the file
                // or the value we use here whichever is less
                code = Singleton.getHelpers()
                        .readFileLinesGuessEncoding(filepath,
                                Singleton.getHelpers().tryParseInt(
                                        Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH,
                                                Values.DEFAULTMAXFILELINEDEPTH),
                                        Values.DEFAULTMAXFILELINEDEPTH));
            } catch (Exception ex) {
                LOGGER.warning("Indexed file appears to binary or missing: " + filepath);
            }

            CodeResult cr = new CodeResult(code, null);
            cr.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
            cr.setFileName(doc.get(Values.FILENAME));
            cr.setLanguageName(doc.get(Values.LANGUAGENAME));
            cr.setMd5hash(doc.get(Values.MD5HASH));
            cr.setCodeLines(doc.get(Values.CODELINES));
            cr.setDocumentId(hits[i].doc);
            cr.setRepoLocation(doc.get(Values.REPOLOCATION));
            cr.setRepoName(doc.get(Values.REPONAME));
            cr.setCodeOwner(doc.get(Values.CODEOWNER));
            cr.setCodeId(doc.get(Values.CODEID));

            codeResults.add(cr);
        } else {
            LOGGER.warning((i + 1) + ". " + "No path for this document");
        }
    }

    List<CodeFacetLanguage> codeFacetLanguages = this.getLanguageFacetResults(searcher, reader, query);
    List<CodeFacetRepo> repoFacetLanguages = this.getRepoFacetResults(searcher, reader, query);
    List<CodeFacetOwner> repoFacetOwner = this.getOwnerFacetResults(searcher, reader, query);

    return new SearchResult(numTotalHits, page, query.toString(), codeResults, pages, codeFacetLanguages,
            repoFacetLanguages, repoFacetOwner);
}

From source file:com.searchcode.app.service.TimeCodeSearcher.java

/**
 * Attempts to find a unique file given the repository name and the path/filename, however
 * it seems to randomly not find things for some files. No idea of the root cause at this point
 * and a workaround has been implemented where the file is fetched by getById, which is not
 * ideal. The bug appears to be due to some issue inside Lucene itself, as using raw queries to
 * pull back the file results in no matches, and yet it does appear when not limiting to the
 * repo.
 * TODO investigate the lucene issue that occurs here mentioned above
 * TODO needs to use the revision number here as well to get the right value
 *
 * @param repo     repository name, used as the first path segment of the lookup key
 * @param fileName path/filename inside the repository
 * @return the populated result, or {@code null} when nothing matches or an error occurs
 */
public CodeResult getByRepoFileName(String repo, String fileName) {
    CodeResult codeResult = null;

    // try-with-resources: the reader is closed even when parsing, searching or reading throws
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);

        // TODO I have a feeling this may not be unique if there are two files in the same directory with different case... something to investigate
        String queryText = Values.FILELOCATIONFILENAME + ":" + QueryParser.escape(repo + "/" + fileName);
        Query query = parser.parse(queryText);
        Singleton.getLogger().info("Query to get by filename = " + queryText);

        TopDocs results = searcher.search(query, 1);
        ScoreDoc[] hits = results.scoreDocs;

        if (hits.length != 0) {
            Document doc = searcher.doc(hits[0].doc);

            String filepath = doc.get(Values.PATH);

            // Stays empty when the file cannot be read
            List<String> code = new ArrayList<>();
            try {
                // NOTE(review): the result of this read is overwritten by the next statement; its
                // only observable effect is to throw (leaving code empty) when the file is missing
                // or is not valid UTF-8. Confirm whether that check is intended before removing it.
                code = Files.readAllLines(Paths.get(filepath), StandardCharsets.UTF_8);
                code = Singleton.getHelpers()
                        .readFileLines(filepath,
                                Singleton.getHelpers().tryParseInt(
                                        Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH,
                                                Values.DEFAULTMAXFILELINEDEPTH),
                                        Values.DEFAULTMAXFILELINEDEPTH));
            } catch (Exception ex) {
                Singleton.getLogger().info("Indexed file appears to binary: " + filepath);
            }

            codeResult = new CodeResult(code, null);
            codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
            codeResult.setFileName(doc.get(Values.FILENAME));
            codeResult.setLanguageName(doc.get(Values.LANGUAGENAME));
            codeResult.setMd5hash(doc.get(Values.MD5HASH));
            codeResult.setCodeLines(doc.get(Values.CODELINES));
            codeResult.setDocumentId(hits[0].doc);
            codeResult.setRepoName(doc.get(Values.REPONAME));
            codeResult.setRepoLocation(doc.get(Values.REPOLOCATION));
            codeResult.setCodeOwner(doc.get(Values.CODEOWNER));
        }
    } catch (Exception ex) {
        LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return codeResult;
}

From source file:com.searchcode.app.service.TimeCodeSearcher.java

/**
 * Returns the {@code Values.FILELOCATIONFILENAME} value of every indexed document that belongs
 * to the given repository.
 *
 * @param repoName repository name; NOTE(review): it is concatenated into the query without
 *                 {@code QueryParser.escape} - confirm callers never pass query syntax
 * @return the file locations found; empty when nothing matches, and whatever was collected
 *         so far when an error occurs
 */
public List<String> getRepoDocuments(String repoName) {
    List<String> fileLocations = new ArrayList<>();

    // try-with-resources: the reader is closed even when parsing or searching throws
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(Values.REPONAME + ":" + repoName);

        TopDocs results = searcher.search(query, Integer.MAX_VALUE);

        for (ScoreDoc hit : results.scoreDocs) {
            Document doc = searcher.doc(hit.doc);
            fileLocations.add(doc.get(Values.FILELOCATIONFILENAME));
        }
    } catch (Exception ex) {
        LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return fileLocations;
}

From source file:com.searchcode.app.service.TimeCodeSearcher.java

/**
 * Only really used internally but does the heavy lifting of actually converting the index
 * document on disk to the format used internally, including fetching the file content for the
 * indexed revision through {@code gitService}.
 *
 * @param reader   reader over the index, passed through to the facet lookups
 * @param searcher searcher used to run the query and load the documents
 * @param query    the query to run
 * @param page     zero-based page number; each page holds up to {@code PAGELIMIT} results
 * @return the results of the requested page together with the paging and facet information
 * @throws IOException if searching the index or loading a document fails
 */
public SearchResult doPagingSearch(IndexReader reader, IndexSearcher searcher, Query query, int page)
        throws IOException {
    TopDocs results = searcher.search(query, 20 * this.PAGELIMIT); // 20 pages worth of documents
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    int start = this.PAGELIMIT * page;
    // totalHits counts every match, but at most 20 pages of hits were fetched above, so the end
    // of the page must also be bounded by the number of hits actually returned.
    int end = Math.min(hits.length, Math.min(numTotalHits, (this.PAGELIMIT * (page + 1))));
    int noPages = numTotalHits / this.PAGELIMIT;

    if (noPages > 20) {
        noPages = 20;
    }

    List<Integer> pages = new ArrayList<>();
    for (int i = 0; i < noPages; i++) {
        pages.add(i);
    }

    List<CodeResult> codeResults = new ArrayList<>();

    for (int i = start; i < end; i++) {
        Document doc = searcher.doc(hits[i].doc);

        String filepath = doc.get(Values.PATH);

        if (filepath != null) {
            // This line is occasionally useful for debugging ranking, but not useful enough to have as log info
            //System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);

            CodeResult cr = new CodeResult(null, null);
            cr.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
            cr.setFileName(doc.get(Values.FILENAME));
            cr.setLanguageName(doc.get(Values.LANGUAGENAME));
            cr.setMd5hash(doc.get(Values.MD5HASH));
            cr.setCodeLines(doc.get(Values.CODELINES));
            cr.setDocumentId(hits[i].doc);
            cr.setRepoLocation(doc.get(Values.REPOLOCATION));
            cr.setRepoName(doc.get(Values.REPONAME));
            cr.setCodeOwner(doc.get(Values.CODEOWNER));
            cr.setRevision(doc.get(Values.REVISION));
            cr.setYearMonthDay(doc.get(Values.DATEYEARMONTHDAY));
            cr.setMessage(doc.get(Values.MESSAGE));
            cr.setDeleted(doc.get(Values.DELETED));

            try {
                // This should probably be limited by however deep we are meant to look into the file
                // or the value we use here whichever is less
                String repoLoc = "./repo/" + cr.getRepoName() + "/.git";
                cr.setCode(Arrays.asList(gitService
                        .fetchFileRevision(repoLoc, cr.getRevision(), cr.getCodePath()).split("\\r?\\n")));
            } catch (Exception ex) {
                // The result is still returned, just without code attached
                LOGGER.warning("Indexed file appears to binary or missing: " + filepath);
            }

            codeResults.add(cr);
        } else {
            LOGGER.warning((i + 1) + ". " + "No path for this document");
        }
    }

    List<CodeFacetLanguage> codeFacetLanguages = this.getLanguageFacetResults(searcher, reader, query);
    List<CodeFacetRepo> repoFacetLanguages = this.getRepoFacetResults(searcher, reader, query);
    List<CodeFacetOwner> repoFacetOwner = this.getOwnerFacetResults(searcher, reader, query);
    List<CodeFacetYearMonthDay> repoFacetYearMonthDay = this.getYearMonthDayFacetResults(searcher, reader,
            query);
    List<CodeFacetYearMonth> repoFacetYearMonth = this.getYearMonthFacetResults(searcher, reader, query);
    List<CodeFacetYear> repoFacetYear = this.getYearFacetResults(searcher, reader, query);
    List<CodeFacetRevision> repoFacetRevision = this.getRevisionFacetResults(searcher, reader, query);
    List<CodeFacetDeleted> repoFacetDeleted = this.getDeletedFacetResults(searcher, reader, query);

    SearchResult searchResult = new SearchResult(numTotalHits, page, query.toString(), codeResults, pages,
            codeFacetLanguages, repoFacetLanguages, repoFacetOwner);

    searchResult.setRepoFacetYearMonthDay(repoFacetYearMonthDay);
    searchResult.setRepoFacetYearMonth(repoFacetYearMonth);
    searchResult.setRepoFacetYear(repoFacetYear);
    searchResult.setRepoFacetRevision(repoFacetRevision);
    searchResult.setRepoFacetDeleted(repoFacetDeleted);

    return searchResult;
}