Example usage for org.apache.lucene.search.highlight QueryTermExtractor getTerms

List of usage examples for org.apache.lucene.search.highlight QueryTermExtractor getTerms

Introduction

In this page you can find the example usage for org.apache.lucene.search.highlight QueryTermExtractor getTerms.

Prototype

public static final WeightedTerm[] getTerms(Query query) 

Source Link

Document

Extracts all term texts of a given Query into an array of WeightedTerms.

Usage

From source file:org.conifer.MyBean.java

License:Apache License

/**
 * Highlights the query's terms inside a search-result snippet by wrapping each
 * case-insensitive match in the given tag, then trims the snippet to roughly
 * {@code strLen} characters, adding "..." at any clipped edge.
 *
 * @param queryString  raw user query, parsed with Lucene's QueryParser on the "text" field
 * @param resultString snippet text in which matched terms are highlighted
 * @param tag          tag name without angle brackets, e.g. "b" produces &lt;b&gt;term&lt;/b&gt;
 * @param strLen       target display length as a decimal string
 * @return the highlighted, possibly truncated display string
 * @throws org.apache.lucene.queryParser.ParseException if queryString cannot be parsed
 * @throws NumberFormatException if strLen is not a valid integer
 */
public static String sortOutTerms(String queryString, String resultString, String tag, String strLen)
        throws org.apache.lucene.queryParser.ParseException {
    String startTag = "<" + tag + ">";
    String endTag = "</" + tag + ">";
    // StringBuilder: this buffer is method-local, so the synchronized
    // StringBuffer buys nothing.
    StringBuilder termBuf = new StringBuilder();
    int addToLen = startTag.length() + endTag.length();

    int resultLen = Integer.parseInt(strLen);
    String displayString = resultString;

    QueryParser luceneParser = new QueryParser("text", new StandardAnalyzer());
    Query luceneQuery = luceneParser.parse(queryString);
    // getTerms is static; no QueryTermExtractor instance is needed.
    WeightedTerm[] weightTerms = QueryTermExtractor.getTerms(luceneQuery);

    for (int i = 0; i < weightTerms.length; i++) {
        String term = weightTerms[i].getTerm();
        // Pattern.quote keeps regex metacharacters in the term (e.g. '*', '?')
        // from being interpreted as a pattern; the match is purely literal.
        Pattern replace = Pattern.compile(Pattern.quote(term), Pattern.CASE_INSENSITIVE);
        Matcher matcher = replace.matcher(displayString);
        while (matcher.find()) {
            String matched = matcher.group(0);
            // Delimiter-wrapped lookup so e.g. "cat" is not mistaken for a
            // previously recorded "catalog" (a bare indexOf had substring
            // false positives against the comma-joined buffer).
            if (("," + termBuf).indexOf("," + matched + ",") == -1) {
                // String.replace is literal: neither the matched text nor the
                // tags can be misread as a regex or a replacement template.
                displayString = displayString.replace(matched, startTag + matched + endTag);
                // NOTE(review): counted once per distinct casing even though
                // every occurrence is wrapped — preserved from the original.
                resultLen += addToLen;
            } //if
            termBuf.append(matched);
            termBuf.append(",");
        } //while
    } //for

    int hlStart = displayString.indexOf(startTag);

    // If the first highlight begins past the display window, clip the leading
    // text back to the preceding space so the highlight stays visible.
    if (hlStart > resultLen) {
        String tmpString = displayString.substring(0, hlStart);
        hlStart = tmpString.lastIndexOf(" ");
        displayString = "..." + displayString.substring(++hlStart);
        resultLen += 3; // account for the prepended "..."
    } //if

    // Truncate to the target length; if the cut would leave an unclosed tag,
    // back up to just before the dangling start tag.
    if (displayString.length() > resultLen) {
        displayString = displayString.substring(0, resultLen);
        hlStart = displayString.lastIndexOf(startTag);
        int hlEnd = displayString.lastIndexOf(endTag);
        if (hlEnd < hlStart)
            displayString = displayString.substring(0, --hlStart);
        displayString = displayString + "...";
    } //if

    return displayString;
}

From source file:org.sd.text.lucene.SdQuery.java

License:Open Source License

/**
 * Builds an SdQuery by parsing the raw query string and capturing the texts of
 * its weighted terms via Lucene's search.highlight QueryTermExtractor.
 *
 * @param queryString raw query text to parse
 * @param queryParser parser used to turn the string into a Lucene Query
 * @throws IllegalArgumentException if the query string cannot be parsed
 */
public SdQuery(String queryString, QueryParser queryParser) {
    this.queryString = queryString;
    try {
        this.query = queryParser.parse(queryString);

        //todo: populate fieldNames. Currently, these are left null such that
        //      if we need them we'll get a NullPointerException which will
        //      alert us that it is time to implement this method!

        // Lucene's search.highlight package already provides the term
        // extraction we need, including exclusion of prohibited terms.
        //final WeightedTerm[] queryTerms = QueryTermExtractor.getTerms(this.query.rewrite(new IndexReader()));
        // todo: implement the rewrite() call to incorporate an index reader
        final WeightedTerm[] extracted = QueryTermExtractor.getTerms(this.query);
        final String[] termTexts = new String[extracted.length];
        for (int idx = 0; idx < extracted.length; idx++) {
            termTexts[idx] = extracted[idx].getTerm();
        }
        this.queryTerms = termTexts;
    } catch (ParseException e) {
        // Preserve the cause so the parse failure stays diagnosable.
        throw new IllegalArgumentException(e);
    }
}

From source file:retriever.TermStats.java

/**
 * Re-ranks the given top documents by blending their text-retrieval scores
 * with word-vector similarity scores computed against the query's terms.
 * NOTE(review): totalHits is taken from scoreDocs.length and maxScore from the
 * first blended entry, so a non-empty result set is assumed — confirm callers.
 */
TopDocs rerankTopDocsByWordVecSim(Query query, TopDocs topDocs) throws Exception {
    // Pull the query's weighted terms for the vector-similarity model.
    final WeightedTerm[] queryTerms = QueryTermExtractor.getTerms(query);

    // Score each retrieved document against the query in word-vector space.
    DocVecSimilarity vecSim = new DocVecSimilarity(prop, frIndexReader, topDocs, queryTerms, textSimWt);
    ScoreDoc[] vecScores = vecSim.computeSims();

    // Blend the original text scores with the word-vector scores.
    ScoreDoc[] blended = combineSimilarities(topDocs.scoreDocs, vecScores);

    return new TopDocs(topDocs.scoreDocs.length, blended, blended[0].score);
}