List of usage examples for org.apache.lucene.search.highlight.QueryTermExtractor.getTerms:
public static final WeightedTerm[] getTerms(Query query)
From source file: org.conifer.MyBean.java
License: Apache License
public static String sortOutTerms(String queryString, String resultString, String tag, String strLen) throws org.apache.lucene.queryParser.ParseException { String startTag = "<" + tag + ">"; String endTag = "</" + tag + ">"; StringBuffer termBuf = new StringBuffer(); int addToLen = startTag.length() + endTag.length(); int resultLen = Integer.parseInt(strLen); String displayString = resultString; QueryParser luceneParser = new QueryParser("text", new StandardAnalyzer()); QueryTermExtractor luceneTerms = new QueryTermExtractor(); Query luceneQuery = null;//from www .j av a 2s . c o m Query query = null; WeightedTerm[] weightTerms = null; luceneQuery = luceneParser.parse(queryString); weightTerms = luceneTerms.getTerms(luceneQuery); for (int i = 0; i < weightTerms.length; i++) { String term = weightTerms[i].getTerm(); Pattern replace = Pattern.compile(term, Pattern.CASE_INSENSITIVE); Matcher matcher = replace.matcher(displayString); while (matcher.find()) { //displayString = matcher.replaceAll(startTag + term + endTag); if (termBuf.toString().indexOf(matcher.group(0)) == -1) { displayString = displayString.replaceAll(matcher.group(0), startTag + matcher.group(0) + endTag); resultLen += addToLen; } //if termBuf.append(matcher.group(0)); termBuf.append(","); } //while } //for int hlStart = displayString.indexOf(startTag); //System.out.println("hlStart: " + hlStart); if (hlStart > resultLen) { String tmpString = displayString.substring(0, hlStart); hlStart = tmpString.lastIndexOf(" "); displayString = "..." + displayString.substring(++hlStart); resultLen += 3; } //if if (displayString.length() > resultLen) { displayString = displayString.substring(0, resultLen); hlStart = displayString.lastIndexOf(startTag); int hlEnd = displayString.lastIndexOf(endTag); if (hlEnd < hlStart) displayString = displayString.substring(0, --hlStart); displayString = displayString + "..."; } //if return displayString; }
From source file: org.sd.text.lucene.SdQuery.java
License: Open Source License
public SdQuery(String queryString, QueryParser queryParser) { this.queryString = queryString; try {/* ww w.ja v a 2 s.c om*/ this.query = queryParser.parse(queryString); //todo: populate fieldNames. Currently, these are left null such that // if we need them we'll get a NullPointerException which will // alert us that it is time to implement this method! // It turns out that lucene handles the functionality we need out of the box! // The search.highlight package handles term extraction, including exclustion of prohibited terms //final WeightedTerm[] queryTerms = QueryTermExtractor.getTerms(this.query.rewrite(new IndexReader())); // todo: implement the rewrite() call to incorporate an index reader final WeightedTerm[] queryTerms = QueryTermExtractor.getTerms(this.query); this.queryTerms = new String[queryTerms.length]; for (int i = 0; i < queryTerms.length; i++) { this.queryTerms[i] = queryTerms[i].getTerm(); } } catch (ParseException e) { throw new IllegalArgumentException(e); } }
From source file: retriever.TermStats.java
TopDocs rerankTopDocsByWordVecSim(Query query, TopDocs topDocs) throws Exception { // Compute doc-query vector based similarities WeightedTerm[] qterms = QueryTermExtractor.getTerms(query); DocVecSimilarity dvecSim = new DocVecSimilarity(prop, frIndexReader, topDocs, qterms, textSimWt); ScoreDoc[] wvecScoreDocs = dvecSim.computeSims(); // Combine the similarity scores of the wvecs and the text ScoreDoc[] combinedScoreDocs = combineSimilarities(topDocs.scoreDocs, wvecScoreDocs); TopDocs rerankedTopDocs = new TopDocs(topDocs.scoreDocs.length, combinedScoreDocs, combinedScoreDocs[0].score); return rerankedTopDocs; }