List of usage examples for the org.apache.lucene.search.highlight.WeightedTerm constructor
public WeightedTerm(float weight, String term)
From source file:com.liferay.portal.search.lucene.highlight.QueryTermExtractor.java
License:Open Source License
/**
 * Extracts the terms of a query tree as an array of weighted terms.
 *
 * <p>The tree is walked iteratively with an explicit stack instead of
 * recursion: boolean queries push their clauses, filtered queries are
 * replaced by the query they wrap, and every other query is asked for its
 * terms via {@code extractTerms}. Each extracted term is weighted with the
 * boost of the query it was extracted from.
 *
 * @param  query the root query; when {@code null} the shared empty array is
 *         returned
 * @param  prohibited whether clauses whose occurrence is {@code MUST_NOT}
 *         are traversed as well
 * @param  fieldName when not {@code null}, only terms whose field equals
 *         this name are kept
 * @return the collected weighted terms, in no particular order
 */
public static WeightedTerm[] getTerms(Query query, boolean prohibited, String fieldName) {
    if (query == null) {
        return _emptyWeightedTermArray;
    }

    Set<WeightedTerm> weightedTerms = new HashSet<WeightedTerm>();

    // Scratch set reused for every leaf query; always emptied after use.
    Set<Term> terms = new HashSet<Term>();

    // Pending sub queries, used as a stack (addFirst / poll).
    LinkedList<Query> queries = new LinkedList<Query>();

    Query lastQuery = query;

    while (lastQuery != null) {
        if (lastQuery instanceof BooleanQuery) {
            BooleanQuery booleanQuery = (BooleanQuery) lastQuery;

            for (BooleanClause booleanClause : booleanQuery.getClauses()) {
                if (prohibited || (booleanClause.getOccur() != BooleanClause.Occur.MUST_NOT)) {
                    Query booleanClauseQuery = booleanClause.getQuery();

                    if (booleanClauseQuery != null) {
                        queries.addFirst(booleanClauseQuery);
                    }
                }
            }

            lastQuery = queries.poll();
        } else if (lastQuery instanceof FilteredQuery) {
            FilteredQuery filteredQuery = (FilteredQuery) lastQuery;

            // Continue directly with the wrapped query, or with the next
            // pending one when nothing is wrapped.
            lastQuery = filteredQuery.getQuery();

            if (lastQuery == null) {
                lastQuery = queries.poll();
            }
        } else {
            Class<? extends Query> queryClass = lastQuery.getClass();

            // Classes already known to reject extractTerms are skipped.
            if (!_queryClasses.contains(queryClass)) {
                try {
                    lastQuery.extractTerms(terms);

                    // Weight each term with the boost of the query that
                    // produced it, not with the boost of the root query.
                    float boost = lastQuery.getBoost();

                    for (Term term : terms) {
                        if ((fieldName == null) || fieldName.equals(term.field())) {
                            weightedTerms.add(new WeightedTerm(boost, term.text()));
                        }
                    }
                } catch (UnsupportedOperationException uoe) {
                    // Remember the class so it is not asked again.
                    _queryClasses.addIfAbsent(queryClass);
                } finally {
                    // Also clear after a failure so partially extracted
                    // terms never leak into the next query's results.
                    terms.clear();
                }
            }

            lastQuery = queries.poll();
        }
    }

    return weightedTerms.toArray(new WeightedTerm[weightedTerms.size()]);
}
From source file:org.apache.nutch.summary.lucene.LuceneSummarizer.java
License:Apache License
public Summary getSummary(String text, Query query) { String[] terms = query.getTerms(); WeightedTerm[] weighted = new WeightedTerm[terms.length]; for (int i = 0; i < terms.length; i++) { weighted[i] = new WeightedTerm(1.0f, terms[i]); }/*w w w .j a va 2s . c o m*/ Highlighter highlighter = new Highlighter(FORMATTER, new QueryScorer(weighted)); TokenStream tokens = analyzer.tokenStream("content", new StringReader(text)); Summary summary = new Summary(); try { // TODO : The max number of fragments (3) should be configurable String[] result = highlighter.getBestFragments(tokens, text, 3); for (int i = 0; i < result.length; i++) { String[] parts = result[i].split(SEPARATOR); boolean highlight = false; for (int j = 0; j < parts.length; j++) { if (highlight) { summary.add(new Highlight(parts[j])); } else { summary.add(new Fragment(parts[j])); } highlight = !highlight; } summary.add(new Ellipsis()); } /* TODO MC BUG resolved 0000029 - if query terms do not occur on text, an empty summary is returned. Now it sends the first tokens. */ if (result == null || result.length == 0) { tokens = analyzer.tokenStream("content", new StringReader(text)); Token firstToken = null, lastToken = null; Token token = null; int maxLen = 100; // the same as defined in SimpleFragmenter but it is private /* ArrayList<Token> titleTokens=new ArrayList<Token>(); ArrayList<Token> textTokens=new ArrayList<Token>(); boolean titleMatched=false; boolean hasMatched=false; // exit match after match title the first time // remove title from text. 
compares pairs of text while ((titleMatched || !hasMatched) && (token=tokens.next())!=null) { if (token.type().equals("<WORD>")) { if (titleTokens.size()==0) { titleTokens.add(token); } else if (textTokens.size()<titleTokens.size()) { textTokens.add(token); } if (textTokens.size()==titleTokens.size()) { // compare titleMatched=true; for (int i=0;i<textTokens.size() && titleMatched;i++) { if (!textTokens.get(i).termText().equals(titleTokens.get(i).termText())) { titleMatched=false; } } if (titleMatched) { // try to match a larger pattern titleTokens.add(textTokens.get(0)); textTokens.remove(0); hasMatched=true; } else { // remove rest of title from text if (hasMatched) { firstToken=textTokens.get(titleTokens.size()-2); } else { // add one more token to title titleTokens.add(textTokens.get(0)); textTokens.remove(0); } } } } } if (textTokens.size()==0) { return summary; } for (int i=0;i<textTokens.size() && textTokens.get(i).endOffset()-firstToken.startOffset()<maxLen;i++) { lastToken=textTokens.get(i); } */ // read tokens until maxLen while ((token = tokens.next()) != null) { if (token.type().equals("<WORD>")) { if (firstToken == null) { firstToken = token; } else if (token.endOffset() - firstToken.startOffset() < maxLen) { lastToken = token; } else { break; } } } if (lastToken == null) { lastToken = firstToken; } summary.add(new Fragment(text.substring(firstToken.startOffset(), lastToken.endOffset()))); summary.add(new Ellipsis()); } /* TODO MC */ } catch (Exception e) { // Nothing to do... } return summary; }