List of usage examples for the org.apache.lucene.search.highlight.TextFragment constructor TextFragment
public TextFragment(CharSequence markedUpText, int textStartPos, int fragNum)
From source file:lux.search.highlight.XmlHighlighter.java
License:Mozilla Public License
private void init(TokenStream tokenStream) { try {// ww w . j a v a2s . co m tokenStream.reset(); scorer.setMaxDocCharsToAnalyze(maxDocCharsToAnalyze); scorerTokens = scorer.init(tokenStream); if (scorerTokens == null) { // The scorer didn't consume any tokens (it does that for PhraseQuery), // in which case we must give it the live token stream scorer.init(xmlStreamTokens); } // we score the entire document as a single fragment scorer.startFragment(new TextFragment("", 0, 0)); } catch (IOException e) { throw new LuxException(e); } }
From source file:org.apache.uima.lucas.ProspectiveSearchAE.java
License:Apache License
@Override public void process(CAS aCAS) throws AnalysisEngineProcessException { // First create the index of the document text MemoryIndex index = new MemoryIndex(); List fields = createDocument(aCAS).getFields(); for (Iterator it = fields.iterator(); it.hasNext();) { Field field = (Field) it.next(); if (field.isIndexed() && field.tokenStreamValue() != null) { index.addField(field.name(), field.tokenStreamValue()); }/*from w w w .ja v a 2 s .c o m*/ } // Search all queries against the one document index for (SearchQuery query : searchQueryProvider.getSearchQueries(aCAS)) { float score = index.search(query.query()); if (score > matchingThreshold) { // Add a FS to the CAS with the search result FeatureStructure searchResult = aCAS.createFS(searchResultType); searchResult.setLongValue(searchResultIdFeature, query.id()); aCAS.addFsToIndexes(searchResult); // Find matching tokens and link their annotations // in case the user wants search term highlighting if (searchResultMatchingTextFeature != null) { fields = createDocument(aCAS).getFields(); for (Iterator it = fields.iterator(); it.hasNext();) { Field field = (Field) it.next(); if (field.isIndexed() && field.tokenStreamValue() != null) { TokenStream tokenStream = field.tokenStreamValue(); Collection<AnnotationFS> matchingTextAnnotations = new LinkedList<AnnotationFS>(); QueryScorer scorer = new QueryScorer(query.query(), field.name()); scorer.startFragment(new TextFragment(new StringBuffer(aCAS.getDocumentText()), 0, 0)); try { scorer.init(tokenStream); OffsetAttribute offsetAttr = null; while (tokenStream.incrementToken()) { offsetAttr = (OffsetAttribute) tokenStream.getAttribute(OffsetAttribute.class); float tokenScore = scorer.getTokenScore(); if (tokenScore > 0) { AnnotationFS annotation = aCAS.createAnnotation(matchingTextType, offsetAttr.startOffset(), offsetAttr.endOffset()); matchingTextAnnotations.add(annotation); } } } catch (IOException e) { throw new AnalysisEngineProcessException(e); } ArrayFS 
matchtingTextArray = aCAS.createArrayFS(matchingTextAnnotations.size()); int matchtingTextArrayIndex = 0; for (AnnotationFS matchingTextAnnotation : matchingTextAnnotations) { matchtingTextArray.set(matchtingTextArrayIndex++, matchingTextAnnotation); } searchResult.setFeatureValue(searchResultMatchingTextFeature, matchtingTextArray); } } } } } }