Example usage for org.apache.lucene.search.highlight QueryScorer getTokenScore

List of usage examples for org.apache.lucene.search.highlight QueryScorer getTokenScore

Introduction

In this page you can find the example usage for org.apache.lucene.search.highlight QueryScorer getTokenScore.

Prototype

@Override
    public float getTokenScore() 

Source Link

Usage

From source file:org.apache.uima.lucas.ProspectiveSearchAE.java

License:Apache License

@Override
public void process(CAS aCAS) throws AnalysisEngineProcessException {

    // Build an in-memory index over the document's analyzed fields so every
    // registered prospective query can be matched against this one document.
    MemoryIndex index = new MemoryIndex();

    List fields = createDocument(aCAS).getFields();

    for (Iterator it = fields.iterator(); it.hasNext();) {
        Field field = (Field) it.next();

        if (field.isIndexed() && field.tokenStreamValue() != null) {
            index.addField(field.name(), field.tokenStreamValue());
        }
    }

    // Run every registered query against the single-document index.
    for (SearchQuery query : searchQueryProvider.getSearchQueries(aCAS)) {

        float score = index.search(query.query());

        if (score > matchingThreshold) {

            // Record the hit as a feature structure in the CAS.
            FeatureStructure searchResult = aCAS.createFS(searchResultType);
            searchResult.setLongValue(searchResultIdFeature, query.id());
            aCAS.addFsToIndexes(searchResult);

            // Optionally link the matching tokens as annotations so the
            // caller can highlight the search terms.
            if (searchResultMatchingTextFeature != null) {

                fields = createDocument(aCAS).getFields();

                for (Iterator it = fields.iterator(); it.hasNext();) {

                    Field field = (Field) it.next();

                    if (field.isIndexed() && field.tokenStreamValue() != null) {

                        TokenStream tokenStream = field.tokenStreamValue();

                        Collection<AnnotationFS> matchingTextAnnotations = new LinkedList<AnnotationFS>();

                        QueryScorer scorer = new QueryScorer(query.query(), field.name());
                        scorer.startFragment(new TextFragment(new StringBuffer(aCAS.getDocumentText()), 0, 0));

                        try {
                            // QueryScorer.init may return a wrapped/cached
                            // TokenStream; that stream must be the one that is
                            // iterated, otherwise position-sensitive (phrase/
                            // span) queries can score every token as zero.
                            TokenStream scorerStream = scorer.init(tokenStream);
                            if (scorerStream != null) {
                                tokenStream = scorerStream;
                            }

                            while (tokenStream.incrementToken()) {
                                OffsetAttribute offsetAttr = (OffsetAttribute) tokenStream
                                        .getAttribute(OffsetAttribute.class);
                                float tokenScore = scorer.getTokenScore();
                                if (tokenScore > 0) {
                                    // Token contributed to the match: annotate
                                    // its character span in the CAS.
                                    AnnotationFS annotation = aCAS.createAnnotation(matchingTextType,
                                            offsetAttr.startOffset(), offsetAttr.endOffset());

                                    matchingTextAnnotations.add(annotation);
                                }
                            }
                        } catch (IOException e) {
                            throw new AnalysisEngineProcessException(e);
                        }

                        // Copy the matches into a CAS array and attach it to
                        // the search result.
                        // NOTE(review): each indexed field overwrites this
                        // feature value — confirm documents built by
                        // createDocument() expose only one indexed field, or
                        // matches from earlier fields are silently lost.
                        ArrayFS matchingTextArray = aCAS.createArrayFS(matchingTextAnnotations.size());

                        int matchingTextArrayIndex = 0;
                        for (AnnotationFS matchingTextAnnotation : matchingTextAnnotations) {
                            matchingTextArray.set(matchingTextArrayIndex++, matchingTextAnnotation);
                        }

                        searchResult.setFeatureValue(searchResultMatchingTextFeature, matchingTextArray);
                    }
                }
            }
        }
    }
}

From source file:org.eclipse.che.api.search.server.impl.LuceneSearcher.java

License:Open Source License

@Override
public SearchResult search(QueryExpression query) throws InvalidQueryException, QueryExecutionException {
    IndexSearcher luceneSearcher = null;
    try {
        final long startTime = System.currentTimeMillis();
        searcherManager.maybeRefresh();
        luceneSearcher = searcherManager.acquire();

        Query luceneQuery = createLuceneQuery(query);

        // Paging: skip the first numSkipDocs matches, then fetch up to
        // numDocs (capped by RESULT_LIMIT) starting after the skipped ones.
        ScoreDoc after = null;
        final int numSkipDocs = Math.max(0, query.getSkipCount());
        if (numSkipDocs > 0) {
            after = skipScoreDocs(luceneSearcher, luceneQuery, numSkipDocs);
        }

        final int numDocs = query.getMaxItems() > 0 ? Math.min(query.getMaxItems(), RESULT_LIMIT)
                : RESULT_LIMIT;
        TopDocs topDocs = luceneSearcher.searchAfter(after, luceneQuery, numDocs, sort, true, true);
        final long totalHitsNum = topDocs.totalHits;

        List<SearchResultEntry> results = newArrayList();
        List<OffsetData> offsetData = Collections.emptyList();
        for (int i = 0; i < topDocs.scoreDocs.length; i++) {
            ScoreDoc scoreDoc = topDocs.scoreDocs[i];
            int docId = scoreDoc.doc;
            Document doc = luceneSearcher.doc(docId);
            if (query.isIncludePositions()) {
                // Re-tokenize the stored text and score each token against
                // the query so callers get per-match offsets and line numbers.
                offsetData = new ArrayList<>();
                String txt = doc.get(TEXT_FIELD);
                if (txt != null) {
                    IndexReader reader = luceneSearcher.getIndexReader();

                    TokenStream tokenStream = TokenSources.getTokenStream(TEXT_FIELD,
                            reader.getTermVectors(docId), txt, luceneIndexWriter.getAnalyzer(), -1);

                    CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
                    OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);

                    QueryScorer queryScorer = new QueryScorer(luceneQuery);
                    // TODO think about this constant
                    queryScorer.setMaxDocCharsToAnalyze(1_000_000);
                    // init may wrap the stream; the wrapped stream must be
                    // the one that is iterated.
                    TokenStream newStream = queryScorer.init(tokenStream);
                    if (newStream != null) {
                        tokenStream = newStream;
                    }
                    queryScorer.startFragment(null);

                    tokenStream.reset();

                    int startOffset, endOffset;
                    // TODO think about this constant
                    for (boolean next = tokenStream.incrementToken(); next
                            && (offsetAtt.startOffset() < 1_000_000); next = tokenStream.incrementToken()) {
                        startOffset = offsetAtt.startOffset();
                        endOffset = offsetAtt.endOffset();

                        if ((endOffset > txt.length()) || (startOffset > txt.length())) {
                            throw new QueryExecutionException("Token " + termAtt.toString()
                                    + " exceeds length of provided text size " + txt.length());
                        }

                        float res = queryScorer.getTokenScore();
                        if (res > 0.0F && startOffset <= endOffset) {
                            String tokenText = txt.substring(startOffset, endOffset);
                            int lineNum = 1;
                            long len = 0;
                            String foundLine = "";
                            // Locate the line containing startOffset. The
                            // running length must count the line separator
                            // stripped by nextLine(), otherwise the reported
                            // line number drifts upward on multi-line text.
                            // NOTE(review): assumes a one-char '\n' separator;
                            // '\r\n' input would still drift — confirm the
                            // indexed text is normalized.
                            try (Scanner sc = new Scanner(txt)) {
                                while (sc.hasNextLine()) {
                                    foundLine = sc.nextLine();

                                    len += foundLine.length() + 1;
                                    if (len > startOffset) {
                                        break;
                                    }
                                    lineNum++;
                                }
                            }
                            offsetData.add(
                                    new OffsetData(tokenText, startOffset, endOffset, res, lineNum, foundLine));
                        }
                    }
                }
            }

            String filePath = doc.getField(PATH_FIELD).stringValue();
            LOG.debug("Doc {} path {} score {} ", docId, filePath, scoreDoc.score);
            results.add(new SearchResultEntry(filePath, offsetData));
        }

        final long elapsedTimeMillis = System.currentTimeMillis() - startTime;

        // NOTE(review): the "+ 1" makes this report no-more-results one hit
        // early (e.g. 10 retrieved of exactly 11 total yields false) — confirm
        // whether that off-by-one is intentional.
        boolean hasMoreToRetrieve = numSkipDocs + topDocs.scoreDocs.length + 1 < totalHitsNum;
        QueryExpression nextPageQueryExpression = null;
        if (hasMoreToRetrieve) {
            nextPageQueryExpression = createNextPageQuery(query, numSkipDocs + topDocs.scoreDocs.length);
        }

        return SearchResult.aSearchResult().withResults(results).withTotalHits(totalHitsNum)
                .withNextPageQueryExpression(nextPageQueryExpression).withElapsedTimeMillis(elapsedTimeMillis)
                .build();
    } catch (ParseException e) {
        throw new InvalidQueryException(e.getMessage(), e);
    } catch (IOException e) {
        throw new QueryExecutionException(e.getMessage(), e);
    } finally {
        try {
            searcherManager.release(luceneSearcher);
        } catch (IOException e) {
            LOG.error(e.getMessage());
        }
    }
}