Example usage for org.apache.lucene.search.highlight TokenSources getTokenStream

List of usage examples for org.apache.lucene.search.highlight TokenSources getTokenStream

Introduction

On this page you can find an example usage of org.apache.lucene.search.highlight TokenSources.getTokenStream.

Prototype

@Deprecated 
    public static TokenStream getTokenStream(String field, String contents, Analyzer analyzer) 

Source Link

Usage

From source file:com.gauronit.tagmata.core.Indexer.java

License:Open Source License

/**
 * Searches the named on-disk indexes for cards matching the search text and
 * returns snapshots carrying highlighted fragments.
 *
 * @param searchText    query text entered by the user
 * @param indexNames    names of the index directories (under {@code indexDir}) to search
 * @param searchInTitle whether to match against the "title" field
 * @param searchInTags  whether to match against the "tags" field
 * @param searchInText  whether to match against the "text" and "analyzedText" fields
 * @param superFuzzy    passed through to IndexerUtil to build fuzzier term queries
 * @return snapshots of matching cards; empty if nothing matched or an error occurred
 */
public ArrayList<CardSnapshot> search(String searchText, ArrayList<String> indexNames, boolean searchInTitle,
        boolean searchInTags, boolean searchInText, boolean superFuzzy) {
    ArrayList<CardSnapshot> cardSnaps = new ArrayList<CardSnapshot>(); // was raw ArrayList
    try {
        ArrayList<IndexSearcher> searchers = new ArrayList<IndexSearcher>();

        for (String indexName : indexNames) {
            IndexReader reader = IndexReader
                    .open(FSDirectory.open(new File(indexDir + File.separator + indexName),
                            new SimpleFSLockFactory(indexDir + File.separator + indexName)));
            searchers.add(new IndexSearcher(reader));
        }

        // One boolean query shared across all requested fields.
        BooleanQuery query = new BooleanQuery();
        if (searchInTitle) {
            IndexerUtil.getTokenizedQuery(query, "title", searchText, superFuzzy);
        }
        if (searchInTags) {
            IndexerUtil.getTokenizedQuery(query, "tags", searchText, superFuzzy);
        }
        if (searchInText) {
            IndexerUtil.getTokenizedQuery(query, "text", searchText, superFuzzy);
            IndexerUtil.getTokenizedQuery(query, "analyzedText", searchText, superFuzzy);
        }

        for (IndexSearcher searcher : searchers) {
            TopScoreDocCollector collector = TopScoreDocCollector.create(10000, false);
            searcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            try {
                for (ScoreDoc hit : hits) {
                    Document doc = searcher.doc(hit.doc);

                    // Was Version.LUCENE_20.LUCENE_35: a static constant accessed
                    // through another constant — same value, misleading to read.
                    TokenStream stream = TokenSources.getTokenStream("text", doc.get("analyzedText"),
                            new StandardAnalyzer(Version.LUCENE_35));
                    QueryScorer scorer = new QueryScorer(query, "analyzedText");
                    Highlighter highlighter = new Highlighter(scorer);
                    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 20));
                    String[] fragments = highlighter.getBestFragments(stream, doc.get("text"), 5);

                    // StringBuilder instead of String += in a loop.
                    StringBuilder highlights = new StringBuilder();
                    for (String fragment : fragments) {
                        highlights.append(fragment).append("...");
                    }

                    if (highlights.length() == 0) {
                        // No fragments matched: fall back to the first 100 chars of the raw text.
                        String text = doc.get("text");
                        highlights.append(text.length() > 100 ? text.substring(0, 100) : text);
                    }

                    cardSnaps.add(new CardSnapshot(highlights.toString(), doc));
                }
            } finally {
                // Release the reader/searcher even if highlighting throws,
                // so a failure on one index does not leak the others' handles.
                searcher.getIndexReader().close();
                searcher.close();
            }
        }

    } catch (Exception ex) {
        ex.printStackTrace();
    }
    return cardSnaps;
}

From source file:com.pongasoft.kiwidoc.index.impl.keyword.impl.KeywordIndexImpl.java

License:Apache License

/**
 * Highlights the provided results obtained using the provided query.
 *
 * @param query  the query from which the results were computed
 * @param models the models to highlight
 * @return a map associating each model's resource with its highlight fragments
 * @throws MalformedQueryException if the query cannot be parsed
 * @throws InternalException if there is an internal problem
 */
public <R extends Resource> Map<R, String[]> highlightResults(KeywordQuery query, Collection<Model<R>> models)
        throws InternalException, MalformedQueryException {
    Map<R, String[]> highlights = new LinkedHashMap<R, String[]>();

    Query parsedQuery = parseQuery(query);
    if (parsedQuery == null) {
        // Nothing to score against: return the empty (ordered) map.
        return highlights;
    }

    Highlighter highlighter = new Highlighter(_highlighterFormatter, HTML_ENCODER,
            new QueryScorer(parsedQuery));

    for (Model<R> model : models) {
        // Rebuild the body text and index it into a transient document so the
        // analyzer can produce a token stream for the highlighter.
        String bodyText = buildBody(model);
        Document document = new Document();
        document.add(new Field(DocumentFactory.BODY_FIELD, bodyText, Field.Store.NO, Field.Index.ANALYZED));
        TokenStream tokenStream = TokenSources.getTokenStream(document, DocumentFactory.BODY_FIELD,
                _analyzer);
        try {
            highlights.put(model.getResource(), highlighter.getBestFragments(tokenStream, bodyText, 2));
        } catch (IOException e) {
            log.warn("exception while computing highlight... [ignored]", e);
        }
    }

    return highlights;
}

From source file:de.spartusch.nasfvi.server.NSearcher.java

License:Apache License

/**
 * Extracts a field's values from a document. This method is aware of
 * <i>collapsed</i> or <i>merged</i> fields and handles them properly. 
 * @param nquery NQuery used for searching
 * @param doc Document to extract the field's values from
 * @param field Name of the field to extract values for
 * @return Set of extracted values/*  w  ww .  j av a2s. c o  m*/
 */
private Set<String> extractValues(final NQuery nquery, final Document doc, final String field) {
    Set<String> values = new HashSet<String>();

    if (NQuery.isFieldToCollapse(field)) {
        // process merged field
        String mfield = NQuery.getMergedField();
        QueryScorer scorer = new QueryScorer(nquery.getQuery(), mfield);
        Highlighter highlighter = new Highlighter(scorer);
        highlighter.setTextFragmenter(new NullFragmenter());

        try {
            Set<String> buffer = new HashSet<String>();

            for (Fieldable f : doc.getFieldables(mfield)) {
                String content = f.stringValue();
                String value = normalizeValue(NQuery.extractValue(field, content));

                // Test if the field was matched by the query
                TokenStream ts = TokenSources.getTokenStream(mfield, content, nquery.getAnalyzer());
                if (highlighter.getBestFragment(ts, content) != null) {
                    values.add(value);
                } else {
                    // Buffer the value - in case no field matches
                    buffer.add(value);
                }
            }

            if (values.isEmpty()) {
                // No field was matched by the query
                values.addAll(buffer);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        } catch (InvalidTokenOffsetsException e) {
            throw new RuntimeException(e);
        }
    } else {
        for (String v : doc.getValues(field)) {
            values.add(normalizeValue(v));
        }
    }

    return values;
}

From source file:drakkar.mast.retrieval.LuceneContext.java

/**
 * Para la sumarizacin/*  w  ww .jav a 2s  .  c o m*/
 *
 * @return
 */
private String getHighlighter(Query q, Analyzer a, String text, String field) {

    String summary = null;

    this.hg = new Highlighter(new QueryTermScorer(q));
    this.hg.setTextFragmenter(new SimpleFragmenter(20));
    this.hg.setMaxDocCharsToAnalyze(600);

    try {
        try {
            this.tokens = TokenSources.getTokenStream(field, text, a);
            summary = this.hg.getBestFragments(this.tokens, text, 20, "...");
            // summary = this.hg.getBestFragments(this.tokens, text, 10).toString();
        } catch (IOException ex) {
            OutputMonitor.printStream("IO", ex);
        }
    } catch (InvalidTokenOffsetsException ex) {
        OutputMonitor.printStream("", ex);
    }

    if (summary == null) {
        summary = " ";
    }
    return summary;
}

From source file:lucandra.LucandraTests.java

License:Apache License

/**
 * Verifies highlighting backed by term position vectors: a phrase query on
 * the "key" field should hit exactly one document, and the best fragment
 * should equal the expected highlighted text.
 * NOTE(review): relies on fields declared elsewhere in this test class
 * (indexName, client, analyzer, text, highlightedText) and on lucandra's
 * Cassandra-backed IndexReader — confirm fixture setup before reuse.
 */
public void testHighlight() throws Exception {

    // This tests the TermPositionVector classes

    IndexReader indexReader = new IndexReader(indexName, client);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "key", analyzer);

    // check exact phrase match — expect exactly one hit
    Query q = qp.parse("+key:\"foobar foobar\"");
    TopDocs docs = searcher.search(q, 10);
    assertEquals(1, docs.totalHits);

    // Highlight the whole field as a single fragment (no size limit).
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
    QueryScorer scorer = new QueryScorer(q, "key", text);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));

    // Token stream is rebuilt from the stored term vectors of the hit doc.
    TokenStream tvStream = TokenSources.getTokenStream(indexReader, docs.scoreDocs[0].doc, "key");

    String rv = highlighter.getBestFragment(tvStream, text);

    assertNotNull(rv);
    assertEquals(rv, highlightedText);
}

From source file:net.chwise.documents.HighlightedFragmentsRetriever.java

License:Open Source License

/**
 * Returns up to {@code fragmentNumber} fragments of {@code fieldContents}
 * with the query's terms highlighted. When the highlighter finds no match,
 * falls back to a single fragment holding the leading piece of the content.
 *
 * @param analyzer       analyzer used to tokenize the field contents
 * @param query          query whose terms should be highlighted
 * @param fieldName      name of the field the contents belong to
 * @param fieldContents  raw text to highlight
 * @param fragmentNumber maximum number of fragments to return
 * @param fragmentSize   target size of each fragment, in characters
 * @return a non-empty array of fragments
 * @throws IOException                  on token-stream errors
 * @throws InvalidTokenOffsetsException if token offsets are inconsistent
 */
public String[] getFragmentsWithHighlightedTerms(Analyzer analyzer, Query query, String fieldName,
        String fieldContents, int fragmentNumber, int fragmentSize)
        throws IOException, InvalidTokenOffsetsException {

    QueryScorer scorer = new QueryScorer(query, fieldName);
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, fragmentSize));
    highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);

    TokenStream tokenStream = TokenSources.getTokenStream(fieldName, fieldContents, analyzer);
    String[] fragments = highlighter.getBestFragments(tokenStream, fieldContents, fragmentNumber);

    if (fragments.length == 0) {
        // Nothing matched: return the opening slice of the contents instead.
        int end = Math.min(fragmentSize, fieldContents.length());
        fragments = new String[] { fieldContents.substring(0, end) };
    }

    return fragments;
}