List of usage examples for `org.apache.lucene.search.highlight.TokenSources.getTokenStream`
@Deprecated
public static TokenStream getTokenStream(String field, String contents, Analyzer analyzer)
From source file:com.gauronit.tagmata.core.Indexer.java
License:Open Source License
public ArrayList<CardSnapshot> search(String searchText, ArrayList<String> indexNames, boolean searchInTitle, boolean searchInTags, boolean searchInText, boolean superFuzzy) { ArrayList<CardSnapshot> cardSnaps = new ArrayList(); try {// ww w. jav a2 s . c o m ArrayList<IndexSearcher> searchers = new ArrayList<IndexSearcher>(); for (String indexName : indexNames) { IndexReader reader = IndexReader .open(FSDirectory.open(new File(indexDir + File.separator + indexName), new SimpleFSLockFactory(indexDir + File.separator + indexName))); IndexSearcher searcher = new IndexSearcher(reader); searchers.add(searcher); } BooleanQuery query = new BooleanQuery(); if (searchInTitle) { IndexerUtil.getTokenizedQuery(query, "title", searchText, superFuzzy); } if (searchInTags) { IndexerUtil.getTokenizedQuery(query, "tags", searchText, superFuzzy); } if (searchInText) { IndexerUtil.getTokenizedQuery(query, "text", searchText, superFuzzy); IndexerUtil.getTokenizedQuery(query, "analyzedText", searchText, superFuzzy); } for (IndexSearcher searcher : searchers) { TopScoreDocCollector collector = TopScoreDocCollector.create(10000, false); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; for (ScoreDoc hit : hits) { Document doc = searcher.doc(hit.doc); TokenStream stream = TokenSources.getTokenStream("text", doc.get("analyzedText"), new StandardAnalyzer(Version.LUCENE_20.LUCENE_35)); QueryScorer scorer = new QueryScorer(query, "analyzedText"); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 20); Highlighter highlighter = new Highlighter(scorer); highlighter.setTextFragmenter(fragmenter); String[] fragments = highlighter.getBestFragments(stream, doc.get("text"), 5); String highlights = ""; for (String fragment : fragments) { highlights += fragment + "..."; } if (highlights.equals("")) { String text = doc.get("text"); if (text.length() > 100) { highlights += doc.get("text").substring(0, 100); } else { highlights += doc.get("text"); } } 
cardSnaps.add(new CardSnapshot(highlights, doc)); } searcher.getIndexReader().close(); searcher.close(); searcher = null; } } catch (Exception ex) { ex.printStackTrace(); } return cardSnaps; }
From source file:com.pongasoft.kiwidoc.index.impl.keyword.impl.KeywordIndexImpl.java
License:Apache License
/** * Highlights the provided results obtained using the provided query. * * @param query the query from which the results were computed * @param models the models to highlight * @return a map representing for each entry in the model its associated resource and highlight * @throws MalformedQueryException if the query cannot be parsed * @throws InternalException if there is an internal problem *///from w w w . j a v a2 s . c om public <R extends Resource> Map<R, String[]> highlightResults(KeywordQuery query, Collection<Model<R>> models) throws InternalException, MalformedQueryException { Map<R, String[]> res = new LinkedHashMap<R, String[]>(); Query parsedQuery = parseQuery(query); if (parsedQuery != null) { Highlighter highlighter = new Highlighter(_highlighterFormatter, HTML_ENCODER, new QueryScorer(parsedQuery)); for (Model<R> model : models) { Document document = new Document(); String bodyText = buildBody(model); document.add(new Field(DocumentFactory.BODY_FIELD, bodyText, Field.Store.NO, Field.Index.ANALYZED)); TokenStream tokenStream = TokenSources.getTokenStream(document, DocumentFactory.BODY_FIELD, _analyzer); try { res.put(model.getResource(), highlighter.getBestFragments(tokenStream, bodyText, 2)); } catch (IOException e) { log.warn("exception while computing highlight... [ignored]", e); } } } return res; }
From source file:de.spartusch.nasfvi.server.NSearcher.java
License:Apache License
/** * Extracts a field's values from a document. This method is aware of * <i>collapsed</i> or <i>merged</i> fields and handles them properly. * @param nquery NQuery used for searching * @param doc Document to extract the field's values from * @param field Name of the field to extract values for * @return Set of extracted values/* w ww . j av a2s. c o m*/ */ private Set<String> extractValues(final NQuery nquery, final Document doc, final String field) { Set<String> values = new HashSet<String>(); if (NQuery.isFieldToCollapse(field)) { // process merged field String mfield = NQuery.getMergedField(); QueryScorer scorer = new QueryScorer(nquery.getQuery(), mfield); Highlighter highlighter = new Highlighter(scorer); highlighter.setTextFragmenter(new NullFragmenter()); try { Set<String> buffer = new HashSet<String>(); for (Fieldable f : doc.getFieldables(mfield)) { String content = f.stringValue(); String value = normalizeValue(NQuery.extractValue(field, content)); // Test if the field was matched by the query TokenStream ts = TokenSources.getTokenStream(mfield, content, nquery.getAnalyzer()); if (highlighter.getBestFragment(ts, content) != null) { values.add(value); } else { // Buffer the value - in case no field matches buffer.add(value); } } if (values.isEmpty()) { // No field was matched by the query values.addAll(buffer); } } catch (IOException e) { throw new RuntimeException(e); } catch (InvalidTokenOffsetsException e) { throw new RuntimeException(e); } } else { for (String v : doc.getValues(field)) { values.add(normalizeValue(v)); } } return values; }
From source file:drakkar.mast.retrieval.LuceneContext.java
/** * Para la sumarizacin/* w ww .jav a 2s . c o m*/ * * @return */ private String getHighlighter(Query q, Analyzer a, String text, String field) { String summary = null; this.hg = new Highlighter(new QueryTermScorer(q)); this.hg.setTextFragmenter(new SimpleFragmenter(20)); this.hg.setMaxDocCharsToAnalyze(600); try { try { this.tokens = TokenSources.getTokenStream(field, text, a); summary = this.hg.getBestFragments(this.tokens, text, 20, "..."); // summary = this.hg.getBestFragments(this.tokens, text, 10).toString(); } catch (IOException ex) { OutputMonitor.printStream("IO", ex); } } catch (InvalidTokenOffsetsException ex) { OutputMonitor.printStream("", ex); } if (summary == null) { summary = " "; } return summary; }
From source file:lucandra.LucandraTests.java
License:Apache License
public void testHighlight() throws Exception { // This tests the TermPositionVector classes IndexReader indexReader = new IndexReader(indexName, client); IndexSearcher searcher = new IndexSearcher(indexReader); QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "key", analyzer); // check exact Query q = qp.parse("+key:\"foobar foobar\""); TopDocs docs = searcher.search(q, 10); assertEquals(1, docs.totalHits);/*from w w w . jav a2 s . c o m*/ SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(); QueryScorer scorer = new QueryScorer(q, "key", text); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE)); TokenStream tvStream = TokenSources.getTokenStream(indexReader, docs.scoreDocs[0].doc, "key"); String rv = highlighter.getBestFragment(tvStream, text); assertNotNull(rv); assertEquals(rv, highlightedText); }
From source file:net.chwise.documents.HighlightedFragmentsRetriever.java
License:Open Source License
public String[] getFragmentsWithHighlightedTerms(Analyzer analyzer, Query query, String fieldName, String fieldContents, int fragmentNumber, int fragmentSize) throws IOException, InvalidTokenOffsetsException { TokenStream stream = TokenSources.getTokenStream(fieldName, fieldContents, analyzer); QueryScorer scorer = new QueryScorer(query, fieldName); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentSize); Highlighter highlighter = new Highlighter(scorer); highlighter.setTextFragmenter(fragmenter); highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); String[] fragments = highlighter.getBestFragments(stream, fieldContents, fragmentNumber); if (fragments.length == 0) { //Return starting piece of fieldContents fragment fragments = new String[1]; fragments[0] = fieldContents.substring(0, Math.min(fragmentSize, fieldContents.length())); }/*from w ww. j ava2 s . c om*/ return fragments; }