List of usage examples for org.apache.lucene.search.vectorhighlight FastVectorHighlighter getFieldQuery
public FieldQuery getFieldQuery(Query query, IndexReader reader) throws IOException
From source file: org.apache.solr.handler.component.AlfrescoSolrHighlighter.java
License: Open Source License
/** * Generates a list of Highlighted query fragments for each item in a list * of documents, or returns null if highlighting is disabled. * * @param docs/*w w w . j a v a 2 s. c o m*/ * query results * @param query * the query * @param req * the current request * @param defaultFields * default list of fields to summarize * * @return NamedList containing a NamedList for each document, which in * turns contains sets (field, summary) pairs. */ @Override @SuppressWarnings("unchecked") public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException { SolrParams params = req.getParams(); if (!isHighlightingEnabled(params)) // also returns early if no unique // key field return null; boolean rewrite = query != null && !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true")) && Boolean.valueOf(params.get(HighlightParams.HIGHLIGHT_MULTI_TERM, "true"))); if (rewrite) { query = query.rewrite(req.getSearcher().getIndexReader()); } SolrIndexSearcher searcher = req.getSearcher(); IndexSchema schema = searcher.getSchema(); // fetch unique key if one exists. 
SchemaField keyField = schema.getUniqueKeyField(); if (keyField == null) { return null;// exit early; we need a unique key field to populate // the response } String[] fieldNames = getHighlightFields(query, req, defaultFields); Set<String> preFetchFieldNames = getDocPrefetchFieldNames(fieldNames, req); if (preFetchFieldNames != null) { preFetchFieldNames.add(keyField.getName()); } FastVectorHighlighter fvh = null; // lazy FieldQuery fvhFieldQuery = null; // lazy IndexReader reader = new TermVectorReusingLeafReader(req.getSearcher().getSlowAtomicReader()); // SOLR-5855 // Highlight each document NamedList fragments = new SimpleOrderedMap(); DocIterator iterator = docs.iterator(); for (int i = 0; i < docs.size(); i++) { int docId = iterator.nextDoc(); Document doc = getDocument(searcher.doc(docId, preFetchFieldNames), req); @SuppressWarnings("rawtypes") NamedList docHighlights = new SimpleOrderedMap(); // Highlight per-field for (String fieldName : fieldNames) { String schemaFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(fieldName, FieldUse.HIGHLIGHT, req); // rewrite field specific parameters ..... SchemaField schemaField = schema.getFieldOrNull(schemaFieldName); rewriteRequestParameters(params, fieldName, schemaFieldName, req); Object fieldHighlights; // object type allows flexibility for // subclassers if (schemaField == null) { fieldHighlights = null; } else if (schemaField.getType() instanceof org.apache.solr.schema.TrieField) { // TODO: highlighting numeric fields is broken (Lucene) - so // we disable them until fixed (see LUCENE-3080)! 
fieldHighlights = null; } else if (useFastVectorHighlighter(req.getParams(), schemaField)) { if (fvhFieldQuery == null) { fvh = new FastVectorHighlighter( // FVH cannot process hl.usePhraseHighlighter parameter // per-field basis req.getParams().getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true), // FVH cannot process hl.requireFieldMatch // parameter per-field basis req.getParams().getBool(HighlightParams.FIELD_MATCH, false)); fvh.setPhraseLimit(req.getParams().getInt(HighlightParams.PHRASE_LIMIT, SolrHighlighter.DEFAULT_PHRASE_LIMIT)); fvhFieldQuery = fvh.getFieldQuery(query, reader); } fieldHighlights = null; FvhContainer fvhContainer = new FvhContainer(fvh, fvhFieldQuery); fieldHighlights = doHighlightingByFastVectorHighlighter(doc, docId, schemaField, fvhContainer, reader, req); } else { // standard/default highlighter fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader, req); // Fall back to the best FTS field if highlight fails if (fieldHighlights == null) { schemaFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(fieldName, FieldUse.HIGHLIGHT, req, 1); if (schemaField != null) { schemaField = schema.getFieldOrNull(schemaFieldName); rewriteRequestParameters(params, fieldName, schemaFieldName, req); fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader, req); } } } if (fieldHighlights == null) { // no summaries made; copy text from alternate field fieldHighlights = alternateField(doc, fieldName, req); } if (fieldHighlights != null) { docHighlights.add(fieldName, fieldHighlights); } } // for each field if (doc.get("DBID") != null) { docHighlights.add("DBID", doc.get("DBID")); } fragments.add(schema.printableUniqueKey(doc), docHighlights); } // for each doc return fragments; }
From source file: org.apache.solr.highlight.DefaultSolrHighlighter.java
License: Apache License
/** * Generates a list of Highlighted query fragments for each item in a list * of documents, or returns null if highlighting is disabled. * * @param docs query results//from w w w .j a v a 2 s . co m * @param query the query * @param req the current request * @param defaultFields default list of fields to summarize * * @return NamedList containing a NamedList for each document, which in * turns contains sets (field, summary) pairs. */ @Override @SuppressWarnings("unchecked") public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException { SolrParams params = req.getParams(); if (!isHighlightingEnabled(params)) return null; SolrIndexSearcher searcher = req.getSearcher(); IndexSchema schema = searcher.getSchema(); NamedList fragments = new SimpleOrderedMap(); String[] fieldNames = getHighlightFields(query, req, defaultFields); Set<String> fset = new HashSet<String>(); { // pre-fetch documents using the Searcher's doc cache for (String f : fieldNames) { fset.add(f); } // fetch unique key if one exists. 
SchemaField keyField = schema.getUniqueKeyField(); if (null != keyField) fset.add(keyField.getName()); } // get FastVectorHighlighter instance out of the processing loop FastVectorHighlighter fvh = new FastVectorHighlighter( // FVH cannot process hl.usePhraseHighlighter parameter per-field basis params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true), // FVH cannot process hl.requireFieldMatch parameter per-field basis params.getBool(HighlightParams.FIELD_MATCH, false)); fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE)); FieldQuery fieldQuery = fvh.getFieldQuery(query, searcher.getIndexReader()); // Highlight each document DocIterator iterator = docs.iterator(); for (int i = 0; i < docs.size(); i++) { int docId = iterator.nextDoc(); Document doc = searcher.doc(docId, fset); NamedList docSummaries = new SimpleOrderedMap(); for (String fieldName : fieldNames) { fieldName = fieldName.trim(); if (useFastVectorHighlighter(params, schema, fieldName)) doHighlightingByFastVectorHighlighter(fvh, fieldQuery, req, docSummaries, docId, doc, fieldName); else doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName); } String printId = schema.printableUniqueKey(doc); fragments.add(printId == null ? null : printId, docSummaries); } return fragments; }
From source file: org.segrada.search.lucene.LuceneSearchEngine.java
License: Apache License
@Override public PaginationInfo<SearchHit> search(String searchTerm, Map<String, String> filters) { // to avoid NPEs if (filters == null) filters = new HashMap<>(); // set defaults int page = 1; int entriesPerPage = 20; try {/*from ww w.j a v a2 s .c o m*/ DirectoryReader iReader = DirectoryReader.open(directory); String[] containFields; // do we have a filter to contain to certain fields? if (filters.containsKey("fields")) { String fields = filters.get("fields"); if (fields.isEmpty()) containFields = new String[] { "title", "subTitles", "content" }; else if (fields.equalsIgnoreCase("title")) containFields = new String[] { "title" }; else if (fields.equalsIgnoreCase("subTitles")) containFields = new String[] { "subTitles" }; else if (fields.equalsIgnoreCase("content")) containFields = new String[] { "content" }; else if (fields.equalsIgnoreCase("allTitles")) containFields = new String[] { "title", "subTitles" }; else throw new RuntimeException("fields-Filter " + fields + " is not known."); } else containFields = new String[] { "title", "subTitles", "content" }; // Parse a simple query that searches for "text": MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_47, containFields, analyzer); // which operator do we use? parser.setDefaultOperator(QueryParser.Operator.AND); if (filters.containsKey("operator")) { String operator = filters.get("operator"); if (operator.equalsIgnoreCase("or")) parser.setDefaultOperator(QueryParser.Operator.OR); else if (!operator.isEmpty() && !operator.equalsIgnoreCase("and")) throw new RuntimeException("operator-Filter " + operator + " is not and/or."); } // filters for query List<Filter> searchFilters = new ArrayList<>(); // class filter if (filters.containsKey("class") && !filters.get("class").isEmpty()) { // multiple classes? 
String[] classes = filters.get("class").split(","); // single class if (classes.length <= 1) { TermQuery categoryQuery = new TermQuery(new Term("className", filters.get("class"))); searchFilters.add(new QueryWrapperFilter(categoryQuery)); } else { // multiple classes Filter[] categories = new Filter[classes.length]; for (int i = 0; i < classes.length; i++) { categories[i] = new QueryWrapperFilter( new TermQuery(new Term("className", classes[i].trim()))); } // add chained filter searchFilters.add(new ChainedFilter(categories, ChainedFilter.OR)); } } // tag filter if (filters.containsKey("tags") && !filters.get("tags").isEmpty()) { // split tags into array String[] tags = filters.get("tags").split(","); BooleanQuery booleanQuery = new BooleanQuery(); for (String tag : tags) { booleanQuery.add(new TermQuery(new Term("tag", tag.trim())), BooleanClause.Occur.SHOULD); } searchFilters.add(new QueryWrapperFilter(booleanQuery)); } // create filter - if multiple filters applied, add chained filter Filter filter = null; if (searchFilters.size() == 1) filter = searchFilters.get(0); else if (searchFilters.size() > 1) { Filter[] filterArray = new Filter[searchFilters.size()]; searchFilters.toArray(filterArray); filter = new ChainedFilter(filterArray, ChainedFilter.AND); } // define query Query query = null; if (searchTerm != null) query = parser.parse(searchTerm); if (query == null) query = new MatchAllDocsQuery(); // fallback to match all documents // get hits per page if (filters.containsKey("limit")) { try { entriesPerPage = Integer.valueOf(filters.get("limit")); if (entriesPerPage <= 0 || entriesPerPage > 1000) entriesPerPage = 20; } catch (NumberFormatException e) { logger.warn("Could not parse limit " + filters.get("limit") + " to integer", e); } } // get page number if (filters.containsKey("page")) { try { page = Integer.valueOf(filters.get("page")); } catch (NumberFormatException e) { logger.warn("Could not parse page " + filters.get("page") + " to integer", e); } } // 
calculate start/stop indexes int startIndex = (page - 1) * entriesPerPage; int endIndex = page * entriesPerPage; IndexSearcher iSearcher = new IndexSearcher(iReader); // do search TopDocs topDocs = iSearcher.search(query, filter, 1000); // update end index if (topDocs.scoreDocs.length < endIndex) endIndex = topDocs.scoreDocs.length; // how many pages do we have? int pages = topDocs.scoreDocs.length / entriesPerPage + 1; // reset page to sane limit, if needed if (page <= 0 || page > pages) page = 1; // highlighter FastVectorHighlighter highlighter = new FastVectorHighlighter(); FieldQuery fieldQuery = null; // field query for highlighted terms if (searchTerm != null) fieldQuery = highlighter.getFieldQuery( new QueryParser(Version.LUCENE_47, "content", analyzer).parse(searchTerm), iReader); // cycle trough hits List<SearchHit> hits = new ArrayList<>(); for (int i = startIndex; i < endIndex; i++) { ScoreDoc scoreDoc = topDocs.scoreDocs[i]; Document hitDoc = iSearcher.doc(scoreDoc.doc); SearchHit searchHit = new SearchHit(); searchHit.setId(hitDoc.get("id")); searchHit.setClassName(hitDoc.get("className")); searchHit.setTitle(hitDoc.get("title")); searchHit.setSubTitles(hitDoc.get("subTitles")); searchHit.setTagIds(hitDoc.getValues("tag")); String color = hitDoc.get("color"); searchHit.setColor(color != null ? 
new Integer(color) : null); searchHit.setIconFileIdentifier(hitDoc.get("iconFileIdentifier")); searchHit.setRelevance(scoreDoc.score); // get highlighted components if (searchTerm != null) { String[] bestFragments = highlighter.getBestFragments(fieldQuery, iReader, scoreDoc.doc, "content", 18, 10); searchHit.setHighlightText(bestFragments); } // add hit hits.add(searchHit); } iReader.close(); // return pagination info return new PaginationInfo<>(page, pages, topDocs.totalHits, entriesPerPage, hits); } catch (Throwable e) { logger.error("Error in search.", e); } // return empty list result in order to avoid NPEs return new PaginationInfo<>(page, 1, 0, entriesPerPage, new ArrayList<>()); }
From source file: org.segrada.search.lucene.LuceneSearchEngine.java
License: Apache License
@Override public String[] searchInDocument(String searchTerm, String id) { // sanity check if (searchTerm == null || id == null || searchTerm.isEmpty() || id.isEmpty()) return new String[] {}; try {/*from w w w . j av a 2s .c om*/ DirectoryReader iReader = DirectoryReader.open(directory); IndexSearcher iSearcher = new IndexSearcher(iReader); // only search content MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_47, new String[] { "content" }, analyzer); // set operator and contain by id parser.setDefaultOperator(QueryParser.Operator.AND); Query query = parser.parse(searchTerm); Filter filter = new QueryWrapperFilter(new TermQuery(new Term("id", id))); // do search, maximum of 1 document TopDocs topDocs = iSearcher.search(query, filter, 1); if (topDocs.scoreDocs.length > 0) { ScoreDoc scoreDoc = topDocs.scoreDocs[0]; // get highlighted text FastVectorHighlighter highlighter = new FastVectorHighlighter(); FieldQuery fieldQuery = highlighter.getFieldQuery( new QueryParser(Version.LUCENE_47, "content", analyzer).parse(searchTerm), iReader); // return max of 100 highlighted elements return highlighter.getBestFragments(fieldQuery, iReader, scoreDoc.doc, "content", 100, 100); } } catch (Throwable e) { logger.error("Error in search.", e); } return new String[] {}; }