List of usage examples for the org.apache.solr.search.SolrIndexSearcher#doc(int, Set&lt;String&gt;) method
@Override public final Document doc(int i, Set<String> fields) throws IOException
From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java
License:Apache License
/**
 * Resolves the stored document whose "id" field matches the given id value,
 * returning it with the "id" field removed.
 *
 * @param lookup searcher used to resolve the id term
 * @param id     unique-key value to look up
 * @param idRef  scratch {@link BytesRef} reused to hold the UTF-8 encoded id
 * @param fields stored fields to load for the document
 * @return the matching document without its "id" field, or {@code null} when
 *         the id does not exist or the document cannot be loaded
 * @throws IOException on index access failure
 */
private Document lookup(SolrIndexSearcher lookup, String id, BytesRef idRef, Set<String> fields)
        throws IOException {
    idRef.copyChars(id);
    final Term idTerm = new Term("id", idRef);
    // Cheap existence probe before resolving the first matching internal docid.
    if (lookup.docFreq(idTerm) == 0) {
        return null;
    }
    final int match = lookup.getFirstMatch(idTerm);
    if (match == -1) {
        return null;
    }
    final Document stored = lookup.doc(match, fields);
    if (stored == null) {
        return null;
    }
    // The caller supplies its own id; strip the indexed copy before returning.
    stored.removeFields("id");
    return stored;
}
From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java
License:Apache License
/** * Generates a list of Highlighted query fragments for each item in a list * of documents, or returns null if highlighting is disabled. * * @param docs query results//from w w w . ja v a 2 s.c o m * @param query the query * @param req the current request * @param defaultFields default list of fields to summarize * * @return NamedList containing a NamedList for each document, which in * turns contains sets (field, summary) pairs. */ @Override @SuppressWarnings("unchecked") public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException { NamedList fragments = new SimpleOrderedMap(); SolrParams params = req.getParams(); if (!isHighlightingEnabled(params)) return null; SolrIndexSearcher searcher = req.getSearcher(); IndexSchema schema = searcher.getSchema(); String[] fieldNames = getHighlightFields(query, req, defaultFields); Set<String> fset = new HashSet<String>(); { // pre-fetch documents using the Searcher's doc cache Collections.addAll(fset, fieldNames); // fetch unique key if one exists. 
SchemaField keyField = schema.getUniqueKeyField(); if (null != keyField) fset.add(keyField.getName()); } //CHANGE start // int[] docIds = new int[docs.swordize()]; TreeSet<Integer> docIds = new TreeSet<Integer>(); DocIterator iterator = docs.iterator(); for (int i = 0; i < docs.size(); i++) { docIds.add(iterator.nextDoc()); } // Get Frag list builder String fragListBuilderString = params.get(HighlightParams.FRAG_LIST_BUILDER).toLowerCase(); FragListBuilder fragListBuilder; if (fragListBuilderString.equals("single")) { fragListBuilder = new SingleFragListBuilder(); } else { fragListBuilder = new com.o19s.solr.swan.highlight.SimpleFragListBuilder(); } // get FastVectorHighlighter instance out of the processing loop SpanAwareFastVectorHighlighter safvh = new SpanAwareFastVectorHighlighter( // FVH cannot process hl.usePhraseHighlighter parameter per-field basis params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true), // FVH cannot process hl.requireFieldMatch parameter per-field basis params.getBool(HighlightParams.FIELD_MATCH, false), fragListBuilder, //new com.o19s.solr.swan.highlight.ScoreOrderFragmentsBuilder(), new WordHashFragmentsBuilder(), // List of docIds to filter spans docIds); safvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE)); SpanAwareFieldQuery fieldQuery = safvh.getFieldQuery(query, searcher.getIndexReader(), docIds); // Highlight each document for (int docId : docIds) { Document doc = searcher.doc(docId, fset); NamedList docSummaries = new SimpleOrderedMap(); for (String fieldName : fieldNames) { fieldName = fieldName.trim(); if (useFastVectorHighlighter(params, schema, fieldName)) doHighlightingByFastVectorHighlighter(safvh, fieldQuery, req, docSummaries, docId, doc, fieldName); else doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName); } String printId = schema.printableUniqueKey(doc); fragments.add(printId == null ? null : printId, docSummaries); } //CHANGE end return fragments; }
From source file:com.searchbox.TaggerComponent.java
License:Apache License
private NamedList doDocuments(ResponseBuilder rb, SolrParams params, SolrIndexSearcher searcher, int lcount) { /*-----------------*/ String[] localfields = params.getParams(TaggerComponentParams.QUERY_FIELDS); String[] fields = null;//from w w w. j a v a 2 s .co m if (gfields != null) { fields = gfields; } if (localfields != null) { fields = localfields; } if (fields == null) { LOGGER.error("Fields aren't defined, not performing tagging."); return null; } DocList docs = rb.getResults().docList; if (docs == null || docs.size() == 0) { LOGGER.debug("No results"); } LOGGER.debug("Doing This many docs:\t" + docs.size()); Set<String> fset = new HashSet<String>(); SchemaField keyField = rb.req.getCore().getSchema().getUniqueKeyField(); if (null != keyField) { fset.add(keyField.getName()); } for (String field : fields) { fset.add(field); } NamedList response = new SimpleOrderedMap(); DocIterator iterator = docs.iterator(); for (int i = 0; i < docs.size(); i++) { try { int docId = iterator.nextDoc(); Document doc = searcher.doc(docId, fset); StringBuilder sb = new StringBuilder(); for (String field : fields) { IndexableField[] multifield = doc.getFields(field); for (IndexableField singlefield : multifield) { sb.append(singlefield.stringValue() + ". "); } } String q = sb.toString(); String id = doc.getField(keyField.getName()).stringValue(); // do work here TaggerResultSet trs = dfb.tagText(q, lcount); NamedList docresponse = new SimpleOrderedMap(); for (TaggerResult tr : trs.suggestions) { docresponse.add(tr.suggestion, tr.score); } response.add(id, docresponse); } catch (IOException ex) { java.util.logging.Logger.getLogger(TaggerComponent.class.getName()).log(Level.SEVERE, null, ex); } } // response.add(suggestion.suggestion, suggestion.probability); return response; }
From source file:com.sn.solr.plugin.rank.RankEngine.java
License:Apache License
/** * Provides implementation for Dense ranking ["1223"] as identified by the * {@link RankStrategy#LEGACY_DENSE} the difference is that this * implementation is computed without using facet results so this will * noticeably slower than computing rank based on facets * use {@link RankStrategy#DENSE}. Besides this implementation might cause * lot of cache evictions putting stress on memory. * * @see #computeDenseRank(List)//from www . j a v a2s. co m * * @param pairList List of {@link Pair} objects that holds the value of rank * field & respective count. */ @Deprecated public static Map<String, Number> computeLegacyDenseRank(ResponseBuilder rb, String idField, String rankField) throws IOException { SolrIndexSearcher searcher = rb.req.getSearcher(); SolrParams params = rb.req.getParams();// .getParams(FacetParams.FACET_FIELD); String _start = params.get(CommonParams.START); String _rows = params.get(CommonParams.ROWS); int start = 0; int rows = 10; if (_start != null & AppHelper.isInteger(_start)) start = new Integer(_start); if (_rows != null & AppHelper.isInteger(_rows)) rows = new Integer(_rows); LOG.info("Computing rank using strategy: {}", RankStrategy.ORDINAL.getDescription()); FieldSelector fs = new MapFieldSelector(new String[] { idField, rankField }); Map<String, Number> rankMap = new HashMap<String, Number>(); DocList docs = searcher.getDocList(rb.getQuery(), rb.getFilters(), rb.getSortSpec().getSort(), 0, start + rows, 0); int denseRank = 1; int _CurrScore = 0; int _PrevScore = 0; int i = 0; for (DocIterator it = docs.iterator(); it.hasNext();) { Document doc = searcher.doc(it.nextDoc(), fs); _CurrScore = new Integer(doc.get(rankField)); if (i == 0) { _PrevScore = _CurrScore; } if (_PrevScore != _CurrScore) { _PrevScore = _CurrScore; denseRank++; } if (i >= start) { rankMap.put(doc.get(idField), denseRank); } i++; } return rankMap; }
From source file:com.tamingtext.fuzzy.TypeAheadResponseWriter.java
License:Apache License
@Override public void write(Writer w, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException { SolrIndexSearcher searcher = req.getSearcher(); NamedList nl = rsp.getValues();/* w ww . ja va2 s .c om*/ int sz = nl.size(); for (int li = 0; li < sz; li++) { Object val = nl.getVal(li); if (val instanceof DocList) { //<co id="co.fuzzy.type-ahead.doclist"/> DocList dl = (DocList) val; DocIterator iterator = dl.iterator(); w.append("<ul>\n"); while (iterator.hasNext()) { int id = iterator.nextDoc(); Document doc = searcher.doc(id, fields); //<co id="co.fuzzy.type-ahead.search"/> String name = doc.get("word"); w.append("<li>" + name + "</li>\n"); } w.append("</ul>\n"); } } }
From source file:com.tamingtext.qa.PassageRankingComponent.java
License:Apache License
/**
 * Ranks passages (term-vector windows around span matches) for the incoming
 * SpanNearQuery and adds the top answers to the response under "qaResponse".
 * Only runs when the component is enabled via its request parameter, and only
 * accepts SpanNearQuery — any other query type is a server error.
 */
@Override
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    // Component is opt-in per request.
    if (!params.getBool(COMPONENT_NAME, false)) {
        return;
    }
    Query origQuery = rb.getQuery();
    //TODO: longer term, we don't have to be a span query, we could re-analyze the document
    if (origQuery != null) {
        if (origQuery instanceof SpanNearQuery == false) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                    "Illegal query type.  The incoming query must be a Lucene SpanNearQuery and it was a "
                            + origQuery.getClass().getName());
        }
        SpanNearQuery sQuery = (SpanNearQuery) origQuery;
        SolrIndexSearcher searcher = rb.req.getSearcher();
        IndexReader reader = searcher.getIndexReader();
        Spans spans = sQuery.getSpans(reader); //Assumes the query is a SpanQuery
        //Build up the query term weight map and the bi-gram weights
        Map<String, Float> termWeights = new HashMap<String, Float>();
        Map<String, Float> bigramWeights = new HashMap<String, Float>();
        createWeights(params.get(CommonParams.Q), sQuery, termWeights, bigramWeights, reader);
        // Per-request weight overrides with component defaults.
        float adjWeight = params.getFloat(ADJACENT_WEIGHT, DEFAULT_ADJACENT_WEIGHT);
        float secondAdjWeight = params.getFloat(SECOND_ADJ_WEIGHT, DEFAULT_SECOND_ADJACENT_WEIGHT);
        float bigramWeight = params.getFloat(BIGRAM_WEIGHT, DEFAULT_BIGRAM_WEIGHT);
        //get the passages: window sizes control how much context the
        // term-vector mapper collects around each span match
        int primaryWindowSize = params.getInt(QAParams.PRIMARY_WINDOW_SIZE, DEFAULT_PRIMARY_WINDOW_SIZE);
        int adjacentWindowSize = params.getInt(QAParams.ADJACENT_WINDOW_SIZE, DEFAULT_ADJACENT_WINDOW_SIZE);
        int secondaryWindowSize = params.getInt(QAParams.SECONDARY_WINDOW_SIZE, DEFAULT_SECONDARY_WINDOW_SIZE);
        WindowBuildingTVM tvm = new WindowBuildingTVM(primaryWindowSize, adjacentWindowSize, secondaryWindowSize);
        PassagePriorityQueue rankedPassages = new PassagePriorityQueue();
        //intersect w/ doclist: only spans in documents that matched the query
        DocList docList = rb.getResults().docList;
        while (spans.next() == true) {
            //build up the window around this span match
            if (docList.exists(spans.doc())) {
                tvm.spanStart = spans.start();
                tvm.spanEnd = spans.end();
                // The mapper fills tvm.passage as the term vector is visited.
                reader.getTermFreqVector(spans.doc(), sQuery.getField(), tvm);
                //The entries map contains the window, do some ranking of it
                if (tvm.passage.terms.isEmpty() == false) {
                    log.debug("Candidate: Doc: {} Start: {} End: {} ",
                            new Object[] { spans.doc(), spans.start(), spans.end() });
                }
                tvm.passage.lDocId = spans.doc();
                tvm.passage.field = sQuery.getField();
                //score this window
                try {
                    addPassage(tvm.passage, rankedPassages, termWeights, bigramWeights, adjWeight,
                            secondAdjWeight, bigramWeight);
                } catch (CloneNotSupportedException e) {
                    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                            "Internal error cloning Passage", e);
                }
                //clear out the entries for the next round
                tvm.passage.clear();
            }
        }
        NamedList qaResp = new NamedList();
        rb.rsp.add("qaResponse", qaResp);
        int rows = params.getInt(QA_ROWS, 5);
        SchemaField uniqField = rb.req.getSchema().getUniqueKeyField();
        if (rankedPassages.size() > 0) {
            int size = Math.min(rows, rankedPassages.size());
            Set<String> fields = new HashSet<String>();
            // Queue pops lowest-ranked first, so iterate from the back.
            for (int i = size - 1; i >= 0; i--) {
                Passage passage = rankedPassages.pop();
                if (passage != null) {
                    NamedList passNL = new NamedList();
                    qaResp.add(("answer"), passNL);
                    String idName;
                    String idValue;
                    if (uniqField != null) {
                        idName = uniqField.getName();
                        fields.add(idName);
                        fields.add(passage.field);//prefetch this now, so that it is cached
                        idValue = searcher.doc(passage.lDocId, fields).get(idName);
                    } else {
                        // No unique key in the schema: fall back to the Lucene docid.
                        idName = "luceneDocId";
                        idValue = String.valueOf(passage.lDocId);
                    }
                    passNL.add(idName, idValue);
                    passNL.add("field", passage.field);
                    //get the window
                    String fldValue = searcher.doc(passage.lDocId, fields).get(passage.field);
                    if (fldValue != null) {
                        //get the window of words to display, we don't use the passage window,
                        //as that is based on the term vector
                        int start = passage.terms.first().start;//use the offsets
                        int end = passage.terms.last().end;
                        if (start >= 0 && start < fldValue.length() && end >= 0 && end < fldValue.length()) {
                            passNL.add("window",
                                    fldValue.substring(start, end + passage.terms.last().term.length()));
                        } else {
                            log.debug("Passage does not have correct offset information");
                            passNL.add("window", fldValue);//we don't have offsets, or they are incorrect, return the whole field value
                        }
                    }
                } else {
                    break;
                }
            }
        }
    }
}
From source file:jp.co.atware.solr.geta.GETAssocComponent.java
License:Apache License
/**
 * {@inheritDoc}
 *
 * Forwards configured request parameters and configured document fields to a
 * GETAssoc (GSS3) service and attaches the converted results to the response
 * under "geta".
 *
 * @see SearchComponent#process(ResponseBuilder)
 */
@Override
public void process(ResponseBuilder rb) throws IOException {
    NamedList<Object> result = new NamedList<Object>();
    HttpClient client = new HttpClient();
    // Request-parameter queries (configured via geta.settings.process.req[*]):
    // each configured request parameter's values are sent to the service.
    if (!config.settings.req.isEmpty()) {
        NamedList<Object> paramList = new NamedList<Object>();
        for (Entry<String, QueryType> entry : config.settings.req.entrySet()) {
            String param = entry.getKey();
            NamedList<Object> paramValueList = new NamedList<Object>();
            String[] paramValues = rb.req.getParams().getParams(param);
            if (paramValues != null) {
                for (String paramValue : paramValues) {
                    // One GSS3 round-trip per parameter value.
                    NamedList<Object> getaResultList = convertResult(postGss3Request(client,
                            convertRequest(rb.req.getParams(), paramValue, entry.getValue())));
                    paramValueList.add(paramValue, getaResultList);
                }
            }
            paramList.add(param, paramValueList);
        }
        result.add("req", paramList);
    }
    // Document-field queries (configured via geta.settings.process.doc[*]):
    // each configured field of each result document is sent to the service.
    if (!config.settings.doc.isEmpty()) {
        NamedList<Object> docList = new NamedList<Object>();
        SolrIndexSearcher searcher = rb.req.getSearcher();
        IndexSchema schema = searcher.getSchema();
        String key = schema.getUniqueKeyField().getName();
        // Load only the unique key plus the configured fields.
        List<String> targetFieldNames = new ArrayList<String>(config.settings.doc.size() + 1);
        targetFieldNames.add(key);
        targetFieldNames.addAll(config.settings.doc.keySet());
        FieldSelector selector = new MapFieldSelector(targetFieldNames);
        DocIterator iterator = rb.getResults().docList.iterator();
        while (iterator.hasNext()) {
            Document doc = searcher.doc(iterator.next(), selector);
            String docKey = schema.printableUniqueKey(doc);
            NamedList<Object> fieldList = new NamedList<Object>();
            for (Entry<String, QueryType> entry : config.settings.doc.entrySet()) {
                String field = entry.getKey();
                NamedList<Object> queryList = new NamedList<Object>();
                // One GSS3 round-trip per value of a (possibly multi-valued) field.
                for (Fieldable fieldable : doc.getFieldables(field)) {
                    NamedList<Object> getaResultList = convertResult(postGss3Request(client,
                            convertRequest(rb.req.getParams(), fieldable.stringValue(), entry.getValue())));
                    queryList.add(fieldable.stringValue(), getaResultList);
                }
                fieldList.add(entry.getKey(), queryList);
            }
            docList.add(docKey, fieldList);
        }
        result.add("doc", docList);
    }
    // Only attach the section when at least one query produced output.
    if (result.size() != 0) {
        rb.rsp.add("geta", result);
    }
}
From source file:net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector.java
License:Open Source License
/**
 * Conversion from a SolrQueryResponse (which is a solr-internal data format)
 * to SolrDocumentList (which is a solrj format). The conversion is done inside
 * the solrj api using the BinaryResponseWriter and a very complex unfolding
 * process via org.apache.solr.common.util.JavaBinCodec.marshal.
 *
 * @param req the request the response belongs to (supplies the searcher)
 * @param rsp the solr-internal response to convert
 * @return a solrj document list mirroring the response's "response" DocList
 */
public SolrDocumentList SolrQueryResponse2SolrDocumentList(final SolrQueryRequest req,
        final SolrQueryResponse rsp) {
    SolrDocumentList sdl = new SolrDocumentList();
    NamedList<?> nl = rsp.getValues();
    ResultContext resultContext = (ResultContext) nl.get("response");
    // Fall back to an empty slice when the response carries no result context.
    DocList response = resultContext == null
            ? new DocSlice(0, 0, new int[0], new float[0], 0, 0.0f)
            : resultContext.docs;
    sdl.setNumFound(response == null ? 0 : response.matches());
    sdl.setStart(response == null ? 0 : response.offset());
    // The thread name is abused as progress indicator; remember it so it can
    // be restored afterwards.
    String originalName = Thread.currentThread().getName();
    if (response != null) {
        try {
            SolrIndexSearcher searcher = req.getSearcher();
            final int responseCount = response.size();
            DocIterator iterator = response.iterator();
            for (int i = 0; i < responseCount; i++) {
                int docid = iterator.nextDoc();
                Thread.currentThread()
                        .setName("EmbeddedSolrConnector.SolrQueryResponse2SolrDocumentList: " + docid);
                // null field set loads all stored fields.
                Document responsedoc = searcher.doc(docid, (Set<String>) null);
                SolrDocument sordoc = doc2SolrDoc(responsedoc);
                sdl.add(sordoc);
            }
        } catch (IOException e) {
            ConcurrentLog.logException(e);
        } finally {
            // FIX: restore the thread name in a finally block so it is also
            // reset when an unchecked exception escapes the loop (the
            // original only restored it on the normal/IOException paths).
            Thread.currentThread().setName(originalName);
        }
    }
    return sdl;
}
From source file:net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector.java
License:Open Source License
/** * check if a given document, identified by url hash as document id exists * @param id the url hash and document id * @return the load date if any entry in solr exists, -1 otherwise * @throws IOException//from w w w . j a v a 2 s .c o m */ @Override public LoadTimeURL getLoadTimeURL(String id) throws IOException { int responseCount = 0; DocListSearcher docListSearcher = null; try { docListSearcher = new DocListSearcher( "{!cache=false raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id, null, 0, 1, CollectionSchema.id.getSolrFieldName(), CollectionSchema.load_date_dt.getSolrFieldName()); responseCount = docListSearcher.response.size(); if (responseCount == 0) return null; SolrIndexSearcher searcher = docListSearcher.request.getSearcher(); DocIterator iterator = docListSearcher.response.iterator(); //for (int i = 0; i < responseCount; i++) { Document doc = searcher.doc(iterator.nextDoc(), AbstractSolrConnector.SOLR_ID_and_LOAD_DATE_FIELDS); if (doc == null) return null; return AbstractSolrConnector.getLoadTimeURL(doc); //} } catch (Throwable e) { ConcurrentLog.logException(e); throw new IOException(e.getMessage()); } finally { if (docListSearcher != null) docListSearcher.close(); } }
From source file:net.yacy.cora.federate.solr.responsewriter.EnhancedXMLResponseWriter.java
License:Open Source License
/**
 * Writes a DocList as an XML &lt;result&gt; element: the numFound/start
 * attributes followed by one &lt;doc&gt; per entry, loading each document's
 * DEFAULT_FIELD_LIST stored fields.
 */
private static final void writeDocs(final Writer writer, final SolrQueryRequest request,
        final DocList response) throws IOException {
    // NOTE(review): includeScore is hard-coded to false, so the maxScore
    // attribute below and the hasScores() re-check further down are dead
    // code; scores are never emitted. Confirm whether this is intentional.
    boolean includeScore = false;
    final int sz = response.size();
    writer.write("<result");
    writeAttr(writer, "name", "response");
    writeAttr(writer, "numFound", Long.toString(response.matches()));
    writeAttr(writer, "start", Long.toString(response.offset()));
    if (includeScore) {
        writeAttr(writer, "maxScore", Float.toString(response.maxScore()));
    }
    // Empty result: emit a self-closing element and stop.
    if (sz == 0) {
        writer.write("/>");
        return;
    }
    writer.write('>');
    writer.write(lb);
    SolrIndexSearcher searcher = request.getSearcher();
    DocIterator iterator = response.iterator();
    includeScore = includeScore && response.hasScores();
    IndexSchema schema = request.getSchema();
    for (int i = 0; i < sz; i++) {
        int id = iterator.nextDoc();
        Document doc = searcher.doc(id, DEFAULT_FIELD_LIST);
        writeDoc(writer, schema, null, doc.getFields(), (includeScore ? iterator.score() : 0.0f),
                includeScore);
    }
    writer.write("</result>");
    writer.write(lb);
}