List of usage examples for org.apache.solr.request.SolrQueryRequest.getContentStreams()
Iterable<ContentStream> getContentStreams();
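All of the handlers listed below follow roughly the same pattern: call getContentStreams(), check for null (nothing was posted), take a Reader or InputStream from the first stream, and usually reject requests that carry more than one stream. The following minimal sketch shows that pattern in isolation; it is not taken from any of the listed source files, and the class and method names are illustrative only.

import java.io.Reader;
import java.util.Iterator;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.request.SolrQueryRequest;

// Hypothetical helper illustrating the typical single-stream usage of getContentStreams().
public class ContentStreamReaderExample {

    /** Returns a Reader over the first posted ContentStream, or null if nothing was posted. */
    public static Reader firstStreamReader(SolrQueryRequest req) throws Exception {
        Iterable<ContentStream> streams = req.getContentStreams(); // may be null if the request has no body
        if (streams == null) {
            return null;
        }
        Iterator<ContentStream> iter = streams.iterator();
        Reader reader = null;
        if (iter.hasNext()) {
            reader = iter.next().getReader(); // caller is responsible for closing this reader
        }
        if (iter.hasNext()) {
            // Most handlers in this list refuse requests with more than one content stream.
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "this handler does not support multiple ContentStreams");
        }
        return reader;
    }
}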
From source file:com.searchbox.solr.CategoryLikeThis.java
License:Apache License
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    numRequests++;
    long startTime = System.currentTimeMillis();
    if (!keystate) {
        LOGGER.error(
                "License key failure, not performing clt query. Please email contact@searchbox.com for more information.");
        return;
    }
    try {
        SolrParams params = req.getParams();
        String senseField = params.get(SenseParams.SENSE_FIELD, SenseParams.DEFAULT_SENSE_FIELD);
        BooleanQuery catfilter = new BooleanQuery();
        // Set field flags
        ReturnFields returnFields = new SolrReturnFields(req);
        rsp.setReturnFields(returnFields);
        int flags = 0;
        if (returnFields.wantsScore()) {
            flags |= SolrIndexSearcher.GET_SCORES;
        }
        String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE);
        String q = params.get(CommonParams.Q);
        Query query = null;
        SortSpec sortSpec = null;
        List<Query> filters = new LinkedList<Query>();
        List<RealTermFreqVector> prototypetfs = new LinkedList<RealTermFreqVector>();
        try {
            if (q != null) {
                QParser parser = QParser.getParser(q, defType, req);
                query = parser.getQuery();
                sortSpec = parser.getSort(true);
            }
            String[] fqs = req.getParams().getParams(CommonParams.FQ);
            if (fqs != null && fqs.length != 0) {
                for (String fq : fqs) {
                    if (fq != null && fq.trim().length() != 0) {
                        QParser fqp = QParser.getParser(fq, null, req);
                        filters.add(fqp.getQuery());
                    }
                }
            }
        } catch (Exception e) {
            numErrors++;
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
        }
        SolrIndexSearcher searcher = req.getSearcher();
        DocListAndSet cltDocs = null;
        // Parse Required Params
        // This will either have a single Reader or valid query
        Reader reader = null;
        try {
            if (q == null || q.trim().length() < 1) {
                Iterable<ContentStream> streams = req.getContentStreams();
                if (streams != null) {
                    Iterator<ContentStream> iter = streams.iterator();
                    if (iter.hasNext()) {
                        reader = iter.next().getReader();
                    }
                    if (iter.hasNext()) {
                        numErrors++;
                        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                                "SenseLikeThis does not support multiple ContentStreams");
                    }
                }
            }
            int start = params.getInt(CommonParams.START, 0);
            int rows = params.getInt(CommonParams.ROWS, 10);
            // Find documents SenseLikeThis - either with a reader or a query
            // --------------------------------------------------------------------------------
            if (reader != null) {
                numErrors++;
                throw new RuntimeException("SLT based on a reader is not yet implemented");
            } else if (q != null) {
                LOGGER.debug("Query for category:\t" + query);
                DocList match = searcher.getDocList(query, null, null, 0, 10, flags); // get first 10
                if (match.size() == 0) { // no docs to make prototype!
                    LOGGER.info("No documents found for prototype!");
                    rsp.add("response", new DocListAndSet());
                    return;
                }
                HashMap<String, Float> overallFreqMap = new HashMap<String, Float>();
                // Create the TF of blah blah blah
                DocIterator iterator = match.iterator();
                while (iterator.hasNext()) {
                    // do a MoreLikeThis query for each document in results
                    int id = iterator.nextDoc();
                    LOGGER.trace("Working on doc:\t" + id);
                    RealTermFreqVector rtv = new RealTermFreqVector(id, searcher.getIndexReader(), senseField);
                    for (int zz = 0; zz < rtv.getSize(); zz++) {
                        Float prev = overallFreqMap.get(rtv.getTerms()[zz]);
                        if (prev == null) {
                            prev = 0f;
                        }
                        overallFreqMap.put(rtv.getTerms()[zz], rtv.getFreqs()[zz] + prev);
                    }
                    prototypetfs.add(rtv);
                }
                List<String> sortedKeys = Ordering.natural().onResultOf(Functions.forMap(overallFreqMap))
                        .immutableSortedCopy(overallFreqMap.keySet());
                int keyiter = Math.min(sortedKeys.size() - 1, BooleanQuery.getMaxClauseCount() - 1);
                LOGGER.debug("I have this many terms:\t" + sortedKeys.size());
                LOGGER.debug("And i'm going to use this many:\t" + keyiter);
                for (; keyiter >= 0; keyiter--) {
                    TermQuery tq = new TermQuery(new Term(senseField, sortedKeys.get(keyiter)));
                    catfilter.add(tq, BooleanClause.Occur.SHOULD);
                }
            } else {
                numErrors++;
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                        "CategoryLikeThis requires either a query (?q=) or text to find similar documents.");
            }
            LOGGER.debug("document filter is: \t" + catfilter);
            CategorizationBase model = new CategorizationBase(prototypetfs);
            CategoryQuery clt = CategoryQuery.CategoryQueryForDocument(catfilter, model,
                    searcher.getIndexReader(), senseField);
            DocSet filtered = searcher.getDocSet(filters);
            cltDocs = searcher.getDocListAndSet(clt, filtered, Sort.RELEVANCE, start, rows, flags);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
        if (cltDocs == null) {
            numEmpty++;
            cltDocs = new DocListAndSet(); // avoid NPE
        }
        rsp.add("response", cltDocs.docList);
        // maybe facet the results
        if (params.getBool(FacetParams.FACET, false)) {
            if (cltDocs.docSet == null) {
                rsp.add("facet_counts", null);
            } else {
                SimpleFacets f = new SimpleFacets(req, cltDocs.docSet, params);
                rsp.add("facet_counts", f.getFacetCounts());
            }
        }
    } catch (Exception e) {
        numErrors++;
    } finally {
        totalTime += System.currentTimeMillis() - startTime;
    }
}
From source file:com.searchbox.solr.SenseLikeThisHandlerNoReduction.java
License:Apache License
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    SolrParams params = req.getParams();
    if (!keystate) {
        LOGGER.error(
                "License key failure, not performing sense query. Please email contact@searchbox.com for more information.");
        return;
    }
    int docID;
    // Set field flags
    ReturnFields returnFields = new SolrReturnFields(req);
    rsp.setReturnFields(returnFields);
    int flags = 0;
    if (returnFields.wantsScore()) {
        flags |= SolrIndexSearcher.GET_SCORES;
    }
    String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE);
    String q = params.get(CommonParams.Q);
    Query query = null;
    SortSpec sortSpec = null;
    List<Query> filters = new ArrayList<Query>();
    try {
        if (q != null) {
            QParser parser = QParser.getParser(q, defType, req);
            query = parser.getQuery();
            sortSpec = parser.getSort(true);
        }
        String[] fqs = req.getParams().getParams(CommonParams.FQ);
        if (fqs != null && fqs.length != 0) {
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0) {
                    QParser fqp = QParser.getParser(fq, null, req);
                    filters.add(fqp.getQuery());
                }
            }
        }
    } catch (Exception e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
    }
    SolrIndexSearcher searcher = req.getSearcher();
    SchemaField uniqueKeyField = searcher.getSchema().getUniqueKeyField();
    DocListAndSet sltDocs = null;
    // Parse Required Params
    // This will either have a single Reader or valid query
    Reader reader = null;
    try {
        if (q == null || q.trim().length() < 1) {
            Iterable<ContentStream> streams = req.getContentStreams();
            if (streams != null) {
                Iterator<ContentStream> iter = streams.iterator();
                if (iter.hasNext()) {
                    reader = iter.next().getReader();
                }
                if (iter.hasNext()) {
                    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                            "SenseLikeThis does not support multiple ContentStreams");
                }
            }
        }
        int start = params.getInt(CommonParams.START, 0);
        int rows = params.getInt(CommonParams.ROWS, 10);
        // Find documents SenseLikeThis - either with a reader or a query
        // --------------------------------------------------------------------------------
        SenseQuery slt = null;
        if (reader != null) {
            throw new RuntimeException("SLT based on a reader is not yet implemented");
        } else if (q != null) {
            // Matching options
            boolean includeMatch = params.getBool(MoreLikeThisParams.MATCH_INCLUDE, true);
            int matchOffset = params.getInt(MoreLikeThisParams.MATCH_OFFSET, 0);
            // Find the base match
            DocList match = searcher.getDocList(query, null, null, matchOffset, 1, flags); // only get the first one...
            if (includeMatch) {
                rsp.add("match", match);
            }
            // Get docID
            DocIterator iterator = match.iterator();
            docID = iterator.nextDoc();
            BooleanQuery bq = new BooleanQuery();
            Document doc = searcher.getIndexReader().document(docID);
            bq.add(new TermQuery(new Term(uniqueKeyField.getName(),
                    uniqueKeyField.getType().storedToIndexed(doc.getField(uniqueKeyField.getName())))),
                    BooleanClause.Occur.MUST_NOT);
            filters.add(bq);
        } else {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "SenseLikeThis requires either a query (?q=) or text to find similar documents.");
        }
        String CKBid = params.get(SenseParams.SENSE_CKB, SenseParams.SENSE_CKB_DEFAULT);
        String senseField = params.get(SenseParams.SENSE_FIELD, SenseParams.DEFAULT_SENSE_FIELD);
        slt = new SenseQuery(new RealTermFreqVector(docID, searcher.getIndexReader(), senseField), senseField,
                CKBid, params.getFloat(SenseParams.SENSE_WEIGHT, SenseParams.DEFAULT_SENSE_WEIGHT), null);
        // Execute the SLT query
        //DocSet filtered = searcher.getDocSet(filters);
        //System.out.println("Number of documents to search:\t" + filtered.size());
        //sltDocs = searcher.getDocListAndSet(slt, filtered, Sort.RELEVANCE, start, rows, flags);
        sltDocs = searcher.getDocListAndSet(slt, filters, Sort.RELEVANCE, start, rows, flags);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
    if (sltDocs == null) {
        sltDocs = new DocListAndSet(); // avoid NPE
    }
    rsp.add("response", sltDocs.docList);
    // maybe facet the results
    if (params.getBool(FacetParams.FACET, false)) {
        if (sltDocs.docSet == null) {
            rsp.add("facet_counts", null);
        } else {
            SimpleFacets f = new SimpleFacets(req, sltDocs.docSet, params);
            rsp.add("facet_counts", f.getFacetCounts());
        }
    }
    // Debug info, not doing it for the moment.
    boolean dbg = req.getParams().getBool(CommonParams.DEBUG_QUERY, false);
    boolean dbgQuery = false, dbgResults = false;
    if (dbg == false) { // if it's true, we are doing everything anyway.
        String[] dbgParams = req.getParams().getParams(CommonParams.DEBUG);
        if (dbgParams != null) {
            for (int i = 0; i < dbgParams.length; i++) {
                if (dbgParams[i].equals(CommonParams.QUERY)) {
                    dbgQuery = true;
                } else if (dbgParams[i].equals(CommonParams.RESULTS)) {
                    dbgResults = true;
                }
            }
        }
    } else {
        dbgQuery = true;
        dbgResults = true;
    }
    // Copied from StandardRequestHandler... perhaps it should be added to doStandardDebug?
    if (dbg == true) {
        try {
            NamedList<Object> dbgInfo = SolrPluginUtils.doStandardDebug(req, q, query, sltDocs.docList,
                    dbgQuery, dbgResults);
            if (null != dbgInfo) {
                if (null != filters) {
                    dbgInfo.add("filter_queries", req.getParams().getParams(CommonParams.FQ));
                    List<String> fqs = new ArrayList<String>(filters.size());
                    for (Query fq : filters) {
                        fqs.add(QueryParsing.toString(fq, req.getSchema()));
                    }
                    dbgInfo.add("parsed_filter_queries", fqs);
                }
                rsp.add("debug", dbgInfo);
            }
        } catch (Exception e) {
            SolrException.log(SolrCore.log, "Exception during debug", e);
            rsp.add("exception_during_debug", SolrException.toStr(e));
        }
    }
}
From source file:de.qaware.chronix.solr.ingestion.AbstractIngestionHandler.java
License:Apache License
@Override
@SuppressWarnings("PMD.SignatureDeclareThrowsException")
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    formatResponseAsJson(req);
    if (req.getContentStreams() == null) {
        LOGGER.warn("no content stream");
        rsp.add("error", "No content stream");
        return;
    }
    InputStream stream = req.getContentStreams().iterator().next().getStream();

    MetricTimeSeriesConverter converter = new MetricTimeSeriesConverter();

    UpdateRequestProcessorChain processorChain = req.getCore().getUpdateProcessorChain(req.getParams());
    UpdateRequestProcessor processor = processorChain.createProcessor(req, rsp);
    try {
        for (MetricTimeSeries series : formatParser.parse(stream)) {
            SolrInputDocument document = new SolrInputDocument();
            converter.to(series).getFields().forEach(document::addField);
            storeDocument(document, processor, req);
        }
        LOGGER.debug("Committing transaction...");
        processor.processCommit(new CommitUpdateCommand(req, false));
        LOGGER.debug("Committed transaction");
    } finally {
        processor.finish();
    }
}
From source file:lux.solr.XQueryComponent.java
License:Mozilla Public License
private XdmValue buildEXPathRequest(Compiler compiler, Evaluator evaluator, SolrQueryRequest req)
        throws XPathException {
    LinkedTreeBuilder builder = new LinkedTreeBuilder(
            compiler.getProcessor().getUnderlyingConfiguration().makePipelineConfiguration());
    builder.startDocument(0);
    builder.startElement(fQNameFor("http", EXPATH_HTTP_NS, "request"), AnyType.getInstance(), 0, 0);
    builder.namespace(new NamespaceBinding("http", EXPATH_HTTP_NS), 0);
    Request requestWrapper = (Request) req.getContext().get(SolrQueryContext.LUX_HTTP_SERVLET_REQUEST);
    addAttribute(builder, "method", requestWrapper.getMethod());
    addAttribute(builder, "servlet", requestWrapper.getServletPath());
    HttpServletRequest httpReq = (HttpServletRequest) requestWrapper.getRequest();
    addAttribute(builder, "path", httpReq.getServletPath());
    String pathInfo = requestWrapper.getPathInfo();
    if (pathInfo != null) {
        addAttribute(builder, "path-info", pathInfo);
    }
    builder.startContent();
    // child elements
    StringBuilder buf = new StringBuilder();
    // authority
    buf.append(requestWrapper.getScheme()).append("://").append(requestWrapper.getServerName()).append(':')
            .append(requestWrapper.getServerPort());
    String authority = buf.toString();
    addSimpleElement(builder, "authority", authority);
    // url
    buf.append(httpReq.getServletPath());
    if (httpReq.getQueryString() != null) {
        buf.append('?').append(httpReq.getQueryString());
    }
    String url = buf.toString();
    addSimpleElement(builder, "url", url);
    // context-root
    addSimpleElement(builder, "context-root", httpReq.getContextPath());
    // path - just one part: we don't do any parsing of the path
    builder.startElement(fQNameFor("http", EXPATH_HTTP_NS, "path"), BuiltInAtomicType.UNTYPED_ATOMIC, 0, 0);
    builder.startContent();
    addSimpleElement(builder, "part", httpReq.getServletPath());
    builder.endElement();
    // params
    Iterator<String> paramNames = req.getParams().getParameterNamesIterator();
    while (paramNames.hasNext()) {
        String param = paramNames.next();
        String[] values = req.getParams().getParams(param);
        for (String value : values) {
            builder.startElement(fQNameFor("http", EXPATH_HTTP_NS, "param"), BuiltInAtomicType.UNTYPED_ATOMIC,
                    0, 0);
            addAttribute(builder, "name", param);
            addAttribute(builder, "value", value);
            builder.startContent();
            builder.endElement();
        }
    }
    // headers
    Enumeration<String> headerNames = httpReq.getHeaderNames();
    while (headerNames.hasMoreElements()) {
        String headerName = headerNames.nextElement();
        Enumeration<String> headerValues = httpReq.getHeaders(headerName);
        while (headerValues.hasMoreElements()) {
            String value = headerValues.nextElement();
            builder.startElement(fQNameFor("http", EXPATH_HTTP_NS, "header"), BuiltInAtomicType.UNTYPED_ATOMIC,
                    0, 0);
            addAttribute(builder, "name", headerName);
            addAttribute(builder, "value", value);
            builder.startContent();
            builder.endElement();
        }
    }
    ArrayList<XdmItem> resultSequence = null;
    if (req.getContentStreams() != null) {
        resultSequence = new ArrayList<XdmItem>();
        handleContentStreams(builder, req, resultSequence, evaluator);
    }
    builder.endElement(); // end request
    builder.endDocument();
    XdmNode expathReq = new XdmNode(builder.getCurrentRoot());
    if (resultSequence == null) {
        return expathReq;
    }
    resultSequence.add(0, expathReq);
    return new XdmValue(resultSequence);
}
From source file:lux.solr.XQueryComponent.java
License:Mozilla Public License
private void handleContentStreams(LinkedTreeBuilder builder, SolrQueryRequest req, ArrayList<XdmItem> result,
        Evaluator evaluator) throws XPathException {
    // parts
    int i = 0;
    for (ContentStream stream : req.getContentStreams()) {
        String contentType = stream.getContentType();
        //String name = stream.getName();
        byte[] partBytes = null;
        try {
            partBytes = IOUtils.toByteArray(stream.getStream(), stream.getSize());
        } catch (IOException e) {
            throw new LuxException(e);
        }
        String charset = ContentStreamBase.getCharsetFromContentType(contentType);
        if (charset == null) {
            charset = "utf-8";
        }
        if (!isText(contentType)) {
            logger.warn("Binary values not supported; treating " + contentType + " as xml, or text");
        }
        XdmItem part = null;
        if (isXML(contentType) || !isText(contentType)) {
            try {
                part = evaluator.build(new ByteArrayInputStream(partBytes), "#part" + i);
            } catch (LuxException e) {
                // failed to parse
                logger.warn("Caught an exception while parsing XML: " + e.getMessage()
                        + ", treating it as plain text");
                contentType = "text/plain; charset=" + charset;
            }
        }
        if (part == null) {
            String text;
            try {
                text = new String(partBytes, charset);
            } catch (UnsupportedEncodingException e1) {
                throw new LuxException(e1);
            }
            if (isHTML(contentType)) {
                HtmlParser parser = new HtmlParser();
                //Parser parser = new Parser();
                SAXSource source = new SAXSource(parser, new InputSource(new StringReader(text)));
                try {
                    part = evaluator.getDocBuilder().build(source);
                } catch (SaxonApiException e) {
                    e.printStackTrace();
                    logger.warn("failed to parse HTML; treating as plain text: " + e.getMessage());
                }
            }
            if (part == null) {
                TextFragmentValue node = new TextFragmentValue(text, "#part" + i);
                node.setConfiguration(builder.getConfiguration());
                part = new XdmNode(node);
            }
        }
        result.add(part);
        builder.startElement(fQNameFor("http", EXPATH_HTTP_NS, "body"), BuiltInAtomicType.UNTYPED_ATOMIC, 0, 0);
        addAttribute(builder, "position", "1");
        addAttribute(builder, "content-type", contentType);
        builder.startContent();
        builder.endElement();
    }
}
From source file:org.codeexample.jeffery.solr.ThreadedUpdateRequestHandler.java
License:Apache License
private void handleReqStream(final SolrQueryRequest req, List<ContentStream> streams) {
    Iterable<ContentStream> iterabler = req.getContentStreams();
    if (iterabler != null) {
        Iterator<ContentStream> iterator = iterabler.iterator();
        while (iterator.hasNext()) {
            streams.add(iterator.next());
            iterator.remove();
        }
    }
}
From source file:org.dfdeshom.solr.mlt.MoreLikeThisHandler.java
License:Apache License
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    SolrParams params = req.getParams();
    // Set field flags
    ReturnFields returnFields = new SolrReturnFields(req);
    rsp.setReturnFields(returnFields);
    int flags = 0;
    if (returnFields.wantsScore()) {
        flags |= SolrIndexSearcher.GET_SCORES;
    }
    String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE);
    String q = params.get(CommonParams.Q);
    Query query = null;
    SortSpec sortSpec = null;
    List<Query> filters = null;
    QParser parser = null;
    try {
        if (q != null) {
            parser = QParser.getParser(q, defType, req);
            query = parser.getQuery();
            sortSpec = parser.getSort(true);
        }
        String[] fqs = req.getParams().getParams(CommonParams.FQ);
        if (fqs != null && fqs.length != 0) {
            filters = new ArrayList<Query>();
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0) {
                    QParser fqp = QParser.getParser(fq, null, req);
                    filters.add(fqp.getQuery());
                }
            }
        }
    } catch (SyntaxError e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
    }
    SolrIndexSearcher searcher = req.getSearcher();
    MoreLikeThisHelper mlt = new MoreLikeThisHelper(params, searcher);
    // Hold on to the interesting terms if relevant
    TermStyle termStyle = TermStyle.get(params.get(MoreLikeThisParams.INTERESTING_TERMS));
    List<InterestingTerm> interesting = (termStyle == TermStyle.NONE) ? null
            : new ArrayList<InterestingTerm>(mlt.mlt.getMaxQueryTerms());
    DocListAndSet mltDocs = null;
    // Parse Required Params
    // This will either have a single Reader or valid query
    Reader reader = null;
    try {
        if (q == null || q.trim().length() < 1) {
            Iterable<ContentStream> streams = req.getContentStreams();
            if (streams != null) {
                Iterator<ContentStream> iter = streams.iterator();
                if (iter.hasNext()) {
                    reader = iter.next().getReader();
                }
                if (iter.hasNext()) {
                    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                            "MoreLikeThis does not support multiple ContentStreams");
                }
            }
        }
        int start = params.getInt(CommonParams.START, 0);
        int rows = params.getInt(CommonParams.ROWS, 10);
        // Find documents MoreLikeThis - either with a reader or a query
        // --------------------------------------------------------------------------------
        if (reader != null) {
            mltDocs = mlt.getMoreLikeThis(reader, sortSpec.getSort(), start, rows, filters, interesting, flags);
        } else if (q != null) {
            // Matching options
            boolean includeMatch = params.getBool(MoreLikeThisParams.MATCH_INCLUDE, true);
            int matchOffset = params.getInt(MoreLikeThisParams.MATCH_OFFSET, 0);
            // Find the base match
            DocList match = searcher.getDocList(query, null, null, matchOffset, 1, flags); // only get the first one...
            if (includeMatch) {
                rsp.add("match", match);
            }
            // This is an iterator, but we only handle the first match
            DocIterator iterator = match.iterator();
            if (iterator.hasNext()) {
                // do a MoreLikeThis query for each document in results
                int id = iterator.nextDoc();
                mltDocs = mlt.getMoreLikeThis(parser, id, sortSpec.getSort(), start, rows, filters, interesting,
                        flags);
            }
        } else {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "MoreLikeThis requires either a query (?q=) or text to find similar documents.");
        }
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
    if (mltDocs == null) {
        mltDocs = new DocListAndSet(); // avoid NPE
    }
    rsp.add("response", mltDocs.docList);
    if (interesting != null) {
        if (termStyle == TermStyle.DETAILS) {
            NamedList<Float> it = new NamedList<Float>();
            for (InterestingTerm t : interesting) {
                it.add(t.term.toString(), t.boost);
            }
            rsp.add("interestingTerms", it);
        } else {
            List<String> it = new ArrayList<String>(interesting.size());
            for (InterestingTerm t : interesting) {
                it.add(t.term.text());
            }
            rsp.add("interestingTerms", it);
        }
    }
    // maybe facet the results
    if (params.getBool(FacetParams.FACET, false)) {
        if (mltDocs.docSet == null) {
            rsp.add("facet_counts", null);
        } else {
            SimpleFacets f = new SimpleFacets(req, mltDocs.docSet, params);
            rsp.add("facet_counts", f.getFacetCounts());
        }
    }
    boolean dbg = req.getParams().getBool(CommonParams.DEBUG_QUERY, false);
    boolean dbgQuery = false, dbgResults = false;
    if (dbg == false) { // if it's true, we are doing everything anyway.
        String[] dbgParams = req.getParams().getParams(CommonParams.DEBUG);
        if (dbgParams != null) {
            for (int i = 0; i < dbgParams.length; i++) {
                if (dbgParams[i].equals(CommonParams.QUERY)) {
                    dbgQuery = true;
                } else if (dbgParams[i].equals(CommonParams.RESULTS)) {
                    dbgResults = true;
                }
            }
        }
    } else {
        dbgQuery = true;
        dbgResults = true;
    }
    // Copied from StandardRequestHandler... perhaps it should be added to doStandardDebug?
    if (dbg == true) {
        try {
            NamedList<Object> dbgInfo = SolrPluginUtils.doStandardDebug(req, q, mlt.getRawMLTQuery(),
                    mltDocs.docList, dbgQuery, dbgResults);
            if (null != dbgInfo) {
                if (null != filters) {
                    dbgInfo.add("filter_queries", req.getParams().getParams(CommonParams.FQ));
                    List<String> fqs = new ArrayList<String>(filters.size());
                    for (Query fq : filters) {
                        fqs.add(QueryParsing.toString(fq, req.getSchema()));
                    }
                    dbgInfo.add("parsed_filter_queries", fqs);
                }
                rsp.add("debug", dbgInfo);
            }
        } catch (Exception e) {
            SolrException.log(SolrCore.log, "Exception during debug", e);
            rsp.add("exception_during_debug", SolrException.toStr(e));
        }
    }
}
From source file:org.dice.solrenhancements.morelikethis.DiceMoreLikeThisHandler.java
License:Apache License
private Reader getContentStreamReader(SolrQueryRequest req, Reader reader) throws IOException {
    Iterable<ContentStream> streams = req.getContentStreams();
    if (streams != null) {
        Iterator<ContentStream> iter = streams.iterator();
        if (iter.hasNext()) {
            reader = iter.next().getReader();
        }
        if (iter.hasNext()) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "MoreLikeThis does not support multiple ContentStreams");
        }
    }
    return reader;
}
From source file:org.opensextant.solrtexttagger.TaggerRequestHandler.java
License:Open Source License
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    setTopInitArgsAsInvariants(req);

    //--Read params
    final String indexedField = req.getParams().get("field");
    if (indexedField == null)
        throw new RuntimeException("required param 'field'");

    final TagClusterReducer tagClusterReducer = chooseTagClusterReducer(req.getParams().get(OVERLAPS));
    final int rows = req.getParams().getInt(CommonParams.ROWS, 10000);
    final int tagsLimit = req.getParams().getInt(TAGS_LIMIT, 1000);
    final boolean addMatchText = req.getParams().getBool(MATCH_TEXT, false);
    final SchemaField idSchemaField = req.getSchema().getUniqueKeyField();
    if (idSchemaField == null) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                "The tagger requires a" + "uniqueKey in the schema."); //TODO this could be relaxed
    }
    final boolean skipAltTokens = req.getParams().getBool(SKIP_ALT_TOKENS, false);
    final boolean ignoreStopWords = req.getParams().getBool(IGNORE_STOPWORDS,
            fieldHasIndexedStopFilter(indexedField, req));
    final boolean htmlOffsetAdjust = req.getParams().getBool(HTML_OFFSET_ADJUST, false);
    final boolean xmlOffsetAdjust = req.getParams().getBool(XML_OFFSET_ADJUST, false);
    final String nonTaggableTags = req.getParams().get(NON_TAGGABLE_TAGS);
    final String textToTag = req.getParams().get(TEXT_TO_TAG);

    //--Get posted data
    Reader inputReader = null;
    Iterable<ContentStream> streams = req.getContentStreams();
    if (streams != null) {
        Iterator<ContentStream> iter = streams.iterator();
        if (iter.hasNext()) {
            inputReader = iter.next().getReader();
        }
        if (iter.hasNext()) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    getClass().getSimpleName() + " does not support multiple ContentStreams");
        }
    }
    if (inputReader == null) {
        if (textToTag != null) {
            inputReader = new StringReader(textToTag);
        } else {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    getClass().getSimpleName() + " requires text to be POSTed to it");
        }
    }

    final String inputString; //only populated if needed
    if (addMatchText || xmlOffsetAdjust || htmlOffsetAdjust) {
        //Read the input fully into a String buffer that we'll need later,
        // then replace the input with a reader wrapping the buffer.
        inputString = CharStreams.toString(inputReader);
        inputReader.close();
        inputReader = new StringReader(inputString);
    } else {
        inputString = null; //not used
    }

    final OffsetCorrector offsetCorrector = initOffsetCorrector(htmlOffsetAdjust, xmlOffsetAdjust, inputString,
            nonTaggableTags);
    final SolrIndexSearcher searcher = req.getSearcher();
    final FixedBitSet matchDocIdsBS = new FixedBitSet(searcher.maxDoc());
    final List tags = new ArrayList(2000);

    try {
        Analyzer analyzer = req.getSchema().getField(indexedField).getType().getQueryAnalyzer();
        try (TokenStream tokenStream = analyzer.tokenStream("", inputReader)) {
            Terms terms = searcher.getSlowAtomicReader().terms(indexedField);
            if (terms == null)
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                        "field " + indexedField + " has no indexed data");
            Tagger tagger = new Tagger(terms, computeDocCorpus(req), tokenStream, tagClusterReducer,
                    skipAltTokens, ignoreStopWords) {
                @SuppressWarnings("unchecked")
                @Override
                protected void tagCallback(int startOffset, int endOffset, Object docIdsKey) {
                    if (tags.size() >= tagsLimit)
                        return;
                    if (offsetCorrector != null) {
                        int[] offsetPair = offsetCorrector.correctPair(startOffset, endOffset);
                        if (offsetPair == null) {
                            log.debug("Discarded offsets [{}, {}] because couldn't balance XML.", startOffset,
                                    endOffset);
                            return;
                        }
                        startOffset = offsetPair[0];
                        endOffset = offsetPair[1];
                    }
                    NamedList tag = new NamedList();
                    tag.add("startOffset", startOffset);
                    tag.add("endOffset", endOffset);
                    if (addMatchText)
                        tag.add("matchText", inputString.substring(startOffset, endOffset));
                    //below caches, and also flags matchDocIdsBS
                    tag.add("ids", lookupSchemaDocIds(docIdsKey));
                    tags.add(tag);
                }

                Map<Object, List> docIdsListCache = new HashMap<>(2000);

                ValueSourceAccessor uniqueKeyCache = new ValueSourceAccessor(searcher,
                        idSchemaField.getType().getValueSource(idSchemaField, null));

                @SuppressWarnings("unchecked")
                private List lookupSchemaDocIds(Object docIdsKey) {
                    List schemaDocIds = docIdsListCache.get(docIdsKey);
                    if (schemaDocIds != null)
                        return schemaDocIds;
                    IntsRef docIds = lookupDocIds(docIdsKey);
                    //translate lucene docIds to schema ids
                    schemaDocIds = new ArrayList(docIds.length);
                    for (int i = docIds.offset; i < docIds.offset + docIds.length; i++) {
                        int docId = docIds.ints[i];
                        matchDocIdsBS.set(docId); //also, flip docid in bitset
                        schemaDocIds.add(uniqueKeyCache.objectVal(docId)); //translates here
                    }
                    assert !schemaDocIds.isEmpty();
                    docIdsListCache.put(docIds, schemaDocIds);
                    return schemaDocIds;
                }
            };
            tagger.enableDocIdsCache(2000); //TODO configurable
            tagger.process();
        }
    } finally {
        inputReader.close();
    }
    rsp.add("tagsCount", tags.size());
    rsp.add("tags", tags);
    rsp.setReturnFields(new SolrReturnFields(req));
    //Solr's standard name for matching docs in response
    rsp.add("response", getDocList(rows, matchDocIdsBS));
}