Example usage for org.apache.solr.common.params CommonParams ROWS

List of usage examples for org.apache.solr.common.params CommonParams ROWS

Introduction

In this page you can find the example usage for org.apache.solr.common.params CommonParams ROWS.

Prototype

String ROWS

To view the source code for org.apache.solr.common.params CommonParams ROWS, use the Source Link below.

Click Source Link

Document

number of documents to return starting at "start"

Usage

From source file:org.mitre.opensextant.extraction.PlacenameMatcher.java

License:Apache License

/**
 * Lazily initializes the shared Solr proxy and the tagging request
 * parameters. Subsequent calls are no-ops once the proxy exists.
 *
 * @throws IOException if the Solr core cannot be opened
 */
protected static void initialize() throws IOException {

    if (solr != null) {
        return;
    }

    // "solr.solr.home" is set via opensextant.apps.Config or by some other
    // means, but it must be present before this runs.  "gazetteer" is the
    // core of interest; naming it explicitly lets an integrator host
    // multiple cores in the same Solr home.
    String solrHome = System.getProperty("solr.solr.home");
    solr = new SolrProxy(solrHome, "gazetteer");

    ModifiableSolrParams p = new ModifiableSolrParams();
    p.set(CommonParams.QT, requestHandler);

    // Request the specific metadata fields used downstream rather than "*".
    // Score is intentionally omitted: its meaning for FST-based tagging has
    // not been evaluated.
    // TODO: verify that all the right metadata is being retrieved here.
    p.set(CommonParams.FL,
            "id,name,cc,adm1,adm2,feat_class,feat_code,lat,lon,place_id,name_bias,id_bias,name_type");

    p.set("tagsLimit", 100000);
    p.set(CommonParams.ROWS, 100000);
    p.set("subTags", false);

    // The input document is supplied as a string, so the tagger does not
    // need to echo the matched text back.
    p.set("matchText", false);

    // Possible overlaps: ALL, NO_SUB, LONGEST_DOMINANT_RIGHT.
    // See the Solr Text Tagger documentation for details.
    p.set("overlaps", "LONGEST_DOMINANT_RIGHT");

    params = p;
}

From source file:org.nlp.solr.handler.component.PagerComponent.java

License:GNU General Public License

/**
 * Adds a "pager" section to the response: a window of page labels (1-based
 * page number mapped to its start offset) plus first/prev/next/last
 * navigation offsets around the current page.
 *
 * @param rb the response builder carrying request params and results
 * @throws IOException declared by the component contract; not thrown here
 */
@Override
public void process(ResponseBuilder rb) throws IOException {
    /* FIX: guard before any dereference -- the original null checks ran
     * after rb.req.getParams() had already been called, making them dead */
    if (rb == null || rb.getResults() == null || rb.getResults().docSet == null)
        return;

    /* get request params */
    SolrParams par = rb.req.getParams();
    int rows = par.getInt(CommonParams.ROWS, 0);
    int start = par.getInt(CommonParams.START, 0);
    int pages = par.getInt(PARAM_PAGER, 0);
    int pages_pre = par.getInt(PARAM_PAGER_PRE, 2);

    /* pager disabled or no rows requested? nothing to do */
    if (pages == 0 || rows == 0)
        return;

    /* select result list */
    int doc_count = rb.getResults().docSet.size();

    /* pager list */
    NamedList<Object> lst = new SimpleOrderedMap<Object>();
    NamedList<Object> lst2 = new SimpleOrderedMap<Object>();

    /* paging pages */
    int page_count = doc_count / rows;
    int page_actual = start / rows;
    int page_pre = pages_pre;
    int page_post = pages - page_pre - 1;

    /* account for a partially filled last page */
    if (doc_count % rows != 0)
        page_count++;

    /* shift the window when it would extend before the first page
     * or past the last page */
    if (page_actual - page_pre < 0) {
        page_post += -(page_actual - page_pre);
        page_pre -= -(page_actual - page_pre);
    } else if (page_actual + page_post > page_count) {
        page_post = pages - page_pre;
        page_pre = page_actual + pages - page_count;
    }

    /* sanity */
    if (page_pre < 0)
        page_pre = 0;
    if (page_post < 0)
        page_post = 0;

    /* pages in the window: label is the 1-based page number,
     * value is the start offset of that page */
    int i = (page_actual - page_pre);
    for (i = (i <= 0 ? 0 : i); i < page_count && i <= (page_actual + page_post); i++)
        lst2.add(Integer.toString(i + 1), i * rows);
    lst.add("pages", lst2);

    /* navigation offsets */
    if (page_actual > 0)
        lst.add("prev", (page_actual - 1) * rows);
    if (page_actual - page_pre > 0)
        lst.add("first", 0);
    if (page_actual < (page_count - 1))
        lst.add("next", (page_actual + 1) * rows);
    if (page_actual + page_post < (page_count - 1))
        lst.add("last", (page_count - 1) * rows);
    lst.add("actual", page_actual + 1);
    lst.add("count", page_count);

    /* finish */
    rb.rsp.add("pager", lst);
}

From source file:org.opencms.search.solr.CmsSolrIndex.java

License:Open Source License

/**
 * Performs the actual search.<p>
 *
 * Over-fetches from Solr, applies OpenCms permission checks per document,
 * then rewrites the Solr response (document list, counts, paging header) so
 * callers see only readable hits. Optionally re-runs highlighting and writes
 * the result to the given servlet response.
 *
 * @param cms the current OpenCms context
 * @param ignoreMaxRows <code>true</code> to return all requested rows, <code>false</code> to use max rows
 * @param query the OpenCms Solr query
 * @param response the servlet response to write the query result to, may also be <code>null</code>
 * @param ignoreSearchExclude if set to false, only contents with search_exclude unset or "false" will be found - typical for the non-gallery case
 * @param filter the resource filter to use
 *
 * @return the found documents
 *
 * @throws CmsSearchException if something goes wrong
 *
 * @see #search(CmsObject, CmsSolrQuery, boolean)
 */
@SuppressWarnings("unchecked")
public CmsSolrResultList search(CmsObject cms, final CmsSolrQuery query, boolean ignoreMaxRows,
        ServletResponse response, boolean ignoreSearchExclude, CmsResourceFilter filter)
        throws CmsSearchException {

    // check if the user is allowed to access this index
    checkOfflineAccess(cms);
    if (!ignoreSearchExclude) {
        query.addFilterQuery(CmsSearchField.FIELD_SEARCH_EXCLUDE + ":\"false\"");
    }

    int previousPriority = Thread.currentThread().getPriority();
    long startTime = System.currentTimeMillis();

    // remember the initial query (used later for highlighting and the result object)
    SolrQuery initQuery = query.clone();

    // highlighting is done manually further below, not by the initial query
    query.setHighlight(false);
    LocalSolrQueryRequest solrQueryRequest = null;
    try {

        // initialize the search context
        CmsObject searchCms = OpenCms.initCmsObject(cms);

        // change thread priority in order to reduce search impact on overall system performance
        if (getPriority() > 0) {
            Thread.currentThread().setPriority(getPriority());
        }

        // the lists storing the found documents that will be returned
        List<CmsSearchResource> resourceDocumentList = new ArrayList<CmsSearchResource>();
        SolrDocumentList solrDocumentList = new SolrDocumentList();

        // Initialize rows, offset, end and the current page.
        int rows = query.getRows() != null ? query.getRows().intValue() : CmsSolrQuery.DEFAULT_ROWS.intValue();
        if (!ignoreMaxRows && (rows > ROWS_MAX)) {
            rows = ROWS_MAX;
        }
        int start = query.getStart() != null ? query.getStart().intValue() : 0;
        int end = start + rows;
        int page = 0;
        if (rows > 0) {
            page = Math.round(start / rows) + 1;
        }

        // set the start to '0' and expand the rows before performing the query;
        // NOTE(review): rows are over-fetched ((5 * rows * page) + start), presumably
        // to compensate for documents dropped by the permission check below -- confirm
        query.setStart(new Integer(0));
        query.setRows(new Integer((5 * rows * page) + start));

        // perform the Solr query and remember the original Solr response
        QueryResponse queryResponse = m_solr.query(query);
        long solrTime = System.currentTimeMillis() - startTime;

        // initialize the counts
        long hitCount = queryResponse.getResults().getNumFound();
        start = -1;
        end = -1;
        if ((rows > 0) && (page > 0) && (hitCount > 0)) {
            // calculate the final size of the search result
            start = rows * (page - 1);
            end = start + rows;
            // ensure that both i and n are inside the range of foundDocuments.size()
            start = new Long((start > hitCount) ? hitCount : start).intValue();
            end = new Long((end > hitCount) ? hitCount : end).intValue();
        } else {
            // return all found documents in the search result
            start = 0;
            end = new Long(hitCount).intValue();
        }
        long visibleHitCount = hitCount;
        float maxScore = 0;

        // If we're using a postprocessor, (re-)initialize it before using it
        if (m_postProcessor != null) {
            m_postProcessor.init();
        }

        // process found documents: permission-check each hit, keep only the
        // [start, end) window of readable documents in the returned lists
        List<CmsSearchResource> allDocs = new ArrayList<CmsSearchResource>();
        int cnt = 0;
        for (int i = 0; (i < queryResponse.getResults().size()) && (cnt < end); i++) {
            try {
                SolrDocument doc = queryResponse.getResults().get(i);
                CmsSolrDocument searchDoc = new CmsSolrDocument(doc);
                if (needsPermissionCheck(searchDoc)) {
                    // only if the document is an OpenCms internal resource perform the permission check
                    CmsResource resource = filter == null ? getResource(searchCms, searchDoc)
                            : getResource(searchCms, searchDoc, filter);
                    if (resource != null) {
                        // permission check performed successfully: the user has read permissions!
                        if (cnt >= start) {
                            if (m_postProcessor != null) {
                                doc = m_postProcessor.process(searchCms, resource,
                                        (SolrInputDocument) searchDoc.getDocument());
                            }
                            resourceDocumentList.add(new CmsSearchResource(resource, searchDoc));
                            if (null != doc) {
                                solrDocumentList.add(doc);
                            }
                            maxScore = maxScore < searchDoc.getScore() ? searchDoc.getScore() : maxScore;
                        }
                        allDocs.add(new CmsSearchResource(resource, searchDoc));
                        cnt++;
                    } else {
                        // not readable: hide the hit from the reported total
                        visibleHitCount--;
                    }
                } else {
                    // if permission check is not required for this index,
                    // add a pseudo resource together with document to the results
                    resourceDocumentList.add(new CmsSearchResource(PSEUDO_RES, searchDoc));
                    solrDocumentList.add(doc);
                    maxScore = maxScore < searchDoc.getScore() ? searchDoc.getScore() : maxScore;
                    cnt++;
                }
            } catch (Exception e) {
                // should not happen, but if it does we want to go on with the next result nevertheless
                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SOLR_ERR_RESULT_ITERATION_FAILED_0), e);
            }
        }
        // the last documents were all secret so let's take the last found docs
        if (resourceDocumentList.isEmpty() && (allDocs.size() > 0)) {
            page = Math.round(allDocs.size() / rows) + 1;
            int showCount = allDocs.size() % rows;
            showCount = showCount == 0 ? rows : showCount;
            start = allDocs.size() - new Long(showCount).intValue();
            end = allDocs.size();
            if (allDocs.size() > start) {
                resourceDocumentList = allDocs.subList(start, end);
                for (CmsSearchResource r : resourceDocumentList) {
                    maxScore = maxScore < r.getDocument().getScore() ? r.getDocument().getScore() : maxScore;
                    solrDocumentList.add(((CmsSolrDocument) r.getDocument()).getSolrDocument());
                }
            }
        }
        long processTime = System.currentTimeMillis() - startTime - solrTime;

        // create and return the result: patch the filtered document list and
        // corrected counts back into the original Solr response structures
        solrDocumentList.setStart(start);
        solrDocumentList.setMaxScore(new Float(maxScore));
        solrDocumentList.setNumFound(visibleHitCount);

        queryResponse.getResponse().setVal(queryResponse.getResponse().indexOf(QUERY_RESPONSE_NAME, 0),
                solrDocumentList);

        queryResponse.getResponseHeader().setVal(queryResponse.getResponseHeader().indexOf(QUERY_TIME_NAME, 0),
                new Integer(new Long(System.currentTimeMillis() - startTime).intValue()));
        long highlightEndTime = System.currentTimeMillis();
        SolrCore core = m_solr instanceof EmbeddedSolrServer
                ? ((EmbeddedSolrServer) m_solr).getCoreContainer().getCore(getCoreName())
                : null;
        CmsSolrResultList result = null;
        try {
            SearchComponent highlightComponenet = null;
            if (core != null) {
                highlightComponenet = core.getSearchComponent("highlight");
                solrQueryRequest = new LocalSolrQueryRequest(core, queryResponse.getResponseHeader());
            }
            SolrQueryResponse solrQueryResponse = null;
            if (solrQueryRequest != null) {
                // create and initialize the solr response
                solrQueryResponse = new SolrQueryResponse();
                solrQueryResponse.setAllValues(queryResponse.getResponse());
                int paramsIndex = queryResponse.getResponseHeader().indexOf(HEADER_PARAMS_NAME, 0);
                NamedList<Object> header = null;
                Object o = queryResponse.getResponseHeader().getVal(paramsIndex);
                if (o instanceof NamedList) {
                    header = (NamedList<Object>) o;
                    // report the caller's original rows/start, not the expanded ones
                    header.setVal(header.indexOf(CommonParams.ROWS, 0), new Integer(rows));
                    header.setVal(header.indexOf(CommonParams.START, 0), new Long(start));
                }

                // set the OpenCms Solr query as parameters to the request
                solrQueryRequest.setParams(initQuery);

                // perform the highlighting
                if ((header != null) && (initQuery.getHighlight()) && (highlightComponenet != null)) {
                    header.add(HighlightParams.HIGHLIGHT, "on");
                    if ((initQuery.getHighlightFields() != null)
                            && (initQuery.getHighlightFields().length > 0)) {
                        header.add(HighlightParams.FIELDS,
                                CmsStringUtil.arrayAsString(initQuery.getHighlightFields(), ","));
                    }
                    String formatter = initQuery.getParams(HighlightParams.FORMATTER) != null
                            ? initQuery.getParams(HighlightParams.FORMATTER)[0]
                            : null;
                    if (formatter != null) {
                        header.add(HighlightParams.FORMATTER, formatter);
                    }
                    // only forward highlight params that differ from the Solr defaults
                    if (initQuery.getHighlightFragsize() != 100) {
                        header.add(HighlightParams.FRAGSIZE, new Integer(initQuery.getHighlightFragsize()));
                    }
                    if (initQuery.getHighlightRequireFieldMatch()) {
                        header.add(HighlightParams.FIELD_MATCH,
                                new Boolean(initQuery.getHighlightRequireFieldMatch()));
                    }
                    if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(initQuery.getHighlightSimplePost())) {
                        header.add(HighlightParams.SIMPLE_POST, initQuery.getHighlightSimplePost());
                    }
                    if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(initQuery.getHighlightSimplePre())) {
                        header.add(HighlightParams.SIMPLE_PRE, initQuery.getHighlightSimplePre());
                    }
                    if (initQuery.getHighlightSnippets() != 1) {
                        header.add(HighlightParams.SNIPPETS, new Integer(initQuery.getHighlightSnippets()));
                    }
                    ResponseBuilder rb = new ResponseBuilder(solrQueryRequest, solrQueryResponse,
                            Collections.singletonList(highlightComponenet));
                    try {
                        rb.doHighlights = true;
                        DocListAndSet res = new DocListAndSet();
                        SchemaField idField = OpenCms.getSearchManager().getSolrServerConfiguration()
                                .getSolrSchema().getUniqueKeyField();

                        // map the filtered documents back to Lucene doc ids
                        // so the highlight component can process them
                        int[] luceneIds = new int[rows];
                        int docs = 0;
                        for (SolrDocument doc : solrDocumentList) {
                            String idString = (String) doc.getFirstValue(CmsSearchField.FIELD_ID);
                            int id = solrQueryRequest.getSearcher().getFirstMatch(
                                    new Term(idField.getName(), idField.getType().toInternal(idString)));
                            luceneIds[docs++] = id;
                        }
                        res.docList = new DocSlice(0, docs, luceneIds, null, docs, 0);
                        rb.setResults(res);
                        rb.setQuery(QParser.getParser(initQuery.getQuery(), null, solrQueryRequest).getQuery());
                        rb.setQueryString(initQuery.getQuery());
                        highlightComponenet.prepare(rb);
                        highlightComponenet.process(rb);
                        highlightComponenet.finishStage(rb);
                    } catch (Exception e) {
                        // highlighting failure is non-fatal: log and return un-highlighted results
                        LOG.error(e.getMessage() + " in query: " + initQuery, new Exception(e));
                    }

                    // Make highlighting also available via the CmsSolrResultList
                    queryResponse.setResponse(solrQueryResponse.getValues());

                    highlightEndTime = System.currentTimeMillis();
                }
            }

            result = new CmsSolrResultList(initQuery, queryResponse, solrDocumentList, resourceDocumentList,
                    start, new Integer(rows), end, page, visibleHitCount, new Float(maxScore), startTime,
                    highlightEndTime);
            if (LOG.isDebugEnabled()) {
                Object[] logParams = new Object[] { new Long(System.currentTimeMillis() - startTime),
                        new Long(result.getNumFound()), new Long(solrTime), new Long(processTime),
                        new Long(result.getHighlightEndTime() != 0 ? result.getHighlightEndTime() - startTime
                                : 0) };
                LOG.debug(query.toString() + "\n"
                        + Messages.get().getBundle().key(Messages.LOG_SOLR_SEARCH_EXECUTED_5, logParams));
            }
            if (response != null) {
                writeResp(response, solrQueryRequest, solrQueryResponse);
            }
        } finally {
            // NOTE(review): solrQueryRequest is closed both here and in the outer
            // finally; close() is presumably tolerant of repeated calls -- confirm
            if (solrQueryRequest != null) {
                solrQueryRequest.close();
            }
            if (core != null) {
                core.close();
            }
        }
        return result;
    } catch (Exception e) {
        throw new CmsSearchException(Messages.get().container(Messages.LOG_SOLR_ERR_SEARCH_EXECUTION_FAILD_1,
                CmsEncoder.decode(query.toString()), e), e);
    } finally {
        if (solrQueryRequest != null) {
            solrQueryRequest.close();
        }
        // re-set thread to previous priority
        Thread.currentThread().setPriority(previousPriority);
    }

}

From source file:org.opencommercesearch.lucene.queries.function.valuesource.BoostValueSourceParser.java

License:Apache License

/**
 * Parses the boost function query: resolves the field's value source and
 * attaches the boost map identified by the {@code boostId} request param,
 * optionally suffixed with a treatment id for A/B-test variants.
 *
 * @param fp the function query parser for the current request
 * @return a {@link BoostValueSource} over the parsed field
 * @throws SyntaxError if the field argument cannot be parsed
 */
@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
    String field = fp.parseArg();

    SolrParams params = fp.getReq().getParams();
    String boostId = params.get(BOOST_ID);

    SchemaField f = fp.getReq().getSchema().getField(field);
    ValueSource fieldValueSource = f.getType().getValueSource(f, fp);
    Map<String, Float> boosts = Collections.emptyMap();

    // FIX: check for a missing boostId BEFORE appending the treatment suffix.
    // Previously a null boostId combined with a treatment produced the literal
    // key "null_<treatment>", which bypassed this guard.
    if (StringUtils.isBlank(boostId)) {
        if (log.isDebugEnabled()) {
            log.debug("Missing required 'boostId', skipping boosts");
        }
        return new BoostValueSource(field, fieldValueSource, boosts);
    }

    String treatmentId = params.get(TREATMENT_ID);
    if (StringUtils.isNotBlank(treatmentId)) {
        if (log.isDebugEnabled()) {
            log.debug("There is treatment enabled:" + treatmentId);
        }
        boostId += "_" + treatmentId;
    }

    int queryRows = NumberUtils.toInt(params.get(CommonParams.ROWS), 0);

    if (queryRows == 0) {
        if (log.isDebugEnabled()) {
            log.debug("Zero rows specified for this query, boosts not needed");
        }
        return new BoostValueSource(field, fieldValueSource, boosts);
    }

    @SuppressWarnings("unchecked")
    SolrCache<String, Map<String, Float>> cache = (SolrCache<String, Map<String, Float>>) fp.getReq()
            .getSearcher().getCache("boostCache");
    return new BoostValueSource(field, fieldValueSource, loadBoosts(boostId, cache));
}

From source file:org.opencommercesearch.lucene.queries.function.valuesource.BoostValueSourceParserTest.java

License:Apache License

/**
 * Wires up the mock request/schema plumbing shared by all tests: a
 * "productId" field whose value source yields "prod0".."prod10", request
 * params carrying a catalog, boost id and 10 rows, and a searcher exposing
 * the "boostCache" cache.
 *
 * @throws Exception if mock initialization fails
 */
@Before
public void setup() throws Exception {
    initMocks(this);

    schemaField = new SchemaField("productId", fieldType);
    vsp.defaultClient = httpClient;

    // parser and request plumbing
    when(fp.getReq()).thenReturn(request);
    when(fp.parseArg()).thenReturn("productId");
    when(request.getParams()).thenReturn(params);
    when(request.getSchema()).thenReturn(schema);
    when(request.getSearcher()).thenReturn(searcher);

    // request parameters
    when(params.get(RuleManagerParams.CATALOG_ID)).thenReturn("myCatalog");
    when(params.get(BOOST_ID)).thenReturn(boostId);
    when(params.get(CommonParams.ROWS)).thenReturn("10");

    // schema field and caches
    when(schema.getField("productId")).thenReturn(schemaField);
    when(fieldType.getValueSource(schemaField, fp)).thenReturn(productIdValueSource);
    when(searcher.getCache("boostCache")).thenReturn(boostCache);

    // product id function values: docs 0..10 map to "prod0".."prod10"
    when(productIdValueSource.getValues(any(Map.class), any(AtomicReaderContext.class)))
            .thenReturn(productIdFunctionValues);
    for (int doc = 0; doc <= 10; doc++) {
        when(productIdFunctionValues.strVal(doc)).thenReturn("prod" + doc);
    }
}

From source file:org.opensextant.extractors.geo.GazetteerMatcher.java

License:Apache License

/**
 * Configures the matcher request parameters and attaches a gazetteer
 * backed by the shared Solr index.
 *
 * @throws ConfigException if parent initialization or gazetteer setup fails
 */
@Override
public void initialize() throws ConfigException {

    super.initialize();

    // Matcher params: the metadata fields returned per tagged place candidate.
    params.set(CommonParams.FL,
            "id,name,cc,adm1,adm2,feat_class,feat_code,geo,place_id,name_bias,id_bias,name_type");
    params.set(CommonParams.ROWS, 100000);
    params.set("tagsLimit", 100000);
    params.set("subTags", false);

    // The input document is supplied as a string; matchText=false means the
    // tagger will not report the text, just span offsets.
    params.set("matchText", false);

    // Possible overlaps: ALL, NO_SUB, LONGEST_DOMINANT_RIGHT.
    // See the Solr Text Tagger documentation for details.
    params.set("overlaps", "LONGEST_DOMINANT_RIGHT");

    gazetteer = new SolrGazetteer(this.solr);
}

From source file:org.opensextant.extractors.geo.SolrGazetteer.java

License:Apache License

/**
 * Builds the reusable parameter set for geodetic (spatial) lookups; only
 * pt and d vary per query and are set at lookup time. For larger areas
 * choose a higher number of rows to return. If you choose to use Solr
 * spatial score-by-distance for sorting or anything, then Solr appears to
 * want to load the entire index into memory, so that sort mechanism is
 * off by default.
 *
 * @param rows
 *            rows to include in spatial lookups
 * @return solr params
 */
protected static ModifiableSolrParams createGeodeticLookupParams(int rows) {
    ModifiableSolrParams geo = new ModifiableSolrParams();
    geo.set(CommonParams.Q, "{!geofilt sfield=geo}");
    geo.set(CommonParams.FL,
            "id,name,cc,adm1,adm2,feat_class,feat_code," + "geo,place_id,name_bias,id_bias,name_type");
    geo.set(CommonParams.ROWS, rows);
    // geo.set(CommonParams.SORT, "score desc");
    geo.set("spatial", "true");
    return geo;
}

From source file:org.opensextant.solrtexttagger.TaggerRequestHandler.java

License:Open Source License

/**
 * Tags the posted text (or the textToTag param) against the indexed terms of
 * the requested field, adding the found tags with their offsets, the matched
 * doc ids, and the matching documents to the response.
 *
 * @param req the Solr request; must carry a 'field' param and input text
 *            (a single posted ContentStream or the textToTag param)
 * @param rsp the response receiving "tagsCount", "tags" and "response"
 * @throws Exception if the input cannot be read or tagging fails
 */
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    setTopInitArgsAsInvariants(req);

    //--Read params
    final String indexedField = req.getParams().get("field");
    if (indexedField == null)
        throw new RuntimeException("required param 'field'");

    final TagClusterReducer tagClusterReducer = chooseTagClusterReducer(req.getParams().get(OVERLAPS));
    final int rows = req.getParams().getInt(CommonParams.ROWS, 10000);
    final int tagsLimit = req.getParams().getInt(TAGS_LIMIT, 1000);
    final boolean addMatchText = req.getParams().getBool(MATCH_TEXT, false);
    final SchemaField idSchemaField = req.getSchema().getUniqueKeyField();
    if (idSchemaField == null) {
        // FIX: the concatenated literals previously rendered "...requires auniqueKey..."
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                "The tagger requires a " + "uniqueKey in the schema.");//TODO this could be relaxed
    }
    final boolean skipAltTokens = req.getParams().getBool(SKIP_ALT_TOKENS, false);
    final boolean ignoreStopWords = req.getParams().getBool(IGNORE_STOPWORDS,
            fieldHasIndexedStopFilter(indexedField, req));
    final boolean htmlOffsetAdjust = req.getParams().getBool(HTML_OFFSET_ADJUST, false);
    final boolean xmlOffsetAdjust = req.getParams().getBool(XML_OFFSET_ADJUST, false);
    final String nonTaggableTags = req.getParams().get(NON_TAGGABLE_TAGS);
    final String textToTag = req.getParams().get(TEXT_TO_TAG);

    //--Get posted data: exactly one ContentStream, or the textToTag param
    Reader inputReader = null;
    Iterable<ContentStream> streams = req.getContentStreams();
    if (streams != null) {
        Iterator<ContentStream> iter = streams.iterator();
        if (iter.hasNext()) {
            inputReader = iter.next().getReader();
        }
        if (iter.hasNext()) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    getClass().getSimpleName() + " does not support multiple ContentStreams");
        }
    }
    if (inputReader == null) {
        if (textToTag != null) {
            inputReader = new StringReader(textToTag);
        } else {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    getClass().getSimpleName() + " requires text to be POSTed to it");
        }
    }
    final String inputString;//only populated if needed
    if (addMatchText || xmlOffsetAdjust || htmlOffsetAdjust) {
        //Read the input fully into a String buffer that we'll need later,
        // then replace the input with a reader wrapping the buffer.
        inputString = CharStreams.toString(inputReader);
        inputReader.close();
        inputReader = new StringReader(inputString);
    } else {
        inputString = null;//not used
    }

    final OffsetCorrector offsetCorrector = initOffsetCorrector(htmlOffsetAdjust, xmlOffsetAdjust, inputString,
            nonTaggableTags);
    final SolrIndexSearcher searcher = req.getSearcher();
    final FixedBitSet matchDocIdsBS = new FixedBitSet(searcher.maxDoc());
    final List tags = new ArrayList(2000);

    try {
        Analyzer analyzer = req.getSchema().getField(indexedField).getType().getQueryAnalyzer();
        try (TokenStream tokenStream = analyzer.tokenStream("", inputReader)) {
            Terms terms = searcher.getSlowAtomicReader().terms(indexedField);
            if (terms == null)
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                        "field " + indexedField + " has no indexed data");
            Tagger tagger = new Tagger(terms, computeDocCorpus(req), tokenStream, tagClusterReducer,
                    skipAltTokens, ignoreStopWords) {
                @SuppressWarnings("unchecked")
                @Override
                protected void tagCallback(int startOffset, int endOffset, Object docIdsKey) {
                    if (tags.size() >= tagsLimit)
                        return;
                    if (offsetCorrector != null) {
                        // map analyzer offsets back to the original HTML/XML text
                        int[] offsetPair = offsetCorrector.correctPair(startOffset, endOffset);
                        if (offsetPair == null) {
                            log.debug("Discarded offsets [{}, {}] because couldn't balance XML.", startOffset,
                                    endOffset);
                            return;
                        }
                        startOffset = offsetPair[0];
                        endOffset = offsetPair[1];
                    }

                    NamedList tag = new NamedList();
                    tag.add("startOffset", startOffset);
                    tag.add("endOffset", endOffset);
                    if (addMatchText)
                        tag.add("matchText", inputString.substring(startOffset, endOffset));
                    //below caches, and also flags matchDocIdsBS
                    tag.add("ids", lookupSchemaDocIds(docIdsKey));
                    tags.add(tag);
                }

                Map<Object, List> docIdsListCache = new HashMap<>(2000);

                ValueSourceAccessor uniqueKeyCache = new ValueSourceAccessor(searcher,
                        idSchemaField.getType().getValueSource(idSchemaField, null));

                @SuppressWarnings("unchecked")
                private List lookupSchemaDocIds(Object docIdsKey) {
                    List schemaDocIds = docIdsListCache.get(docIdsKey);
                    if (schemaDocIds != null)
                        return schemaDocIds;
                    IntsRef docIds = lookupDocIds(docIdsKey);
                    //translate lucene docIds to schema ids
                    schemaDocIds = new ArrayList(docIds.length);
                    for (int i = docIds.offset; i < docIds.offset + docIds.length; i++) {
                        int docId = docIds.ints[i];
                        matchDocIdsBS.set(docId);//also, flip docid in bitset
                        schemaDocIds.add(uniqueKeyCache.objectVal(docId));//translates here
                    }
                    assert !schemaDocIds.isEmpty();

                    // FIX: cache under the same key used by get() above (docIdsKey);
                    // caching under the IntsRef meant the lookup could never hit.
                    docIdsListCache.put(docIdsKey, schemaDocIds);
                    return schemaDocIds;
                }

            };
            tagger.enableDocIdsCache(2000);//TODO configurable
            tagger.process();
        }
    } finally {
        inputReader.close();
    }
    rsp.add("tagsCount", tags.size());
    rsp.add("tags", tags);

    rsp.setReturnFields(new SolrReturnFields(req));

    //Solr's standard name for matching docs in response
    rsp.add("response", getDocList(rows, matchDocIdsBS));
}

From source file:org.phenotips.data.similarity.internal.DefaultPatientSimilarityViewFactory.java

License:Open Source License

/**
 * Return all terms in the vocabulary via a wildcard id search.
 *
 * @param vocabulary the vocabulary to query
 * @return a Collection of all VocabularyTerms in the vocabulary
 */
private Collection<VocabularyTerm> queryAllTerms(Vocabulary vocabulary) {
    this.logger.info("Querying all terms in vocabulary: " + vocabulary.getAliases().iterator().next());

    Map<String, String> query = new HashMap<String, String>();
    query.put("id", "*");

    // Request as many rows as the vocabulary holds so nothing is paged out.
    Map<String, String> params = new HashMap<String, String>();
    params.put(CommonParams.ROWS, String.valueOf(vocabulary.size()));

    Collection<VocabularyTerm> results = vocabulary.search(query, params);
    this.logger.info(String.format("  ... found %d entries.", results.size()));
    return results;
}

From source file:org.phenotips.diagnosis.differentialPhenotypes.PhenotypeSuggestService.java

License:Open Source License

/**
 * Prepare the map of parameters that can be passed to a Solr query, in order to get a list of diseases matching the
 * selected positive and negative phenotypes.
 *
 * @param phenotypes the list of already selected phenotypes
 * @param nphenotypes phenotypes that are not observed in the patient
 * @return the computed Solr query parameters
 *//*from   w w  w .  j  a v  a  2 s .c  o m*/
private SolrQuery prepareParams(Collection<String> phenotypes, Collection<String> nphenotypes) {
    SolrQuery result = new SolrQuery();
    String q = "symptom:" + StringUtils.join(phenotypes, " symptom:");
    if (!nphenotypes.isEmpty()) {
        q += "  not_symptom:" + StringUtils.join(nphenotypes, " not_symptom:");
    }
    q += " -nameSort:\\** -nameSort:\\+* -nameSort:\\^*";
    result.set(CommonParams.Q, q.replaceAll("HP:", "HP\\\\:"));
    result.set(CommonParams.ROWS, "100");
    result.set(CommonParams.START, "0");
    result.set(CommonParams.DEBUG_QUERY, Boolean.toString(true));
    result.set(CommonParams.EXPLAIN_STRUCT, Boolean.toString(true));

    return result;
}