Example usage for org.apache.solr.search SolrIndexSearcher getIndexReader

List of usage examples for org.apache.solr.search SolrIndexSearcher getIndexReader

Introduction

In this page you can find the example usage for org.apache.solr.search SolrIndexSearcher getIndexReader.

Prototype

@Override
    public final DirectoryReader getIndexReader() 

Source Link

Usage

From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java

License:Apache License

DirectoryReader reopen(DirectoryReader newMain, boolean rebuild) throws IOException {
    CoreContainer container = currentCore.getCoreDescriptor().getCoreContainer();
    SolrCore source = container.getCore(sourceCollection);
    if (source == null) {
        LOG.info("Source collection '" + sourceCollection + "' not present, sidecar index is disabled.");
        try {/* w  w  w . j a  v  a  2  s.  co  m*/
            return new SidecarIndexReader(this, newMain, null,
                    SidecarIndexReader.getSequentialSubReaders(newMain), sourceCollection, null);
        } catch (Exception e1) {
            LOG.warn("Unexpected exception, returning single main index", e1);
            return newMain;
        }
    }
    if (source.isClosed()) {
        LOG.info("Source collection '" + sourceCollection + "' is closed, sidecar index is disabled.");
        try {
            return new SidecarIndexReader(this, newMain, null,
                    SidecarIndexReader.getSequentialSubReaders(newMain), sourceCollection, null);
        } catch (Exception e1) {
            LOG.warn("Unexpected exception, returning single main index", e1);
            return newMain;
        }
    }
    DirectoryReader parallel = null;
    SolrIndexSearcher searcher = null;
    try {
        searcher = source.getNewestSearcher(true).get();
        parallel = buildParallelReader(newMain, searcher, rebuild);
    } finally {
        if (searcher != null && searcher.getIndexReader().getRefCount() > 0) {
            LOG.info("-- closing " + searcher);
            searcher.close();
        }
        if (source != null) {
            source.close();
        }
    }
    return parallel;
}

From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java

License:Apache License

/**
 * Generates a list of Highlighted query fragments for each item in a list
 * of documents, or returns null if highlighting is disabled.
 *
 * @param docs query results/* w ww. j a  va  2s  .co  m*/
 * @param query the query
 * @param req the current request
 * @param defaultFields default list of fields to summarize
 *
 * @return NamedList containing a NamedList for each document, which in
 * turns contains sets (field, summary) pairs.
 */
@Override
@SuppressWarnings("unchecked")
public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields)
        throws IOException {

    NamedList fragments = new SimpleOrderedMap();

    SolrParams params = req.getParams();
    if (!isHighlightingEnabled(params))
        return null;

    SolrIndexSearcher searcher = req.getSearcher();
    IndexSchema schema = searcher.getSchema();
    String[] fieldNames = getHighlightFields(query, req, defaultFields);
    Set<String> fset = new HashSet<String>();

    {
        // pre-fetch documents using the Searcher's doc cache
        Collections.addAll(fset, fieldNames);
        // fetch unique key if one exists.
        SchemaField keyField = schema.getUniqueKeyField();
        if (null != keyField)
            fset.add(keyField.getName());
    }

    //CHANGE start
    //       int[] docIds = new int[docs.swordize()];
    TreeSet<Integer> docIds = new TreeSet<Integer>();
    DocIterator iterator = docs.iterator();
    for (int i = 0; i < docs.size(); i++) {
        docIds.add(iterator.nextDoc());
    }
    // Get Frag list builder
    String fragListBuilderString = params.get(HighlightParams.FRAG_LIST_BUILDER).toLowerCase();
    FragListBuilder fragListBuilder;
    if (fragListBuilderString.equals("single")) {
        fragListBuilder = new SingleFragListBuilder();
    } else {
        fragListBuilder = new com.o19s.solr.swan.highlight.SimpleFragListBuilder();
    }

    // get FastVectorHighlighter instance out of the processing loop
    SpanAwareFastVectorHighlighter safvh = new SpanAwareFastVectorHighlighter(
            // FVH cannot process hl.usePhraseHighlighter parameter per-field basis
            params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
            // FVH cannot process hl.requireFieldMatch parameter per-field basis
            params.getBool(HighlightParams.FIELD_MATCH, false), fragListBuilder,
            //new com.o19s.solr.swan.highlight.ScoreOrderFragmentsBuilder(),
            new WordHashFragmentsBuilder(),
            // List of docIds to filter spans
            docIds);
    safvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE));
    SpanAwareFieldQuery fieldQuery = safvh.getFieldQuery(query, searcher.getIndexReader(), docIds);

    // Highlight each document
    for (int docId : docIds) {
        Document doc = searcher.doc(docId, fset);
        NamedList docSummaries = new SimpleOrderedMap();
        for (String fieldName : fieldNames) {
            fieldName = fieldName.trim();
            if (useFastVectorHighlighter(params, schema, fieldName))
                doHighlightingByFastVectorHighlighter(safvh, fieldQuery, req, docSummaries, docId, doc,
                        fieldName);
            else
                doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName);
        }
        String printId = schema.printableUniqueKey(doc);
        fragments.add(printId == null ? null : printId, docSummaries);
    }
    //CHANGE end
    return fragments;
}

From source file:com.searchbox.solr.CategoryLikeThis.java

License:Apache License

@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    numRequests++;//  www .  jav a 2s  . c om
    long startTime = System.currentTimeMillis();
    if (!keystate) {
        LOGGER.error(
                "License key failure, not performing clt query. Please email contact@searchbox.com for more information.");
        return;
    }

    try {
        SolrParams params = req.getParams();
        String senseField = params.get(SenseParams.SENSE_FIELD, SenseParams.DEFAULT_SENSE_FIELD);
        BooleanQuery catfilter = new BooleanQuery();
        // Set field flags
        ReturnFields returnFields = new SolrReturnFields(req);
        rsp.setReturnFields(returnFields);
        int flags = 0;
        if (returnFields.wantsScore()) {
            flags |= SolrIndexSearcher.GET_SCORES;
        }

        String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE);
        String q = params.get(CommonParams.Q);
        Query query = null;
        SortSpec sortSpec = null;
        List<Query> filters = new LinkedList<Query>();
        List<RealTermFreqVector> prototypetfs = new LinkedList<RealTermFreqVector>();

        try {
            if (q != null) {
                QParser parser = QParser.getParser(q, defType, req);
                query = parser.getQuery();
                sortSpec = parser.getSort(true);
            }

            String[] fqs = req.getParams().getParams(CommonParams.FQ);
            if (fqs != null && fqs.length != 0) {
                for (String fq : fqs) {
                    if (fq != null && fq.trim().length() != 0) {
                        QParser fqp = QParser.getParser(fq, null, req);
                        filters.add(fqp.getQuery());
                    }
                }
            }
        } catch (Exception e) {
            numErrors++;
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
        }

        SolrIndexSearcher searcher = req.getSearcher();
        DocListAndSet cltDocs = null;

        // Parse Required Params
        // This will either have a single Reader or valid query
        Reader reader = null;
        try {
            if (q == null || q.trim().length() < 1) {
                Iterable<ContentStream> streams = req.getContentStreams();
                if (streams != null) {
                    Iterator<ContentStream> iter = streams.iterator();
                    if (iter.hasNext()) {
                        reader = iter.next().getReader();
                    }
                    if (iter.hasNext()) {
                        numErrors++;
                        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                                "SenseLikeThis does not support multiple ContentStreams");
                    }
                }
            }

            int start = params.getInt(CommonParams.START, 0);
            int rows = params.getInt(CommonParams.ROWS, 10);

            // Find documents SenseLikeThis - either with a reader or a query
            // --------------------------------------------------------------------------------
            if (reader != null) {
                numErrors++;
                throw new RuntimeException("SLT based on a reader is not yet implemented");
            } else if (q != null) {

                LOGGER.debug("Query for category:\t" + query);
                DocList match = searcher.getDocList(query, null, null, 0, 10, flags); // get first 10
                if (match.size() == 0) { // no docs to make prototype!
                    LOGGER.info("No documents found for prototype!");
                    rsp.add("response", new DocListAndSet());
                    return;
                }

                HashMap<String, Float> overallFreqMap = new HashMap<String, Float>();
                // Create the TF of blah blah blah
                DocIterator iterator = match.iterator();
                while (iterator.hasNext()) {
                    // do a MoreLikeThis query for each document in results
                    int id = iterator.nextDoc();
                    LOGGER.trace("Working on doc:\t" + id);
                    RealTermFreqVector rtv = new RealTermFreqVector(id, searcher.getIndexReader(), senseField);
                    for (int zz = 0; zz < rtv.getSize(); zz++) {
                        Float prev = overallFreqMap.get(rtv.getTerms()[zz]);
                        if (prev == null) {
                            prev = 0f;
                        }
                        overallFreqMap.put(rtv.getTerms()[zz], rtv.getFreqs()[zz] + prev);
                    }
                    prototypetfs.add(rtv);
                }

                List<String> sortedKeys = Ordering.natural().onResultOf(Functions.forMap(overallFreqMap))
                        .immutableSortedCopy(overallFreqMap.keySet());
                int keyiter = Math.min(sortedKeys.size() - 1, BooleanQuery.getMaxClauseCount() - 1);
                LOGGER.debug("I have this many terms:\t" + sortedKeys.size());
                LOGGER.debug("And i'm going to use this many:\t" + keyiter);
                for (; keyiter >= 0; keyiter--) {
                    TermQuery tq = new TermQuery(new Term(senseField, sortedKeys.get(keyiter)));
                    catfilter.add(tq, BooleanClause.Occur.SHOULD);
                }

            } else {
                numErrors++;
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                        "CategoryLikeThis requires either a query (?q=) or text to find similar documents.");
            }

            LOGGER.debug("document filter is: \t" + catfilter);
            CategorizationBase model = new CategorizationBase(prototypetfs);
            CategoryQuery clt = CategoryQuery.CategoryQueryForDocument(catfilter, model,
                    searcher.getIndexReader(), senseField);
            DocSet filtered = searcher.getDocSet(filters);
            cltDocs = searcher.getDocListAndSet(clt, filtered, Sort.RELEVANCE, start, rows, flags);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }

        if (cltDocs == null) {
            numEmpty++;
            cltDocs = new DocListAndSet(); // avoid NPE
        }
        rsp.add("response", cltDocs.docList);

        // maybe facet the results
        if (params.getBool(FacetParams.FACET, false)) {
            if (cltDocs.docSet == null) {
                rsp.add("facet_counts", null);
            } else {
                SimpleFacets f = new SimpleFacets(req, cltDocs.docSet, params);
                rsp.add("facet_counts", f.getFacetCounts());
            }
        }
    } catch (Exception e) {
        numErrors++;
    } finally {
        totalTime += System.currentTimeMillis() - startTime;
    }

}

From source file:com.searchbox.solr.SenseLikeThisHandler.java

License:Apache License

@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    NamedList<Object> timinginfo = new NamedList<Object>();
    numRequests++;//  w ww.  j a  va2s.  c om
    long startTime = System.currentTimeMillis();
    long lstartTime = System.currentTimeMillis();
    if (!keystate) {
        LOGGER.error(
                "License key failure, not performing sense query. Please email contact@searchbox.com for more information.");
        return;
    }

    boolean fromcache = false;

    try {
        SolrParams params = req.getParams();
        int start = params.getInt(CommonParams.START, 0);
        int rows = params.getInt(CommonParams.ROWS, 10);

        HashSet<String> toIgnore = (new HashSet<String>());
        toIgnore.add("start");
        toIgnore.add("rows");
        toIgnore.add("fl");
        toIgnore.add("wt");
        toIgnore.add("indent");

        SolrCacheKey key = new SolrCacheKey(params, toIgnore);

        // Set field flags
        ReturnFields returnFields = new SolrReturnFields(req);
        rsp.setReturnFields(returnFields);
        int flags = 0;
        if (returnFields.wantsScore()) {
            flags |= SolrIndexSearcher.GET_SCORES;
        }

        String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE);
        String q = params.get(CommonParams.Q);
        Query query = null;
        QueryReductionFilter qr = null;
        SortSpec sortSpec = null;
        List<Query> filters = new ArrayList<Query>();

        try {
            if (q != null) {
                QParser parser = QParser.getParser(q, defType, req);
                query = parser.getQuery();
                sortSpec = parser.getSort(true);
            }

            String[] fqs = req.getParams().getParams(CommonParams.FQ);
            if (fqs != null && fqs.length != 0) {
                for (String fq : fqs) {
                    if (fq != null && fq.trim().length() != 0) {
                        QParser fqp = QParser.getParser(fq, null, req);
                        filters.add(fqp.getQuery());
                    }
                }
            }
        } catch (Exception e) {
            numErrors++;
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
        }

        timinginfo.add("Parse Query time", System.currentTimeMillis() - lstartTime);
        LOGGER.debug("Parsed Query Time:\t" + (System.currentTimeMillis() - lstartTime));
        lstartTime = System.currentTimeMillis();

        SolrIndexSearcher searcher = req.getSearcher();
        SchemaField uniqueKeyField = searcher.getSchema().getUniqueKeyField();

        // Parse Required Params
        // This will either have a single Reader or valid query

        // Find documents SenseLikeThis - either with a reader or a query
        // --------------------------------------------------------------------------------
        SenseQuery slt = null;
        if (q == null) {
            numErrors++;
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "SenseLikeThis requires either a query (?q=) or text to find similar documents.");

        }
        // Matching options
        boolean includeMatch = params.getBool(MoreLikeThisParams.MATCH_INCLUDE, true);
        int matchOffset = params.getInt(MoreLikeThisParams.MATCH_OFFSET, 0);
        // Find the base match

        DocList match = searcher.getDocList(query, null, null, matchOffset, 1, flags); // only get the first one...
        if (includeMatch) {
            rsp.add("match", match);
        }

        DocIterator iterator = match.iterator();
        if (!iterator.hasNext()) {
            numErrors++;
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "SenseLikeThis no document found matching request.");
        }
        int id = iterator.nextDoc();

        timinginfo.add("Find Query Doc", System.currentTimeMillis() - lstartTime);
        LOGGER.debug("Find Query Doc:\t" + (System.currentTimeMillis() - lstartTime));
        lstartTime = System.currentTimeMillis();

        SolrCache sc = searcher.getCache("com.searchbox.sltcache");
        DocListAndSet sltDocs = null;
        if (sc != null) {
            //try to get from cache
            sltDocs = (DocListAndSet) sc.get(key.getSet());
        } else {
            LOGGER.error("com.searchbox.sltcache not defined, can't cache slt queries");
        }

        sltDocs = (DocListAndSet) sc.get(key.getSet());
        if (start + rows > 1000 || sltDocs == null || !params.getBool(CommonParams.CACHE, true)) { //not in cache, need to do search
            BooleanQuery bq = new BooleanQuery();
            Document doc = searcher.getIndexReader().document(id);
            bq.add(new TermQuery(new Term(uniqueKeyField.getName(),
                    uniqueKeyField.getType().storedToIndexed(doc.getField(uniqueKeyField.getName())))),
                    BooleanClause.Occur.MUST_NOT);
            filters.add(bq);

            String[] senseFields = splitList
                    .split(params.get(SenseParams.SENSE_FIELD, SenseParams.DEFAULT_SENSE_FIELD));
            String senseField = (senseFields[0] != null) ? senseFields[0] : SenseParams.DEFAULT_SENSE_FIELD;

            //TODO more intelligent handling of multiple fields , can probably do a boolean junction of multiple sensequeries, but this will be slow
            long maxlength = -1;
            for (String possibleField : senseFields) {
                try {
                    long flength = doc.getField(possibleField).stringValue().length();
                    if (flength > maxlength) {
                        senseField = possibleField;
                        maxlength = flength;
                    }
                } catch (Exception e) {
                    System.out.println("Error: " + e.getMessage());
                }
            }

            LOGGER.debug("Using sense field :\t" + (senseField));

            String CKBid = params.get(SenseParams.SENSE_CKB, SenseParams.SENSE_CKB_DEFAULT);

            RealTermFreqVector rtv = new RealTermFreqVector(id, searcher.getIndexReader(), senseField);
            timinginfo.add("Make real term freq vector", System.currentTimeMillis() - lstartTime);
            lstartTime = System.currentTimeMillis();

            qr = new QueryReductionFilter(rtv, CKBid, searcher, senseField);
            qr.setNumtermstouse(params.getInt(SenseParams.SENSE_QR_NTU, SenseParams.SENSE_QR_NTU_DEFAULT));
            qr.setThreshold(params.getInt(SenseParams.SENSE_QR_THRESH, SenseParams.SENSE_QR_THRESH_DEFAULT));
            qr.setMaxDocSubSet(params.getInt(SenseParams.SENSE_QR_MAXDOC, SenseParams.SENSE_QR_MAXDOC_DEFAULT));
            qr.setMinDocSetSizeForFilter(
                    params.getInt(SenseParams.SENSE_MINDOC4QR, SenseParams.SENSE_MINDOC4QR_DEFAULT));

            numTermsUsed += qr.getNumtermstouse();
            numTermsConsidered += rtv.getSize();

            timinginfo.add("Setup SLT query", System.currentTimeMillis() - lstartTime);
            LOGGER.debug("Setup SLT query:\t" + (System.currentTimeMillis() - lstartTime));
            lstartTime = System.currentTimeMillis();

            DocList subFiltered = qr.getSubSetToSearchIn(filters);
            timinginfo.add("Do Query Redux", System.currentTimeMillis() - lstartTime);
            LOGGER.debug("Do query redux:\t" + (System.currentTimeMillis() - lstartTime));
            lstartTime = System.currentTimeMillis();

            numFiltered += qr.getFiltered().docList.size();
            numSubset += subFiltered.size();
            LOGGER.info("Number of documents to search:\t" + subFiltered.size());

            slt = new SenseQuery(rtv, senseField, CKBid,
                    params.getFloat(SenseParams.SENSE_WEIGHT, SenseParams.DEFAULT_SENSE_WEIGHT), null);
            LOGGER.debug("Setup sense query:\t" + (System.currentTimeMillis() - lstartTime));
            timinginfo.add("Setup sense query", System.currentTimeMillis() - lstartTime);
            lstartTime = System.currentTimeMillis();

            sltDocs = searcher.getDocListAndSet(slt, subFiltered, Sort.RELEVANCE, 0, 1000, flags);
            timinginfo.add("Do sense query", System.currentTimeMillis() - lstartTime);
            lstartTime = System.currentTimeMillis();

            LOGGER.debug("Adding this keyto cache:\t" + key.getSet().toString());
            searcher.getCache("com.searchbox.sltcache").put(key.getSet(), sltDocs);

        } else {
            fromcache = true;
            timinginfo.add("Getting from cache", System.currentTimeMillis() - lstartTime);
            LOGGER.debug("Got result from cache");
            lstartTime = System.currentTimeMillis();
        }

        if (sltDocs == null) {
            numEmpty++;
            sltDocs = new DocListAndSet(); // avoid NPE
        }
        rsp.add("response", sltDocs.docList.subset(start, rows));

        // maybe facet the results
        if (params.getBool(FacetParams.FACET, false)) {
            if (sltDocs.docSet == null) {
                rsp.add("facet_counts", null);
            } else {
                SimpleFacets f = new SimpleFacets(req, sltDocs.docSet, params);
                rsp.add("facet_counts", f.getFacetCounts());
            }
        }
        timinginfo.add("Facet parts", System.currentTimeMillis() - lstartTime);
        LOGGER.debug("Facet parts:\t" + (System.currentTimeMillis() - lstartTime));

        // Debug info, not doing it for the moment.
        boolean dbg = req.getParams().getBool(CommonParams.DEBUG_QUERY, false);

        boolean dbgQuery = false, dbgResults = false;
        if (dbg == false) {//if it's true, we are doing everything anyway.
            String[] dbgParams = req.getParams().getParams(CommonParams.DEBUG);
            if (dbgParams != null) {
                for (int i = 0; i < dbgParams.length; i++) {
                    if (dbgParams[i].equals(CommonParams.QUERY)) {
                        dbgQuery = true;
                    } else if (dbgParams[i].equals(CommonParams.RESULTS)) {
                        dbgResults = true;
                    }
                }
            }
        } else {
            dbgQuery = true;
            dbgResults = true;
        }
        // Copied from StandardRequestHandler... perhaps it should be added to doStandardDebug?
        if (dbg == true) {
            try {
                lstartTime = System.currentTimeMillis();
                NamedList<Object> dbgInfo = SolrPluginUtils.doStandardDebug(req, q, slt,
                        sltDocs.docList.subset(start, rows), dbgQuery, dbgResults);
                dbgInfo.add("Query freqs", slt.getAllTermsasString());
                if (null != dbgInfo) {
                    if (null != filters) {
                        dbgInfo.add("filter_queries", req.getParams().getParams(CommonParams.FQ));
                        List<String> fqs = new ArrayList<String>(filters.size());
                        for (Query fq : filters) {
                            fqs.add(QueryParsing.toString(fq, req.getSchema()));
                        }
                        dbgInfo.add("parsed_filter_queries", fqs);
                    }
                    if (null != qr) {
                        dbgInfo.add("QueryReduction", qr.getDbgInfo());
                    }
                    if (null != slt) {
                        dbgInfo.add("SLT", slt.getDbgInfo());

                    }

                    dbgInfo.add("fromcache", fromcache);
                    rsp.add("debug", dbgInfo);
                    timinginfo.add("Debugging parts", System.currentTimeMillis() - lstartTime);
                    dbgInfo.add("timings", timinginfo);
                }
            } catch (Exception e) {
                SolrException.log(SolrCore.log, "Exception during debug", e);
                rsp.add("exception_during_debug", SolrException.toStr(e));
            }
        }
    } catch (Exception e) {
        numErrors++;
        e.printStackTrace();
    } finally {
        totalTime += System.currentTimeMillis() - startTime;
    }

}

From source file:com.searchbox.solr.SenseLikeThisHandlerNoReduction.java

License:Apache License

@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    SolrParams params = req.getParams();

    if (!keystate) {
        LOGGER.error(/*from  w w  w .ja v  a2 s.  c  o m*/
                "License key failure, not performing sense query. Please email contact@searchbox.com for more information.");
        return;
    }

    int docID;
    // Set field flags
    ReturnFields returnFields = new SolrReturnFields(req);
    rsp.setReturnFields(returnFields);
    int flags = 0;
    if (returnFields.wantsScore()) {
        flags |= SolrIndexSearcher.GET_SCORES;
    }

    String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE);
    String q = params.get(CommonParams.Q);
    Query query = null;
    SortSpec sortSpec = null;
    List<Query> filters = new ArrayList<Query>();

    try {
        if (q != null) {
            QParser parser = QParser.getParser(q, defType, req);
            query = parser.getQuery();
            sortSpec = parser.getSort(true);
        }

        String[] fqs = req.getParams().getParams(CommonParams.FQ);
        if (fqs != null && fqs.length != 0) {
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0) {
                    QParser fqp = QParser.getParser(fq, null, req);
                    filters.add(fqp.getQuery());
                }
            }
        }
    } catch (Exception e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
    }

    SolrIndexSearcher searcher = req.getSearcher();
    SchemaField uniqueKeyField = searcher.getSchema().getUniqueKeyField();

    DocListAndSet sltDocs = null;

    // Parse Required Params
    // This will either have a single Reader or valid query
    Reader reader = null;
    try {
        if (q == null || q.trim().length() < 1) {
            Iterable<ContentStream> streams = req.getContentStreams();
            if (streams != null) {
                Iterator<ContentStream> iter = streams.iterator();
                if (iter.hasNext()) {
                    reader = iter.next().getReader();
                }
                if (iter.hasNext()) {
                    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                            "SenseLikeThis does not support multiple ContentStreams");
                }
            }
        }

        int start = params.getInt(CommonParams.START, 0);
        int rows = params.getInt(CommonParams.ROWS, 10);

        // Find documents SenseLikeThis - either with a reader or a query
        // --------------------------------------------------------------------------------
        SenseQuery slt = null;
        if (reader != null) {
            throw new RuntimeException("SLT based on a reader is not yet implemented");
        } else if (q != null) {
            // Matching options
            boolean includeMatch = params.getBool(MoreLikeThisParams.MATCH_INCLUDE, true);
            int matchOffset = params.getInt(MoreLikeThisParams.MATCH_OFFSET, 0);
            // Find the base match

            DocList match = searcher.getDocList(query, null, null, matchOffset, 1, flags); // only get the first one...
            if (includeMatch) {
                rsp.add("match", match);
            }

            // Get docID
            DocIterator iterator = match.iterator();
            docID = iterator.nextDoc();

            BooleanQuery bq = new BooleanQuery();
            Document doc = searcher.getIndexReader().document(docID);
            bq.add(new TermQuery(new Term(uniqueKeyField.getName(),
                    uniqueKeyField.getType().storedToIndexed(doc.getField(uniqueKeyField.getName())))),
                    BooleanClause.Occur.MUST_NOT);
            filters.add(bq);

        } else {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "SenseLikeThis requires either a query (?q=) or text to find similar documents.");
        }

        String CKBid = params.get(SenseParams.SENSE_CKB, SenseParams.SENSE_CKB_DEFAULT);

        String senseField = params.get(SenseParams.SENSE_FIELD, SenseParams.DEFAULT_SENSE_FIELD);
        slt = new SenseQuery(new RealTermFreqVector(docID, searcher.getIndexReader(), senseField), senseField,
                CKBid, params.getFloat(SenseParams.SENSE_WEIGHT, SenseParams.DEFAULT_SENSE_WEIGHT), null);

        //Execute the SLT query
        //DocSet filtered = searcher.getDocSet(filters);
        //System.out.println("Number of documents to search:\t" + filtered.size());
        //sltDocs = searcher.getDocListAndSet(slt, filtered, Sort.RELEVANCE, start, rows, flags);
        sltDocs = searcher.getDocListAndSet(slt, filters, Sort.RELEVANCE, start, rows, flags);

    } finally {
        if (reader != null) {
            reader.close();
        }
    }

    if (sltDocs == null) {
        sltDocs = new DocListAndSet(); // avoid NPE
    }
    rsp.add("response", sltDocs.docList);

    // maybe facet the results
    if (params.getBool(FacetParams.FACET, false)) {
        if (sltDocs.docSet == null) {
            rsp.add("facet_counts", null);
        } else {
            SimpleFacets f = new SimpleFacets(req, sltDocs.docSet, params);
            rsp.add("facet_counts", f.getFacetCounts());
        }
    }

    // Debug info, not doing it for the moment.
    boolean dbg = req.getParams().getBool(CommonParams.DEBUG_QUERY, false);

    boolean dbgQuery = false, dbgResults = false;
    if (dbg == false) {//if it's true, we are doing everything anyway.
        String[] dbgParams = req.getParams().getParams(CommonParams.DEBUG);
        if (dbgParams != null) {
            for (int i = 0; i < dbgParams.length; i++) {
                if (dbgParams[i].equals(CommonParams.QUERY)) {
                    dbgQuery = true;
                } else if (dbgParams[i].equals(CommonParams.RESULTS)) {
                    dbgResults = true;
                }
            }
        }
    } else {
        dbgQuery = true;
        dbgResults = true;
    }
    // Copied from StandardRequestHandler... perhaps it should be added to doStandardDebug?
    if (dbg == true) {
        try {

            NamedList<Object> dbgInfo = SolrPluginUtils.doStandardDebug(req, q, query, sltDocs.docList,
                    dbgQuery, dbgResults);
            if (null != dbgInfo) {
                if (null != filters) {
                    dbgInfo.add("filter_queries", req.getParams().getParams(CommonParams.FQ));
                    List<String> fqs = new ArrayList<String>(filters.size());
                    for (Query fq : filters) {
                        fqs.add(QueryParsing.toString(fq, req.getSchema()));
                    }
                    dbgInfo.add("parsed_filter_queries", fqs);
                }
                rsp.add("debug", dbgInfo);
            }
        } catch (Exception e) {
            SolrException.log(SolrCore.log, "Exception during debug", e);
            rsp.add("exception_during_debug", SolrException.toStr(e));
        }
    }
}

From source file:com.searchbox.SuggesterComponent.java

License:Apache License

/**
 * new searcher event used to create suggestion model if config flags
 * areappropriately set//from  w ww .  j a  v a 2s. co  m
 */
public void newSearcher(SolrIndexSearcher newSearcher, SolrIndexSearcher currentSearcher) {
    LOGGER.trace("newSearcher hit");
    if (currentSearcher == null) {
        // firstSearcher event
        try {
            LOGGER.info("Loading suggester model.");
            readFile(storeDir);

        } catch (Exception e) {
            LOGGER.error("Exception in reloading suggester model.");
        }
    } else {
        // newSearcher event
        if (buildOnCommit) {
            buildAndWrite(newSearcher, gfields);
        } else if (buildOnOptimize) {
            if (newSearcher.getIndexReader().leaves().size() == 1) {
                buildAndWrite(newSearcher, gfields);
            } else {
                LOGGER.info("Index is not optimized therefore skipping " + "building suggester index");
            }
        }
    }
}

From source file:com.searchbox.SuggeterDataStructureBuilder.java

License:Apache License

private void iterateThroughDocuments(SolrIndexSearcher searcher, String[] fields, int maxNumDocs) {
    IndexReader reader = searcher.getIndexReader();
    // WARNING: returns null if there are no deletions
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    maxNumDocs = Math.min(maxNumDocs, reader.maxDoc());

    if (maxNumDocs == -1) {
        maxNumDocs = reader.maxDoc();//from  w ww.j av  a2  s.  co  m
    }
    LOGGER.info("Analyzing docs:\t" + numdocs);

    for (int docID = 0; docID < reader.maxDoc(); docID++) {
        if (numdocs > maxNumDocs) {
            break;
        }
        if (liveDocs != null && !liveDocs.get(docID)) {
            continue; // deleted
        }

        if ((docID % 1000) == 0) {
            LOGGER.debug("Doing " + docID + " of " + maxNumDocs);
        }

        StringBuilder text = new StringBuilder();
        for (String field : fields) {
            /*
             * not sure if this is the best way, might make sense to do a
             * process text for each field individually, but then book
             * keeping the doc freq for terms becomes a bit of a pain in the
             * ass
             */
            try {
                IndexableField[] multifield = reader.document(docID).getFields(field);
                for (IndexableField singlefield : multifield) {
                    // create one big string from all of the text in the
                    // documents for processing later on
                    text.append(". " + singlefield.stringValue());
                }

            } catch (IOException ex) {
                LOGGER.warn("Document " + docID + " missing requested field (" + field + ")...ignoring");
            }
        }
        // might as well see if its empty
        if (text.length() > 0) {
            // actually processes the massive string which was created from
            // all of the above fields
            processText(text.toString().toLowerCase());
            numdocs++;
        }
    }

    LOGGER.info("Number of documents analyzed: \t" + numdocs);
    for (int zz = 0; zz < counts.length; zz++) {
        LOGGER.info("Number of " + zz + "-grams: \t" + counts[zz]);
    }
}

From source file:com.searchbox.Tagger.java

License:Apache License

private void DfCountBuilder(SolrIndexSearcher searcher, String[] fields, int maxNumDocs) {
    IndexReader reader = searcher.getIndexReader();
    Bits liveDocs = MultiFields.getLiveDocs(reader); // WARNING: returns null if
                                                     // there are no deletions

    maxNumDocs = Math.min(maxNumDocs, reader.maxDoc());

    if (maxNumDocs == -1) {
        maxNumDocs = reader.maxDoc();//from  w ww  .j a  va 2  s.  c om
    }
    LOGGER.info("Analyzing docs:\t" + numdocs);

    for (int docID = 0; docID < reader.maxDoc(); docID++) {
        if (numdocs > maxNumDocs) {
            break;
        }
        if (liveDocs != null && !liveDocs.get(docID)) {
            continue; // deleted
        }

        if ((docID % 1000) == 0) {
            LOGGER.debug("Doing " + docID + " of " + maxNumDocs);
        }

        StringBuilder text = new StringBuilder();
        for (String field : fields) { // not sure if this is the best way, might
                                      // make sense to do a
                                      // process text for each field individually, but then book keeping
                                      // the doc freq for terms becomes a bit of a pain in the ass
            try {
                text.append(". " + reader.document(docID).get(field));
            } catch (IOException ex) {
                LOGGER.warn("Document " + docID + " missing requested field (" + field + ")...ignoring");
            }
        }
        if (text.length() > 0) { // might as well see if its empty
            processDocText(text.toString());
            numdocs++;

        }
    }

    LOGGER.info("Number of documents analyzed: \t" + numdocs);
    dfcounts.put(DOC_COUNTS_STRING, numdocs);
    tfcounts.put(DOC_COUNTS_STRING, numdocs);
}

From source file:com.searchbox.TaggerComponent.java

License:Apache License

public void newSearcher(SolrIndexSearcher newSearcher, SolrIndexSearcher currentSearcher) {
    LOGGER.trace("newSearcher hit");
    if (currentSearcher == null) {
        // firstSearcher event
        try {//from   ww w .j a  v a  2 s  .c  o m
            LOGGER.info("Loading tagger model.");
            dfb = Tagger.loadTagger(storeDir, boostsFileName);

        } catch (Exception e) {
            LOGGER.error("Exception in reloading tagger model.");
        }
    } else {
        // newSearcher event
        if (buildOnCommit) {
            buildAndWrite(newSearcher, gfields);
        } else if (buildOnOptimize) {
            if (newSearcher.getIndexReader().leaves().size() == 1) {
                buildAndWrite(newSearcher, gfields);
            } else {
                LOGGER.info("Index is not optimized therefore skipping building tagger index");
            }
        }
    }
}

From source file:com.tamingtext.qa.PassageRankingComponent.java

License:Apache License

@Override
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
        return;// w  w w.ja  va2 s .co  m
    }
    Query origQuery = rb.getQuery();
    //TODO: longer term, we don't have to be a span query, we could re-analyze the document
    if (origQuery != null) {
        if (origQuery instanceof SpanNearQuery == false) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                    "Illegal query type.  The incoming query must be a Lucene SpanNearQuery and it was a "
                            + origQuery.getClass().getName());
        }
        SpanNearQuery sQuery = (SpanNearQuery) origQuery;
        SolrIndexSearcher searcher = rb.req.getSearcher();
        IndexReader reader = searcher.getIndexReader();
        Spans spans = sQuery.getSpans(reader);
        //Assumes the query is a SpanQuery
        //Build up the query term weight map and the bi-gram
        Map<String, Float> termWeights = new HashMap<String, Float>();
        Map<String, Float> bigramWeights = new HashMap<String, Float>();
        createWeights(params.get(CommonParams.Q), sQuery, termWeights, bigramWeights, reader);
        float adjWeight = params.getFloat(ADJACENT_WEIGHT, DEFAULT_ADJACENT_WEIGHT);
        float secondAdjWeight = params.getFloat(SECOND_ADJ_WEIGHT, DEFAULT_SECOND_ADJACENT_WEIGHT);
        float bigramWeight = params.getFloat(BIGRAM_WEIGHT, DEFAULT_BIGRAM_WEIGHT);
        //get the passages
        int primaryWindowSize = params.getInt(QAParams.PRIMARY_WINDOW_SIZE, DEFAULT_PRIMARY_WINDOW_SIZE);
        int adjacentWindowSize = params.getInt(QAParams.ADJACENT_WINDOW_SIZE, DEFAULT_ADJACENT_WINDOW_SIZE);
        int secondaryWindowSize = params.getInt(QAParams.SECONDARY_WINDOW_SIZE, DEFAULT_SECONDARY_WINDOW_SIZE);
        WindowBuildingTVM tvm = new WindowBuildingTVM(primaryWindowSize, adjacentWindowSize,
                secondaryWindowSize);
        PassagePriorityQueue rankedPassages = new PassagePriorityQueue();
        //intersect w/ doclist
        DocList docList = rb.getResults().docList;
        while (spans.next() == true) {
            //build up the window
            if (docList.exists(spans.doc())) {
                tvm.spanStart = spans.start();
                tvm.spanEnd = spans.end();
                reader.getTermFreqVector(spans.doc(), sQuery.getField(), tvm);
                //The entries map contains the window, do some ranking of it
                if (tvm.passage.terms.isEmpty() == false) {
                    log.debug("Candidate: Doc: {} Start: {} End: {} ",
                            new Object[] { spans.doc(), spans.start(), spans.end() });
                }
                tvm.passage.lDocId = spans.doc();
                tvm.passage.field = sQuery.getField();
                //score this window
                try {
                    addPassage(tvm.passage, rankedPassages, termWeights, bigramWeights, adjWeight,
                            secondAdjWeight, bigramWeight);
                } catch (CloneNotSupportedException e) {
                    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                            "Internal error cloning Passage", e);
                }
                //clear out the entries for the next round
                tvm.passage.clear();
            }
        }
        NamedList qaResp = new NamedList();
        rb.rsp.add("qaResponse", qaResp);
        int rows = params.getInt(QA_ROWS, 5);

        SchemaField uniqField = rb.req.getSchema().getUniqueKeyField();
        if (rankedPassages.size() > 0) {
            int size = Math.min(rows, rankedPassages.size());
            Set<String> fields = new HashSet<String>();
            for (int i = size - 1; i >= 0; i--) {
                Passage passage = rankedPassages.pop();
                if (passage != null) {
                    NamedList passNL = new NamedList();
                    qaResp.add(("answer"), passNL);
                    String idName;
                    String idValue;
                    if (uniqField != null) {
                        idName = uniqField.getName();
                        fields.add(idName);
                        fields.add(passage.field);//prefetch this now, so that it is cached
                        idValue = searcher.doc(passage.lDocId, fields).get(idName);
                    } else {
                        idName = "luceneDocId";
                        idValue = String.valueOf(passage.lDocId);
                    }
                    passNL.add(idName, idValue);
                    passNL.add("field", passage.field);
                    //get the window
                    String fldValue = searcher.doc(passage.lDocId, fields).get(passage.field);
                    if (fldValue != null) {
                        //get the window of words to display, we don't use the passage window, as that is based on the term vector
                        int start = passage.terms.first().start;//use the offsets
                        int end = passage.terms.last().end;
                        if (start >= 0 && start < fldValue.length() && end >= 0 && end < fldValue.length()) {
                            passNL.add("window",
                                    fldValue.substring(start, end + passage.terms.last().term.length()));
                        } else {
                            log.debug("Passage does not have correct offset information");
                            passNL.add("window", fldValue);//we don't have offsets, or they are incorrect, return the whole field value
                        }
                    }
                } else {
                    break;
                }
            }
        }
    }

}