Example usage for org.apache.solr.search SolrIndexSearcher getSchema

List of usage examples for org.apache.solr.search SolrIndexSearcher getSchema

Introduction

On this page you can find example usages of org.apache.solr.search.SolrIndexSearcher#getSchema.

Prototype

public IndexSchema getSchema() 

Source Link

Document

Direct access to the IndexSchema for use with this searcher

Usage

From source file:com.grantingersoll.intell.clustering.KMeansClusteringEngine.java

License:Apache License

/**
 * Exports the configured input field as term vectors and launches a k-means
 * clustering job in the background.
 *
 * @param searcher the searcher whose index is exported
 * @param k        the number of clusters to produce
 */
private void cluster(SolrIndexSearcher searcher, int k) {
    log.info("Clustering");
    // Go and do the clustering.  First, we need to export the fields.
    SchemaField keyField = searcher.getSchema().getUniqueKeyField();
    // TODO: should we prevent overlaps here if there are too many commits?
    // Clustering isn't something that has to be fresh all the time and we
    // likely can't sustain that anyway.
    if (keyField == null) {
        // A unique key field is required to map vectors back to documents.
        return;
    }
    // Do the export synchronously here, then spawn off a thread to do the
    // clustering, otherwise it will take too long.
    String idName = keyField.getName();
    Weight weight = new TFIDF();
    SolrIndexReader reader = searcher.getReader();
    try {
        TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
        LuceneIterable li = new LuceneIterable(reader, idName, inputField,
                new TFDFMapper(reader, weight, termInfo));
        Date now = new Date();
        String jobDir = clusterBaseDir.getAbsolutePath() + File.separator + "clusters-" + now.getTime();
        log.info("Dumping {} to {}", inputField, clusterBaseDir);
        File outFile = new File(jobDir, "index-" + inputField + ".vec");
        VectorWriter vectorWriter = getSeqFileWriter(outFile.getAbsolutePath());
        long numDocs;
        try {
            numDocs = vectorWriter.write(li, Integer.MAX_VALUE);
        } finally {
            // Always release the sequence-file handle, even if the export fails.
            vectorWriter.close();
        }
        log.info("Wrote: {} vectors", numDocs);
        File dictOutFile = new File(jobDir, "dict-" + inputField + ".txt");
        log.info("Dictionary Output file: {}", dictOutFile);
        BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(dictOutFile), Charset.forName("UTF8")));
        JWriterTermInfoWriter tiWriter = new JWriterTermInfoWriter(writer, "\t", inputField);
        try {
            tiWriter.write(termInfo);
        } finally {
            // Close both wrappers; a double-close of a BufferedWriter is a no-op,
            // and this guarantees the dictionary file is flushed on all paths.
            tiWriter.close();
            writer.close();
        }
        // OK, the dictionary is dumped, now we can cluster; do this via a
        // thread in the background.  When it's done, we can switch to it.
        ClusterJob clusterJob = new ClusterJob(k, jobDir, new Path(outFile.getAbsolutePath()),
                new Path(jobDir + File.separator + "clusters"),
                new Path(jobDir + File.separator + "output"), new Path(dictOutFile.getAbsolutePath()));

        writeJobDetails(clusterJob);
        theFuture = execService.submit(new ClusterCallable(clusterJob));
    } catch (IOException e) {
        log.error("Exception", e);
    }
}

From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java

License:Apache License

/**
 * Generates a list of Highlighted query fragments for each item in a list
 * of documents, or returns null if highlighting is disabled.
 *
 * @param docs query results/*from  ww  w .  ja va 2s  . c  om*/
 * @param query the query
 * @param req the current request
 * @param defaultFields default list of fields to summarize
 *
 * @return NamedList containing a NamedList for each document, which in
 * turns contains sets (field, summary) pairs.
 */
@Override
@SuppressWarnings("unchecked")
public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields)
        throws IOException {

    NamedList fragments = new SimpleOrderedMap();

    SolrParams params = req.getParams();
    if (!isHighlightingEnabled(params))
        return null;

    SolrIndexSearcher searcher = req.getSearcher();
    IndexSchema schema = searcher.getSchema();
    String[] fieldNames = getHighlightFields(query, req, defaultFields);
    Set<String> fset = new HashSet<String>();

    {
        // pre-fetch documents using the Searcher's doc cache
        Collections.addAll(fset, fieldNames);
        // fetch unique key if one exists.
        SchemaField keyField = schema.getUniqueKeyField();
        if (null != keyField)
            fset.add(keyField.getName());
    }

    //CHANGE start
    //       int[] docIds = new int[docs.swordize()];
    TreeSet<Integer> docIds = new TreeSet<Integer>();
    DocIterator iterator = docs.iterator();
    for (int i = 0; i < docs.size(); i++) {
        docIds.add(iterator.nextDoc());
    }
    // Get Frag list builder
    String fragListBuilderString = params.get(HighlightParams.FRAG_LIST_BUILDER).toLowerCase();
    FragListBuilder fragListBuilder;
    if (fragListBuilderString.equals("single")) {
        fragListBuilder = new SingleFragListBuilder();
    } else {
        fragListBuilder = new com.o19s.solr.swan.highlight.SimpleFragListBuilder();
    }

    // get FastVectorHighlighter instance out of the processing loop
    SpanAwareFastVectorHighlighter safvh = new SpanAwareFastVectorHighlighter(
            // FVH cannot process hl.usePhraseHighlighter parameter per-field basis
            params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
            // FVH cannot process hl.requireFieldMatch parameter per-field basis
            params.getBool(HighlightParams.FIELD_MATCH, false), fragListBuilder,
            //new com.o19s.solr.swan.highlight.ScoreOrderFragmentsBuilder(),
            new WordHashFragmentsBuilder(),
            // List of docIds to filter spans
            docIds);
    safvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE));
    SpanAwareFieldQuery fieldQuery = safvh.getFieldQuery(query, searcher.getIndexReader(), docIds);

    // Highlight each document
    for (int docId : docIds) {
        Document doc = searcher.doc(docId, fset);
        NamedList docSummaries = new SimpleOrderedMap();
        for (String fieldName : fieldNames) {
            fieldName = fieldName.trim();
            if (useFastVectorHighlighter(params, schema, fieldName))
                doHighlightingByFastVectorHighlighter(safvh, fieldQuery, req, docSummaries, docId, doc,
                        fieldName);
            else
                doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName);
        }
        String printId = schema.printableUniqueKey(doc);
        fragments.add(printId == null ? null : printId, docSummaries);
    }
    //CHANGE end
    return fragments;
}

From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java

License:Apache License

/**
 * Highlights one field of one document using the classic (non-FVH)
 * Highlighter and adds the resulting snippets to {@code docSummaries}.
 * Falls back to the alternate field when no snippets are produced.
 */
private void doHighlightingByHighlighter(Query query, SolrQueryRequest req, NamedList docSummaries, int docId,
        Document doc, String fieldName) throws IOException {
    final SolrIndexSearcher searcher = req.getSearcher();
    final IndexSchema schema = searcher.getSchema();

    // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) -
    // so we disable them until fixed (see LUCENE-3080)!
    // BEGIN: Hack
    final SchemaField schemaField = schema.getFieldOrNull(fieldName);
    if (schemaField != null && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField)
            || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField)))
        return;
    // END: Hack

    SolrParams params = req.getParams();
    IndexableField[] docFields = doc.getFields(fieldName);
    List<String> listFields = new ArrayList<String>();
    for (IndexableField field : docFields) {
        listFields.add(field.stringValue());
    }

    String[] docTexts = listFields.toArray(new String[listFields.size()]);

    // according to Document javadoc, doc.getValues() never returns null. check empty instead of null
    if (docTexts.length == 0)
        return;

    TokenStream tokenStream;
    int numFragments = getMaxSnippets(fieldName, params);
    boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);

    List<TextFragment> frags = new ArrayList<TextFragment>();

    // The TermOffsets optimization is currently disabled (the code that
    // populated tots is commented out below), so tots stays null and the
    // analyzer fallback is always taken.
    TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization
    //      TokenStream tvStream = TokenSources.getTokenStream(searcher.getIndexReader(), docId, fieldName);
    //      if (tvStream != null) {
    //        tots = new TermOffsetsTokenStream(tvStream);
    //      }

    for (int j = 0; j < docTexts.length; j++) {
        if (tots != null) {
            // if we're using TermOffsets optimization, then get the next
            // field value's TokenStream (i.e. get field j's TokenStream) from tots:
            tokenStream = tots.getMultiValuedTokenStream(docTexts[j].length());
        } else {
            // fall back to analyzer
            tokenStream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
        }

        int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS,
                Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);

        Highlighter highlighter;
        if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
            if (maxCharsToAnalyze < 0) {
                tokenStream = new CachingTokenFilter(tokenStream);
            } else {
                tokenStream = new CachingTokenFilter(
                        new OffsetLimitTokenFilter(tokenStream, maxCharsToAnalyze));
            }

            // get highlighter
            highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tokenStream);

            // after highlighter initialization, reset tstream since construction of highlighter already used it
            tokenStream.reset();
        } else {
            // use "the old way"
            highlighter = getHighlighter(query, fieldName, req);
        }

        if (maxCharsToAnalyze < 0) {
            highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
        } else {
            highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
        }

        try {
            TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tokenStream, docTexts[j],
                    mergeContiguousFragments, numFragments);
            for (int k = 0; k < bestTextFragments.length; k++) {
                if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
                    frags.add(bestTextFragments[k]);
                }
            }
        } catch (InvalidTokenOffsetsException e) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        }
    }
    // Sort such that the fragments with the highest score come first.
    // Float.compare obeys the Comparator contract; the previous
    // Math.round(a - b) mapped any delta in (-0.5, 0.5) to 0 and could
    // produce an inconsistent (non-transitive) ordering.
    Collections.sort(frags, new Comparator<TextFragment>() {
        public int compare(TextFragment arg0, TextFragment arg1) {
            return Float.compare(arg1.getScore(), arg0.getScore());
        }
    });

    // convert fragments back into text
    // TODO: we can include score and position information in output as snippet attributes
    String[] summaries = null;
    if (frags.size() > 0) {
        ArrayList<String> fragTexts = new ArrayList<String>();
        for (TextFragment fragment : frags) {
            if ((fragment != null) && (fragment.getScore() > 0)) {
                fragTexts.add(fragment.toString());
            }
            if (fragTexts.size() >= numFragments)
                break;
        }
        // BUG FIX: toArray() returns Object[]; the old cast
        // "(String[]) fragTexts.toArray()" threw ClassCastException at runtime.
        summaries = fragTexts.toArray(new String[fragTexts.size()]);
        if (summaries.length > 0)
            docSummaries.add(fieldName, summaries);
    }
    // no summaries made, copy text from alternate field
    if (summaries == null || summaries.length == 0) {
        alternateField(docSummaries, params, doc, fieldName);
    }
}

From source file:com.searchbox.solr.SenseLikeThisHandler.java

License:Apache License

/**
 * Handles a SenseLikeThis request: finds the document matching ?q=, builds a
 * sense query from its term frequency vector, runs it (with query-reduction
 * filtering and an optional result cache), and writes results, facets, and
 * optional debug/timing info to the response.
 *
 * @param req the Solr request (q, fq, start, rows, sense.* params)
 * @param rsp the response to populate
 */
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    NamedList<Object> timinginfo = new NamedList<Object>();
    numRequests++;
    long startTime = System.currentTimeMillis();
    long lstartTime = System.currentTimeMillis();
    if (!keystate) {
        LOGGER.error(
                "License key failure, not performing sense query. Please email contact@searchbox.com for more information.");
        return;
    }

    boolean fromcache = false;

    try {
        SolrParams params = req.getParams();
        int start = params.getInt(CommonParams.START, 0);
        int rows = params.getInt(CommonParams.ROWS, 10);

        // Parameters that don't change the result set are excluded from the
        // cache key so paging/formatting variants share a cache entry.
        HashSet<String> toIgnore = new HashSet<String>();
        toIgnore.add("start");
        toIgnore.add("rows");
        toIgnore.add("fl");
        toIgnore.add("wt");
        toIgnore.add("indent");

        SolrCacheKey key = new SolrCacheKey(params, toIgnore);

        // Set field flags
        ReturnFields returnFields = new SolrReturnFields(req);
        rsp.setReturnFields(returnFields);
        int flags = 0;
        if (returnFields.wantsScore()) {
            flags |= SolrIndexSearcher.GET_SCORES;
        }

        String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE);
        String q = params.get(CommonParams.Q);
        Query query = null;
        QueryReductionFilter qr = null;
        SortSpec sortSpec = null;
        List<Query> filters = new ArrayList<Query>();

        try {
            if (q != null) {
                QParser parser = QParser.getParser(q, defType, req);
                query = parser.getQuery();
                // Parsed but unused downstream; kept so an invalid sort param
                // still fails fast as a BAD_REQUEST.
                sortSpec = parser.getSort(true);
            }

            String[] fqs = req.getParams().getParams(CommonParams.FQ);
            if (fqs != null && fqs.length != 0) {
                for (String fq : fqs) {
                    if (fq != null && fq.trim().length() != 0) {
                        QParser fqp = QParser.getParser(fq, null, req);
                        filters.add(fqp.getQuery());
                    }
                }
            }
        } catch (Exception e) {
            numErrors++;
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
        }

        timinginfo.add("Parse Query time", System.currentTimeMillis() - lstartTime);
        LOGGER.debug("Parsed Query Time:\t" + (System.currentTimeMillis() - lstartTime));
        lstartTime = System.currentTimeMillis();

        SolrIndexSearcher searcher = req.getSearcher();
        SchemaField uniqueKeyField = searcher.getSchema().getUniqueKeyField();

        // Parse Required Params
        // This will either have a single Reader or valid query

        // Find documents SenseLikeThis - either with a reader or a query
        // --------------------------------------------------------------------------------
        SenseQuery slt = null;
        if (q == null) {
            numErrors++;
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "SenseLikeThis requires either a query (?q=) or text to find similar documents.");

        }
        // Matching options
        boolean includeMatch = params.getBool(MoreLikeThisParams.MATCH_INCLUDE, true);
        int matchOffset = params.getInt(MoreLikeThisParams.MATCH_OFFSET, 0);
        // Find the base match

        DocList match = searcher.getDocList(query, null, null, matchOffset, 1, flags); // only get the first one...
        if (includeMatch) {
            rsp.add("match", match);
        }

        DocIterator iterator = match.iterator();
        if (!iterator.hasNext()) {
            numErrors++;
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "SenseLikeThis no document found matching request.");
        }
        int id = iterator.nextDoc();

        timinginfo.add("Find Query Doc", System.currentTimeMillis() - lstartTime);
        LOGGER.debug("Find Query Doc:\t" + (System.currentTimeMillis() - lstartTime));
        lstartTime = System.currentTimeMillis();

        // BUG FIX: the cache may legitimately be undefined; only dereference
        // it when present (the old code called sc.get() unconditionally and
        // NPE'd whenever com.searchbox.sltcache was not configured).
        SolrCache sc = searcher.getCache("com.searchbox.sltcache");
        DocListAndSet sltDocs = null;
        if (sc != null) {
            // try to get from cache
            sltDocs = (DocListAndSet) sc.get(key.getSet());
        } else {
            LOGGER.error("com.searchbox.sltcache not defined, can't cache slt queries");
        }

        if (start + rows > 1000 || sltDocs == null || !params.getBool(CommonParams.CACHE, true)) { //not in cache, need to do search
            BooleanQuery bq = new BooleanQuery();
            Document doc = searcher.getIndexReader().document(id);
            // Exclude the source document itself from the results.
            bq.add(new TermQuery(new Term(uniqueKeyField.getName(),
                    uniqueKeyField.getType().storedToIndexed(doc.getField(uniqueKeyField.getName())))),
                    BooleanClause.Occur.MUST_NOT);
            filters.add(bq);

            String[] senseFields = splitList
                    .split(params.get(SenseParams.SENSE_FIELD, SenseParams.DEFAULT_SENSE_FIELD));
            String senseField = (senseFields[0] != null) ? senseFields[0] : SenseParams.DEFAULT_SENSE_FIELD;

            //TODO more intelligent handling of multiple fields , can probably do a boolean junction of multiple sensequeries, but this will be slow
            // For now pick the candidate field with the longest stored value.
            long maxlength = -1;
            for (String possibleField : senseFields) {
                try {
                    long flength = doc.getField(possibleField).stringValue().length();
                    if (flength > maxlength) {
                        senseField = possibleField;
                        maxlength = flength;
                    }
                } catch (Exception e) {
                    // Field missing or not stored for this doc; skip it.
                    LOGGER.debug("Skipping sense field candidate: " + e.getMessage());
                }
            }

            LOGGER.debug("Using sense field :\t" + (senseField));

            String CKBid = params.get(SenseParams.SENSE_CKB, SenseParams.SENSE_CKB_DEFAULT);

            RealTermFreqVector rtv = new RealTermFreqVector(id, searcher.getIndexReader(), senseField);
            timinginfo.add("Make real term freq vector", System.currentTimeMillis() - lstartTime);
            lstartTime = System.currentTimeMillis();

            qr = new QueryReductionFilter(rtv, CKBid, searcher, senseField);
            qr.setNumtermstouse(params.getInt(SenseParams.SENSE_QR_NTU, SenseParams.SENSE_QR_NTU_DEFAULT));
            qr.setThreshold(params.getInt(SenseParams.SENSE_QR_THRESH, SenseParams.SENSE_QR_THRESH_DEFAULT));
            qr.setMaxDocSubSet(params.getInt(SenseParams.SENSE_QR_MAXDOC, SenseParams.SENSE_QR_MAXDOC_DEFAULT));
            qr.setMinDocSetSizeForFilter(
                    params.getInt(SenseParams.SENSE_MINDOC4QR, SenseParams.SENSE_MINDOC4QR_DEFAULT));

            numTermsUsed += qr.getNumtermstouse();
            numTermsConsidered += rtv.getSize();

            timinginfo.add("Setup SLT query", System.currentTimeMillis() - lstartTime);
            LOGGER.debug("Setup SLT query:\t" + (System.currentTimeMillis() - lstartTime));
            lstartTime = System.currentTimeMillis();

            DocList subFiltered = qr.getSubSetToSearchIn(filters);
            timinginfo.add("Do Query Redux", System.currentTimeMillis() - lstartTime);
            LOGGER.debug("Do query redux:\t" + (System.currentTimeMillis() - lstartTime));
            lstartTime = System.currentTimeMillis();

            numFiltered += qr.getFiltered().docList.size();
            numSubset += subFiltered.size();
            LOGGER.info("Number of documents to search:\t" + subFiltered.size());

            slt = new SenseQuery(rtv, senseField, CKBid,
                    params.getFloat(SenseParams.SENSE_WEIGHT, SenseParams.DEFAULT_SENSE_WEIGHT), null);
            LOGGER.debug("Setup sense query:\t" + (System.currentTimeMillis() - lstartTime));
            timinginfo.add("Setup sense query", System.currentTimeMillis() - lstartTime);
            lstartTime = System.currentTimeMillis();

            sltDocs = searcher.getDocListAndSet(slt, subFiltered, Sort.RELEVANCE, 0, 1000, flags);
            timinginfo.add("Do sense query", System.currentTimeMillis() - lstartTime);
            lstartTime = System.currentTimeMillis();

            // Only cache when the cache actually exists (see null check above).
            if (sc != null) {
                LOGGER.debug("Adding this key to cache:\t" + key.getSet().toString());
                sc.put(key.getSet(), sltDocs);
            }

        } else {
            fromcache = true;
            timinginfo.add("Getting from cache", System.currentTimeMillis() - lstartTime);
            LOGGER.debug("Got result from cache");
            lstartTime = System.currentTimeMillis();
        }

        if (sltDocs == null) {
            numEmpty++;
            sltDocs = new DocListAndSet(); // avoid NPE
        }
        rsp.add("response", sltDocs.docList.subset(start, rows));

        // maybe facet the results
        if (params.getBool(FacetParams.FACET, false)) {
            if (sltDocs.docSet == null) {
                rsp.add("facet_counts", null);
            } else {
                SimpleFacets f = new SimpleFacets(req, sltDocs.docSet, params);
                rsp.add("facet_counts", f.getFacetCounts());
            }
        }
        timinginfo.add("Facet parts", System.currentTimeMillis() - lstartTime);
        LOGGER.debug("Facet parts:\t" + (System.currentTimeMillis() - lstartTime));

        // Debug info, not doing it for the moment.
        boolean dbg = req.getParams().getBool(CommonParams.DEBUG_QUERY, false);

        boolean dbgQuery = false, dbgResults = false;
        if (dbg == false) {//if it's true, we are doing everything anyway.
            String[] dbgParams = req.getParams().getParams(CommonParams.DEBUG);
            if (dbgParams != null) {
                for (int i = 0; i < dbgParams.length; i++) {
                    if (dbgParams[i].equals(CommonParams.QUERY)) {
                        dbgQuery = true;
                    } else if (dbgParams[i].equals(CommonParams.RESULTS)) {
                        dbgResults = true;
                    }
                }
            }
        } else {
            dbgQuery = true;
            dbgResults = true;
        }
        // Copied from StandardRequestHandler... perhaps it should be added to doStandardDebug?
        if (dbg == true) {
            try {
                lstartTime = System.currentTimeMillis();
                NamedList<Object> dbgInfo = SolrPluginUtils.doStandardDebug(req, q, slt,
                        sltDocs.docList.subset(start, rows), dbgQuery, dbgResults);
                if (null != dbgInfo) {
                    if (null != filters) {
                        dbgInfo.add("filter_queries", req.getParams().getParams(CommonParams.FQ));
                        List<String> fqs = new ArrayList<String>(filters.size());
                        for (Query fq : filters) {
                            fqs.add(QueryParsing.toString(fq, req.getSchema()));
                        }
                        dbgInfo.add("parsed_filter_queries", fqs);
                    }
                    if (null != qr) {
                        dbgInfo.add("QueryReduction", qr.getDbgInfo());
                    }
                    if (null != slt) {
                        // BUG FIX: slt is null when the result came from the
                        // cache; the old code dereferenced it unconditionally.
                        dbgInfo.add("Query freqs", slt.getAllTermsasString());
                        dbgInfo.add("SLT", slt.getDbgInfo());
                    }

                    dbgInfo.add("fromcache", fromcache);
                    rsp.add("debug", dbgInfo);
                    timinginfo.add("Debugging parts", System.currentTimeMillis() - lstartTime);
                    dbgInfo.add("timings", timinginfo);
                }
            } catch (Exception e) {
                SolrException.log(SolrCore.log, "Exception during debug", e);
                rsp.add("exception_during_debug", SolrException.toStr(e));
            }
        }
    } catch (Exception e) {
        numErrors++;
        // Log with the handler's logger instead of printStackTrace().
        LOGGER.error("Exception handling SenseLikeThis request", e);
    } finally {
        totalTime += System.currentTimeMillis() - startTime;
    }

}

From source file:com.searchbox.solr.SenseLikeThisHandlerNoReduction.java

License:Apache License

/**
 * Handles a SenseLikeThis request without query reduction: finds the document
 * matching ?q=, builds a sense query from its term frequency vector, runs it
 * against the supplied filters, and writes results, facets, and optional
 * debug info to the response.
 *
 * @param req the Solr request (q, fq, start, rows, sense.* params)
 * @param rsp the response to populate
 */
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    SolrParams params = req.getParams();

    if (!keystate) {
        LOGGER.error(
                "License key failure, not performing sense query. Please email contact@searchbox.com for more information.");
        return;
    }

    int docID;
    // Set field flags
    ReturnFields returnFields = new SolrReturnFields(req);
    rsp.setReturnFields(returnFields);
    int flags = 0;
    if (returnFields.wantsScore()) {
        flags |= SolrIndexSearcher.GET_SCORES;
    }

    String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE);
    String q = params.get(CommonParams.Q);
    Query query = null;
    SortSpec sortSpec = null;
    List<Query> filters = new ArrayList<Query>();

    try {
        if (q != null) {
            QParser parser = QParser.getParser(q, defType, req);
            query = parser.getQuery();
            // Parsed but unused downstream; kept so an invalid sort param
            // still fails fast as a BAD_REQUEST.
            sortSpec = parser.getSort(true);
        }

        String[] fqs = req.getParams().getParams(CommonParams.FQ);
        if (fqs != null && fqs.length != 0) {
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0) {
                    QParser fqp = QParser.getParser(fq, null, req);
                    filters.add(fqp.getQuery());
                }
            }
        }
    } catch (Exception e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
    }

    SolrIndexSearcher searcher = req.getSearcher();
    SchemaField uniqueKeyField = searcher.getSchema().getUniqueKeyField();

    DocListAndSet sltDocs = null;

    // Parse Required Params
    // This will either have a single Reader or valid query
    Reader reader = null;
    try {
        if (q == null || q.trim().length() < 1) {
            Iterable<ContentStream> streams = req.getContentStreams();
            if (streams != null) {
                Iterator<ContentStream> iter = streams.iterator();
                if (iter.hasNext()) {
                    reader = iter.next().getReader();
                }
                if (iter.hasNext()) {
                    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                            "SenseLikeThis does not support multiple ContentStreams");
                }
            }
        }

        int start = params.getInt(CommonParams.START, 0);
        int rows = params.getInt(CommonParams.ROWS, 10);

        // Find documents SenseLikeThis - either with a reader or a query
        // --------------------------------------------------------------------------------
        SenseQuery slt = null;
        if (reader != null) {
            throw new RuntimeException("SLT based on a reader is not yet implemented");
        } else if (q != null) {
            // Matching options
            boolean includeMatch = params.getBool(MoreLikeThisParams.MATCH_INCLUDE, true);
            int matchOffset = params.getInt(MoreLikeThisParams.MATCH_OFFSET, 0);
            // Find the base match

            DocList match = searcher.getDocList(query, null, null, matchOffset, 1, flags); // only get the first one...
            if (includeMatch) {
                rsp.add("match", match);
            }

            // Get docID.  BUG FIX: guard against an empty match (the old code
            // called nextDoc() unconditionally); mirrors SenseLikeThisHandler.
            DocIterator iterator = match.iterator();
            if (!iterator.hasNext()) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                        "SenseLikeThis no document found matching request.");
            }
            docID = iterator.nextDoc();

            BooleanQuery bq = new BooleanQuery();
            Document doc = searcher.getIndexReader().document(docID);
            // Exclude the source document itself from the results.
            bq.add(new TermQuery(new Term(uniqueKeyField.getName(),
                    uniqueKeyField.getType().storedToIndexed(doc.getField(uniqueKeyField.getName())))),
                    BooleanClause.Occur.MUST_NOT);
            filters.add(bq);

        } else {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "SenseLikeThis requires either a query (?q=) or text to find similar documents.");
        }

        String CKBid = params.get(SenseParams.SENSE_CKB, SenseParams.SENSE_CKB_DEFAULT);

        String senseField = params.get(SenseParams.SENSE_FIELD, SenseParams.DEFAULT_SENSE_FIELD);
        slt = new SenseQuery(new RealTermFreqVector(docID, searcher.getIndexReader(), senseField), senseField,
                CKBid, params.getFloat(SenseParams.SENSE_WEIGHT, SenseParams.DEFAULT_SENSE_WEIGHT), null);

        //Execute the SLT query
        //DocSet filtered = searcher.getDocSet(filters);
        //System.out.println("Number of documents to search:\t" + filtered.size());
        //sltDocs = searcher.getDocListAndSet(slt, filtered, Sort.RELEVANCE, start, rows, flags);
        sltDocs = searcher.getDocListAndSet(slt, filters, Sort.RELEVANCE, start, rows, flags);

    } finally {
        if (reader != null) {
            reader.close();
        }
    }

    if (sltDocs == null) {
        sltDocs = new DocListAndSet(); // avoid NPE
    }
    rsp.add("response", sltDocs.docList);

    // maybe facet the results
    if (params.getBool(FacetParams.FACET, false)) {
        if (sltDocs.docSet == null) {
            rsp.add("facet_counts", null);
        } else {
            SimpleFacets f = new SimpleFacets(req, sltDocs.docSet, params);
            rsp.add("facet_counts", f.getFacetCounts());
        }
    }

    // Debug info, not doing it for the moment.
    boolean dbg = req.getParams().getBool(CommonParams.DEBUG_QUERY, false);

    boolean dbgQuery = false, dbgResults = false;
    if (dbg == false) {//if it's true, we are doing everything anyway.
        String[] dbgParams = req.getParams().getParams(CommonParams.DEBUG);
        if (dbgParams != null) {
            for (int i = 0; i < dbgParams.length; i++) {
                if (dbgParams[i].equals(CommonParams.QUERY)) {
                    dbgQuery = true;
                } else if (dbgParams[i].equals(CommonParams.RESULTS)) {
                    dbgResults = true;
                }
            }
        }
    } else {
        dbgQuery = true;
        dbgResults = true;
    }
    // Copied from StandardRequestHandler... perhaps it should be added to doStandardDebug?
    if (dbg == true) {
        try {

            NamedList<Object> dbgInfo = SolrPluginUtils.doStandardDebug(req, q, query, sltDocs.docList,
                    dbgQuery, dbgResults);
            if (null != dbgInfo) {
                if (null != filters) {
                    dbgInfo.add("filter_queries", req.getParams().getParams(CommonParams.FQ));
                    List<String> fqs = new ArrayList<String>(filters.size());
                    for (Query fq : filters) {
                        fqs.add(QueryParsing.toString(fq, req.getSchema()));
                    }
                    dbgInfo.add("parsed_filter_queries", fqs);
                }
                rsp.add("debug", dbgInfo);
            }
        } catch (Exception e) {
            SolrException.log(SolrCore.log, "Exception during debug", e);
            rsp.add("exception_during_debug", SolrException.toStr(e));
        }
    }
}

From source file:jp.co.atware.solr.geta.GETAssocComponent.java

License:Apache License

/**
 * Runs the configured GETAssoc (GSS3) association queries for this request and
 * attaches the combined results to the Solr response under the "geta" key.
 * <p>
 * Two sources of query text are supported: values of configured request
 * parameters ({@code geta.settings.process.req[*]}) and stored field values of
 * the documents in the current result list ({@code geta.settings.process.doc[*]}).
 *
 * @see SearchComponent#process(ResponseBuilder)
 */
@Override
public void process(ResponseBuilder rb) throws IOException {

    NamedList<Object> result = new NamedList<Object>();
    // NOTE(review): commons-httpclient instance is created per request and never
    // explicitly shut down — presumably relies on the default connection manager.
    HttpClient client = new HttpClient();

    // Query GETAssoc once per value of each configured request parameter
    // (geta.settings.process.req[*]).
    if (!config.settings.req.isEmpty()) {
        NamedList<Object> paramList = new NamedList<Object>();
        for (Entry<String, QueryType> entry : config.settings.req.entrySet()) {
            String param = entry.getKey();
            NamedList<Object> paramValueList = new NamedList<Object>();
            String[] paramValues = rb.req.getParams().getParams(param);
            if (paramValues != null) {
                for (String paramValue : paramValues) {
                    // Each parameter value becomes one GSS3 request; the converted
                    // result is keyed by the raw parameter value.
                    NamedList<Object> getaResultList = convertResult(postGss3Request(client,
                            convertRequest(rb.req.getParams(), paramValue, entry.getValue())));
                    paramValueList.add(paramValue, getaResultList);
                }
            }
            paramList.add(param, paramValueList);
        }
        result.add("req", paramList);
    }

    // Query GETAssoc per stored field value of each document in the result list
    // (geta.settings.process.doc[*]).
    if (!config.settings.doc.isEmpty()) {
        NamedList<Object> docList = new NamedList<Object>();

        SolrIndexSearcher searcher = rb.req.getSearcher();
        IndexSchema schema = searcher.getSchema();
        String key = schema.getUniqueKeyField().getName();
        // Load only the unique key plus the configured fields from each document.
        List<String> targetFieldNames = new ArrayList<String>(config.settings.doc.size() + 1);
        targetFieldNames.add(key);
        targetFieldNames.addAll(config.settings.doc.keySet());
        FieldSelector selector = new MapFieldSelector(targetFieldNames);

        DocIterator iterator = rb.getResults().docList.iterator();
        while (iterator.hasNext()) {
            Document doc = searcher.doc(iterator.next(), selector);
            String docKey = schema.printableUniqueKey(doc);

            NamedList<Object> fieldList = new NamedList<Object>();
            for (Entry<String, QueryType> entry : config.settings.doc.entrySet()) {

                String field = entry.getKey();

                // A multi-valued field yields one GSS3 request per stored value.
                NamedList<Object> queryList = new NamedList<Object>();
                for (Fieldable fieldable : doc.getFieldables(field)) {
                    NamedList<Object> getaResultList = convertResult(postGss3Request(client,
                            convertRequest(rb.req.getParams(), fieldable.stringValue(), entry.getValue())));
                    queryList.add(fieldable.stringValue(), getaResultList);
                }
                fieldList.add(entry.getKey(), queryList);
            }
            docList.add(docKey, fieldList);
        }
        result.add("doc", docList);
    }

    // Only attach the section when at least one query was executed.
    if (result.size() != 0) {
        rb.rsp.add("geta", result);
    }
}

From source file:org.alfresco.solr.query.SolrAuthorityScorer.java

License:Open Source License

/**
 * Creates a scorer restricted to the documents the supplied authority may read.
 * <p>
 * The readable {@link DocSet} is looked up in — and on a miss inserted into — the
 * Alfresco authority cache, so repeated permission evaluations for the same
 * authority are cheap. When the authority is a global reader, or permission
 * checks are disabled via {@code alfresco.doPermissionChecks}, all documents match.
 *
 * @param weight    the Lucene weight the scorer belongs to
 * @param context   the leaf reader context being scored
 * @param searcher  the searcher used to resolve doc sets and the authority cache
 * @param authority the authority (user or group) whose read access is evaluated
 * @return a scorer over the documents the authority can read
 * @throws IOException if resolving a doc set fails
 */
public static SolrAuthorityScorer createAuthorityScorer(Weight weight, LeafReaderContext context,
        SolrIndexSearcher searcher, String authority) throws IOException {
    Properties p = searcher.getSchema().getResourceLoader().getCoreProperties();
    boolean doPermissionChecks = Boolean.parseBoolean(p.getProperty("alfresco.doPermissionChecks", "true"));

    Query key = new SolrAuthorityQuery(authority);

    // Fast path: the answer for this authority is already cached.
    DocSet answer = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_AUTHORITY_CACHE, key);
    if (answer != null) {
        return new SolrAuthorityScorer(weight, answer, context, searcher);
    }

    final HashSet<String> globalReaders = GlobalReaders.getReaders();

    if (globalReaders.contains(authority) || !doPermissionChecks) {
        // Global reader (or checks disabled): the authority can read everything.
        DocSet allDocs = searcher.getDocSet(new MatchAllDocsQuery());
        return new SolrAuthorityScorer(weight, allDocs, context, searcher);
    }

    // Docs for which the authority has explicit read access.
    DocSet readableDocSet = searcher.getDocSet(new SolrReaderQuery(authority));
    // Docs the authority owns (and can therefore potentially read).
    DocSet authorityOwnedDocs = searcher.getDocSet(new SolrOwnerQuery(authority));

    DocSet toCache;
    if (globalReaders.contains(PermissionService.OWNER_AUTHORITY)) {
        // Owners are globally granted read: everything the authority owns is readable.
        toCache = readableDocSet.union(authorityOwnedDocs);
    } else {
        // Only owned docs whose owner has been explicitly granted read count.
        DocSet ownerReadableDocSet = searcher.getDocSet(new SolrReaderQuery(PermissionService.OWNER_AUTHORITY));
        DocSet docsAuthorityOwnsAndCanRead = ownerReadableDocSet.intersection(authorityOwnedDocs);
        toCache = readableDocSet.union(docsAuthorityOwnsAndCanRead);
    }

    // Cache the final readable set and build the scorer from it.
    searcher.cacheInsert(CacheConstants.ALFRESCO_AUTHORITY_CACHE, key, toCache);
    return new SolrAuthorityScorer(weight, toCache, context, searcher);
}

From source file:org.alfresco.solr.query.SolrAuthoritySetQuery.java

License:Open Source License

/**
 * Builds a weight matching the documents readable by at least one of the encoded
 * authorities. The {@code authorities} field is encoded as
 * {@code <sep><auth1><sep><auth2>...}, where the first character is the separator.
 * <p>
 * If any authority is a global reader, or permission checks are disabled via the
 * {@code alfresco.doPermissionChecks} core property, all documents match.
 *
 * @param searcher    must be a {@link SolrIndexSearcher}
 * @param needsScores whether scores are required by the caller
 * @return a weight over the readable documents
 * @throws IOException if building the underlying filters fails
 * @throws IllegalStateException if {@code searcher} is not a {@link SolrIndexSearcher}
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    if (!(searcher instanceof SolrIndexSearcher)) {
        throw new IllegalStateException("Must have a SolrIndexSearcher");
    }

    // Quote the separator so regex metacharacters (e.g. '|', used elsewhere in
    // this codebase as the authority separator) are treated literally by split().
    String[] auths = authorities.substring(1).split("\\Q" + authorities.substring(0, 1) + "\\E");

    SolrIndexSearcher solrIndexSearcher = (SolrIndexSearcher) searcher;
    Properties p = solrIndexSearcher.getSchema().getResourceLoader().getCoreProperties();
    boolean doPermissionChecks = Boolean.parseBoolean(p.getProperty("alfresco.doPermissionChecks", "true"));

    final HashSet<String> globalReaders = GlobalReaders.getReaders();

    // Any global reader among the authorities grants read access to everything.
    boolean hasGlobalRead = false;
    for (String auth : auths) {
        if (globalReaders.contains(auth)) {
            hasGlobalRead = true;
            break;
        }
    }

    if (hasGlobalRead || !doPermissionChecks) {
        return new MatchAllDocsQuery().createWeight(searcher, needsScores);
    }

    // Docs explicitly readable by the authorities, and docs they own.
    BitsFilter readFilter = getACLFilter(auths, QueryConstants.FIELD_READER, solrIndexSearcher);
    BitsFilter ownerFilter = getOwnerFilter(auths, solrIndexSearcher);

    if (globalReaders.contains(PermissionService.OWNER_AUTHORITY)) {
        // Owners are globally granted read: owned docs are readable outright.
        readFilter.or(ownerFilter);
        return new ConstantScoreQuery(readFilter).createWeight(searcher, needsScores);
    } else {
        // Owned docs count only where owners have been explicitly granted read.
        String[] ownerAuth = { PermissionService.OWNER_AUTHORITY };
        BitsFilter ownerReadFilter = getACLFilter(ownerAuth, QueryConstants.FIELD_READER, solrIndexSearcher);
        ownerReadFilter.and(ownerFilter);
        readFilter.or(ownerReadFilter);
        return new ConstantScoreQuery(readFilter).createWeight(searcher, needsScores);
    }
}

From source file:org.alfresco.solr.query.SolrAuthoritySetQuery.java

License:Open Source License

/**
 * Returns a post-filter collector that admits only documents readable by at least
 * one of the encoded authorities. The {@code authorities} field is encoded as
 * {@code <sep><auth1><sep><auth2>...}, where the first character is the separator.
 *
 * @param searcher must be a {@link SolrIndexSearcher}
 * @return a collector enforcing ACL read permissions, or an all-access collector
 *         when a global reader is present or permission checks are disabled
 * @throws IllegalStateException if {@code searcher} is not a {@link SolrIndexSearcher}
 * @throws RuntimeException wrapping any failure while building the ACL sets
 */
public DelegatingCollector getFilterCollector(IndexSearcher searcher) {

    // Guard the cast explicitly, consistent with createWeight().
    if (!(searcher instanceof SolrIndexSearcher)) {
        throw new IllegalStateException("Must have a SolrIndexSearcher");
    }

    // Quote the separator so regex metacharacters (e.g. '|', used elsewhere in
    // this codebase as the authority separator) are treated literally by split().
    String[] auths = authorities.substring(1).split("\\Q" + authorities.substring(0, 1) + "\\E");

    SolrIndexSearcher solrIndexSearcher = (SolrIndexSearcher) searcher;
    Properties p = solrIndexSearcher.getSchema().getResourceLoader().getCoreProperties();
    boolean doPermissionChecks = Boolean.parseBoolean(p.getProperty("alfresco.doPermissionChecks", "true"));

    final HashSet<String> globalReaders = GlobalReaders.getReaders();

    // Any global reader among the authorities grants read access to everything.
    boolean hasGlobalRead = false;
    for (String auth : auths) {
        if (globalReaders.contains(auth)) {
            hasGlobalRead = true;
            break;
        }
    }

    if (hasGlobalRead || !doPermissionChecks) {
        return new AllAccessCollector();
    }

    try {
        /*
         * Collect the ACL IDs that match the authorities. This is done by querying
         * the ACL records in the index; see getACLSet for details on the query.
         */
        HybridBitSet aclSet = getACLSet(auths, QueryConstants.FIELD_READER, solrIndexSearcher);

        // Collect the documents the authorities own.
        BitsFilter ownerFilter = getOwnerFilter(auths, solrIndexSearcher);

        if (globalReaders.contains(PermissionService.OWNER_AUTHORITY)) {
            // Owners are globally granted read: owned docs pass unconditionally.
            return new AccessControlCollector(aclSet, ownerFilter);
        } else {
            // Owned docs pass only where owners have been explicitly granted read.
            String[] ownerAuth = { PermissionService.OWNER_AUTHORITY };
            HybridBitSet ownerAclSet = getACLSet(ownerAuth, QueryConstants.FIELD_READER, solrIndexSearcher);
            return new AccessControlCollectorWithoutOwnerRead(aclSet, ownerAclSet, ownerFilter);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:org.alfresco.solr.query.SolrAuthoritySetScorer.java

License:Open Source License

/**
 * Creates a scorer restricted to the documents readable by at least one of the
 * authorities encoded in {@code authorities}
 * ({@code <sep><auth1><sep><auth2>...}, first character is the separator).
 * <p>
 * The final readable {@link DocSet} is cached in the Alfresco authority cache
 * keyed by the authorities string; the intermediate queries are wrapped in
 * non-caching {@link WrappedQuery} instances to avoid polluting the filter cache.
 *
 * @param weight      the Lucene weight the scorer belongs to
 * @param context     the leaf reader context being scored
 * @param searcher    the searcher used to resolve doc sets and the authority cache
 * @param authorities the encoded authority set whose read access is evaluated
 * @return a scorer over the documents the authorities can read
 * @throws IOException if resolving a doc set fails
 */
public static SolrAuthoritySetScorer createAuthoritySetScorer(Weight weight, LeafReaderContext context,
        SolrIndexSearcher searcher, String authorities) throws IOException {
    Properties p = searcher.getSchema().getResourceLoader().getCoreProperties();
    boolean doPermissionChecks = Boolean.parseBoolean(p.getProperty("alfresco.doPermissionChecks", "true"));

    Query key = new SolrAuthoritySetQuery(authorities);

    // Fast path: the answer for this authority set is already cached.
    DocSet answer = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_AUTHORITY_CACHE, key);
    if (answer != null) {
        return new SolrAuthoritySetScorer(weight, answer, context, searcher);
    }

    // Quote the separator so regex metacharacters (e.g. '|', used below as the
    // authority separator) are treated literally by split().
    String[] auths = authorities.substring(1).split("\\Q" + authorities.substring(0, 1) + "\\E");

    final HashSet<String> globalReaders = GlobalReaders.getReaders();

    // Any global reader among the authorities grants read access to everything.
    boolean hasGlobalRead = false;
    for (String auth : auths) {
        if (globalReaders.contains(auth)) {
            hasGlobalRead = true;
            break;
        }
    }

    if (hasGlobalRead || !doPermissionChecks) {
        // Can read all; do not cache this trivial answer in the filter cache.
        WrappedQuery wrapped = new WrappedQuery(new MatchAllDocsQuery());
        wrapped.setCache(false);
        DocSet allDocs = searcher.getDocSet(wrapped);
        return new SolrAuthoritySetScorer(weight, allDocs, context, searcher);
    }

    // Docs for which the authorities have explicit read access.
    WrappedQuery wrapped = new WrappedQuery(new SolrReaderSetQuery(authorities));
    wrapped.setCache(false);
    DocSet readableDocSet = searcher.getDocSet(wrapped);

    // Docs owned by any of the authorities.
    wrapped = new WrappedQuery(new SolrOwnerSetQuery(authorities));
    wrapped.setCache(false);
    DocSet authorityOwnedDocs = searcher.getDocSet(wrapped);

    DocSet toCache;
    if (globalReaders.contains(PermissionService.OWNER_AUTHORITY)) {
        // Owners are globally granted read: everything the authorities own is readable.
        toCache = readableDocSet.union(authorityOwnedDocs);
    } else {
        // Only owned docs whose owner has been explicitly granted read count.
        wrapped = new WrappedQuery(new SolrReaderSetQuery("|" + PermissionService.OWNER_AUTHORITY));
        wrapped.setCache(false);
        DocSet ownerReadableDocSet = searcher.getDocSet(wrapped);
        DocSet docsAuthorityOwnsAndCanRead = ownerReadableDocSet.intersection(authorityOwnedDocs);
        toCache = readableDocSet.union(docsAuthorityOwnsAndCanRead);
    }

    // Cache the final readable set and build the scorer from it.
    searcher.cacheInsert(CacheConstants.ALFRESCO_AUTHORITY_CACHE, key, toCache);
    return new SolrAuthoritySetScorer(weight, toCache, context, searcher);
}