Example usage for org.apache.solr.client.solrj SolrQuery setShowDebugInfo

List of usage examples for org.apache.solr.client.solrj SolrQuery setShowDebugInfo

Introduction

On this page you can find example usage of org.apache.solr.client.solrj SolrQuery.setShowDebugInfo.

Prototype

public SolrQuery setShowDebugInfo(boolean showDebugInfo) 
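
Before the real-world examples, here is a minimal, self-contained sketch of the method in isolation. It assumes SolrJ's HttpSolrClient (the Builder API available in SolrJ 6 and later); the Solr URL, core name, field name, and class name are illustrative placeholders rather than values taken from the examples below. Calling setShowDebugInfo(true) asks Solr to include query debugging output (the debugQuery parameter) in the response, which can then be read from QueryResponse.getDebugMap().

import java.util.Map;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class ShowDebugInfoExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical Solr URL and core; adjust for your own setup.
        try (HttpSolrClient client = new HttpSolrClient.Builder(
                "http://localhost:8983/solr/collection1").build()) {

            SolrQuery q = new SolrQuery("text:example"); // hypothetical field and term
            q.setShowDebugInfo(true); // request debug output (debugQuery) in the response
            q.setRows(10);

            QueryResponse response = client.query(q, SolrRequest.METHOD.POST);

            // With debug info enabled, the response carries query parsing and timing details.
            Map<String, Object> debug = response.getDebugMap();
            if (debug != null) {
                System.out.println("Parsed query: " + debug.get("parsedquery"));
            }
        }
    }
}

The Autopsy examples below pass a DEBUG constant rather than a literal true, which keeps the extra debug overhead out of production queries while making it easy to enable during development.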

Usage

From source file: org.sleuthkit.autopsy.keywordsearch.AccountsText.java

License: Open Source License

/**
 * Initialize this object with information about which pages/chunks have
 * hits. Multiple calls will not change the initial results.
 */
synchronized private void loadPageInfo() {
    if (isPageInfoLoaded) {
        return;
    }
    if (chunkId != null) {//if a chunk is specified, only show that chunk/page
        this.numberPagesForFile = 1;
        this.currentPage = chunkId;
        this.numberOfHitsPerPage.put(chunkId, 0);
        this.pages.add(chunkId);
        this.currentHitPerPage.put(chunkId, 0);
    } else {
        try {
            this.numberPagesForFile = solrServer.queryNumFileChunks(this.solrObjectId);
        } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
            LOGGER.log(Level.WARNING, "Could not get number pages for content " + this.solrDocumentId, ex); //NON-NLS
            return;
        }

        //if has chunks, get pages with hits
        TreeSet<Integer> sortedPagesWithHits = new TreeSet<>();
        SolrQuery q = new SolrQuery();
        q.setShowDebugInfo(DEBUG); //debug
        q.setQuery(queryString);
        q.setFields(Server.Schema.ID.toString()); //for this case we only need the document ids
        q.addFilterQuery(
                Server.Schema.ID.toString() + ":" + this.solrObjectId + Server.CHUNK_ID_SEPARATOR + "*");

        try {
            QueryResponse response = solrServer.query(q, METHOD.POST);
            for (SolrDocument resultDoc : response.getResults()) {
                final String resultDocumentId = resultDoc.getFieldValue(Server.Schema.ID.toString()).toString();
                // Put the solr chunk id in the map
                String resultChunkID = StringUtils.substringAfter(resultDocumentId, Server.CHUNK_ID_SEPARATOR);
                if (StringUtils.isNotBlank(resultChunkID)) {
                    sortedPagesWithHits.add(Integer.parseInt(resultChunkID));
                } else {
                    sortedPagesWithHits.add(0);
                }
            }

        } catch (KeywordSearchModuleException | NoOpenCoreException | NumberFormatException ex) {
            LOGGER.log(Level.WARNING, "Error executing Solr highlighting query: " + keywords, ex); //NON-NLS
        }

        //set page to first page having highlights
        if (sortedPagesWithHits.isEmpty()) {
            this.currentPage = 0;
        } else {
            this.currentPage = sortedPagesWithHits.first();
        }

        for (Integer page : sortedPagesWithHits) {
            numberOfHitsPerPage.put(page, 0); //unknown number of matches in the page
            pages.add(page);
            currentHitPerPage.put(page, 0); //set current hit to 0th
        }
    }

    isPageInfoLoaded = true;
}

From source file: org.sleuthkit.autopsy.keywordsearch.AccountsText.java

License: Open Source License

@Override
@NbBundle.Messages({ "AccountsText.getMarkup.noMatchMsg="
        + "<html><pre><span style\\\\='background\\\\:yellow'>There were no keyword hits on this page. <br />"
        + "The keyword could have been in the file name."
        + " <br />Advance to another page if present, or to view the original text, choose File Text"
        + " <br />in the drop down menu to the right...</span></pre></html>",
        "AccountsText.getMarkup.queryFailedMsg="
                + "<html><pre><span style\\\\='background\\\\:yellow'>Failed to retrieve keyword hit results."
                + " <br />Confirm that Autopsy can connect to the Solr server. "
                + "<br /></span></pre></html>" })
public String getText() {
    loadPageInfo(); //inits once

    SolrQuery q = new SolrQuery();
    q.setShowDebugInfo(DEBUG); //debug
    q.addHighlightField(HIGHLIGHT_FIELD);
    q.setQuery(queryString);

    //set the documentID filter
    String queryDocumentID = this.solrObjectId + Server.CHUNK_ID_SEPARATOR + this.currentPage;
    q.addFilterQuery(Server.Schema.ID.toString() + ":" + queryDocumentID);

    //configure the highlighter
    q.setParam("hl.useFastVectorHighlighter", "true"); //fast highlighter scales better than standard one NON-NLS
    q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS
    q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS
    q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS
    q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //docs says makes sense for the original Highlighter only, but not really //NON-NLS

    try {
        //extract highlighting and bail early on null responses
        Map<String, Map<String, List<String>>> highlightingPerDocument = solrServer.query(q, METHOD.POST)
                .getHighlighting();
        Map<String, List<String>> highlightingPerField = highlightingPerDocument.get(queryDocumentID);
        if (highlightingPerField == null) {
            return Bundle.AccountsText_getMarkup_noMatchMsg();
        }
        List<String> highlights = highlightingPerField.get(HIGHLIGHT_FIELD);
        if (highlights == null) {
            return Bundle.AccountsText_getMarkup_noMatchMsg();
        }

        //There should only be one item
        String highlighting = highlights.get(0).trim();

        /*
         * use regex matcher to iterate over occurrences of HIGHLIGHT_PRE,
         * and prepend them with an anchor tag.
         */
        Matcher m = ANCHOR_DETECTION_PATTERN.matcher(highlighting);
        StringBuffer sb = new StringBuffer(highlighting.length());
        int count = 0;
        while (m.find()) {
            count++;
            m.appendReplacement(sb, INSERT_PREFIX + count + INSERT_POSTFIX);
        }
        m.appendTail(sb);

        //store total hits for this page, now that we know it
        this.numberOfHitsPerPage.put(this.currentPage, count);
        if (this.currentItem() == 0 && this.hasNextItem()) {
            this.nextItem();
        }

        // extracted content (minus highlight tags) is HTML-escaped
        return "<html><pre>" + sb.toString() + "</pre></html>"; //NON-NLS
    } catch (Exception ex) {
        LOGGER.log(Level.WARNING, "Error executing Solr highlighting query: " + keywords, ex); //NON-NLS
        return Bundle.AccountsText_getMarkup_queryFailedMsg();
    }
}

From source file: org.sleuthkit.autopsy.keywordsearch.HighlightedMatchesSource.java

License: Open Source License

@Override
public String getMarkup() {
    init(); //inits once

    String highLightField = null;

    String highlightQuery = keywordHitQuery;

    if (isRegex) {
        highLightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
        //escape special lucene chars if not already escaped (if not a compound query)
        //TODO a better way to mark it a compound highlight query
        final String findSubstr = LuceneQuery.HIGHLIGHT_FIELD_REGEX + ":";
        if (!highlightQuery.contains(findSubstr)) {
            highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
        }
    } else {
        highLightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
        //always escape special lucene chars for literal queries
        highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
    }

    SolrQuery q = new SolrQuery();
    q.setShowDebugInfo(DEBUG); //debug

    String queryStr = null;

    if (isRegex) {
        StringBuilder sb = new StringBuilder();
        sb.append(highLightField).append(":");
        if (group) {
            sb.append("\"");
        }
        sb.append(highlightQuery);
        if (group) {
            sb.append("\"");
        }
        queryStr = sb.toString();
    } else {
        //use default field, simplifies query
        //always force grouping/quotes
        queryStr = KeywordSearchUtil.quoteQuery(highlightQuery);
    }

    q.setQuery(queryStr);

    final long contentId = content.getId();

    String contentIdStr = Long.toString(contentId);
    if (hasChunks) {
        contentIdStr += "_" + Integer.toString(this.currentPage);
    }

    final String filterQuery = Server.Schema.ID.toString() + ":" + contentIdStr;
    q.addFilterQuery(filterQuery);
    q.addHighlightField(highLightField); //for exact highlighting, try content_ws field (with stored="true" in Solr schema)

    //q.setHighlightSimplePre(HIGHLIGHT_PRE); //original highlighter only
    //q.setHighlightSimplePost(HIGHLIGHT_POST); //original highlighter only
    q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH

    //tune the highlighter
    q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one
    q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only
    q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only
    q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only

    //docs says makes sense for the original Highlighter only, but not really
    q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);

    try {
        QueryResponse response = solrServer.query(q, METHOD.POST);
        Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();

        Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr);
        if (responseHighlightID == null) {
            return NO_MATCHES;
        }
        List<String> contentHighlights = responseHighlightID.get(highLightField);
        if (contentHighlights == null) {
            return NO_MATCHES;
        } else {
            // extracted content (minus highlight tags) is HTML-escaped
            String highlightedContent = contentHighlights.get(0).trim();
            highlightedContent = insertAnchors(highlightedContent);

            return "<html><pre>" + highlightedContent + "</pre></html>";
        }
    } catch (NoOpenCoreException ex) {
        logger.log(Level.WARNING, "Couldn't query markup for page: " + currentPage, ex);
        return "";
    } catch (KeywordSearchModuleException ex) {
        logger.log(Level.WARNING, "Could not query markup for page: " + currentPage, ex);
        return "";
    }
}

From source file: org.sleuthkit.autopsy.keywordsearch.HighlightedText.java

License: Open Source License

@Override
public String getText() {
    loadPageInfo(); //inits once

    String highLightField = null;

    if (isRegex) {
        highLightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
    } else {
        highLightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
    }

    SolrQuery q = new SolrQuery();
    q.setShowDebugInfo(DEBUG); //debug

    // input query has already been properly constructed and escaped
    q.setQuery(keywordHitQuery);

    String contentIdStr = Long.toString(this.objectId);
    if (hasChunks) {
        contentIdStr += "_" + Integer.toString(this.currentPage);
    }

    final String filterQuery = Server.Schema.ID.toString() + ":"
            + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);
    q.addFilterQuery(filterQuery);
    q.addHighlightField(highLightField); //for exact highlighting, try content_ws field (with stored="true" in Solr schema)

    //q.setHighlightSimplePre(HIGHLIGHT_PRE); //original highlighter only
    //q.setHighlightSimplePost(HIGHLIGHT_POST); //original highlighter only
    q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH

    //tune the highlighter
    q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
    q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS
    q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS
    q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS

    //docs says makes sense for the original Highlighter only, but not really
    q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS

    try {
        QueryResponse response = solrServer.query(q, METHOD.POST);
        Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();

        Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr);
        if (responseHighlightID == null) {
            return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg");
        }
        List<String> contentHighlights = responseHighlightID.get(highLightField);
        if (contentHighlights == null) {
            return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg");
        } else {
            // extracted content (minus highlight tags) is HTML-escaped
            String highlightedContent = contentHighlights.get(0).trim();
            highlightedContent = insertAnchors(highlightedContent);

            return "<html><pre>" + highlightedContent + "</pre></html>"; //NON-NLS
        }
    } catch (Exception ex) {
        logger.log(Level.WARNING, "Error executing Solr highlighting query: " + keywordHitQuery, ex); //NON-NLS
        return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.queryFailedMsg");
    }
}

From source file: org.sleuthkit.autopsy.keywordsearch.HighlightedTextMarkup.java

License: Open Source License

@Override
public String getMarkup() {
    loadPageInfo(); //inits once

    String highLightField = null;

    String highlightQuery = keywordHitQuery;

    if (isRegex) {
        highLightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
        //escape special lucene chars if not already escaped (if not a compound query)
        //TODO a better way to mark it a compound highlight query
        final String findSubstr = LuceneQuery.HIGHLIGHT_FIELD_REGEX + ":";
        if (!highlightQuery.contains(findSubstr)) {
            highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
        }
    } else {
        highLightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
        //always escape special lucene chars for literal queries
        highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
    }

    SolrQuery q = new SolrQuery();
    q.setShowDebugInfo(DEBUG); //debug

    String queryStr = null;

    if (isRegex) {
        StringBuilder sb = new StringBuilder();
        sb.append(highLightField).append(":");
        if (group) {
            sb.append("\"");
        }
        sb.append(highlightQuery);
        if (group) {
            sb.append("\"");
        }
        queryStr = sb.toString();
    } else {
        //use default field, simplifies query
        //always force grouping/quotes
        queryStr = KeywordSearchUtil.quoteQuery(highlightQuery);
    }

    q.setQuery(queryStr);

    String contentIdStr = Long.toString(this.objectId);
    if (hasChunks) {
        contentIdStr += "_" + Integer.toString(this.currentPage);
    }

    final String filterQuery = Server.Schema.ID.toString() + ":"
            + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);
    q.addFilterQuery(filterQuery);
    q.addHighlightField(highLightField); //for exact highlighting, try content_ws field (with stored="true" in Solr schema)

    //q.setHighlightSimplePre(HIGHLIGHT_PRE); //original highlighter only
    //q.setHighlightSimplePost(HIGHLIGHT_POST); //original highlighter only
    q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH

    //tune the highlighter
    q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
    q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS
    q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS
    q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS

    //docs says makes sense for the original Highlighter only, but not really
    q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS

    try {
        QueryResponse response = solrServer.query(q, METHOD.POST);
        Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();

        Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr);
        if (responseHighlightID == null) {
            return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg");

        }
        List<String> contentHighlights = responseHighlightID.get(highLightField);
        if (contentHighlights == null) {
            return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg");
        } else {
            // extracted content (minus highlight tags) is HTML-escaped
            String highlightedContent = contentHighlights.get(0).trim();
            highlightedContent = insertAnchors(highlightedContent);

            return "<html><pre>" + highlightedContent + "</pre></html>"; //NON-NLS
        }
    } catch (NoOpenCoreException ex) {
        logger.log(Level.WARNING, "Couldn't query markup for page: " + currentPage, ex); //NON-NLS
        return "";
    } catch (KeywordSearchModuleException ex) {
        logger.log(Level.WARNING, "Could not query markup for page: " + currentPage, ex); //NON-NLS
        return "";
    }
}

From source file: org.sleuthkit.autopsy.keywordsearch.LuceneQuery.java

License: Open Source License

/**
 * Create the query object for the stored keyword
 *
 * @param snippets True if query should request snippets
 * @return The configured SolrQuery
 */
private SolrQuery createAndConfigureSolrQuery(boolean snippets) {
    SolrQuery q = new SolrQuery();
    q.setShowDebugInfo(DEBUG); //debug
    //set query, force quotes/grouping around all literal queries
    final String groupedQuery = KeywordSearchUtil.quoteQuery(keywordStringEscaped);
    String theQueryStr = groupedQuery;
    if (field != null) {
        //use the optional field
        StringBuilder sb = new StringBuilder();
        sb.append(field).append(":").append(groupedQuery);
        theQueryStr = sb.toString();
    }
    q.setQuery(theQueryStr);
    q.setRows(MAX_RESULTS);

    //in both cases we only need the document ids
    q.setFields(Server.Schema.ID.toString());

    for (KeywordQueryFilter filter : filters) {
        q.addFilterQuery(filter.toString());
    }

    if (snippets) {
        q.addHighlightField(Server.Schema.TEXT.toString());
        //q.setHighlightSimplePre("&laquo;"); //original highlighter only
        //q.setHighlightSimplePost("&raquo;");  //original highlighter only
        q.setHighlightSnippets(1);
        q.setHighlightFragsize(SNIPPET_LENGTH);

        //tune the highlighter
        q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
        q.setParam("hl.tag.pre", "&laquo;"); //makes sense for FastVectorHighlighter only NON-NLS
        q.setParam("hl.tag.post", "&laquo;"); //makes sense for FastVectorHighlighter only NON-NLS
        q.setParam("hl.fragListBuilder", "simple"); //makes sense for FastVectorHighlighter only NON-NLS

        //Solr bug if fragCharSize is smaller than Query string, StringIndexOutOfBoundsException is thrown.
        q.setParam("hl.fragCharSize", Integer.toString(theQueryStr.length())); //makes sense for FastVectorHighlighter only NON-NLS

        //docs says makes sense for the original Highlighter only, but not really
        //analyze all content SLOW! consider lowering
        q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS
    }

    return q;
}

From source file: org.sleuthkit.autopsy.keywordsearch.LuceneQuery.java

License: Open Source License

/**
 * Return the snippet preview context.
 *
 * @param query the keyword query for text to highlight. Lucene special
 * chars should already be escaped.
 * @param solrObjectId Solr object id associated with the hit
 * @param chunkID chunk id associated with the content hit, or 0 if no
 * chunks
 * @param isRegex whether the query is a regular expression (different Solr
 * fields are then used to generate the preview)
 * @param group whether the query should look for all terms grouped together
 * in the query order, or not
 * @return The highlighted snippet, or an empty string if no highlight was found
 */
public static String querySnippet(String query, long solrObjectId, int chunkID, boolean isRegex, boolean group)
        throws NoOpenCoreException {
    Server solrServer = KeywordSearch.getServer();

    String highlightField;
    if (isRegex) {
        highlightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
    } else {
        highlightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
    }

    SolrQuery q = new SolrQuery();

    String queryStr;

    if (isRegex) {
        StringBuilder sb = new StringBuilder();
        sb.append(highlightField).append(":");
        if (group) {
            sb.append("\"");
        }
        sb.append(query);
        if (group) {
            sb.append("\"");
        }

        queryStr = sb.toString();
    } else {
        //simplify query/escaping and use default field
        //always force grouping/quotes
        queryStr = KeywordSearchUtil.quoteQuery(query);
    }

    q.setQuery(queryStr);

    String contentIDStr;

    if (chunkID == 0) {
        contentIDStr = Long.toString(solrObjectId);
    } else {
        contentIDStr = Server.getChunkIdString(solrObjectId, chunkID);
    }

    String idQuery = Server.Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(contentIDStr);
    q.setShowDebugInfo(DEBUG); //debug
    q.addFilterQuery(idQuery);
    q.addHighlightField(highlightField);
    //q.setHighlightSimplePre("&laquo;"); //original highlighter only
    //q.setHighlightSimplePost("&raquo;");  //original highlighter only
    q.setHighlightSnippets(1);
    q.setHighlightFragsize(SNIPPET_LENGTH);

    //tune the highlighter
    q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
    q.setParam("hl.tag.pre", "&laquo;"); //makes sense for FastVectorHighlighter only NON-NLS
    q.setParam("hl.tag.post", "&laquo;"); //makes sense for FastVectorHighlighter only NON-NLS
    q.setParam("hl.fragListBuilder", "simple"); //makes sense for FastVectorHighlighter only NON-NLS

    //Solr bug if fragCharSize is smaller than Query string, StringIndexOutOfBoundsException is thrown.
    q.setParam("hl.fragCharSize", Integer.toString(queryStr.length())); //makes sense for FastVectorHighlighter only NON-NLS

    //docs says makes sense for the original Highlighter only, but not really
    //analyze all content SLOW! consider lowering
    q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS

    try {
        QueryResponse response = solrServer.query(q, METHOD.POST);
        Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
        Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIDStr);
        if (responseHighlightID == null) {
            return "";
        }
        List<String> contentHighlights = responseHighlightID.get(highlightField);
        if (contentHighlights == null) {
            return "";
        } else {
            // extracted content is HTML-escaped, but snippet goes in a plain text field
            return EscapeUtil.unEscapeHtml(contentHighlights.get(0)).trim();
        }
    } catch (NoOpenCoreException ex) {
        logger.log(Level.WARNING, "Error executing Lucene Solr Query: " + query, ex); //NON-NLS
        throw ex;
    } catch (KeywordSearchModuleException ex) {
        logger.log(Level.WARNING, "Error executing Lucene Solr Query: " + query, ex); //NON-NLS
        return "";
    }
}

From source file: org.sleuthkit.autopsy.keywordsearch.TermComponentQuery.java

License: Open Source License

@Override
public QueryResults performQuery() throws NoOpenCoreException {

    final SolrQuery q = createQuery();
    q.setShowDebugInfo(DEBUG);
    q.setTermsLimit(MAX_TERMS_RESULTS);
    logger.log(Level.INFO, "Query: {0}", q.toString()); //NON-NLS
    terms = executeQuery(q);

    QueryResults results = new QueryResults(this, keywordList);
    int resultSize = 0;

    for (Term term : terms) {
        final String termStr = KeywordSearchUtil.escapeLuceneQuery(term.getTerm());

        LuceneQuery filesQuery = new LuceneQuery(keywordList, new Keyword(termStr, true));

        //filesQuery.setField(TERMS_SEARCH_FIELD);
        for (KeywordQueryFilter filter : filters) {
            //set filter
            //note: we can't set filter query on terms query
            //but setting filter query on terms results query will yield the same result
            filesQuery.addFilter(filter);
        }
        try {
            QueryResults subResults = filesQuery.performQuery();
            Set<KeywordHit> filesResults = new HashSet<>();
            for (Keyword key : subResults.getKeywords()) {
                List<KeywordHit> keyRes = subResults.getResults(key);
                resultSize += keyRes.size();
                filesResults.addAll(keyRes);
            }
            results.addResult(new Keyword(term.getTerm(), false), new ArrayList<>(filesResults));
        } catch (NoOpenCoreException e) {
            logger.log(Level.WARNING, "Error executing Solr query,", e); //NON-NLS
            throw e;
        } catch (RuntimeException e) {
            logger.log(Level.WARNING, "Error executing Solr query,", e); //NON-NLS
        }

    }

    //TODO limit how many results we store, not to hit memory limits
    logger.log(Level.INFO, "Regex # results: {0}", resultSize); //NON-NLS

    return results;
}

From source file: org.sleuthkit.autopsy.keywordsearch.TermsComponentQuery.java

License: Open Source License

/**
 * Executes the regex query as a two step operation. In the first step, the
 * Solr terms component is used to find any terms in the index that match
 * the regex. In the second step, term queries are executed for each matched
 * term to produce the set of keyword hits for the regex.
 *
 * @return A QueryResults object.
 *
 * @throws KeywordSearchModuleException
 * @throws NoOpenCoreException
 */
@Override
public QueryResults performQuery() throws KeywordSearchModuleException, NoOpenCoreException {
    /*
     * Do a query using the Solr terms component to find any terms in the
     * index that match the regex.
     */
    final SolrQuery termsQuery = new SolrQuery();
    termsQuery.setRequestHandler(SEARCH_HANDLER);
    termsQuery.setTerms(true);
    termsQuery.setTermsRegexFlag(CASE_INSENSITIVE);
    termsQuery.setTermsRegex(searchTerm);
    termsQuery.addTermsField(SEARCH_FIELD);
    termsQuery.setTimeAllowed(TERMS_SEARCH_TIMEOUT);
    termsQuery.setShowDebugInfo(DEBUG_FLAG);
    termsQuery.setTermsLimit(MAX_TERMS_QUERY_RESULTS);
    List<Term> terms = KeywordSearch.getServer().queryTerms(termsQuery).getTerms(SEARCH_FIELD);
    /*
     * Do a term query for each term that matched the regex.
     */
    QueryResults results = new QueryResults(this, keywordList);
    for (Term term : terms) {
        /*
         * If searching for credit card account numbers, do a Luhn check on
         * the term and discard it if it does not pass.
         */
        if (keyword.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
            Matcher matcher = CREDIT_CARD_NUM_PATTERN.matcher(term.getTerm());
            matcher.find();
            final String ccn = CharMatcher.anyOf(" -").removeFrom(matcher.group("ccn"));
            if (false == CREDIT_CARD_NUM_LUHN_CHECK.isValid(ccn)) {
                continue;
            }
        }

        /*
         * Do an ordinary query with the escaped term and convert the query
         * results into a single list of keyword hits without duplicates.
         *
         * Note that the filters field appears to be unused. There is an old
         * comment here, what does it mean? "Note: we can't set filter query
         * on terms query but setting filter query on fileResults query will
         * yield the same result." The filter is NOT being added to the term
         * query.
         */
        String escapedTerm = KeywordSearchUtil.escapeLuceneQuery(term.getTerm());
        LuceneQuery termQuery = new LuceneQuery(keywordList, new Keyword(escapedTerm, true));
        filters.forEach(termQuery::addFilter); // This appears to be unused
        QueryResults termQueryResult = termQuery.performQuery();
        Set<KeywordHit> termHits = new HashSet<>();
        for (Keyword word : termQueryResult.getKeywords()) {
            termHits.addAll(termQueryResult.getResults(word));
        }
        results.addResult(new Keyword(term.getTerm(), false), new ArrayList<>(termHits));
    }
    return results;
}