net.yacy.cora.federate.solr.responsewriter.GrepHTMLResponseWriter.java Source code

Introduction

Here is the source code for net.yacy.cora.federate.solr.responsewriter.GrepHTMLResponseWriter.java
Source

/**
 *  GrepHTMLResponseWriter
 *  Copyright 2013 by Michael Peter Christen
 *  First released 09.06.2013 at http://yacy.net
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */

package net.yacy.cora.federate.solr.responsewriter;

import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Set;
import java.util.regex.Pattern;

import net.yacy.document.SentenceReader;
import net.yacy.search.schema.CollectionSchema;

import org.apache.lucene.document.Document;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.XML;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.QueryResponseWriter;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;

/**
 * This response writer shows a list of documents together with the lines that contain
 * matches for the search request in 'grep style', i.e. it works like running grep on a
 * set of files. Within the result list, each document is split into the sentences of its
 * text part and each sentence is shown as a separate line. Grep attributes can be used to
 * show leading and trailing lines.
 */
public class GrepHTMLResponseWriter implements QueryResponseWriter {

    /** Names of the stored fields that are loaded for every document of the result list. */
    private static final Set<String> DEFAULT_FIELD_LIST = new HashSet<String>();

    /** Matches a double quote; used to percent-encode quotes in the API link. */
    private static final Pattern dqp = Pattern.compile("\"");

    static {
        DEFAULT_FIELD_LIST.add(CollectionSchema.id.getSolrFieldName());
        DEFAULT_FIELD_LIST.add(CollectionSchema.sku.getSolrFieldName());
        DEFAULT_FIELD_LIST.add(CollectionSchema.title.getSolrFieldName());
        DEFAULT_FIELD_LIST.add(CollectionSchema.text_t.getSolrFieldName());
    }

    public GrepHTMLResponseWriter() {
        super();
    }

    /**
     * @return the MIME type of the generated page, always {@code text/html}
     */
    @Override
    public String getContentType(final SolrQueryRequest request, final SolrQueryResponse response) {
        return "text/html";
    }

    /**
     * This writer takes no configuration; the given list is ignored.
     */
    @Override
    public void init(@SuppressWarnings("rawtypes") NamedList n) {
    }

    /**
     * Writes the result list of the response as an HTML page. For every document the title and
     * the sentences of the text field are collected and passed to
     * {@link #writeDoc(Writer, String, ArrayList, String)}, which prints only the lines that
     * contain the grep phrase.
     * <p>
     * The grep phrase is taken from the {@code grep} request parameter. If that parameter is
     * missing, the term extracted from the {@code q} parameter is used instead. Surrounding
     * double quotes are removed from both.
     *
     * @param writer  the target the HTML page is written to
     * @param request the Solr request; supplies the original parameters, searcher and schema
     * @param rsp     the Solr response; its "response" value is read as a {@link ResultContext}
     * @throws IOException if writing to {@code writer} or loading a document fails
     */
    @Override
    public void write(final Writer writer, final SolrQueryRequest request, final SolrQueryResponse rsp)
            throws IOException {
        NamedList<?> values = rsp.getValues();
        assert values.get("responseHeader") != null;
        assert values.get("response") != null;

        writer.write(
                "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n");
        writer.write("<link rel=\"stylesheet\" type=\"text/css\" media=\"all\" href=\"/env/base.css\" />\n");
        writer.write("<link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" href=\"/env/style.css\" />\n");

        SolrParams params = request.getOriginalParams();
        final String query = extractQueryTerm(params.get(CommonParams.Q));
        String grep = params.get("grep");
        if (grep == null) {
            // no explicit grep phrase: fall back to the query term (which may be empty)
            grep = query;
        }
        grep = stripQuotes(grep);

        // link to the same request without the 'wt' parameter
        NamedList<Object> paramsList = params.toNamedList();
        paramsList.remove("wt");
        String xmlquery = dqp.matcher("/solr/select?" + SolrParams.toSolrParams(paramsList).toString())
                .replaceAll("%22");

        // with assertions disabled the response object may still be missing; treat that as "no hits"
        final ResultContext context = (ResultContext) values.get("response");
        final DocList response = context == null ? null : context.docs;
        final int sz = response == null ? 0 : response.size();
        if (sz > 0) {
            SolrIndexSearcher searcher = request.getSearcher();
            DocIterator iterator = response.iterator();
            IndexSchema schema = request.getSchema();
            String h1 = "Document Grep for query \"" + query + "\" and grep phrase \"" + grep + "\"";
            // query and grep phrase come from the request and must be escaped before they are embedded in HTML
            writer.write("<title>");
            XML.escapeCharData(h1, writer);
            writer.write("</title>\n</head><body>\n<h1>");
            XML.escapeCharData(h1, writer);
            writer.write("</h1>\n");
            writer.write("<div id=\"api\"><a href=\"");
            XML.escapeAttributeValue(xmlquery, writer);
            writer.write("\"><img src=\"../env/grafics/api.png\" width=\"60\" height=\"40\" alt=\"API\" /></a>\n");
            writer.write(
                    "<span>This search result can also be retrieved as XML. Click the API icon to see an example call to the search rss API.</span></div>\n");
            for (int i = 0; i < sz; i++) {
                int id = iterator.nextDoc();
                Document doc = searcher.doc(id, DEFAULT_FIELD_LIST);
                LinkedHashMap<String, String> tdoc = HTMLResponseWriter.translateDoc(schema, doc);
                String sku = tdoc.get(CollectionSchema.sku.getSolrFieldName());
                String title = tdoc.get(CollectionSchema.title.getSolrFieldName());
                String text = tdoc.get(CollectionSchema.text_t.getSolrFieldName());

                // the title is treated as the first line of the document
                ArrayList<String> sentences = new ArrayList<String>();
                if (title != null) {
                    sentences.add(title);
                }
                // a document without a stored text field contributes only its title
                if (text != null) {
                    SentenceReader sr = new SentenceReader(text);
                    while (sr.hasNext()) {
                        StringBuilder line = sr.next();
                        if (line.length() > 0) {
                            sentences.add(line.toString());
                        }
                    }
                }
                writeDoc(writer, sku, sentences, grep);
            }
        } else {
            writer.write("<title>No Document Found</title>\n</head><body>\n");
        }

        writer.write("</body></html>\n");
    }

    /**
     * Extracts the search term that follows the first {@code field:} prefix of a query string.
     * A quoted term ends at the closing double quote, an unquoted term at the next space; in both
     * cases the term extends to the end of the string if no terminator is found.
     *
     * @param q the raw value of the {@code q} parameter, may be {@code null}
     * @return the term without surrounding double quotes, or an empty string if {@code q} is
     *         {@code null}, contains no colon, or has nothing after the colon
     */
    private static String extractQueryTerm(final String q) {
        if (q == null) {
            return "";
        }
        final int colon = q.indexOf(':');
        final int start = colon + 1;
        if (colon < 0 || start >= q.length()) {
            return "";
        }
        // search for the terminator only behind the start of the term
        int end = q.charAt(start) == '"' ? q.indexOf('"', start + 1) : q.indexOf(' ', start);
        if (end < 0) {
            end = q.length();
        }
        return stripQuotes(q.substring(start, end));
    }

    /**
     * Removes one leading and one trailing double quote, if present.
     *
     * @param s the string to strip, must not be {@code null}
     * @return the string without the surrounding quotes
     */
    private static String stripQuotes(final String s) {
        String stripped = s;
        if (stripped.length() > 0 && stripped.charAt(0) == '"') {
            stripped = stripped.substring(1);
        }
        // length is checked again: a lone quote is already empty after the first step
        if (stripped.length() > 0 && stripped.charAt(stripped.length() - 1) == '"') {
            stripped = stripped.substring(0, stripped.length() - 1);
        }
        return stripped;
    }

    /**
     * Writes one document as an HTML form containing a heading with the document url and a
     * definition list with one entry per printed line. If a non-empty grep phrase is given, only
     * the lines that contain the phrase (case-sensitive substring match) are printed.
     *
     * @param writer    the target the HTML is written to
     * @param url       the url of the document; {@code null} is written as an empty string
     * @param sentences the lines of the document
     * @param grep      the grep phrase; {@code null} or empty prints all lines
     * @throws IOException if writing fails
     */
    private static final void writeDoc(Writer writer, String url, ArrayList<String> sentences, String grep)
            throws IOException {
        final String safeUrl = url == null ? "" : url;
        final boolean filter = grep != null && grep.length() > 0;
        // the url originates from indexed content and is escaped like any other untrusted text
        writer.write("<form name=\"yacydoc");
        XML.escapeAttributeValue(safeUrl, writer);
        writer.write("\" method=\"post\" action=\"#\" enctype=\"multipart/form-data\" accept-charset=\"UTF-8\">\n");
        writer.write("<fieldset>\n");
        writer.write("<h1><a href=\"");
        XML.escapeAttributeValue(safeUrl, writer);
        writer.write("\">");
        XML.escapeCharData(safeUrl, writer);
        writer.write("</a></h1>\n");
        writer.write("<dl>\n");
        int c = 0;
        for (String line : sentences) {
            if (filter && line.indexOf(grep) < 0) {
                continue;
            }
            writer.write("<dt>");
            // only the first printed line gets a caption, the other <dt> elements stay empty
            if (c++ == 0) {
                if (filter) {
                    writer.write("matches for grep phrase \"");
                    XML.escapeCharData(grep, writer);
                    writer.write("\"");
                } else {
                    writer.write("all lines in document");
                }
            }
            writer.write("</dt>");
            writedd(writer, line, grep);
        }
        writer.write("</dl>\n");
        writer.write("</fieldset>\n");
        writer.write("</form>\n");
    }

    /**
     * Writes one line of a document as a {@code <dd>} element. The line is rendered as a link
     * that starts a new grep search for exactly this line, keeping the current grep phrase.
     *
     * @param writer the target the HTML is written to
     * @param line   the line to print
     * @param grep   the current grep phrase; {@code null} is treated as an empty phrase
     * @throws IOException if writing fails
     */
    private static void writedd(Writer writer, String line, String grep) throws IOException {
        final String safeGrep = grep == null ? "" : grep;
        // Values are percent-encoded because they are part of a URL query string; the encoded form
        // contains no characters that need further escaping inside an HTML attribute.
        // The fully-qualified name is used so that no additional import is required.
        writer.write("<dd><a href=\"/solr/select?q=text_t:%22");
        writer.write(java.net.URLEncoder.encode(line, "UTF-8"));
        writer.write("%22&amp;rows=100&amp;grep=%22");
        writer.write(java.net.URLEncoder.encode(safeGrep, "UTF-8"));
        writer.write("%22&amp;wt=grephtml\">");
        XML.escapeCharData(line, writer);
        writer.write("</a></dd>\n");
    }

}