opennlp.tools.similarity.apps.solr.SearchResultsReRankerRequestHandler.java Source code

Java tutorial

Introduction

Here is the source code for opennlp.tools.similarity.apps.solr.SearchResultsReRankerRequestHandler.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package opennlp.tools.similarity.apps.solr;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import opennlp.tools.similarity.apps.HitBase;
import opennlp.tools.similarity.apps.utils.Pair;
import opennlp.tools.textsimilarity.ParseTreeChunk;
import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;
import opennlp.tools.textsimilarity.SentencePairMatchResult;
import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.SearchHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;

public class SearchResultsReRankerRequestHandler extends SearchHandler {
    private static Logger LOG = Logger
            .getLogger("com.become.search.requestHandlers.SearchResultsReRankerRequestHandler");
    private final static int MAX_SEARCH_RESULTS = 100;
    private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();
    private ParserChunker2MatcherProcessor sm = null;
    private int MAX_QUERY_LENGTH_NOT_TO_RERANK = 3;
    private static String resourceDir = "/home/solr/solr-4.4.0/example/src/test/resources";
    //"C:/workspace/TestSolr/src/test/resources";

    //"/data1/solr/example/src/test/resources";

    public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) {
        // get query string
        String requestExpression = req.getParamString();
        String[] exprParts = requestExpression.split("&");
        for (String part : exprParts) {
            if (part.startsWith("q="))
                requestExpression = part;
        }
        String query = StringUtils.substringAfter(requestExpression, ":");
        LOG.info(requestExpression);

        SolrParams ps = req.getOriginalParams();
        Iterator<String> iter = ps.getParameterNamesIterator();
        List<String> keys = new ArrayList<String>();
        while (iter.hasNext()) {
            keys.add(iter.next());
        }

        List<HitBase> searchResults = new ArrayList<HitBase>();

        for (Integer i = 0; i < MAX_SEARCH_RESULTS; i++) {
            String title = req.getParams().get("t" + i.toString());
            String descr = req.getParams().get("d" + i.toString());

            if (title == null || descr == null)
                continue;

            HitBase hit = new HitBase();
            hit.setTitle(title);
            hit.setAbstractText(descr);
            hit.setSource(i.toString());
            searchResults.add(hit);
        }

        /*
         * http://173.255.254.250:8983/solr/collection1/reranker/?
         * q=search_keywords:design+iphone+cases&fields=spend+a+day+with+a+custom+iPhone+case&fields=Add+style+to+your+every+day+fresh+design+with+a+custom+iPhone+case&fields=Add+style+to+your+every+day+with+mobile+case+for+your+family&fields=Add+style+to+your+iPhone+and+iPad&fields=Add+Apple+fashion+to+your+iPhone+and+iPad
         * 
         */

        if (searchResults.size() < 1) {
            int count = 0;
            for (String val : exprParts) {
                if (val.startsWith("fields=")) {
                    val = StringUtils.mid(val, 7, val.length());
                    HitBase hit = new HitBase();
                    hit.setTitle("");
                    hit.setAbstractText(val);
                    hit.setSource(new Integer(count).toString());
                    searchResults.add(hit);
                    count++;
                }

            }
        }

        List<HitBase> reRankedResults = null;
        query = query.replace('+', ' ');
        if (tooFewKeywords(query) || orQuery(query)) {
            reRankedResults = searchResults;
            LOG.info("No re-ranking for " + query);
        } else
            reRankedResults = calculateMatchScoreResortHits(searchResults, query);
        /*
         * <scores>
        <score index="2">3.0005</score>
        <score index="1">2.101</score>
        <score index="3">2.1003333333333334</score>
        <score index="4">2.00025</score>
        <score index="5">1.1002</score>
        </scores>
         * 
         * 
         */
        StringBuffer buf = new StringBuffer();
        buf.append("<scores>");
        for (HitBase hit : reRankedResults) {
            buf.append("<score index=\"" + hit.getSource() + "\">" + hit.getGenerWithQueryScore() + "</score>");
        }
        buf.append("</scores>");

        NamedList<Object> scoreNum = new NamedList<Object>();
        for (HitBase hit : reRankedResults) {
            scoreNum.add(hit.getSource(), hit.getGenerWithQueryScore());
        }

        StringBuffer bufNums = new StringBuffer();
        bufNums.append("order>");
        for (HitBase hit : reRankedResults) {
            bufNums.append(hit.getSource() + "_");
        }
        bufNums.append("/order>");

        LOG.info("re-ranking results: " + buf.toString());
        NamedList<Object> values = rsp.getValues();
        values.remove("response");
        values.add("response", scoreNum);
        //values.add("new_order", bufNums.toString().trim());
        rsp.setAllValues(values);

    }

    private boolean orQuery(String query) {
        if (query.indexOf('|') > -1)
            return true;

        return false;
    }

    private boolean tooFewKeywords(String query) {
        String[] parts = query.split(" ");
        if (parts != null && parts.length < MAX_QUERY_LENGTH_NOT_TO_RERANK)
            return true;

        return false;
    }

    private List<HitBase> calculateMatchScoreResortHits(List<HitBase> hits, String searchQuery) {
        try {
            sm = ParserChunker2MatcherProcessor.getInstance(resourceDir);
        } catch (Exception e) {
            LOG.severe(e.getMessage());
        }
        List<HitBase> newHitList = new ArrayList<HitBase>();

        int count = 1;
        for (HitBase hit : hits) {
            String snapshot = hit.getAbstractText();
            snapshot += " . " + hit.getTitle();
            Double score = 0.0;
            try {
                SentencePairMatchResult matchRes = sm.assessRelevance(snapshot, searchQuery);
                List<List<ParseTreeChunk>> match = matchRes.getMatchResult(); // we need the second member
                // so that when scores are the same, original order is maintained
                score = parseTreeChunkListScorer.getParseTreeChunkListScore(match) + 0.001 / (double) count;
            } catch (Exception e) {
                LOG.info(e.getMessage());
                e.printStackTrace();
            }
            hit.setGenerWithQueryScore(score);
            newHitList.add(hit);
            count++;
        }
        Collections.sort(newHitList, new HitBaseComparable());
        LOG.info(newHitList.toString());

        return newHitList;
    }

    public class HitBaseComparable implements Comparator<HitBase> {
        // @Override
        public int compare(HitBase o1, HitBase o2) {
            return (o1.getGenerWithQueryScore() > o2.getGenerWithQueryScore() ? -1 : (o1 == o2 ? 0 : 1));
        }
    }

}

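/*
Example requests. The first is wrapped across lines for readability only; the second is the same request on a single line.
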
http://dev1.exava.us:8086/solr/collection1/reranker/?q=search_keywords:I+want+style+in+my+every+day+fresh+design+iphone+cases
&t1=Personalized+iPhone+4+Cases&d1=spend+a+day+with+a+custom+iPhone+case
&t2=iPhone+Cases+to+spend+a+day&d2=Add+style+to+your+every+day+fresh+design+with+a+custom+iPhone+case
&t3=Plastic+iPhone+Cases&d3=Add+style+to+your+every+day+with+mobile+case+for+your+family
&t4=Personalized+iPhone+and+iPad+Cases&d4=Add+style+to+your+iPhone+and+iPad
&t5=iPhone+accessories+from+Apple&d5=Add+Apple+fashion+to+your+iPhone+and+iPad
    
http://dev1.exava.us:8086/solr/collection1/reranker/?q=search_keywords:I+want+style+in+my+every+day+fresh+design+iphone+cases&t1=Personalized+iPhone+4+Cases&d1=spend+a+day+with+a+custom+iPhone+case&t2=iPhone+Cases+to+spend+a+day&d2=Add+style+to+your+every+day+fresh+design+with+a+custom+iPhone+case&t3=Plastic+iPhone+Cases&d3=Add+style+to+your+every+day+with+mobile+case+for+your+family&t4=Personalized+iPhone+and+iPad+Cases&d4=Add+style+to+your+iPhone+and+iPad&t5=iPhone+accessories+from+Apple&d5=Add+Apple+fashion+to+your+iPhone+and+iPad
 */