com.mhs.qsol.proximity.distribute.BasicDistributable.java Source code

Java tutorial

Introduction

Here is the source code for com.mhs.qsol.proximity.distribute.BasicDistributable.java

Source

package com.mhs.qsol.proximity.distribute;

import com.mhs.qsol.QsolParseException;
import com.mhs.qsol.spans.SpanWithinQuery;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;

/**
 * Copyright 2006 Mark Miller (markrmiller@gmail.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Leaf {@link Distributable} that wraps a single {@link SpanQuery}.
 *
 * <p>
 * This node holds no children and no connectors: every child/connector
 * management method is a no-op, and {@link #getChildren()} /
 * {@link #getConnectors()} return {@code null}. Its job is to pair the wrapped
 * query with another query (or with every leaf of another
 * {@code Distributable}) and build the proximity query described by a
 * {@link ProxInfo}.
 *
 * @author Mark Miller <markrmiller@gmail.com> Aug 26, 2006
 *
 */
public class BasicDistributable implements Distributable {
    private static final Logger logger = Logger.getLogger(BasicDistributable.class.getPackage().getName());

    /**
     * Slop used for the inner near-query of paragraph/sentence searches. The
     * requested distance is handed to {@link SpanWithinQuery} instead, so this
     * value is presumably just "large enough not to matter" - confirm against
     * SpanWithinQuery before changing it.
     */
    private static final int MARKER_SCOPE_SLOP = 99999;

    /** The wrapped query; may be {@code null} (see {@link #distribute(SpanQuery, ProxInfo)}). */
    private final SpanQuery query;

    /** Enclosing node, or {@code null} when this node has no parent. */
    private Distributable parent;

    private BasicDistributable() {
        this(null);
    }

    /**
     * @param query
     *            the span query this leaf represents
     */
    public BasicDistributable(SpanQuery query) {
        this.query = query;
    }

    /** Does nothing: a leaf has no connectors. */
    public void addConnector(Occur occurType) {
        // no-op
    }

    /** Does nothing: a leaf has no children. */
    public void add(Distributable c) {
        // no-op
    }

    /**
     * @return always {@code null};
     *         {@link #distribute(Distributable, ProxInfo)} treats a
     *         {@code null} child list as the sign that the target is a leaf
     */
    public ArrayList<Distributable> getChildren() {
        return null;
    }

    /** Does nothing: a leaf has no children. */
    public void remove(Distributable c) {
        // no-op
    }

    /** Does nothing: a leaf has nothing to clear. */
    public void clear() {
        // no-op
    }

    /**
     * Renders this node on its own line, indented with one tab per ancestor.
     *
     * <p>
     * A {@code null} wrapped query is rendered as {@code distrib(null)} and
     * reported through the logger rather than causing a
     * {@link NullPointerException}.
     *
     * @return a newline, the indentation, then {@code distrib(<query>)}
     */
    @Override
    public String toString() {
        // Depth in the tree == number of ancestors reachable through getParent().
        int depth = 0;
        for (Distributable ancestor = getParent(); ancestor != null; ancestor = ancestor.getParent()) {
            depth++;
        }

        StringBuilder text = new StringBuilder("\n");
        for (int i = 0; i < depth; i++) {
            text.append('\t');
        }

        if (query == null) {
            logger.warning("null term found in distrib in toString()");
        }

        // append(Object) renders a null reference as "null" instead of throwing.
        return text.append("distrib(").append(query).append(")").toString();
    }

    /**
     * @param distrib
     *            the node that encloses this one
     */
    public void setParent(Distributable distrib) {
        parent = distrib;
    }

    /**
     * @return the enclosing node, or {@code null} if none was set
     */
    public Distributable getParent() {
        return parent;
    }

    /** Does nothing: a leaf has no children. */
    public void setChildren(ArrayList<Distributable> children) {
        // no-op
    }

    /**
     * @return always {@code null}: a leaf has no connectors
     */
    public ArrayList<Occur> getConnectors() {
        // no-op
        return null;
    }

    /** Does nothing: a leaf has no connectors. */
    public void setConnectors(ArrayList<Occur> connectors) {
        // no-op
    }

    /**
     * Distributes the wrapped query over another {@link Distributable}.
     *
     * <p>
     * If {@code distrib} has no children, the call is forwarded to
     * {@code distrib.distribute(this.query, proxInfo)}. Otherwise each child
     * is distributed against the wrapped query and the results are added to a
     * {@link BooleanQuery} using the connector at the same index. Two adjacent
     * results that are both {@link SpanQuery}s and are joined by
     * {@link Occur#SHOULD} are merged into a single {@link SpanOrQuery}; when
     * those two are the only children, the {@code SpanOrQuery} itself is
     * returned.
     *
     * <p>
     * A look-ahead result that cannot be merged is kept and reused on the next
     * iteration instead of being computed a second time. NOTE(review): this
     * assumes a child's {@code distribute} call returns an equivalent query
     * when repeated with the same arguments - confirm for composite
     * implementations.
     *
     * @param distrib
     *            the node to distribute the wrapped query over
     * @param proxInfo
     *            proximity settings passed through to the children
     * @return the forwarded result, a {@link SpanOrQuery}, or a
     *         {@link BooleanQuery} holding one clause per (possibly merged)
     *         child
     */
    public Query distribute(Distributable distrib, ProxInfo proxInfo) {
        List<Distributable> children = distrib.getChildren();

        if (children == null) {
            Query result = distrib.distribute(this.query, proxInfo);

            if (logger.isLoggable(Level.FINE)) {
                logger.fine("return query:" + result);
            }

            return result;
        }

        BooleanQuery boolQuery = new BooleanQuery();
        List<Occur> connectors = distrib.getConnectors();
        int size = children.size();

        // Result for children.get(i) that was already computed as the
        // look-ahead of the previous iteration but could not be merged.
        Query pending = null;

        for (int i = 0; i < size; i++) {
            Occur con = connectors.get(i);

            Query current;
            if (pending != null) {
                current = pending;
                pending = null;
            } else {
                current = children.get(i).distribute(this.query, proxInfo);
            }

            // Merging into a SpanOr needs a following child, an OR connector,
            // and span queries on both sides.
            if ((i + 1) < size && con == Occur.SHOULD && current instanceof SpanQuery) {
                Query next = children.get(i + 1).distribute(this.query, proxInfo);

                if (next instanceof SpanQuery) {
                    SpanOrQuery either = new SpanOrQuery(new SpanQuery[] { (SpanQuery) current, (SpanQuery) next });

                    if (size == 2) {
                        return either;
                    }

                    current = either;
                    i++; // the next child has been consumed by the merge
                } else {
                    pending = next;
                }
            }

            boolQuery.add(current, con);
        }

        if (logger.isLoggable(Level.FINE)) {
            logger.fine("distribute(Distributable) - to distrib:" + distrib + " and :" + this.query);
            logger.fine("boolquery:" + boolQuery.toString());
        }

        return boolQuery;
    }

    /**
     * Builds the proximity query between {@code query} and the wrapped query.
     *
     * <ul>
     * <li>{@code WORD}: a {@link SpanNearQuery} over both queries using
     * {@code proxInfo.distance} as slop and {@code proxInfo.ordered}.</li>
     * <li>{@code PARAGRAPH} / {@code SENTENCE}: a {@link SpanNearQuery} with a
     * fixed large slop, wrapped in a {@link SpanWithinQuery} together with the
     * paragraph or sentence marker term and {@code proxInfo.distance}.</li>
     * </ul>
     * In every case, when {@code proxInfo.fieldBreakMarker} is set the near
     * query is first wrapped in a {@link SpanNotQuery} against that marker
     * term.
     *
     * @param query
     *            the other side of the proximity search
     * @param proxInfo
     *            proximity type, distance, ordering and marker terms
     * @return the proximity query
     * @throws QsolParseException
     *             if either side of the search is {@code null}
     * @throws NumberFormatException
     *             if {@code proxInfo.distance} is not an integer
     * @throws RuntimeException
     *             if the proximity type is not word, paragraph or sentence
     */
    public Query distribute(SpanQuery query, ProxInfo proxInfo) {
        if (logger.isLoggable(Level.FINE)) {
            logger.fine("distribute(Distributable) - to distrib:" + query + " and :" + this.query);
        }

        if ((this.query == null) || (query == null)) {
            throw new QsolParseException("A proximity search cannot contain stop words: " + query + " ~"
                    + proxInfo.distance + proxInfo.proxType + " " + this.query);
        }

        switch (proxInfo.proxType) {
        case WORD:

            if (logger.isLoggable(Level.FINE)) {
                logger.fine("return:" + "spanQuery(" + query + "," + this.query + ")");
            }

            SpanQuery near = new SpanNearQuery(new SpanQuery[] { query, this.query },
                    Integer.parseInt(proxInfo.distance), proxInfo.ordered);

            return excludeFieldBreaks(near, proxInfo);

        case PARAGRAPH:

            return withinMarker(query, new SpanTermQuery(new Term(this.query.getField(), proxInfo.paraMarker)),
                    proxInfo);

        case SENTENCE:

            return withinMarker(query, new SpanTermQuery(new Term(this.query.getField(), proxInfo.sentMarker)),
                    proxInfo);
        }

        throw new RuntimeException("proximity search was not of type sent/para/word");
    }

    /**
     * Wraps {@code span} in a SpanNot against the field-break marker term, or
     * returns it unchanged when no field-break marker is configured.
     */
    private SpanQuery excludeFieldBreaks(SpanQuery span, ProxInfo proxInfo) {
        if (proxInfo.fieldBreakMarker == null) {
            return span;
        }

        SpanTermQuery fieldBreakMarker = new SpanTermQuery(
                new Term(this.query.getField(), proxInfo.fieldBreakMarker));

        return new SpanNotQuery(span, fieldBreakMarker);
    }

    /**
     * Builds the paragraph/sentence form: a wide unordered near-query over
     * both sides, wrapped in a SpanWithinQuery with the given marker term and
     * the requested distance.
     */
    private Query withinMarker(SpanQuery other, SpanTermQuery marker, ProxInfo proxInfo) {
        SpanQuery pair = new SpanNearQuery(new SpanQuery[] { other, this.query }, MARKER_SCOPE_SLOP, false);

        return new SpanWithinQuery(excludeFieldBreaks(pair, proxInfo), marker, Integer.parseInt(proxInfo.distance));
    }
}