de.spartusch.nasfvi.server.NQuery.java Source code

Java tutorial

Introduction

Here is the source code for de.spartusch.nasfvi.server.NQuery.java

Source

/*
 * Copyright 2011 Stefan Partusch
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package de.spartusch.nasfvi.server;

import java.util.GregorianCalendar;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.core.QueryNodeException;
import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorPipeline;
import org.apache.lucene.queryParser.standard.StandardQueryParser;
import org.apache.lucene.queryParser.standard.config.StandardQueryConfigHandler.Operator;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;

import de.spartusch.StringMethods;

/**
 * A query for use with {@link NSearcher}.
 * <p>
 * An instance bundles the primary Lucene query, an optional similarity
 * query, the analyzer both were parsed with, and the names of the fields
 * whose values are needed to generate the answer. All state is assigned
 * once in the constructor.
 * @author Stefan Partusch
 *
 */
public class NQuery {
    /** The default search field in Lucene. */
    private static final String DEFAULT_SEARCH_FIELD = "titel";
    /** Fields in the original query to collapse. */
    private static final String[] FIELDS_TO_COLLAPSE = new String[] { "raum", "tag" };
    /** Name of the field to collapse fields to. */
    private static final String COLLAPSE_TO = "termin";
    /** Placeholder returned by {@link #extractValue(String, String)} when
     * nothing can be extracted. */
    private static final String UNKNOWN_VALUE = "(Unbekannt)";
    /** Regular expression to extract day information
     * from {@link #COLLAPSE_TO}. The abbreviation has to stand on its own:
     * word boundaries on both sides keep it from matching the tail of a
     * longer word. */
    private static final Pattern COLLAPSED_FIELD_TAG = Pattern.compile("\\b(mo|di|mi|do|fr|sa|so)\\b",
            Pattern.CASE_INSENSITIVE);
    /** Regular expression to extract location information
     * from {@link #COLLAPSE_TO}. Street names are recognised when spelled
     * with a sharp s or with "ss"; the spelling without either is still
     * accepted so everything the earlier pattern matched keeps matching. */
    private static final Pattern COLLAPSED_FIELD_RAUM = Pattern
            .compile("(Rechnerraum(?: \\w+)?)|Raum (.+)|(\\w+stra(?:\u00df|ss)?e.*)", Pattern.CASE_INSENSITIVE);

    /** The primary query. */
    private final Query query;
    /** The similarity query, or <code>null</code> if there is none. */
    private final Query similQuery;
    /** Names of fields with values for the answer in natural language. */
    private final Set<String> answerFields;
    /** Analyzer used for creating Lucene queries. */
    private final Analyzer analyzer;
    /** true if the user query contains a semester. */
    private final boolean semesterQueried;
    /** Tense of the natural language question that forms the basis of
     * this query. */
    private final Grammar.Tense tense;

    /**
     * Creates a new query.
     * <p>
     * Single quotes are stripped from both query strings before parsing. An
     * empty primary query string yields a query matching all documents; an
     * empty similarity query string yields no similarity query. Unless one
     * of the query strings names a semester explicitly, both queries are
     * additionally restricted according to <code>tense</code>.
     * @param tense Grammatical tense of the question that forms the basis
     * of the new query
     * @param queryString Lucene query as returned by
     * the {@link Grammar grammar}
     * @param similQueryString Lucene query for similarity as returned
     * by the grammar
     * @param answerFields Names of fields with values for the answer
     * to be generated
     * @param analyzer Analyzer to use for handling Lucene queries
     * @throws QueryNodeException If parsing the query strings fails
     * @see Grammar#parse(String, Analyzer)
     */
    public NQuery(final Grammar.Tense tense, final String queryString, final String similQueryString,
            final String[] answerFields, final Analyzer analyzer) throws QueryNodeException {
        StandardQueryParser parser = new StandardQueryParser(analyzer);
        parser.setDefaultOperator(Operator.AND);
        QueryNodeProcessorPipeline pipeline = (QueryNodeProcessorPipeline) parser.getQueryNodeProcessor();
        // NOTE(review): the meaning of the third argument (50) is defined by
        // FieldsCollapsingProcessor, which is not visible here.
        pipeline.add(new FieldsCollapsingProcessor(FIELDS_TO_COLLAPSE, COLLAPSE_TO, 50));

        // Plain literal replacement; no regular expression is needed here.
        String primaryString = queryString.replace("'", "");
        Query primary;
        if (primaryString.isEmpty()) {
            primary = new MatchAllDocsQuery();
        } else {
            primary = parser.parse(primaryString, DEFAULT_SEARCH_FIELD);
        }

        String similarityString = similQueryString.replace("'", "");
        Query similarity = null;
        if (!similarityString.isEmpty()) {
            similarity = parser.parse(similarityString, DEFAULT_SEARCH_FIELD);
        }

        boolean explicitSemester = queryString.contains("semester:\"")
                || similQueryString.contains("semester:\"");
        if (!explicitSemester) {
            // No semester given: derive the time restriction from the tense.
            primary = interpretTense(tense, primary);
            if (similarity != null) {
                similarity = interpretTense(tense, similarity);
            }
        }

        this.query = primary;
        this.similQuery = similarity;
        this.semesterQueried = explicitSemester;
        this.analyzer = analyzer;
        this.tense = tense;

        // Decide how "zeit" is mapped from the complete input up front, so
        // the result does not depend on the order of the requested fields.
        boolean semesterRequested = false;
        for (String requested : answerFields) {
            if ("semester".equals(requested)) {
                semesterRequested = true;
                break;
            }
        }
        boolean zeitAsTag = explicitSemester || Grammar.Tense.praes.equals(tense) || semesterRequested;

        Set<String> mapped = new HashSet<String>();
        for (String requested : answerFields) {
            mapped.add(mapFieldname(requested, zeitAsTag));
        }
        this.answerFields = mapped;
    }

    /**
     * Returns the primary query.
     * @return The primary query, never <code>null</code>
     */
    public final Query getQuery() {
        return query;
    }

    /**
     * Returns the similarity query.
     * @return The similarity query or <code>null</code> if there is none
     * @see #hasSimilarityQuery()
     */
    public final Query getSimilarityQuery() {
        return similQuery;
    }

    /**
     * Tests if this query has a similarity query.
     * @return true if {@link #getSimilarityQuery()} is not <code>null</code>
     */
    public final boolean hasSimilarityQuery() {
        return similQuery != null;
    }

    /**
     * Returns the names of the fields whose values are needed for the
     * answer. "Virtual" field names have already been mapped to actual
     * field names. The returned set is the internal set, not a copy.
     * @return Mapped names of the fields to answer
     */
    public final Set<String> getFieldsToAnswer() {
        return answerFields;
    }

    /**
     * Returns the analyzer this query was created with.
     * @return Analyzer passed to the constructor
     */
    public final Analyzer getAnalyzer() {
        return analyzer;
    }

    /**
     * Maps "virtual" field names to actual field names. That is "zeit" to
     * "tag" if <code>zeitAsTag</code> is set and to "semester" otherwise,
     * and "ort" to "raum". Any other name is returned unchanged.
     * @param field Field name to map
     * @param zeitAsTag true if "zeit" is to be mapped to "tag", i.e. if the
     * query contains a semester, the tense of the question is present or
     * "semester" is itself one of the requested fields
     * @return Mapped field name
     */
    private static String mapFieldname(final String field, final boolean zeitAsTag) {
        if ("zeit".equals(field)) {
            return zeitAsTag ? "tag" : "semester";
        }
        if ("ort".equals(field)) {
            return "raum";
        }
        return field;
    }

    /**
     * Returns a representation of this query in JSON.
     * @return JSON object with the members "Query", "SQuery" (an empty
     * string if there is no similarity query) and "Fields"
     */
    @Override
    public final String toString() {
        StringBuilder sb = new StringBuilder();
        // {
        //   "Query": query,
        //   "SQuery": similQuery,
        //   "Fields": [answerFields]
        // }

        sb.append("{\n\"Query\": ");
        sb.append(Grammar.toJsonString(query.toString(), true));
        sb.append(",\n\"SQuery\": ");

        if (hasSimilarityQuery()) {
            sb.append(Grammar.toJsonString(similQuery.toString(), true));
        } else {
            sb.append("\"\"");
        }

        sb.append(",\n\"Fields\": [");
        String separator = "";
        for (String f : answerFields) {
            sb.append(separator).append("\"").append(f).append("\"");
            separator = ", ";
        }
        sb.append("]\n}");

        return sb.toString();
    }

    /**
     * Extracts values for collapsed fields. Some fields are collapsed, i.e.
     * are merged into a single field. This methods extracts values for these
     * fields from the single field.
     * @param field Name of a collapsed field
     * @param value Value of the single field to extract from; may be
     * <code>null</code>
     * @return Extracted value, or <code>"(Unbekannt)"</code> if
     * <code>value</code> is <code>null</code> or contains no matching value
     * @throws AssertionError if <code>field</code> is not a collapsed field
     */
    public static String extractValue(final String field, final String value) {
        final Pattern pattern;

        if ("tag".equals(field)) {
            pattern = COLLAPSED_FIELD_TAG;
        } else if ("raum".equals(field)) {
            pattern = COLLAPSED_FIELD_RAUM;
        } else {
            throw new AssertionError("Not a collapsed field: " + field);
        }

        if (value == null) {
            return UNKNOWN_VALUE;
        }

        Matcher m = pattern.matcher(value);
        if (m.find()) {
            // The patterns consist of alternatives with one capturing group
            // each; return the group of the alternative that matched.
            for (int i = 1; i <= m.groupCount(); i++) {
                String match = m.group(i);
                if (match != null) {
                    return match;
                }
            }
        }

        return UNKNOWN_VALUE;
    }

    /**
     * Tests if a field is to be collapsed. Such fields are merged with
     * other fields into a single field.
     * @param field Name of the field to test
     * @return true if <code>field</code> names a field to collapse
     */
    public static boolean isFieldToCollapse(final String field) {
        return StringMethods.equalsOneOf(field, FIELDS_TO_COLLAPSE);
    }

    /**
     * Returns the name of the single field other fields are collapsed
     * to, i.e. are merged into.
     * @return Name of the field other fields are collapsed to
     */
    public static String getMergedField() {
        return COLLAPSE_TO;
    }

    /**
     * Interprets the grammatical tense and extends the <code>query</code>
     * accordingly. The given query and a tense specific restriction on the
     * semester fields are combined so that both have to match.
     * @param tense Grammatical tense to interpret
     * @param query Query to extend
     * @return Extended query according to the grammatical tense
     * @throws AssertionError if <code>tense</code> is not handled
     */
    private static Query interpretTense(final Grammar.Tense tense, final Query query) {
        Semester now = new Semester();
        Query tenseQuery;

        switch (tense) {
        case pqperf: {
            // Semesters that ended before the 21st of February of last year.
            // NOTE(review): the reason for this particular cut-off date is
            // not visible here.
            int year = new GregorianCalendar().get(GregorianCalendar.YEAR) - 1;
            tenseQuery = new TermRangeQuery("semester_end", "19700101", Integer.toString(year) + "0221", true,
                    false);
            break;
        }
        case perf:
            // Semesters that began before the current one (upper bound exclusive).
            tenseQuery = new TermRangeQuery("semester_beg", "19700101", now.getBegin(), true, false);
            break;
        case praet:
            // Like perf, but including the current semester (upper bound inclusive).
            tenseQuery = new TermRangeQuery("semester_beg", "19700101", now.getBegin(), true, true);
            break;
        case praes:
            // Exactly the current semester.
            tenseQuery = new TermQuery(new Term("semester", now.getCanonical()));
            break;
        case fut1:
            // Semesters ending after the current one (lower bound exclusive).
            tenseQuery = new TermRangeQuery("semester_end", now.getEnd(), "29991231", false, true);
            break;
        default:
            throw new AssertionError("Unhandled tense: " + tense);
        }

        BooleanQuery booleanQuery = new BooleanQuery();
        booleanQuery.add(query, BooleanClause.Occur.MUST);
        booleanQuery.add(tenseQuery, BooleanClause.Occur.MUST);

        return booleanQuery;
    }
}