com.hrstc.lucene.queryexpansion.GenerateClassCodesQuery.java Source code

Java tutorial

Introduction

Here is the source code for com.hrstc.lucene.queryexpansion.GenerateClassCodesQuery.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.hrstc.lucene.queryexpansion;

import java.io.IOException;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import nicta.com.au.patent.document.PatentDocument;
import nicta.com.au.patent.pac.search.PatentQuery;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;

/**
 *
 * @author rbouadjenek
 */
public class GenerateClassCodesQuery {

    public static Pattern pattern = Pattern.compile("([A-Z])(\\d{2})([A-Z])(\\d+)/(\\d+)");

    public static Query generateQuery(Set<String> codes) {
        BooleanQuery bquery = new BooleanQuery();
        for (String code : codes) {
            // in case you would like to ignore case sensitivity,
            // you could use this statement:
            // Pattern pattern = Pattern.compile("\\s+", Pattern.CASE_INSENSITIVE);
            Matcher matcher = pattern.matcher(code);
            // check all occurance
            while (matcher.find()) {
                PhraseQuery pq = new PhraseQuery();
                PhraseQuery section = new PhraseQuery();
                section.add(new Term(PatentDocument.Classification, matcher.group(1)));
                bquery.add(section, BooleanClause.Occur.SHOULD);

                PhraseQuery classification = new PhraseQuery();
                classification.add(new Term(PatentDocument.Classification, matcher.group(1) + matcher.group(2)));
                bquery.add(classification, BooleanClause.Occur.SHOULD);

                PhraseQuery subclass = new PhraseQuery();
                subclass.add(new Term(PatentDocument.Classification,
                        matcher.group(1) + matcher.group(2) + matcher.group(3)));
                bquery.add(subclass, BooleanClause.Occur.SHOULD);

                PhraseQuery mainGroup = new PhraseQuery();
                mainGroup.add(new Term(PatentDocument.Classification,
                        matcher.group(1) + matcher.group(2) + matcher.group(3) + matcher.group(4) + "NICTA00"));
                bquery.add(mainGroup, BooleanClause.Occur.SHOULD);

                PhraseQuery subGroup = new PhraseQuery();
                subGroup.add(new Term(PatentDocument.Classification, matcher.group(1) + matcher.group(2)
                        + matcher.group(3) + matcher.group(4) + "NICTA" + matcher.group(5)));
                bquery.add(subGroup, BooleanClause.Occur.SHOULD);

            }
        }

        return bquery;
    }

    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) throws IOException {
        // TODO code application logic here
        PatentQuery query = new PatentQuery(
                "/Volumes/Macintosh HD/Users/rbouadjenek/Documents/Patent-Project/CLEF-IP 2010/PAC_test/topics/PAC-1028_EP-1652736-A1.xml",
                1, 0, 0, 0, 0, 0, true, true);
        System.out.println(generateQuery(query.getFullClassCodes()));
    }

}