org.LexGrid.LexBIG.Impl.Extensions.GenericExtensions.search.SearchExtensionImpl.java Source code

Java tutorial

Introduction

Here is the source code for org.LexGrid.LexBIG.Impl.Extensions.GenericExtensions.search.SearchExtensionImpl.java

Source

/*
 * Copyright: (c) 2004-2010 Mayo Foundation for Medical Education and 
 * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
 * triple-shield Mayo logo are trademarks and service marks of MFMER.
 *
 * Except as contained in the copyright notice above, or as used to identify 
 * MFMER as the author of this software, the trade names, trademarks, service
 * marks, or product names of the copyright holder shall not be used in
 * advertising, promotion or otherwise in connection with this software without
 * prior written authorization of the copyright holder.
 * 
 * Licensed under the Eclipse Public License, Version 1.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at 
 * 
 *      http://www.eclipse.org/legal/epl-v10.html
 * 
 */
package org.LexGrid.LexBIG.Impl.Extensions.GenericExtensions.search;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.LexGrid.LexBIG.DataModel.Core.AbsoluteCodingSchemeVersionReference;
import org.LexGrid.LexBIG.DataModel.Core.CodingSchemeVersionOrTag;
import org.LexGrid.LexBIG.DataModel.Core.types.CodingSchemeVersionStatus;
import org.LexGrid.LexBIG.DataModel.InterfaceElements.ExtensionDescription;
import org.LexGrid.LexBIG.Exceptions.LBParameterException;
import org.LexGrid.LexBIG.Extensions.ExtensionRegistry;
import org.LexGrid.LexBIG.Extensions.Generic.CodingSchemeReference;
import org.LexGrid.LexBIG.Extensions.Generic.GenericExtension;
import org.LexGrid.LexBIG.Extensions.Generic.SearchExtension;
import org.LexGrid.LexBIG.Impl.Extensions.AbstractExtendable;
import org.LexGrid.LexBIG.Utility.Constructors;
import org.LexGrid.LexBIG.Utility.ServiceUtility;
import org.LexGrid.LexBIG.Utility.Iterators.ResolvedConceptReferencesIterator;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.join.QueryBitSetProducer;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.lexevs.dao.index.indexer.LuceneLoaderCode;
import org.lexevs.dao.indexer.utility.CodingSchemeMetaData;
import org.lexevs.dao.indexer.utility.ConcurrentMetaData;
import org.lexevs.locator.LexEvsServiceLocator;
import org.lexevs.registry.model.RegistryEntry;
import org.lexevs.registry.service.Registry.ResourceType;
import org.springframework.util.CollectionUtils;

public class SearchExtensionImpl extends AbstractExtendable implements SearchExtension {

    private static final long serialVersionUID = 8704782086137708226L;
    private static final Term baseQuery = new Term("propertyType", "presentation");
    private static final Term preferred = new Term("isPreferred", "T");

    @Override
    public ResolvedConceptReferencesIterator search(String text, MatchAlgorithm matchAlgorithm)
            throws LBParameterException {
        return this.search(text, null, matchAlgorithm);
    }

    @Override
    public ResolvedConceptReferencesIterator search(String text, Set<CodingSchemeReference> codeSystems,
            MatchAlgorithm matchAlgorithm) throws LBParameterException {
        return this.search(text, codeSystems, null, matchAlgorithm);
    }

    @Override
    public ResolvedConceptReferencesIterator search(final String text,
            Set<CodingSchemeReference> codeSystemsToInclude, Set<CodingSchemeReference> codeSystemsToExclude,
            MatchAlgorithm matchAlgorithm) throws LBParameterException {
        return this.search(text, codeSystemsToInclude, codeSystemsToExclude, matchAlgorithm, false);
    }

    @Override
    public ResolvedConceptReferencesIterator search(final String text,
            Set<CodingSchemeReference> codeSystemsToInclude, Set<CodingSchemeReference> codeSystemsToExclude,
            MatchAlgorithm matchAlgorithm, boolean includeAnonymous) throws LBParameterException {
        return this.search(text, codeSystemsToInclude, codeSystemsToExclude, matchAlgorithm, includeAnonymous,
                true);
    }

    @Override
    public ResolvedConceptReferencesIterator search(final String text,
            Set<CodingSchemeReference> codeSystemsToInclude, Set<CodingSchemeReference> codeSystemsToExclude,
            MatchAlgorithm matchAlgorithm, boolean includeAnonymous, boolean includeInactive)
            throws LBParameterException {

        LexEvsServiceLocator lexEvsServiceLocator = LexEvsServiceLocator.getInstance();
        List<RegistryEntry> entries = lexEvsServiceLocator.getRegistry()
                .getAllRegistryEntriesOfType(ResourceType.CODING_SCHEME);

        codeSystemsToInclude = sanitizeReferences(codeSystemsToInclude);
        codeSystemsToExclude = sanitizeReferences(codeSystemsToExclude);

        Set<CodingSchemeReference> tempSystemsToInclude = new HashSet<CodingSchemeReference>();
        for (RegistryEntry entry : entries) {
            CodingSchemeReference ref = new CodingSchemeReference();
            ref.setCodingScheme(entry.getResourceUri());
            ref.setVersionOrTag(Constructors.createCodingSchemeVersionOrTagFromVersion(entry.getResourceVersion()));
            if (!entry.getStatus().equals(CodingSchemeVersionStatus.ACTIVE.toString())) {

                if (codeSystemsToExclude == null) {
                    codeSystemsToExclude = new HashSet<CodingSchemeReference>();
                }
                codeSystemsToExclude.add(ref);
            }
            tempSystemsToInclude.add(ref);
        }

        //We'll want any and all systems if this contains none.
        if (codeSystemsToInclude == null) {
            codeSystemsToInclude = tempSystemsToInclude;
        }

        Query query = this.buildOnMatchAlgorithm(text, matchAlgorithm);
        BooleanQuery.Builder newBuilder = new BooleanQuery.Builder();

        if (!includeAnonymous || !includeInactive) {

            newBuilder.add(query, Occur.MUST);

            if (!includeAnonymous) {
                newBuilder.add(new TermQuery(new Term("isAnonymous", "T")), Occur.MUST_NOT);
            }
            if (!includeInactive) {
                newBuilder.add(new TermQuery(new Term("isActive", "F")), Occur.MUST_NOT);
            }
        }

        newBuilder.add(query, Occur.MUST);
        newBuilder.add(new TermQuery(new Term("isParentDoc", "true")), Occur.MUST_NOT);

        query = newBuilder.build();

        QueryBitSetProducer parentFilter;
        try {
            parentFilter = new QueryBitSetProducer(
                    new QueryParser("isParentDoc", new StandardAnalyzer(new CharArraySet(0, true))).parse("true"));
        } catch (ParseException e) {
            throw new RuntimeException("Query Parser Failed against parent query: ", e);
        }
        ToParentBlockJoinQuery blockJoinQuery = new ToParentBlockJoinQuery(query, parentFilter, ScoreMode.Total);

        if (codeSystemsToExclude != null && codeSystemsToInclude.size() > 0 && codeSystemsToExclude.size() > 0) {
            codeSystemsToInclude.removeAll(codeSystemsToExclude);
        }
        Set<AbsoluteCodingSchemeVersionReference> codeSystemRefs = this
                .resolveCodeSystemReferences(codeSystemsToInclude);
        List<ScoreDoc> scoreDocs = lexEvsServiceLocator.getIndexServiceManager().getSearchIndexService()
                .query(codeSystemRefs, blockJoinQuery);
        return new SearchScoreDocIterator(codeSystemRefs, scoreDocs);
    }

    protected BooleanQuery buildOnMatchAlgorithm(String text, MatchAlgorithm matchAlgorithm) {
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        if (StringUtils.isBlank(text)) {
            builder.add(new MatchAllDocsQuery(), Occur.MUST);
            return builder.build();
        }
        switch (matchAlgorithm) {
        case PRESENTATION_EXACT:
            builder.add(new TermQuery(baseQuery), Occur.MUST);
            builder.add(new TermQuery(
                    new Term(LuceneLoaderCode.UNTOKENIZED_LOWERCASE_PROPERTY_VALUE_FIELD, text.toLowerCase())),
                    Occur.MUST);
            return builder.build();
        case CODE_EXACT:
            builder.add(new TermQuery(new Term("code", text)), Occur.MUST);
            return builder.build();
        case PRESENTATION_CONTAINS:
            builder.add(new TermQuery(baseQuery), Occur.MUST);
            builder.add(new TermQuery(preferred), Occur.SHOULD);
            text = text.toLowerCase();

            List<String> tokens;
            Analyzer tokenAnalyzer = new WhitespaceAnalyzer();
            try {
                tokens = tokenize(tokenAnalyzer, LuceneLoaderCode.LITERAL_PROPERTY_VALUE_FIELD, text);
            } catch (IOException e) {
                throw new RuntimeException("Tokenizing query text failed", e);
            }
            QueryParser parser = new QueryParser(LuceneLoaderCode.PROPERTY_VALUE_FIELD,
                    LuceneLoaderCode.getAnaylzer());
            for (String token : tokens) {
                builder.add(new PrefixQuery(new Term(LuceneLoaderCode.LITERAL_PROPERTY_VALUE_FIELD, token)),
                        Occur.MUST);
            }
            text = QueryParser.escape(text);
            try {
                builder.add(parser.parse(text), Occur.SHOULD);
            } catch (ParseException e1) {
                // TODO Auto-generated catch block
                e1.printStackTrace();
            }
            builder.add(new TermQuery(new Term(LuceneLoaderCode.UNTOKENIZED_LOWERCASE_PROPERTY_VALUE_FIELD, text)),
                    Occur.SHOULD);
            return builder.build();
        case LUCENE:
            builder.add(new TermQuery(baseQuery), Occur.MUST);
            builder.add(new TermQuery(preferred), Occur.SHOULD);
            QueryParser luceneParser = new QueryParser(LuceneLoaderCode.PROPERTY_VALUE_FIELD,
                    LuceneLoaderCode.getAnaylzer());
            Query query;
            try {
                query = luceneParser.parse(text);
            } catch (ParseException e) {
                throw new RuntimeException("Parser failed parsing text: " + text);
            }
            builder.add(query, Occur.MUST);
            return builder.build();
        default:
            throw new IllegalStateException("Unrecognized MatchAlgorithm: " + matchAlgorithm.name());
        }
    }

    protected Set<AbsoluteCodingSchemeVersionReference> resolveCodeSystemReferences(
            Set<CodingSchemeReference> references) throws LBParameterException {
        if (CollectionUtils.isEmpty(references)) {
            return null;
        }

        Set<AbsoluteCodingSchemeVersionReference> returnSet = new HashSet<AbsoluteCodingSchemeVersionReference>();
        ConcurrentMetaData metadata = ConcurrentMetaData.getInstance();
        for (CodingSchemeReference ref : references) {
            CodingSchemeMetaData csm = metadata.getCodingSchemeMetaDataForNameAndVersion(ref.getCodingScheme(),
                    ref.getVersionOrTag().getVersion());
            if (csm == null) {
                csm = metadata.getCodingSchemeMetaDataForUriAndVersion(ref.getCodingScheme(),
                        ref.getVersionOrTag().getVersion());
            }
            if (csm == null) {
                continue;
            }
            if ((ref.getCodingScheme().equals(csm.getCodingSchemeName())
                    || ref.getCodingScheme().equals(csm.getCodingSchemeUri())
                            && ref.getVersionOrTag().getVersion().equals(csm.getCodingSchemeVersion()))) {
                returnSet.add(csm.getRef());
            }
        }
        return returnSet;
    }

    private Set<CodingSchemeReference> sanitizeReferences(Set<CodingSchemeReference> references)
            throws LBParameterException {
        if (CollectionUtils.isEmpty(references)) {
            return null;
        }
        Set<CodingSchemeReference> tempReferences = new HashSet<CodingSchemeReference>();
        for (CodingSchemeReference ref : references) {
            AbsoluteCodingSchemeVersionReference abc = ServiceUtility
                    .getAbsoluteCodingSchemeVersionReference(ref.getCodingScheme(), ref.getVersionOrTag(), true);
            if (!ref.equals(abc.getCodingSchemeURN())) {
                ref.setCodingScheme(abc.getCodingSchemeURN());
            }
            if (ref.getVersionOrTag() == null) {
                ref.setVersionOrTag(new CodingSchemeVersionOrTag());
            }
            if (ref.getVersionOrTag().getVersion() == null) {
                ref.getVersionOrTag().setVersion(abc.getCodingSchemeVersion());
            }
            tempReferences.add(ref);
        }
        return tempReferences;
    }

    public List<String> tokenize(Analyzer analyzer, String field, String keywords) throws IOException {
        List<String> result = new ArrayList<String>();
        StringReader reader = new StringReader(keywords);
        TokenStream stream = analyzer.tokenStream(field, reader);
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        try {
            stream.reset();
            while (stream.incrementToken()) {
                result.add(termAtt.toString());
            }
            stream.close();
        } finally {
            stream.close();
        }
        return result;
    }

    @Override
    protected ExtensionDescription buildExtensionDescription() {
        ExtensionDescription ed = new ExtensionDescription();
        ed.setDescription("Simple Search Extension for LexEVS.");
        ed.setExtensionBaseClass(GenericExtension.class.getName());
        ed.setExtensionClass(this.getClass().getName());
        ed.setName("SearchExtension");
        ed.setVersion("1.0");

        return ed;
    }

    @Override
    protected void doRegister(ExtensionRegistry registry, ExtensionDescription description)
            throws LBParameterException {
        registry.registerGenericExtension(description);
    }

}