org.apache.ctakes.ytex.uima.DBCollectionReader.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.ctakes.ytex.uima.DBCollectionReader.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.ctakes.ytex.uima;

import java.io.IOException;
import java.sql.Driver;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import javax.sql.DataSource;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.ctakes.ytex.uima.types.DocKey;
import org.apache.ctakes.ytex.uima.types.KeyValuePair;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.ConfigurationParameterSettings;
import org.apache.uima.resource.metadata.ProcessingResourceMetaData;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.springframework.jdbc.core.RowCallbackHandler;
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
import org.springframework.jdbc.core.simple.SimpleJdbcTemplate;
import org.springframework.jdbc.datasource.DataSourceTransactionManager;
import org.springframework.jdbc.datasource.SimpleDriverDataSource;
import org.springframework.jdbc.support.lob.DefaultLobHandler;
import org.springframework.jdbc.support.lob.LobHandler;
import org.springframework.transaction.TransactionStatus;
import org.springframework.transaction.support.TransactionCallback;
import org.springframework.transaction.support.TransactionTemplate;

/**
 * 
 * Read documents from db. Config parameters:
 * <ul>
 * <li>queryGetDocumentKeys the query to get the document keys</li>
 * <li>queryGetDocument the query to get a document given a key. should have
 * named parameters that match the columns of the result set returned by
 * queryGetDocumentKeys</li>
 * <li>keyTypeName the uima type of the document key to be added to the cas.
 * defaults to org.apache.ctakes.ytex.uima.types.DocKey.
 * <li>keyNameToLowerCase convert the column names returned by
 * queryGetDocumentKeys to lower case, default true</li>
 * </ul>
 * 
 * @TODO more doc
 * @author vijay
 * 
 */
public class DBCollectionReader extends CollectionReader_ImplBase {
    private static final Log log = LogFactory.getLog(DBCollectionReader.class);

    /**
     * the query to get the document keys set in config file
     */
    protected String queryGetDocumentKeys;
    /**
     * the queyr to get a document given a key. set in config file
     */
    protected String queryGetDocument;
    /**
     * the key type. if not set, will default to
     * org.apache.ctakes.ytex.uima.types.DocKey
     */
    protected String keyTypeName = "org.apache.ctakes.ytex.uima.types.DocKey";

    protected DataSource dataSource;
    protected SimpleJdbcTemplate simpleJdbcTemplate;
    protected NamedParameterJdbcTemplate namedJdbcTemplate;
    protected TransactionTemplate txTemplate;
    protected boolean keyNameToLowerCase = true;

    public boolean isKeyNameToLowerCase() {
        return keyNameToLowerCase;
    }

    public void setKeyNameToLowerCase(boolean keyNameToLowerCase) {
        this.keyNameToLowerCase = keyNameToLowerCase;
    }

    List<Map<String, Object>> listDocumentIds;
    int i = 0;

    @Override
    public void initialize() throws ResourceInitializationException {
        initializePreLoad();
        loadDocumentIds();
    }

    protected void initializePreLoad() throws ResourceInitializationException {
        super.initialize();
        ProcessingResourceMetaData metaData = getProcessingResourceMetaData();
        ConfigurationParameterSettings paramSettings = metaData.getConfigurationParameterSettings();
        this.queryGetDocumentKeys = (String) paramSettings.getParameterValue("queryGetDocumentKeys");
        this.queryGetDocument = (String) paramSettings.getParameterValue("queryGetDocument");
        this.keyTypeName = (String) paramSettings.getParameterValue("keyTypeName");
        Boolean keyNameToLowerCase = (Boolean) paramSettings.getParameterValue("keyNameToLowerCase");
        if (keyNameToLowerCase != null)
            this.keyNameToLowerCase = keyNameToLowerCase.booleanValue();
        String dbURL = (String) paramSettings.getParameterValue("dbURL");
        String dbDriver = (String) paramSettings.getParameterValue("dbDriver");
        initDB(dbDriver, dbURL);
    }

    protected void initDB(String dbDriver, String dbURL) throws ResourceInitializationException {
        if (dbURL != null && dbURL.length() > 0) {
            try {

                if (dbDriver == null || dbDriver.length() == 0) {
                    dbDriver = ApplicationContextHolder.getYtexProperties().getProperty("db.driver");
                }
                dataSource = new SimpleDriverDataSource((Driver) Class.forName(dbDriver).newInstance(), dbURL);
                txTemplate = new TransactionTemplate(new DataSourceTransactionManager(dataSource));
            } catch (InstantiationException e) {
                throw new ResourceInitializationException(e);
            } catch (IllegalAccessException e) {
                throw new ResourceInitializationException(e);
            } catch (ClassNotFoundException e) {
                throw new ResourceInitializationException(e);
            }
        } else {
            txTemplate = (TransactionTemplate) ApplicationContextHolder.getApplicationContext()
                    .getBean("txTemplate");
            dataSource = (DataSource) ApplicationContextHolder.getApplicationContext()
                    .getBean("collectionReaderDataSource");
        }
        simpleJdbcTemplate = new SimpleJdbcTemplate(dataSource);
        namedJdbcTemplate = new NamedParameterJdbcTemplate(dataSource);
    }

    protected void loadDocumentIds() {
        if (listDocumentIds == null) {
            listDocumentIds = txTemplate.execute(new TransactionCallback<List<Map<String, Object>>>() {

                @Override
                public List<Map<String, Object>> doInTransaction(TransactionStatus arg0) {
                    return simpleJdbcTemplate.queryForList(queryGetDocumentKeys);
                }
            });
            i = 0;
        }
    }

    @Override
    public void getNext(final CAS aCAS) throws IOException, CollectionException {
        try {
            getNext(aCAS.getJCas());
        } catch (CASException e) {
            throw new CollectionException(e);
        }
    }

    public void getNext(final JCas aCAS) throws IOException, CollectionException {
        if (i < listDocumentIds.size()) {
            final Map<String, Object> id = listDocumentIds.get(i++);
            if (log.isInfoEnabled()) {
                log.info("loading document with id = " + id);
            }
            getDocumentById(aCAS, id);
            addDocKey(aCAS, id);
        } else {
            // shouldn't get here?
            throw new CollectionException("no documents to process", new Object[] {});
        }
    }

    private void addDocKey(JCas aCAS, Map<String, Object> id) throws CollectionException {
        DocKey docKey = new DocKey(aCAS);
        FSArray keyValuePairs = new FSArray(aCAS, id.size());
        int i = 0;
        for (Map.Entry<String, Object> idVal : id.entrySet()) {
            String key = idVal.getKey();
            Object val = idVal.getValue();
            KeyValuePair p = new KeyValuePair(aCAS);
            p.setKey(key);
            if (val instanceof Number) {
                p.setValueLong(((Number) val).longValue());
            } else if (val instanceof String) {
                p.setValueString((String) val);
            } else {
                log.warn("Don't know how to handle key attribute, converting to string, key=" + key + ", value="
                        + val);
                p.setValueString(val.toString());
            }
            keyValuePairs.set(i, p);
            i++;
        }
        docKey.setKeyValuePairs(keyValuePairs);
        docKey.addToIndexes();

    }

    protected void getDocumentById(final JCas aCAS, final Map<String, Object> id) {
        Map<String, Object> idMapTmp = id;
        if (this.isKeyNameToLowerCase()) {
            idMapTmp = new HashMap<String, Object>();
            for (Map.Entry<String, Object> e : id.entrySet()) {
                idMapTmp.put(e.getKey().toLowerCase(), e.getValue());
            }
        }
        final Map<String, Object> idQuery = idMapTmp;
        this.txTemplate.execute(new TransactionCallback<Object>() {

            @Override
            public Object doInTransaction(TransactionStatus arg0) {
                namedJdbcTemplate.query(queryGetDocument, idQuery, new RowCallbackHandler() {
                    boolean bFirstRowRead = false;

                    @Override
                    public void processRow(ResultSet rs) throws SQLException {
                        if (!bFirstRowRead) {
                            LobHandler lobHandler = new DefaultLobHandler();
                            String clobText = lobHandler.getClobAsString(rs, 1);
                            aCAS.setDocumentText(clobText);
                            bFirstRowRead = true;
                        } else {
                            log.error("Multiple documents for document key: " + idQuery);
                        }
                    }
                });
                return null;
            }
        });
    }

    @Override
    public Progress[] getProgress() {
        return new Progress[] { new ProgressImpl(i, listDocumentIds.size(), Progress.ENTITIES) };
    }

    @Override
    public boolean hasNext() throws IOException, CollectionException {
        return i < listDocumentIds.size();
    }

    @Override
    public void close() throws IOException {
        this.listDocumentIds = null;
        this.i = 0;
    }

}