org.datacleaner.configuration.JaxbConfigurationReaderTest.java Source code

Java tutorial

Introduction

Here is the source code for org.datacleaner.configuration.JaxbConfigurationReaderTest.java

Source

/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.configuration;

import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertThat;

import java.io.File;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;

import junit.framework.TestCase;

import org.apache.commons.lang.ArrayUtils;
import org.apache.metamodel.DataContext;
import org.apache.metamodel.csv.CsvConfiguration;
import org.apache.metamodel.data.DataSet;
import org.apache.metamodel.hbase.HBaseConfiguration;
import org.apache.metamodel.schema.Column;
import org.apache.metamodel.schema.Schema;
import org.apache.metamodel.schema.Table;
import org.apache.metamodel.util.ExclusionPredicate;
import org.apache.metamodel.util.Predicate;
import org.apache.metamodel.util.SimpleTableDef;
import org.datacleaner.api.RenderingFormat;
import org.datacleaner.connection.CassandraDatastore;
import org.datacleaner.connection.CouchDbDatastore;
import org.datacleaner.connection.CsvDatastore;
import org.datacleaner.connection.DataHubDatastore;
import org.datacleaner.connection.Datastore;
import org.datacleaner.connection.DatastoreCatalog;
import org.datacleaner.connection.DatastoreConnection;
import org.datacleaner.connection.ElasticSearchDatastore;
import org.datacleaner.connection.FixedWidthDatastore;
import org.datacleaner.connection.HBaseDatastore;
import org.datacleaner.connection.JdbcDatastore;
import org.datacleaner.connection.JsonDatastore;
import org.datacleaner.connection.MongoDbDatastore;
import org.datacleaner.connection.PojoDatastore;
import org.datacleaner.connection.UpdateableDatastoreConnection;
import org.datacleaner.connection.XmlDatastore;
import org.datacleaner.descriptors.ClasspathScanDescriptorProvider;
import org.datacleaner.descriptors.DescriptorProvider;
import org.datacleaner.descriptors.Descriptors;
import org.datacleaner.descriptors.RendererBeanDescriptor;
import org.datacleaner.job.concurrent.SingleThreadedTaskRunner;
import org.datacleaner.lifecycle.LifeCycleHelper;
import org.datacleaner.metamodel.datahub.DataHubSecurityMode;
import org.datacleaner.reference.Dictionary;
import org.datacleaner.reference.DictionaryConnection;
import org.datacleaner.reference.ReferenceDataCatalog;
import org.datacleaner.reference.StringPattern;
import org.datacleaner.reference.StringPatternConnection;
import org.datacleaner.reference.SynonymCatalog;
import org.datacleaner.reference.SynonymCatalogConnection;
import org.datacleaner.result.renderer.HtmlRenderingFormat;
import org.datacleaner.result.renderer.TextRenderingFormat;
import org.datacleaner.storage.BerkeleyDbStorageProvider;
import org.datacleaner.storage.CombinedStorageProvider;
import org.datacleaner.storage.InMemoryRowAnnotationFactory2;
import org.datacleaner.storage.InMemoryStorageProvider;
import org.datacleaner.storage.RowAnnotationFactory;
import org.datacleaner.storage.StorageProvider;
import org.junit.Assert;

public class JaxbConfigurationReaderTest extends TestCase {

    // Reader under test; every test method uses it (directly or through a
    // helper) to parse one of the example XML files under src/test/resources.
    private final JaxbConfigurationReader reader = new JaxbConfigurationReader();
    // Assigned by getDataStoreCatalog(...); within this class it is only
    // written, never read.
    private DatastoreCatalog _datastoreCatalog;

    /**
     * Reads the "csv with special chars" example configuration and checks the
     * separator, escape, quote and multiline settings of its two CSV
     * datastores ("csv" and "csv_quot").
     */
    public void testReadCsvFilesWithSpecialCharacters() throws Exception {
        final File configFile = new File("src/test/resources/example-configuration-csv-with-special-chars.xml");
        final DataCleanerConfiguration conf = reader.create(configFile);

        // "csv": tab separated, no escape character, multiline values enabled
        final CsvDatastore tabSeparated = (CsvDatastore) conf.getDatastoreCatalog().getDatastore("csv");
        assertTrue("Unexpected separator: " + tabSeparated.getSeparatorChar(),
                '\t' == tabSeparated.getSeparatorChar());
        assertTrue("Unexpected escape: " + tabSeparated.getEscapeChar(),
                CsvConfiguration.NOT_A_CHAR == tabSeparated.getEscapeChar());
        assertTrue(tabSeparated.isMultilineValues());

        // "csv_quot": double quote as quote character, multiline values disabled
        final CsvDatastore quoted = (CsvDatastore) conf.getDatastoreCatalog().getDatastore("csv_quot");
        assertEquals("\"", quoted.getQuoteChar().toString());
        assertFalse(quoted.isMultilineValues());
    }

    /**
     * Reads a configuration whose classpath scanner carries rendering format
     * exclusions and checks that no renderers are reported for
     * {@link TextRenderingFormat} while renderers for
     * {@link HtmlRenderingFormat} are still present.
     */
    public void testReadClasspathScannerWithExcludedRenderer() throws Exception {
        final File configFile = new File(
                "src/test/resources/example-configuration-classpath-scanner-with-exclusions.xml");
        final DataCleanerConfiguration conf = reader.create(configFile);

        final DescriptorProvider provider = conf.getEnvironment().getDescriptorProvider();
        assertTrue(provider instanceof ClasspathScanDescriptorProvider);

        // the scanner must expose an exclusion-based predicate for rendering formats
        final Predicate<Class<? extends RenderingFormat<?>>> formatPredicate = ((ClasspathScanDescriptorProvider) provider)
                .getRenderingFormatPredicate();
        assertNotNull(formatPredicate);
        assertTrue(formatPredicate instanceof ExclusionPredicate);

        final Collection<RendererBeanDescriptor<?>> textRenderers = provider
                .getRendererBeanDescriptorsForRenderingFormat(TextRenderingFormat.class);
        assertTrue(textRenderers.isEmpty());

        final Collection<RendererBeanDescriptor<?>> htmlRenderers = provider
                .getRendererBeanDescriptorsForRenderingFormat(HtmlRenderingFormat.class);
        assertFalse(htmlRenderers.isEmpty());
    }

    /**
     * Reads a POJO datastore that holds complex values (maps, lists and byte
     * arrays) and verifies both the column definitions and the three rows of
     * the first table.
     *
     * The connection and the data set are opened with try-with-resources so
     * that they are released even when an assertion fails.
     */
    public void testReadComplexDataInPojoDatastore() throws Exception {
        DataCleanerConfiguration configuration = reader
                .create(new File("src/test/resources/example-configuration-pojo-datastore-with-complex-data.xml"));
        Datastore datastore = configuration.getDatastoreCatalog().getDatastore("pojo");
        assertNotNull(datastore);

        try (DatastoreConnection con = datastore.openConnection()) {
            DataContext dc = con.getDataContext();
            Table table = dc.getDefaultSchema().getTable(0);

            Column[] columns = table.getColumns();
            assertEquals("[Column[name=Foo,columnNumber=0,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
                    + "Column[name=Bar,columnNumber=1,type=MAP,nullable=true,nativeType=null,columnSize=null], "
                    + "Column[name=Baz,columnNumber=2,type=LIST,nullable=true,nativeType=null,columnSize=null], "
                    + "Column[name=bytes,columnNumber=3,type=BINARY,nullable=true,nativeType=null,columnSize=null]]",
                    Arrays.toString(columns));

            try (DataSet ds = dc.query().from(table).select(columns).execute()) {
                // row 1: every column is populated
                assertTrue(ds.next());
                assertEquals("Hello", ds.getRow().getValue(0).toString());
                assertEquals("{greeting=hello, person=world}", ds.getRow().getValue(1).toString());
                assertEquals("[hello, world]", ds.getRow().getValue(2).toString());
                assertEquals("{1,2,3,4,5}", ArrayUtils.toString(ds.getRow().getValue(3)));
                assertTrue(ds.getRow().getValue(1) instanceof Map);
                assertTrue(ds.getRow().getValue(2) instanceof List);
                assertTrue(ds.getRow().getValue(3) instanceof byte[]);

                // row 2: nested map, no list and no bytes
                // NOTE(review): "Srensen" looks like it may have lost a non-ASCII
                // character; confirm against the XML fixture before changing it.
                assertTrue(ds.next());
                assertEquals("There", ds.getRow().getValue(0).toString());
                assertEquals("{greeting=hi, there you!, person={Firstname=Kasper, Lastname=Srensen}}",
                        ds.getRow().getValue(1).toString());
                assertNull(ds.getRow().getValue(2));
                assertNull(ds.getRow().getValue(3));
                assertTrue(ds.getRow().getValue(1) instanceof Map);

                // row 3: no map, but list and (negative) bytes
                assertTrue(ds.next());
                assertEquals("World", ds.getRow().getValue(0).toString());
                assertNull(ds.getRow().getValue(1));
                assertEquals("[Srensen, Kasper]", ds.getRow().getValue(2).toString());
                assertEquals("{-1,-2,-3,-4,-5}", ArrayUtils.toString(ds.getRow().getValue(3)));
                assertTrue(ds.getRow().getValue(2) instanceof List);
                assertTrue(ds.getRow().getValue(3) instanceof byte[]);
            }
        }
    }

    /**
     * Sets two system properties before reading the "valid" example
     * configuration and checks that the JDBC username and the CSV filename
     * reported by the datastores equal the property values. The properties
     * are removed again afterwards.
     */
    public void testOverrideVariables() throws Exception {
        final String usernameKey = "datastoreCatalog.myDatabase.username";
        final String filenameKey = "datastoreCatalog.persons_csv.filename";

        System.setProperty(usernameKey, "foobar");
        System.setProperty(filenameKey, "foo/bar.csv");

        try {
            final File configFile = new File("src/test/resources/example-configuration-valid.xml");
            final DataCleanerConfiguration conf = reader.create(configFile);

            final Datastore jdbc = conf.getDatastoreCatalog().getDatastore("my database");
            assertTrue(jdbc instanceof JdbcDatastore);
            assertEquals("foobar", ((JdbcDatastore) jdbc).getUsername());

            final Datastore csv = conf.getDatastoreCatalog().getDatastore("persons_csv");
            assertTrue(csv instanceof CsvDatastore);
            assertEquals("foo/bar.csv", ((CsvDatastore) csv).getFilename());
        } finally {
            // do not let the overrides leak into other tests
            System.getProperties().remove(usernameKey);
            System.getProperties().remove(filenameKey);
        }
    }

    /**
     * Reads the "valid" example configuration and checks the datastore names
     * it declares as well as the type of the configured task runner.
     */
    public void testValidConfiguration() throws Exception {
        final File configFile = new File("src/test/resources/example-configuration-valid.xml");
        final DataCleanerConfiguration conf = reader.create(configFile);

        final String[] names = getDataStoreCatalog(conf).getDatastoreNames();
        assertEquals("[composite_datastore, my database, mydb_jndi, persons_csv]", Arrays.toString(names));

        final Object taskRunner = conf.getEnvironment().getTaskRunner();
        assertTrue(taskRunner instanceof SingleThreadedTaskRunner);
    }

    /**
     * Reads the "combined storage" example configuration and checks the
     * concrete classes of the combined provider, of its two delegates and of
     * the row annotation factory created by the row-annotations delegate.
     */
    public void testCombinedStorage() throws Exception {
        final File configFile = new File("src/test/resources/example-configuration-combined-storage.xml");
        final DataCleanerConfiguration conf = reader.create(configFile);

        final StorageProvider provider = conf.getEnvironment().getStorageProvider();
        assertEquals(CombinedStorageProvider.class, provider.getClass());

        final CombinedStorageProvider combined = (CombinedStorageProvider) provider;
        assertEquals(BerkeleyDbStorageProvider.class, combined.getCollectionsStorageProvider().getClass());
        assertEquals(InMemoryStorageProvider.class, combined.getRowAnnotationsStorageProvider().getClass());

        final RowAnnotationFactory annotationFactory = combined.getRowAnnotationsStorageProvider()
                .createRowAnnotationFactory();
        assertEquals(InMemoryRowAnnotationFactory2.class, annotationFactory.getClass());
    }

    /**
     * Reads the "all datastore types" example configuration and verifies the
     * datastore names, their descriptions and the type-specific properties of
     * each datastore. It then opens a connection to every datastore that is
     * not on the skip list, and finally checks the schema names exposed by
     * the composite datastore.
     *
     * All connections and data sets are opened with try-with-resources so
     * that they are released even when an assertion fails.
     */
    public void testAllDatastoreTypes() throws Exception {
        DatastoreCatalog datastoreCatalog = getDataStoreCatalog(getConfiguration());
        String[] datastoreNames = datastoreCatalog.getDatastoreNames();
        assertEquals(
                "[my cassandra db, my couch, my es index, my hbase, my mongo, my_access, my_composite, my_csv, my_custom, my_datahub, my_dbase, my_dom_xml, my_excel_2003, "
                        + "my_fixed_width_1, my_fixed_width_2, my_jdbc_connection, my_jdbc_datasource, my_json, my_odb, my_pojo, "
                        + "my_sas, my_sax_xml, my_sfdc_ds, my_sugarcrm]",
                Arrays.toString(datastoreNames));

        assertEquals("a mongo db based datastore", datastoreCatalog.getDatastore("my mongo").getDescription());
        assertEquals("jdbc_con", datastoreCatalog.getDatastore("my_jdbc_connection").getDescription());
        assertEquals("jdbc_ds", datastoreCatalog.getDatastore("my_jdbc_datasource").getDescription());
        assertEquals("dbf", datastoreCatalog.getDatastore("my_dbase").getDescription());

        CsvDatastore myCsvDatastore = (CsvDatastore) datastoreCatalog.getDatastore("my_csv");
        assertEquals("csv", myCsvDatastore.getDescription());
        assertTrue(myCsvDatastore.isMultilineValues());
        assertTrue(myCsvDatastore.isFailOnInconsistencies());
        assertEquals('\\', myCsvDatastore.getEscapeChar().charValue());

        CassandraDatastore cassandraDatastore = (CassandraDatastore) datastoreCatalog
                .getDatastore("my cassandra db");
        assertEquals("localhost", cassandraDatastore.getHostname());
        assertEquals(9042, cassandraDatastore.getPort());
        assertEquals("my_keyspace", cassandraDatastore.getKeyspace());
        assertEquals("foo", cassandraDatastore.getUsername());
        assertEquals("bar", cassandraDatastore.getPassword());
        assertEquals("[SimpleTableDef[name=table,columnNames=[bah, baz],columnTypes=[STRING, STRING]]]",
                Arrays.toString(cassandraDatastore.getTableDefs()));

        ElasticSearchDatastore esDatastore = (ElasticSearchDatastore) datastoreCatalog.getDatastore("my es index");
        assertEquals("localhost", esDatastore.getHostname());
        // Integer.valueOf replaces the deprecated Integer(int) constructor
        assertEquals(Integer.valueOf(9300), esDatastore.getPort());
        assertEquals("my_es_cluster", esDatastore.getClusterName());
        assertEquals("my_index", esDatastore.getIndexName());
        assertNull(esDatastore.getTableDefs());

        assertEquals("a SugarCRM instance", datastoreCatalog.getDatastore("my_sugarcrm").getDescription());
        assertEquals("dom xml", datastoreCatalog.getDatastore("my_dom_xml").getDescription());
        assertEquals("sax xml", datastoreCatalog.getDatastore("my_sax_xml").getDescription());
        assertEquals("custom", datastoreCatalog.getDatastore("my_custom").getDescription());
        assertEquals("odb", datastoreCatalog.getDatastore("my_odb").getDescription());
        assertEquals("xls", datastoreCatalog.getDatastore("my_excel_2003").getDescription());
        assertEquals("comp", datastoreCatalog.getDatastore("my_composite").getDescription());
        assertEquals("salesforce.com is an online CRM system",
                datastoreCatalog.getDatastore("my_sfdc_ds").getDescription());
        assertEquals("mdb", datastoreCatalog.getDatastore("my_access").getDescription());
        assertEquals("folder of sas7bdat files", datastoreCatalog.getDatastore("my_sas").getDescription());
        assertEquals("A datastore based on plain values",
                datastoreCatalog.getDatastore("my_pojo").getDescription());

        PojoDatastore pojoDatastore = (PojoDatastore) datastoreCatalog.getDatastore("my_pojo");
        try (UpdateableDatastoreConnection con = pojoDatastore.openConnection()) {
            DataContext dc = con.getDataContext();
            Schema schema = dc.getDefaultSchema();
            assertEquals("my_schema", schema.getName());
            assertEquals(2, schema.getTableCount());
            assertEquals("[table1, table2]", Arrays.toString(schema.getTableNames()));

            assertEquals(
                    "[Column[name=Foo,columnNumber=0,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
                            + "Column[name=Bar,columnNumber=1,type=INTEGER,nullable=true,nativeType=null,columnSize=null]]",
                    Arrays.toString(schema.getTable(0).getColumns()));
            assertEquals(
                    "[Column[name=Baz,columnNumber=0,type=BOOLEAN,nullable=true,nativeType=null,columnSize=null]]",
                    Arrays.toString(schema.getTable(1).getColumns()));

            try (DataSet ds = dc.query().from("table1").select("Foo", "Bar").execute()) {
                assertTrue(ds.next());
                assertEquals("Row[values=[Hello, 1]]", ds.getRow().toString());
                assertEquals(String.class, ds.getRow().getValue(0).getClass());
                assertEquals(Integer.class, ds.getRow().getValue(1).getClass());

                assertTrue(ds.next());
                assertEquals("Row[values=[There, null]]", ds.getRow().toString());
                assertNull(ds.getRow().getValue(1));
            }

            try (DataSet ds = dc.query().from("table2").select("Baz").execute()) {
                assertTrue(ds.next());
                assertEquals("Row[values=[true]]", ds.getRow().toString());
                assertEquals(Boolean.class, ds.getRow().getValue(0).getClass());
            }
        }

        CouchDbDatastore couchDbDatastore = (CouchDbDatastore) datastoreCatalog.getDatastore("my couch");
        assertEquals("localhost", couchDbDatastore.getHostname());
        assertEquals("user", couchDbDatastore.getUsername());
        assertEquals("pass", couchDbDatastore.getPassword());
        assertTrue(couchDbDatastore.isSslEnabled());
        assertEquals(1, couchDbDatastore.getTableDefs().length);
        assertEquals("SimpleTableDef[name=foobar,columnNames=[foo, bar, baz],columnTypes=[MAP, INTEGER, VARCHAR]]",
                couchDbDatastore.getTableDefs()[0].toString());

        MongoDbDatastore mongoDbDatastore = (MongoDbDatastore) datastoreCatalog.getDatastore("my mongo");
        assertEquals("analyzerbeans_test", mongoDbDatastore.getDatabaseName());
        assertEquals("localhost", mongoDbDatastore.getHostname());
        assertEquals(27017, mongoDbDatastore.getPort());
        SimpleTableDef[] tableDefs = mongoDbDatastore.getTableDefs();
        assertEquals(
                "[SimpleTableDef[name=my_col_1,columnNames=[foo, bar, baz],columnTypes=[VARCHAR, INTEGER, DATE]]]",
                Arrays.toString(tableDefs));

        XmlDatastore xmlDatastore = (XmlDatastore) datastoreCatalog.getDatastore("my_sax_xml");
        assertEquals("../core/src/test/resources/example-xml-file.xml", xmlDatastore.getFilename());
        assertEquals(
                "[XmlSaxTableDef[rowXpath=/greetings/greeting,"
                        + "valueXpaths=[/greetings/greeting/how, /greetings/greeting/what]]]",
                Arrays.toString(xmlDatastore.getTableDefs()));

        FixedWidthDatastore fixedWidthDatastore = (FixedWidthDatastore) datastoreCatalog
                .getDatastore("my_fixed_width_1");
        assertEquals(19, fixedWidthDatastore.getFixedValueWidth());
        assertEquals("[]", Arrays.toString(fixedWidthDatastore.getValueWidths()));
        assertEquals(0, fixedWidthDatastore.getHeaderLineNumber());

        fixedWidthDatastore = (FixedWidthDatastore) datastoreCatalog.getDatastore("my_fixed_width_2");
        assertEquals(-1, fixedWidthDatastore.getFixedValueWidth());
        assertEquals("[4, 17, 19]", Arrays.toString(fixedWidthDatastore.getValueWidths()));
        assertEquals(1, fixedWidthDatastore.getHeaderLineNumber());

        HBaseDatastore hbaseDatastore = (HBaseDatastore) datastoreCatalog.getDatastore("my hbase");
        assertEquals("HBaseDatastore[name=my hbase]", hbaseDatastore.toString());
        assertEquals("localhost", hbaseDatastore.getZookeeperHostname());
        assertEquals(HBaseConfiguration.DEFAULT_ZOOKEEPER_PORT, hbaseDatastore.getZookeeperPort());
        tableDefs = hbaseDatastore.getTableDefs();
        assertNotNull(tableDefs);
        assertEquals(2, tableDefs.length);
        assertEquals(
                "SimpleTableDef[name=table1,columnNames=[fam1:foo, fam1:bar, fam2:baz],columnTypes=[STRING, STRING, INTEGER]]",
                tableDefs[0].toString());
        assertEquals(
                "SimpleTableDef[name=table2,columnNames=[fam3:hello, fam3:world],columnTypes=[STRING, VARCHAR]]",
                tableDefs[1].toString());

        JsonDatastore jsonDatastore = (JsonDatastore) datastoreCatalog.getDatastore("my_json");
        assertEquals("JsonDatastore[name=my_json]", jsonDatastore.toString());

        DataHubDatastore dataHubDatastore = (DataHubDatastore) datastoreCatalog.getDatastore("my_datahub");
        assertThat(dataHubDatastore.getName(), is("my_datahub"));
        assertThat(dataHubDatastore.getHost(), is("hostname"));
        assertThat(dataHubDatastore.getPort(), is(1234));
        assertThat(dataHubDatastore.getUsername(), is("user"));
        assertThat(dataHubDatastore.getPassword(), is("SECRET"));
        assertThat(dataHubDatastore.isHttps(), is(false));
        assertThat(dataHubDatastore.isAcceptUnverifiedSslPeers(), is(false));
        assertThat(dataHubDatastore.getSecurityMode(), is(DataHubSecurityMode.DEFAULT));

        // Every datastore except the ones listed here must be able to open a
        // connection and expose a DataContext. The listed ones are skipped,
        // presumably because they depend on external servers or services that
        // are not available while the test runs.
        List<String> skippedDatastores = Arrays.asList("my_jdbc_datasource", "my mongo", "my couch", "my hbase",
                "my_sfdc_ds", "my_sugarcrm", "my es index", "my_datahub");
        for (String name : datastoreNames) {
            if (skippedDatastores.contains(name)) {
                continue;
            }
            Datastore datastore = datastoreCatalog.getDatastore(name);
            // close each connection again so the loop does not leak resources
            try (DatastoreConnection connection = datastore.openConnection()) {
                assertNotNull(connection.getDataContext());
            } catch (RuntimeException e) {
                // add the datastore name, otherwise the failure is hard to attribute
                throw new RuntimeException("Failed to read from datastore: " + name, e);
            }
        }

        Datastore compositeDatastore = datastoreCatalog.getDatastore("my_composite");
        try (DatastoreConnection con = compositeDatastore.openConnection()) {
            DataContext dataContext = con.getDataContext();
            String[] schemaNames = dataContext.getSchemaNames();
            assertEquals("[PUBLIC, Spreadsheet2003.xls, developers.mdb, resources]",
                    Arrays.toString(schemaNames));
        }
    }

    /**
     * Reads the "all datastore types" example configuration.
     *
     * @return the configuration created by the reader for that file
     */
    private DataCleanerConfiguration getConfiguration() {
        final File configFile = new File("src/test/resources/example-configuration-all-datastore-types.xml");
        return reader.create(configFile);
    }

    /**
     * Fetches the datastore catalog of the given configuration, remembers it
     * in the {@code _datastoreCatalog} field and returns it.
     *
     * @param configuration the configuration to take the catalog from
     * @return the datastore catalog of {@code configuration}
     */
    private DatastoreCatalog getDataStoreCatalog(DataCleanerConfiguration configuration) {
        final DatastoreCatalog catalog = configuration.getDatastoreCatalog();
        _datastoreCatalog = catalog;
        return catalog;
    }

    /**
     * Reads the "all reference data types" example configuration and verifies
     * the dictionaries, synonym catalogs and string patterns it declares:
     * their names, descriptions and the lookups answered by their
     * connections.
     *
     * Each connection is closed in a {@code finally} block so that it is
     * released even when one of the assertions fails.
     */
    public void testReferenceDataCatalog() throws Exception {
        DataCleanerConfiguration conf = getConfigurationFromXMLFile();
        ReferenceDataCatalog referenceDataCatalog = conf.getReferenceDataCatalog();
        String[] dictionaryNames = referenceDataCatalog.getDictionaryNames();
        assertEquals("[custom_dict, datastore_dict, textfile_dict, valuelist_dict]",
                Arrays.toString(dictionaryNames));

        LifeCycleHelper lifeCycleHelper = new LifeCycleHelper(conf, null, true);

        // ---- dictionaries ----

        Dictionary d = referenceDataCatalog.getDictionary("datastore_dict");
        assertEquals("dict_ds", d.getDescription());
        lifeCycleHelper.assignProvidedProperties(Descriptors.ofComponent(d.getClass()), d);
        lifeCycleHelper.initialize(Descriptors.ofComponent(d.getClass()), d);
        DictionaryConnection dictionaryConnection = d.openConnection(conf);
        try {
            assertTrue(dictionaryConnection.containsValue("Patterson"));
            assertTrue(dictionaryConnection.containsValue("Murphy"));
            assertFalse(dictionaryConnection.containsValue("Gates"));
        } finally {
            dictionaryConnection.close();
        }

        d = referenceDataCatalog.getDictionary("textfile_dict");
        assertEquals("dict_txt", d.getDescription());
        lifeCycleHelper.initialize(Descriptors.ofComponent(d.getClass()), d);
        dictionaryConnection = d.openConnection(conf);
        try {
            assertTrue(dictionaryConnection.containsValue("Patterson"));
            assertFalse(dictionaryConnection.containsValue("Murphy"));
            assertTrue(dictionaryConnection.containsValue("Gates"));
        } finally {
            dictionaryConnection.close();
        }

        d = referenceDataCatalog.getDictionary("valuelist_dict");
        assertEquals("dict_simple", d.getDescription());
        lifeCycleHelper.initialize(Descriptors.ofComponent(d.getClass()), d);
        dictionaryConnection = d.openConnection(conf);
        try {
            assertFalse(dictionaryConnection.containsValue("Patterson"));
            assertFalse(dictionaryConnection.containsValue("Murphy"));
            assertTrue(dictionaryConnection.containsValue("greetings"));
        } finally {
            dictionaryConnection.close();
        }

        d = referenceDataCatalog.getDictionary("custom_dict");
        assertEquals("dict_custom", d.getDescription());
        lifeCycleHelper.initialize(Descriptors.ofComponent(d.getClass()), d);
        dictionaryConnection = d.openConnection(conf);
        try {
            assertFalse(dictionaryConnection.containsValue("Patterson"));
            assertFalse(dictionaryConnection.containsValue("Murphy"));
            assertFalse(dictionaryConnection.containsValue("Gates"));
            assertTrue(dictionaryConnection.containsValue("value0"));
            assertTrue(dictionaryConnection.containsValue("value1"));
            assertTrue(dictionaryConnection.containsValue("value2"));
            assertTrue(dictionaryConnection.containsValue("value3"));
            assertTrue(dictionaryConnection.containsValue("value4"));
            assertFalse(dictionaryConnection.containsValue("value5"));
        } finally {
            dictionaryConnection.close();
        }

        // ---- synonym catalogs ----

        String[] synonymCatalogNames = referenceDataCatalog.getSynonymCatalogNames();
        assertEquals("[custom_syn, datastore_syn, textfile_syn]", Arrays.toString(synonymCatalogNames));

        SynonymCatalog s = referenceDataCatalog.getSynonymCatalog("textfile_syn");
        assertEquals("syn_txt", s.getDescription());
        lifeCycleHelper.initialize(Descriptors.ofComponent(s.getClass()), s);
        SynonymCatalogConnection synonymConnection = s.openConnection(conf);
        try {
            assertEquals("DNK", synonymConnection.getMasterTerm("Denmark"));
            assertEquals("DNK", synonymConnection.getMasterTerm("Danmark"));
            assertEquals("DNK", synonymConnection.getMasterTerm("DK"));
            assertEquals("ALB", synonymConnection.getMasterTerm("Albania"));
            assertNull(synonymConnection.getMasterTerm("Netherlands"));
        } finally {
            synonymConnection.close();
        }

        s = referenceDataCatalog.getSynonymCatalog("datastore_syn");
        assertEquals("syn_ds", s.getDescription());
        lifeCycleHelper.assignProvidedProperties(Descriptors.ofComponent(s.getClass()), s);
        lifeCycleHelper.initialize(Descriptors.ofComponent(s.getClass()), s);
        synonymConnection = s.openConnection(conf);
        try {
            // lookup by id
            assertEquals("La Rochelle Gifts", synonymConnection.getMasterTerm("119"));
            // lookup by phone number (string)
            assertEquals("Danish Wholesale Imports", synonymConnection.getMasterTerm("31 12 3555"));
            assertNull(synonymConnection.getMasterTerm("foobar"));
        } finally {
            synonymConnection.close();
        }

        s = referenceDataCatalog.getSynonymCatalog("custom_syn");
        assertEquals("syn_custom", s.getDescription());
        lifeCycleHelper.initialize(Descriptors.ofComponent(s.getClass()), s);
        synonymConnection = s.openConnection(conf);
        try {
            assertEquals("DNK", synonymConnection.getMasterTerm("Denmark"));
            assertEquals("DNK", synonymConnection.getMasterTerm("Danmark"));
            assertNull(synonymConnection.getMasterTerm("DK"));
            assertNull(synonymConnection.getMasterTerm("Albania"));
            assertEquals("NLD", synonymConnection.getMasterTerm("Netherlands"));
        } finally {
            synonymConnection.close();
        }

        // ---- string patterns ----

        String[] stringPatternNames = referenceDataCatalog.getStringPatternNames();
        assertEquals("[regex danish email, simple email]", Arrays.toString(stringPatternNames));

        StringPattern pattern = referenceDataCatalog.getStringPattern("regex danish email");
        assertEquals("pattern_reg", pattern.getDescription());
        lifeCycleHelper.initialize(Descriptors.ofComponent(pattern.getClass()), pattern);
        assertEquals(
                "RegexStringPattern[name=regex danish email, expression=[a-z]+@[a-z]+\\.dk, matchEntireString=true]",
                pattern.toString());
        StringPatternConnection patternConnection = pattern.openConnection(conf);
        try {
            assertTrue(patternConnection.matches("kasper@eobjects.dk"));
            assertFalse(patternConnection.matches("kasper@eobjects.org"));
            assertFalse(patternConnection.matches(" kasper@eobjects.dk"));
        } finally {
            patternConnection.close();
        }

        pattern = referenceDataCatalog.getStringPattern("simple email");
        assertEquals("pattern_simple", pattern.getDescription());
        lifeCycleHelper.initialize(Descriptors.ofComponent(pattern.getClass()), pattern);
        assertEquals("SimpleStringPattern[name=simple email, expression=aaaa@aaaaa.aa]", pattern.toString());
        patternConnection = pattern.openConnection(conf);
        try {
            assertTrue(patternConnection.matches("kasper@eobjects.dk"));
            assertTrue(patternConnection.matches("kasper@eobjects.org"));
            assertFalse(patternConnection.matches(" kasper@eobjects.dk"));
        } finally {
            patternConnection.close();
        }
    }

    /**
     * Checks that the custom dictionary "custom_dict" from the reference data
     * example configuration holds a datastore named "my_jdbc_connection".
     */
    public void testCustomDictionaryWithInjectedDatastore() {
        final ReferenceDataCatalog catalog = getConfigurationFromXMLFile().getReferenceDataCatalog();
        final Dictionary dictionary = catalog.getDictionary("custom_dict");
        final SampleCustomDictionary customDictionary = (SampleCustomDictionary) dictionary;
        Assert.assertEquals("my_jdbc_connection", customDictionary.datastore.getName());
    }

    /**
     * Reads the "all reference data types" example configuration.
     *
     * @return the configuration created by the reader for that file
     */
    private DataCleanerConfiguration getConfigurationFromXMLFile() {
        final File configFile = new File("src/test/resources/example-configuration-all-reference-data-types.xml");
        return reader.create(configFile);
    }
}