com.cloudera.cdk.morphline.solr.AbstractSolrMorphlineTest.java Source code

Java tutorial

Introduction

Here is the source code for com.cloudera.cdk.morphline.solr.AbstractSolrMorphlineTest.java

Source

/*
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.cdk.morphline.solr;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.cdk.morphline.api.Collector;
import com.cloudera.cdk.morphline.api.Command;
import com.cloudera.cdk.morphline.api.MorphlineContext;
import com.cloudera.cdk.morphline.api.Record;
import com.cloudera.cdk.morphline.base.Compiler;
import com.cloudera.cdk.morphline.base.FaultTolerance;
import com.cloudera.cdk.morphline.base.Fields;
import com.cloudera.cdk.morphline.base.Notifications;
import com.cloudera.cdk.morphline.stdlib.PipeBuilder;
import com.codahale.metrics.MetricRegistry;
import com.google.common.io.Files;
import com.typesafe.config.Config;

public class AbstractSolrMorphlineTest extends SolrTestCaseJ4 {

    protected Collector collector;
    protected Command morphline;
    protected SolrServer solrServer;
    protected DocumentLoader testServer;

    protected static final boolean TEST_WITH_EMBEDDED_SOLR_SERVER = true;
    protected static final String EXTERNAL_SOLR_SERVER_URL = System.getProperty("externalSolrServer");
    //  protected static final String EXTERNAL_SOLR_SERVER_URL = "http://127.0.0.1:8983/solr";

    protected static final String RESOURCES_DIR = "target/test-classes";
    protected static final String DEFAULT_BASE_DIR = "solr";
    protected static final AtomicInteger SEQ_NUM = new AtomicInteger();
    protected static final AtomicInteger SEQ_NUM2 = new AtomicInteger();

    protected static final Object NON_EMPTY_FIELD = new Object();

    private static final Logger LOGGER = LoggerFactory.getLogger(AbstractSolrMorphlineTest.class);

    @BeforeClass
    public static void beforeClass() throws Exception {
        myInitCore(DEFAULT_BASE_DIR);
    }

    protected static void myInitCore(String baseDirName) throws Exception {
        initCore(RESOURCES_DIR + "/" + baseDirName + "/collection1/conf/solrconfig.xml",
                RESOURCES_DIR + "/" + baseDirName + "/collection1/conf/schema.xml",
                RESOURCES_DIR + "/" + baseDirName);
    }

    @Before
    public void setUp() throws Exception {
        super.setUp();
        collector = new Collector();

        if (EXTERNAL_SOLR_SERVER_URL != null) {
            //solrServer = new ConcurrentUpdateSolrServer(EXTERNAL_SOLR_SERVER_URL, 2, 2);
            //solrServer = new SafeConcurrentUpdateSolrServer(EXTERNAL_SOLR_SERVER_URL, 2, 2);
            solrServer = new HttpSolrServer(EXTERNAL_SOLR_SERVER_URL);
            ((HttpSolrServer) solrServer).setParser(new XMLResponseParser());
        } else {
            if (TEST_WITH_EMBEDDED_SOLR_SERVER) {
                solrServer = new TestEmbeddedSolrServer(h.getCoreContainer(), "");
            } else {
                throw new RuntimeException("Not yet implemented");
                //solrServer = new TestSolrServer(getSolrServer());
            }
        }

        int batchSize = SEQ_NUM2.incrementAndGet() % 2 == 0 ? 100 : 1; //SolrInspector.DEFAULT_SOLR_SERVER_BATCH_SIZE : 1;
        testServer = new SolrServerDocumentLoader(solrServer, batchSize);
        deleteAllDocuments();
    }

    @After
    public void tearDown() throws Exception {
        collector = null;
        solrServer = null;
        super.tearDown();
    }

    protected void testDocumentTypesInternal(String[] files, Map<String, Integer> expectedRecords,
            Map<String, Map<String, Object>> expectedRecordContents) throws Exception {

        deleteAllDocuments();
        int numDocs = 0;
        for (int i = 0; i < 1; i++) {

            for (String file : files) {
                File f = new File(file);
                byte[] body = Files.toByteArray(f);
                Record event = new Record();
                //event.put(Fields.ID, docId++);
                event.getFields().put(Fields.ATTACHMENT_BODY, new ByteArrayInputStream(body));
                event.getFields().put(Fields.ATTACHMENT_NAME, f.getName());
                event.getFields().put(Fields.BASE_ID, f.getName());
                collector.reset();
                load(event);
                Integer count = expectedRecords.get(file);
                if (count != null) {
                    numDocs += count;
                } else {
                    numDocs++;
                }
                assertEquals("unexpected results in " + file, numDocs, queryResultSetSize("*:*"));
                Map<String, Object> expectedContents = expectedRecordContents.get(file);
                if (expectedContents != null) {
                    Record actual = collector.getFirstRecord();
                    for (Map.Entry<String, Object> entry : expectedContents.entrySet()) {
                        if (entry.getValue() == NON_EMPTY_FIELD) {
                            assertNotNull(entry.getKey());
                            assertTrue(actual.getFirstValue(entry.getKey()).toString().length() > 0);
                        } else if (entry.getValue() == null) {
                            assertEquals("key:" + entry.getKey(), 0, actual.get(entry.getKey()).size());
                        } else {
                            assertEquals("key:" + entry.getKey(), Arrays.asList(entry.getValue()),
                                    actual.get(entry.getKey()));
                        }
                    }
                }
            }
        }
        assertEquals(numDocs, queryResultSetSize("*:*"));
    }

    private boolean load(Record record) {
        Notifications.notifyStartSession(morphline);
        return morphline.process(record);
    }

    protected int queryResultSetSize(String query) {
        //    return collector.getRecords().size();
        try {
            testServer.commitTransaction();
            solrServer.commit(false, true, true);
            QueryResponse rsp = solrServer.query(new SolrQuery(query).setRows(Integer.MAX_VALUE));
            LOGGER.debug("rsp: {}", rsp);
            int i = 0;
            for (SolrDocument doc : rsp.getResults()) {
                LOGGER.debug("rspDoc #{}: {}", i++, doc);
            }
            int size = rsp.getResults().size();
            return size;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    private void deleteAllDocuments() throws SolrServerException, IOException {
        collector.reset();
        SolrServer s = solrServer;
        s.deleteByQuery("*:*"); // delete everything!
        s.commit();
    }

    protected Command createMorphline(String file) throws IOException {
        return new PipeBuilder().build(parse(file), null, collector, createMorphlineContext());
    }

    private MorphlineContext createMorphlineContext() {
        return new SolrMorphlineContext.Builder().setDocumentLoader(testServer)
                //      .setDocumentLoader(new CollectingDocumentLoader(100))
                .setExceptionHandler(new FaultTolerance(false, false, SolrServerException.class.getName()))
                .setMetricRegistry(new MetricRegistry()).build();
    }

    private Config parse(String file) throws IOException {
        SolrLocator locator = new SolrLocator(createMorphlineContext());
        locator.setSolrHomeDir(testSolrHome + "/collection1");
        Config config = new Compiler().parse(new File(RESOURCES_DIR + "/" + file + ".conf"),
                locator.toConfig("SOLR_LOCATOR"));
        config = config.getConfigList("morphlines").get(0);
        return config;
    }

    protected void startSession() {
        Notifications.notifyStartSession(morphline);
    }

    protected void testDocumentContent(HashMap<String, ExpectedResult> expectedResultMap) throws Exception {
        QueryResponse rsp = solrServer.query(new SolrQuery("*:*").setRows(Integer.MAX_VALUE));
        // Check that every expected field/values shows up in the actual query
        for (Entry<String, ExpectedResult> current : expectedResultMap.entrySet()) {
            String field = current.getKey();
            for (String expectedFieldValue : current.getValue().getFieldValues()) {
                ExpectedResult.CompareType compareType = current.getValue().getCompareType();
                boolean foundField = false;

                for (SolrDocument doc : rsp.getResults()) {
                    Collection<Object> actualFieldValues = doc.getFieldValues(field);
                    if (compareType == ExpectedResult.CompareType.equals) {
                        if (actualFieldValues != null && actualFieldValues.contains(expectedFieldValue)) {
                            foundField = true;
                            break;
                        }
                    } else {
                        for (Iterator<Object> it = actualFieldValues.iterator(); it.hasNext();) {
                            String actualValue = it.next().toString(); // test only supports string comparison
                            if (actualFieldValues != null && actualValue.contains(expectedFieldValue)) {
                                foundField = true;
                                break;
                            }
                        }
                    }
                }
                assert (foundField); // didn't find expected field/value in query
            }
        }
    }

    /**
     * Representation of the expected output of a SolrQuery.
     */
    protected static class ExpectedResult {
        private HashSet<String> fieldValues;

        public enum CompareType {
            equals, // Compare with equals, i.e. actual.equals(expected)
            contains; // Compare with contains, i.e. actual.contains(expected)
        }

        private CompareType compareType;

        public ExpectedResult(HashSet<String> fieldValues, CompareType compareType) {
            this.fieldValues = fieldValues;
            this.compareType = compareType;
        }

        public HashSet<String> getFieldValues() {
            return fieldValues;
        }

        public CompareType getCompareType() {
            return compareType;
        }
    }
}