com.cloudera.cdk.morphline.solr.SolrMorphlineZkAvroTest.java Source code


Introduction

Here is the source code for com.cloudera.cdk.morphline.solr.SolrMorphlineZkAvroTest.java, an integration test from the CDK Morphlines project. It loads tweet records from an Avro container file into a ZooKeeper-managed SolrCloud cluster via a morphline, then verifies that the indexed documents match the original Avro input.
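
The heart of the test is a round-trip check: the same Avro container file is read back with DataFileReader and compared, field by field, against both the morphline output and the documents returned by Solr. The snippet below is a minimal, standalone sketch of that Avro read-back pattern, assuming only the Avro library on the classpath; the class name AvroReadBackSketch and the file path are illustrative placeholders, not part of the original test.

import java.io.File;
import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.FileReader;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;

public class AvroReadBackSketch {

    public static void main(String[] args) throws IOException {
        // Placeholder path; the test reads
        // sample-statuses-20120906-141433-medium.avro from its resources dir.
        File file = new File("/path/to/records.avro");

        // DataFileReader iterates over the records of an Avro container file;
        // GenericDatumReader picks up the writer schema embedded in the file.
        FileReader<GenericData.Record> reader = new DataFileReader<GenericData.Record>(
                file, new GenericDatumReader<GenericData.Record>());
        try {
            while (reader.hasNext()) {
                GenericData.Record record = reader.next();
                System.out.println(record.get("id") + ": " + record.get("text"));
            }
        } finally {
            reader.close();
        }
    }
}

In the test itself, both the expected records and the Solr result set are sorted by the id field before comparison, so the assertions do not depend on indexing order.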

Source

/*
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.cdk.morphline.solr;

import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;

import org.apache.avro.Schema.Field;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.FileReader;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
import com.cloudera.cdk.morphline.api.Record;
import com.cloudera.cdk.morphline.base.Fields;
import com.cloudera.cdk.morphline.base.Notifications;
import com.google.common.base.Preconditions;
import com.google.common.io.Files;

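/**
 * End-to-end test: reads tweet records from an Avro container file, pushes them
 * through the tutorialReadAvroContainer morphline into a ZooKeeper-managed
 * SolrCloud cluster, and then verifies that both the morphline output and the
 * Solr result set match the original Avro input, record by record.
 */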
@ThreadLeakAction({ Action.WARN })
@ThreadLeakLingering(linger = 0)
@ThreadLeakZombies(Consequence.CONTINUE)
@ThreadLeakScope(Scope.NONE)
@SuppressCodecs({ "Lucene3x", "Lucene40" })
public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTest {

    @Override
    public void doTest() throws Exception {
        File file = new File(RESOURCES_DIR + "/test-documents/sample-statuses-20120906-141433-medium.avro");

        waitForRecoveriesToFinish(false);

        // load avro records via morphline and zk into solr
        morphline = parse("test-morphlines/tutorialReadAvroContainer");
        Record record = new Record();
        byte[] body = Files.toByteArray(file);
        record.put(Fields.ATTACHMENT_BODY, body);
        startSession();
        Notifications.notifyBeginTransaction(morphline);
        assertTrue(morphline.process(record));
        assertEquals(1, collector.getNumStartEvents());

        commit();

        // fetch sorted result set from solr
        QueryResponse rsp = cloudClient
                .query(new SolrQuery("*:*").setRows(100000).addSort("id", SolrQuery.ORDER.asc));
        assertEquals(2104, collector.getRecords().size());
        assertEquals(collector.getRecords().size(), rsp.getResults().size());

        Collections.sort(collector.getRecords(), new Comparator<Record>() {
            @Override
            public int compare(Record r1, Record r2) {
                return r1.get("id").toString().compareTo(r2.get("id").toString());
            }
        });

        // fetch test input data and sort like solr result set
        List<GenericData.Record> records = new ArrayList<GenericData.Record>();
        FileReader<GenericData.Record> reader = new DataFileReader<GenericData.Record>(
                file, new GenericDatumReader<GenericData.Record>());
        while (reader.hasNext()) {
            GenericData.Record expected = reader.next();
            records.add(expected);
        }
        reader.close();
        assertEquals(collector.getRecords().size(), records.size());
        Collections.sort(records, new Comparator<GenericData.Record>() {
            @Override
            public int compare(GenericData.Record r1, GenericData.Record r2) {
                return r1.get("id").toString().compareTo(r2.get("id").toString());
            }
        });

        // sanity check: the input data must not contain duplicate ids
        // (records are sorted by id, so duplicates would be adjacent)
        Object lastId = null;
        for (int i = 0; i < records.size(); i++) {
            // System.out.println("myrec" + i + ":" + records.get(i));
            Object id = records.get(i).get("id");
            if (id != null && id.equals(lastId)) {
                throw new IllegalStateException(
                        "Detected duplicate id. Test input data must not contain duplicate ids!");
            }
            lastId = id;
        }

        // for (int i = 0; i < records.size(); i++) {
        //     System.out.println("myrsp" + i + ":" + rsp.getResults().get(i));
        // }

        Iterator<SolrDocument> rspIter = rsp.getResults().iterator();
        for (int i = 0; i < records.size(); i++) {
            // verify morphline spat out expected data
            Record actual = collector.getRecords().get(i);
            GenericData.Record expected = records.get(i);
            Preconditions.checkNotNull(expected);
            assertTweetEquals(expected, actual, i);

            // verify Solr result set contains expected data
            actual = new Record();
            actual.getFields().putAll(next(rspIter));
            assertTweetEquals(expected, actual, i);
        }

        Notifications.notifyRollbackTransaction(morphline);
        Notifications.notifyShutdown(morphline);
        cloudClient.shutdown();
    }

    private void assertTweetEquals(GenericData.Record expected, Record actual, int i) {
        Preconditions.checkNotNull(expected);
        Preconditions.checkNotNull(actual);
        //    System.out.println("\n\nexpected: " + toString(expected));
        //    System.out.println("actual:   " + actual);
        String[] fieldNames = new String[] {
                "id", "in_reply_to_status_id", "in_reply_to_user_id", "retweet_count", "text" };
        for (String fieldName : fieldNames) {
            assertEquals(i + " fieldName: " + fieldName, expected.get(fieldName).toString(),
                    actual.getFirstValue(fieldName).toString());
        }
    }

    private String toString(GenericData.Record avroRecord) {
        Record record = new Record();
        for (Field field : avroRecord.getSchema().getFields()) {
            record.put(field.name(), avroRecord.get(field.pos()));
        }
        return record.toString(); // prints sorted by key for human readability
    }

}