com.inmobi.databus.partition.TestClusterReaderMultipleCollectors.java Source code

Java tutorial

Introduction

Here is the source code for com.inmobi.databus.partition.TestClusterReaderMultipleCollectors.java

Source

package com.inmobi.databus.partition;

/*
 * #%L
 * messaging-client-databus
 * %%
 * Copyright (C) 2012 - 2014 InMobi
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.IOException;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.LinkedBlockingQueue;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.testng.Assert;
import org.testng.annotations.AfterTest;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;

import com.inmobi.databus.files.StreamFile;
import com.inmobi.databus.readers.CollectorStreamReader;
import com.inmobi.databus.readers.DatabusStreamWaitingReader;
import com.inmobi.messaging.consumer.databus.QueueEntry;
import com.inmobi.messaging.consumer.databus.StreamType;
import com.inmobi.messaging.consumer.databus.mapred.DatabusInputFormat;
import com.inmobi.messaging.consumer.util.ClusterUtil;
import com.inmobi.messaging.consumer.util.DatabusUtil;
import com.inmobi.messaging.consumer.util.MiniClusterUtil;
import com.inmobi.messaging.consumer.util.TestUtil;
import com.inmobi.messaging.metrics.PartitionReaderStatsExposer;

public class TestClusterReaderMultipleCollectors {

    // Stream name used for every fixture, reader and metrics object in this class.
    private static final String testStream = "testclient";

    // Names of the two collectors whose files are set up and later moved into the streams directory.
    private String[] collectors = new String[] { "collector1", "collector2" };
    private static final String clusterName = "testCluster";
    // Partition id built from the cluster name with null as the second argument;
    // testReadFromStart asserts that the reader created for it is a ClusterReader.
    private PartitionId partitionId = new PartitionId(clusterName, null);
    // Bounded queue (capacity 149) passed to every PartitionReader created by the test.
    private LinkedBlockingQueue<QueueEntry> buffer = new LinkedBlockingQueue<QueueEntry>(149);
    // Reader under test; re-created for the checkpoint-restart phase.
    private PartitionReader preader;
    // Cluster handle returned by TestUtil.setupDFSCluster in setup().
    private ClusterUtil cluster;
    // Four file names taken from TestUtil.files (indices 1, 3, 5 and 6); used for both collectors.
    private String[] files = new String[] { TestUtil.files[1], TestUtil.files[3], TestUtil.files[5],
            TestUtil.files[6] };
    // Passed to TestUtil.setupDFSCluster for collectors[0]; the test later reads element 0.
    // NOTE(review): presumably populated by that call with already-published stream files — confirm.
    Path[] databusFiles1 = new Path[3];
    // Same role as databusFiles1, passed to TestUtil.setUpFiles for collectors[1].
    Path[] databusFiles2 = new Path[3];
    // File system of the mini DFS cluster obtained in setup().
    FileSystem fs;
    // Stream directory computed in setup() with StreamType.MERGED.
    Path streamDir;
    // String form of fs.getUri(), handed to PartitionReaderStatsExposer.
    String fsUri;
    Configuration conf = new Configuration();
    // Holds the integers 0..59 after setup().
    Set<Integer> partitionMinList;
    // Checkpoint list shared by both readers; starts out backed by an empty TreeMap.
    PartitionCheckpointList partitionCheckpointList;
    // Set to 1 in setup() and passed to PartitionReaderStatsExposer.
    int consumerNumber;

    /**
     * Prepares the fixture shared by the test in this class.
     *
     * Obtains the file system of the mini DFS cluster, sets up files for both
     * collectors through TestUtil, computes the MERGED stream directory and
     * initialises an empty checkpoint list plus the set of integers 0..59.
     *
     * @throws Exception if any of the cluster or file set-up helpers fails
     */
    @BeforeTest
    public void setup() throws Exception {
        consumerNumber = 1;
        fs = MiniClusterUtil.getDFSCluster(conf).getFileSystem();

        // The URI string is needed both for cluster set-up and for the metrics objects.
        final String dfsUri = fs.getUri().toString();

        // collectors[0] is set up together with the cluster itself ...
        final PartitionId firstCollectorPartition = new PartitionId(clusterName, collectors[0]);
        cluster = TestUtil.setupDFSCluster(getClass().getSimpleName(), testStream, firstCollectorPartition,
                dfsUri, files, null, databusFiles1, 0, 1, TestUtil.getConfiguredRootDir());
        // ... collectors[1] is added to the cluster that now exists.
        TestUtil.setUpFiles(cluster, collectors[1], files, null, databusFiles2, 0, 1);

        final Path clusterRoot = new Path(cluster.getRootDir());
        streamDir = DatabusUtil.getStreamDir(StreamType.MERGED, clusterRoot, testStream);
        fsUri = dfsUri;

        // Start without any checkpoints.
        final Map<Integer, PartitionCheckpoint> emptyCheckpoints = new TreeMap<Integer, PartitionCheckpoint>();
        partitionCheckpointList = new PartitionCheckpointList(emptyCheckpoints);

        // One entry for each value 0..59.
        final Set<Integer> minutes = new TreeSet<Integer>();
        for (int minute = 0; minute <= 59; minute++) {
            minutes.add(minute);
        }
        partitionMinList = minutes;
    }

    /**
     * Tears down the fixture created in setup(): first the test cluster via
     * TestUtil.cleanupCluster, then the mini DFS cluster via MiniClusterUtil.
     *
     * @throws IOException declared for the clean-up helpers
     */
    @AfterTest
    public void cleanup() throws IOException {
        // Clean up the cluster's data before the DFS it lives on is shut down.
        TestUtil.cleanupCluster(cluster);
        MiniClusterUtil.shutdownDFSCluster();
    }

    /**
     * Reads the stream from the start with a cluster-level PartitionReader while files of both
     * collectors are moved into the streams directory, then restarts reading from a checkpoint.
     *
     * Phase 1: the reader is created with the start time derived from files[0]. The test
     * interleaves TestUtil.moveFileToStreams calls with TestUtil.assertBuffer checks. The last
     * int argument of the assertBuffer calls adds up to 800, the value asserted on the reader
     * metrics after close.
     *
     * Phase 2: a checkpoint is stored for line 50 of movedPath5 and a new reader is created with
     * a null start time. The assertBuffer calls cover 50 + 100 entries, matching the asserted
     * metric value of 150.
     *
     * Labels of the form "fileXY" in the comments mean files[Y] of collectors[X].
     *
     * NOTE(review): the reader is only closed on the success path; if an assertion fails midway
     * the reader started by preader.start(null) is not closed by this method.
     */
    @Test
    public void testReadFromStart() throws Exception {
        PartitionReaderStatsExposer prMetrics = new PartitionReaderStatsExposer(testStream, "c1",
                partitionId.toString(), consumerNumber, fsUri);
        preader = new PartitionReader(partitionId, partitionCheckpointList, fs, buffer, streamDir, conf,
                DatabusInputFormat.class.getCanonicalName(),
                CollectorStreamReader.getDateFromCollectorFile(files[0]), 10, true, prMetrics, false,
                partitionMinList, null);
        Map<Integer, PartitionCheckpoint> expectedDeltaPck = new HashMap<Integer, PartitionCheckpoint>();
        preader.init();
        // Nothing may be buffered before the reader is started.
        Assert.assertTrue(buffer.isEmpty());
        // A partition id without collector must yield a ClusterReader wrapping a DatabusStreamWaitingReader.
        Assert.assertEquals(preader.getReader().getClass().getName(), ClusterReader.class.getName());
        Assert.assertEquals(((ClusterReader) preader.getReader()).getReader().getClass().getName(),
                DatabusStreamWaitingReader.class.getName());
        preader.start(null);
        // move file11
        TestUtil.incrementCommitTime();
        Path movedPath1 = TestUtil.moveFileToStreams(fs, testStream, collectors[1], cluster,
                TestUtil.getCollectorDir(cluster, testStream, collectors[1]), files[1]);
        Date fromTime = CollectorStreamReader.getDateFromCollectorFile(files[0]);
        Date toTime = getTimeStampFromFile(databusFiles1[0]);
        TestUtil.prepareExpectedDeltaPck(fromTime, toTime, expectedDeltaPck, null, streamDir, partitionMinList,
                partitionCheckpointList, true, false);
        // read file00, file10
        // NOTE(review): the int arguments of assertBuffer look like (file number, first message
        // index, message count) — confirm against TestUtil.assertBuffer.
        TestUtil.assertBuffer(DatabusStreamWaitingReader.getHadoopStreamFile(fs.getFileStatus(databusFiles1[0])), 1,
                0, 100, partitionId, buffer, true, expectedDeltaPck);
        expectedDeltaPck.clear();

        fromTime = getTimeStampFromFile(databusFiles1[0]);
        toTime = getTimeStampFromFile(databusFiles2[0]);
        TestUtil.prepareExpectedDeltaPck(fromTime, toTime, expectedDeltaPck, fs.getFileStatus(databusFiles1[0]),
                streamDir, partitionMinList, partitionCheckpointList, false, false);
        // Only the first 50 entries of file10 are asserted here; the rest follows after the next move.
        TestUtil.assertBuffer(DatabusStreamWaitingReader.getHadoopStreamFile(fs.getFileStatus(databusFiles2[0])), 1,
                0, 50, partitionId, buffer, true, expectedDeltaPck);
        expectedDeltaPck.clear();

        // move file01, file12
        TestUtil.incrementCommitTime();
        Path movedPath2 = TestUtil.moveFileToStreams(fs, testStream, collectors[0], cluster,
                TestUtil.getCollectorDir(cluster, testStream, collectors[0]), files[1]);
        Path movedPath3 = TestUtil.moveFileToStreams(fs, testStream, collectors[1], cluster,
                TestUtil.getCollectorDir(cluster, testStream, collectors[1]), files[2]);

        // read file10, file11
        TestUtil.assertBuffer(DatabusStreamWaitingReader.getHadoopStreamFile(fs.getFileStatus(databusFiles2[0])), 1,
                50, 50, partitionId, buffer, true, expectedDeltaPck);
        fromTime = getTimeStampFromFile(databusFiles2[0]);
        toTime = getTimeStampFromFile(movedPath1);
        TestUtil.prepareExpectedDeltaPck(fromTime, toTime, expectedDeltaPck, fs.getFileStatus(databusFiles2[0]),
                streamDir, partitionMinList, partitionCheckpointList, false, false);
        TestUtil.assertBuffer(DatabusStreamWaitingReader.getHadoopStreamFile(fs.getFileStatus(movedPath1)), 2, 0,
                100, partitionId, buffer, true, expectedDeltaPck);
        expectedDeltaPck.clear();

        // move file02
        TestUtil.incrementCommitTime();
        Path movedPath4 = TestUtil.moveFileToStreams(fs, testStream, collectors[0], cluster,
                TestUtil.getCollectorDir(cluster, testStream, collectors[0]), files[2]);

        fromTime = getTimeStampFromFile(movedPath1);
        toTime = getTimeStampFromFile(movedPath2);
        TestUtil.prepareExpectedDeltaPck(fromTime, toTime, expectedDeltaPck, fs.getFileStatus(movedPath1),
                streamDir, partitionMinList, partitionCheckpointList, false, false);
        // read file01, file12
        TestUtil.assertBuffer(DatabusStreamWaitingReader.getHadoopStreamFile(fs.getFileStatus(movedPath2)), 2, 0,
                100, partitionId, buffer, true, expectedDeltaPck);
        expectedDeltaPck.clear();

        fromTime = getTimeStampFromFile(movedPath2);
        toTime = getTimeStampFromFile(movedPath3);
        TestUtil.prepareExpectedDeltaPck(fromTime, toTime, expectedDeltaPck, fs.getFileStatus(movedPath2),
                streamDir, partitionMinList, partitionCheckpointList, false, false);
        TestUtil.assertBuffer(DatabusStreamWaitingReader.getHadoopStreamFile(fs.getFileStatus(movedPath3)), 3, 0,
                50, partitionId, buffer, true, expectedDeltaPck);
        expectedDeltaPck.clear();

        // move file13
        TestUtil.incrementCommitTime();
        Path movedPath5 = TestUtil.moveFileToStreams(fs, testStream, collectors[1], cluster,
                TestUtil.getCollectorDir(cluster, testStream, collectors[1]), files[3]);

        // read file12, file02
        TestUtil.assertBuffer(DatabusStreamWaitingReader.getHadoopStreamFile(fs.getFileStatus(movedPath3)), 3, 50,
                50, partitionId, buffer, true, expectedDeltaPck);
        fromTime = getTimeStampFromFile(movedPath3);
        toTime = getTimeStampFromFile(movedPath4);
        TestUtil.prepareExpectedDeltaPck(fromTime, toTime, expectedDeltaPck, fs.getFileStatus(movedPath3),
                streamDir, partitionMinList, partitionCheckpointList, false, false);
        TestUtil.assertBuffer(DatabusStreamWaitingReader.getHadoopStreamFile(fs.getFileStatus(movedPath4)), 3, 0,
                50, partitionId, buffer, true, expectedDeltaPck);
        expectedDeltaPck.clear();

        // move file03 (no TestUtil.incrementCommitTime() call precedes this move, unlike the earlier ones)
        Path movedPath6 = TestUtil.moveFileToStreams(fs, testStream, collectors[0], cluster,
                TestUtil.getCollectorDir(cluster, testStream, collectors[0]), files[3]);
        TestUtil.publishLastPathForStreamsDir(fs, cluster, testStream);

        // read file02, file13, file03
        TestUtil.assertBuffer(DatabusStreamWaitingReader.getHadoopStreamFile(fs.getFileStatus(movedPath4)), 3, 50,
                50, partitionId, buffer, true, expectedDeltaPck);
        fromTime = getTimeStampFromFile(movedPath4);
        toTime = getTimeStampFromFile(movedPath5);
        TestUtil.prepareExpectedDeltaPck(fromTime, toTime, expectedDeltaPck, fs.getFileStatus(movedPath4),
                streamDir, partitionMinList, partitionCheckpointList, false, false);
        TestUtil.assertBuffer(DatabusStreamWaitingReader.getHadoopStreamFile(fs.getFileStatus(movedPath5)), 4, 0,
                100, partitionId, buffer, true, expectedDeltaPck);
        expectedDeltaPck.clear();

        fromTime = getTimeStampFromFile(movedPath5);
        toTime = getTimeStampFromFile(movedPath6);
        TestUtil.prepareExpectedDeltaPck(fromTime, toTime, expectedDeltaPck, fs.getFileStatus(movedPath5),
                streamDir, partitionMinList, partitionCheckpointList, false, false);
        TestUtil.assertBuffer(DatabusStreamWaitingReader.getHadoopStreamFile(fs.getFileStatus(movedPath6)), 4, 0,
                100, partitionId, buffer, true, expectedDeltaPck);
        expectedDeltaPck.clear();
        Assert.assertTrue(buffer.isEmpty());
        //XXX Reader should close after listing
        Thread.sleep(3000);
        preader.close();
        preader.join();
        // 800 is the sum of the per-call counts asserted through assertBuffer above.
        Assert.assertEquals(prMetrics.getMessagesReadFromSource(), 800);
        Assert.assertEquals(prMetrics.getMessagesAddedToBuffer(), 800);
        Assert.assertTrue(prMetrics.getWaitTimeUnitsNewFile() > 0);

        // Second phase: fresh metrics, a checkpoint at line 50 of file13, and a reader without start time.
        prMetrics = new PartitionReaderStatsExposer(testStream, "c1", partitionId.toString(), consumerNumber,
                fsUri);
        prepareCheckpoint(DatabusStreamWaitingReader.getHadoopStreamFile(fs.getFileStatus(movedPath5)), 50,
                movedPath5, partitionCheckpointList);
        preader = new PartitionReader(partitionId, partitionCheckpointList, fs, buffer, streamDir, conf,
                DatabusInputFormat.class.getCanonicalName(), null, 1000, true, prMetrics, false, partitionMinList,
                null);
        preader.start(null);
        // NOTE(review): fromTime/toTime still hold the movedPath5/movedPath6 timestamps assigned
        // before the first reader was closed — confirm this range is the intended one here.
        TestUtil.prepareExpectedDeltaPck(fromTime, toTime, expectedDeltaPck, fs.getFileStatus(movedPath5),
                streamDir, partitionMinList, partitionCheckpointList, true, false);
        // Remaining part of file13, starting at the checkpointed line 50.
        TestUtil.assertBuffer(DatabusStreamWaitingReader.getHadoopStreamFile(fs.getFileStatus(movedPath5)), 4, 50,
                50, partitionId, buffer, true, expectedDeltaPck);
        fromTime = getTimeStampFromFile(movedPath5);
        toTime = getTimeStampFromFile(movedPath6);
        TestUtil.prepareExpectedDeltaPck(fromTime, toTime, expectedDeltaPck, fs.getFileStatus(movedPath5),
                streamDir, partitionMinList, partitionCheckpointList, false, false);
        // All of file03.
        TestUtil.assertBuffer(DatabusStreamWaitingReader.getHadoopStreamFile(fs.getFileStatus(movedPath6)), 4, 0,
                100, partitionId, buffer, true, expectedDeltaPck);
        Assert.assertTrue(buffer.isEmpty());
        preader.close();
        preader.join();
        // 50 + 100 entries were asserted in this phase.
        Assert.assertEquals(prMetrics.getMessagesReadFromSource(), 150);
        Assert.assertEquals(prMetrics.getMessagesAddedToBuffer(), 150);
        Assert.assertTrue(prMetrics.getCumulativeNanosForFetchMessage() > 0);
    }

    /**
     * Stores a checkpoint for the given stream file in the supplied checkpoint list.
     *
     * The list slot is the MINUTE field of the date that
     * DatabusStreamWaitingReader.getDateFromStreamDir returns for the parent
     * directory of {@code databusFile}, relative to {@code streamDir}.
     *
     * @param streamFile              stream file the checkpoint refers to
     * @param lineNum                 line number recorded in the checkpoint
     * @param databusFile             path whose parent directory determines the slot
     * @param partitionCheckpointList list that receives the new checkpoint
     */
    public void prepareCheckpoint(StreamFile streamFile, int lineNum, Path databusFile,
            PartitionCheckpointList partitionCheckpointList) {
        final Path parentDir = databusFile.getParent();
        final Date dirDate = DatabusStreamWaitingReader.getDateFromStreamDir(streamDir, parentDir);

        // Calendar is used only to extract the minute-of-hour from the date.
        final Calendar calendar = Calendar.getInstance();
        calendar.setTime(dirDate);
        final int minuteSlot = calendar.get(Calendar.MINUTE);

        final PartitionCheckpoint checkpoint = new PartitionCheckpoint(streamFile, lineNum);
        partitionCheckpointList.set(minuteSlot, checkpoint);
    }

    /**
     * Returns the date that DatabusStreamWaitingReader.getDateFromStreamDir
     * computes for the given path relative to {@code streamDir}.
     *
     * Callers in this class pass the paths of stream files.
     *
     * @param dir path located under the stream directory
     * @return the date reported by getDateFromStreamDir
     */
    private Date getTimeStampFromFile(Path dir) {
        final Date timestamp = DatabusStreamWaitingReader.getDateFromStreamDir(streamDir, dir);
        return timestamp;
    }
}