org.apache.metron.profiler.integration.ProfilerIntegrationTest.java Source code

Introduction

Here is the source code for org.apache.metron.profiler.integration.ProfilerIntegrationTest.java
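
This is a JUnit 4 test, so besides running it through Maven or an IDE it can be launched programmatically. Below is a minimal sketch of that; the wrapper class is hypothetical and assumes it lives in the same package as the test, but JUnitCore and Result are standard JUnit 4 API.

import org.junit.runner.JUnitCore;
import org.junit.runner.Result;

/** Hypothetical wrapper that runs the integration test programmatically. */
public class RunProfilerIntegrationTest {

    public static void main(String[] args) {
        // run all @Test methods in the class and summarize the outcome
        Result result = JUnitCore.runClasses(ProfilerIntegrationTest.class);
        System.out.println("Tests run: " + result.getRunCount()
                + ", failures: " + result.getFailureCount());
    }
}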

Source

/*
 *
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

package org.apache.metron.profiler.integration;

import com.google.common.base.Joiner;
import org.adrianwalker.multilinestring.Multiline;
import org.apache.commons.math.util.MathUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.metron.common.Constants;
import org.apache.metron.common.utils.SerDeUtils;
import org.apache.metron.hbase.TableProvider;
import org.apache.metron.integration.BaseIntegrationTest;
import org.apache.metron.integration.ComponentRunner;
import org.apache.metron.integration.components.ConfigUploadComponent;
import org.apache.metron.integration.components.FluxTopologyComponent;
import org.apache.metron.integration.components.KafkaComponent;
import org.apache.metron.integration.components.ZKServerComponent;
import org.apache.metron.profiler.hbase.ColumnBuilder;
import org.apache.metron.profiler.hbase.ValueOnlyColumnBuilder;
import org.apache.metron.statistics.OnlineStatisticsProvider;
import org.apache.metron.test.mock.MockHTable;
import org.junit.After;
import org.junit.Assert;
import org.junit.Test;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static com.google.code.tempusfugit.temporal.Duration.seconds;
import static com.google.code.tempusfugit.temporal.Timeout.timeout;
import static com.google.code.tempusfugit.temporal.WaitFor.waitOrTimeout;

/**
 * An integration test of the Profiler topology.
 */
public class ProfilerIntegrationTest extends BaseIntegrationTest {

    private static final String TEST_RESOURCES = "../../metron-analytics/metron-profiler/src/test";
    private static final String FLUX_PATH = "../metron-profiler/src/main/flux/profiler/remote.yaml";

    /**
     * {
     * "ip_src_addr": "10.0.0.1",
     * "protocol": "HTTPS",
     * "length": 10,
     * "bytes_in": 234
     * }
     */
    @Multiline
    private String message1;

    /**
     * {
     * "ip_src_addr": "10.0.0.2",
     * "protocol": "HTTP",
     * "length": 20,
     * "bytes_in": 390
     * }
     */
    @Multiline
    private String message2;

    /**
     * {
     * "ip_src_addr": "10.0.0.3",
     * "protocol": "DNS",
     * "length": 30,
     * "bytes_in": 560
     * }
     */
    @Multiline
    private String message3;

    private ColumnBuilder columnBuilder;
    private ZKServerComponent zkComponent;
    private FluxTopologyComponent fluxComponent;
    private KafkaComponent kafkaComponent;
    private List<byte[]> input;
    private ComponentRunner runner;
    private MockHTable profilerTable;

    private static final String tableName = "profiler";
    private static final String columnFamily = "P";
    private static final double epsilon = 0.001;
    private static final String inputTopic = Constants.INDEXING_TOPIC;
    private static final String outputTopic = "profiles";

    /**
     * A TableProvider that allows us to mock HBase. The topology is pointed at
     * this class by name via the 'hbase.provider.impl' property in setup().
     */
    public static class MockTableProvider implements TableProvider, Serializable {

        MockHTable.Provider provider = new MockHTable.Provider();

        @Override
        public HTableInterface getTable(Configuration config, String tableName) throws IOException {
            return provider.getTable(config, tableName);
        }
    }

    /**
     * Tests the first example contained within the README.
     */
    @Test
    public void testExample1() throws Exception {

        setup(TEST_RESOURCES + "/config/zookeeper/readme-example-1");

        // start the topology and write test messages to kafka
        fluxComponent.submitTopology();
        kafkaComponent.writeMessages(inputTopic, input);

        // verify - ensure the profile is being persisted
        waitOrTimeout(() -> profilerTable.getPutLog().size() > 0, timeout(seconds(90)));

        // verify - only 10.0.0.2 sends 'HTTP', so only that host's profile should have produced values
        List<Double> actuals = read(profilerTable.getPutLog(), columnFamily,
                columnBuilder.getColumnQualifier("value"), Double.class);

        // verify - there are 5 'HTTP' messages, each with 390 bytes_in; the total should be 390 * 5 = 1950
        Assert.assertTrue(actuals.stream().anyMatch(val -> MathUtils.equals(390.0 * 5, val, epsilon)));
    }

    /**
     * Tests the second example contained within the README.
     */
    @Test
    public void testExample2() throws Exception {

        setup(TEST_RESOURCES + "/config/zookeeper/readme-example-2");

        // start the topology and write test messages to kafka
        fluxComponent.submitTopology();
        kafkaComponent.writeMessages(inputTopic, input);

        // expect 2 values written by the profile; one for 10.0.0.2 and another for 10.0.0.3
        final int expected = 2;

        // verify - ensure the profile is being persisted
        waitOrTimeout(() -> profilerTable.getPutLog().size() >= expected, timeout(seconds(90)));

        // verify - expect 2 results as 2 hosts are involved; 10.0.0.2 sends 'HTTP' and 10.0.0.3 sends 'DNS'
        List<Double> actuals = read(profilerTable.getPutLog(), columnFamily,
                columnBuilder.getColumnQualifier("value"), Double.class);

        // verify - 10.0.0.3 -> 1/6
        Assert.assertTrue(
                "Could not find a value near 1/6. Actual values read are are: " + Joiner.on(",").join(actuals),
                actuals.stream().anyMatch(val -> MathUtils.equals(val, 1.0 / 6.0, epsilon)));

        // verify - 10.0.0.2 -> 6/1
        Assert.assertTrue(
                "Could not find a value near 6. Actual values read are are: " + Joiner.on(",").join(actuals),
                actuals.stream().anyMatch(val -> MathUtils.equals(val, 6.0 / 1.0, epsilon)));
    }

    /**
     * Tests the third example contained within the README.
     */
    @Test
    public void testExample3() throws Exception {

        setup(TEST_RESOURCES + "/config/zookeeper/readme-example-3");

        // start the topology and write test messages to kafka
        fluxComponent.submitTopology();
        kafkaComponent.writeMessages(inputTopic, input);

        // verify - ensure the profile is being persisted
        waitOrTimeout(() -> profilerTable.getPutLog().size() > 0, timeout(seconds(90)));

        // verify - only 10.0.0.2 sends 'HTTP', so only that host's profile should have produced values
        List<Double> actuals = read(profilerTable.getPutLog(), columnFamily,
                columnBuilder.getColumnQualifier("value"), Double.class);

        // verify - there are 5 'HTTP' messages each with a length of 20, thus the average should be 20
        Assert.assertTrue(
                "Could not find a value near 20. Actual values read are are: " + Joiner.on(",").join(actuals),
                actuals.stream().anyMatch(val -> MathUtils.equals(val, 20.0, epsilon)));
    }

    /**
     * Tests the fourth example contained within the README.
     */
    @Test
    public void testExample4() throws Exception {

        setup(TEST_RESOURCES + "/config/zookeeper/readme-example-4");

        // start the topology and write test messages to kafka
        fluxComponent.submitTopology();
        kafkaComponent.writeMessages(inputTopic, input);

        // verify - ensure the profile is being persisted
        waitOrTimeout(() -> profilerTable.getPutLog().size() > 0, timeout(seconds(90)));

        // verify - only 10.0.0.2 sends 'HTTP', so only that host's profile should have produced values
        byte[] column = columnBuilder.getColumnQualifier("value");
        List<OnlineStatisticsProvider> actuals = read(profilerTable.getPutLog(), columnFamily, column,
                OnlineStatisticsProvider.class);

        // verify - there are 5 'HTTP' messages each with a length of 20, thus the average should be 20
        Assert.assertTrue(
                "Could not find a value near 20. Actual values read are are: " + Joiner.on(",").join(actuals),
                actuals.stream().anyMatch(val -> MathUtils.equals(val.getMean(), 20.0, epsilon)));
    }

    /**
     * Tests a profile that calculates percentiles.
     */
    @Test
    public void testPercentiles() throws Exception {

        setup(TEST_RESOURCES + "/config/zookeeper/percentiles");

        // start the topology and write test messages to kafka
        fluxComponent.submitTopology();
        kafkaComponent.writeMessages(inputTopic, input);

        // verify - ensure the profile is being persisted
        waitOrTimeout(() -> profilerTable.getPutLog().size() > 0, timeout(seconds(90)));

        List<Double> actuals = read(profilerTable.getPutLog(), columnFamily,
                columnBuilder.getColumnQualifier("value"), Double.class);

        // verify - the 70th percentile of five values, each 20.0, is 20.0
        Assert.assertTrue(
                "Could not find a value near 20. Actual values read are are: " + Joiner.on(",").join(actuals),
                actuals.stream().anyMatch(val -> MathUtils.equals(val, 20.0, epsilon)));
    }

    /**
     * Reads the values written by the Profiler.
     * @param puts The puts that were written to the profiler table.
     * @param family The column family.
     * @param qualifier The column qualifier.
     * @param clazz The expected type of the value.
     * @param <T> The expected type of the value.
     * @return The values written by the Profiler.
     */
    private <T> List<T> read(List<Put> puts, String family, byte[] qualifier, Class<T> clazz) {
        List<T> results = new ArrayList<>();

        for (Put put : puts) {
            for (Cell cell : put.get(Bytes.toBytes(family), qualifier)) {
                T value = SerDeUtils.fromBytes(cell.getValue(), clazz);
                results.add(value);
            }
        }

        return results;
    }
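
    /*
     * A minimal sketch, added for illustration (not part of the original test),
     * of the round-trip that read(...) above relies on: the Profiler serializes
     * profile values with SerDeUtils before writing them to HBase, so the same
     * utility decodes the raw cell bytes. Assumes SerDeUtils.toBytes is
     * available alongside SerDeUtils.fromBytes.
     */
    private static double serdeRoundTripExample() {
        byte[] raw = SerDeUtils.toBytes(1950.0);        // serialize as the Profiler would
        return SerDeUtils.fromBytes(raw, Double.class); // decode exactly as read(...) does
    }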

    public void setup(String pathToConfig) throws Exception {
        columnBuilder = new ValueOnlyColumnBuilder(columnFamily);

        // create the input messages for the profiler to consume; 5 copies of each of the 3 messages
        input = Stream.of(message1, message2, message3)
                .map(Bytes::toBytes)
                .map(m -> Collections.nCopies(5, m))
                .flatMap(List::stream)
                .collect(Collectors.toList());

        // storm topology properties
        final Properties topologyProperties = new Properties() {
            {
                setProperty("kafka.start", "UNCOMMITTED_EARLIEST");
                setProperty("profiler.workers", "1");
                setProperty("profiler.executors", "0");
                setProperty("profiler.input.topic", inputTopic);
                setProperty("profiler.output.topic", outputTopic);
                setProperty("profiler.period.duration", "20");
                setProperty("profiler.period.duration.units", "SECONDS");
                setProperty("profiler.ttl", "30");
                setProperty("profiler.ttl.units", "MINUTES");
                setProperty("profiler.hbase.salt.divisor", "10");
                setProperty("profiler.hbase.table", tableName);
                setProperty("profiler.hbase.column.family", columnFamily);
                setProperty("profiler.hbase.batch", "10");
                setProperty("profiler.hbase.flush.interval.seconds", "1");
                setProperty("profiler.profile.ttl", "20");
                setProperty("hbase.provider.impl", "" + MockTableProvider.class.getName());
            }
        };
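
        // NOTE (added commentary): a 'profiler.period.duration' of 20 SECONDS keeps
        // each profile period well inside the 90-second waitOrTimeout used by the
        // tests above, and 'hbase.provider.impl' routes the topology's HBase writes
        // into the in-memory MockHTable created next.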

        // create the mock table
        profilerTable = (MockHTable) MockHTable.Provider.addToCache(tableName, columnFamily);

        zkComponent = getZKServerComponent(topologyProperties);

        // create the input topic
        kafkaComponent = getKafkaComponent(topologyProperties,
                Arrays.asList(new KafkaComponent.Topic(inputTopic, 1), new KafkaComponent.Topic(outputTopic, 1)));

        // upload profiler configuration to zookeeper
        ConfigUploadComponent configUploadComponent = new ConfigUploadComponent()
                .withTopologyProperties(topologyProperties)
                .withGlobalConfiguration(pathToConfig)
                .withProfilerConfiguration(pathToConfig);

        // load flux definition for the profiler topology
        fluxComponent = new FluxTopologyComponent.Builder()
                .withTopologyLocation(new File(FLUX_PATH))
                .withTopologyName("profiler")
                .withTopologyProperties(topologyProperties)
                .build();

        // start all components
        runner = new ComponentRunner.Builder()
                .withComponent("zk", zkComponent)
                .withComponent("kafka", kafkaComponent)
                .withComponent("config", configUploadComponent)
                .withComponent("storm", fluxComponent)
                .withMillisecondsBetweenAttempts(15000)
                .withNumRetries(10)
                .withCustomShutdownOrder(new String[] { "storm", "config", "kafka", "zk" })
                .build();
        runner.start();
    }

    @After
    public void tearDown() throws Exception {
        MockHTable.Provider.clear();
        if (runner != null) {
            runner.stop();
        }
    }
}