org.apache.metron.profiler.storm.integration.ProfilerIntegrationTest.java Source code

Introduction

Here is the source code for org.apache.metron.profiler.storm.integration.ProfilerIntegrationTest.java, an integration test of the Apache Metron Profiler Storm topology. The test submits the Flux-defined topology, writes telemetry messages to Kafka, and verifies the resulting profile measurements with the PROFILE_GET Stellar function against a mock HBase table.

Source

/*
 *
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

package org.apache.metron.profiler.storm.integration;

import org.adrianwalker.multilinestring.Multiline;
import org.apache.commons.io.FileUtils;
import org.apache.metron.common.Constants;
import org.apache.metron.common.configuration.profiler.ProfilerConfig;
import org.apache.metron.hbase.mock.MockHBaseTableProvider;
import org.apache.metron.hbase.mock.MockHTable;
import org.apache.metron.integration.BaseIntegrationTest;
import org.apache.metron.integration.ComponentRunner;
import org.apache.metron.integration.UnableToStartException;
import org.apache.metron.integration.components.FluxTopologyComponent;
import org.apache.metron.integration.components.KafkaComponent;
import org.apache.metron.integration.components.ZKServerComponent;
import org.apache.metron.profiler.client.stellar.FixedLookback;
import org.apache.metron.profiler.client.stellar.GetProfile;
import org.apache.metron.profiler.client.stellar.WindowLookback;
import org.apache.metron.statistics.OnlineStatisticsProvider;
import org.apache.metron.stellar.common.DefaultStellarStatefulExecutor;
import org.apache.metron.stellar.common.StellarStatefulExecutor;
import org.apache.metron.stellar.dsl.Context;
import org.apache.metron.stellar.dsl.functions.resolver.SimpleFunctionResolver;
import org.apache.storm.Config;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.lang.invoke.MethodHandles;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.TimeUnit;

import static org.apache.metron.integration.utils.TestUtils.assertEventually;
import static org.apache.metron.profiler.client.stellar.ProfilerClientConfig.PROFILER_COLUMN_FAMILY;
import static org.apache.metron.profiler.client.stellar.ProfilerClientConfig.PROFILER_HBASE_TABLE;
import static org.apache.metron.profiler.client.stellar.ProfilerClientConfig.PROFILER_HBASE_TABLE_PROVIDER;
import static org.apache.metron.profiler.client.stellar.ProfilerClientConfig.PROFILER_PERIOD;
import static org.apache.metron.profiler.client.stellar.ProfilerClientConfig.PROFILER_PERIOD_UNITS;
import static org.apache.metron.profiler.client.stellar.ProfilerClientConfig.PROFILER_SALT_DIVISOR;
import static org.apache.metron.profiler.storm.KafkaEmitter.ALERT_FIELD;
import static org.apache.metron.profiler.storm.KafkaEmitter.ENTITY_FIELD;
import static org.apache.metron.profiler.storm.KafkaEmitter.PERIOD_END_FIELD;
import static org.apache.metron.profiler.storm.KafkaEmitter.PERIOD_ID_FIELD;
import static org.apache.metron.profiler.storm.KafkaEmitter.PERIOD_START_FIELD;
import static org.apache.metron.profiler.storm.KafkaEmitter.PROFILE_FIELD;
import static org.apache.metron.profiler.storm.KafkaEmitter.TIMESTAMP_FIELD;
import static org.hamcrest.CoreMatchers.hasItem;
import static org.hamcrest.CoreMatchers.hasItems;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;

/**
 * An integration test of the Profiler topology.
 */
public class ProfilerIntegrationTest extends BaseIntegrationTest {

    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    private static final String TEST_RESOURCES = "../../metron-analytics/metron-profiler-storm/src/test";
    private static final String FLUX_PATH = "src/main/flux/profiler/remote.yaml";
    private static final long timeout = TimeUnit.SECONDS.toMillis(90);

    public static final long startAt = 10;
    public static final String entity = "10.0.0.1";
    private static final String tableName = "profiler";
    private static final String columnFamily = "P";
    private static final String inputTopic = Constants.INDEXING_TOPIC;
    private static final String outputTopic = "profiles";
    private static final int saltDivisor = 10;
    private static final long periodDurationMillis = TimeUnit.SECONDS.toMillis(20);
    private static final long windowLagMillis = TimeUnit.SECONDS.toMillis(10);
    private static final long windowDurationMillis = TimeUnit.SECONDS.toMillis(10);
    private static final long profileTimeToLiveMillis = TimeUnit.SECONDS.toMillis(20);
    private static final long maxRoutesPerBolt = 100000;
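
    // NOTE: the timing values above are intentionally small so the test flushes quickly;
    // each 20 second profile period spans two 10 second event windows, and the profile
    // time-to-live matches the period duration.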

    private static ZKServerComponent zkComponent;
    private static FluxTopologyComponent fluxComponent;
    private static KafkaComponent kafkaComponent;
    private static ConfigUploadComponent configUploadComponent;
    private static ComponentRunner runner;
    private static MockHTable profilerTable;
    private static String message1;
    private static String message2;
    private static String message3;
    private StellarStatefulExecutor executor;

    /**
     * [
     *    org.apache.metron.profiler.ProfileMeasurement,
     *    org.apache.metron.profiler.ProfilePeriod,
     *    org.apache.metron.common.configuration.profiler.ProfileResult,
     *    org.apache.metron.common.configuration.profiler.ProfileResultExpressions,
     *    org.apache.metron.common.configuration.profiler.ProfileTriageExpressions,
     *    org.apache.metron.common.configuration.profiler.ProfilerConfig,
     *    org.apache.metron.common.configuration.profiler.ProfileConfig,
     *    org.json.simple.JSONObject,
     *    org.json.simple.JSONArray,
     *    java.util.LinkedHashMap,
     *    org.apache.metron.statistics.OnlineStatisticsProvider
     *  ]
     */
    @Multiline
    private static String kryoSerializers;

    /**
     * {
     *    "profiles": [
     *      {
     *        "profile": "processing-time-test",
     *        "foreach": "ip_src_addr",
     *        "init": { "counter": "0" },
     *        "update": { "counter": "counter + 1" },
     *        "result": "counter"
     *      }
     *    ]
     * }
     */
    @Multiline
    private static String processingTimeProfile;
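
    // NOTE: the profile above defines no 'timestampField', so the Profiler falls back to
    // processing time; period boundaries are driven by the system clock rather than by
    // timestamps within the telemetry itself.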

    @Test
    public void testProcessingTime() throws Exception {
        uploadConfigToZookeeper(ProfilerConfig.fromJSON(processingTimeProfile));

        // start the topology and write 3 test messages to kafka
        fluxComponent.submitTopology();
        kafkaComponent.writeMessages(inputTopic, message1);
        kafkaComponent.writeMessages(inputTopic, message2);
        kafkaComponent.writeMessages(inputTopic, message3);

        // retrieve the profile measurement using PROFILE_GET
        String profileGetExpression = "PROFILE_GET('processing-time-test', '10.0.0.1', PROFILE_FIXED('15', 'MINUTES'))";
        List<Integer> measurements = execute(profileGetExpression, List.class);

        // need to keep checking for measurements until the profiler has flushed one out
        int attempt = 0;
        while (measurements.size() == 0 && attempt++ < 10) {

            // wait for the profiler to flush
            long sleep = windowDurationMillis;
            LOG.debug("Waiting {} millis for profiler to flush", sleep);
            Thread.sleep(sleep);

            // write another message to advance time. this ensures we are testing the 'normal' flush mechanism.
            // if we do not send additional messages to advance time, then it is the profile TTL mechanism which
            // will ultimately flush the profile
            kafkaComponent.writeMessages(inputTopic, message2);

            // try again to retrieve the profile measurement using PROFILE_GET
            measurements = execute(profileGetExpression, List.class);
        }

        // expect to see at least 1 measurement, though there could be more (one for each period)
        // depending on how long we waited for the flush to occur
        assertTrue(measurements.size() > 0);

        // the profile should have counted at least 3 messages; the 3 test messages that were sent.
        // the count could be higher due to the test messages we sent to advance time.
        assertTrue(measurements.get(0) >= 3);
    }

    @Test
    public void testProcessingTimeWithTimeToLiveFlush() throws Exception {
        uploadConfigToZookeeper(ProfilerConfig.fromJSON(processingTimeProfile));

        // start the topology and write 3 test messages to kafka
        fluxComponent.submitTopology();
        kafkaComponent.writeMessages(inputTopic, message1);
        kafkaComponent.writeMessages(inputTopic, message2);
        kafkaComponent.writeMessages(inputTopic, message3);

        // wait a bit beyond the window lag before writing another message.  this allows storm's window manager to close
        // the event window, which then lets the profiler process the previous messages.
        long sleep = windowLagMillis + periodDurationMillis;
        LOG.debug("Waiting {} millis before sending message to close window", sleep);
        Thread.sleep(sleep);
        kafkaComponent.writeMessages(inputTopic, message3);

        // the profile should have counted 3 messages; the 3 test messages that were sent
        assertEventually(() -> {
            List<Integer> results = execute(
                    "PROFILE_GET('processing-time-test', '10.0.0.1', PROFILE_FIXED('15', 'MINUTES'))", List.class);
            assertThat(results, hasItem(3));
        }, timeout);
    }

    /**
     * {
     *    "timestampField": "timestamp",
     *    "profiles": [
     *      {
     *        "profile": "count-by-ip",
     *        "foreach": "ip_src_addr",
     *        "init": { "count": 0 },
     *        "update": { "count" : "count + 1" },
     *        "result": "count"
     *      },
     *      {
     *        "profile": "total-count",
     *        "foreach": "'total'",
     *        "init": { "count": 0 },
     *        "update": { "count": "count + 1" },
     *        "result": "count"
     *      }
     *    ]
     * }
     */
    @Multiline
    private static String eventTimeProfile;
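
    // NOTE: unlike the processing-time profile, the configuration above sets 'timestampField',
    // so period boundaries are driven by the 'timestamp' field carried in each message (event time).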

    @Test
    public void testEventTime() throws Exception {
        uploadConfigToZookeeper(ProfilerConfig.fromJSON(eventTimeProfile));

        // start the topology and write test messages to kafka
        fluxComponent.submitTopology();
        List<String> messages = FileUtils.readLines(new File("src/test/resources/telemetry.json"));
        kafkaComponent.writeMessages(inputTopic, messages);

        long timestamp = System.currentTimeMillis();
        LOG.debug("Attempting to close window period by sending message with timestamp = {}", timestamp);
        kafkaComponent.writeMessages(inputTopic, getMessage("192.168.66.1", timestamp));
        kafkaComponent.writeMessages(inputTopic, getMessage("192.168.138.158", timestamp));

        // create the 'window' that looks up to 5 hours before the max timestamp contained in the test data
        assign("maxTimestamp", "1530978728982L");
        assign("window", "PROFILE_WINDOW('from 5 hours ago', maxTimestamp)");

        // wait until the profile flushes both periods.  the first period will flush immediately as subsequent messages
        // advance time.  the next period contains all of the remaining messages, so there are no other messages to
        // advance time.  because of this the next period only flushes after the time-to-live expires

        // there are 14 messages in the first period and 12 in the next where ip_src_addr = 192.168.66.1
        assertEventually(() -> {
            List<Integer> results = execute("PROFILE_GET('count-by-ip', '192.168.66.1', window)", List.class);
            assertThat(results, hasItems(14, 12));
        }, timeout);

        // there are 36 messages in the first period and 38 in the next where ip_src_addr = 192.168.138.158
        assertEventually(() -> {
            List<Integer> results = execute("PROFILE_GET('count-by-ip', '192.168.138.158', window)", List.class);
            assertThat(results, hasItems(36, 38));
        }, timeout);

        // in all there are 50 (36+14) messages in the first period and 50 (38+12) messages in the next
        assertEventually(() -> {
            List<Integer> results = execute("PROFILE_GET('total-count', 'total', window)", List.class);
            assertThat(results, hasItems(50, 50));
        }, timeout);
    }

    /**
     * {
     *    "profiles": [
     *      {
     *        "profile": "profile-with-stats",
     *        "foreach": "'global'",
     *        "init": { "stats": "STATS_INIT()" },
     *        "update": { "stats": "STATS_ADD(stats, 1)" },
     *        "result": "stats"
     *      }
     *    ],
     *    "timestampField": "timestamp"
     * }
     */
    @Multiline
    private static String profileWithStats;

    /**
     * The result produced by a Profile has to be serializable within Storm. If the result is not
     * serializable, the topology will crash and burn.
     *
     * This test ensures that if a profile returns a STATS object created using the STATS_INIT and
     * STATS_ADD functions, it can be correctly serialized and persisted.
     */
    @Test
    public void testProfileWithStatsObject() throws Exception {
        uploadConfigToZookeeper(ProfilerConfig.fromJSON(profileWithStats));

        // start the topology and write test messages to kafka
        fluxComponent.submitTopology();
        List<String> messages = FileUtils.readLines(new File("src/test/resources/telemetry.json"));
        kafkaComponent.writeMessages(inputTopic, messages);

        assertEventually(() -> {
            // validate the measurements written by the batch profiler using `PROFILE_GET`
            // the 'window' looks up to 5 hours before the max timestamp contained in the test data
            assign("maxTimestamp", "1530978728982L");
            assign("window", "PROFILE_WINDOW('from 5 hours ago', maxTimestamp)");

            // retrieve the stats stored by the profiler
            List results = execute("PROFILE_GET('profile-with-stats', 'global', window)", List.class);
            assertTrue(results.size() > 0);
            assertTrue(results.get(0) instanceof OnlineStatisticsProvider);

        }, timeout);
    }
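
    // NOTE: the serialization requirement above is why setupBeforeClass() registers
    // OnlineStatisticsProvider (among others) via TOPOLOGY_KRYO_REGISTER; the test also
    // disables TOPOLOGY_FALL_BACK_ON_JAVA_SERIALIZATION so that a missing Kryo registration
    // fails fast instead of silently falling back to Java serialization.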

    /**
     * {
     *    "profiles": [
     *      {
     *        "profile": "profile-with-triage",
     *        "foreach": "'global'",
     *        "update": {
     *          "stats": "STATS_ADD(stats, 1)"
     *        },
     *        "result": {
     *          "profile": "stats",
     *          "triage": {
     *            "min": "STATS_MIN(stats)",
     *            "max": "STATS_MAX(stats)",
     *            "mean": "STATS_MEAN(stats)"
     *          }
     *        }
     *      }
     *    ],
     *    "timestampField": "timestamp"
     * }
     */
    @Multiline
    private static String profileWithTriageResult;

    private List<byte[]> outputMessages;

    @Test
    public void testProfileWithTriageResult() throws Exception {
        uploadConfigToZookeeper(ProfilerConfig.fromJSON(profileWithTriageResult));

        // start the topology and write test messages to kafka
        fluxComponent.submitTopology();
        List<String> telemetry = FileUtils.readLines(new File("src/test/resources/telemetry.json"));
        kafkaComponent.writeMessages(inputTopic, telemetry);

        // wait until the triage message is output to kafka
        assertEventually(() -> {
            outputMessages = kafkaComponent.readMessages(outputTopic);
            assertEquals(1, outputMessages.size());
        }, timeout);

        // validate the triage message
        JSONObject message = (JSONObject) new JSONParser().parse(new String(outputMessages.get(0), "UTF-8"));
        assertEquals("profile-with-triage", message.get(PROFILE_FIELD));
        assertEquals("global", message.get(ENTITY_FIELD));
        assertEquals(76548935L, message.get(PERIOD_ID_FIELD));
        assertEquals(1530978700000L, message.get(PERIOD_START_FIELD));
        assertEquals(1530978720000L, message.get(PERIOD_END_FIELD));
        assertEquals("profiler", message.get(Constants.SENSOR_TYPE));
        assertEquals("true", message.get(ALERT_FIELD));
        assertEquals(1.0, message.get("min"));
        assertEquals(1.0, message.get("max"));
        assertEquals(1.0, message.get("mean"));
        assertTrue(message.containsKey(TIMESTAMP_FIELD));
        assertTrue(message.containsKey(Constants.GUID));
    }

    private static String getMessage(String ipSource, long timestamp) {
        return new MessageBuilder().withField("ip_src_addr", ipSource).withField("timestamp", timestamp).build()
                .toJSONString();
    }

    @BeforeClass
    public static void setupBeforeClass() throws UnableToStartException {

        // create some messages that contain a very old timestamp, close to the epoch (1970)
        message1 = getMessage(entity, startAt);
        message2 = getMessage(entity, startAt + 100);
        message3 = getMessage(entity, startAt + (windowDurationMillis * 2));

        // storm topology properties
        final Properties topologyProperties = new Properties() {
            {

                // storm settings
                setProperty("profiler.workers", "1");
                setProperty("profiler.executors", "0");

                setProperty(Config.TOPOLOGY_AUTO_CREDENTIALS, "[]");
                setProperty(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS, "60");
                setProperty(Config.TOPOLOGY_MAX_SPOUT_PENDING, "100000");

                // ensure tuples are serialized during the test, otherwise serialization problems
                // will not be found until the topology is run on a cluster with multiple workers
                setProperty(Config.TOPOLOGY_TESTING_ALWAYS_TRY_SERIALIZE, "true");
                setProperty(Config.TOPOLOGY_FALL_BACK_ON_JAVA_SERIALIZATION, "false");
                setProperty(Config.TOPOLOGY_KRYO_REGISTER, kryoSerializers);

                // kafka settings
                setProperty("profiler.input.topic", inputTopic);
                setProperty("profiler.output.topic", outputTopic);
                setProperty("kafka.start", "EARLIEST");
                setProperty("kafka.security.protocol", "PLAINTEXT");

                // hbase settings
                setProperty("profiler.hbase.salt.divisor", Integer.toString(saltDivisor));
                setProperty("profiler.hbase.table", tableName);
                setProperty("profiler.hbase.column.family", columnFamily);
                setProperty("profiler.hbase.batch", "10");
                setProperty("profiler.hbase.flush.interval.seconds", "1");
                setProperty("hbase.provider.impl", "" + MockHBaseTableProvider.class.getName());

                // profile settings
                setProperty("profiler.period.duration", Long.toString(periodDurationMillis));
                setProperty("profiler.period.duration.units", "MILLISECONDS");
                setProperty("profiler.ttl", Long.toString(profileTimeToLiveMillis));
                setProperty("profiler.ttl.units", "MILLISECONDS");
                setProperty("profiler.window.duration", Long.toString(windowDurationMillis));
                setProperty("profiler.window.duration.units", "MILLISECONDS");
                setProperty("profiler.window.lag", Long.toString(windowLagMillis));
                setProperty("profiler.window.lag.units", "MILLISECONDS");
                setProperty("profiler.max.routes.per.bolt", Long.toString(maxRoutesPerBolt));
            }
        };

        // create the mock table
        profilerTable = (MockHTable) MockHBaseTableProvider.addToCache(tableName, columnFamily);

        zkComponent = getZKServerComponent(topologyProperties);

        // create the input and output topics
        kafkaComponent = getKafkaComponent(topologyProperties,
                Arrays.asList(new KafkaComponent.Topic(inputTopic, 1), new KafkaComponent.Topic(outputTopic, 1)));

        // upload profiler configuration to zookeeper
        configUploadComponent = new ConfigUploadComponent().withTopologyProperties(topologyProperties);

        // load flux definition for the profiler topology
        fluxComponent = new FluxTopologyComponent.Builder().withTopologyLocation(new File(FLUX_PATH))
                .withTopologyName("profiler").withTopologyProperties(topologyProperties).build();

        // start all components
        runner = new ComponentRunner.Builder().withComponent("zk", zkComponent)
                .withComponent("kafka", kafkaComponent).withComponent("config", configUploadComponent)
                .withComponent("storm", fluxComponent).withMillisecondsBetweenAttempts(15000).withNumRetries(10)
                .withCustomShutdownOrder(new String[] { "storm", "config", "kafka", "zk" }).build();
        runner.start();
    }

    @AfterClass
    public static void tearDownAfterClass() throws Exception {
        MockHBaseTableProvider.clear();
        if (runner != null) {
            runner.stop();
        }
    }

    @Before
    public void setup() {
        // create the mock table
        profilerTable = (MockHTable) MockHBaseTableProvider.addToCache(tableName, columnFamily);

        // global properties
        Map<String, Object> global = new HashMap<String, Object>() {
            {
                put(PROFILER_HBASE_TABLE.getKey(), tableName);
                put(PROFILER_COLUMN_FAMILY.getKey(), columnFamily);
                put(PROFILER_HBASE_TABLE_PROVIDER.getKey(), MockHBaseTableProvider.class.getName());

                // client needs to use the same period duration
                put(PROFILER_PERIOD.getKey(), Long.toString(periodDurationMillis));
                put(PROFILER_PERIOD_UNITS.getKey(), "MILLISECONDS");

                // client needs to use the same salt divisor
                put(PROFILER_SALT_DIVISOR.getKey(), saltDivisor);
            }
        };

        // create the stellar execution environment
        executor = new DefaultStellarStatefulExecutor(
                new SimpleFunctionResolver().withClass(GetProfile.class).withClass(FixedLookback.class)
                        .withClass(WindowLookback.class),
                new Context.Builder().with(Context.Capabilities.GLOBAL_CONFIG, () -> global).build());
    }

    @After
    public void tearDown() throws Exception {
        MockHBaseTableProvider.clear();
        profilerTable.clear();
        if (runner != null) {
            runner.reset();
        }
    }

    /**
     * Uploads the profiler configuration to Zookeeper.
     * @param profilerConfig The Profiler configuration.
     * @throws Exception if the configuration cannot be uploaded.
     */
    public void uploadConfigToZookeeper(ProfilerConfig profilerConfig) throws Exception {
        configUploadComponent.withProfilerConfiguration(profilerConfig).update();
    }

    /**
     * Assigns the result of an expression to a variable.
     *
     * @param var The variable to assign.
     * @param expression The expression to execute.
     */
    private void assign(String var, String expression) {
        executor.assign(var, expression, Collections.emptyMap());
    }

    /**
     * Execute a Stellar expression.
     *
     * @param expression The Stellar expression to execute.
     * @param clazz The expected type of the result.
     * @param <T> The expected type of the result.
     * @return The result of executing the Stellar expression.
     */
    private <T> T execute(String expression, Class<T> clazz) {
        T results = executor.execute(expression, Collections.emptyMap(), clazz);
        LOG.debug("{} = {}", expression, results);
        return results;
    }
}
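
For reference, the Stellar client plumbing that setup() wires together can also be used outside of JUnit. The sketch below is a minimal, hypothetical example (the class name ProfileQueryExample is invented for illustration); it reuses only classes, constructor signatures, and configuration keys that already appear in the test above.

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.metron.hbase.mock.MockHBaseTableProvider;
import org.apache.metron.profiler.client.stellar.FixedLookback;
import org.apache.metron.profiler.client.stellar.GetProfile;
import org.apache.metron.stellar.common.DefaultStellarStatefulExecutor;
import org.apache.metron.stellar.common.StellarStatefulExecutor;
import org.apache.metron.stellar.dsl.Context;
import org.apache.metron.stellar.dsl.functions.resolver.SimpleFunctionResolver;

import static org.apache.metron.profiler.client.stellar.ProfilerClientConfig.PROFILER_COLUMN_FAMILY;
import static org.apache.metron.profiler.client.stellar.ProfilerClientConfig.PROFILER_HBASE_TABLE;
import static org.apache.metron.profiler.client.stellar.ProfilerClientConfig.PROFILER_HBASE_TABLE_PROVIDER;
import static org.apache.metron.profiler.client.stellar.ProfilerClientConfig.PROFILER_PERIOD;
import static org.apache.metron.profiler.client.stellar.ProfilerClientConfig.PROFILER_PERIOD_UNITS;
import static org.apache.metron.profiler.client.stellar.ProfilerClientConfig.PROFILER_SALT_DIVISOR;

public class ProfileQueryExample {

    public static void main(String[] args) {

        // the client must use the same table, period duration, and salt divisor that the
        // Profiler topology used when it wrote the measurements
        Map<String, Object> global = new HashMap<>();
        global.put(PROFILER_HBASE_TABLE.getKey(), "profiler");
        global.put(PROFILER_COLUMN_FAMILY.getKey(), "P");
        global.put(PROFILER_HBASE_TABLE_PROVIDER.getKey(), MockHBaseTableProvider.class.getName());
        global.put(PROFILER_PERIOD.getKey(), "20000");   // 20 second periods, in milliseconds
        global.put(PROFILER_PERIOD_UNITS.getKey(), "MILLISECONDS");
        global.put(PROFILER_SALT_DIVISOR.getKey(), 10);

        // an executor that can resolve the PROFILE_GET and PROFILE_FIXED functions
        StellarStatefulExecutor executor = new DefaultStellarStatefulExecutor(
                new SimpleFunctionResolver()
                        .withClass(GetProfile.class)
                        .withClass(FixedLookback.class),
                new Context.Builder()
                        .with(Context.Capabilities.GLOBAL_CONFIG, () -> global)
                        .build());

        // fetch the measurements written over the last 15 minutes for one entity
        List measurements = executor.execute(
                "PROFILE_GET('processing-time-test', '10.0.0.1', PROFILE_FIXED('15', 'MINUTES'))",
                Collections.emptyMap(),
                List.class);
        System.out.println(measurements);
    }
}

If the global configuration does not match the period duration and salt divisor the topology was run with, PROFILE_GET computes different HBase row keys and simply returns an empty list; this is why setup() above copies those values from the topology properties.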