org.apache.metron.parsers.GrokParser.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.metron.parsers.GrokParser.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.metron.parsers;

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import oi.thekraken.grok.api.Grok;
import oi.thekraken.grok.api.Match;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.metron.common.Constants;
import org.apache.metron.parsers.interfaces.MessageParser;
import org.apache.metron.parsers.interfaces.MessageParserResult;
import org.json.simple.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.io.StringReader;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.TimeZone;

public class GrokParser implements MessageParser<JSONObject>, Serializable {

    protected static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    protected transient Grok grok;
    protected String grokPath;
    protected boolean multiLine = false;
    protected String patternLabel;
    protected List<String> timeFields = new ArrayList<>();
    protected String timestampField;
    protected SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S z");
    protected String patternsCommonDir = "/patterns/common";

    @Override
    @SuppressWarnings("unchecked")
    public void configure(Map<String, Object> parserConfig) {
        this.grokPath = (String) parserConfig.get("grokPath");
        String multiLineString = (String) parserConfig.get("multiLine");
        if (!StringUtils.isBlank(multiLineString)) {
            multiLine = Boolean.parseBoolean(multiLineString);
        }
        this.patternLabel = (String) parserConfig.get("patternLabel");
        this.timestampField = (String) parserConfig.get("timestampField");
        List<String> timeFieldsParam = (List<String>) parserConfig.get("timeFields");
        if (timeFieldsParam != null) {
            this.timeFields = timeFieldsParam;
        }
        String dateFormatParam = (String) parserConfig.get("dateFormat");
        if (dateFormatParam != null) {
            this.dateFormat = new SimpleDateFormat(dateFormatParam);
        }
        String timeZoneParam = (String) parserConfig.get("timeZone");
        if (timeZoneParam != null) {
            dateFormat.setTimeZone(TimeZone.getTimeZone(timeZoneParam));
            LOG.debug("Grok Parser using provided TimeZone: {}", timeZoneParam);
        } else {
            dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
            LOG.debug("Grok Parser using default TimeZone (UTC)");
        }
    }

    public InputStream openInputStream(String streamName) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        Path path = new Path(streamName);
        if (fs.exists(path)) {
            return fs.open(path);
        } else {
            return getClass().getResourceAsStream(streamName);
        }
    }

    @Override
    public void init() {
        grok = new Grok();
        try {
            InputStream commonInputStream = openInputStream(patternsCommonDir);
            LOG.debug("Grok parser loading common patterns from: {}", patternsCommonDir);

            if (commonInputStream == null) {
                throw new RuntimeException("Unable to initialize grok parser: Unable to load " + patternsCommonDir
                        + " from either classpath or HDFS");
            }

            grok.addPatternFromReader(new InputStreamReader(commonInputStream));
            LOG.debug("Loading parser-specific patterns from: {}", grokPath);

            InputStream patterInputStream = openInputStream(grokPath);
            if (patterInputStream == null) {
                throw new RuntimeException("Grok parser unable to initialize grok parser: Unable to load "
                        + grokPath + " from either classpath or HDFS");
            }
            grok.addPatternFromReader(new InputStreamReader(patterInputStream));

            if (LOG.isDebugEnabled()) {
                LOG.debug("Grok parser set the following grok expression: {}",
                        grok.getNamedRegexCollectionById(patternLabel));
            }

            String grokPattern = "%{" + patternLabel + "}";

            grok.compile(grokPattern);
            LOG.debug("Compiled grok pattern {}", grokPattern);

        } catch (Throwable e) {
            LOG.error(e.getMessage(), e);
            throw new RuntimeException("Grok parser Error: " + e.getMessage(), e);
        }
    }

    @SuppressWarnings("unchecked")
    @Override
    public Optional<MessageParserResult<JSONObject>> parseOptionalResult(byte[] rawMessage) {
        if (grok == null) {
            init();
        }
        if (multiLine) {
            return parseMultiLine(rawMessage);
        }
        return parseSingleLine(rawMessage);
    }

    @SuppressWarnings("unchecked")
    private Optional<MessageParserResult<JSONObject>> parseMultiLine(byte[] rawMessage) {
        List<JSONObject> messages = new ArrayList<>();
        Map<Object, Throwable> errors = new HashMap<>();
        String originalMessage = null;
        // read the incoming raw data as if it may have multiple lines of logs
        // if there is only only one line, it will just get processed.
        try (BufferedReader reader = new BufferedReader(
                new StringReader(new String(rawMessage, StandardCharsets.UTF_8)))) {
            while ((originalMessage = reader.readLine()) != null) {
                LOG.debug("Grok parser parsing message: {}", originalMessage);
                try {
                    Match gm = grok.match(originalMessage);
                    gm.captures();
                    JSONObject message = new JSONObject();
                    message.putAll(gm.toMap());

                    if (message.size() == 0) {
                        Throwable rte = new RuntimeException(
                                "Grok statement produced a null message. Original message was: " + originalMessage
                                        + " and the parsed message was: " + message + " . Check the pattern at: "
                                        + grokPath);
                        errors.put(originalMessage, rte);
                        continue;
                    }
                    message.put("original_string", originalMessage);
                    for (String timeField : timeFields) {
                        String fieldValue = (String) message.get(timeField);
                        if (fieldValue != null) {
                            message.put(timeField, toEpoch(fieldValue));
                        }
                    }
                    if (timestampField != null) {
                        message.put(Constants.Fields.TIMESTAMP.getName(),
                                formatTimestamp(message.get(timestampField)));
                    }
                    message.remove(patternLabel);
                    postParse(message);
                    messages.add(message);
                    LOG.debug("Grok parser parsed message: {}", message);
                } catch (Exception e) {
                    LOG.error(e.getMessage(), e);
                    errors.put(originalMessage, e);
                }
            }
        } catch (IOException e) {
            LOG.error(e.getMessage(), e);
            Exception innerException = new IllegalStateException(
                    "Grok parser Error: " + e.getMessage() + " on " + originalMessage, e);
            return Optional.of(new DefaultMessageParserResult<>(innerException));
        }
        return Optional.of(new DefaultMessageParserResult<>(messages, errors));
    }

    @SuppressWarnings("unchecked")
    private Optional<MessageParserResult<JSONObject>> parseSingleLine(byte[] rawMessage) {
        List<JSONObject> messages = new ArrayList<>();
        Map<Object, Throwable> errors = new HashMap<>();
        String originalMessage = null;
        try {
            originalMessage = new String(rawMessage, "UTF-8");
            LOG.debug("Grok parser parsing message: {}", originalMessage);
            Match gm = grok.match(originalMessage);
            gm.captures();
            JSONObject message = new JSONObject();
            message.putAll(gm.toMap());

            if (message.size() == 0) {
                Throwable rte = new RuntimeException(
                        "Grok statement produced a null message. Original message was: " + originalMessage
                                + " and the parsed message was: " + message + " . Check the pattern at: "
                                + grokPath);
                errors.put(originalMessage, rte);
            } else {
                message.put("original_string", originalMessage);
                for (String timeField : timeFields) {
                    String fieldValue = (String) message.get(timeField);
                    if (fieldValue != null) {
                        message.put(timeField, toEpoch(fieldValue));
                    }
                }
                if (timestampField != null) {
                    message.put(Constants.Fields.TIMESTAMP.getName(), formatTimestamp(message.get(timestampField)));
                }
                message.remove(patternLabel);
                postParse(message);
                messages.add(message);
                LOG.debug("Grok parser parsed message: {}", message);
            }
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
            Exception innerException = new IllegalStateException(
                    "Grok parser Error: " + e.getMessage() + " on " + originalMessage, e);
            return Optional.of(new DefaultMessageParserResult<>(innerException));
        }
        return Optional.of(new DefaultMessageParserResult<JSONObject>(messages, errors));
    }

    @Override
    public boolean validate(JSONObject message) {
        LOG.debug("Grok parser validating message: {}", message);

        Object timestampObject = message.get(Constants.Fields.TIMESTAMP.getName());
        if (timestampObject instanceof Long) {
            Long timestamp = (Long) timestampObject;
            if (timestamp > 0) {
                LOG.debug("Grok parser validated message: {}", message);
                return true;
            }
        }

        LOG.debug("Grok parser did not validate message: {}", message);
        return false;
    }

    protected void postParse(JSONObject message) {
    }

    protected long toEpoch(String datetime) throws ParseException {
        LOG.debug("Grok parser converting timestamp to epoch: {}", datetime);
        LOG.debug("Grok parser's DateFormat has TimeZone: {}", dateFormat.getTimeZone());

        Date date = dateFormat.parse(datetime);
        LOG.debug("Grok parser converted timestamp to epoch: {}", date);

        return date.getTime();
    }

    protected long formatTimestamp(Object value) {
        LOG.debug("Grok parser formatting timestamp {}", value);

        if (value == null) {
            throw new RuntimeException(patternLabel + " pattern does not include field " + timestampField);
        }
        if (value instanceof Number) {
            return ((Number) value).longValue();
        } else {
            return Long.parseLong(Joiner.on("").join(Splitter.on('.').split(value + "")));
        }
    }

}