org.apache.hadoop.tools.rumen.ParsedLine.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.tools.rumen.ParsedLine.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.tools.rumen;

import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * One line of a log, split into a leading record-type token and a set of
 * {@code key="value"} properties.
 *
 * The text before the first space is interned through
 * {@link LogRecordType#intern(String)} and stored in {@link #type}; every
 * {@code key="value"} pair found in the remainder of the line is stored in
 * {@link #content}. A line that is empty or starts with a space is treated as
 * junk: {@link #type} stays {@code null} and {@link #content} stays empty.
 */
class ParsedLine {
    /** Properties parsed from the line; assigned (possibly empty) by the constructor. */
    Properties content;
    /** Record type taken from the first token; {@code null} for a junk line. */
    LogRecordType type;

    /** REGEX for a property key: one or more word characters, captured as a group. */
    static final String KEY = "(\\w+)";
    /**
     * The value string is enclosed in double quotation marks ('"') and
     * occurrences of '"' and '\' are escaped with a '\'. So the escaped value
     * string is essentially a sequence of escape sequences ('\' followed by any
     * character) and characters other than '"' and '\'.
     *
     * The straightforward REGEX to capture the above is "((?:[^\"\\\\]|\\\\.)*)".
     * Unfortunately Java's REGEX implementation is a backtracking matcher (it
     * does not perform an NFA-to-DFA conversion), so such an expression leads
     * to deep backtracking and stack overflow when matching long strings. The
     * following is a manual "unfolding" of the REGEX, using possessive
     * quantifiers, to get rid of the backtracking.
     */
    static final String VALUE = "([^\"\\\\]*+(?:\\\\.[^\"\\\\]*+)*+)";
    /**
     * REGEX to match the Key-Value pairs in an input line. Capture group 1
     * matches the key and capture group 2 matches the value (without the
     * enclosing quotation marks).
     */
    static final Pattern keyValPair = Pattern.compile(KEY + "=" + "\"" + VALUE + "\"");

    /**
     * Parses a single log line.
     *
     * @param fullLine the complete line; must not be {@code null}
     * @param version  accepted for interface compatibility; not used by the parser
     */
    @SuppressWarnings("unused")
    ParsedLine(String fullLine, int version) {
        content = new Properties();

        int firstSpace = fullLine.indexOf(' ');

        if (firstSpace < 0) {
            // No separator at all: the whole line is the record type.
            firstSpace = fullLine.length();
        }

        if (firstSpace == 0) {
            return; // This is a junk line of some sort
        }

        type = LogRecordType.intern(fullLine.substring(0, firstSpace));

        if (firstSpace >= fullLine.length()) {
            // The line consists of the type token only. There is nothing after
            // it to parse, and substring(firstSpace + 1) would be out of range.
            return;
        }

        String propValPairs = fullLine.substring(firstSpace + 1);

        Matcher matcher = keyValPair.matcher(propValPairs);

        // Values are stored exactly as captured (escape sequences are kept);
        // a key that occurs more than once keeps its last value.
        while (matcher.find()) {
            String key = matcher.group(1);
            String value = matcher.group(2);
            content.setProperty(key, value);
        }
    }

    /**
     * Returns the record type of this line.
     *
     * @return the record type, or {@code null} if the line was a junk line
     */
    protected LogRecordType getType() {
        return type;
    }

    /**
     * Looks up the raw value of a property.
     *
     * @param key the property name
     * @return the captured value, or {@code null} if the line had no such key
     */
    protected String get(String key) {
        return content.getProperty(key);
    }

    /**
     * Looks up a property and parses it as a {@code long}.
     *
     * @param key the property name
     * @return the property value parsed with {@link Long#parseLong(String)}
     * @throws NumberFormatException if the value is not a parsable long; a
     *         missing key also ends up here because {@code null} is handed to
     *         {@code Long.parseLong}
     */
    protected long getLong(String key) {
        String val = get(key);

        return Long.parseLong(val);
    }
}