com.addthis.hydra.task.source.bundleizer.RegexBundleizer.java Source code

Java tutorial

Introduction

Here is the source code for com.addthis.hydra.task.source.bundleizer.RegexBundleizer.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.addthis.hydra.task.source.bundleizer;

import javax.annotation.Nullable;

import java.lang.reflect.Method;

import java.util.Arrays;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.addthis.bundle.core.Bundle;
import com.addthis.bundle.util.AutoField;
import com.addthis.bundle.util.CachingField;
import com.addthis.bundle.util.NoopField;
import com.addthis.bundle.value.ValueFactory;

import com.google.common.collect.ImmutableList;

import com.fasterxml.jackson.annotation.JsonProperty;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Bundleizer that matches each input line against a regular expression and copies the
 * capturing groups into bundle fields.
 *
 * <p>Target fields come from exactly one of two places:
 * <ul>
 *   <li>an explicit {@code fields} list, where field {@code i} receives capturing group
 *       {@code i + 1}; or</li>
 *   <li>the regex's named groups, where each group name becomes a {@link CachingField} and
 *       every unnamed group position below the highest named index is filled with a
 *       {@link NoopField} placeholder.</li>
 * </ul>
 *
 * <p>Named groups are discovered by reflecting {@code Pattern.namedGroups}. When that
 * method cannot be reached, only the explicit {@code fields} list is supported.
 */
public class RegexBundleizer extends NewlineBundleizer {
    // NOTE: 'log' must be initialized before GROUPS_METHOD, because the initializer of
    // GROUPS_METHOD logs on failure and static fields are initialized in textual order.
    private static final Logger log = LoggerFactory.getLogger(RegexBundleizer.class);
    private static final Method GROUPS_METHOD = tryGetNamedGroupsMethod();
    // Java group names are a letter followed by ZERO or more letters/digits, so a
    // one-character name such as (?<a>...) must be recognised as well.
    private static final Pattern TRYING_GROUPS_HEURISTIC = Pattern.compile("\\(\\?<([a-zA-Z][a-zA-Z0-9]*)>");

    private final ImmutableList<AutoField> fields;
    private final Pattern regex;

    /**
     * Creates a bundleizer for the given regex.
     *
     * @param fields explicit target fields, positionally aligned with the regex's capturing
     *               groups; must be empty when the regex uses named groups
     * @param regex  pattern that every line is matched against in full
     * @throws IllegalArgumentException if neither or both of fields / named groups are
     *         supplied, if named groups appear to be used but cannot be introspected, or if
     *         more fields are configured than the regex has capturing groups
     */
    public RegexBundleizer(@JsonProperty("fields") ImmutableList<AutoField> fields,
            @JsonProperty("regex") Pattern regex) {
        this.regex = regex;
        this.fields = resolveFields(fields, regex);
        // Fail fast on a misconfiguration that would otherwise only surface as an
        // IndexOutOfBoundsException from Matcher.group on the first matching line.
        int groupCount = regex.matcher("").groupCount();
        if (this.fields.size() > groupCount) {
            throw new IllegalArgumentException("Regex has " + groupCount + " capturing group(s), but "
                    + this.fields.size() + " fields were specified");
        }
    }

    /**
     * Matches {@code line} against the regex and, on a full match, writes capturing group
     * {@code i + 1} into field {@code i} of {@code next}.
     *
     * @param next bundle to populate
     * @param line raw input line
     * @return {@code next} when the whole line matches, otherwise {@code null}
     */
    @Override
    public Bundle bundleize(Bundle next, String line) {
        Matcher lineMatcher = regex.matcher(line);
        if (!lineMatcher.matches()) {
            return null;
        }
        for (int i = 0; i < fields.size(); i++) {
            // Regex groups are 1-based; group(n) is null for a group that did not
            // participate in the match, and that null is handed to ValueFactory as-is.
            fields.get(i).setValue(next, ValueFactory.create(lineMatcher.group(i + 1)));
        }
        return next;
    }

    /**
     * Decides which fields receive the capturing groups, enforcing that exactly one of
     * explicit fields or named groups is in use.
     */
    private static ImmutableList<AutoField> resolveFields(ImmutableList<AutoField> fields, Pattern regex) {
        Map<String, Integer> namedGroups = tryCallNamedGroups(regex);
        if (namedGroups == null) {
            // Reflection is unavailable, so named groups cannot be honoured at all.
            if (estimateIfPatternHasNamedGroups(regex)) {
                throw new IllegalArgumentException("Looks like named groups were used, but we can't support them");
            }
            if (fields.isEmpty()) {
                throw new IllegalArgumentException("No fields were specified, and we can't support named groups");
            }
            return fields;
        }
        if (fields.isEmpty() == namedGroups.isEmpty()) {
            throw new IllegalArgumentException("Must use (exactly one of) either named groups or fields");
        }
        if (namedGroups.isEmpty()) {
            return fields;
        }
        // Group indexes are 1-based, so the highest index equals the number of slots needed.
        int highestGroupIndex = namedGroups.values().stream().mapToInt(Integer::intValue).max().getAsInt();
        AutoField[] fieldsFromGroups = new AutoField[highestGroupIndex];
        // Unnamed groups keep a shared placeholder; named ones are overwritten below.
        Arrays.fill(fieldsFromGroups, new NoopField());
        namedGroups.forEach((name, index) -> fieldsFromGroups[index - 1] = new CachingField(name));
        return ImmutableList.copyOf(fieldsFromGroups);
    }

    /** Textual guess at whether the pattern source contains a {@code (?<name>} construct. */
    private static boolean estimateIfPatternHasNamedGroups(Pattern pattern) {
        return TRYING_GROUPS_HEURISTIC.matcher(pattern.pattern()).find();
    }

    /**
     * Looks up {@code Pattern.namedGroups} reflectively and makes it accessible.
     *
     * @return the method, or {@code null} when it does not exist or cannot be made accessible
     */
    @Nullable
    private static Method tryGetNamedGroupsMethod() {
        try {
            Method namedGroupsMethod = Pattern.class.getDeclaredMethod("namedGroups");
            namedGroupsMethod.setAccessible(true);
            return namedGroupsMethod;
        } catch (NoSuchMethodException | RuntimeException ex) {
            // RuntimeException covers SecurityException and also the module system's
            // InaccessibleObjectException, which setAccessible throws when java.util.regex
            // is not opened. Letting it escape would abort class initialization.
            log.warn("Failed to reflect the Pattern.namedGroups method, so we cannot use them for field names.",
                    ex);
            return null;
        }
    }

    /**
     * Invokes the reflected {@code namedGroups} method on {@code pattern}.
     *
     * @return the name-to-index map, or {@code null} when reflection is unavailable or the
     *         call fails
     */
    @SuppressWarnings("unchecked")
    @Nullable
    private static Map<String, Integer> tryCallNamedGroups(Pattern pattern) {
        if (GROUPS_METHOD == null) {
            return null;
        }
        try {
            return (Map<String, Integer>) GROUPS_METHOD.invoke(pattern);
        } catch (ReflectiveOperationException ex) {
            log.warn("Unexpected error invoking Pattern.namedGroups, so we cannot use them for field names.",
                    ex);
            return null;
        }
    }
}