Example usage for org.apache.hadoop.conf Configuration getValByRegex

List of usage examples for org.apache.hadoop.conf Configuration getValByRegex

Introduction

On this page you can find example usages of org.apache.hadoop.conf Configuration getValByRegex.

Prototype

public Map<String, String> getValByRegex(String regex) 

Document

get keys matching the regex
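
Before the project-specific examples below, here is a minimal, self-contained sketch of a typical call. The property names and the regex are hypothetical and only illustrate the call; getValByRegex returns a map of every configuration entry whose key matches the given regular expression.

import java.util.Map;

import org.apache.hadoop.conf.Configuration;

public class GetValByRegexSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // hypothetical properties, set here only for illustration
        conf.set("myapp.input.path", "/data/in");
        conf.set("myapp.output.path", "/data/out");
        conf.set("other.setting", "ignored");

        // every entry whose key starts with "myapp." is returned
        Map<String, String> matches = conf.getValByRegex("^myapp\\..*");
        for (Map.Entry<String, String> entry : matches.entrySet()) {
            System.out.println(entry.getKey() + " = " + entry.getValue());
        }
    }
}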

Usage

From source file:co.cask.cdap.common.conf.ConfigurationUtil.java

License:Apache License

/**
 * Retrieves all configurations that are prefixed with a particular prefix.
 *
 * @see {@link #setNamedConfigurations(Configuration, String, Map)}.
 *
 * @param conf the Configuration from which to get the configurations
 * @param confKeyPrefix the prefix to search for in the keys
 * @return a map of key-value pairs, representing the requested configurations, after removing the prefix
 */
public static Map<String, String> getNamedConfigurations(Configuration conf, String confKeyPrefix) {
    Map<String, String> namedConf = new HashMap<>();
    int prefixLength = confKeyPrefix.length();
    // since it's a regex match, escape '.' so it matches a literal dot rather than any character
    confKeyPrefix = confKeyPrefix.replace(".", "\\.");
    Map<String, String> properties = conf.getValByRegex("^" + confKeyPrefix + ".*");
    for (Map.Entry<String, String> entry : properties.entrySet()) {
        namedConf.put(entry.getKey().substring(prefixLength), entry.getValue());
    }
    return namedConf;
}
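
A hedged usage sketch for the helper above; the prefix and property names are hypothetical and assume the keys were written with the matching prefix convention:

// hypothetical prefix and keys, shown only to illustrate getNamedConfigurations
Configuration conf = new Configuration();
conf.set("myservice.option1", "a");
conf.set("myservice.option2", "b");
Map<String, String> named = ConfigurationUtil.getNamedConfigurations(conf, "myservice.");
// named now contains {option1=a, option2=b}; the "myservice." prefix has been stripped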

From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.java

License:Apache License

private static Map<String, String> getConfigMap(Configuration conf) {
    assert conf != null;
    Map<String, String> map = conf.getValByRegex(PREFIX_PATTERN.pattern());
    NavigableMap<String, String> prefixMap = createPrefixMap(map, PREFIX);
    return prefixMap;
}

From source file:com.asakusafw.runtime.stage.inprocess.InProcessStageConfigurator.java

License:Apache License

private void install(Job job) {
    Configuration conf = job.getConfiguration();
    int prefixLength = KEY_PREFIX_REPLACE.length();
    for (Map.Entry<String, String> entry : conf.getValByRegex(PATTERN_KEY_REPLACE.pattern()).entrySet()) {
        assert entry.getKey().length() >= prefixLength;
        String key = entry.getKey().substring(prefixLength);
        if (key.isEmpty()) {
            continue;
        }
        String value = entry.getValue();
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("activate in-process configuration: {0}=\"{1}\"->\"{2}\"", //$NON-NLS-1$
                    key, conf.get(key, ""), //$NON-NLS-1$
                    value));
        }
        conf.set(key, value);
    }
    conf.set(StageConstants.PROP_JOB_RUNNER, SimpleJobRunner.class.getName());
    StageResourceDriver.setAccessMode(job, StageResourceDriver.AccessMode.DIRECT);
    StageInputFormat.setSplitCombinerClass(job, ExtremeSplitCombiner.class);
}

From source file:com.digitalpebble.behemoth.DocumentFilter.java

License:Apache License

/** Builds a document filter given a Configuration object **/
public static DocumentFilter getFilters(Configuration conf) {
    // extracts the patterns
    Map<String, String> PositiveKVpatterns = conf.getValByRegex(DocumentFilterParamNamePrefixKeep + ".+");
    Map<String, String> NegativeKVpatterns = conf.getValByRegex(DocumentFilterParamNamePrefixSkip + ".+");

    Map<String, String> tmpMap;

    DocumentFilter filter = new DocumentFilter();

    filter.medataMode = conf.get(DocumentFilterParamNameMode, "AND");

    // has to be either positive or negative but not both
    if (PositiveKVpatterns.size() > 0 && NegativeKVpatterns.size() > 0) {
        throw new RuntimeException(
                "Can't have positive AND negative document filters - check your configuration");
    } else if (PositiveKVpatterns.size() > 0) {
        filter.negativeMode = false;
        tmpMap = PositiveKVpatterns;
    } else {
        filter.negativeMode = true;
        tmpMap = NegativeKVpatterns;
    }

    // normalise the keys
    Iterator<Entry<String, String>> kviter = tmpMap.entrySet().iterator();
    while (kviter.hasNext()) {
        Entry<String, String> ent = kviter.next();
        String k = ent.getKey();
        String v = ent.getValue();
        k = k.substring(DocumentFilterParamNamePrefixKeep.length());

        StringBuffer message = new StringBuffer();
        if (filter.negativeMode)
            message.append("Negative ");
        else
            message.append("Positive ");
        message.append("filter found : ").append(k).append(" = ").append(v);
        LOG.info(message.toString());

        filter.KVpatterns.put(k, v);
    }

    String URLPatternS = conf.get(DocumentFilterParamNameURLFilterKeep, "");
    if (URLPatternS.length() > 0) {
        try {
            filter.URLRegex = Pattern.compile(URLPatternS);
        } catch (PatternSyntaxException e) {
            filter.URLRegex = null;
            LOG.error("Can't create regular expression for URL from " + URLPatternS);
        }
    }

    String MTPatternS = conf.get(DocumentFilterParamNameMimeTypeFilterKeep, "");
    if (MTPatternS.length() > 0) {
        try {
            filter.MimetypeRegex = Pattern.compile(MTPatternS);
        } catch (PatternSyntaxException e) {
            filter.MimetypeRegex = null;
            LOG.error("Can't create regular expression for MimeType from " + MTPatternS);
        }
    }

    filter.maxContentLength = conf.getInt(DocumentFilterParamNameLength, -1);

    return filter;
}
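
A short sketch of how the corresponding properties might be set before calling getFilters. The key suffix and pattern are hypothetical, and it assumes the prefix and mode constants referenced above are publicly accessible on DocumentFilter:

// hypothetical filter configuration: keep only documents whose "lang" metadata matches "en.*"
Configuration conf = new Configuration();
conf.set(DocumentFilter.DocumentFilterParamNamePrefixKeep + "lang", "en.*");
conf.set(DocumentFilter.DocumentFilterParamNameMode, "AND");
DocumentFilter filter = DocumentFilter.getFilters(conf);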

From source file:hydrograph.engine.cascading.integration.HydrographRuntime.java

License:Apache License

public void initialize(Properties config, String[] args, HydrographJob hydrographJob, String jobId,
        String UDFPath) {

    AppProps.setApplicationName(hadoopProperties, hydrographJob.getJAXBObject().getName());

    hadoopProperties.putAll(config);

    Configuration conf = new HadoopConfigProvider(hadoopProperties).getJobConf();

    SchemaFieldHandler schemaFieldHandler = new SchemaFieldHandler(
            hydrographJob.getJAXBObject().getInputsOrOutputsOrStraightPulls());

    flowManipulationContext = new FlowManipulationContext(hydrographJob, args, schemaFieldHandler, jobId);

    FlowManipulationHandler flowManipulationHandler = new FlowManipulationHandler();

    hydrographJob = flowManipulationHandler.execute(flowManipulationContext);

    if (hydrographJob.getJAXBObject().getRuntimeProperties() != null
            && hydrographJob.getJAXBObject().getRuntimeProperties().getProperty() != null) {
        for (Property property : hydrographJob.getJAXBObject().getRuntimeProperties().getProperty()) {
            hadoopProperties.put(property.getName(), property.getValue());
        }
    }

    JAXBTraversal traversal = new JAXBTraversal(hydrographJob.getJAXBObject());

    if (traversal.isHiveComponentPresentInFlow()) {
        try {
            HiveMetastoreTokenProvider.obtainTokenForHiveMetastore(conf);
        } catch (TException e) {
            throw new HydrographRuntimeException(e);
        } catch (IOException e) {
            throw new HydrographRuntimeException(e);
        }
    }

    String[] otherArgs;
    try {
        otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    } catch (IOException e) {
        throw new HydrographRuntimeException(e);
    }

    String argsString = "";
    for (String arg : otherArgs) {
        argsString = argsString + " " + arg;
    }
    LOG.info("After processing arguments are:" + argsString);
    this.args = otherArgs;
    // setJar(otherArgs);

    hadoopProperties.putAll(conf.getValByRegex(".*"));

    ComponentAdapterFactory componentAdapterFactory = new ComponentAdapterFactory(
            hydrographJob.getJAXBObject());

    flowBuilder = new FlowBuilder();

    runtimeContext = new RuntimeContext(hydrographJob, traversal, hadoopProperties, componentAdapterFactory,
            flowManipulationContext.getSchemaFieldHandler(), UDFPath);

    LOG.info("Graph '" + runtimeContext.getHydrographJob().getJAXBObject().getName()
            + "' initialized successfully");
}
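
In the example above (and in the test setups that follow), conf.getValByRegex(".*") is used as a convenient way to materialize every Hadoop configuration entry as a Map<String, String>, so the whole configuration can be copied into a java.util.Properties object with putAll. A minimal sketch of that idiom on its own, without the Hydrograph-specific setup:

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;

public class CopyConfToProperties {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        Properties properties = new Properties();
        // ".*" matches every key, so this copies the entire configuration
        properties.putAll(conf.getValByRegex(".*"));
        System.out.println(properties.size() + " configuration entries copied");
    }
}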

From source file:hydrograph.engine.cascading.schemes.MixedSchemeTestsWithEncoding.java

License:Apache License

@Before
public void prepare() {
    outPath = "testData/schemes/TextMixed/output";

    Configuration conf = new Configuration();
    Properties properties = new Properties();
    properties.putAll(conf.getValByRegex(".*"));
    AppProps.setApplicationJarClass(properties, MixedSchemeTestsWithEncoding.class);
    flowConnector = new Hadoop2MR1FlowConnector(properties);

    fields = new Fields("f1", "f2", "f3", "f4", "f5");
    fields_new = new Fields("f1", "f2", "f3", "f4", "f5");
}

From source file:hydrograph.engine.cascading.schemes.TextDelimitedSchemeTestsWithEncoding.java

License:Apache License

@Before
public void prepare() {

    outPath = "testData/schemes/TextDelimited/output";

    Configuration conf = new Configuration();
    Properties properties = new Properties();
    properties.putAll(conf.getValByRegex(".*"));
    AppProps.setApplicationJarClass(properties, TextDelimitedSchemeTestsWithEncoding.class);
    flowConnector = new Hadoop2MR1FlowConnector(properties);

    fields = new Fields("f1", "f2", "f3", "f4", "f5");
    fields_new = new Fields("f1", "f2", "f3", "f4", "f5");
}

From source file:hydrograph.engine.cascading.schemes.TextDelimiterAndFixedWidthTest.java

License:Apache License

@Before
public void prepare() {
    outPath = "testData/schemes/TextMixed/output";

    Configuration conf = new Configuration();
    Properties properties = new Properties();
    properties.putAll(conf.getValByRegex(".*"));
    AppProps.setApplicationJarClass(properties, TextDelimiterAndFixedWidthTest.class);
    flowConnector = new Hadoop2MR1FlowConnector(properties);

    fields = new Fields("f1", "f2", "f3", "f4", "f5");
    fields_new = new Fields("f1", "f2", "f3", "f4", "f5", "newline");
}

From source file:hydrograph.engine.cascading.schemes.TextFixedWidthSchemeTestsWithEncoding.java

License:Apache License

@Before
public void prepare() {

    outPath = "testData/schemes/TextFixedWidth/output";

    Configuration conf = new Configuration();
    Properties properties = new Properties();
    properties.putAll(conf.getValByRegex(".*"));
    AppProps.setApplicationJarClass(properties, TextFixedWidthSchemeTestsWithEncoding.class);
    flowConnector = new Hadoop2MR1FlowConnector(properties);
    types = new Type[] { Integer.class, Date.class, String.class, BigDecimal.class, Long.class };

    fields = new Fields("f1", "f2", "f3", "f4", "f5").applyTypes(types);
    fields_new = new Fields("f1", "f2", "f3", "f4", "f5").applyTypes(types);
}

From source file:hydrograph.engine.hive.scheme.HivePartRead.java

License:Apache License

public static void main(String args[]) throws IOException {

    Configuration conf = new Configuration();

    String[] otherArgs;

    otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    // print other args
    String argsString = "";
    for (String arg : otherArgs) {
        argsString = argsString + " " + arg;
    }
    System.out.println("After processing arguments are:" + argsString);

    Properties properties = new Properties();
    properties.putAll(conf.getValByRegex(".*"));

    Tap sink = new Hfs(new TextDelimited(false, ","), "/data/file_out_2", SinkMode.REPLACE);

    HiveTableDescriptor hiveTableDescriptor = new HiveTableDescriptor("testp14", new String[] { "a", "b", "c" },
            new String[] { "string", "string", "string" }, new String[] { "a" });
    HiveTap hivetap = new HiveTap(hiveTableDescriptor, new HiveParquetScheme(hiveTableDescriptor));
    Tap source = new HivePartitionTap(hivetap);
    Pipe pipe = new Pipe("pipe");

    properties.put("hive.metastore.uris", "thrift://UbuntuD5.bitwiseglobal.net:9083");
    FlowDef def = FlowDef.flowDef().addSource(pipe, source).addTailSink(pipe, sink);

    new Hadoop2MR1FlowConnector(properties).connect(def).complete();
}