Example usage for org.apache.hadoop.conf Configuration getConfResourceAsReader

List of usage examples for org.apache.hadoop.conf Configuration getConfResourceAsReader

Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.getConfResourceAsReader.

Prototype

public Reader getConfResourceAsReader(String name) 

Source Link

Document

Get a Reader attached to the configuration resource with the given name.

Usage

From source file:com.atlantbh.nutch.filter.index.omit.config.OmitIndexingFilterConfiguration.java

License:Apache License

/**
 * Loads the omit-indexing filter configuration from the XML resource whose name is stored
 * under {@code CONFIG_FILE_PATH_PROPERTY}.
 *
 * @param configuration configuration used both to look up the resource name and to open it
 * @return the unmarshalled configuration, or {@code null} when the resource is missing or
 *         cannot be read or parsed (the failure is logged)
 */
public static OmitIndexingFilterConfiguration getInstance(Configuration configuration) {
    String resourceName = configuration.get(CONFIG_FILE_PATH_PROPERTY);

    // Get configuration from Nutch /conf folder; try-with-resources closes the reader for us
    try (Reader configReader = configuration.getConfResourceAsReader(resourceName)) {
        if (configReader == null) {
            // Fail through the same logged-null path as a parse error instead of an unchecked exception
            log.error("Configuration initialization error! Resource not found: " + resourceName);
            return null;
        }

        // Initialize JAXB
        JAXBContext context = JAXBContext.newInstance(new Class[] { OmitIndexingFilterConfiguration.class,
                OmitIndexingFilterConfigurationEntry.class, FilteringType.class, Target.class });
        Unmarshaller unmarshaller = context.createUnmarshaller();

        // Initialize configuration
        return (OmitIndexingFilterConfiguration) unmarshaller.unmarshal(configReader);
    } catch (JAXBException | java.io.IOException e) {
        // Pass the exception along so the root cause appears in the log
        log.error("Configuration initialization error!", e);
        return null;
    }
}

From source file:com.atlantbh.nutch.index.alternativedataflow.conf.AlternativeDataFlowIndexingFilterConfiguration.java

License:Apache License

/**
 * Loads the alternative-data-flow indexing filter configuration from the XML resource whose
 * name is stored under {@code CONFIG_FILE_PATH_PROPERTY}.
 *
 * @param configuration configuration used both to look up the resource name and to open it
 * @return the unmarshalled configuration, or {@code null} when the resource is missing or
 *         cannot be read or parsed (the failure is logged)
 */
public static AlternativeDataFlowIndexingFilterConfiguration getInstance(Configuration configuration) {
    String resourceName = configuration.get(CONFIG_FILE_PATH_PROPERTY);

    // Get configuration from Nutch /conf folder; try-with-resources closes the reader for us
    try (Reader configReader = configuration.getConfResourceAsReader(resourceName)) {
        if (configReader == null) {
            // Fail through the same logged-null path as a parse error instead of an unchecked exception
            log.error("Configuration initialization error! Resource not found: " + resourceName);
            return null;
        }

        // Initialize JAXB
        JAXBContext context = JAXBContext.newInstance(
                new Class[] { AlternativeDataFlowIndexingFilterConfiguration.class, Entry.class, Field.class });
        Unmarshaller unmarshaller = context.createUnmarshaller();

        // Initialize configuration
        return (AlternativeDataFlowIndexingFilterConfiguration) unmarshaller.unmarshal(configReader);
    } catch (JAXBException | java.io.IOException e) {
        // Pass the exception along so the root cause appears in the log
        log.error("Configuration initialization error!", e);
        return null;
    }
}

From source file:de.informera.dev.nutchManager.thirdParty.RegexURLFilter.java

License:Apache License

/**
 * Supplies the reader the filter rules are loaded from.
 * Rules given inline through the {@code URLFILTER_REGEX_RULES} property take
 * precedence over the rules file named by {@code URLFILTER_REGEX_FILE}.
 */
protected Reader getRulesReader(Configuration conf) throws IOException {
    final String inlineRules = conf.get(URLFILTER_REGEX_RULES);
    if (inlineRules == null) {
        // No inline rules configured: fall back to the rules file resource.
        return conf.getConfResourceAsReader(conf.get(URLFILTER_REGEX_FILE));
    }
    return new StringReader(inlineRules);
}

From source file:org.apache.nutch.analysis.CommonGrams.java

License:Apache License

/**
 * Populates {@code commonTerms}, reusing a table cached in the configuration under
 * {@code KEY} when one exists and otherwise parsing the resource named by the
 * {@code analysis.common.terms.file} property.
 *
 * Each non-comment line is tokenized: the first token is the field name and the
 * remaining tokens are joined with {@code SEPARATOR} to form the gram stored for
 * that field. The parsed table is cached back into the configuration.
 *
 * @param conf configuration providing the cache and the terms file
 * @throws RuntimeException wrapping the {@link IOException} if the terms file cannot be read
 */
private void init(Configuration conf) {
    // First, try to retrieve some commonTerms cached in configuration.
    commonTerms = (HashMap) conf.getObject(KEY);
    if (commonTerms != null) {
        return;
    }

    // Otherwise, read the terms.file
    try {
        commonTerms = new HashMap();
        Reader reader = conf.getConfResourceAsReader(conf.get("analysis.common.terms.file"));
        BufferedReader in = new BufferedReader(reader);
        try {
            String line;
            while ((line = in.readLine()) != null) {
                line = line.trim();
                if (line.startsWith("#") || "".equals(line)) { // skip comments
                    continue;
                }
                TokenStream ts = new NutchDocumentTokenizer(new StringReader(line));
                Token token = ts.next();
                if (token == null) {
                    if (LOG.isWarnEnabled()) {
                        LOG.warn("Line does not contain a field name: " + line);
                    }
                    continue;
                }
                String field = token.termText();
                token = ts.next();
                if (token == null) {
                    if (LOG.isWarnEnabled()) {
                        LOG.warn("Line contains only a field name, no word: " + line);
                    }
                    continue;
                }
                String gram = token.termText();
                while ((token = ts.next()) != null) {
                    gram = gram + SEPARATOR + token.termText();
                }
                HashSet table = (HashSet) commonTerms.get(field);
                if (table == null) {
                    table = new HashSet();
                    commonTerms.put(field, table);
                }
                table.add(gram);
            }
        } finally {
            // Release the underlying resource even when parsing fails part-way.
            in.close();
        }
        conf.setObject(KEY, commonTerms);
    } catch (IOException e) {
        // Same message as before, but chain the cause so the original stack trace survives.
        throw new RuntimeException(e.toString(), e);
    }
}

From source file:org.apache.nutch.crawl.MimeAdaptiveFetchSchedule.java

License:Apache License

/**
 * Applies the configuration: delegates to the superclass, reads the default INC/DEC
 * rates, and loads the MIME factor file. Does nothing beyond the superclass call
 * when {@code conf} is {@code null}.
 */
public void setConf(Configuration conf) {
    super.setConf(conf);
    if (conf != null) {
        // Fallback INC and DEC rates, used when no value can be set based on MIME-type
        defaultIncRate = conf.getFloat(SCHEDULE_INC_RATE, 0.2f);
        defaultDecRate = conf.getFloat(SCHEDULE_DEC_RATE, 0.2f);

        // Locate and open the mime/factor file
        final String mimeFileName = conf.get(SCHEDULE_MIME_FILE, "adaptive-mimetypes.txt");
        final Reader mimeReader = conf.getConfResourceAsReader(mimeFileName);

        try {
            readMimeFile(mimeReader);
        } catch (IOException ioe) {
            // A read failure is logged, not propagated
            LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(ioe));
        }
    }
}

From source file:org.apache.nutch.indexer.filter.MimeTypeIndexingFilter.java

License:Apache License

/**
 * Stores the configuration, creates the MIME utility and loads the filter rules from
 * the resource named by {@code MIMEFILTER_REGEX_FILE}. When that property is empty a
 * warning is logged and no rules are loaded.
 *
 * @throws RuntimeException wrapping the {@link IOException} if the rules cannot be read
 */
@Override
public void setConf(Configuration conf) {
    this.conf = conf;
    MIME = new MimeUtil(conf);

    // Name of the resource holding the filter rules
    final String rulesFile = conf.get(MIMEFILTER_REGEX_FILE, "");
    if (rulesFile == null) {
        return;
    }
    if (rulesFile.isEmpty()) {
        LOG.warn(String.format("Missing %s property, ALL mimetypes will be allowed", MIMEFILTER_REGEX_FILE));
        return;
    }

    final Reader rulesReader = conf.getConfResourceAsReader(rulesFile);
    try {
        readConfiguration(rulesReader);
    } catch (IOException ioe) {
        if (LOG.isErrorEnabled()) {
            LOG.error(ioe.getMessage());
        }

        throw new RuntimeException(ioe.getMessage(), ioe);
    }
}

From source file:org.apache.nutch.indexer.urlfilter.UrlIndexingFilter.java

License:Apache License

/**
 * Opens, as a configuration resource, the rules file named by the
 * {@code URLINDEXINGFILTER_REGEX_FILE} property.
 */
@Override
protected Reader getRulesReader(Configuration conf) throws IOException {
    return conf.getConfResourceAsReader(conf.get(URLINDEXINGFILTER_REGEX_FILE));
}

From source file:org.apache.nutch.indexwriter.elastic.ElasticIndexWriter.java

License:Apache License

/**
 * Connects to Elasticsearch and prepares the first bulk request.
 *
 * Client settings are read from the {@code elasticsearch.conf} resource as
 * {@code key=value} lines. Blank lines and lines whose first non-whitespace character
 * is {@code #} are skipped; lines that do not split into exactly two parts on
 * {@code =} are ignored. A configured cluster name overrides {@code cluster.name}.
 *
 * @param job configuration supplying host, port, cluster, index and bulk limits
 * @throws IOException if {@code elasticsearch.conf} cannot be read
 */
@Override
public void open(Configuration job) throws IOException {
    clusterName = job.get(ElasticConstants.CLUSTER);
    host = job.get(ElasticConstants.HOST);
    port = job.getInt(ElasticConstants.PORT, 9300);

    Builder settingsBuilder = ImmutableSettings.settingsBuilder().classLoader(Settings.class.getClassLoader());

    // try-with-resources so the settings file is closed even if reading fails
    try (BufferedReader reader = new BufferedReader(job.getConfResourceAsReader("elasticsearch.conf"))) {
        String line;
        while ((line = reader.readLine()) != null) {
            // Strings are immutable: keep the trimmed value so indented comments are recognised
            String trimmed = line.trim();
            if (StringUtils.isNotBlank(trimmed) && !trimmed.startsWith("#")) {
                String[] parts = trimmed.split("=");

                if (parts.length == 2) {
                    settingsBuilder.put(parts[0].trim(), parts[1].trim());
                }
            }
        }
    }

    if (StringUtils.isNotBlank(clusterName))
        settingsBuilder.put("cluster.name", clusterName);

    // Set the cluster name and build the settings
    Settings settings = settingsBuilder.build();

    // Prefer TransportClient
    if (host != null && port > 1) {
        client = new TransportClient(settings).addTransportAddress(new InetSocketTransportAddress(host, port));
    } else if (clusterName != null) {
        node = nodeBuilder().settings(settings).client(true).node();
        client = node.client();
    }

    bulk = client.prepareBulk();
    defaultIndex = job.get(ElasticConstants.INDEX, "nutch");
    maxBulkDocs = job.getInt(ElasticConstants.MAX_BULK_DOCS, DEFAULT_MAX_BULK_DOCS);
    maxBulkLength = job.getInt(ElasticConstants.MAX_BULK_LENGTH, DEFAULT_MAX_BULK_LENGTH);
}

From source file:org.apache.nutch.indexwriter.elastic2.ElasticIndexWriter.java

License:Apache License

/**
 * Connects to Elasticsearch and prepares the first bulk request.
 *
 * Client settings are read from the {@code elasticsearch.conf} resource as
 * {@code key=value} lines. Blank lines and lines whose first non-whitespace character
 * is {@code #} are skipped; lines that do not split into exactly two parts on
 * {@code =} are ignored. A configured cluster name overrides {@code cluster.name}.
 *
 * @param job configuration supplying host, port, cluster, index and bulk limits
 * @throws IOException if {@code elasticsearch.conf} cannot be read or the host cannot be resolved
 */
@Override
public void open(Configuration job) throws IOException {
    clusterName = job.get(ElasticConstants.CLUSTER);
    host = job.get(ElasticConstants.HOST);
    port = job.getInt(ElasticConstants.PORT, 9300);

    Builder settingsBuilder = Settings.builder();

    // try-with-resources so the settings file is closed even if reading fails
    try (BufferedReader reader = new BufferedReader(job.getConfResourceAsReader("elasticsearch.conf"))) {
        String line;
        while ((line = reader.readLine()) != null) {
            // Strings are immutable: keep the trimmed value so indented comments are recognised
            String trimmed = line.trim();
            if (StringUtils.isNotBlank(trimmed) && !trimmed.startsWith("#")) {
                String[] parts = trimmed.split("=");

                if (parts.length == 2) {
                    settingsBuilder.put(parts[0].trim(), parts[1].trim());
                }
            }
        }
    }

    if (StringUtils.isNotBlank(clusterName))
        settingsBuilder.put("cluster.name", clusterName);

    // Set the cluster name and build the settings
    Settings settings = settingsBuilder.build();

    // Prefer TransportClient
    if (host != null && port > 1) {
        client = TransportClient.builder().settings(settings).build()
                .addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(host), port));
    } else if (clusterName != null) {
        node = nodeBuilder().settings(settings).client(true).node();
        client = node.client();
    }

    bulk = client.prepareBulk();
    defaultIndex = job.get(ElasticConstants.INDEX, "nutch");
    maxBulkDocs = job.getInt(ElasticConstants.MAX_BULK_DOCS, DEFAULT_MAX_BULK_DOCS);
    maxBulkLength = job.getInt(ElasticConstants.MAX_BULK_LENGTH, DEFAULT_MAX_BULK_LENGTH);
}

From source file:org.apache.nutch.net.RegexURLFilter.java

License:Apache License

/**
 * Rules specified as a config property will override rules specified as a
 * config file./*ww w  .ja  v  a 2s  . c o m*/
 */
protected Reader getRulesReader(Configuration conf) throws IOException {
    String stringRules = conf.get(URLFILTER_REGEX_RULES);
    if (stringRules != null) {
        if (LOG.isDebugEnabled()) {
            // LOG.debug("Url filter regex rules : \n" + stringRules);
        }

        return new StringReader(stringRules);
    }

    String fileRules = conf.get(URLFILTER_REGEX_FILE);
    return conf.getConfResourceAsReader(fileRules);
}