Example usage for org.apache.hadoop.mapred JobConf get

List of usage examples for org.apache.hadoop.mapred JobConf get

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf get.

Prototype

public String get(String name) 

Source Link

Document

Get the value of the name property, or null if no such property exists.
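As a quick illustration of the pattern used in the examples below, here is a minimal sketch of reading a property in a mapper's configure(JobConf) method. The property name my.example.key and the surrounding class are made up for illustration; only JobConf.get itself comes from the Hadoop API.

import org.apache.hadoop.mapred.JobConf;

public class ExampleConfigured {
    // Hypothetical property name, used only for illustration.
    private static final String MY_KEY = "my.example.key";

    private String myValue;

    public void configure(JobConf job) {
        // get(String) returns null when the property is not set,
        // so callers typically fail fast on required parameters ...
        myValue = job.get(MY_KEY);
        if (myValue == null) {
            throw new RuntimeException("Unconfigured parameter: " + MY_KEY);
        }
        // ... or fall back to a default via the two-argument overload:
        // String optional = job.get("my.optional.key", "default-value");
    }
}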

Usage

From source file:com.cloudera.sqoop.testutil.ExplicitSetMapper.java

License:Apache License

public void configure(JobConf job) {
    String userTypeName = job.get(USER_TYPE_NAME_KEY);
    if (null == userTypeName) {
        throw new RuntimeException("Unconfigured parameter: " + USER_TYPE_NAME_KEY);
    }

    setCol = job.get(SET_COL_KEY);
    setVal = job.get(SET_VAL_KEY);

    LOG.info("User type name set to " + userTypeName);
    LOG.info("Will try to set col " + setCol + " to " + setVal);

    this.userRecord = null;

    try {
        Configuration conf = new Configuration();
        Class userClass = Class.forName(userTypeName, true, Thread.currentThread().getContextClassLoader());
        this.userRecord = (SqoopRecord) ReflectionUtils.newInstance(userClass, conf);
    } catch (ClassNotFoundException cnfe) {
        // handled by the next block.
        LOG.error("ClassNotFound exception: " + cnfe.toString());
    } catch (Exception e) {
        LOG.error("Got an exception reflecting user class: " + e.toString());
    }

    if (null == this.userRecord) {
        LOG.error("Could not instantiate user record of type " + userTypeName);
        throw new RuntimeException("Could not instantiate user record of type " + userTypeName);
    }
}

From source file:com.cloudera.sqoop.testutil.ReparseMapper.java

License:Apache License

public void configure(JobConf job) {
    String userTypeName = job.get(USER_TYPE_NAME_KEY);
    if (null == userTypeName) {
        throw new RuntimeException("Unconfigured parameter: " + USER_TYPE_NAME_KEY);
    }

    LOG.info("User type name set to " + userTypeName);

    this.userRecord = null;

    try {
        Configuration conf = new Configuration();
        Class userClass = Class.forName(userTypeName, true, Thread.currentThread().getContextClassLoader());
        this.userRecord = (SqoopRecord) ReflectionUtils.newInstance(userClass, conf);
    } catch (ClassNotFoundException cnfe) {
        // handled by the next block.
        LOG.error("ClassNotFound exception: " + cnfe.toString());
    } catch (Exception e) {
        LOG.error("Got an exception reflecting user class: " + e.toString());
    }

    if (null == this.userRecord) {
        LOG.error("Could not instantiate user record of type " + userTypeName);
        throw new RuntimeException("Could not instantiate user record of type " + userTypeName);
    }
}

From source file:com.conductor.s3.S3TextFileInputFormatMRV1.java

License:Apache License

@Override
public RecordReader<LongWritable, Text> getRecordReader(InputSplit genericSplit, JobConf job, Reporter reporter)
        throws IOException {
    reporter.setStatus(genericSplit.toString());
    final String delimiter = job.get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter) {
        recordDelimiterBytes = delimiter.getBytes();
    }
    return new LineRecordReader(job, (FileSplit) genericSplit, recordDelimiterBytes);
}

From source file:com.dappervision.hbase.mapred.TypedBytesTableInputFormat.java

License:Apache License

/**
 * Builds a TableRecordReader. If no TableRecordReader was provided, uses
 * the default.
 *
 * @see org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit,
 *      JobConf, Reporter)
 */

public void configure(JobConf job) {
    Path[] tableNames = FileInputFormat.getInputPaths(job);
    String colArg = job.get(COLUMN_LIST);
    String[] colNames = colArg.split(" ");
    byte[][] m_cols = new byte[colNames.length][];
    for (int i = 0; i < m_cols.length; i++) {
        m_cols[i] = Base64.decodeBase64(Bytes.toBytes(colNames[i]));
    }
    setInputColumns(m_cols);
    if (job.get(ROW_FILTER_REGEX) != null) {
        LOG.info("Row Regex Filter[" + job.get(ROW_FILTER_REGEX) + "]");
        setRowFilter(new RowFilter(CompareFilter.CompareOp.EQUAL,
                new RegexStringComparator(job.get(ROW_FILTER_REGEX))));
    }
    if (job.get(START_ROW) != null) {
        LOG.info("Start Row[" + job.get(START_ROW) + "]");
        try {
            setStartRow(Base64.decodeBase64(job.get(START_ROW).getBytes("US-ASCII")));
        } catch (UnsupportedEncodingException e) {
            LOG.error("Start Row[" + job.get(START_ROW) + "] - Error");
        }
    }
    if (job.get(STOP_ROW) != null) {
        LOG.info("Stop Row[" + job.get(STOP_ROW) + "]");
        try {
            setStopRow(Base64.decodeBase64(job.get(STOP_ROW).getBytes("US-ASCII")));
        } catch (UnsupportedEncodingException e) {
            LOG.error("Stop Row[" + job.get(STOP_ROW) + "] - Error");
        }
    }
    try {
        setHTable(new HTable(HBaseConfiguration.create(job), tableNames[0].getName()));
    } catch (Exception e) {
        LOG.error(StringUtils.stringifyException(e));
    }
    if (job.get(VALUE_FORMAT) != null && job.get(VALUE_FORMAT).equalsIgnoreCase("singlevalue")) {
        LOG.info("Value Format[" + job.get(VALUE_FORMAT) + "]");
        super.setTableRecordReader(new TypedBytesTableRecordReaderSingleValue());
    } else {
        LOG.info("Value Format[familiescolumns]");
        super.setTableRecordReader(new TypedBytesTableRecordReader());
    }
}

From source file:com.datasalt.utils.viewbuilder.ShardedSolrDocumentConverter.java

License:Apache License

@Override
@SuppressWarnings({ "rawtypes", "unchecked" })
public void setConf(Configuration conf) {
    super.setConf(conf);
    if (conf != null) {
        JobConf jobConf = (JobConf) conf;
        numShards = jobConf.getNumReduceTasks();
        shardNumberShift = (int) Math.ceil(numDigits(Long.MAX_VALUE)) - 1
                - (int) Math.ceil(numDigits(numShards));
        log.info("Num shards : " + numShards);

        Class partitionerClass;
        try {
            partitionerClass = Class.forName(jobConf.get(PARTITIONER_CONF));
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
            throw new RuntimeException(
                    "Partitioner not set.Use conf.set(ShardedSolrDocument.PARTITIONER_CONF,partitionerClassName)",
                    e);
        }
        log.info("Partitioner class : " + partitionerClass);
        partitioner = (Partitioner<KEY_TYPE, VALUE_TYPE>) ReflectionUtils.newInstance(partitionerClass, conf);
    }
}

From source file:com.datascience.hadoop.CsvInputFormat.java

License:Apache License

@Override
public RecordReader<LongWritable, ListWritable<Text>> getRecordReader(InputSplit inputSplit, JobConf conf,
        Reporter reporter) throws IOException {
    String charsetName = conf.get(CHARSET);
    Charset charset = charsetName != null ? Charset.forName(charsetName) : StandardCharsets.UTF_8;

    FileSplit split = (FileSplit) inputSplit;
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);
    InputStream is = fs.open(path);

    // If the input is compressed, load the compression codec.
    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = codecFactory.getCodec(path);
    if (codec != null) {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        is = codec.createInputStream(is, decompressor);
    }
    return new CsvRecordReader(new InputStreamReader(is, charset), createFormat(conf), split.getLength(),
            conf.getBoolean(STRICT_MODE, true));
}

From source file:com.datascience.hadoop.CsvOutputFormat.java

License:Apache License

@Override
public RecordWriter<LongWritable, ListWritable<Text>> getRecordWriter(FileSystem fileSystem, JobConf conf,
        String name, Progressable progress) throws IOException {
    String charsetName = conf.get(CHARSET);
    Charset charset = charsetName != null ? Charset.forName(charsetName) : StandardCharsets.UTF_8;

    Path path;
    if (FileOutputFormat.getCompressOutput(conf)) {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(conf,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);
        path = FileOutputFormat.getTaskOutputPath(conf, name + codec.getDefaultExtension());
    } else {
        path = FileOutputFormat.getTaskOutputPath(conf, name);
    }
    return new CsvRecordWriter(new OutputStreamWriter(path.getFileSystem(conf).create(path, progress), charset),
            createFormat(conf));
}

From source file:com.davidgildeh.hadoop.input.simpledb.SimpleDBDAO.java

License:Apache License

/**
 * Default Constructor, initialises SimpleDB Client
 *
 * @param jobConf   Hadoop Job Configuration
 */
public SimpleDBDAO(JobConf jobConf) {

    // Load Configuration
    String awsAccessKey = jobConf.get(SIMPLEDB_AWS_ACCESSKEY);
    String awsSecretKey = jobConf.get(SIMPLEDB_AWS_SECRETKEY);
    // Default to US-EAST Region
    String simpleDBRegion = jobConf.get(SIMPLEDB_AWS_REGION, "sdb.amazonaws.com");
    sdb_domain = jobConf.get(SIMPLEDB_DOMAIN);
    whereQuery = jobConf.get(SIMPLEDB_WHERE_QUERY, null);

    // Initialise SimpleDB Client  
    sdb = new AmazonSimpleDBClient(new BasicAWSCredentials(awsAccessKey, awsSecretKey));
    sdb.setEndpoint(simpleDBRegion);
}

From source file:com.digitalpebble.behemoth.gate.AbstractGATEMapper.java

License:Apache License

public void configure(JobConf job) {
    super.configure(job);
    config = job;

    // we try to load the gate application
    // using the gate.app file
    String application_path = job.get("gate.application.path");
    String gapp_file = job.get("gate.application.descriptor", "application.xgapp");

    URL applicationDescriptorURL = null;

    // the application will have been unzipped and put on the distributed
    // cache
    try {
        String applicationName = new File(application_path).getCanonicalFile().getName();
        // trim the zip
        if (applicationName.endsWith(".zip"))
            applicationName = applicationName.replaceAll(".zip", "");

        Path[] localArchives = DistributedCache.getLocalCacheArchives(job);
        // identify the right archive
        for (Path la : localArchives) {
            String localPath = la.toUri().toString();
            LOG.info("LocalCache : " + localPath);
            if (!localPath.endsWith(application_path))
                continue;
            // see if the gapp file is directly under the dir
            applicationDescriptorURL = new URL("file://" + localPath + "/" + gapp_file);
            File f = new File(applicationDescriptorURL.getFile());
            if (f.exists())
                break;
            // or for older versions of the zipped pipelines
            applicationDescriptorURL = new URL("file://" + localPath + "/" + applicationName + "/" + gapp_file);
            break;
        }
    } catch (Exception e) {
        throw new RuntimeException("Impossible to retrieve gate application from distributed cache", e);
    }

    if (applicationDescriptorURL == null)
        throw new RuntimeException("GATE app " + application_path + "not available in distributed cache");

    processor = new GATEProcessor(applicationDescriptorURL);
    processor.setConf(config);
}

From source file:com.digitalpebble.behemoth.solr.LucidWorksWriter.java

License:Apache License

public void open(JobConf job, String name) throws IOException {
    String zkHost = job.get("solr.zkhost");
    if (zkHost != null && zkHost.equals("") == false) {
        String collection = job.get("solr.zk.collection", "collection1");
        LOG.info("Indexing to collection: " + collection + " w/ ZK host: " + zkHost);
        solr = new CloudSolrServer(zkHost);
        ((CloudSolrServer) solr).setDefaultCollection(collection);
    } else {
        String solrURL = job.get("solr.server.url");
        int queueSize = job.getInt("solr.client.queue.size", 100);
        int threadCount = job.getInt("solr.client.threads", 1);
        solr = new StreamingUpdateSolrServer(solrURL, queueSize, threadCount);
    }
    includeMetadata = job.getBoolean("lw.metadata", false);
    includeAnnotations = job.getBoolean("lw.annotations", false);

    // get the Behemoth annotations types and features
    // to store as SOLR fields
    // solr.f.name = BehemothType.featureName
    // e.g. solr.f.person = Person.string
    Iterator<Entry<String, String>> iterator = job.iterator();
    while (iterator.hasNext()) {
        Entry<String, String> entry = iterator.next();
        if (entry.getKey().startsWith("solr.f.") == false)
            continue;
        String fieldName = entry.getKey().substring("solr.f.".length());
        String val = entry.getValue();
        // see if a feature has been specified
        // if not we'll use '*' to indicate that we want
        // the text covered by the annotation
        HashMap<String, String> featureValMap = new HashMap<String, String>();
        int separator = val.indexOf(".");
        String featureName = "*";
        if (separator != -1)
            featureName = val.substring(separator + 1);
        featureValMap.put(featureName, fieldName);
        fieldMapping.put(entry.getValue(), featureValMap);
        LOG.debug("Adding to mapping " + entry.getValue() + " " + featureName + " " + fieldName);
    }
}