Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.JobContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
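
All of the examples below follow the same basic pattern: obtain the Configuration from the JobContext and read settings from it. The following is a minimal, hypothetical sketch of that pattern; the class name, property name, and default value are assumptions made for illustration and are not Hadoop-defined settings.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;

public final class ConfigurationLookup {

    // Hypothetical helper: read a split-size limit from the job's configuration.
    // The property "example.split.maxsize" and its 64 MB default are illustrative only.
    static long maxSplitSize(JobContext context) {
        Configuration conf = context.getConfiguration();
        return conf.getLong("example.split.maxsize", 64L * 1024 * 1024);
    }
}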

Usage

From source file:net.thevis.groovyhadoop.backport.CombineFileInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {

    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;
    Configuration conf = job.getConfiguration();

    // the values specified by setXxxSplitSize() take precedence over the
    // values that might have been specified in the config
    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = conf.getLong(SPLIT_MINSIZE_PERNODE, 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = conf.getLong(SPLIT_MINSIZE_PERRACK, 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = conf.getLong("mapreduce.input.fileinputformat.split.maxsize", 0);
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size pernode " + minSizeNode
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack" + minSizeRack
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node" + minSizeNode
                + " cannot be smaller than minimum split " + "size per rack " + minSizeRack);
    }

    // all the files in input set
    Path[] paths = FileUtil.stat2Paths(listStatus(job).toArray(new FileStatus[0]));
    List<InputSplit> splits = new ArrayList<InputSplit>();
    if (paths.length == 0) {
        return splits;
    }

    // Convert them to Paths first. This is a costly operation and
    // we should do it first; otherwise we would incur the cost multiple
    // times, once for each pool in the loop below.
    List<Path> newpaths = new LinkedList<Path>();
    for (int i = 0; i < paths.length; i++) {
        Path p = new Path(paths[i].toUri().getPath());
        newpaths.add(p);
    }
    paths = null;

    // In one single iteration, process all the paths in a single pool.
    // Processing one pool at a time ensures that a split contains paths
    // from a single pool only.
    for (MultiPathFilter onepool : pools) {
        ArrayList<Path> myPaths = new ArrayList<Path>();

        // pick one input path. If it matches all the filters in a pool,
        // add it to the output set
        for (Iterator<Path> iter = newpaths.iterator(); iter.hasNext();) {
            Path p = iter.next();
            if (onepool.accept(p)) {
                myPaths.add(p); // add it to my output set
                iter.remove();
            }
        }
        // create splits for all files in this pool.
        getMoreSplits(conf, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack,
                splits);
    }

    // create splits for all files that are not in any pool.
    getMoreSplits(conf, newpaths.toArray(new Path[newpaths.size()]), maxSize, minSizeNode, minSizeRack, splits);

    // free up rackToNodes map
    rackToNodes.clear();
    return splits;
}

From source file:nl.basjes.hadoop.input.ApacheHttpdLogfileInputFormat.java

License:Apache License

@Override
protected boolean isSplitable(JobContext context, Path file) {
    final CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    return (null == codec) || codec instanceof SplittableCompressionCodec;
}

From source file:nl.gridline.zieook.runners.cf.RecommenderJobZieOok.java

License:Apache License

private static void setIOSort(JobContext job) {
    Configuration conf = job.getConfiguration();
    conf.setInt("io.sort.factor", 100);
    int assumedHeapSize = 512;
    String javaOpts = conf.get("mapred.child.java.opts");
    if (javaOpts != null) {
        Matcher m = Pattern.compile("-Xmx([0-9]+)([mMgG])").matcher(javaOpts);
        if (m.find()) {
            assumedHeapSize = Integer.parseInt(m.group(1));
            String megabyteOrGigabyte = m.group(2);
            if ("g".equalsIgnoreCase(megabyteOrGigabyte)) {
                assumedHeapSize *= 1024;
            }
        }
    }
    conf.setInt("io.sort.mb", assumedHeapSize / 2);
    // For some reason the Merger doesn't report status for a long time; increase
    // timeout when running these jobs
    conf.setInt("mapred.task.timeout", 60 * 60 * 1000);
}
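
A helper like setIOSort above would typically be applied to a Job (which is a JobContext) before submission. The driver below is a hypothetical sketch, not part of RecommenderJobZieOok; the class name and job name are assumptions, and mapper/reducer and input/output setup are omitted.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public final class IOSortDriverSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "recommender-step"); // job name is an assumption
        // A Job is a JobContext, so a helper such as setIOSort(job) could be
        // called here to tune io.sort.factor, io.sort.mb, and mapred.task.timeout
        // through job.getConfiguration() before the job is submitted.
        job.waitForCompletion(true);
    }
}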

From source file:nyu.cs.webgraph.MRhelpers.LzoTabSeperatedTextInputFormat.java

License:Open Source License

@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> files = super.listStatus(job);

    String fileExtension = new LzopCodec().getDefaultExtension();
    Configuration conf = job.getConfiguration();

    for (Iterator<FileStatus> iterator = files.iterator(); iterator.hasNext();) {
        FileStatus fileStatus = iterator.next();
        Path file = fileStatus.getPath();
        FileSystem fs = file.getFileSystem(conf);

        if (!file.toString().endsWith(fileExtension)) {
            // get rid of non-LZO files
            iterator.remove();
        } else {
            //read the index file
            LzoIndex index = LzoIndex.readIndex(fs, file);
            indexes.put(file, index);
        }
    }

    return files;
}

From source file:nyu.cs.webgraph.MRhelpers.LzoTabSeperatedTextInputFormat.java

License:Open Source License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = super.getSplits(job);
    Configuration conf = job.getConfiguration();
    // find new start/ends of the filesplit that aligns
    // with the lzo blocks

    List<InputSplit> result = new ArrayList<InputSplit>();

    for (InputSplit genericSplit : splits) {
        // load the index
        FileSplit fileSplit = (FileSplit) genericSplit;
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);
        LzoIndex index = indexes.get(file);
        if (index == null) {
            throw new IOException("Index not found for " + file);
        }

        if (index.isEmpty()) {
            // empty index, keep as is
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long lzoStart = index.alignSliceStartToIndex(start, end);
        long lzoEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (lzoStart != LzoIndex.NOT_FOUND && lzoEnd != LzoIndex.NOT_FOUND) {
            result.add(new FileSplit(file, lzoStart, lzoEnd - lzoStart, fileSplit.getLocations()));
        }
    }

    return result;
}

From source file:oracle.kv.hadoop.KVInputFormatBase.java

License:Open Source License

/**
 * @hidden
 * Logically split the set of input data for the job.
 *
 * @param context job configuration.
 *
 * @return an array of {@link InputSplit}s for the job.
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {

    if (context != null) {
        final Configuration conf = context.getConfiguration();
        initializeParameters(conf);
    }

    if (kvStoreName == null) {
        throw new IllegalArgumentException(
                "No KV Store Name provided. Use either the " + ParamConstant.KVSTORE_NAME.getName()
                        + " parameter or call " + KVInputFormatBase.class.getName() + ".setKVStoreName().");
    }

    if (kvHelperHosts == null) {
        throw new IllegalArgumentException(
                "No KV Helper Hosts were provided. Use either the " + ParamConstant.KVSTORE_NODES.getName()
                        + " parameter or call " + KVInputFormatBase.class.getName() + ".setKVHelperHosts().");
    }

    final KVStoreLogin storeLogin = new KVStoreLogin(null, kvStoreSecurityFile);
    storeLogin.loadSecurityProperties();
    storeLogin.prepareRegistryCSF();
    LoginManager loginMgr = null;
    if (storeLogin.foundSSLTransport()) {
        loginMgr = KVStoreLogin.getRepNodeLoginMgr(kvHelperHosts, storeLogin.getLoginCredentials(),
                kvStoreName);
    }
    Topology topology = null;
    try {
        topology = TopologyLocator.get(kvHelperHosts, 0, loginMgr, kvStoreName);
    } catch (KVStoreException KVSE) {
        KVSE.printStackTrace();
        return null;
    }

    /* Create a set of splits based on shards and consistency */
    final SplitBuilder sb = new SplitBuilder(topology);

    final List<TopoSplit> splits = sb.createShardSplits(consistency);
    final List<InputSplit> ret = new ArrayList<InputSplit>(splits.size());
    final RegistryUtils regUtils = new RegistryUtils(topology, loginMgr);

    for (TopoSplit ts : splits) {
        if (ts.isEmpty()) {
            /* Split is empty, skip */
            continue;
        }

        final List<String> repNodeNames = new ArrayList<String>();
        final List<String> repNodeNamesAndPorts = new ArrayList<String>();

        for (StorageNode sn : ts.getSns(consistency, topology, regUtils)) {
            repNodeNames.add(sn.getHostname());
            repNodeNamesAndPorts.add(sn.getHostname() + ":" + sn.getRegistryPort());
        }

        ret.add(new KVInputSplit().setKVHelperHosts(repNodeNamesAndPorts.toArray(new String[0]))
                .setKVStoreName(kvStoreName).setKVStoreSecurityFile(storeLogin.getSecurityFilePath())
                .setLocations(repNodeNames.toArray(new String[0])).setDirection(direction)
                .setBatchSize(batchSize).setParentKey(parentKey).setSubRange(subRange).setDepth(depth)
                .setConsistency(consistency).setTimeout(timeout).setTimeoutUnit(timeoutUnit)
                .setFormatterClassName(formatterClassName).setPartitionSets(ts.getPartitionSets()));
    }

    return ret;
}

From source file:oracle.kv.hadoop.table.TableInputFormatBase.java

License:Open Source License

/**
 * @hidden
 * Logically split the set of input data for the job.
 *
 * @param context job configuration.
 *
 * @return an array of {@link InputSplit}s for the job.
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {

    if (context != null) {
        final Configuration conf = context.getConfiguration();
        initializeParameters(conf);
    }

    if (kvStoreName == null) {
        throw new IllegalArgumentException(
                "No KV Store Name provided. Use either the " + ParamConstant.KVSTORE_NAME.getName()
                        + " parameter or call " + TableInputFormatBase.class.getName() + ".setKVStoreName().");
    }

    if (kvHelperHosts == null) {
        throw new IllegalArgumentException("No KV Helper Hosts were provided. Use either the "
                + ParamConstant.KVSTORE_NODES.getName() + " parameter or call "
                + TableInputFormatBase.class.getName() + ".setKVHelperHosts().");
    }

    if (kvHadoopHosts == null) {
        kvHadoopHosts = new String[kvHelperHosts.length];
        for (int i = 0; i < kvHelperHosts.length; i++) {
            /* Strip off the ':port' suffix */
            final String[] hostPort = (kvHelperHosts[i]).trim().split(":");
            kvHadoopHosts[i] = hostPort[0];
        }
    }

    if (tableName == null) {
        throw new IllegalArgumentException(
                "No Table Name provided. Use either the " + ParamConstant.TABLE_NAME.getName()
                        + " parameter or call " + TableInputFormatBase.class.getName() + ".setTableName().");
    }

    final String userName = (passwordCredentials == null ? null : passwordCredentials.getUsername());
    final KVStoreLogin storeLogin = new KVStoreLogin(userName, localLoginFile);
    storeLogin.loadSecurityProperties();
    storeLogin.prepareRegistryCSF();
    LoginManager loginMgr = null;

    if (storeLogin.foundSSLTransport()) {
        loginMgr = KVStoreLogin.getRepNodeLoginMgr(kvHelperHosts, passwordCredentials, kvStoreName);
    }

    /*
     * Retrieve the topology of the store.
     *
     * Note that when the same Hive CLI session runs queries that must
     * connect to two different KVStores, one secure and one non-secure,
     * and the most recent call to this method retrieved the topology from
     * the secure store, then the security information is stored in the
     * system properties and in the state of the splits, and the client
     * socket factory used to communicate with the RMI registry while
     * retrieving the topology is configured for SSL communication. If the
     * current call then retrieves the topology of the non-secure store
     * without reconfiguring the client socket factory for non-SSL
     * communication, a KVServerException (wrapping a
     * java.rmi.ConnectIOException) is encountered. To address this,
     * KVStoreException is caught, the client socket factory is
     * reconfigured for non-SSL communication, and the attempt to retrieve
     * the topology is retried with no security information.
     *
     * If both the secure and non-secure attempts fail, the stack trace is
     * sent to both the DataNode's stderr log file and the Hive CLI
     * display screen.
     */
    Topology topology;
    try {
        topology = TopologyLocator.get(kvHelperHosts, 0, loginMgr, kvStoreName);
    } catch (KVStoreException e) {

        if (passwordCredentials != null) {

            /* Retry with no security */
            LOG.debug("Failure on topology retrieval: attempt to "
                    + "communicate with RMI registry over SSL unsuccessful. "
                    + "Changing from SSLClientSocketFactory to " + "ClientSocketFactory and retrying ...");

            ClientSocketFactory.setRMIPolicy(null, kvStoreName);
            RegistryUtils.initRegistryCSF();
            try {
                topology = TopologyLocator.get(kvHelperHosts, 0, null, kvStoreName);
            } catch (KVStoreException e1) {
                e1.printStackTrace(); /* Send to DataNode's stderr file. */
                throw new IOException(e1); /* Send to Hive CLI. */
            }

        } else {
            e.printStackTrace(); /* Send to DataNode's stderr file. */
            throw new IOException(e); /* Send to Hive CLI. */
        }
    }

    /* Create splits based on the store's partitions or its shards. */
    final List<TopoSplitWrapper> splits = getSplitInfo(topology, consistency, queryBy, shardKeyPartitionId);

    final List<InputSplit> ret = new ArrayList<InputSplit>(splits.size());
    for (TopoSplitWrapper ts : splits) {

        final TableInputSplit split = new TableInputSplit();

        split.setKVStoreName(kvStoreName);
        split.setKVHelperHosts(kvHelperHosts);
        split.setLocations(kvHadoopHosts);
        split.setTableName(tableName);
        split.setKVStoreSecurity(loginFlnm, passwordCredentials, trustFlnm);
        split.setPrimaryKeyProperty(primaryKeyProperty);

        /* For MultiRowOptions */
        split.setFieldRangeProperty(fieldRangeProperty);

        /* For TableIteratorOptions */
        split.setDirection(direction);
        split.setConsistency(consistency);
        split.setTimeout(timeout);
        split.setTimeoutUnit(timeoutUnit);
        split.setMaxRequests(maxRequests);
        split.setBatchSize(batchSize);
        split.setMaxBatches(maxBatches);

        split.setPartitionSets(ts.getPartitionSets());
        split.setQueryInfo(queryBy, whereClause);
        split.setShardSet(ts.getShardSet());

        ret.add(split);
    }
    return ret;
}

From source file:org.apache.accumulo.core.client.mapreduce.AbstractInputFormat.java

License:Apache License

/**
 * Returns the name of the current classloader context set on this scanner
 *
 * @param job
 *          the Hadoop job instance to be configured
 * @return name of the current context
 * @since 1.8.0
 */
public static String getClassLoaderContext(JobContext job) {
    return InputConfigurator.getClassLoaderContext(CLASS, job.getConfiguration());
}

From source file:org.apache.accumulo.core.client.mapreduce.AbstractInputFormat.java

License:Apache License

/**
 * Determines if the connector has been configured.
 *
 * @param context
 *          the Hadoop context for the configured job
 * @return true if the connector has been configured, false otherwise
 * @since 1.5.0
 * @see #setConnectorInfo(Job, String, AuthenticationToken)
 */
protected static Boolean isConnectorInfoSet(JobContext context) {
    return InputConfigurator.isConnectorInfoSet(CLASS, context.getConfiguration());
}

From source file:org.apache.accumulo.core.client.mapreduce.AbstractInputFormat.java

License:Apache License

/**
 * Gets the user name from the configuration.
 *
 * @param context
 *          the Hadoop context for the configured job
 * @return the user name
 * @since 1.5.0
 * @see #setConnectorInfo(Job, String, AuthenticationToken)
 */
protected static String getPrincipal(JobContext context) {
    return InputConfigurator.getPrincipal(CLASS, context.getConfiguration());
}