Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.JobContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
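The method is available on every JobContext subtype (Job, Mapper.Context, Reducer.Context, TaskAttemptContext), so a value stored in the Configuration by the driver is visible to task-side code, which is the pattern all of the examples below rely on. Here is a minimal sketch of that round trip; the property key demo.greeting and the class names are made up for illustration and are not part of any Hadoop or Accumulo API.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;

public class GetConfigurationDemo {

    // Task side: Mapper.Context is a JobContext, so it exposes the same Configuration.
    public static class DemoMapper extends Mapper<LongWritable, Text, Text, Text> {
        private String greeting;

        @Override
        protected void setup(Context context) {
            // Read the driver-set value back, with a default if it was never set.
            greeting = context.getConfiguration().get("demo.greeting", "hello");
        }
    }

    public static void main(String[] args) throws Exception {
        // Driver side: store a custom property on the job's Configuration before submission.
        Job job = Job.getInstance(new Configuration(), "demo");
        job.getConfiguration().set("demo.greeting", "bonjour");
        job.setMapperClass(DemoMapper.class);
    }
}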

Usage

From source file: org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> superSplits = super.getSplits(job);
    List<InputSplit> splits = new ArrayList<InputSplit>();

    int numGroups = WikipediaConfiguration.getNumGroups(job.getConfiguration());

    for (int group = 0; group < numGroups; group++) {
        for (InputSplit split : superSplits) {
            FileSplit fileSplit = (FileSplit) split;
            splits.add(new WikipediaInputSplit(fileSplit, group));
        }
    }
    return splits;
}
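Note what the nested loops accomplish: every file split returned by the superclass is wrapped in a WikipediaInputSplit once per group, so the job processes each underlying split numGroups times (one map task per group) rather than partitioning the input among the groups.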

From source file: org.apache.accumulo.hadoop.mapreduce.AccumuloOutputFormat.java

License: Apache License

@Override
public void checkOutputSpecs(JobContext job) throws IOException {
    OutputConfigurator.checkJobStored(CLASS, job.getConfiguration());
    Properties clientProps = OutputConfigurator.getClientProperties(CLASS, job.getConfiguration());
    AuthenticationToken token = ClientProperty.getAuthenticationToken(clientProps);
    try (AccumuloClient c = Accumulo.newClient().from(clientProps).build()) {
        if (!c.securityOperations().authenticateUser(c.whoami(), token))
            throw new IOException("Unable to authenticate user");
    } catch (AccumuloException | AccumuloSecurityException e) {
        throw new IOException(e);
    }
}
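Because checkOutputSpecs runs in the client at job-submission time, a missing or invalid credential surfaces as an immediate IOException ("Unable to authenticate user") instead of as repeated task failures later in the job. The try-with-resources block also ensures the short-lived AccumuloClient is closed whether authentication succeeds or throws.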

From source file: org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java

License: Apache License

/**
 * Returns the name of the current classloader context set on this scanner
 *
 * @param job
 *          the Hadoop job instance to be configured
 * @return name of the current context
 * @since 1.8.0
 */
protected static String getClassLoaderContext(JobContext job) {
    return InputConfigurator.getClassLoaderContext(CLASS, job.getConfiguration());
}

From source file: org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java

License: Apache License

/**
 * Gets the {@link ClientInfo} from the configuration
 *
 * @param context
 *          Hadoop job context
 * @return ClientInfo
 * @since 2.0.0
 */
public static ClientInfo getClientInfo(JobContext context) {
    return InputConfigurator.getClientInfo(CLASS, context.getConfiguration());
}

From source file: org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java

License: Apache License

/**
 * Fetches all {@link InputTableConfig}s that have been set on the given job.
 *
 * @param context
 *          the Hadoop job instance to be configured
 * @return the {@link InputTableConfig} objects for the job
 * @since 1.6.0
 */
public static Map<String, InputTableConfig> getInputTableConfigs(JobContext context) {
    return InputConfigurator.getInputTableConfigs(CLASS, context.getConfiguration());
}

From source file: org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java

License: Apache License

/**
 * Check whether a configuration is fully configured to be used with an Accumulo
 * {@link org.apache.hadoop.mapreduce.InputFormat}.
 *
 * @param context
 *          the Hadoop context for the configured job
 * @throws java.io.IOException
 *           if the context is improperly configured
 * @since 1.5.0
 */
public static void validateOptions(JobContext context) throws IOException {
    try (AccumuloClient client = InputConfigurator.createClient(CLASS, context.getConfiguration())) {
        InputConfigurator.validatePermissions(CLASS, context.getConfiguration(), client);
    }
}

From source file: org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java

License: Apache License

public static List<InputSplit> getSplits(JobContext context) throws IOException {
    validateOptions(context);
    Random random = new SecureRandom();
    LinkedList<InputSplit> splits = new LinkedList<>();
    try (AccumuloClient client = createClient(context)) {
        Map<String, InputTableConfig> tableConfigs = getInputTableConfigs(context);
        for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {

            String tableName = tableConfigEntry.getKey();
            InputTableConfig tableConfig = tableConfigEntry.getValue();

            ClientContext clientContext = (ClientContext) client;
            Table.ID tableId;
            // resolve table name to id once, and use id from this point forward
            try {
                tableId = Tables.getTableId(clientContext, tableName);
            } catch (TableNotFoundException e) {
                throw new IOException(e);
            }

            boolean batchScan = InputConfigurator.isBatchScan(CLASS, context.getConfiguration());
            boolean supportBatchScan = !(tableConfig.isOfflineScan() || tableConfig.shouldUseIsolatedScanners()
                    || tableConfig.shouldUseLocalIterators());
            if (batchScan && !supportBatchScan)
                throw new IllegalArgumentException("BatchScanner optimization not available for offline"
                        + " scan, isolated, or local iterators");

            boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
            if (batchScan && !autoAdjust)
                throw new IllegalArgumentException(
                        "AutoAdjustRanges must be enabled when using BatchScanner optimization");

            List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges())
                    : tableConfig.getRanges();
            if (ranges.isEmpty()) {
                ranges = new ArrayList<>(1);
                ranges.add(new Range());
            }

            // get the metadata information for these ranges
            Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
            TabletLocator tl;
            try {
                if (tableConfig.isOfflineScan()) {
                    binnedRanges = binOfflineTable(context, tableId, ranges);
                    while (binnedRanges == null) {
                        // Some tablets were still online, try again
                        // sleep randomly between 100 and 200 ms
                        sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                        binnedRanges = binOfflineTable(context, tableId, ranges);

                    }
                } else {
                    tl = InputConfigurator.getTabletLocator(CLASS, context.getConfiguration(), tableId);
                    // it's possible that the cache contains complete but stale information about a
                    // table's tablets, so clear it
                    tl.invalidateCache();

                    while (!tl.binRanges(clientContext, ranges, binnedRanges).isEmpty()) {
                        String tableIdStr = tableId.canonicalID();
                        if (!Tables.exists(clientContext, tableId))
                            throw new TableDeletedException(tableIdStr);
                        if (Tables.getTableState(clientContext, tableId) == TableState.OFFLINE)
                            throw new TableOfflineException(Tables.getTableOfflineMsg(clientContext, tableId));
                        binnedRanges.clear();
                        log.warn("Unable to locate bins for specified ranges. Retrying.");
                        // sleep randomly between 100 and 200 ms
                        sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                        tl.invalidateCache();
                    }
                }
            } catch (Exception e) {
                throw new IOException(e);
            }

            // the code below either emits one split per clipped range-location pair (when
            // auto-adjusting) or accumulates every location seen for a range and emits one
            // split per range at the end (when not auto-adjusting)
            // map from Range to list of locations; only used when ranges are not auto-adjusted
            HashMap<Range, ArrayList<String>> splitsToAdd = null;

            if (!autoAdjust)
                splitsToAdd = new HashMap<>();

            HashMap<String, String> hostNameCache = new HashMap<>();
            for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
                String ip = tserverBin.getKey().split(":", 2)[0];
                String location = hostNameCache.get(ip);
                if (location == null) {
                    InetAddress inetAddress = InetAddress.getByName(ip);
                    location = inetAddress.getCanonicalHostName();
                    hostNameCache.put(ip, location);
                }
                for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
                    Range ke = extentRanges.getKey().toDataRange();
                    if (batchScan) {
                        // group ranges by tablet to be read by a BatchScanner
                        ArrayList<Range> clippedRanges = new ArrayList<>();
                        for (Range r : extentRanges.getValue())
                            clippedRanges.add(ke.clip(r));
                        BatchInputSplit split = new BatchInputSplit(tableName, tableId, clippedRanges,
                                new String[] { location });
                        SplitUtils.updateSplit(split, tableConfig);

                        splits.add(split);
                    } else {
                        // not grouping by tablet
                        for (Range r : extentRanges.getValue()) {
                            if (autoAdjust) {
                                // divide ranges into smaller ranges, based on the tablets
                                RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(),
                                        ke.clip(r), new String[] { location });
                                SplitUtils.updateSplit(split, tableConfig);
                                split.setOffline(tableConfig.isOfflineScan());
                                split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                                split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                                splits.add(split);
                            } else {
                                // don't divide ranges
                                ArrayList<String> locations = splitsToAdd.get(r);
                                if (locations == null)
                                    locations = new ArrayList<>(1);
                                locations.add(location);
                                splitsToAdd.put(r, locations);
                            }
                        }
                    }
                }
            }

            if (!autoAdjust)
                for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
                    RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(),
                            entry.getKey(), entry.getValue().toArray(new String[0]));
                    SplitUtils.updateSplit(split, tableConfig);
                    split.setOffline(tableConfig.isOfflineScan());
                    split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                    split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());

                    splits.add(split);
                }
        }
    }
    return splits;
}
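Three split strategies are visible in this method. With the BatchScanner optimization enabled, all ranges that fall on a tablet are clipped to the tablet's extent and bundled into one BatchInputSplit per tablet-location pair. With autoAdjust (and no batch scanning), each range is clipped to tablet boundaries and emitted as its own RangeInputSplit. With neither, ranges are left intact; every location observed for a range is accumulated in splitsToAdd, and one RangeInputSplit per range is emitted at the end. The retry loops around binOfflineTable and binRanges sleep a random 100-200 ms between attempts, which keeps concurrently submitting clients from retrying the metadata lookups in lockstep.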

From source file: org.apache.accumulo.hadoopImpl.mapreduce.AccumuloOutputFormatImpl.java

License: Apache License

/**
 * Get connection information from this job
 *
 * @param context
 *          Hadoop job context
 * @return {@link ClientInfo}
 *
 * @since 2.0.0
 */
protected static ClientInfo getClientInfo(JobContext context) {
    return OutputConfigurator.getClientInfo(CLASS, context.getConfiguration());
}

From source file: org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordReader.java

License: Apache License

/**
 * Check whether a configuration is fully configured to be used with an Accumulo
 * {@link org.apache.hadoop.mapreduce.InputFormat}.
 */
private static void validateOptions(JobContext context, Class<?> callingClass) throws IOException {
    InputConfigurator.checkJobStored(callingClass, context.getConfiguration());
    try (AccumuloClient client = InputConfigurator.createClient(callingClass, context.getConfiguration())) {
        InputConfigurator.validatePermissions(callingClass, context.getConfiguration(), client);
    }
}
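Compared with AbstractInputFormat.validateOptions above, this variant threads a callingClass parameter into every InputConfigurator call, which lets several public InputFormat classes share one implementation while keeping their configuration keys scoped per class, and it additionally calls checkJobStored to verify that the job's Accumulo settings were actually written to the Configuration before attempting to build a client.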

From source file: org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordReader.java

License: Apache License

public static List<InputSplit> getSplits(JobContext context, Class<?> callingClass) throws IOException {
    validateOptions(context, callingClass);
    Random random = new SecureRandom();
    LinkedList<InputSplit> splits = new LinkedList<>();
    try (AccumuloClient client = createClient(context, callingClass)) {
        Map<String, InputTableConfig> tableConfigs = InputConfigurator.getInputTableConfigs(callingClass,
                context.getConfiguration());
        for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {

            String tableName = tableConfigEntry.getKey();
            InputTableConfig tableConfig = tableConfigEntry.getValue();

            ClientContext clientContext = (ClientContext) client;
            TableId tableId;
            // resolve table name to id once, and use id from this point forward
            try {
                tableId = Tables.getTableId(clientContext, tableName);
            } catch (TableNotFoundException e) {
                throw new IOException(e);
            }

            boolean batchScan = InputConfigurator.isBatchScan(callingClass, context.getConfiguration());
            boolean supportBatchScan = !(tableConfig.isOfflineScan() || tableConfig.shouldUseIsolatedScanners()
                    || tableConfig.shouldUseLocalIterators());
            if (batchScan && !supportBatchScan)
                throw new IllegalArgumentException("BatchScanner optimization not available for offline"
                        + " scan, isolated, or local iterators");

            boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
            if (batchScan && !autoAdjust)
                throw new IllegalArgumentException(
                        "AutoAdjustRanges must be enabled when using BatchScanner optimization");

            List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges())
                    : tableConfig.getRanges();
            if (ranges.isEmpty()) {
                ranges = new ArrayList<>(1);
                ranges.add(new Range());
            }

            // get the metadata information for these ranges
            Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
            TabletLocator tl;
            try {
                if (tableConfig.isOfflineScan()) {
                    binnedRanges = binOfflineTable(context, tableId, ranges, callingClass);
                    while (binnedRanges == null) {
                        // Some tablets were still online, try again
                        // sleep randomly between 100 and 200 ms
                        sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                        binnedRanges = binOfflineTable(context, tableId, ranges, callingClass);

                    }
                } else {
                    tl = InputConfigurator.getTabletLocator(callingClass, context.getConfiguration(), tableId);
                    // it's possible that the cache contains complete but stale information about a
                    // table's tablets, so clear it
                    tl.invalidateCache();

                    while (!tl.binRanges(clientContext, ranges, binnedRanges).isEmpty()) {
                        String tableIdStr = tableId.canonical();
                        if (!Tables.exists(clientContext, tableId))
                            throw new TableDeletedException(tableIdStr);
                        if (Tables.getTableState(clientContext, tableId) == TableState.OFFLINE)
                            throw new TableOfflineException(Tables.getTableOfflineMsg(clientContext, tableId));
                        binnedRanges.clear();
                        log.warn("Unable to locate bins for specified ranges. Retrying.");
                        // sleep randomly between 100 and 200 ms
                        sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                        tl.invalidateCache();
                    }
                }
            } catch (TableOfflineException | TableNotFoundException | AccumuloException
                    | AccumuloSecurityException e) {
                throw new IOException(e);
            }

            // the code below either emits one split per clipped range-location pair (when
            // auto-adjusting) or accumulates every location seen for a range and emits one
            // split per range at the end (when not auto-adjusting)
            // map from Range to list of locations; only used when ranges are not auto-adjusted
            HashMap<Range, ArrayList<String>> splitsToAdd = null;

            if (!autoAdjust)
                splitsToAdd = new HashMap<>();

            HashMap<String, String> hostNameCache = new HashMap<>();
            for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
                String ip = tserverBin.getKey().split(":", 2)[0];
                String location = hostNameCache.get(ip);
                if (location == null) {
                    InetAddress inetAddress = InetAddress.getByName(ip);
                    location = inetAddress.getCanonicalHostName();
                    hostNameCache.put(ip, location);
                }
                for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
                    Range ke = extentRanges.getKey().toDataRange();
                    if (batchScan) {
                        // group ranges by tablet to be read by a BatchScanner
                        ArrayList<Range> clippedRanges = new ArrayList<>();
                        for (Range r : extentRanges.getValue())
                            clippedRanges.add(ke.clip(r));
                        BatchInputSplit split = new BatchInputSplit(tableName, tableId, clippedRanges,
                                new String[] { location });
                        SplitUtils.updateSplit(split, tableConfig);

                        splits.add(split);
                    } else {
                        // not grouping by tablet
                        for (Range r : extentRanges.getValue()) {
                            if (autoAdjust) {
                                // divide ranges into smaller ranges, based on the tablets
                                RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(),
                                        ke.clip(r), new String[] { location });
                                SplitUtils.updateSplit(split, tableConfig);
                                split.setOffline(tableConfig.isOfflineScan());
                                split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                                split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                                splits.add(split);
                            } else {
                                // don't divide ranges
                                ArrayList<String> locations = splitsToAdd.get(r);
                                if (locations == null)
                                    locations = new ArrayList<>(1);
                                locations.add(location);
                                splitsToAdd.put(r, locations);
                            }
                        }
                    }
                }
            }

            if (!autoAdjust)
                for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
                    RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(), entry.getKey(),
                            entry.getValue().toArray(new String[0]));
                    SplitUtils.updateSplit(split, tableConfig);
                    split.setOffline(tableConfig.isOfflineScan());
                    split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                    split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());

                    splits.add(split);
                }
        }
    }
    return splits;
}
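This method mirrors AbstractInputFormat.getSplits above almost line for line. The visible differences are the callingClass parameter threaded through the InputConfigurator calls, the newer TableId/canonical() API in place of Table.ID/canonicalID(), and a catch clause that names the expected table and security exceptions rather than catching Exception.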