List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration
public Configuration getConfiguration();
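All of the examples below follow the same pattern: a static helper or an InputFormat/OutputFormat override receives a JobContext and reads job-wide settings from the Configuration it exposes. A minimal, hedged sketch of that pattern (the class DemoGroupedInputFormat and the property demo.input.num.groups are hypothetical placeholders, not part of any library shown below):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

// Hypothetical InputFormat illustrating the common pattern in the entries
// below: getSplits() reads job-wide settings via JobContext.getConfiguration().
public class DemoGroupedInputFormat extends TextInputFormat {
    // hypothetical property name, used only in this sketch
    public static final String NUM_GROUPS = "demo.input.num.groups";

    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        Configuration conf = job.getConfiguration();
        int numGroups = conf.getInt(NUM_GROUPS, 1); // falls back to 1 if unset
        List<InputSplit> splits = new ArrayList<>();
        for (int group = 0; group < numGroups; group++) {
            splits.addAll(super.getSplits(job)); // duplicate each file split per group
        }
        return splits;
    }
}

Compare this with the WikipediaInputFormat entry that follows, which applies the same idea using WikipediaConfiguration.getNumGroups().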
From source file:org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> superSplits = super.getSplits(job);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    int numGroups = WikipediaConfiguration.getNumGroups(job.getConfiguration());
    for (int group = 0; group < numGroups; group++) {
        for (InputSplit split : superSplits) {
            FileSplit fileSplit = (FileSplit) split;
            splits.add(new WikipediaInputSplit(fileSplit, group));
        }
    }
    return splits;
}
From source file:org.apache.accumulo.hadoop.mapreduce.AccumuloOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext job) throws IOException {
    OutputConfigurator.checkJobStored(CLASS, job.getConfiguration());
    Properties clientProps = OutputConfigurator.getClientProperties(CLASS, job.getConfiguration());
    AuthenticationToken token = ClientProperty.getAuthenticationToken(clientProps);
    try (AccumuloClient c = Accumulo.newClient().from(clientProps).build()) {
        if (!c.securityOperations().authenticateUser(c.whoami(), token))
            throw new IOException("Unable to authenticate user");
    } catch (AccumuloException | AccumuloSecurityException e) {
        throw new IOException(e);
    }
}
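For context, a hedged driver-side sketch of the round trip this method relies on: whatever the driver stores in the Job's Configuration before submission is what checkOutputSpecs(JobContext) later reads back through job.getConfiguration(). The key demo.output.table is a made-up placeholder; Accumulo's real formats serialize their settings through OutputConfigurator rather than raw keys like this.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class DemoDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "demo-output");
        // Hypothetical key; stored in the job configuration before submission.
        job.getConfiguration().set("demo.output.table", "mytable");
        // Later, checkOutputSpecs(JobContext) reads the same value back:
        // job.getConfiguration().get("demo.output.table")
    }
}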
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java
License:Apache License
/**
 * Returns the name of the current classloader context set on this scanner
 *
 * @param job
 *          the Hadoop job instance to be configured
 * @return name of the current context
 * @since 1.8.0
 */
protected static String getClassLoaderContext(JobContext job) {
    return InputConfigurator.getClassLoaderContext(CLASS, job.getConfiguration());
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java
License:Apache License
/**
 * Gets the {@link ClientInfo} from the configuration
 *
 * @param context
 *          Hadoop job context
 * @return ClientInfo
 * @since 2.0.0
 */
public static ClientInfo getClientInfo(JobContext context) {
    return InputConfigurator.getClientInfo(CLASS, context.getConfiguration());
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java
License:Apache License
/**
 * Fetches all {@link InputTableConfig}s that have been set on the given job.
 *
 * @param context
 *          the Hadoop job instance to be configured
 * @return the {@link InputTableConfig} objects for the job
 * @since 1.6.0
 */
public static Map<String, InputTableConfig> getInputTableConfigs(JobContext context) {
    return InputConfigurator.getInputTableConfigs(CLASS, context.getConfiguration());
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java
License:Apache License
/**
 * Check whether a configuration is fully configured to be used with an Accumulo
 * {@link org.apache.hadoop.mapreduce.InputFormat}.
 *
 * @param context
 *          the Hadoop context for the configured job
 * @throws java.io.IOException
 *           if the context is improperly configured
 * @since 1.5.0
 */
public static void validateOptions(JobContext context) throws IOException {
    try (AccumuloClient client = InputConfigurator.createClient(CLASS, context.getConfiguration())) {
        InputConfigurator.validatePermissions(CLASS, context.getConfiguration(), client);
    }
}
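A small usage sketch, under the assumption that the job has already been configured for Accumulo input: because validateOptions() is public and static, a driver can call it directly to fail fast before submitting. The PreflightCheck class is hypothetical.

import java.io.IOException;

import org.apache.hadoop.mapreduce.Job;

public class PreflightCheck {
    // Hedged sketch: fail fast on misconfiguration before submitting the job.
    // Assumes `job` was already configured for Accumulo input; validateOptions
    // is the static method from the entry above. Job implements JobContext.
    static void preflight(Job job) throws IOException {
        AbstractInputFormat.validateOptions(job);
    }
}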
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java
License:Apache License
public static List<InputSplit> getSplits(JobContext context) throws IOException {
    validateOptions(context);
    Random random = new SecureRandom();
    LinkedList<InputSplit> splits = new LinkedList<>();
    try (AccumuloClient client = createClient(context)) {
        Map<String, InputTableConfig> tableConfigs = getInputTableConfigs(context);
        for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
            String tableName = tableConfigEntry.getKey();
            InputTableConfig tableConfig = tableConfigEntry.getValue();
            ClientContext clientContext = (ClientContext) client;
            Table.ID tableId;
            // resolve the table name to an id once, and use the id from this point forward
            try {
                tableId = Tables.getTableId(clientContext, tableName);
            } catch (TableNotFoundException e) {
                throw new IOException(e);
            }

            boolean batchScan = InputConfigurator.isBatchScan(CLASS, context.getConfiguration());
            boolean supportBatchScan = !(tableConfig.isOfflineScan()
                || tableConfig.shouldUseIsolatedScanners() || tableConfig.shouldUseLocalIterators());
            if (batchScan && !supportBatchScan)
                throw new IllegalArgumentException("BatchScanner optimization not available for offline"
                    + " scan, isolated, or local iterators");

            boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
            if (batchScan && !autoAdjust)
                throw new IllegalArgumentException(
                    "AutoAdjustRanges must be enabled when using BatchScanner optimization");

            List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges())
                : tableConfig.getRanges();
            if (ranges.isEmpty()) {
                ranges = new ArrayList<>(1);
                ranges.add(new Range());
            }

            // get the metadata information for these ranges
            Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
            TabletLocator tl;
            try {
                if (tableConfig.isOfflineScan()) {
                    binnedRanges = binOfflineTable(context, tableId, ranges);
                    while (binnedRanges == null) {
                        // some tablets were still online; sleep randomly between
                        // 100 and 200 ms and try again
                        sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                        binnedRanges = binOfflineTable(context, tableId, ranges);
                    }
                } else {
                    tl = InputConfigurator.getTabletLocator(CLASS, context.getConfiguration(), tableId);
                    // it's possible the cache contains complete but stale information
                    // about a table's tablets, so clear it
                    tl.invalidateCache();
                    while (!tl.binRanges(clientContext, ranges, binnedRanges).isEmpty()) {
                        String tableIdStr = tableId.canonicalID();
                        if (!Tables.exists(clientContext, tableId))
                            throw new TableDeletedException(tableIdStr);
                        if (Tables.getTableState(clientContext, tableId) == TableState.OFFLINE)
                            throw new TableOfflineException(Tables.getTableOfflineMsg(clientContext, tableId));
                        binnedRanges.clear();
                        log.warn("Unable to locate bins for specified ranges. Retrying.");
                        // sleep randomly between 100 and 200 ms
                        sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                        tl.invalidateCache();
                    }
                }
            } catch (Exception e) {
                throw new IOException(e);
            }

            // depending on autoAdjust, this code either splits ranges per tablet and adds
            // one split per range-location pair, or collects all locations for each range.
            // Map from Range to list of locations; only used when we don't split.
            HashMap<Range, ArrayList<String>> splitsToAdd = null;
            if (!autoAdjust)
                splitsToAdd = new HashMap<>();

            HashMap<String, String> hostNameCache = new HashMap<>();
            for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
                String ip = tserverBin.getKey().split(":", 2)[0];
                String location = hostNameCache.get(ip);
                if (location == null) {
                    InetAddress inetAddress = InetAddress.getByName(ip);
                    location = inetAddress.getCanonicalHostName();
                    hostNameCache.put(ip, location);
                }
                for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
                    Range ke = extentRanges.getKey().toDataRange();
                    if (batchScan) {
                        // group ranges by tablet to be read by a BatchScanner
                        ArrayList<Range> clippedRanges = new ArrayList<>();
                        for (Range r : extentRanges.getValue())
                            clippedRanges.add(ke.clip(r));
                        BatchInputSplit split = new BatchInputSplit(tableName, tableId, clippedRanges,
                            new String[] { location });
                        SplitUtils.updateSplit(split, tableConfig);
                        splits.add(split);
                    } else {
                        // not grouping by tablet
                        for (Range r : extentRanges.getValue()) {
                            if (autoAdjust) {
                                // divide ranges into smaller ranges, based on the tablets
                                RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(),
                                    ke.clip(r), new String[] { location });
                                SplitUtils.updateSplit(split, tableConfig);
                                split.setOffline(tableConfig.isOfflineScan());
                                split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                                split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                                splits.add(split);
                            } else {
                                // don't divide ranges
                                ArrayList<String> locations = splitsToAdd.get(r);
                                if (locations == null)
                                    locations = new ArrayList<>(1);
                                locations.add(location);
                                splitsToAdd.put(r, locations);
                            }
                        }
                    }
                }
            }

            if (!autoAdjust)
                for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
                    RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(),
                        entry.getKey(), entry.getValue().toArray(new String[0]));
                    SplitUtils.updateSplit(split, tableConfig);
                    split.setOffline(tableConfig.isOfflineScan());
                    split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                    split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                    splits.add(split);
                }
        }
    }
    return splits;
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AccumuloOutputFormatImpl.java
License:Apache License
/**
 * Get connection information from this job
 *
 * @param context
 *          Hadoop job context
 * @return {@link ClientInfo}
 * @since 2.0.0
 */
protected static ClientInfo getClientInfo(JobContext context) {
    return OutputConfigurator.getClientInfo(CLASS, context.getConfiguration());
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordReader.java
License:Apache License
/**
 * Check whether a configuration is fully configured to be used with an Accumulo
 * {@link org.apache.hadoop.mapreduce.InputFormat}.
 */
private static void validateOptions(JobContext context, Class<?> callingClass) throws IOException {
    InputConfigurator.checkJobStored(callingClass, context.getConfiguration());
    try (AccumuloClient client = InputConfigurator.createClient(callingClass, context.getConfiguration())) {
        InputConfigurator.validatePermissions(callingClass, context.getConfiguration(), client);
    }
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordReader.java
License:Apache License
public static List<InputSplit> getSplits(JobContext context, Class<?> callingClass) throws IOException {
    validateOptions(context, callingClass);
    Random random = new SecureRandom();
    LinkedList<InputSplit> splits = new LinkedList<>();
    try (AccumuloClient client = createClient(context, callingClass)) {
        Map<String, InputTableConfig> tableConfigs = InputConfigurator.getInputTableConfigs(callingClass,
            context.getConfiguration());
        for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
            String tableName = tableConfigEntry.getKey();
            InputTableConfig tableConfig = tableConfigEntry.getValue();
            ClientContext clientContext = (ClientContext) client;
            TableId tableId;
            // resolve the table name to an id once, and use the id from this point forward
            try {
                tableId = Tables.getTableId(clientContext, tableName);
            } catch (TableNotFoundException e) {
                throw new IOException(e);
            }

            boolean batchScan = InputConfigurator.isBatchScan(callingClass, context.getConfiguration());
            boolean supportBatchScan = !(tableConfig.isOfflineScan()
                || tableConfig.shouldUseIsolatedScanners() || tableConfig.shouldUseLocalIterators());
            if (batchScan && !supportBatchScan)
                throw new IllegalArgumentException("BatchScanner optimization not available for offline"
                    + " scan, isolated, or local iterators");

            boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
            if (batchScan && !autoAdjust)
                throw new IllegalArgumentException(
                    "AutoAdjustRanges must be enabled when using BatchScanner optimization");

            List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges())
                : tableConfig.getRanges();
            if (ranges.isEmpty()) {
                ranges = new ArrayList<>(1);
                ranges.add(new Range());
            }

            // get the metadata information for these ranges
            Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
            TabletLocator tl;
            try {
                if (tableConfig.isOfflineScan()) {
                    binnedRanges = binOfflineTable(context, tableId, ranges, callingClass);
                    while (binnedRanges == null) {
                        // some tablets were still online; sleep randomly between
                        // 100 and 200 ms and try again
                        sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                        binnedRanges = binOfflineTable(context, tableId, ranges, callingClass);
                    }
                } else {
                    tl = InputConfigurator.getTabletLocator(callingClass, context.getConfiguration(), tableId);
                    // it's possible the cache contains complete but stale information
                    // about a table's tablets, so clear it
                    tl.invalidateCache();
                    while (!tl.binRanges(clientContext, ranges, binnedRanges).isEmpty()) {
                        String tableIdStr = tableId.canonical();
                        if (!Tables.exists(clientContext, tableId))
                            throw new TableDeletedException(tableIdStr);
                        if (Tables.getTableState(clientContext, tableId) == TableState.OFFLINE)
                            throw new TableOfflineException(Tables.getTableOfflineMsg(clientContext, tableId));
                        binnedRanges.clear();
                        log.warn("Unable to locate bins for specified ranges. Retrying.");
                        // sleep randomly between 100 and 200 ms
                        sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                        tl.invalidateCache();
                    }
                }
            } catch (TableOfflineException | TableNotFoundException | AccumuloException
                | AccumuloSecurityException e) {
                throw new IOException(e);
            }

            // depending on autoAdjust, this code either splits ranges per tablet and adds
            // one split per range-location pair, or collects all locations for each range.
            // Map from Range to list of locations; only used when we don't split.
            HashMap<Range, ArrayList<String>> splitsToAdd = null;
            if (!autoAdjust)
                splitsToAdd = new HashMap<>();

            HashMap<String, String> hostNameCache = new HashMap<>();
            for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
                String ip = tserverBin.getKey().split(":", 2)[0];
                String location = hostNameCache.get(ip);
                if (location == null) {
                    InetAddress inetAddress = InetAddress.getByName(ip);
                    location = inetAddress.getCanonicalHostName();
                    hostNameCache.put(ip, location);
                }
                for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
                    Range ke = extentRanges.getKey().toDataRange();
                    if (batchScan) {
                        // group ranges by tablet to be read by a BatchScanner
                        ArrayList<Range> clippedRanges = new ArrayList<>();
                        for (Range r : extentRanges.getValue())
                            clippedRanges.add(ke.clip(r));
                        BatchInputSplit split = new BatchInputSplit(tableName, tableId, clippedRanges,
                            new String[] { location });
                        SplitUtils.updateSplit(split, tableConfig);
                        splits.add(split);
                    } else {
                        // not grouping by tablet
                        for (Range r : extentRanges.getValue()) {
                            if (autoAdjust) {
                                // divide ranges into smaller ranges, based on the tablets
                                RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(),
                                    ke.clip(r), new String[] { location });
                                SplitUtils.updateSplit(split, tableConfig);
                                split.setOffline(tableConfig.isOfflineScan());
                                split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                                split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                                splits.add(split);
                            } else {
                                // don't divide ranges
                                ArrayList<String> locations = splitsToAdd.get(r);
                                if (locations == null)
                                    locations = new ArrayList<>(1);
                                locations.add(location);
                                splitsToAdd.put(r, locations);
                            }
                        }
                    }
                }
            }

            if (!autoAdjust)
                for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
                    RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(),
                        entry.getKey(), entry.getValue().toArray(new String[0]));
                    SplitUtils.updateSplit(split, tableConfig);
                    split.setOffline(tableConfig.isOfflineScan());
                    split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                    split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                    splits.add(split);
                }
        }
    }
    return splits;
}