List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration
public Configuration getConfiguration();
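All of the examples below follow the same pattern: a static helper or an InputFormat/OutputFormat override receives a JobContext and reads job-wide settings from the Configuration it exposes. A minimal, hedged sketch of that pattern (the class DemoGroupedInputFormat and the property demo.input.num.groups are hypothetical placeholders, not part of any library shown below):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

// Hypothetical InputFormat illustrating the common pattern in the entries
// below: getSplits() reads job-wide settings via JobContext.getConfiguration().
public class DemoGroupedInputFormat extends TextInputFormat {
    // hypothetical property name, used only in this sketch
    public static final String NUM_GROUPS = "demo.input.num.groups";

    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        Configuration conf = job.getConfiguration();
        int numGroups = conf.getInt(NUM_GROUPS, 1); // falls back to 1 if unset
        List<InputSplit> splits = new ArrayList<>();
        for (int group = 0; group < numGroups; group++) {
            splits.addAll(super.getSplits(job)); // duplicate each file split per group
        }
        return splits;
    }
}

Compare this with the WikipediaInputFormat entry that follows, which applies the same idea using WikipediaConfiguration.getNumGroups().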
From source file:org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> superSplits = super.getSplits(job);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    int numGroups = WikipediaConfiguration.getNumGroups(job.getConfiguration());
    for (int group = 0; group < numGroups; group++) {
        for (InputSplit split : superSplits) {
            FileSplit fileSplit = (FileSplit) split;
            splits.add(new WikipediaInputSplit(fileSplit, group));
        }
    }
    return splits;
}
From source file:org.apache.accumulo.hadoop.mapreduce.AccumuloOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext job) throws IOException {
    OutputConfigurator.checkJobStored(CLASS, job.getConfiguration());
    Properties clientProps = OutputConfigurator.getClientProperties(CLASS, job.getConfiguration());
    AuthenticationToken token = ClientProperty.getAuthenticationToken(clientProps);
    try (AccumuloClient c = Accumulo.newClient().from(clientProps).build()) {
        if (!c.securityOperations().authenticateUser(c.whoami(), token))
            throw new IOException("Unable to authenticate user");
    } catch (AccumuloException | AccumuloSecurityException e) {
        throw new IOException(e);
    }
}
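For context, a hedged driver-side sketch of the round trip this method relies on: whatever the driver stores in the Job's Configuration before submission is what checkOutputSpecs(JobContext) later reads back through job.getConfiguration(). The key demo.output.table is a made-up placeholder; Accumulo's real formats serialize their settings through OutputConfigurator rather than raw keys like this.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class DemoDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "demo-output");
        // Hypothetical key; stored in the job configuration before submission.
        job.getConfiguration().set("demo.output.table", "mytable");
        // Later, checkOutputSpecs(JobContext) reads the same value back:
        // job.getConfiguration().get("demo.output.table")
    }
}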
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java
License:Apache License
/**
 * Returns the name of the current classloader context set on this scanner
 *
 * @param job
 *          the Hadoop job instance to be configured
 * @return name of the current context
 * @since 1.8.0
 */
protected static String getClassLoaderContext(JobContext job) {
    return InputConfigurator.getClassLoaderContext(CLASS, job.getConfiguration());
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java
License:Apache License
/**
 * Gets the {@link ClientInfo} from the configuration
 *
 * @param context
 *          Hadoop job context
 * @return ClientInfo
 * @since 2.0.0
 */
public static ClientInfo getClientInfo(JobContext context) {
    return InputConfigurator.getClientInfo(CLASS, context.getConfiguration());
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java
License:Apache License
/**
 * Fetches all {@link InputTableConfig}s that have been set on the given job.
 *
 * @param context
 *          the Hadoop job instance to be configured
 * @return the {@link InputTableConfig} objects for the job
 * @since 1.6.0
 */
public static Map<String, InputTableConfig> getInputTableConfigs(JobContext context) {
    return InputConfigurator.getInputTableConfigs(CLASS, context.getConfiguration());
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java
License:Apache License
/**
 * Check whether a configuration is fully configured to be used with an Accumulo
 * {@link org.apache.hadoop.mapreduce.InputFormat}.
 *
 * @param context
 *          the Hadoop context for the configured job
 * @throws java.io.IOException
 *           if the context is improperly configured
 * @since 1.5.0
 */
public static void validateOptions(JobContext context) throws IOException {
    try (AccumuloClient client = InputConfigurator.createClient(CLASS, context.getConfiguration())) {
        InputConfigurator.validatePermissions(CLASS, context.getConfiguration(), client);
    }
}
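A small usage sketch, under the assumption that the job has already been configured for Accumulo input: because validateOptions() is public and static, a driver can call it directly to fail fast before submitting. The PreflightCheck class is hypothetical.

import java.io.IOException;

import org.apache.hadoop.mapreduce.Job;

public class PreflightCheck {
    // Hedged sketch: fail fast on misconfiguration before submitting the job.
    // Assumes `job` was already configured for Accumulo input; validateOptions
    // is the static method from the entry above. Job implements JobContext.
    static void preflight(Job job) throws IOException {
        AbstractInputFormat.validateOptions(job);
    }
}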
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java
License:Apache License
public static List<InputSplit> getSplits(JobContext context) throws IOException {
    validateOptions(context);
    Random random = new SecureRandom();
    LinkedList<InputSplit> splits = new LinkedList<>();
    try (AccumuloClient client = createClient(context)) {
        Map<String, InputTableConfig> tableConfigs = getInputTableConfigs(context);
        for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
            String tableName = tableConfigEntry.getKey();
            InputTableConfig tableConfig = tableConfigEntry.getValue();
            ClientContext clientContext = (ClientContext) client;
            Table.ID tableId;
            // resolve the table name to an id once, and use the id from this point forward
            try {
                tableId = Tables.getTableId(clientContext, tableName);
            } catch (TableNotFoundException e) {
                throw new IOException(e);
            }

            boolean batchScan = InputConfigurator.isBatchScan(CLASS, context.getConfiguration());
            boolean supportBatchScan = !(tableConfig.isOfflineScan()
                || tableConfig.shouldUseIsolatedScanners() || tableConfig.shouldUseLocalIterators());
            if (batchScan && !supportBatchScan)
                throw new IllegalArgumentException("BatchScanner optimization not available for offline"
                    + " scan, isolated, or local iterators");

            boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
            if (batchScan && !autoAdjust)
                throw new IllegalArgumentException(
                    "AutoAdjustRanges must be enabled when using BatchScanner optimization");

            List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges())
                : tableConfig.getRanges();
            if (ranges.isEmpty()) {
                ranges = new ArrayList<>(1);
                ranges.add(new Range());
            }

            // get the metadata information for these ranges
            Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
            TabletLocator tl;
            try {
                if (tableConfig.isOfflineScan()) {
                    binnedRanges = binOfflineTable(context, tableId, ranges);
                    while (binnedRanges == null) {
                        // some tablets were still online; sleep randomly between
                        // 100 and 200 ms and try again
                        sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                        binnedRanges = binOfflineTable(context, tableId, ranges);
                    }
                } else {
                    tl = InputConfigurator.getTabletLocator(CLASS, context.getConfiguration(), tableId);
                    // it's possible the cache contains complete but stale information
                    // about a table's tablets, so clear it
                    tl.invalidateCache();
                    while (!tl.binRanges(clientContext, ranges, binnedRanges).isEmpty()) {
                        String tableIdStr = tableId.canonicalID();
                        if (!Tables.exists(clientContext, tableId))
                            throw new TableDeletedException(tableIdStr);
                        if (Tables.getTableState(clientContext, tableId) == TableState.OFFLINE)
                            throw new TableOfflineException(Tables.getTableOfflineMsg(clientContext, tableId));
                        binnedRanges.clear();
                        log.warn("Unable to locate bins for specified ranges. Retrying.");
                        // sleep randomly between 100 and 200 ms
                        sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                        tl.invalidateCache();
                    }
                }
            } catch (Exception e) {
                throw new IOException(e);
            }

            // depending on autoAdjust, this code either splits ranges per tablet and adds
            // one split per range-location pair, or collects all locations for each range.
            // Map from Range to list of locations; only used when we don't split.
            HashMap<Range, ArrayList<String>> splitsToAdd = null;
            if (!autoAdjust)
                splitsToAdd = new HashMap<>();

            HashMap<String, String> hostNameCache = new HashMap<>();
            for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
                String ip = tserverBin.getKey().split(":", 2)[0];
                String location = hostNameCache.get(ip);
                if (location == null) {
                    InetAddress inetAddress = InetAddress.getByName(ip);
                    location = inetAddress.getCanonicalHostName();
                    hostNameCache.put(ip, location);
                }
                for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
                    Range ke = extentRanges.getKey().toDataRange();
                    if (batchScan) {
                        // group ranges by tablet to be read by a BatchScanner
                        ArrayList<Range> clippedRanges = new ArrayList<>();
                        for (Range r : extentRanges.getValue())
                            clippedRanges.add(ke.clip(r));
                        BatchInputSplit split = new BatchInputSplit(tableName, tableId, clippedRanges,
                            new String[] { location });
                        SplitUtils.updateSplit(split, tableConfig);
                        splits.add(split);
                    } else {
                        // not grouping by tablet
                        for (Range r : extentRanges.getValue()) {
                            if (autoAdjust) {
                                // divide ranges into smaller ranges, based on the tablets
                                RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(),
                                    ke.clip(r), new String[] { location });
                                SplitUtils.updateSplit(split, tableConfig);
                                split.setOffline(tableConfig.isOfflineScan());
                                split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                                split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                                splits.add(split);
                            } else {
                                // don't divide ranges
                                ArrayList<String> locations = splitsToAdd.get(r);
                                if (locations == null)
                                    locations = new ArrayList<>(1);
                                locations.add(location);
                                splitsToAdd.put(r, locations);
                            }
                        }
                    }
                }
            }

            if (!autoAdjust)
                for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
                    RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(),
                        entry.getKey(), entry.getValue().toArray(new String[0]));
                    SplitUtils.updateSplit(split, tableConfig);
                    split.setOffline(tableConfig.isOfflineScan());
                    split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                    split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                    splits.add(split);
                }
        }
    }
    return splits;
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AccumuloOutputFormatImpl.java
License:Apache License
/**
 * Get connection information from this job
 *
 * @param context
 *          Hadoop job context
 * @return {@link ClientInfo}
 * @since 2.0.0
 */
protected static ClientInfo getClientInfo(JobContext context) {
    return OutputConfigurator.getClientInfo(CLASS, context.getConfiguration());
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordReader.java
License:Apache License
/**
 * Check whether a configuration is fully configured to be used with an Accumulo
 * {@link org.apache.hadoop.mapreduce.InputFormat}.
 */
private static void validateOptions(JobContext context, Class<?> callingClass) throws IOException {
    InputConfigurator.checkJobStored(callingClass, context.getConfiguration());
    try (AccumuloClient client = InputConfigurator.createClient(callingClass, context.getConfiguration())) {
        InputConfigurator.validatePermissions(callingClass, context.getConfiguration(), client);
    }
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordReader.java
License:Apache License
public static List<InputSplit> getSplits(JobContext context, Class<?> callingClass) throws IOException {
    validateOptions(context, callingClass);
    Random random = new SecureRandom();
    LinkedList<InputSplit> splits = new LinkedList<>();
    try (AccumuloClient client = createClient(context, callingClass)) {
        Map<String, InputTableConfig> tableConfigs = InputConfigurator.getInputTableConfigs(callingClass,
            context.getConfiguration());
        for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
            String tableName = tableConfigEntry.getKey();
            InputTableConfig tableConfig = tableConfigEntry.getValue();
            ClientContext clientContext = (ClientContext) client;
            TableId tableId;
            // resolve the table name to an id once, and use the id from this point forward
            try {
                tableId = Tables.getTableId(clientContext, tableName);
            } catch (TableNotFoundException e) {
                throw new IOException(e);
            }

            boolean batchScan = InputConfigurator.isBatchScan(callingClass, context.getConfiguration());
            boolean supportBatchScan = !(tableConfig.isOfflineScan()
                || tableConfig.shouldUseIsolatedScanners() || tableConfig.shouldUseLocalIterators());
            if (batchScan && !supportBatchScan)
                throw new IllegalArgumentException("BatchScanner optimization not available for offline"
                    + " scan, isolated, or local iterators");

            boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
            if (batchScan && !autoAdjust)
                throw new IllegalArgumentException(
                    "AutoAdjustRanges must be enabled when using BatchScanner optimization");

            List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges())
                : tableConfig.getRanges();
            if (ranges.isEmpty()) {
                ranges = new ArrayList<>(1);
                ranges.add(new Range());
            }

            // get the metadata information for these ranges
            Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
            TabletLocator tl;
            try {
                if (tableConfig.isOfflineScan()) {
                    binnedRanges = binOfflineTable(context, tableId, ranges, callingClass);
                    while (binnedRanges == null) {
                        // some tablets were still online; sleep randomly between
                        // 100 and 200 ms and try again
                        sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                        binnedRanges = binOfflineTable(context, tableId, ranges, callingClass);
                    }
                } else {
                    tl = InputConfigurator.getTabletLocator(callingClass, context.getConfiguration(), tableId);
                    // it's possible the cache contains complete but stale information
                    // about a table's tablets, so clear it
                    tl.invalidateCache();
                    while (!tl.binRanges(clientContext, ranges, binnedRanges).isEmpty()) {
                        String tableIdStr = tableId.canonical();
                        if (!Tables.exists(clientContext, tableId))
                            throw new TableDeletedException(tableIdStr);
                        if (Tables.getTableState(clientContext, tableId) == TableState.OFFLINE)
                            throw new TableOfflineException(Tables.getTableOfflineMsg(clientContext, tableId));
                        binnedRanges.clear();
                        log.warn("Unable to locate bins for specified ranges. Retrying.");
                        // sleep randomly between 100 and 200 ms
                        sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                        tl.invalidateCache();
                    }
                }
            } catch (TableOfflineException | TableNotFoundException | AccumuloException
                | AccumuloSecurityException e) {
                throw new IOException(e);
            }

            // depending on autoAdjust, this code either splits ranges per tablet and adds
            // one split per range-location pair, or collects all locations for each range.
            // Map from Range to list of locations; only used when we don't split.
            HashMap<Range, ArrayList<String>> splitsToAdd = null;
            if (!autoAdjust)
                splitsToAdd = new HashMap<>();

            HashMap<String, String> hostNameCache = new HashMap<>();
            for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
                String ip = tserverBin.getKey().split(":", 2)[0];
                String location = hostNameCache.get(ip);
                if (location == null) {
                    InetAddress inetAddress = InetAddress.getByName(ip);
                    location = inetAddress.getCanonicalHostName();
                    hostNameCache.put(ip, location);
                }
                for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
                    Range ke = extentRanges.getKey().toDataRange();
                    if (batchScan) {
                        // group ranges by tablet to be read by a BatchScanner
                        ArrayList<Range> clippedRanges = new ArrayList<>();
                        for (Range r : extentRanges.getValue())
                            clippedRanges.add(ke.clip(r));
                        BatchInputSplit split = new BatchInputSplit(tableName, tableId, clippedRanges,
                            new String[] { location });
                        SplitUtils.updateSplit(split, tableConfig);
                        splits.add(split);
                    } else {
                        // not grouping by tablet
                        for (Range r : extentRanges.getValue()) {
                            if (autoAdjust) {
                                // divide ranges into smaller ranges, based on the tablets
                                RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(),
                                    ke.clip(r), new String[] { location });
                                SplitUtils.updateSplit(split, tableConfig);
                                split.setOffline(tableConfig.isOfflineScan());
                                split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                                split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                                splits.add(split);
                            } else {
                                // don't divide ranges
                                ArrayList<String> locations = splitsToAdd.get(r);
                                if (locations == null)
                                    locations = new ArrayList<>(1);
                                locations.add(location);
                                splitsToAdd.put(r, locations);
                            }
                        }
                    }
                }
            }

            if (!autoAdjust)
                for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
                    RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(),
                        entry.getKey(), entry.getValue().toArray(new String[0]));
                    SplitUtils.updateSplit(split, tableConfig);
                    split.setOffline(tableConfig.isOfflineScan());
                    split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                    split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                    splits.add(split);
                }
        }
    }
    return splits;
}