Usage examples for org.apache.hadoop.mapreduce.JobContext.getConfiguration()
public Configuration getConfiguration();
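Before the per-project examples below, here is a minimal, self-contained sketch of the common pattern they all share: grab the job's Configuration from the JobContext and read job-specific settings from it. This is an illustrative sketch only; the class name and the my.custom.* property names are invented for the example and do not come from any of the projects listed here.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class ExampleInputFormat extends InputFormat<LongWritable, Text> {

    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
        // The JobContext hands back the job's Configuration; read
        // job-specific settings from it before computing splits.
        Configuration conf = context.getConfiguration();
        String table = conf.get("my.custom.table", "default_table");
        int desiredSplits = conf.getInt("my.custom.splits", 1);
        List<InputSplit> splits = new ArrayList<InputSplit>(desiredSplits);
        // ... build one InputSplit per unit of work for 'table' ...
        return splits;
    }

    @Override
    public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // TaskAttemptContext extends JobContext, so the same
        // getConfiguration() call is available here as well.
        throw new UnsupportedOperationException("sketch only");
    }
}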
From source file: com.ery.hadoop.mrddx.hbase.HbaseInputFormat.java
License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    String startRow = this.dbConf.getInputHBaseQueryStartRow();
    String stopRow = this.dbConf.getInputHBaseQueryStopRow();
    String tableName = this.dbConf.getInputTableName();
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<HRegionLocation> lstHRegionLocation = getTableHRegionInfo(job.getConfiguration(), tableName,
            startRow, stopRow);
    for (int i = 0; i < lstHRegionLocation.size(); i++) {
        HRegionLocation hRegionLocation = lstHRegionLocation.get(i);
        String tempStart = null;
        String tempEnd = null;
        HRegionInfo hRegionInfo = hRegionLocation.getRegionInfo();
        // Case 1: no start/stop row requested -- one split per region,
        // using the region's own key range.
        if (null == startRow && null == stopRow) {
            tempStart = new String(hRegionInfo.getStartKey());
            tempEnd = new String(hRegionInfo.getEndKey());
            HbaseInputSplit split = new HbaseInputSplit(tempStart, tempEnd);
            splits.add(split);
            continue;
        }
        // Case 2: clamp the region's key range to the requested start/stop rows.
        byte[] startKeyByte = hRegionInfo.getStartKey();
        byte[] endKeyByte = hRegionInfo.getEndKey();
        if (null != startRow && hRegionInfo.containsRow(startRow.getBytes())) {
            tempStart = startRow;
        }
        if (null != stopRow && hRegionInfo.containsRow(stopRow.getBytes())) {
            tempEnd = stopRow;
        }
        tempStart = tempStart != null ? tempStart : new String(startKeyByte);
        tempEnd = tempEnd != null ? tempEnd : new String(endKeyByte);
        HbaseInputSplit split = new HbaseInputSplit(tempStart, tempEnd);
        splits.add(split);
    }
    MRLog.info(LOG, "Finished hbase split!");
    return splits;
}
From source file: com.facebook.hiveio.input.HiveApiInputFormat.java
License: Apache License

@Override
public List<InputSplit> getSplits(JobContext jobContext) throws IOException, InterruptedException {
    Configuration conf = jobContext.getConfiguration();
    HiveInputDescription inputDesc = readProfileInputDesc(conf);
    ThriftHiveMetastore.Iface client;
    try {
        client = inputDesc.metastoreClient(conf);
    } catch (TException e) {
        throw new IOException(e);
    }
    return getSplits(conf, inputDesc, client);
}
From source file: com.facebook.hiveio.output.HiveApiOutputCommitter.java
License: Apache License

@Override
public void commitJob(JobContext jobContext) throws IOException {
    baseCommitter.commitJob(jobContext);
    Configuration conf = jobContext.getConfiguration();
    OutputConf outputConf = new OutputConf(conf, profileId);
    HiveOutputDescription outputDesc = outputConf.readOutputDescription();
    OutputInfo outputInfo = outputConf.readOutputTableInfo();
    if (outputInfo.hasPartitionInfo()) {
        registerPartitions(conf, outputDesc, outputInfo);
    } else {
        noPartitionsCopyData(conf, outputInfo);
    }
    writeSuccessFile(conf);
}
From source file: com.facebook.hiveio.output.HiveApiOutputCommitter.java
License: Apache License

@Override
public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException {
    baseCommitter.abortJob(jobContext, state);
    HadoopUtils.deleteOutputDir(jobContext.getConfiguration());
}
From source file: com.facebook.hiveio.output.HiveApiOutputFormat.java
License: Apache License

@Override
public void checkOutputSpecs(JobContext jobContext) throws IOException, InterruptedException {
    Configuration conf = jobContext.getConfiguration();
    OutputConf outputConf = new OutputConf(conf, myProfileId);
    HiveOutputDescription description = outputConf.readOutputDescription();
    OutputInfo oti = outputConf.readOutputTableInfo();
    LOG.info("Check output specs of " + description);
    if (description == null) {
        LOG.error("HiveOutputDescription is null in Configuration, nothing to check");
        return;
    }
    checkTableExists(conf, description);
    if (oti == null) {
        LOG.error("OutputInfo is null in Configuration, nothing to check");
        return;
    }
    checkPartitionInfo(conf, description, oti, outputConf);
}
From source file: com.flipkart.fdp.migration.distcp.core.MirrorFileInputFormat.java
License: Apache License

@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    System.out.println("Calculating Job Splits...");
    conf = context.getConfiguration();
    dcmConfig = MirrorUtils.getConfigFromConf(conf);
    Set<String> excludeList = getExclusionsFileList(conf);
    Set<String> includeList = getInclusionFileList(conf);
    HashMap<String, FileTuple> inputFileMap = new HashMap<String, FileTuple>();
    Map<String, TransferStatus> previousState = null;
    List<FileTuple> fileTuples = null;
    List<InputSplit> splits = new ArrayList<InputSplit>();
    // TODO Bug: hashCode() and equals() are not overridden in OptimTuple,
    // so this HashSet deduplicates by object identity only.
    Set<OptimTuple> locations = new HashSet<OptimTuple>();
    long totalBatchSize = 0;
    try {
        System.out.println("Scanning source location...");
        dcmInCodec = DCMCodecFactory.getCodec(conf, dcmConfig.getSourceConfig().getDefaultConnectionConfig());
        // If an include list is given, the configured source path is ignored;
        // the exclude list is applied in either case.
        if (includeList != null && includeList.size() > 0) {
            fileTuples = dcmInCodec.getInputPaths(includeList, excludeList);
        } else {
            fileTuples = dcmInCodec.getInputPaths(dcmConfig.getSourceConfig().getPath(), excludeList);
        }
        stateManager = StateManagerFactory.getStateManager(conf, dcmConfig);
        System.out.println("Fetching previous transfer states from StateManager...");
        previousState = stateManager.getPreviousTransferStatus();
        System.out.println("Filtering Input File Set based on User defined filters.");
        for (FileTuple fileTuple : fileTuples) {
            if (!ignoreFile(fileTuple, excludeList, previousState)) {
                locations.add(new OptimTuple(fileTuple.getFileName(), fileTuple.getSize()));
                inputFileMap.put(fileTuple.getFileName(), fileTuple);
                // Accumulate here; the original never updated totalBatchSize,
                // so the total printed below was always 0.
                totalBatchSize += fileTuple.getSize();
            }
        }
        System.out.println("Optimizing Splits...");
        WorkloadOptimizer optimizer = DCMCodecFactory
                .getCodecWorkloadOptimizer(dcmConfig.getSinkConfig().getDefaultConnectionConfig());
        splits.addAll(optimizer.optimizeWorkload(dcmConfig, locations, inputFileMap));
        sortSplits(splits);
        System.out.println("Total input paths to process: " + locations.size() + ", Total input splits: "
                + splits.size());
        System.out.println("Total Data to Transfer: " + totalBatchSize);
        stateManager.savePreviousTransferStatus(previousState);
    } catch (Exception e) {
        throw new IOException(e);
    }
    System.out.println("Done Calculating splits...");
    return splits;
}
From source file: com.fullcontact.sstable.hadoop.mapreduce.SSTableInputFormat.java
License: Apache License

@Override
protected List<FileStatus> listStatus(final JobContext job) throws IOException {
    final List<FileStatus> files = Lists.newArrayList();
    for (FileStatus file : super.listStatus(job)) {
        files.addAll(handleFile(file, job));
    }
    LOG.debug("Initial file list: {} {}", files.size(), files);
    final Configuration configuration = job.getConfiguration();
    for (Iterator<FileStatus> iterator = files.iterator(); iterator.hasNext();) {
        final FileStatus fileStatus = iterator.next();
        final Path file = fileStatus.getPath();
        final FileSystem fs = file.getFileSystem(configuration);
        if (!SSTablePredicates.IS_SSTABLE.apply(file.toString())) {
            // Ignore non-sstable data files, always (for now)
            LOG.debug("Removing non-sstable file: {}", file);
            iterator.remove();
        } else {
            // Read the index file
            LOG.debug("Reading index file for sstable file: {}", file);
            final Path indexFile = SSTableFunctions.INDEX_FILE.apply(file);
            LOG.debug("Reading index file: {}", indexFile);
            final SSTableIndexIndex index = SSTableIndexIndex.readIndex(fs, indexFile);
            indexes.put(file, index);
        }
    }
    LOG.debug("Final file list: {} {}", files.size(), files);
    return files;
}
From source file: com.fullcontact.sstable.hadoop.mapreduce.SSTableInputFormat.java
License: Apache License

/**
 * If we have a directory, recursively gather the files we care about for this job.
 *
 * @param file Root file/directory.
 * @param job Job context.
 * @return All files we care about.
 * @throws IOException
 */
private Collection<FileStatus> handleFile(final FileStatus file, final JobContext job) throws IOException {
    final List<FileStatus> results = Lists.newArrayList();
    if (file.isDir()) {
        final Path p = file.getPath();
        LOG.debug("Expanding {}", p);
        final FileSystem fs = p.getFileSystem(job.getConfiguration());
        final FileStatus[] children = fs.listStatus(p);
        for (FileStatus child : children) {
            results.addAll(handleFile(child, job));
        }
    } else {
        results.add(file);
    }
    return results;
}
From source file: com.fullcontact.sstable.hadoop.mapreduce.SSTableInputFormat.java
License: Apache License

@Override
public List<InputSplit> getSplits(final JobContext job) throws IOException {
    final Configuration configuration = job.getConfiguration();
    final List<InputSplit> result = Lists.newArrayList();
    final List<FileStatus> files = listStatus(job);
    LOG.debug("Initial file list: {} {}", files.size(), files);
    for (final FileStatus fileStatus : files) {
        final Path dataFile = fileStatus.getPath();
        final FileSystem fileSystem = dataFile.getFileSystem(configuration);
        final BlockLocation[] blockLocations = fileSystem.getFileBlockLocations(fileStatus, 0,
                fileStatus.getLen());
        // Data file, try to split if the .index file was found
        final SSTableIndexIndex index = indexes.get(dataFile);
        if (index == null) {
            throw new IOException("Index not found for " + dataFile);
        }
        for (final SSTableIndexIndex.Chunk chunk : index.getOffsets()) {
            // This isn't likely to work well because we are dealing with the index into uncompressed data...
            final int blockIndex = getBlockIndex(blockLocations,
                    chunk.getStart() / COMPRESSION_RATIO_ASSUMPTION);
            final SSTableSplit split = new SSTableSplit(dataFile, chunk.getStart(), chunk.getEnd(),
                    chunk.getEnd() - chunk.getStart(), blockLocations[blockIndex].getHosts());
            result.add(split);
        }
    }
    LOG.debug("Splits calculated: {} {}", result.size(), result);
    return result;
}
From source file: com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.GFInputFormat.java
License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    this.conf = job.getConfiguration();
    Collection<FileStatus> hoplogs = getHoplogs();
    return createSplits(hoplogs);
}
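All of the examples above read values out of the Configuration at job run time; the counterpart is the driver code that puts those values there before submission. A minimal sketch, reusing the hypothetical ExampleInputFormat and my.custom.* property names from the sketch at the top of this page: anything set on the Configuration passed to Job.getInstance() is exactly what JobContext.getConfiguration() returns later inside getSplits(), commitJob(), and the other callbacks shown above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class ExampleDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Values set here are visible later via JobContext.getConfiguration().
        conf.set("my.custom.table", "events");
        conf.setInt("my.custom.splits", 4);

        Job job = Job.getInstance(conf, "example-job");
        job.setJarByClass(ExampleDriver.class);
        job.setInputFormatClass(ExampleInputFormat.class);
        // ... mapper, reducer, and output settings elided ...
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}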