List of usage examples for org.apache.hadoop.mapreduce.JobContext.getConfiguration()
public Configuration getConfiguration();
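JobContext.getConfiguration() is how split calculation and output-spec checking read job settings, since those hooks receive a JobContext rather than a live task context. A minimal sketch of the pattern as a TextInputFormat subclass; the property key example.max.splits is hypothetical, used only for illustration:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class CappedTextInputFormat extends TextInputFormat {
    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException {
        // Read a job setting through the JobContext.
        Configuration conf = context.getConfiguration();
        // "example.max.splits" is a hypothetical key; substitute your own.
        int maxSplits = conf.getInt("example.max.splits", Integer.MAX_VALUE);
        List<InputSplit> splits = super.getSplits(context);
        // Cap the number of splits according to the configured limit.
        return splits.size() <= maxSplits ? splits
                : new ArrayList<InputSplit>(splits.subList(0, maxSplits));
    }
}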
From source file: com.mongodb.hadoop.BSONFileInputFormat.java
License: Apache License

@Override
public List<FileSplit> getSplits(final JobContext context) throws IOException {
    Configuration config = context.getConfiguration();
    PathFilter pf = getInputPathFilter(context);
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>();
    List<FileStatus> inputFiles = listStatus(context);
    for (FileStatus file : inputFiles) {
        if (pf != null && !pf.accept(file.getPath())) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("Skipping file %s: did not match the path filter.", file.getPath()));
            }
            continue;
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("processing file " + file.getPath());
        }
        BSONSplitter splitter = new BSONSplitter();
        splitter.setConf(config);
        splitter.setInputPath(file.getPath());

        Path splitFilePath = new Path(file.getPath().getParent(), "." + file.getPath().getName() + ".splits");
        try {
            splitter.loadSplitsFromSplitFile(file, splitFilePath);
        } catch (BSONSplitter.NoSplitFileException nsfe) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("No split file for %s; building split file", file.getPath()));
            }
            splitter.readSplitsForFile(file);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("BSONSplitter found %d splits.", splitter.getAllSplits().size()));
        }
        splits.addAll(splitter.getAllSplits());
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug(String.format("Total of %d splits found.", splits.size()));
    }
    return splits;
}
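For context, a minimal driver sketch that wires the format above into a job; the job name and input path are placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import com.mongodb.hadoop.BSONFileInputFormat;

public class BsonReadDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "bson-read");
        job.setJarByClass(BsonReadDriver.class);
        // BSONFileInputFormat calls getSplits() above, consulting the
        // .splits side file next to each input BSON dump.
        job.setInputFormatClass(BSONFileInputFormat.class);
        FileInputFormat.addInputPath(job, new Path("/data/dump.bson")); // placeholder path
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}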
From source file: com.mongodb.hadoop.input.DelegatingInputFormat.java
License: Apache License

@SuppressWarnings("unchecked")
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    Job jobCopy = new Job(conf);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Map<Path, InputFormat> formatMap = MongoMultipleInputs.getInputFormatMap(job);
    Map<Path, Class<? extends Mapper>> mapperMap = MongoMultipleInputs.getMapperTypeMap(job);

    for (Entry<Path, InputFormat> entry : formatMap.entrySet()) {
        InputFormat format = (InputFormat) ReflectionUtils.newInstance(entry.getValue().getClass(), conf);
        Class<? extends Mapper> mapperClass = mapperMap.get(entry.getKey());
        try {
            // MongoInputFormat offers a path-aware getSplits(); fall back to the
            // generic single-argument signature for any other InputFormat.
            List<InputSplit> pathSplits = ((MongoInputFormat) format).getSplits(jobCopy, entry.getKey());
            for (InputSplit pathSplit : pathSplits) {
                splits.add(TaggedInputSplitGenerator.getTaggedInputSplit(pathSplit, conf, format.getClass(),
                        mapperClass));
            }
        } catch (ClassCastException e) {
            List<InputSplit> pathSplits = format.getSplits(jobCopy);
            for (InputSplit pathSplit : pathSplits) {
                splits.add(TaggedInputSplitGenerator.getTaggedInputSplit(pathSplit, conf, format.getClass(),
                        mapperClass));
            }
        }
    }
    return splits;
}
From source file: com.mongodb.hadoop.input.MongoMultipleInputs.java
License: Apache License

/**
 * Retrieves a map of {@link Path}s to the {@link InputFormat} class
 * that should be used for them.
 *
 * @param job The {@link JobContext}
 * @see #addInputPath(JobConf, Path, Class)
 * @return A map of paths to input formats for the job
 */
@SuppressWarnings("unchecked")
static Map<Path, InputFormat> getInputFormatMap(JobContext job) {
    Configuration conf = job.getConfiguration();
    List<MongoRequest> mongoRequests = MongoConfigUtil.getMongoRequests(conf);
    if (mongoRequests == null) {
        // Guard against an unset configuration, as getMapperTypeMap() below does.
        return Collections.emptyMap();
    }
    Map<Path, InputFormat> m = new HashMap<Path, InputFormat>();
    for (MongoRequest mongoRequest : mongoRequests) {
        InputFormat inputFormat;
        try {
            inputFormat = (InputFormat) ReflectionUtils.newInstance(
                    conf.getClassByName(mongoRequest.getInputFormat()), conf);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
        m.put(new Path(mongoRequest.getInputURI().toString()), inputFormat);
    }
    return m;
}
From source file: com.mongodb.hadoop.input.MongoMultipleInputs.java
License: Apache License

/**
 * Retrieves a map of {@link Path}s to the {@link Mapper} class that
 * should be used for them.
 *
 * @param job The {@link JobContext}
 * @see #addInputPath(JobConf, Path, Class, Class)
 * @return A map of paths to mappers for the job
 */
@SuppressWarnings("unchecked")
static Map<Path, Class<? extends Mapper>> getMapperTypeMap(JobContext job) {
    Configuration conf = job.getConfiguration();
    List<MongoRequest> mongoRequests = MongoConfigUtil.getMongoRequests(conf);
    if (mongoRequests == null) {
        return Collections.emptyMap();
    }
    Map<Path, Class<? extends Mapper>> m = new HashMap<Path, Class<? extends Mapper>>();
    for (MongoRequest mongoRequest : mongoRequests) {
        Class<? extends Mapper> mapClass;
        try {
            mapClass = (Class<? extends Mapper>) conf.getClassByName(mongoRequest.getMapper());
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
        m.put(new Path(mongoRequest.getInputURI().toString()), mapClass);
    }
    return m;
}
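The @see tags above name MongoMultipleInputs.addInputPath as the registration side of these lookups. A sketch of what a driver might do, under the assumption that addInputPath takes the job, an input URI path, an InputFormat class, and a Mapper class; the exact signature, and the UserMapper/OrderMapper classes, are assumptions to verify against your mongo-hadoop version:

// Sketch only: signature inferred from the @see tags above; UserMapper and
// OrderMapper are hypothetical mapper classes.
MongoMultipleInputs.addInputPath(job, new Path("mongodb://localhost:27017/db.users"),
        MongoInputFormat.class, UserMapper.class);
MongoMultipleInputs.addInputPath(job, new Path("mongodb://localhost:27017/db.orders"),
        MongoInputFormat.class, OrderMapper.class);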
From source file: com.mongodb.hadoop.MongoInputFormat.java
License: Apache License

@Override
public List<InputSplit> getSplits(final JobContext context) throws IOException {
    final Configuration conf = context.getConfiguration();
    try {
        MongoSplitter splitterImpl = MongoSplitterFactory.getSplitter(conf);
        LOG.debug("Using " + splitterImpl.toString() + " to calculate splits.");
        return splitterImpl.calculateSplits();
    } catch (SplitFailedException spfe) {
        throw new IOException(spfe);
    }
}
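Driver-side, the splitter factory is driven entirely by the Configuration. A minimal sketch that points MongoInputFormat at a collection; mongo.input.uri is the conventional mongo-hadoop input key, and the URI is a placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import com.mongodb.hadoop.MongoInputFormat;

public class MongoReadDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Tells MongoSplitterFactory which collection to split.
        conf.set("mongo.input.uri", "mongodb://localhost:27017/mydb.mycollection");
        Job job = Job.getInstance(conf, "mongo-read");
        job.setInputFormatClass(MongoInputFormat.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}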
From source file: com.mongodb.hadoop.MongoOutputFormat.java
License: Apache License

public void checkOutputSpecs(final JobContext context) throws IOException {
    if (MongoConfigUtil.getOutputURIs(context.getConfiguration()).isEmpty()) {
        throw new IOException("No output URI is specified. You must set mongo.output.uri.");
    }
}
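To satisfy this check, set mongo.output.uri (the key named in the exception message) before submitting. A minimal sketch; the URI is a placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import com.mongodb.hadoop.MongoOutputFormat;

public class MongoWriteConfig {
    public static Job configure() throws Exception {
        Configuration conf = new Configuration();
        // Without this property, checkOutputSpecs() above throws IOException.
        conf.set("mongo.output.uri", "mongodb://localhost:27017/mydb.results");
        Job job = Job.getInstance(conf, "mongo-write");
        job.setOutputFormatClass(MongoOutputFormat.class);
        return job;
    }
}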
From source file: com.moz.fiji.mapreduce.framework.HBaseFijiTableInputFormat.java
License: Apache License

/** {@inheritDoc} */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
    final Configuration conf = context.getConfiguration();
    final FijiURI inputTableURI = FijiURI.newBuilder(conf.get(FijiConfKeys.FIJI_INPUT_TABLE_URI)).build();
    final Fiji fiji = Fiji.Factory.open(inputTableURI, conf);
    try {
        final FijiTable table = fiji.openTable(inputTableURI.getTable());
        try {
            final byte[] htableName = getHBaseTableName(table);
            final List<InputSplit> splits = Lists.newArrayList();

            byte[] scanStartKey = HConstants.EMPTY_START_ROW;
            if (null != conf.get(FijiConfKeys.FIJI_START_ROW_KEY)) {
                scanStartKey = Base64.decodeBase64(conf.get(FijiConfKeys.FIJI_START_ROW_KEY));
            }
            byte[] scanLimitKey = HConstants.EMPTY_END_ROW;
            if (null != conf.get(FijiConfKeys.FIJI_LIMIT_ROW_KEY)) {
                scanLimitKey = Base64.decodeBase64(conf.get(FijiConfKeys.FIJI_LIMIT_ROW_KEY));
            }

            for (FijiRegion region : table.getRegions()) {
                final byte[] regionStartKey = region.getStartKey();
                final byte[] regionEndKey = region.getEndKey();
                // Determine if the scan start and limit key fall into the region.
                // Logic was copied from o.a.h.h.m.TableInputFormatBase
                if ((scanStartKey.length == 0 || regionEndKey.length == 0
                        || Bytes.compareTo(scanStartKey, regionEndKey) < 0)
                        && (scanLimitKey.length == 0 || Bytes.compareTo(scanLimitKey, regionStartKey) > 0)) {
                    byte[] splitStartKey = (scanStartKey.length == 0
                            || Bytes.compareTo(regionStartKey, scanStartKey) >= 0)
                            ? regionStartKey : scanStartKey;
                    byte[] splitEndKey = ((scanLimitKey.length == 0
                            || Bytes.compareTo(regionEndKey, scanLimitKey) <= 0) && regionEndKey.length > 0)
                            ? regionEndKey : scanLimitKey;

                    // TODO(FIJIMR-65): For now pick the first available location (i.e. region server), if any.
                    final String location = region.getLocations().isEmpty()
                            ? null : region.getLocations().iterator().next();
                    final TableSplit tableSplit = new TableSplit(htableName, splitStartKey, splitEndKey, location);
                    splits.add(new FijiTableSplit(tableSplit));
                }
            }
            return splits;
        } finally {
            ResourceUtils.releaseOrLog(table);
        }
    } finally {
        ResourceUtils.releaseOrLog(fiji);
    }
}
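The region-intersection condition in the middle of that loop is the densest part. The same test, pulled out as a standalone helper; a sketch using HBase's Bytes, where a zero-length key means the range is unbounded on that side:

import org.apache.hadoop.hbase.util.Bytes;

public final class ScanRangeUtil {
    // True when the half-open scan range [scanStart, scanLimit) intersects
    // the region range [regionStart, regionEnd). Empty arrays mean "unbounded".
    public static boolean overlaps(byte[] scanStart, byte[] scanLimit,
                                   byte[] regionStart, byte[] regionEnd) {
        boolean startsBeforeRegionEnd = scanStart.length == 0 || regionEnd.length == 0
                || Bytes.compareTo(scanStart, regionEnd) < 0;
        boolean endsAfterRegionStart = scanLimit.length == 0
                || Bytes.compareTo(scanLimit, regionStart) > 0;
        return startsBeforeRegionEnd && endsAfterRegionStart;
    }

    private ScanRangeUtil() { }
}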
From source file: com.moz.fiji.schema.mapreduce.FijiTableInputFormat.java
License: Apache License

/** {@inheritDoc} */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
    final Configuration conf = context.getConfiguration();
    final FijiURI inputTableURI = getInputTableURI(conf);
    final Fiji fiji = Fiji.Factory.open(inputTableURI, conf);
    final FijiTable table = fiji.openTable(inputTableURI.getTable());
    final HTableInterface htable = HBaseFijiTable.downcast(table).openHTableConnection();
    try {
        final List<InputSplit> splits = Lists.newArrayList();
        for (FijiRegion region : table.getRegions()) {
            final byte[] startKey = region.getStartKey();
            // TODO: a smart way to get which location is most relevant.
            final String location = region.getLocations().isEmpty()
                    ? null : region.getLocations().iterator().next();
            final TableSplit tableSplit = new TableSplit(htable.getTableName(), startKey,
                    region.getEndKey(), location);
            splits.add(new FijiTableSplit(tableSplit, startKey));
        }
        return splits;
    } finally {
        htable.close();
    }
}
From source file: com.netflix.aegisthus.input.AegisthusInputFormat.java
License: Apache License

@SuppressWarnings("rawtypes")
private Map<String, AbstractType> initConvertors(JobContext job) throws IOException {
    Map<String, AbstractType> convertors = Maps.newHashMap();
    String conversion = job.getConfiguration().get(KEY_TYPE);
    LOG.info(KEY_TYPE + ": " + conversion);
    if (conversion != null) {
        try {
            convertors.put(SSTableScanner.KEY, TypeParser.parse(conversion));
        } catch (ConfigurationException e) {
            throw new IOException(e);
        } catch (SyntaxException e) {
            throw new IOException(e);
        }
    }
    conversion = job.getConfiguration().get(COLUMN_TYPE);
    LOG.info(COLUMN_TYPE + ": " + conversion);
    if (conversion != null) {
        try {
            convertors.put(SSTableScanner.COLUMN_NAME_KEY, TypeParser.parse(conversion));
        } catch (ConfigurationException e) {
            throw new IOException(e);
        } catch (SyntaxException e) {
            throw new IOException(e);
        }
    }
    if (convertors.size() == 0) {
        return null;
    }
    return convertors;
}
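The values behind those two keys are Cassandra comparator type names that TypeParser.parse can resolve. A sketch of setting them on the job, assuming the KEY_TYPE and COLUMN_TYPE constants are accessible; their literal property names are not shown in this listing:

// Sketch only: assumes KEY_TYPE and COLUMN_TYPE are visible constants on
// AegisthusInputFormat; the type names are standard Cassandra marshal types.
Configuration conf = job.getConfiguration();
conf.set(AegisthusInputFormat.KEY_TYPE, "BytesType");   // row key comparator
conf.set(AegisthusInputFormat.COLUMN_TYPE, "UTF8Type"); // column-name comparator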
From source file: com.netflix.aegisthus.input.AegisthusInputFormat.java
License: Apache License

/**
 * The main thing that addSSTableSplit handles is splitting SSTables
 * using their index if available. The general algorithm: if the file is
 * larger than the block size (plus some fuzzy factor), it is broken into
 * multiple splits at offsets taken from the SSTable's index.
 */
public void addSSTableSplit(List<InputSplit> splits, JobContext job, FileStatus file) throws IOException {
    Path path = file.getPath();
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    long length = file.getLen();
    BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
    if (length != 0) {
        long blockSize = file.getBlockSize();
        long maxSplitSize = (long) (blockSize * .99);
        long fuzzySplit = (long) (blockSize * 1.2);
        long bytesRemaining = length;
        Iterator<Long> scanner = null;
        Path compressionPath = new Path(path.getParent(),
                path.getName().replaceAll("-Data.db", "-CompressionInfo.db"));
        if (!fs.exists(compressionPath)) {
            // Only initialize if we are going to have more than a single split
            if (fuzzySplit < length) {
                Path indexPath = new Path(path.getParent(), path.getName().replaceAll("-Data.db", "-Index.db"));
                if (!fs.exists(indexPath)) {
                    fuzzySplit = length;
                } else {
                    FSDataInputStream fileIn = fs.open(indexPath);
                    scanner = new OffsetScanner(new DataInputStream(new BufferedInputStream(fileIn)),
                            indexPath.getName());
                }
            }
            long splitStart = 0;
            while (splitStart + fuzzySplit < length && scanner.hasNext()) {
                long splitSize = 0;
                // The scanner returns an offset from the start of the file.
                while (splitSize < maxSplitSize && scanner.hasNext()) {
                    splitSize = scanner.next() - splitStart;
                }
                int blkIndex = getBlockIndex(blkLocations, splitStart + (splitSize / 2));
                LOG.info("split path: " + path.getName() + ":" + splitStart + ":" + splitSize);
                splits.add(new AegSplit(path, splitStart, splitSize, blkLocations[blkIndex].getHosts(),
                        convertors));
                bytesRemaining -= splitSize;
                splitStart += splitSize;
            }
        }
        if (bytesRemaining != 0) {
            LOG.info("end path: " + path.getName() + ":" + (length - bytesRemaining) + ":" + bytesRemaining);
            splits.add(new AegSplit(path, length - bytesRemaining, bytesRemaining,
                    blkLocations[blkLocations.length - 1].getHosts(), convertors,
                    fs.exists(compressionPath), compressionPath));
        }
    } else {
        LOG.info("skipping zero length file: " + path.toString());
    }
}
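To make the fuzzy factor concrete, here is the arithmetic for a standard 128 MB block size; a file must exceed the block size by roughly 20% before index-based splitting kicks in at all:

// Worked example of the thresholds computed above for a 128 MB block.
long blockSize = 128L * 1024 * 1024;          // 134217728 bytes
long maxSplitSize = (long) (blockSize * .99); // 132875550 bytes (~126.7 MB)
long fuzzySplit = (long) (blockSize * 1.2);   // 161061273 bytes (~153.6 MB)
// A 150 MB data file stays a single split (150 MB < fuzzySplit), while a
// 160 MB file is split at index offsets, each split targeting ~maxSplitSize.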