List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration
/** Accessor listed for {@code org.apache.hadoop.mapreduce.JobContext}; yields a {@link Configuration}. */
public Configuration getConfiguration();
From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsMultipleOutputs.java
License:Apache License
/**
 * Looks up the value class registered for a named output.
 *
 * @param job         context whose configuration is consulted
 * @param namedOutput name of the output whose value class is wanted
 * @return the class stored under {@code MO_PREFIX + namedOutput + VALUE}; {@code null} is
 *         passed as the default for the lookup
 */
private static Class<? extends Writable> getNamedOutputValueClass(JobContext job, String namedOutput) {
    final String valueClassKey = MO_PREFIX + namedOutput + VALUE;
    return job.getConfiguration().getClass(valueClassKey, null, Writable.class);
}
From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsSequenceFileAsBinaryOutputFormat.java
License:Apache License
/** * Get the key class for the {@link SequenceFile} * * @return the key class of the {@link SequenceFile} *///from ww w . j a v a 2s. c om static public Class<? extends WritableComparable> getSequenceFileOutputKeyClass(JobContext job) { return job.getConfiguration().getClass(KEY_CLASS, job.getOutputKeyClass().asSubclass(WritableComparable.class), WritableComparable.class); }
From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsSequenceFileAsBinaryOutputFormat.java
License:Apache License
/**
 * Resolves the value class used for the {@link SequenceFile}.
 *
 * <p>The value stored under {@code VALUE_CLASS} is used; the job's output value class
 * (narrowed to {@link Writable}) is supplied as the default for the lookup.
 *
 * @param job context providing the configuration and the job's output value class
 * @return the value class of the {@link SequenceFile}
 */
public static Class<? extends Writable> getSequenceFileOutputValueClass(JobContext job) {
    Class<? extends Writable> jobValueClass = job.getOutputValueClass().asSubclass(Writable.class);
    return job.getConfiguration().getClass(VALUE_CLASS, jobValueClass, Writable.class);
}
From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsSequenceFileOutputFormat.java
License:Apache License
/** * Get the {@link CompressionType} for the output {@link SequenceFile}. * * @param job the {@link Job}/*ww w .j ava 2 s .com*/ * @return the {@link CompressionType} for the output {@link SequenceFile}, * defaulting to {@link CompressionType#RECORD} */ public static CompressionType getOutputCompressionType(JobContext job) { String val = job.getConfiguration().get("edu.arizona.cs.hadoop.fs.irods.mapred.output.compression.type", CompressionType.RECORD.toString()); return CompressionType.valueOf(val); }
From source file:edu.berkeley.cs.amplab.adam.io.InterleavedFastqInputFormat.java
License:Apache License
/**
 * Reports whether the given input file may be split.
 *
 * @param context job context supplying the configuration for the codec lookup
 * @param path    file being considered
 * @return {@code true} only when the codec factory finds no compression codec for {@code path}
 */
@Override
public boolean isSplitable(JobContext context, Path path) {
    CompressionCodecFactory codecFactory = new CompressionCodecFactory(context.getConfiguration());
    return codecFactory.getCodec(path) == null;
}
From source file:edu.indiana.d2i.htrc.io.dataapi.IDInputFormat.java
License:Apache License
@Override public List<InputSplit> getSplits(JobContext job) throws IOException { int numIdsInSplit = job.getConfiguration().getInt(HTRCConstants.MAX_IDNUM_SPLIT, (int) 1e6); String hostStr = job.getConfiguration().get(HTRCConstants.HOSTS_SEPARATEDBY_COMMA, HTRCConstants.DATA_API_DEFAULT_URL); if (hostStr == null) throw new RuntimeException("Cannot find hosts of HTRC Data Storage."); String[] hosts = hostStr.split(","); IDInputSplit split = new IDInputSplit(hosts); List<InputSplit> splits = new ArrayList<InputSplit>(); Path[] dirs = getInputPaths(job); try {/*ww w . ja va 2s.co m*/ for (int i = 0; i < dirs.length; i++) { FileSystem fs = dirs[i].getFileSystem(job.getConfiguration()); DataInputStream fsinput = new DataInputStream(fs.open(dirs[i])); Iterator<Text> idlist = new IDList(fsinput).iterator(); while (idlist.hasNext()) { Text id = idlist.next(); split.addID(id.toString()); if (split.getLength() >= numIdsInSplit) { splits.add(split); split = new IDInputSplit(hosts); } } // LineReader reader = new LineReader(fsinput); // Text line = new Text(); // while (reader.readLine(line) > 0) { // split.addID(line.toString()); // if (split.getLength() >= numIdsInSplit) { // splits.add(split); // split = new IDInputSplit(hosts); // } // } // reader.close(); } if (split != null && split.getLength() != 0) splits.add(split); } catch (InterruptedException e) { logger.error(e); } logger.info("#Splits " + splits.size()); return splits; }
From source file:edu.indiana.d2i.htrc.io.index.lucene.LuceneIDFormat.java
License:Apache License
@Override public List<InputSplit> getSplits(JobContext job) throws IOException { int numIdsInSplit = job.getConfiguration().getInt(HTRCConstants.MAX_IDNUM_SPLIT, (int) 1e6); String line = null;//from w w w . j a v a 2 s . c om IDInputSplit split = new IDInputSplit(); List<InputSplit> splits = new ArrayList<InputSplit>(); Path[] dirs = getInputPaths(job); try { for (int i = 0; i < dirs.length; i++) { FileSystem fs = dirs[i].getFileSystem(job.getConfiguration()); DataInputStream fsinput = new DataInputStream(fs.open(dirs[i])); BufferedReader reader = new BufferedReader(new InputStreamReader(fsinput)); while ((line = reader.readLine()) != null) { split.addID(line); if (split.getLength() >= numIdsInSplit) { splits.add(split); split = new IDInputSplit(); } } reader.close(); } if (split != null && split.getLength() != 0) splits.add(split); } catch (InterruptedException e) { logger.error(e); } logger.info("#Splits " + splits.size()); return splits; }
From source file:edu.indiana.d2i.htrc.io.mem.MemIDInputFormat.java
License:Apache License
@Override public List<InputSplit> getSplits(JobContext job) throws IOException { int numIdsInSplit = job.getConfiguration().getInt(HTRCConstants.MAX_IDNUM_SPLIT, 8000); // String[] hosts = job.getConfiguration().getStrings(HTRCConstants.MEMCACHED_HOSTS); // if (hosts == null) // throw new IllegalArgumentException("No host is found for memcached"); // IDInputSplit split = new IDInputSplit(hosts); IDInputSplit split = new IDInputSplit(); List<InputSplit> splits = new ArrayList<InputSplit>(); Path[] dirs = getInputPaths(job); try {/*from w w w .j a va 2 s . c o m*/ for (int i = 0; i < dirs.length; i++) { FileSystem fs = dirs[i].getFileSystem(job.getConfiguration()); DataInputStream fsinput = new DataInputStream(fs.open(dirs[i])); Iterator<Text> idlist = new IDList(fsinput).iterator(); while (idlist.hasNext()) { Text id = idlist.next(); split.addID(id.toString()); if (split.getLength() >= numIdsInSplit) { splits.add(split); // split = new IDInputSplit(hosts); split = new IDInputSplit(); } } } if (split != null && split.getLength() != 0) splits.add(split); } catch (InterruptedException e) { logger.error(e); } logger.info("#Splits " + splits.size()); return splits; }
From source file:edu.iu.common.MultiFileInputFormat.java
License:Apache License
@Override public List<InputSplit> getSplits(JobContext job) throws IOException { // Generate splits List<InputSplit> splits = new ArrayList<InputSplit>(); List<FileStatus> files = listStatus(job); org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration(); int numMaps = jobConf.getNumMapTasks(); LOG.info("NUMBER OF FILES: " + files.size()); LOG.info("NUMBER OF MAPS: " + numMaps); int avg = files.size() / numMaps; int rest = files.size() % numMaps; int tmp = 0;//from w w w. j a va2 s . c o m long length = 0; List<Path> pathList = null; Set<String> hostSet = null; // Random random = new Random(System.nanoTime()); for (FileStatus file : files) { if (tmp == 0) { pathList = new ArrayList<Path>(); hostSet = new HashSet<String>(); } if (tmp < avg) { pathList.add(file.getPath()); length = length + file.getLen(); FileSystem fs = file.getPath().getFileSystem(job.getConfiguration()); BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen()); for (BlockLocation blockLocation : blkLocations) { for (String host : blockLocation.getHosts()) { hostSet.add(host); } } tmp++; if (tmp == avg && rest == 0) { LOG.info("Split on host: " + getHostsString(hostSet)); splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0]))); tmp = 0; length = 0; } } else if (tmp == avg && rest > 0) { pathList.add(file.getPath()); length = length + file.getLen(); FileSystem fs = file.getPath().getFileSystem(job.getConfiguration()); BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen()); for (BlockLocation blockLocation : blkLocations) { for (String host : blockLocation.getHosts()) { hostSet.add(host); } } rest--; LOG.info("Split on host: " + getHostsString(hostSet)); splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0]))); tmp = 0; length = 0; } } // Save the number of input files in the job-conf job.getConfiguration().setLong(NUM_INPUT_FILES, numMaps); LOG.info("Total # of splits: " + 
splits.size()); return splits; }
From source file:edu.iu.fileformat.MultiFileInputFormat.java
License:Apache License
@Override public List<InputSplit> getSplits(JobContext job) throws IOException { // Generate splits List<InputSplit> splits = new ArrayList<>(); List<FileStatus> files = listStatus(job); org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration(); int numMaps = jobConf.getNumMapTasks(); LOG.info("NUMBER OF FILES: " + files.size()); LOG.info("NUMBER OF MAPS: " + numMaps); // randomizeFileListOrder(files); int avg = files.size() / numMaps; int rest = files.size() % numMaps; int tmp = 0;/* w w w . j a v a 2 s .c om*/ long length = 0; List<Path> pathList = null; Set<String> hostSet = null; for (FileStatus file : files) { if (tmp == 0) { pathList = new ArrayList<>(); hostSet = new HashSet<>(); } if (tmp < avg) { pathList.add(file.getPath()); length = length + file.getLen(); FileSystem fs = file.getPath().getFileSystem(job.getConfiguration()); BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen()); for (BlockLocation blockLocation : blkLocations) { for (String host : blockLocation.getHosts()) { hostSet.add(host); } } tmp++; if (tmp == avg && rest == 0) { LOG.info("Split on host: " + getHostsString(hostSet)); splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0]))); tmp = 0; length = 0; } } else if (tmp == avg && rest > 0) { pathList.add(file.getPath()); length = length + file.getLen(); FileSystem fs = file.getPath().getFileSystem(job.getConfiguration()); BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen()); for (BlockLocation blockLocation : blkLocations) { for (String host : blockLocation.getHosts()) { hostSet.add(host); } } rest--; LOG.info("Split on host: " + getHostsString(hostSet)); splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0]))); tmp = 0; length = 0; } } // Save the number of input files in the // job-conf job.getConfiguration().setLong(NUM_INPUT_FILES, numMaps); LOG.info("Total # of splits: " + splits.size()); return splits; }