List of usage examples for org.apache.hadoop.mapreduce.JobContext#getConfiguration()

Method signature:

public Configuration getConfiguration();
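Before the project-specific examples below, here is a minimal sketch of the typical pattern: getConfiguration() exposes the job's Configuration, from which custom job properties can be read, for instance inside a custom InputFormat. The class name and the example.max.splits property are hypothetical and used only for illustration.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class ExampleInputFormat extends TextInputFormat {
    // hypothetical property name, for illustration only
    public static final String MAX_SPLITS = "example.max.splits";

    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        // read a custom job setting from the JobContext's Configuration
        Configuration conf = job.getConfiguration();
        int maxSplits = conf.getInt(MAX_SPLITS, Integer.MAX_VALUE);

        // delegate split computation to TextInputFormat, then cap the number of splits
        List<InputSplit> splits = super.getSplits(job);
        if (splits.size() > maxSplits) {
            return new ArrayList<InputSplit>(splits.subList(0, maxSplits));
        }
        return splits;
    }
}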
From source file:DupleInputFormat.java
License:Apache License
/**
 * Generate the list of files and make them into FileSplits.
 * @param job the job context
 * @throws IOException
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    // number of times each file appears in the files list
    ArrayList<Integer> times = new ArrayList<Integer>();
    ArrayList<Path> paths = new ArrayList<Path>();
    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            FileSystem fs = path.getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
            int index;
            if ((index = paths.indexOf(path)) != -1) {
                times.set(index, times.get(index) + 1);
            } else {
                times.add(0);
                paths.add(path);
                index = times.size() - 1;
            }
            // not splittable
            splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts(), times.get(index)));
        } else {
            // create an empty hosts array for zero-length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    //LOG.debug("Total # of splits: " + splits.size());
    return splits;
}
From source file:andromache.hadoop.CassandraInputFormat.java
License:Apache License
public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    validateConfiguration(conf);

    // canonical ranges and nodes holding replicas
    List<TokenRange> masterRangeNodes = getRangeMap(conf);

    keyspace = CassandraConfigHelper.getInputKeyspace(context.getConfiguration());
    cfNames = CassandraConfigHelper.getInputColumnFamilies(context.getConfiguration());

    // TODO: [IS] make sure this partitioner matches what is set on each participating keyspace
    partitioner = CassandraConfigHelper.getInputPartitioner(context.getConfiguration());
    logger.debug("partitioner is " + partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = Executors.newCachedThreadPool();
    List<InputSplit> splits = new ArrayList<InputSplit>();

    try {
        List<Future<List<CassandraSplit>>> splitfutures = new ArrayList<Future<List<CassandraSplit>>>();
        KeyRange jobKeyRange = CassandraConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null && jobKeyRange.start_token != null) {
            assert partitioner.preservesOrder()
                    : "ConfigHelper.setInputKeyRange(..) can only be used with an order-preserving partitioner";
            assert jobKeyRange.start_key == null : "only start_token supported";
            assert jobKeyRange.end_key == null : "only end_token supported";
            jobRange = new Range<Token>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                    partitioner.getTokenFactory().fromString(jobKeyRange.end_token), partitioner);
        }

        for (TokenRange range : masterRangeNodes) {
            if (jobRange == null) {
                // for each range, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, conf)));
            } else {
                Range<Token> dhtRange = new Range<Token>(
                        partitioner.getTokenFactory().fromString(range.start_token),
                        partitioner.getTokenFactory().fromString(range.end_token), partitioner);

                if (dhtRange.intersects(jobRange)) {
                    for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) {
                        range.start_token = partitioner.getTokenFactory().toString(intersection.left);
                        range.end_token = partitioner.getTokenFactory().toString(intersection.right);
                        // for each range, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(range, conf)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<CassandraSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    assert splits.size() > 0;
    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}
From source file:andromache.hadoop.CassandraOutputFormat.java
License:Apache License
/**
 * Check for validity of the output-specification for the job.
 *
 * @param context information about the job
 * @throws java.io.IOException when output should not be attempted
 */
@Override
public void checkOutputSpecs(JobContext context) {
    Configuration conf = context.getConfiguration();
    if (CassandraConfigHelper.getOutputPartitioner(conf) == null) {
        throw new UnsupportedOperationException(
                "You must set the output partitioner to the one used by your Cassandra cluster");
    }
    if (CassandraConfigHelper.getOutputInitialAddress(conf) == null) {
        throw new UnsupportedOperationException("You must set the initial output address to a Cassandra node");
    }
}
From source file:at.illecker.hama.rootbeer.examples.util.RandomGraphGenerator.java
License:Apache License
static long getNumberOfRows(JobContext job) {
    return job.getConfiguration().getLong("hama.num.vertices", 0);
}
From source file:be.uantwerpen.adrem.hadoop.util.SplitByKTextInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = newArrayList();
    int numberOfSplits = getNumberOfSplits(job);
    for (FileStatus status : listStatus(job)) {
        splits.addAll(getSplitsForFile(status, job.getConfiguration(), numberOfSplits));
    }
    return splits;
}
From source file:be.uantwerpen.adrem.hadoop.util.SplitByKTextInputFormat.java
License:Apache License
/**
 * Get the number of splits.
 *
 * @param job the job
 * @return the number of splits to be created for this file
 */
public static int getNumberOfSplits(JobContext job) {
    return job.getConfiguration().getInt(NUMBER_OF_CHUNKS, 1);
}
From source file:boa.aggregators.MLAggregator.java
License:Apache License
public void saveModel(Object model) {
    FSDataOutputStream out = null;
    FileSystem fileSystem = null;
    Path filePath = null;

    try {
        JobContext context = (JobContext) getContext();
        Configuration configuration = context.getConfiguration();
        int boaJobId = configuration.getInt("boa.hadoop.jobid", 0);
        JobConf job = new JobConf(configuration);
        Path outputPath = FileOutputFormat.getOutputPath(job);

        fileSystem = outputPath.getFileSystem(context.getConfiguration());

        fileSystem.mkdirs(new Path("/boa", new Path("" + boaJobId)));
        filePath = new Path("/boa",
                new Path("" + boaJobId, new Path(("" + getKey()).split("\\[")[0] + "ML.model")));

        if (fileSystem.exists(filePath))
            return;

        out = fileSystem.create(filePath);

        // serialize the model and write the bytes to the output file
        ByteArrayOutputStream byteOutStream = new ByteArrayOutputStream();
        ObjectOutputStream objectOut = new ObjectOutputStream(byteOutStream);
        objectOut.writeObject(model);
        objectOut.close();
        byte[] serializedObject = byteOutStream.toByteArray();
        out.write(serializedObject, 0, serializedObject.length);

        this.collect(filePath.toString());
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (out != null)
                out.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
    }
}
From source file:boa.io.BoaOutputCommitter.java
License:Apache License
@Override
public void commitJob(JobContext context) throws java.io.IOException {
    super.commitJob(context);

    int boaJobId = context.getConfiguration().getInt("boa.hadoop.jobid", 0);
    storeOutput(context, boaJobId);
    updateStatus(null, boaJobId);
}
From source file:boa.io.BoaOutputCommitter.java
License:Apache License
@Override
public void abortJob(JobContext context, JobStatus.State runState) throws java.io.IOException {
    super.abortJob(context, runState);

    final JobClient jobClient = new JobClient(new JobConf(context.getConfiguration()));
    final RunningJob job = jobClient.getJob(
            (org.apache.hadoop.mapred.JobID) JobID.forName(context.getConfiguration().get("mapred.job.id")));

    String diag = "";
    for (final TaskCompletionEvent event : job.getTaskCompletionEvents(0))
        switch (event.getTaskStatus()) {
        case SUCCEEDED:
            break;
        case FAILED:
        case KILLED:
        case OBSOLETE:
        case TIPFAILED:
            diag += "Diagnostics for: " + event.getTaskTrackerHttp() + "\n";
            for (final String s : job.getTaskDiagnostics(event.getTaskAttemptId()))
                diag += s + "\n";
            diag += "\n";
            break;
        }

    updateStatus(diag, context.getConfiguration().getInt("boa.hadoop.jobid", 0));
}
From source file:boa.io.BoaOutputCommitter.java
License:Apache License
private void storeOutput(final JobContext context, final int jobId) {
    if (jobId == 0)
        return;

    Connection con = null;
    FileSystem fileSystem = null;
    FSDataInputStream in = null;
    FSDataOutputStream out = null;

    try {
        fileSystem = outputPath.getFileSystem(context.getConfiguration());

        con = DriverManager.getConnection(url, user, password);

        PreparedStatement ps = null;
        try {
            ps = con.prepareStatement("INSERT INTO boa_output (id, length) VALUES (" + jobId + ", 0)");
            ps.executeUpdate();
        } catch (final Exception e) {
        } finally {
            try {
                if (ps != null)
                    ps.close();
            } catch (final Exception e) {
                e.printStackTrace();
            }
        }

        fileSystem.mkdirs(new Path("/boa", new Path("" + jobId)));
        out = fileSystem.create(new Path("/boa", new Path("" + jobId, new Path("output.txt"))));

        int partNum = 0;
        final byte[] b = new byte[64 * 1024 * 1024];
        long length = 0;
        boolean hasWebResult = false;

        while (true) {
            final Path path = new Path(outputPath, "part-r-" + String.format("%05d", partNum++));
            if (!fileSystem.exists(path))
                break;

            if (in != null)
                try {
                    in.close();
                } catch (final Exception e) {
                    e.printStackTrace();
                }
            in = fileSystem.open(path);

            int numBytes = 0;
            while ((numBytes = in.read(b)) > 0) {
                if (!hasWebResult) {
                    hasWebResult = true;

                    try {
                        ps = con.prepareStatement("UPDATE boa_output SET web_result=? WHERE id=" + jobId);
                        int webSize = 64 * 1024 - 1;
                        ps.setString(1, new String(b, 0, numBytes < webSize ? numBytes : webSize));
                        ps.executeUpdate();
                    } finally {
                        try {
                            if (ps != null)
                                ps.close();
                        } catch (final Exception e) {
                            e.printStackTrace();
                        }
                    }
                }
                out.write(b, 0, numBytes);
                length += numBytes;
                this.context.progress();
            }
        }

        try {
            ps = con.prepareStatement("UPDATE boa_output SET length=? WHERE id=" + jobId);
            ps.setLong(1, length);
            ps.executeUpdate();
        } finally {
            try {
                if (ps != null)
                    ps.close();
            } catch (final Exception e) {
                e.printStackTrace();
            }
        }
    } catch (final Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (con != null)
                con.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
        try {
            if (in != null)
                in.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
        try {
            if (out != null)
                out.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
        try {
            if (fileSystem != null)
                fileSystem.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
    }
}