List of usage examples for org.apache.hadoop.mapreduce TaskType MAP
TaskType MAP
The following examples show how org.apache.hadoop.mapreduce.TaskType.MAP is used in open-source projects. Each example is listed with the source file it comes from.
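As a warm-up, here is a minimal sketch of the most common pattern in the examples below: asking a task id which side of the job it belongs to. The class name and the job identifier values are made up for illustration; TaskAttemptID, TaskID.getTaskType(), and TaskType come from the Hadoop API.

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskTypeCheck {
    // True when the attempt belongs to a map task rather than a reduce task.
    static boolean isMapAttempt(TaskAttemptID attemptId) {
        return attemptId.getTaskID().getTaskType() == TaskType.MAP;
    }

    public static void main(String[] args) {
        // Hypothetical id: job 42 of a cluster started at 202401011200,
        // map task 3, first attempt.
        TaskAttemptID id = new TaskAttemptID("202401011200", 42, TaskType.MAP, 3, 0);
        System.out.println(id + " is a map attempt: " + isMapAttempt(id));
    }
}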
From source file: sg.edu.astar.dsi.mergespill.App.java
public synchronized static void doProcess(String directory, int spillNumber)
        throws IOException, InterruptedException {
    System.out.println("directory: " + directory);
    System.out.println("numberOfSpill: " + spillNumber);

    // Setup: the job configuration carries the key/value classes and comparator.
    JobConf job = new JobConf();
    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);
    Class<TextDsi> keyClass = (Class<TextDsi>) job.getMapOutputKeyClass();
    Class<IntWritable> valClass = (Class<IntWritable>) job.getMapOutputValueClass();
    FileSystem rfs = ((LocalFileSystem) FileSystem.getLocal(job)).getRaw();
    CompressionCodec codec = null;
    Counters.Counter spilledRecordsCounter = null;

    // Wait for the spill directory to appear.
    while (!new File(directory).isDirectory()) {
        sleep(5000);
    }

    if (new File(directory).isDirectory()) {
        ArrayList<Path> spillFile = new ArrayList<>();
        ArrayList<Path> spillFileIndex = new ArrayList<>();

        App myApp = new App();
        myApp.getSpillFilesAndIndices(new File(directory), spillFile, spillFileIndex, spillNumber);

        // Load every spill index and print its partition records.
        ArrayList<SpillRecord> indexCacheList = new ArrayList<>();
        int numSpills = 0;
        Iterator<Path> itrSpillFileIndex = spillFileIndex.iterator();
        while (itrSpillFileIndex.hasNext()) {
            numSpills++;
            Path temp = itrSpillFileIndex.next();
            System.out.println(temp);
            SpillRecord sr = new SpillRecord(temp, job);
            indexCacheList.add(sr);

            System.out.println("indexFile partition size: " + sr.size());
            for (int i = 0; i < sr.size(); i++) { // sr.size() is the number of partitions
                IndexRecord ir = sr.getIndex(i);
                System.out.println("index[" + i + "] rawLength = " + ir.rawLength);
                System.out.println("index[" + i + "] partLength = " + ir.partLength);
                System.out.println("index[" + i + "] startOffset= " + ir.startOffset);
            }
            System.out.println("========================================");
        }
        System.out.println("Number of spills: " + numSpills);

        // Final output file and its index.
        Path finalOutputFile = new Path(directory + File.separator + "FINALOUTPUTFILE");
        FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);
        System.out.println("GOT HERE 1");
        Path finalIndexFile = new Path(directory + File.separator + "FINALOUTPUTFILE.index");

        // One partition only: build a merge segment from each spill file.
        // Stripping the ".index" suffix from the index path yields the data file path.
        List<Segment<TextDsi, IntWritable>> segmentList = new ArrayList<>(numSpills);
        for (int i = 0; i < numSpills; i++) {
            IndexRecord theIndexRecord = indexCacheList.get(i).getIndex(0);
            String indexPath = spillFileIndex.get(i).toString();
            String dataPath = indexPath.substring(0, indexPath.length() - 6);
            Segment<TextDsi, IntWritable> s = new Segment<>(job, rfs, new Path(dataPath),
                    theIndexRecord.startOffset, theIndexRecord.partLength, codec, true);
            segmentList.add(i, s);
        }
        System.out.println("GOT HERE 2");

        // Merge all segments; TaskType.MAP marks this as a map-side merge.
        RawKeyValueIterator kvIter = Merger.merge(job, rfs, keyClass, valClass, null, segmentList, 4,
                new Path("/home/hduser/spillSample2/My"), job.getOutputKeyComparator(), null, false, null,
                spilledRecordsCounter, null, TaskType.MAP);
        System.out.println("GOT HERE 3");

        // Write the merged output to disk.
        long segmentStart = finalOut.getPos();
        FSDataOutputStream finalPartitionOut = CryptoUtils.wrapIfNecessary(job, finalOut);
        Writer<TextDsi, IntWritable> writer = new Writer<TextDsi, IntWritable>(job, finalPartitionOut,
                TextDsi.class, IntWritable.class, codec, spilledRecordsCounter);
        System.out.println("GOT HERE 4");
        Merger.writeFile(kvIter, writer, null, job);
        writer.close();
        finalOut.close();
        System.out.println("GOT HERE 5");

        // Record the merged segment in the new index file.
        IndexRecord rec = new IndexRecord();
        final SpillRecord spillRec = new SpillRecord(1);
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
        rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
        System.out.println("rec.startOffset: " + rec.startOffset);
        System.out.println("rec.rawLength  : " + rec.rawLength);
        System.out.println("rec.partLength : " + rec.partLength);
        spillRec.putIndex(rec, 0);
        spillRec.writeToFile(finalIndexFile, job);
        System.out.println("GOT HERE 6");
    } else {
        System.out.println("argument is not a directory! : " + directory);
    }
}
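In this example, TaskType.MAP is handed to Merger.merge to mark the merge as map-side while the spill segments are combined into a single sorted FINALOUTPUTFILE plus its index. A hypothetical invocation, assuming the spill files live under /tmp/spills and that 3 is the spill count expected by getSpillFilesAndIndices (both values are made up):

    App.doProcess("/tmp/spills", 3);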
From source file: simsql.runtime.RecordOutputFormat.java
License: Apache License
public RecordWriter<WritableKey, WritableValue> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();

    // Here's what we do -- if we have a map-only job and a value for
    // lastInputSplit as given to us by RecordInputFormat, then we
    // will get our part number from that file. Otherwise, we'll use
    // the one we get from the job.
    TaskID taskId = job.getTaskAttemptID().getTaskID();
    int part = taskId.getId();
    if (RecordOutputFormat.lastInputSplit != null && taskId.getTaskType() == TaskType.MAP) {
        part = RecordOutputFormat.getPartNumber(RecordOutputFormat.lastInputSplit);
        System.out.println("MAP-ONLY JOB: USING PART NUMBER " + part + " FROM INPUT SPLIT");

        // Set it back to null.
        RecordOutputFormat.lastInputSplit = null;
    }

    FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(job);
    Path file = new Path(committer.getWorkPath(), RecordOutputFormat.getFileNumber(part));
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    return new OutputFileSerializer(fileOut);
}
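The TaskType.MAP check above guards a map-only optimization: when there is no reduce phase, the writer takes its part number from the input split recorded in lastInputSplit rather than from the task id, so each output file lines up with the input that produced it.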
From source file: skewtune.mapreduce.STJobTracker.java
License: Apache License
PartitionPlanner.ClusterInfo getClusterAvailability(ReactionContext context, long now)
        throws IOException, InterruptedException {
    ClusterMetrics metrics = cluster.getClusterStatus();
    TaskAttemptID attemptId = context.getTargetAttemptID();
    TaskType type = attemptId == null ? context.getTaskID().getTaskType() : attemptId.getTaskType();
    int maxSlots = type == TaskType.MAP ? metrics.getMapSlotCapacity() : metrics.getReduceSlotCapacity();
    int runningSlots = type == TaskType.MAP ? metrics.getRunningMaps() : metrics.getRunningReduces();
    int runningSkewTune = 0;

    double[] remainingTimes = new double[maxSlots];
    int from = maxSlots;

    // If this is a speculative REDUCE, the original slot becomes available.
    // We should make it available.
    boolean availRightNow = attemptId != null && type == TaskType.REDUCE && context.getTimePerByte() == 0.f;

    synchronized (this) {
        // FIXME this only involves tasks that are scheduled and running;
        // we should keep the expected information as well.
        // On planning, we should add the planned tasks, and getClusterAvailability
        // should incorporate anything already planned.
        // The information required: Map<JobID, [long planned at, for tasks -- estimated runtime]>.
        // On the first heartbeat from each task, we remove each entry.
        for (Map.Entry<TaskAttemptID, TaskInProgress> e : taskidToTIP.entrySet()) {
            TaskAttemptID taskid = e.getKey();
            if (taskid.getTaskType() == type) { // extra check
                if (availRightNow && taskid.equals(attemptId))
                    continue; // this slot will become available immediately
                TaskInProgress tip = e.getValue();
                double t = tip.getRemainingTime(taskid, now);
                if (t > 0.) {
                    remainingTimes[--from] = t;
                    ++runningSkewTune;
                    if (from == 0)
                        break;
                }
            }
        }
        if (from > 0) {
            synchronized (plannedJobs) {
                for (Map.Entry<JobID, PlannedJob> e : this.plannedJobs.entrySet()) {
                    PlannedJob plan = e.getValue();
                    from = plan.fillCompletionTime(type, now, remainingTimes, from);
                    if (from == 0)
                        break;
                }
            }
        }
    }

    Arrays.sort(remainingTimes, from, maxSlots);

    if (LOG.isDebugEnabled()) {
        LOG.debug("cluster availability = " + Arrays.toString(remainingTimes));
    }

    // FIXME incorporate other tasks that are not SkewTune
    return new PartitionPlanner.ClusterInfo(type, maxSlots, runningSlots, runningSkewTune, remainingTimes,
            maxSlots);
}
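The capacity lookup at the top of this method can be reproduced against the public Hadoop client API. A minimal sketch, assuming a reachable cluster behind the default Configuration (the class name and printout are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.ClusterMetrics;
import org.apache.hadoop.mapreduce.TaskType;

public class SlotCapacity {
    // Mirrors the TaskType.MAP branch above: report the map or reduce
    // half of the cluster's slot capacity depending on the task type.
    static int capacityFor(TaskType type, ClusterMetrics metrics) {
        return type == TaskType.MAP ? metrics.getMapSlotCapacity() : metrics.getReduceSlotCapacity();
    }

    public static void main(String[] args) throws Exception {
        Cluster cluster = new Cluster(new Configuration());
        ClusterMetrics metrics = cluster.getClusterStatus();
        System.out.println("map slots:    " + capacityFor(TaskType.MAP, metrics));
        System.out.println("reduce slots: " + capacityFor(TaskType.REDUCE, metrics));
    }
}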
From source file: skewtune.mapreduce.STJobTracker.java
License: Apache License
private Future<JobID> fastSplitTask(TaskID taskid, int n) throws IOException, InterruptedException {
    JobInProgress jip = null;
    synchronized (jobs) {
        jip = jobs.get(taskid.getJobID());
    }
    if (jip == null) {
        String msg = "unknown task " + taskid;
        LOG.error(msg);
        throw new IOException(msg);
    }
    TaskInProgress tip = jip.getTaskInProgress(taskid);

    // Re-execute map tasks across n splits; reduce tasks take their own path.
    ReactionContext context = taskid.getTaskType() == TaskType.MAP ? new ReexecMap(tip, n)
            : new ReexecReduce(tip);
    return fastSplitTask(context, true);
}
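Here the task type selects the reaction path: a TaskType.MAP task is re-executed across n splits through ReexecMap, while a reduce task always goes through ReexecReduce. Note that ReexecMap, ReexecReduce, and ReactionContext are SkewTune-internal classes, not part of the Hadoop API.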