Example usage for org.apache.hadoop.mapreduce TaskType MAP

List of usage examples for org.apache.hadoop.mapreduce TaskType MAP

Introduction

On this page you can find example usage of the org.apache.hadoop.mapreduce TaskType MAP field.

Prototype

TaskType MAP
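
Before the full examples below, here is a minimal, self-contained sketch (not taken from any of the source files on this page) of the most common pattern: comparing the TaskType of a task or attempt against TaskType.MAP. The identifier and numeric ids passed to the TaskAttemptID constructor are arbitrary values chosen only for illustration.

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskTypeMapExample {
    public static void main(String[] args) {
        // Build a task attempt id of type MAP; the identifier and numbers are
        // placeholder values used only for this sketch.
        TaskAttemptID attemptId = new TaskAttemptID("local", 1, TaskType.MAP, 0, 0);
        TaskID taskId = attemptId.getTaskID();

        // Branch on the task type, as the usage examples below do.
        if (taskId.getTaskType() == TaskType.MAP) {
            System.out.println(taskId + " is a map task");
        } else {
            System.out.println(taskId + " is not a map task");
        }
    }
}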

Usage

From source file:sg.edu.astar.dsi.mergespill.App.java

public synchronized static void doProcess(String directory, int spillNumber)
        throws IOException, InterruptedException {
    // Merge the spill files (and their .index files) found under the given directory into a single output file and index.
    System.out.println("directory: " + directory);
    System.out.println("numberOfSpill: " + spillNumber);
    // SETUP
    JobConf job = new JobConf();
    //job.setMapOutputKeyClass(Text.class);
    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);
    //Class<Text> keyClass = (Class<Text>)job.getMapOutputKeyClass();
    Class<TextDsi> keyClass = (Class<TextDsi>) job.getMapOutputKeyClass();
    Class<IntWritable> valClass = (Class<IntWritable>) job.getMapOutputValueClass();
    FileSystem rfs;
    CompressionCodec codec = null;
    Counters.Counter spilledRecordsCounter = null;
    rfs = ((LocalFileSystem) FileSystem.getLocal(job)).getRaw();

    while (!new File(directory).isDirectory()) {
        Thread.sleep(5000);
    }

    if (new File(directory).isDirectory()) {
        ArrayList<Path> spillFile = new ArrayList<>();
        ArrayList<Path> spillFileIndex = new ArrayList<>();

        App myApp = new App();

        myApp.getSpillFilesAndIndices(new File(directory), spillFile, spillFileIndex, spillNumber);

        ArrayList<SpillRecord> indexCacheList = new ArrayList<>();
        int numSpills = 0;

        Iterator<Path> itrSpillFileIndex = spillFileIndex.iterator();
        while (itrSpillFileIndex.hasNext()) {
            numSpills++;
            Path temp = itrSpillFileIndex.next();
            System.out.println(temp);
            SpillRecord sr = new SpillRecord(temp, job);
            indexCacheList.add(sr);

            System.out.println("indexFile partition size: " + sr.size());
            long startOffset = 0;
            for (int i = 0; i < sr.size(); i++) { //sr.size is the number of partitions
                IndexRecord ir = sr.getIndex(i);
                System.out.println("index[" + i + "] rawLength = " + ir.rawLength);
                System.out.println("index[" + i + "] partLength = " + ir.partLength);
                System.out.println("index[" + i + "] startOffset= " + ir.startOffset);
                startOffset = ir.startOffset;
            }
            System.out.println("========================================");
        }
        System.out.println("Number of spills: " + numSpills);
        //FinalOutputFile
        Path finalOutputFile = new Path(directory + File.separator + "FINALOUTPUTFILE");
        FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);
        System.out.println("GOT HERE 1");
        Path finalIndexFile = new Path(directory + File.separator + "FINALOUTPUTFILE.index");

        //ONE PARTITION ONLY
        List<Segment<TextDsi, IntWritable>> segmentList = new ArrayList<>(numSpills);
        for (int i = 0; i < numSpills; i++) {
            IndexRecord theIndexRecord = indexCacheList.get(i).getIndex(0);
            Path temp = spillFileIndex.get(i);
            String temp1 = temp.toString();
            String temp2 = temp1.substring(0, temp1.length() - 6);
            //System.out.println(temp2);
            //System.out.println(new Path(temp2).getParent());
            //File myFile = new File(temp2);
            //System.out.println(myFile.getPath());
            Segment<TextDsi, IntWritable> s = new Segment<>(job, rfs, new Path(temp2),
                    theIndexRecord.startOffset, theIndexRecord.partLength, codec, true);
            segmentList.add(i, s);
        }
        System.out.println("GOT HERE 2");
        RawKeyValueIterator kvIter = Merger.merge(job, rfs, keyClass, valClass, null, segmentList, 4,
                new Path("/home/hduser/spillSample2/My"), job.getOutputKeyComparator(), null, false, null,
                spilledRecordsCounter, null, TaskType.MAP);
        System.out.println("GOT HERE 3");
        //write merged output to disk
        long segmentStart = finalOut.getPos();
        FSDataOutputStream finalPartitionOut = CryptoUtils.wrapIfNecessary(job, finalOut);
        Writer<TextDsi, IntWritable> writer = new Writer<TextDsi, IntWritable>(job, finalPartitionOut,
                TextDsi.class, IntWritable.class, codec, spilledRecordsCounter);
        System.out.println("GOT HERE 4");
        Merger.writeFile(kvIter, writer, null, job);
        writer.close();
        finalOut.close();
        System.out.println("GOT HERE 5");

        IndexRecord rec = new IndexRecord();
        final SpillRecord spillRec = new SpillRecord(1);
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
        rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
        System.out.println("rec.startOffset: " + rec.startOffset);
        System.out.println("rec.rawLength  : " + rec.rawLength);
        System.out.println("rec.partLength : " + rec.partLength);
        spillRec.putIndex(rec, 0);
        spillRec.writeToFile(finalIndexFile, job);
        System.out.println("GOT HERE 6");

    } else {
        System.out.println("argument is not a directory! : " + directory);
    }

}

From source file:simsql.runtime.RecordOutputFormat.java

License:Apache License

public RecordWriter<WritableKey, WritableValue> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {

    Configuration conf = job.getConfiguration();

    // here's what we do -- if we have a map-only job and a value for
    // lastInputSplit as given to us by RecordInputFormat, then we
    // will get our part number from that file. otherwise, we'll use
    // the one we get from the job.

    // get the part from the job.
    TaskID taskId = job.getTaskAttemptID().getTaskID();
    int part = taskId.getId();
    if (RecordOutputFormat.lastInputSplit != null && taskId.getTaskType() == TaskType.MAP) {

        part = RecordOutputFormat.getPartNumber(RecordOutputFormat.lastInputSplit);
        System.out.println("MAP-ONLY JOB: USING PART NUMBER " + part + " FROM INPUT SPLIT");

        // set it back to null
        RecordOutputFormat.lastInputSplit = null;
    }

    FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(job);
    Path file = new Path(committer.getWorkPath(), RecordOutputFormat.getFileNumber(part));

    /* Path file = getDefaultWorkFile (job, ".tbl"); */
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    return new OutputFileSerializer(fileOut);
}

From source file:skewtune.mapreduce.STJobTracker.java

License:Apache License

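// Builds a snapshot of cluster availability for the task type targeted by the given reaction
// context: the slot capacity, the number of running slots, and the expected remaining times of
// running (and planned) SkewTune tasks of that type.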
PartitionPlanner.ClusterInfo getClusterAvailability(ReactionContext context, long now)
        throws IOException, InterruptedException {
    ClusterMetrics metrics = cluster.getClusterStatus();
    TaskAttemptID attemptId = context.getTargetAttemptID();
    TaskType type = attemptId == null ? context.getTaskID().getTaskType() : attemptId.getTaskType();

    int maxSlots = type == TaskType.MAP ? metrics.getMapSlotCapacity() : metrics.getReduceSlotCapacity();
    int runningSlots = type == TaskType.MAP ? metrics.getRunningMaps() : metrics.getRunningReduces();
    int runningSkewTune = 0;
    double[] remainingTimes = new double[maxSlots];
    int from = maxSlots;

    // If this is a speculative REDUCE, the original slot becomes available immediately, so treat it as available.
    boolean availRightNow = attemptId != null && type == TaskType.REDUCE && context.getTimePerByte() == 0.f;

    synchronized (this) {
        // FIXME this only involves tasks that are scheduled and running
        // we should keep an expected information as well.

        // on planning, we should add the planned tasks and getClusterAvailability should
        // incorporate any planned stuffs in it.

        // the information required:
        // Map<JobID, [long planned at, for tasks -- estimated runtime]>
        // on first heartbeat from each task, we remove each information.

        for (Map.Entry<TaskAttemptID, TaskInProgress> e : taskidToTIP.entrySet()) {
            TaskAttemptID taskid = e.getKey();
            if (taskid.getTaskType() == type) {
                // extra check
                if (availRightNow && taskid.equals(attemptId))
                    continue; // this will become available immediately

                TaskInProgress tip = e.getValue();
                double t = tip.getRemainingTime(taskid, now);
                if (t > 0.) {
                    remainingTimes[--from] = tip.getRemainingTime(taskid, now);
                    ++runningSkewTune;
                    if (from == 0)
                        break;
                }
            }
        }
        if (from > 0) {
            synchronized (plannedJobs) {
                for (Map.Entry<JobID, PlannedJob> e : this.plannedJobs.entrySet()) {
                    PlannedJob plan = e.getValue();
                    from = plan.fillCompletionTime(type, now, remainingTimes, from);
                    if (from == 0)
                        break;
                }
            }
        }
    }
    Arrays.sort(remainingTimes, from, maxSlots);

    if (LOG.isDebugEnabled()) {
        LOG.debug("cluster availability = " + Arrays.toString(remainingTimes));
    }

    // FIXME incorporate other tasks that are not SkewTune

    return new PartitionPlanner.ClusterInfo(type, maxSlots, runningSlots, runningSkewTune, remainingTimes,
            maxSlots);
}

From source file:skewtune.mapreduce.STJobTracker.java

License:Apache License

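// Splits the given task: a map task is re-executed via a ReexecMap context (into n pieces),
// a reduce task via a ReexecReduce context.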
private Future<JobID> fastSplitTask(TaskID taskid, int n) throws IOException, InterruptedException {
    JobInProgress jip = null;
    synchronized (jobs) {
        jip = jobs.get(taskid.getJobID());
    }

    if (jip == null) {
        String msg = "unknown task " + taskid;
        LOG.error(msg);
        throw new IOException(msg);
    }

    TaskInProgress tip = jip.getTaskInProgress(taskid);
    ReactionContext context = taskid.getTaskType() == TaskType.MAP ? new ReexecMap(tip, n)
            : new ReexecReduce(tip);
    return fastSplitTask(context, true);

    //        return fastSplitTask(taskid,n,true);
}