Example usage for org.apache.hadoop.mapreduce.lib.output FileOutputCommitter needsTaskCommit

List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputCommitter needsTaskCommit

Introduction

This page collects example usages of the needsTaskCommit method of org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.

Prototype

@Override
public boolean needsTaskCommit(TaskAttemptContext context) throws IOException 

Source Link

Document

Did this task write any files in the work directory?

Usage

From source file:mlbench.bayes.test.BayesTest.java

License:Apache License

/**
 * Entry point of the naive Bayes test job.
 *
 * <p>After initialising MPI_D, the process takes one of two roles depending on which
 * bipartite communicator is non-null:
 * <ul>
 *   <li><b>O side</b>: loads the model, reads the sequence-file splits assigned to this
 *       rank, classifies every vector and sends the result keyed by the second
 *       {@code SLASH}-separated segment of the input key (presumably the label).</li>
 *   <li><b>A side</b>: receives the key/vector pairs, writes them to a sequence file under
 *       the configured output directory and commits the task output if the committer
 *       reports that a commit is needed. After a barrier, rank 0 reads the written part
 *       files back and analyses the results.</li>
 * </ul>
 *
 * @param args command-line arguments, handed to {@code parseArgs} and {@code MPI_D.Init}
 * @throws MPI_D_Exception propagated from the MPI_D calls
 * @throws IOException     if reading the input or writing/committing the output fails
 * @throws MPIException    propagated from the MPI calls
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException {
    parseArgs(args);
    HashMap<String, String> conf = new HashMap<>();
    initConf(conf);
    MPI_D.Init(args, MPI_D.Mode.Common, conf);

    if (MPI_D.COMM_BIPARTITE_O != null) {
        rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
        size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
        NaiveBayesModel model = NaiveBayesModel.materialize(modelPath, config);
        classifier = new StandardNaiveBayesClassifier(model);

        MPI_D.COMM_BIPARTITE_O.Barrier();
        FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                (JobConf) config, inDir, rank);

        for (FileSplit fsplit : inputs) {
            SequenceFileRecordReader<Text, VectorWritable> kvrr = new SequenceFileRecordReader<>(config,
                    fsplit);
            try {
                Text key = kvrr.createKey();
                VectorWritable value = kvrr.createValue();
                while (kvrr.next(key, value)) {
                    Vector result = classifier.classifyFull(value.get());
                    MPI_D.Send(new Text(SLASH.split(key.toString())[1]), new VectorWritable(result));
                }
            } finally {
                // One reader is opened per split; release it before moving to the next one.
                kvrr.close();
            }
        }
    } else if (MPI_D.COMM_BIPARTITE_A != null) {
        // Named differently from the 'rank' field used on the O side to avoid shadowing it.
        int aRank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
        config.set(MAPRED_OUTPUT_DIR, outDir);
        config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString());
        ((JobConf) config).setOutputKeyClass(Text.class);
        ((JobConf) config).setOutputValueClass(VectorWritable.class);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(config,
                DataMPIUtil.getHadoopTaskAttemptID());
        SequenceFileOutputFormat<Text, VectorWritable> outfile = new SequenceFileOutputFormat<>();
        FileSystem fs = FileSystem.get(config);

        Path output = new Path(config.get(MAPRED_OUTPUT_DIR));
        FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext);
        RecordWriter<Text, VectorWritable> outrw = null;
        try {
            fcommitter.setupJob(taskContext);
            outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(aRank), null);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("ERROR: Please set the HDFS configuration properly\n");
            System.exit(-1);
        }

        try {
            // MPI_D.Recv() returning null marks the end of the incoming stream.
            for (Object[] vals = MPI_D.Recv(); vals != null; vals = MPI_D.Recv()) {
                Text key = (Text) vals[0];
                VectorWritable point = (VectorWritable) vals[1];
                if (key != null && point != null) {
                    outrw.write(key, new VectorWritable(point.get()));
                }
            }
        } finally {
            // Close the writer even when a write fails so the output stream is not leaked.
            outrw.close(null);
        }
        if (fcommitter.needsTaskCommit(taskContext)) {
            fcommitter.commitTask(taskContext);
        }

        // Every A task must have committed its part file before rank 0 reads them back.
        MPI_D.COMM_BIPARTITE_A.Barrier();
        if (aRank == 0) {
            // load the labels
            Map<Integer, String> labelMap = BayesUtils.readLabelIndex(config, labPath);
            // loop over the results and create the confusion matrix
            SequenceFileDirIterable<Text, VectorWritable> dirIterable = new SequenceFileDirIterable<>(
                    output, PathType.LIST, PathFilters.partFilter(), config);
            ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT");
            analyzeResults(labelMap, dirIterable, analyzer);
        }
    }
    MPI_D.Finalize();
}

From source file:mlbench.bayes.train.IndexInstances.java

License:Apache License

/**
 * Entry point of the instance-indexing job.
 *
 * <p>After initialising MPI_D, the process takes one of two roles depending on which
 * bipartite communicator is non-null:
 * <ul>
 *   <li><b>O side</b>: rank 0 creates the label index; after the index file is cached and a
 *       barrier is passed, every rank reads its sequence-file splits and, for each instance
 *       whose label is present in the index, sends the instance keyed by the label's
 *       integer index.</li>
 *   <li><b>A side</b>: receives the pairs, sums the vectors of each run of consecutive equal
 *       keys, writes one summed vector per run to a sequence file and commits the task
 *       output if the committer reports that a commit is needed.</li>
 * </ul>
 *
 * @param args command-line arguments, handed to {@code parseArgs} and {@code MPI_D.Init}
 * @throws MPI_D_Exception propagated from the MPI_D calls
 * @throws IOException     if reading the input or writing/committing the output fails
 * @throws MPIException    propagated from the MPI calls
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException {
    parseArgs(args);
    HashMap<String, String> conf = new HashMap<>();
    initConf(conf);
    MPI_D.Init(args, MPI_D.Mode.Common, conf);
    if (MPI_D.COMM_BIPARTITE_O != null) {
        rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);

        if (rank == 0) {
            System.out.println(IndexInstances.class.getSimpleName() + " O start.");
            createLabelIndex(labPath);
        }

        HadoopUtil.cacheFiles(labPath, config);

        // The index written by rank 0 must exist before any rank reads it from the cache.
        MPI_D.COMM_BIPARTITE_O.Barrier();

        OpenObjectIntHashMap<String> labelIndex = BayesUtils.readIndexFromCache(config);

        FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                (JobConf) config, inDir, rank);
        for (FileSplit fsplit : inputs) {
            SequenceFileRecordReader<Text, VectorWritable> kvrr = new SequenceFileRecordReader<>(config,
                    fsplit);
            try {
                Text labelText = kvrr.createKey();
                VectorWritable instance = kvrr.createValue();
                while (kvrr.next(labelText, instance)) {
                    String label = SLASH.split(labelText.toString())[1];
                    // Instances whose label is not in the index are dropped.
                    if (labelIndex.containsKey(label)) {
                        MPI_D.Send(new IntWritable(labelIndex.get(label)), instance);
                    }
                }
            } finally {
                // One reader is opened per split; release it before moving to the next one.
                kvrr.close();
            }
        }
    } else if (MPI_D.COMM_BIPARTITE_A != null) {
        // Named differently from the 'rank' field used on the O side to avoid shadowing it.
        int aRank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
        config.set(MAPRED_OUTPUT_DIR, outDir);
        config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString());
        ((JobConf) config).setOutputKeyClass(IntWritable.class);
        ((JobConf) config).setOutputValueClass(VectorWritable.class);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(config,
                DataMPIUtil.getHadoopTaskAttemptID());
        SequenceFileOutputFormat<IntWritable, VectorWritable> outfile = new SequenceFileOutputFormat<>();
        FileSystem fs = FileSystem.get(config);

        Path output = new Path(config.get(MAPRED_OUTPUT_DIR));
        FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext);
        RecordWriter<IntWritable, VectorWritable> outrw = null;
        try {
            fcommitter.setupJob(taskContext);
            outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(aRank), null);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("ERROR: Please set the HDFS configuration properly\n");
            System.exit(-1);
        }

        try {
            IntWritable key = null, newKey = null;
            VectorWritable point = null, newPoint = null;
            Vector vector = null;
            // MPI_D.Recv() returning null marks the end of the incoming stream.
            for (Object[] vals = MPI_D.Recv(); vals != null; vals = MPI_D.Recv()) {
                newKey = (IntWritable) vals[0];
                newPoint = (VectorWritable) vals[1];
                // A key change (on anything but the very first pair) ends the current run:
                // flush its accumulated sum and start a new accumulator.
                boolean firstPair = key == null && point == null;
                if (!firstPair && !key.equals(newKey)) {
                    outrw.write(key, new VectorWritable(vector));
                    vector = null;
                }
                if (vector == null) {
                    vector = newPoint.get();
                } else {
                    vector.assign(newPoint.get(), Functions.PLUS);
                }

                key = newKey;
                point = newPoint;
            }
            // Flush the last run, if anything was received at all.
            if (newKey != null && newPoint != null) {
                outrw.write(key, new VectorWritable(vector));
            }
        } finally {
            // Close the writer even when a write fails so the output stream is not leaked.
            outrw.close(null);
        }
        if (fcommitter.needsTaskCommit(taskContext)) {
            fcommitter.commitTask(taskContext);
        }
    }

    MPI_D.Finalize();
}

From source file:mlbench.bayes.train.WeightSummer.java

License:Apache License

/**
 * Entry point of the weight-summing job.
 *
 * <p>After initialising MPI_D, the process takes one of two roles depending on which
 * bipartite communicator is non-null:
 * <ul>
 *   <li><b>O side</b>: reads the sequence-file splits assigned to this rank, accumulates a
 *       per-feature sum of all instance vectors and a per-label sum of each instance's
 *       {@code zSum()}, and sends both vectors if at least one instance was read.</li>
 *   <li><b>A side</b>: receives the pairs, sums the vectors of each run of consecutive equal
 *       keys, writes one summed vector per run under {@code outDirW} and commits the task
 *       output if the committer reports that a commit is needed. After a barrier, rank 0
 *       reads a model from {@code outDir} and serialises it back to the same directory.</li>
 * </ul>
 *
 * @param args command-line arguments, handed to {@code parseArgs} and {@code MPI_D.Init}
 * @throws MPI_D_Exception propagated from the MPI_D calls
 * @throws IOException     if reading the input or writing/committing the output fails
 * @throws MPIException    propagated from the MPI calls
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException {
    parseArgs(args);
    HashMap<String, String> conf = new HashMap<>();
    initConf(conf);
    MPI_D.Init(args, MPI_D.Mode.Common, conf);
    if (MPI_D.COMM_BIPARTITE_O != null) {

        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
        FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                (JobConf) config, inDir, rank);
        // Created lazily because its cardinality is only known once the first instance is read.
        Vector weightsPerFeature = null;
        Vector weightsPerLabel = new DenseVector(labNum);

        for (FileSplit fsplit : inputs) {
            SequenceFileRecordReader<IntWritable, VectorWritable> kvrr = new SequenceFileRecordReader<>(config,
                    fsplit);
            try {
                IntWritable index = kvrr.createKey();
                VectorWritable value = kvrr.createValue();
                while (kvrr.next(index, value)) {
                    Vector instance = value.get();
                    if (weightsPerFeature == null) {
                        weightsPerFeature = new RandomAccessSparseVector(instance.size(),
                                instance.getNumNondefaultElements());
                    }

                    int label = index.get();
                    weightsPerFeature.assign(instance, Functions.PLUS);
                    weightsPerLabel.set(label, weightsPerLabel.get(label) + instance.zSum());
                }
            } finally {
                // One reader is opened per split; release it before moving to the next one.
                kvrr.close();
            }
        }
        // Nothing is sent when this rank read no instances at all.
        if (weightsPerFeature != null) {
            MPI_D.Send(new Text(WEIGHTS_PER_FEATURE), new VectorWritable(weightsPerFeature));
            MPI_D.Send(new Text(WEIGHTS_PER_LABEL), new VectorWritable(weightsPerLabel));
        }
    } else if (MPI_D.COMM_BIPARTITE_A != null) {
        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
        config.set(MAPRED_OUTPUT_DIR, outDirW);
        config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString());
        ((JobConf) config).setOutputKeyClass(Text.class);
        ((JobConf) config).setOutputValueClass(VectorWritable.class);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(config,
                DataMPIUtil.getHadoopTaskAttemptID());
        SequenceFileOutputFormat<Text, VectorWritable> outfile = new SequenceFileOutputFormat<>();
        FileSystem fs = FileSystem.get(config);

        Path output = new Path(config.get(MAPRED_OUTPUT_DIR));
        FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext);
        RecordWriter<Text, VectorWritable> outrw = null;
        try {
            fcommitter.setupJob(taskContext);
            outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("ERROR: Please set the HDFS configuration properly\n");
            System.exit(-1);
        }

        try {
            Text key = null, newKey = null;
            VectorWritable point = null, newPoint = null;
            Vector vector = null;
            // MPI_D.Recv() returning null marks the end of the incoming stream.
            for (Object[] vals = MPI_D.Recv(); vals != null; vals = MPI_D.Recv()) {
                newKey = (Text) vals[0];
                newPoint = (VectorWritable) vals[1];
                // A key change (on anything but the very first pair) ends the current run:
                // flush its accumulated sum and start a new accumulator.
                boolean firstPair = key == null && point == null;
                if (!firstPair && !key.equals(newKey)) {
                    outrw.write(key, new VectorWritable(vector));
                    vector = null;
                }
                if (vector == null) {
                    vector = newPoint.get();
                } else {
                    vector.assign(newPoint.get(), Functions.PLUS);
                }

                key = newKey;
                point = newPoint;
            }
            // Flush the last run, if anything was received at all.
            if (newKey != null && newPoint != null) {
                outrw.write(key, new VectorWritable(vector));
            }
        } finally {
            // Close the writer even when a write fails so the output stream is not leaked.
            outrw.close(null);
        }
        if (fcommitter.needsTaskCommit(taskContext)) {
            fcommitter.commitTask(taskContext);
        }

        // Every A task must have committed its part file before rank 0 builds the model.
        MPI_D.COMM_BIPARTITE_A.Barrier();
        if (rank == 0) {
            Path resOut = new Path(outDir);
            NaiveBayesModel naiveBayesModel = BayesUtils.readModelFromDir(resOut, config);
            naiveBayesModel.serialize(resOut, config);
        }
    }

    MPI_D.Finalize();
}

From source file:org.goldenorb.OrbPartition.java

License:Apache License

/**
 * Writes every vertex of this partition to the configured file output path and commits
 * the task output.
 *
 * <p>The whole write-and-commit sequence is attempted up to 15 times. An attempt counts as
 * failed when it throws one of the caught exceptions or when the committer reports that
 * there is nothing to commit. After a failed attempt the method waits up to one second on
 * this object's monitor before trying again, so retries are not issued back-to-back.
 * Failures are printed to standard error; nothing is thrown to the caller.
 */
private void dumpData() {
    Configuration conf = new Configuration();
    // Created on the first attempt that gets that far and reused by later attempts.
    Job job = null;
    JobContext jobContext = null;

    boolean tryAgain = true;
    int count = 0;
    while (tryAgain && count < 15) {
        count++;
        tryAgain = false;
        try {
            if (job == null) {
                job = new Job(conf);
                job.setOutputFormatClass(TextOutputFormat.class);
                FileOutputFormat.setOutputPath(job,
                        new Path(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath()));
            }
            if (jobContext == null) {
                jobContext = new JobContext(job.getConfiguration(), new JobID());
            }

            System.out.println(jobContext.getConfiguration().get("mapred.output.dir"));

            TaskAttemptContext tao = new TaskAttemptContext(jobContext.getConfiguration(),
                    new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0));
            FileOutputFormat outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance();
            RecordWriter rw = outputFormat.getRecordWriter(tao);
            try {
                VertexWriter vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance();
                for (Vertex v : vertices.values()) {
                    OrbContext oc = vw.vertexWrite(v);
                    rw.write(oc.getKey(), oc.getValue());
                }
            } finally {
                // Close the writer on failure as well, so a failed attempt does not leak it.
                rw.close(tao);
            }

            FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao);
            if (cm.needsTaskCommit(tao)) {
                cm.commitTask(tao);
                cm.cleanupJob(jobContext);
            } else {
                // Nothing to commit is treated as a failed attempt.
                cm.cleanupJob(jobContext);
                tryAgain = true;
            }

        } catch (IOException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (InstantiationException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (InterruptedException e) {
            tryAgain = true;
            e.printStackTrace();
        }

        if (tryAgain) {
            // Back off before the next attempt (and after the last one).
            synchronized (this) {
                try {
                    wait(1000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    // Keep the interrupt visible to the caller and stop retrying.
                    Thread.currentThread().interrupt();
                    return;
                }
            }
        }
    }
}