Example usage for org.apache.hadoop.mapreduce.lib.output FileOutputCommitter commitTask

List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputCommitter commitTask

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.lib.output FileOutputCommitter commitTask.

Prototype

@Override
public void commitTask(TaskAttemptContext context) throws IOException 

Source Link

Document

Move the files from the work directory to the job output directory.
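
Before the project-specific examples below, here is a minimal, self-contained sketch of the commit protocol around commitTask: set up the job and task, write through the task attempt's work directory, then call needsTaskCommit/commitTask and finally commitJob. It is not taken from any of the examples; the output path and task attempt ID are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class CommitTaskSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path outputPath = new Path("/tmp/committask-demo"); // illustrative output directory
        TaskAttemptID attemptId = TaskAttemptID.forName("attempt_200707121733_0001_m_000000_0"); // illustrative attempt id
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, attemptId.toString());

        Job job = Job.getInstance(conf);
        FileOutputFormat.setOutputPath(job, outputPath);
        JobContext jobContext = new JobContextImpl(job.getConfiguration(), attemptId.getJobID());
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), attemptId);

        // the committer manages the temporary work directory under <output>/_temporary
        FileOutputCommitter committer = new FileOutputCommitter(outputPath, taskContext);
        committer.setupJob(jobContext);
        committer.setupTask(taskContext);

        // write records; TextOutputFormat places them in the task attempt's work directory
        TextOutputFormat<NullWritable, Text> outputFormat = new TextOutputFormat<>();
        RecordWriter<NullWritable, Text> writer = outputFormat.getRecordWriter(taskContext);
        writer.write(NullWritable.get(), new Text("hello"));
        writer.close(taskContext);

        // commitTask moves the task's files from the work directory into the job output directory
        if (committer.needsTaskCommit(taskContext)) {
            committer.commitTask(taskContext);
        }
        committer.commitJob(jobContext);
    }
}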

Usage

From source file:mlbench.bayes.test.BayesTest.java

License:Apache License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException {
    parseArgs(args);
    HashMap<String, String> conf = new HashMap<String, String>();
    initConf(conf);
    MPI_D.Init(args, MPI_D.Mode.Common, conf);

    if (MPI_D.COMM_BIPARTITE_O != null) {
        rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
        size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
        NaiveBayesModel model = NaiveBayesModel.materialize(modelPath, config);
        classifier = new StandardNaiveBayesClassifier(model);

        MPI_D.COMM_BIPARTITE_O.Barrier();
        FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                (JobConf) config, inDir, rank);

        for (int i = 0; i < inputs.length; i++) {
            FileSplit fsplit = inputs[i];
            SequenceFileRecordReader<Text, VectorWritable> kvrr = new SequenceFileRecordReader<>(config,
                    fsplit);
            Text key = kvrr.createKey();
            VectorWritable value = kvrr.createValue();
            while (kvrr.next(key, value)) {
                Vector result = classifier.classifyFull(value.get());
                MPI_D.Send(new Text(SLASH.split(key.toString())[1]), new VectorWritable(result));
            }
        }
    } else if (MPI_D.COMM_BIPARTITE_A != null) {
        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
        config.set(MAPRED_OUTPUT_DIR, outDir);
        config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString());
        ((JobConf) config).setOutputKeyClass(Text.class);
        ((JobConf) config).setOutputValueClass(VectorWritable.class);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(config,
                DataMPIUtil.getHadoopTaskAttemptID());
        SequenceFileOutputFormat<Text, VectorWritable> outfile = new SequenceFileOutputFormat<>();
        FileSystem fs = FileSystem.get(config);

        Path output = new Path(config.get(MAPRED_OUTPUT_DIR));
        FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext);
        RecordWriter<Text, VectorWritable> outrw = null;
        try {
            fcommitter.setupJob(taskContext);
            outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("ERROR: Please set the HDFS configuration properly\n");
            System.exit(-1);
        }
        Text key = null;
        VectorWritable point = null;
        Vector vector = null;
        Object[] vals = MPI_D.Recv();
        while (vals != null) {
            key = (Text) vals[0];
            point = (VectorWritable) vals[1];
            if (key != null && point != null) {
                vector = point.get();
                outrw.write(key, new VectorWritable(vector));
            }
            vals = MPI_D.Recv();
        }
        outrw.close(null);
        if (fcommitter.needsTaskCommit(taskContext)) {
            fcommitter.commitTask(taskContext);
        }

        MPI_D.COMM_BIPARTITE_A.Barrier();
        if (rank == 0) {
            // load the labels
            Map<Integer, String> labelMap = BayesUtils.readLabelIndex(config, labPath);
            // loop over the results and create the confusion matrix
            SequenceFileDirIterable<Text, VectorWritable> dirIterable = new SequenceFileDirIterable<Text, VectorWritable>(
                    output, PathType.LIST, PathFilters.partFilter(), config);
            ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT");
            analyzeResults(labelMap, dirIterable, analyzer);
        }
    }
    MPI_D.Finalize();
}

From source file:mlbench.bayes.train.IndexInstances.java

License:Apache License

@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException {
    parseArgs(args);
    HashMap<String, String> conf = new HashMap<String, String>();
    initConf(conf);
    MPI_D.Init(args, MPI_D.Mode.Common, conf);
    if (MPI_D.COMM_BIPARTITE_O != null) {
        rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);

        if (rank == 0) {
            System.out.println(IndexInstances.class.getSimpleName() + " O start.");
            createLabelIndex(labPath);
        }

        HadoopUtil.cacheFiles(labPath, config);

        MPI_D.COMM_BIPARTITE_O.Barrier();

        OpenObjectIntHashMap<String> labelIndex = BayesUtils.readIndexFromCache(config);

        if (MPI_D.COMM_BIPARTITE_O != null) {
            // O communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
            FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                    (JobConf) config, inDir, rank);
            for (int i = 0; i < inputs.length; i++) {
                FileSplit fsplit = inputs[i];
                SequenceFileRecordReader<Text, VectorWritable> kvrr = new SequenceFileRecordReader<>(config,
                        fsplit);
                Text labelText = kvrr.createKey();
                VectorWritable instance = kvrr.createValue();
                while (kvrr.next(labelText, instance)) {
                    String label = SLASH.split(labelText.toString())[1];
                    if (labelIndex.containsKey(label)) {
                        MPI_D.Send(new IntWritable(labelIndex.get(label)), instance);
                    }
                }
            }
        }
    } else if (MPI_D.COMM_BIPARTITE_A != null) {
        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
        config.set(MAPRED_OUTPUT_DIR, outDir);
        config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString());
        ((JobConf) config).setOutputKeyClass(IntWritable.class);
        ((JobConf) config).setOutputValueClass(VectorWritable.class);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(config,
                DataMPIUtil.getHadoopTaskAttemptID());
        SequenceFileOutputFormat<IntWritable, VectorWritable> outfile = new SequenceFileOutputFormat<>();
        FileSystem fs = FileSystem.get(config);

        Path output = new Path(config.get(MAPRED_OUTPUT_DIR));
        FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext);
        RecordWriter<IntWritable, VectorWritable> outrw = null;
        try {
            fcommitter.setupJob(taskContext);
            outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("ERROR: Please set the HDFS configuration properly\n");
            System.exit(-1);
        }

        IntWritable key = null, newKey = null;
        VectorWritable point = null, newPoint = null;
        Vector vector = null;
        Object[] vals = MPI_D.Recv();
        while (vals != null) {
            newKey = (IntWritable) vals[0];
            newPoint = (VectorWritable) vals[1];
            if (key == null && point == null) {
            } else if (!key.equals(newKey)) {
                outrw.write(key, new VectorWritable(vector));
                vector = null;
            }
            if (vector == null) {
                vector = newPoint.get();
            } else {
                vector.assign(newPoint.get(), Functions.PLUS);
            }

            key = newKey;
            point = newPoint;
            vals = MPI_D.Recv();
        }
        if (newKey != null && newPoint != null) {
            outrw.write(key, new VectorWritable(vector));
        }

        outrw.close(null);
        if (fcommitter.needsTaskCommit(taskContext)) {
            fcommitter.commitTask(taskContext);
        }
    }

    MPI_D.Finalize();
}

From source file:mlbench.bayes.train.WeightSummer.java

License:Apache License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException {
    parseArgs(args);
    HashMap<String, String> conf = new HashMap<String, String>();
    initConf(conf);
    MPI_D.Init(args, MPI_D.Mode.Common, conf);
    if (MPI_D.COMM_BIPARTITE_O != null) {

        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
        int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
        FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                (JobConf) config, inDir, rank);
        Vector weightsPerFeature = null;
        Vector weightsPerLabel = new DenseVector(labNum);

        for (int i = 0; i < inputs.length; i++) {
            FileSplit fsplit = inputs[i];
            SequenceFileRecordReader<IntWritable, VectorWritable> kvrr = new SequenceFileRecordReader<>(config,
                    fsplit);
            IntWritable index = kvrr.createKey();
            VectorWritable value = kvrr.createValue();
            while (kvrr.next(index, value)) {
                Vector instance = value.get();
                if (weightsPerFeature == null) {
                    weightsPerFeature = new RandomAccessSparseVector(instance.size(),
                            instance.getNumNondefaultElements());
                }

                int label = index.get();
                weightsPerFeature.assign(instance, Functions.PLUS);
                weightsPerLabel.set(label, weightsPerLabel.get(label) + instance.zSum());
            }
        }
        if (weightsPerFeature != null) {
            MPI_D.Send(new Text(WEIGHTS_PER_FEATURE), new VectorWritable(weightsPerFeature));
            MPI_D.Send(new Text(WEIGHTS_PER_LABEL), new VectorWritable(weightsPerLabel));
        }
    } else if (MPI_D.COMM_BIPARTITE_A != null) {
        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
        config.set(MAPRED_OUTPUT_DIR, outDirW);
        config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString());
        ((JobConf) config).setOutputKeyClass(Text.class);
        ((JobConf) config).setOutputValueClass(VectorWritable.class);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(config,
                DataMPIUtil.getHadoopTaskAttemptID());
        SequenceFileOutputFormat<Text, VectorWritable> outfile = new SequenceFileOutputFormat<>();
        FileSystem fs = FileSystem.get(config);

        Path output = new Path(config.get(MAPRED_OUTPUT_DIR));
        FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext);
        RecordWriter<Text, VectorWritable> outrw = null;
        try {
            fcommitter.setupJob(taskContext);
            outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("ERROR: Please set the HDFS configuration properly\n");
            System.exit(-1);
        }

        Text key = null, newKey = null;
        VectorWritable point = null, newPoint = null;
        Vector vector = null;
        Object[] vals = MPI_D.Recv();
        while (vals != null) {
            newKey = (Text) vals[0];
            newPoint = (VectorWritable) vals[1];
            if (key == null && point == null) {
            } else if (!key.equals(newKey)) {
                outrw.write(key, new VectorWritable(vector));
                vector = null;
            }
            if (vector == null) {
                vector = newPoint.get();
            } else {
                vector.assign(newPoint.get(), Functions.PLUS);
            }

            key = newKey;
            point = newPoint;
            vals = MPI_D.Recv();
        }
        if (newKey != null && newPoint != null) {
            outrw.write(key, new VectorWritable(vector));
        }

        outrw.close(null);
        if (fcommitter.needsTaskCommit(taskContext)) {
            fcommitter.commitTask(taskContext);
        }

        MPI_D.COMM_BIPARTITE_A.Barrier();
        if (rank == 0) {
            Path resOut = new Path(outDir);
            NaiveBayesModel naiveBayesModel = BayesUtils.readModelFromDir(new Path(outDir), config);
            naiveBayesModel.serialize(resOut, config);
        }
    }

    MPI_D.Finalize();
}

From source file:org.goldenorb.OrbPartition.java

License:Apache License

private void dumpData() {
    Configuration conf = new Configuration();
    Job job = null;
    JobContext jobContext = null;
    TaskAttemptContext tao = null;
    RecordWriter rw;
    VertexWriter vw;
    FileOutputFormat outputFormat;

    boolean tryAgain = true;
    int count = 0;
    while (tryAgain && count < 15)
        try {
            count++;
            tryAgain = false;
            if (job == null) {
                job = new Job(conf);
                job.setOutputFormatClass(TextOutputFormat.class);
                FileOutputFormat.setOutputPath(job,
                        new Path(new String(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath())));
            }
            if (jobContext == null) {
                jobContext = new JobContext(job.getConfiguration(), new JobID());
            }

            System.out.println(jobContext.getConfiguration().get("mapred.output.dir"));

            tao = new TaskAttemptContext(jobContext.getConfiguration(),
                    new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0));
            outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance();
            rw = outputFormat.getRecordWriter(tao);
            vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance();
            for (Vertex v : vertices.values()) {
                OrbContext oc = vw.vertexWrite(v);
                rw.write(oc.getKey(), oc.getValue());
                // orbLogger.info("Partition: " + Integer.toString(partitionId) + "writing: " +
                // oc.getKey().toString() + ", " + oc.getValue().toString());
            }
            rw.close(tao);

            FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao);
            if (cm.needsTaskCommit(tao)) {
                cm.commitTask(tao);
                cm.cleanupJob(jobContext);
            } else {
                cm.cleanupJob(jobContext);
                tryAgain = true;
            }

        } catch (IOException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (InstantiationException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            tryAgain = true;
            e.printStackTrace();
        } catch (InterruptedException e) {
            tryAgain = true;
            e.printStackTrace();
        }
    if (tryAgain) {
        synchronized (this) {
            try {
                wait(1000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java

License:Apache License

@Test
public void writeExcelOutputFormatExcel2013SingleSheetLowFootprint() throws IOException, InterruptedException {
    // one row string and three columns ("test1","test2","test3")
    // (String formattedValue, String comment, String formula, String address,String
    // sheetName)
    SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1");
    SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1");
    // empty row => nothing to do
    // one row numbers (1,2,3)
    SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1");
    SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1");
    SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1");
    // one row formulas (=A3+B3)
    SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1");
    // write
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();

    String fileName = "excel2013singlesheettestout";
    String tmpDir = tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    conf.set("mapreduce.output.basename", fileName);

    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");

    // low footprint
    conf.set("hadoopoffice.write.lowFootprint", "true");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // Excel
    // format,
    // anyway
    // default,
    // but
    // here
    // for
    // illustrative
    // purposes
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    // set generic outputformat settings
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context);
    assertNotNull(writer, "Format returned  null RecordWriter");
    writer.write(null, a1);
    writer.write(null, b1);
    writer.write(null, c1);
    writer.write(null, a3);
    writer.write(null, b3);
    writer.write(null, c3);
    writer.write(null, a4);
    writer.close(context);
    committer.commitTask(context);
    // try to read it again
    conf = new Configuration(defaultConf);
    job = Job.getInstance(conf);
    fileName = fileName + this.outputbaseAppendix;
    Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator
            + taskAttempt + File.separator + fileName + ".xlsx");
    FileInputFormat.setInputPaths(job, inputFile);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    FileInputFormat.setInputPaths(job, inputFile);
    context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned  null RecordReader");
    reader.initialize(splits.get(0), context);
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("[" + fileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(),
            "Input Split for Excel file has keyname == \"[" + fileName + ".xlsx]Sheet1!A1\"");
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns");
    assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
    assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(0, spreadSheetValue.get().length, "Input Split for Excel file contain row 2 and is empty");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contain row 3 with 3 columns");
    assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 1 == \"1\"");
    assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 2 == \"2\"");
    assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 3 == \"3\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contain row 4 with 1 column");
    assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 1 == \"3\"");
}

From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java

License:Apache License

@Test
public void writeExcelOutputFormatExcel2013SingleSheetEncryptedPositiveLowFootprint()
        throws IOException, InterruptedException {
    // one row string and three columns ("test1","test2","test3")
    // (String formattedValue, String comment, String formula, String address,String
    // sheetName)
    SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1");
    SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1");
    // empty row => nothing to do
    // one row numbers (1,2,3)
    SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1");
    SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1");
    SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1");
    // one row formulas (=A3+B3)
    SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1");
    // write
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();

    String fileName = "excel2013singlesheettestoutencryptedpositivelowfootprint";
    String tmpDir = tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    conf.set("mapreduce.output.basename", fileName);
    // set generic outputformat settings
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");

    // low footprint
    conf.set("hadoopoffice.write.lowFootprint", "true");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // excel
    // format
    // security
    // for the new Excel format you need to decide on your own which algorithms are
    // secure
    conf.set("hadoopoffice.write.security.crypt.encrypt.mode", "agile");
    conf.set("hadoopoffice.write.security.crypt.encrypt.algorithm", "aes256");
    conf.set("hadoopoffice.write.security.crypt.chain.mode", "cbc");
    conf.set("hadoopoffice.write.security.crypt.hash.algorithm", "sha512");
    conf.set("hadoopoffice.write.security.crypt.password", "test");
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context);
    assertNotNull(writer, "Format returned  null RecordWriter");
    writer.write(null, a1);
    writer.write(null, b1);
    writer.write(null, c1);
    writer.write(null, a3);
    writer.write(null, b3);
    writer.write(null, c3);
    writer.write(null, a4);
    writer.close(context);
    committer.commitTask(context);
    // try to read it again
    conf = new Configuration(defaultConf);
    job = Job.getInstance(conf);
    fileName = fileName + this.outputbaseAppendix;
    Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator
            + taskAttempt + File.separator + fileName + ".xlsx");
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    // you just need to provide the password to read encrypted data
    conf.set("hadoopoffice.read.security.crypt.password", "test");
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    FileInputFormat.setInputPaths(job, inputFile);
    context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned  null RecordReader");
    reader.initialize(splits.get(0), context);
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("[" + fileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(),
            "Input Split for Excel file has keyname == \"[" + fileName + ".xlsx]Sheet1!A1\"");
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns");
    assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
    assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(0, spreadSheetValue.get().length, "Input Split for Excel file contain row 2 and is empty");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contain row 3 with 3 columns");
    assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 1 == \"1\"");
    assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 2 == \"2\"");
    assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 3 == \"3\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contain row 4 with 1 column");
    assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 1 == \"3\"");
}

From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java

License:Apache License

@Test
public void writeExcelOutputFormatExcel2013SingleSheetEncryptedNegativeLowFootprint()
        throws IOException, InterruptedException {
    // one row string and three columns ("test1","test2","test3")
    // (String formattedValue, String comment, String formula, String address,String
    // sheetName)
    SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1");
    SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1");
    // empty row => nothing to do
    // one row numbers (1,2,3)
    SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1");
    SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1");
    SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1");
    // one row formulas (=A3+B3)
    SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1");
    // write
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();
    String fileName = "excel2013singlesheettestoutencryptednegativelowfootprint";
    String tmpDir = tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, outputPath);
    conf.set("mapreduce.output.basename", fileName);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");

    // low footprint
    conf.set("hadoopoffice.write.lowFootprint", "true");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // excel
    // format
    // security
    // for the new Excel format you need to decide on your own which algorithms are
    // secure
    conf.set("hadoopoffice.write.security.crypt.encrypt.mode", "agile");
    conf.set("hadoopoffice.write.security.crypt.encrypt.algorithm", "aes256");
    conf.set("hadoopoffice.write.security.crypt.chain.mode", "cbc");
    conf.set("hadoopoffice.write.security.crypt.hash.algorithm", "sha512");
    conf.set("hadoopoffice.write.security.crypt.password", "test");
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context);
    assertNotNull(writer, "Format returned  null RecordWriter");
    writer.write(null, a1);
    writer.write(null, b1);
    writer.write(null, c1);
    writer.write(null, a3);
    writer.write(null, b3);
    writer.write(null, c3);
    writer.write(null, a4);
    writer.close(context);
    committer.commitTask(context);
    // try to read it again
    conf = new Configuration(defaultConf);
    job = Job.getInstance(conf);
    fileName = fileName + this.outputbaseAppendix;
    Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator
            + taskAttempt + File.separator + fileName + ".xlsx");
    FileInputFormat.setInputPaths(job, inputFile);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    // you just need to provide the password to read encrypted data
    conf.set("hadoopoffice.read.security.crypt.password", "test2");
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    TaskAttemptContext context2 = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context2);

    InterruptedException ex = assertThrows(InterruptedException.class,
            () -> reader.initialize(splits.get(0), context2), "Exception is thrown in case of wrong password");
}

From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java

License:Apache License

@Test
public void writeExcelOutputFormatExcel2013SingleSheetSignedPositive()
        throws IOException, InterruptedException {
    // one row string and three columns ("test1","test2","test3")
    // (String formattedValue, String comment, String formula, String address,String
    // sheetName)
    SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1");
    SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1");
    // empty row => nothing to do
    // one row numbers (1,2,3)
    SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1");
    SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1");
    SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1");
    // one row formulas (=A3+B3)
    SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1");
    // write
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();

    String fileName = "excel2013singlesheettestoutsignedpositive";
    String tmpDir = tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    conf.set("mapreduce.output.basename", fileName);

    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // Excel
    // format,
    // anyway
    // default,
    // but
    // here
    // for
    // illustrative
    // purposes
    /// signature
    String pkFileName = "testsigning.pfx"; // private key
    ClassLoader classLoader = getClass().getClassLoader();
    String fileNameKeyStore = classLoader.getResource(pkFileName).getFile();

    conf.set("hadoopoffice.write.security.sign.keystore.file", fileNameKeyStore);
    conf.set("hadoopoffice.write.security.sign.keystore.type", "PKCS12");
    conf.set("hadoopoffice.write.security.sign.keystore.password", "changeit");
    conf.set("hadoopoffice.write.security.sign.keystore.alias", "testalias");
    conf.set("hadoopoffice.write.security.sign.hash.algorithm", "sha512");
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    // set generic outputformat settings
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context);
    assertNotNull(writer, "Format returned  null RecordWriter");
    writer.write(null, a1);
    writer.write(null, b1);
    writer.write(null, c1);
    writer.write(null, a3);
    writer.write(null, b3);
    writer.write(null, c3);
    writer.write(null, a4);
    writer.close(context);
    committer.commitTask(context);
    // try to read it again
    conf = new Configuration(defaultConf);
    job = Job.getInstance(conf);
    fileName = fileName + this.outputbaseAppendix;
    Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator
            + taskAttempt + File.separator + fileName + ".xlsx");
    FileInputFormat.setInputPaths(job, inputFile);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    conf.set("hadoopoffice.read.security.sign.verifysignature", "true");
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    FileInputFormat.setInputPaths(job, inputFile);
    context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned  null RecordReader");
    reader.initialize(splits.get(0), context);
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("[" + fileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(),
            "Input Split for Excel file has keyname == \"[" + fileName + ".xlsx]Sheet1!A1\"");
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns");
    assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
    assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(0, spreadSheetValue.get().length, "Input Split for Excel file contain row 2 and is empty");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contain row 3 with 3 columns");
    assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 1 == \"1\"");
    assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 2 == \"2\"");
    assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 3 == \"3\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contain row 4 with 1 column");
    assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 1 == \"3\"");
}

From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java

License:Apache License

@Test
public void writeExcelOutputFormatExcel2013SingleSheetLowFootprintSignedPositive()
        throws IOException, InterruptedException {
    // one row string and three columns ("test1","test2","test3")
    // (String formattedValue, String comment, String formula, String address,String
    // sheetName)
    SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1");
    SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1");
    // empty row => nothing to do
    // one row numbers (1,2,3)
    SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1");
    SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1");
    SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1");
    // one row formulas (=A3+B3)
    SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1");
    // write
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();

    String fileName = "excel2013singlesheettestoutsignedpositive";
    String tmpDir = tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    conf.set("mapreduce.output.basename", fileName);

    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");

    // low footprint
    conf.set("hadoopoffice.write.lowFootprint", "true");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // Excel
    // format,
    // anyway
    // default,
    // but
    // here
    // for
    // illustrative
    // purposes
    /// signature
    String pkFileName = "testsigning.pfx"; // private key
    ClassLoader classLoader = getClass().getClassLoader();
    String fileNameKeyStore = classLoader.getResource(pkFileName).getFile();

    conf.set("hadoopoffice.write.security.sign.keystore.file", fileNameKeyStore);
    conf.set("hadoopoffice.write.security.sign.keystore.type", "PKCS12");
    conf.set("hadoopoffice.write.security.sign.keystore.password", "changeit");
    conf.set("hadoopoffice.write.security.sign.keystore.alias", "testalias");
    conf.set("hadoopoffice.write.security.sign.hash.algorithm", "sha512");
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    // set generic outputformat settings
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context);
    assertNotNull(writer, "Format returned  null RecordWriter");
    writer.write(null, a1);
    writer.write(null, b1);
    writer.write(null, c1);
    writer.write(null, a3);
    writer.write(null, b3);
    writer.write(null, c3);
    writer.write(null, a4);
    writer.close(context);
    committer.commitTask(context);
    // try to read it again
    conf = new Configuration(defaultConf);
    job = Job.getInstance(conf);
    fileName = fileName + this.outputbaseAppendix;
    Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator
            + taskAttempt + File.separator + fileName + ".xlsx");
    FileInputFormat.setInputPaths(job, inputFile);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    conf.set("hadoopoffice.read.security.sign.verifysignature", "true");
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    FileInputFormat.setInputPaths(job, inputFile);
    context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned  null RecordReader");
    reader.initialize(splits.get(0), context);
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("[" + fileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(),
            "Input Split for Excel file has keyname == \"[" + fileName + ".xlsx]Sheet1!A1\"");
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns");
    assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
    assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(0, spreadSheetValue.get().length, "Input Split for Excel file contain row 2 and is empty");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contain row 3 with 3 columns");
    assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 1 == \"1\"");
    assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 2 == \"2\"");
    assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 3 == \"3\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contain row 4 with 1 column");
    assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 1 == \"3\"");
}

From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java

License:Apache License

@Test
public void writeExcelOutputFormatExcel2013SingleSheetLowFootprintSignedNegative()
        throws IOException, InterruptedException {
    // one row string and three columns ("test1","test2","test3")
    // (String formattedValue, String comment, String formula, String address,String
    // sheetName)
    SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1");
    SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1");
    // empty row => nothing to do
    // one row numbers (1,2,3)
    SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1");
    SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1");
    SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1");
    // one row formulas (=A3+B3)
    SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1");
    // write
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();

    String fileName = "excel2013singlesheettestoutsignednegative";
    String tmpDir = tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    conf.set("mapreduce.output.basename", fileName);

    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");

    // low footprint
    conf.set("hadoopoffice.write.lowFootprint", "true");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // Excel
    // format,
    // anyway
    // default,
    // but
    // here
    // for
    // illustrative
    // purposes

    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    // set generic outputformat settings
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context);
    assertNotNull(writer, "Format returned  null RecordWriter");
    writer.write(null, a1);
    writer.write(null, b1);
    writer.write(null, c1);
    writer.write(null, a3);
    writer.write(null, b3);
    writer.write(null, c3);
    writer.write(null, a4);
    writer.close(context);
    committer.commitTask(context);
    // try to read it again
    conf = new Configuration(defaultConf);
    job = Job.getInstance(conf);
    fileName = fileName + this.outputbaseAppendix;
    Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator
            + taskAttempt + File.separator + fileName + ".xlsx");
    FileInputFormat.setInputPaths(job, inputFile);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    conf.set("hadoopoffice.read.security.sign.verifysignature", "true"); // will fail no signature present
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    FileInputFormat.setInputPaths(job, inputFile);
    TaskAttemptContext context2 = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context2);
    InterruptedException ex = assertThrows(InterruptedException.class,
            () -> reader.initialize(splits.get(0), context2),
            "Exception is thrown in case signature cannot be verified");

}