Example usage for org.apache.hadoop.mapreduce RecordWriter close

Introduction

This page lists example usages of org.apache.hadoop.mapreduce RecordWriter close.

Prototype

public abstract void close(TaskAttemptContext context) throws IOException, InterruptedException;

Document

Close this RecordWriter to future operations.
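
A RecordWriter implementation is expected to flush any buffered records and release its underlying resources in close(TaskAttemptContext); after the call the writer must not be used again. Note that close() only finalizes the task attempt's local output: making that output permanent is the job of the OutputCommitter (commitTask/commitJob), as the usage examples below show. A minimal sketch of an implementation, assuming a hypothetical line-oriented format (the class and field names are illustrative, not taken from the examples below):

import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical writer that appends one line per record to a stream that
// the enclosing OutputFormat opened for this task attempt.
public class LineRecordWriterSketch extends RecordWriter<NullWritable, Text> {

    private final DataOutputStream out;

    public LineRecordWriterSketch(DataOutputStream out) {
        this.out = out;
    }

    @Override
    public void write(NullWritable key, Text value) throws IOException {
        out.write(value.getBytes(), 0, value.getLength());
        out.write('\n');
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException {
        // Flush buffered bytes and close the stream; the writer is unusable
        // afterwards. Moving the finished file into the final output
        // directory is left to the OutputCommitter, not to close().
        out.close();
    }
}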

Usage

From source file: org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java

License: Apache License

@Test
public void writeExcelOutputFormatExcel2013SingleSheetEncryptedPositiveSignedNegative()
        throws IOException, InterruptedException {
    // one row of strings in three columns ("test1","test2","test3")
    // (String formattedValue, String comment, String formula, String address,
    // String sheetName)
    SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1");
    SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1");
    // empty row => nothing to do
    // one row numbers (1,2,3)
    SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1");
    SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1");
    SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1");
    // one row formulas (=A3+B3)
    SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1");
    // write
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();

    String fileName = "excel2013singlesheettestoutencryptedpositivesignednegative";
    String tmpDir = tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    conf.set("mapreduce.output.basename", fileName);
    // set generic outputformat settings
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // excel
    // format
    // security
    // for the new Excel format you need to decide on your own which algorithms are
    // secure
    conf.set("hadoopoffice.write.security.crypt.encrypt.mode", "agile");
    conf.set("hadoopoffice.write.security.crypt.encrypt.algorithm", "aes256");
    conf.set("hadoopoffice.write.security.crypt.chain.mode", "cbc");
    conf.set("hadoopoffice.write.security.crypt.hash.algorithm", "sha512");
    conf.set("hadoopoffice.write.security.crypt.password", "test");

    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context);
    assertNotNull(writer, "Format returned  null RecordWriter");
    writer.write(null, a1);
    writer.write(null, b1);
    writer.write(null, c1);
    writer.write(null, a3);
    writer.write(null, b3);
    writer.write(null, c3);
    writer.write(null, a4);
    writer.close(context);
    committer.commitTask(context);
    // try to read it again
    conf = new Configuration(defaultConf);
    job = Job.getInstance(conf);
    fileName = fileName + this.outputbaseAppendix;
    Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator
            + taskAttempt + File.separator + fileName + ".xlsx");
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    // you just need to provide the password to read encrypted data
    conf.set("hadoopoffice.read.security.crypt.password", "test");

    conf.set("hadoopoffice.read.security.sign.verifysignature", "true"); // will fail no signature provided
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    FileInputFormat.setInputPaths(job, inputFile);
    TaskAttemptContext context2 = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context2);
    InterruptedException ex = assertThrows(InterruptedException.class,
            () -> reader.initialize(splits.get(0), context2),
            "Exception is thrown in case signature cannot be verified");
}

From source file: org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java

License: Apache License

@Disabled("This does not work yet due to a bug in Apache POI that prevents writing correct workbooks containing external references: https://bz.apache.org/bugzilla/show_bug.cgi?id=57184")
@Test
public void writeExcelOutputFormatExcel2013SingleSheetOneLinkedWorkbook()
        throws IOException, InterruptedException {
    // write linkedworkbook1
    // one row of strings in three columns ("test1","test2","test3")
    // (String formattedValue, String comment, String formula, String address,
    // String sheetName)
    SpreadSheetCellDAO wb1a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO wb1b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1");
    SpreadSheetCellDAO wb1c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1");
    // write
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();
    String linkedWB1FileName = "excel2013linkedwb1";
    String tmpDir = tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, outputPath);
    conf.set("mapreduce.output.basename", linkedWB1FileName);

    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // Excel
    // format,
    // anyway
    // default,
    // but
    // here
    // for
    // illustrative
    // purposes
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context);
    assertNotNull(writer, "Format returned  null RecordWriter");
    writer.write(null, wb1a1);
    writer.write(null, wb1b1);
    writer.write(null, wb1c1);
    writer.close(context);
    committer.commitTask(context);
    committer.commitJob(jContext);
    // write mainworkbook
    linkedWB1FileName = linkedWB1FileName + this.outputbaseAppendix;
    String linkedWorkbookFilename = "[" + tmpDir + File.separator + linkedWB1FileName + ".xlsx]";
    SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test4", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("", "", "[" + linkedWB1FileName + ".xlsx]Sheet1!B1", "B1",
            "Sheet1"); // should be test2 in the end
    // write
    job = Job.getInstance();
    conf = job.getConfiguration();

    String mainWBfileName = "excel2013singlesheetlinkedwbtestout";
    outputPath = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, outputPath);
    conf.set("mapreduce.output.basename", mainWBfileName);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // Excel
    // format,
    // anyway
    // default,
    // but
    // here
    // for
    // illustrative
    // purposes
    conf.set("hadoopoffice.write.linkedworkbooks", linkedWorkbookFilename);
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    jContext = new JobContextImpl(conf, taskID.getJobID());

    context = new TaskAttemptContextImpl(conf, taskID);
    committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writerMain = outputFormat.getRecordWriter(context);
    assertNotNull(writerMain, "Format returned  null RecordWriter");
    writerMain.write(null, a1);
    writerMain.write(null, b1);
    writerMain.close(context);
    committer.commitTask(context);
    committer.commitJob(jContext);
    // try to read it again
    job = Job.getInstance(conf);
    mainWBfileName = mainWBfileName + this.outputbaseAppendix;
    Path inputFile = new Path(tmpDir + File.separator + mainWBfileName + ".xlsx");
    FileInputFormat.setInputPaths(job, inputFile);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    // enable option to read linked workbooks
    conf.setBoolean("hadoopoffice.read.linkedworkbooks", true);
    conf.setBoolean("hadoopoffice.read.ignoremissinglinkedworkbooks", false);
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned  null RecordReader");
    reader.initialize(splits.get(0), context);
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 Sheet1");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("[" + mainWBfileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(),
            "Input Split for Excel file has keyname == \"[" + mainWBfileName + ".xlsx]Sheet1!A1\"");
    assertEquals(2, spreadSheetValue.get().length,
            "Input Split for Excel file contains row 1 with 2 columns for Sheet1");
    assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 1 == \"test4\"");
    // this comes from the external workbook
    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
}

From source file: org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java

License: Apache License

@Disabled("This does not work yet due to a bug in Apache POI that prevents writing correct workbooks containing external references: https://bz.apache.org/bugzilla/show_bug.cgi?id=57184")
@Test
public void writeExcelOutputFormatExcel2013SingleSheetTwoLinkedWorkbooks()
        throws IOException, InterruptedException {
    // write linkedworkbook1
    // one row of strings in three columns ("test1","test2","test3")
    // (String formattedValue, String comment, String formula, String address,
    // String sheetName)
    SpreadSheetCellDAO wb1a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO wb1b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1");
    SpreadSheetCellDAO wb1c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1");
    // write
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();
    String linkedWB1FileName = "excel2013linkedwb1";
    String tmpDir = tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, outputPath);
    conf.set("mapreduce.output.basename", linkedWB1FileName);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // Excel
    // format,
    // anyway
    // default,
    // but
    // here
    // for
    // illustrative
    // purposes
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context);
    assertNotNull(writer, "Format returned  null RecordWriter");
    writer.write(null, wb1a1);
    writer.write(null, wb1b1);
    writer.write(null, wb1c1);
    writer.close(context);
    committer.commitTask(context);
    committer.commitJob(jContext);
    // write linkedworkbook2
    // one row of strings in three columns ("test4","test5","test6")
    // (String formattedValue, String comment, String formula, String address,
    // String sheetName)
    SpreadSheetCellDAO wb2a1 = new SpreadSheetCellDAO("test4", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO wb2b1 = new SpreadSheetCellDAO("test5", "", "", "B1", "Sheet1");
    SpreadSheetCellDAO wb2c1 = new SpreadSheetCellDAO("test6", "", "", "C1", "Sheet1");
    // write
    job = Job.getInstance();
    conf = job.getConfiguration();
    String linkedWB2FileName = "excel2013linkedwb2";
    outputPath = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, outputPath);
    conf.set("mapreduce.output.basename", linkedWB2FileName);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // Excel
    // format,
    // anyway
    // default,
    // but
    // here
    // for
    // illustrative
    // purposes
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    jContext = new JobContextImpl(conf, taskID.getJobID());

    context = new TaskAttemptContextImpl(conf, taskID);
    committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    outputFormat = new ExcelFileOutputFormat();
    writer = outputFormat.getRecordWriter(context);
    assertNotNull(writer, "Format returned  null RecordWriter");
    writer.write(null, wb2a1);
    writer.write(null, wb2b1);
    writer.write(null, wb2c1);
    writer.close(context);
    committer.commitTask(context);
    committer.commitJob(jContext);
    // write mainworkbook
    linkedWB1FileName = linkedWB1FileName + this.outputbaseAppendix;
    linkedWB2FileName = linkedWB2FileName + this.outputbaseAppendix;
    String linkedWorkbookFilename = "[" + tmpDir + File.separator + linkedWB1FileName + ".xlsx]:[" + tmpDir
            + File.separator + linkedWB2FileName + ".xlsx]";
    SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test7", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("", "", "'[" + linkedWB1FileName + ".xlsx]Sheet1'!B1", "B1",
            "Sheet1"); // should be test2 in the end
    SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("", "", "'[" + linkedWB2FileName + ".xlsx]Sheet1'!B1", "C1",
            "Sheet1"); // should be test5 in the end
    // write
    job = Job.getInstance();
    conf = job.getConfiguration();
    String mainWBfileName = "excel2013singlesheetlinkedwbtestout";
    outputPath = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, outputPath);
    conf.set("mapreduce.output.basename", mainWBfileName);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // Excel
    // format,
    // anyway
    // default,
    // but
    // here
    // for
    // illustrative
    // purposes
    conf.set("hadoopoffice.write.linkedworkbooks", linkedWorkbookFilename);
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    jContext = new JobContextImpl(conf, taskID.getJobID());

    context = new TaskAttemptContextImpl(conf, taskID);
    committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);

    outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writerMain = outputFormat.getRecordWriter(context);
    assertNotNull(writerMain, "Format returned  null RecordWriter");
    writerMain.write(null, a1);
    writerMain.write(null, b1);
    writerMain.write(null, c1);
    writerMain.close(context);
    committer.commitTask(context);
    committer.commitJob(jContext);
    // try to read it again
    job = Job.getInstance(conf);
    mainWBfileName = mainWBfileName + this.outputbaseAppendix;
    Path inputFile = new Path(tmpDir + File.separator + mainWBfileName + ".xlsx");
    FileInputFormat.setInputPaths(job, inputFile);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    // enable option to read linked workbooks
    conf.setBoolean("hadoopoffice.read.linkedworkbooks", true);
    conf.setBoolean("hadoopoffice.read.ignoremissinglinkedworkbooks", false);
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned  null RecordReader");
    reader.initialize(splits.get(0), context);
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 Sheet1");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("[" + mainWBfileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(),
            "Input Split for Excel file has keyname == \"[" + mainWBfileName + ".xlsx]Sheet1!A1\"");
    assertEquals(3, spreadSheetValue.get().length,
            "Input Split for Excel file contains row 1 with 3 columns for Sheet1");
    assertEquals("test7", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 1 == \"test7\"");
    // this comes from the external workbook
    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
    assertEquals("test5", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 3 == \"test5\"");
}

From source file: parquet.hadoop.TestMemoryManager.java

License: Apache License

@Test
public void testMemoryManager() throws Exception {
    //Verify the adjusted rowGroupSize of writers
    RecordWriter writer1 = createWriter(1);
    verifyRowGroupSize(rowGroupSize);

    RecordWriter writer2 = createWriter(2);
    verifyRowGroupSize(rowGroupSize);

    RecordWriter writer3 = createWriter(3);
    verifyRowGroupSize((int) Math.floor(expectPoolSize / 3));

    writer1.close(null);
    verifyRowGroupSize(rowGroupSize);

    writer2.close(null);
    verifyRowGroupSize(rowGroupSize);

    writer3.close(null);

    //Verify the memory pool
    Assert.assertEquals("memory pool size is incorrect.", expectPoolSize,
            parquetOutputFormat.getMemoryManager().getTotalMemoryPool());
}