Example usage for org.apache.hadoop.mapreduce.lib.output MultipleOutputs write

List of usage examples for org.apache.hadoop.mapreduce.lib.output MultipleOutputs write

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.write.

Prototype

@SuppressWarnings("unchecked")
public void write(KEYOUT key, VALUEOUT value, String baseOutputPath) throws IOException, InterruptedException 

Source Link

Document

Writes the key and value to the output file named by baseOutputPath.

Usage

From source file:be.uantwerpen.adrem.bigfim.AprioriPhaseReducerTest.java

License:Apache License

/**
 * Writes the prefix/support pair to {@code mos} twice: first under the
 * "base/tg0/trieGroup-0" base output path, then under "fis".
 *
 * @param mos     multiple-outputs instance that receives both writes
 * @param prefix  string wrapped in a {@code Text} and used as the key
 * @param support number rendered as decimal text and used as the value
 * @throws IOException          propagated from {@code mos.write}
 * @throws InterruptedException propagated from {@code mos.write}
 */
private void addToOutput(MultipleOutputs<Text, Text> mos, String prefix, int support)
        throws IOException, InterruptedException {
    final String supportText = String.valueOf(support);
    // Fresh Text instances are created for every write, as each call did before.
    for (String baseOutputPath : new String[] { "base/tg0/trieGroup-0", "fis" }) {
        mos.write(new Text(prefix), new Text(supportText), baseOutputPath);
    }
}

From source file:be.uantwerpen.adrem.bigfim.ComputeTidListReducerTest.java

License:Apache License

/**
 * Runs the reducer on the single-item test input for key "1" and verifies
 * the writes it makes to the "pg/bucket-0" output.
 */
@Test
public void One_PG_One_Item() throws Exception {
    final String bucket = "pg/bucket-0";

    // Record phase: the calls expected on the mocked outputs.
    MultipleOutputs<IntArrayWritable, IntMatrixWritable> expectedOutputs = createMock(MultipleOutputs.class);
    expectedOutputs.write(newIAW(1), EmptyImw, bucket);
    expectedOutputs.write(newIAW(0),
            new IntMatrixWritable(newIAW(0, 1, 2, 4, 7, 9), newIAW(0, 1, 2, 3, 5, 6, 8)), bucket);
    expectedOutputs.write(EmptyIaw, EmptyImw, bucket);
    expectedOutputs.close();

    // The context mock only has to hand out a configuration and a task attempt id.
    Reducer.Context context = createMock(Reducer.Context.class);
    EasyMock.expect(context.getConfiguration()).andReturn(createConfiguration()).anyTimes();
    EasyMock.expect(context.getTaskAttemptID()).andReturn(new TaskAttemptID()).anyTimes();

    EasyMock.replay(context, expectedOutputs);

    // Replay phase: set up the reducer, then swap its "mos" field for the mock.
    ComputeTidListReducer underTest = new ComputeTidListReducer();
    underTest.setup(context);
    setField(underTest, "mos", expectedOutputs);

    underTest.reduce(new Text("1"), createTestInput_1Item(), context);
    underTest.cleanup(context);

    EasyMock.verify(expectedOutputs);
}

From source file:be.uantwerpen.adrem.bigfim.ComputeTidListReducerTest.java

License:Apache License

/**
 * Runs the reducer on the multi-item test input for key "1" and verifies
 * the writes it makes to the "pg/bucket-0" output.
 */
@Test
public void One_PG_N_Items() throws Exception {
    final String bucket = "pg/bucket-0";

    // Record phase: the calls expected on the mocked outputs.
    MultipleOutputs<IntArrayWritable, IntMatrixWritable> expectedOutputs = createMock(MultipleOutputs.class);
    expectedOutputs.write(newIAW(1), EmptyImw, bucket);
    expectedOutputs.write(newIAW(0),
            new IntMatrixWritable(newIAW(0, 1, 2, 4, 7, 9), newIAW(0, 1, 2, 3, 5, 6, 8)), bucket);
    expectedOutputs.write(newIAW(1), new IntMatrixWritable(newIAW(1, 2, 3), newIAW(4, 5, 6)), bucket);
    expectedOutputs.write(newIAW(3), new IntMatrixWritable(newIAW(4, 7, 9), newIAW(4, 7, 9)), bucket);
    expectedOutputs.write(EmptyIaw, EmptyImw, bucket);
    expectedOutputs.close();

    // The context mock only has to hand out a configuration and a task attempt id.
    Reducer.Context context = createMock(Reducer.Context.class);
    EasyMock.expect(context.getConfiguration()).andReturn(createConfiguration()).anyTimes();
    EasyMock.expect(context.getTaskAttemptID()).andReturn(new TaskAttemptID()).anyTimes();

    EasyMock.replay(context, expectedOutputs);

    // Replay phase: set up the reducer, then swap its "mos" field for the mock.
    ComputeTidListReducer underTest = new ComputeTidListReducer();
    underTest.setup(context);
    setField(underTest, "mos", expectedOutputs);

    underTest.reduce(new Text("1"), createTestInput_NItems(), context);
    underTest.cleanup(context);

    EasyMock.verify(expectedOutputs);
}

From source file:be.uantwerpen.adrem.bigfim.ComputeTidListReducerTest.java

License:Apache License

/**
 * Runs the reducer on two multi-item test inputs (keys "1" and "2") and
 * verifies the writes it makes to the "pg/bucket-0" and "pg/bucket-1" outputs.
 */
@Test
public void N_PG_N_Items() throws Exception {
    final String firstBucket = "pg/bucket-0";
    final String secondBucket = "pg/bucket-1";

    MultipleOutputs<IntArrayWritable, IntMatrixWritable> expectedOutputs = createMock(MultipleOutputs.class);

    // Record phase, first bucket.
    expectedOutputs.write(newIAW(1), EmptyImw, firstBucket);
    expectedOutputs.write(newIAW(0),
            new IntMatrixWritable(newIAW(0, 1, 2, 4, 7, 9), newIAW(0, 1, 2, 3, 5, 6, 8)), firstBucket);
    expectedOutputs.write(newIAW(1), new IntMatrixWritable(newIAW(1, 2, 3), newIAW(4, 5, 6)), firstBucket);
    expectedOutputs.write(newIAW(3), new IntMatrixWritable(newIAW(4, 7, 9), newIAW(4, 7, 9)), firstBucket);
    expectedOutputs.write(EmptyIaw, EmptyImw, firstBucket);

    // Record phase, second bucket.
    expectedOutputs.write(newIAW(2), EmptyImw, secondBucket);
    expectedOutputs.write(newIAW(1), new IntMatrixWritable(newIAW(1, 4, 7, 8), newIAW(1, 5, 6, 8)),
            secondBucket);
    expectedOutputs.write(newIAW(2),
            new IntMatrixWritable(newIAW(3, 5, 7), newIAW(1, 2, 3, 4, 5, 6, 7, 8, 9)), secondBucket);
    expectedOutputs.write(EmptyIaw, EmptyImw, secondBucket);
    expectedOutputs.close();

    // The context mock only has to hand out a configuration and a task attempt id.
    Reducer.Context context = createMock(Reducer.Context.class);
    EasyMock.expect(context.getConfiguration()).andReturn(createConfiguration()).anyTimes();
    EasyMock.expect(context.getTaskAttemptID()).andReturn(new TaskAttemptID()).anyTimes();

    EasyMock.replay(context, expectedOutputs);

    // Replay phase: set up the reducer, then swap its "mos" field for the mock.
    ComputeTidListReducer underTest = new ComputeTidListReducer();
    underTest.setup(context);
    setField(underTest, "mos", expectedOutputs);

    underTest.reduce(new Text("1"), createTestInput_NItems(), context);
    underTest.reduce(new Text("2"), createTestInput_NItems2(), context);
    underTest.cleanup(context);

    EasyMock.verify(expectedOutputs);
}

From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.full.WARCWriterReducerClass.java

License:Apache License

/**
 * Writes a single {@link WARCWritable} to the output under a file name prefix
 * built from the record's license, language, no-boilerplate and minimal-HTML
 * header fields.
 *
 * @param warcWritable    WARC record to write
 * @param multipleOutputs output the record is written to
 * @throws IOException          propagated from the underlying write
 * @throws InterruptedException propagated from the underlying write
 */
// TODO move somewhere else?
public static void writeSingleWARCWritableToOutput(WARCWritable warcWritable,
        MultipleOutputs<NullWritable, WARCWritable> multipleOutputs) throws IOException, InterruptedException {
    final WARCRecord.Header recordHeader = warcWritable.getRecord().getHeader();

    // The output file name prefix is derived from four header fields.
    final String outputPrefix = createOutputFilePrefix(
            recordHeader.getField(WARCRecord.WARCRecordFieldConstants.LICENSE),
            recordHeader.getField(WARCRecord.WARCRecordFieldConstants.LANGUAGE),
            recordHeader.getField(WARCRecord.WARCRecordFieldConstants.NO_BOILERPLATE),
            recordHeader.getField(WARCRecord.WARCRecordFieldConstants.MINIMAL_HTML));

    // Known bottleneck: a single reducer handles all "Lic_none_Lang_en" pages
    // (majority of Web). An earlier, now disabled, attempt computed a bin number
    // from the record's SIMHASH header field for those pages.
    multipleOutputs.write(NullWritable.get(), warcWritable, outputPrefix);
}

From source file:eu.scape_project.up2ti.output.SimpleKeyValueOutputWriter.java

License:Apache License

/**
 * Record method for hadoop job./*from  w w  w .  j  a  va 2 s.co m*/
 *
 * @param resultMap Result map where K: recordkey-identificationtype, V:
 * tool identificationtype identificationresult)
 * @param mos Multiple output writer
 */
@Override
public void write(HashMap<String, List<String>> resultMap, MultipleOutputs mos) {
    Iterator iter = resultMap.keySet().iterator();
    while (iter.hasNext()) {
        String key = (String) iter.next();
        List<String> valueList = resultMap.get(key);
        try {
            for (String value : valueList) {
                mos.write("idtab", new Text(key), new Text(value));
            }
        } catch (IOException ex) {
            LOG.error("I/O Error", ex);
        } catch (InterruptedException ex) {
            LOG.error("Interrupted Error", ex);
        }
    }
}