Example usage for org.apache.hadoop.mapreduce.lib.output MultipleOutputs write

List of usage examples for org.apache.hadoop.mapreduce.lib.output MultipleOutputs write


On this page you can find example usages of org.apache.hadoop.mapreduce.lib.output MultipleOutputs write.


public void write(KEYOUT key, VALUEOUT value, String baseOutputPath) throws IOException, InterruptedException 

Source Link


Writes the given key and value to the output file named by baseOutputPath.


From source file:be.uantwerpen.adrem.bigfim.AprioriPhaseReducerTest.java

License:Apache License

/**
 * Writes the same (prefix, support) pair to two base output paths of the given
 * MultipleOutputs: "base/tg0/trieGroup-0" and "fis".
 *
 * NOTE(review): this listing is truncated - the method's closing brace is missing.
 *
 * @param mos     the MultipleOutputs instance to write to
 * @param prefix  emitted as the Text key
 * @param support emitted as the Text value, in its decimal string form
 * @throws IOException          declared for mos.write
 * @throws InterruptedException declared for mos.write
 */
private void addToOutput(MultipleOutputs<Text, Text> mos, String prefix, int support)
        throws IOException, InterruptedException {
    // "" + support converts the int to its string form for the Text value.
    mos.write(new Text(prefix), new Text("" + support), "base/tg0/trieGroup-0");
    mos.write(new Text(prefix), new Text("" + support), "fis");

From source file:be.uantwerpen.adrem.bigfim.ComputeTidListReducerTest.java

License:Apache License

/**
 * Calls ComputeTidListReducer.reduce once, with key "1" and the input built by
 * createTestInput_1Item(), against a mocked MultipleOutputs and a mocked Reducer.Context.
 *
 * NOTE(review): this listing is truncated - the method's closing brace is missing and no
 * verification of the mocks is visible; confirm against the original test class.
 */
public void One_PG_One_Item() throws Exception {
    MultipleOutputs<IntArrayWritable, IntMatrixWritable> mos = createMock(MultipleOutputs.class);
    // These calls are made on the mock before replay(...) below; presumably they record the
    // writes the reducer is expected to perform (EasyMock record phase) - confirm.
    mos.write(newIAW(1), EmptyImw, "pg/bucket-0");
    // NOTE(review): the next statement is cut off in this listing - its final argument
    // (presumably "pg/bucket-0", as in the neighbouring writes) and the closing ");" are missing.
    mos.write(newIAW(0), new IntMatrixWritable(newIAW(0, 1, 2, 4, 7, 9), newIAW(0, 1, 2, 3, 5, 6, 8)),
    mos.write(EmptyIaw, EmptyImw, "pg/bucket-0");
    mos.close();

    // The context mock returns a fresh TaskAttemptID for any number of getTaskAttemptID() calls.
    Reducer.Context ctx = createMock(Reducer.Context.class);
    EasyMock.expect(ctx.getTaskAttemptID()).andReturn(new TaskAttemptID()).anyTimes();

    EasyMock.replay(ctx, mos);

    // Put the mock into the reducer's "mos" field (setField is a helper defined elsewhere).
    ComputeTidListReducer reducer = new ComputeTidListReducer();
    setField(reducer, "mos", mos);

    reducer.reduce(new Text("1"), createTestInput_1Item(), ctx);


From source file:be.uantwerpen.adrem.bigfim.ComputeTidListReducerTest.java

License:Apache License

/**
 * Calls ComputeTidListReducer.reduce once, with key "1" and the input built by
 * createTestInput_NItems(), against a mocked MultipleOutputs and a mocked Reducer.Context.
 *
 * NOTE(review): this listing is truncated - the method's closing brace is missing and no
 * verification of the mocks is visible; confirm against the original test class.
 */
public void One_PG_N_Items() throws Exception {
    MultipleOutputs<IntArrayWritable, IntMatrixWritable> mos = createMock(MultipleOutputs.class);

    // These calls are made on the mock before replay(...) below; presumably they record the
    // writes the reducer is expected to perform (EasyMock record phase) - confirm.
    mos.write(newIAW(1), EmptyImw, "pg/bucket-0");
    // NOTE(review): the next statement is cut off in this listing - its final argument
    // (presumably "pg/bucket-0", as in the neighbouring writes) and the closing ");" are missing.
    mos.write(newIAW(0), new IntMatrixWritable(newIAW(0, 1, 2, 4, 7, 9), newIAW(0, 1, 2, 3, 5, 6, 8)),
    mos.write(newIAW(1), new IntMatrixWritable(newIAW(1, 2, 3), newIAW(4, 5, 6)), "pg/bucket-0");
    mos.write(newIAW(3), new IntMatrixWritable(newIAW(4, 7, 9), newIAW(4, 7, 9)), "pg/bucket-0");
    mos.write(EmptyIaw, EmptyImw, "pg/bucket-0");
    mos.close();

    // The context mock returns a fresh TaskAttemptID for any number of getTaskAttemptID() calls.
    Reducer.Context ctx = createMock(Reducer.Context.class);
    EasyMock.expect(ctx.getTaskAttemptID()).andReturn(new TaskAttemptID()).anyTimes();

    EasyMock.replay(ctx, mos);

    // Put the mock into the reducer's "mos" field (setField is a helper defined elsewhere).
    ComputeTidListReducer reducer = new ComputeTidListReducer();
    setField(reducer, "mos", mos);

    reducer.reduce(new Text("1"), createTestInput_NItems(), ctx);


From source file:be.uantwerpen.adrem.bigfim.ComputeTidListReducerTest.java

License:Apache License

/**
 * Calls ComputeTidListReducer.reduce twice - key "1" with createTestInput_NItems() and
 * key "2" with createTestInput_NItems2() - against a mocked MultipleOutputs and a mocked
 * Reducer.Context. The calls made on the mock use two output paths, "pg/bucket-0" and
 * "pg/bucket-1".
 *
 * NOTE(review): this listing is truncated - the method's closing brace is missing and no
 * verification of the mocks is visible; confirm against the original test class.
 */
public void N_PG_N_Items() throws Exception {
    MultipleOutputs<IntArrayWritable, IntMatrixWritable> mos = createMock(MultipleOutputs.class);

    // First group of calls on the mock, all targeting "pg/bucket-0". Made before replay(...),
    // so presumably recorded as expected writes (EasyMock record phase) - confirm.
    mos.write(newIAW(1), EmptyImw, "pg/bucket-0");
    // NOTE(review): the next statement is cut off in this listing - its final argument
    // (presumably "pg/bucket-0") and the closing ");" are missing.
    mos.write(newIAW(0), new IntMatrixWritable(newIAW(0, 1, 2, 4, 7, 9), newIAW(0, 1, 2, 3, 5, 6, 8)),
    mos.write(newIAW(1), new IntMatrixWritable(newIAW(1, 2, 3), newIAW(4, 5, 6)), "pg/bucket-0");
    mos.write(newIAW(3), new IntMatrixWritable(newIAW(4, 7, 9), newIAW(4, 7, 9)), "pg/bucket-0");
    mos.write(EmptyIaw, EmptyImw, "pg/bucket-0");

    // Second group of calls on the mock, targeting "pg/bucket-1".
    mos.write(newIAW(2), EmptyImw, "pg/bucket-1");
    mos.write(newIAW(1), new IntMatrixWritable(newIAW(1, 4, 7, 8), newIAW(1, 5, 6, 8)), "pg/bucket-1");
    // NOTE(review): the next statement is cut off in this listing - its final argument
    // (presumably "pg/bucket-1") and the closing ");" are missing.
    mos.write(newIAW(2), new IntMatrixWritable(newIAW(3, 5, 7), newIAW(1, 2, 3, 4, 5, 6, 7, 8, 9)),
    mos.write(EmptyIaw, EmptyImw, "pg/bucket-1");
    mos.close();

    // The context mock returns a fresh TaskAttemptID for any number of getTaskAttemptID() calls.
    Reducer.Context ctx = createMock(Reducer.Context.class);
    EasyMock.expect(ctx.getTaskAttemptID()).andReturn(new TaskAttemptID()).anyTimes();

    EasyMock.replay(ctx, mos);

    // Put the mock into the reducer's "mos" field (setField is a helper defined elsewhere).
    ComputeTidListReducer reducer = new ComputeTidListReducer();
    setField(reducer, "mos", mos);

    reducer.reduce(new Text("1"), createTestInput_NItems(), ctx);
    reducer.reduce(new Text("2"), createTestInput_NItems2(), ctx);


From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.full.WARCWriterReducerClass.java

License:Apache License

/**
 * Writes a single WARCWritable to the output with a specific output file prefix.
 *
 * @param warcWritable    warc record
 * @param multipleOutputs output
 * @throws IOException          exception
 * @throws InterruptedException exception
 */
// TODO move somewhere else?
// Reads four header fields (license, language, no-boilerplate, minimal-HTML) from the record,
// derives an output file prefix from them via createOutputFilePrefix, and writes the record
// under that prefix with a NullWritable key.
// NOTE(review): this listing is truncated - the method's closing brace is missing.
public static void writeSingleWARCWritableToOutput(WARCWritable warcWritable,
        MultipleOutputs<NullWritable, WARCWritable> multipleOutputs) throws IOException, InterruptedException {
    WARCRecord.Header header = warcWritable.getRecord().getHeader();
    String license = header.getField(WARCRecord.WARCRecordFieldConstants.LICENSE);
    String language = header.getField(WARCRecord.WARCRecordFieldConstants.LANGUAGE);
    String noBoilerplate = header.getField(WARCRecord.WARCRecordFieldConstants.NO_BOILERPLATE);
    String minimalHtml = header.getField(WARCRecord.WARCRecordFieldConstants.MINIMAL_HTML);

    // set the file name prefix
    String fileName = createOutputFilePrefix(license, language, noBoilerplate, minimalHtml);

    // NOTE(review): the disabled code below calls createOutputFilePrefix with three arguments
    // (no minimalHtml) and computes binNumber without using it; it would need updating before
    // being re-enabled.
    // bottleneck of single reducer for all "Lic_none_Lang_en" pages (majority of Web)
    //        if ("en".equals(language) && LicenseDetector.NO_LICENCE.equals(license)) {
    //            long simHash = Long
    //                    .valueOf(header.getField(WARCRecord.WARCRecordFieldConstants.SIMHASH));
    //            int binNumber = getBinNumberFromSimHash(simHash);
    //            fileName = createOutputFilePrefix(license, language, noBoilerplate);
    //        }

    // The key is NullWritable; fileName is passed as the base output path.
    multipleOutputs.write(NullWritable.get(), warcWritable, fileName);

From source file:eu.scape_project.up2ti.output.SimpleKeyValueOutputWriter.java

License:Apache License

/**
 * Record method for hadoop job.
 *
 * @param resultMap Result map where K: recordkey-identificationtype, V:
 * tool identificationtype identificationresult
 * @param mos Multiple output writer
 */
// For every key in resultMap, writes each of its values as a (Text key, Text value) pair to
// the named output "idtab". IOException and InterruptedException are logged, not rethrown.
// NOTE(review): this listing is truncated - the closing braces of the for loop, the last
// catch block, the while loop and the method are missing.
// NOTE(review): Iterator and MultipleOutputs are used as raw types here.
public void write(HashMap<String, List<String>> resultMap, MultipleOutputs mos) {
    Iterator iter = resultMap.keySet().iterator();
    while (iter.hasNext()) {
        // Cast is required because the iterator is raw.
        String key = (String) iter.next();
        List<String> valueList = resultMap.get(key);
        try {
            for (String value : valueList) {
                // Three-argument form with the named output first: (namedOutput, key, value).
                mos.write("idtab", new Text(key), new Text(value));
        } catch (IOException ex) {
            // A failure leaves the try block, skipping the remaining values of this key.
            LOG.error("I/O Error", ex);
        } catch (InterruptedException ex) {
            // NOTE(review): only logging is visible here; the thread's interrupt status does
            // not appear to be restored - confirm against the full source.
            LOG.error("Interrupted Error", ex);