Example usage for org.apache.hadoop.mapred.RecordWriter

Introduction

On this page you can find example usages of the org.apache.hadoop.mapred.RecordWriter interface.

Prototype

RecordWriter

Source Link

Usage

From source file:HiveKeyIgnoringBAMOutputFormat.java

License:Open Source License

/**
 * Returns an old-API ({@code org.apache.hadoop.mapred}) record writer that forwards every
 * call to the record writer of the wrapped new-API ({@code org.apache.hadoop.mapreduce})
 * output format.
 *
 * <p>{@code setSAMHeaderFrom(job)} is called first, then the wrapped writer is created for
 * the task output path derived from {@code job} and {@code name}.
 *
 * @param fs       not used by this method
 * @param job      job configuration; used for the header, the fake task context and the path
 * @param name     leaf name of the output file
 * @param progress not used by this method
 * @return a writer delegating {@code write} and {@code close} to the wrapped writer
 * @throws IOException if the wrapped output format fails to create its writer
 */
@Override
public RecordWriter<Writable, SAMRecordWritable> getRecordWriter(FileSystem fs, JobConf job, String name,
        Progressable progress) throws IOException {
    setSAMHeaderFrom(job);

    final FakeTaskAttemptContext ctx = new FakeTaskAttemptContext(job);

    final org.apache.hadoop.mapreduce.RecordWriter<Writable, SAMRecordWritable> wrappedRecordWriter = wrappedOutputFormat
            .getRecordWriter(ctx, FileOutputFormat.getTaskOutputPath(job, name));

    return new RecordWriter<Writable, SAMRecordWritable>() {
        @Override
        public void write(Writable ignored, SAMRecordWritable rec) throws IOException {
            try {
                wrappedRecordWriter.write(ignored, rec);
            } catch (InterruptedException e) {
                // Restore the interrupt flag so code further up the stack can still observe it.
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            }
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            try {
                wrappedRecordWriter.close(ctx);
            } catch (InterruptedException e) {
                // Restore the interrupt flag so code further up the stack can still observe it.
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            }
        }
    };
}

From source file:babel.prep.corpus.MultipleXMLLangFileOutputFormat.java

License:Apache License

/**
 * Creates a record writer that serializes each {@code Page} value through an
 * {@code XMLObjectWriter} into the file {@code name} under the job's output path.
 *
 * <p>An existing file at that location is deleted (recursively) before the new one is
 * created. Keys passed to {@code write} are ignored.
 *
 * @param fs       file system used to delete and create the dump file
 * @param job      job configuration supplying the output path
 * @param name     name of the dump file, relative to the output path
 * @param progress not used by this method
 * @return a writer whose {@code write} and {@code close} are synchronized
 * @throws IOException if the existence check or the delete fails
 * @throws RuntimeException if the XML writer cannot be created, written to or closed;
 *         the underlying failure is attached as the cause
 */
public RecordWriter<Text, Page> getBaseRecordWriter(final FileSystem fs, JobConf job, String name,
        final Progressable progress) throws IOException {
    final Path dumpFile = new Path(FileOutputFormat.getOutputPath(job), name);

    // Get the old copy out of the way
    if (fs.exists(dumpFile)) {
        fs.delete(dumpFile, true);
    }

    final XMLObjectWriter xmlWriter;

    try {
        xmlWriter = new XMLObjectWriter(fs.create(dumpFile), false);
    } catch (Exception e) {
        // Keep the cause: without it the reason for the failure is lost.
        throw new RuntimeException("Failed to instantiate XMLObjectWriter.", e);
    }

    return new RecordWriter<Text, Page>() {
        public synchronized void write(Text key, Page page) throws IOException {
            try {
                xmlWriter.write(page);
            } catch (XMLStreamException e) {
                throw new RuntimeException("Error writing page XML.", e);
            }
        }

        public synchronized void close(Reporter reporter) throws IOException {
            try {
                xmlWriter.close();
            } catch (XMLStreamException e) {
                throw new RuntimeException("Error closing XMLObjectWriter.", e);
            }
        }
    };
}

From source file:babel.prep.datedcorpus.DatedLangFilesOutputFormat.java

License:Apache License

/**
 * Creates a record writer that appends the text of every value to the file {@code name}
 * under the job's output path.
 *
 * <p>If the file already exists it is deleted (recursively); otherwise its parent
 * directory is created. Each {@code write} call opens the file in append mode, writes the
 * value and closes the file again. Keys are ignored and {@code close} does nothing.
 *
 * <p>NOTE(review): the existence check, delete and mkdirs go through {@code fs}, but the
 * actual writes use {@code java.io.File} on {@code dumpFile.toUri()} — presumably this only
 * works when the output path is on the local file system; confirm.
 *
 * @param fs       file system used for the initial cleanup and directory creation
 * @param job      job configuration supplying the output path
 * @param name     name of the dump file, relative to the output path
 * @param progress not used by this method
 * @return a writer whose {@code write} and {@code close} are synchronized
 * @throws IOException if the initial file system operations fail
 * @throws RuntimeException from {@code write} if the value cannot be appended; the
 *         underlying failure is attached as the cause
 */
public RecordWriter<Text, Text> getBaseRecordWriter(final FileSystem fs, JobConf job, String name,
        final Progressable progress) throws IOException {
    final Path dumpFile = new Path(FileOutputFormat.getOutputPath(job), name);

    // Get the old copy out of the way
    if (fs.exists(dumpFile)) {
        fs.delete(dumpFile, true);
    } else {
        fs.mkdirs(dumpFile.getParent());
    }

    return new RecordWriter<Text, Text>() {
        public synchronized void write(Text key, Text versText) throws IOException {
            // Tracks the raw stream so it can be released if anything below fails.
            FileOutputStream stream = null;
            try {
                stream = new FileOutputStream(new File(dumpFile.toUri()), true);
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(stream, DEFAULT_CHARSET));

                writer.write(versText.toString());
                // Flushes buffered text and closes the underlying stream.
                writer.close();
                stream = null;
            } catch (Exception e) {
                throw new RuntimeException("Error writing page versions: " + e.toString(), e);
            } finally {
                if (stream != null) {
                    try {
                        stream.close();
                    } catch (IOException ignored) {
                        // The primary failure is already being reported above.
                    }
                }
            }
        }

        public synchronized void close(Reporter reporter) throws IOException {
            // Nothing to release: every write opens and closes its own stream.
        }
    };
}

From source file:cn.edu.hfut.dmic.webcollectorcluster.fetcher.FetcherOutputFormat.java

/**
 * Creates a record writer that routes each {@code WebWritable} to one of four sequence
 * files below the directory configured as {@code mapred.output.dir}.
 *
 * <ul>
 * <li>{@code CrawlDatum} values go to {@code fetch/info}</li>
 * <li>{@code Content} values go to {@code content/info}</li>
 * <li>{@code ParseData} values go to {@code parse_data/info}; additionally, for every link
 * of the parse data an unfetched {@code CrawlDatum} is appended to
 * {@code parse_temp/info}</li>
 * </ul>
 * Values wrapping any other type are silently skipped.
 *
 * @param fs     file system on which the sequence files are created
 * @param jc     job configuration; supplies the output directory
 * @param string not used by this method
 * @param p      not used by this method
 * @return the routing record writer
 * @throws IOException if one of the sequence file writers cannot be created
 */
@Override
public org.apache.hadoop.mapred.RecordWriter<Text, WebWritable> getRecordWriter(FileSystem fs, JobConf jc,
        String string, Progressable p) throws IOException {
    Configuration conf = jc;
    String outputPath = conf.get("mapred.output.dir");
    Path fetchPath = new Path(outputPath, "fetch/info");
    Path contentPath = new Path(outputPath, "content/info");
    Path parseDataPath = new Path(outputPath, "parse_data/info");
    Path parseTempPath = new Path(outputPath, "parse_temp/info");
    final SequenceFile.Writer fetchOut = new SequenceFile.Writer(fs, conf, fetchPath, Text.class,
            CrawlDatum.class);
    final SequenceFile.Writer contentOut = new SequenceFile.Writer(fs, conf, contentPath, Text.class,
            Content.class);
    final SequenceFile.Writer parseDataOut = new SequenceFile.Writer(fs, conf, parseDataPath, Text.class,
            ParseData.class);
    final SequenceFile.Writer parseTempOut = new SequenceFile.Writer(fs, conf, parseTempPath, Text.class,
            CrawlDatum.class);
    return new RecordWriter<Text, WebWritable>() {
        @Override
        public void write(Text key, WebWritable value) throws IOException {
            Writable w = value.get();
            if (w instanceof CrawlDatum) {
                fetchOut.append(key, w);
            } else if (w instanceof Content) {
                contentOut.append(key, w);
            } else if (w instanceof ParseData) {
                parseDataOut.append(key, w);
                ParseData parseData = (ParseData) w;
                if (parseData.getLinks() != null) {
                    // Emit one unfetched datum per outgoing link, keyed by the link URL.
                    for (Link link : parseData.getLinks()) {
                        CrawlDatum datum = new CrawlDatum();
                        datum.setUrl(link.getUrl());
                        datum.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
                        datum.setFetchTime(CrawlDatum.FETCHTIME_UNDEFINED);
                        parseTempOut.append(new Text(datum.getUrl()), datum);
                    }
                }
            }
        }

        @Override
        public void close(Reporter rprtr) throws IOException {
            // Nested finally blocks make sure every writer is closed even when an
            // earlier close() throws; the exception still propagates to the caller.
            try {
                fetchOut.close();
            } finally {
                try {
                    contentOut.close();
                } finally {
                    try {
                        parseDataOut.close();
                    } finally {
                        parseTempOut.close();
                    }
                }
            }
        }
    };
}

From source file:cn.spark.Case.MyMultipleOutputFormat.java

License:Apache License

/**
 * Create a composite record writer that can write key/value data to
 * different output files/*  w ww.  ja va 2 s .  c  o m*/
 * 
 * @param fs
 *            the file system to use
 * @param job
 *            the job conf for the job
 * @param name
 *            the leaf file name for the output file (such as part-00000")
 * @param arg3
 *            a progressable for reporting progress.
 * @return a composite record writer
 * @throws IOException
 */
public RecordWriter<K, V> getRecordWriter(FileSystem fs, JobConf job, String name, Progressable arg3)
        throws IOException {

    final FileSystem myFS = fs;
    final String myName = generateLeafFileName(name);
    final JobConf myJob = job;
    final Progressable myProgressable = arg3;

    return new RecordWriter<K, V>() {

        // a cache storing the record writers for different output files.
        TreeMap<String, RecordWriter<K, V>> recordWriters = new TreeMap<String, RecordWriter<K, V>>();

        public void write(K key, V value) throws IOException {

            // get the file name based on the key
            String keyBasedPath = generateFileNameForKeyValue(key, value, myName);

            // get the file name based on the input file name
            String finalPath = getInputFileBasedOutputFileName(myJob, keyBasedPath);

            // get the actual key   //??key
            K actualKey = generateActualKey(null, value);
            V actualValue = generateActualValue(key, value);

            RecordWriter<K, V> rw = this.recordWriters.get(finalPath);
            if (rw == null) {
                // if we don't have the record writer yet for the final
                // path, create
                // one
                // and add it to the cache
                rw = getBaseRecordWriter(myFS, myJob, finalPath, myProgressable);
                this.recordWriters.put(finalPath, rw);
            }
            rw.write(actualKey, actualValue);
        };

        public void close(Reporter reporter) throws IOException {
            Iterator<String> keys = this.recordWriters.keySet().iterator();
            while (keys.hasNext()) {
                RecordWriter<K, V> rw = this.recordWriters.get(keys.next());
                rw.close(reporter);
            }
            this.recordWriters.clear();
        };
    };
}

From source file:com.digitalpebble.behemoth.solr.LucidWorksOutputFormat.java

License:Apache License

/**
 * Opens a {@code LucidWorksWriter} for the given job and output name and exposes it as a
 * Hadoop record writer. Only the document value is forwarded; the key is ignored.
 *
 * @param ignored  not used by this method
 * @param job      job configuration passed to the writer's {@code open}
 * @param name     output name passed to the writer's {@code open}
 * @param progress progress callback handed to the writer's constructor
 * @return a record writer delegating to the opened {@code LucidWorksWriter}
 * @throws IOException if opening the writer fails
 */
public RecordWriter<Text, BehemothDocument> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {

    final LucidWorksWriter lucidWriter = new LucidWorksWriter(progress);
    lucidWriter.open(job, name);

    return new RecordWriter<Text, BehemothDocument>() {

        @Override
        public void write(Text key, BehemothDocument doc) throws IOException {
            lucidWriter.write(doc);
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            lucidWriter.close();
        }
    };
}

From source file:com.digitalpebble.behemoth.solr.SOLROutputFormat.java

License:Apache License

/**
 * Opens a {@code SOLRWriter} for the given job and output name and exposes it as a Hadoop
 * record writer. Only the document value is forwarded; the key is ignored.
 *
 * @param ignored  not used by this method
 * @param job      job configuration passed to the writer's {@code open}
 * @param name     output name passed to the writer's {@code open}
 * @param progress progress callback handed to the writer's constructor
 * @return a record writer delegating to the opened {@code SOLRWriter}
 * @throws IOException if opening the writer fails
 */
public RecordWriter<Text, BehemothDocument> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {

    final SOLRWriter solrWriter = new SOLRWriter(progress);
    solrWriter.open(job, name);

    return new RecordWriter<Text, BehemothDocument>() {

        @Override
        public void write(Text key, BehemothDocument doc) throws IOException {
            solrWriter.write(doc);
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            solrWriter.close();
        }
    };
}

From source file:com.ibm.jaql.io.hadoop.CompositeOutputAdapter.java

License:Apache License

/**
 * Creates one record writer per configured sub-output and returns a composite writer that
 * dispatches each record to one of them.
 *
 * <p>The incoming value is expected to hold a {@code JsonArray} pair: element 0 is the
 * numeric index of the target output, element 1 is the value to write there. A
 * {@code null} pair is skipped.
 *
 * @param ignored  passed through to each sub-output's {@code getRecordWriter}
 * @param conf     not used directly; each sub-output uses its own entry of {@code subconfs}
 * @param name     output name passed through to each sub-output
 * @param progress progress callback passed through to each sub-output
 * @return the dispatching composite writer
 * @throws IOException if a sub-output fails to create its writer
 */
@Override
public RecordWriter<JsonHolder, JsonHolder> getRecordWriter(FileSystem ignored, JobConf conf, String name,
        Progressable progress) throws IOException {
    final int count = outputs.length;
    final RecordWriter<JsonHolder, JsonHolder>[] writers = new RecordWriter[count];
    final JsonHolder[] outKey = new JsonHolder[count];
    final JsonHolder[] outValue = new JsonHolder[count];

    // One writer plus one reusable key/value holder pair per sub-output.
    // (A disabled Hadoop 0.18 workaround that tracked per-task "_temporary" output
    // paths used to live here.)
    for (int slot = 0; slot < count; slot++) {
        JobConf subconf = subconfs[slot];
        writers[slot] = outputs[slot].getRecordWriter(ignored, subconf, name, progress);
        outKey[slot] = (JsonHolder) ReflectionUtils.newInstance(subconf.getOutputKeyClass(), subconf);
        outValue[slot] = (JsonHolder) ReflectionUtils.newInstance(subconf.getOutputValueClass(), subconf);
    }

    return new RecordWriter<JsonHolder, JsonHolder>() {
        @Override
        public void write(JsonHolder key, JsonHolder value) throws IOException {
            JsonArray pair = (JsonArray) value.value;
            if (pair == null) {
                return;
            }
            try {
                JsonNumber selector = (JsonNumber) pair.get(0);
                int target = (int) selector.longValueExact();
                JsonHolder targetKey = outKey[target];
                targetKey.value = key.value;
                JsonHolder targetValue = outValue[target];
                targetValue.value = pair.get(1);
                writers[target].write(targetKey, targetValue);
            } catch (Exception e) {
                throw new UndeclaredThrowableException(e);
            }
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            // (A disabled Hadoop 0.18 workaround that moved task outputs to their
            // final location after closing used to live here.)
            for (RecordWriter<JsonHolder, JsonHolder> open : writers) {
                open.close(reporter);
            }
        }
    };
}

From source file:com.ibm.jaql.io.hadoop.DefaultHadoopOutputAdapter.java

License:Apache License

/**
 * Wraps the record writer of the underlying output format, stores the wrapper in the
 * {@code writer} field and returns it.
 *
 * <p>With a converter configured, each incoming value is converted into a reusable target
 * key/value pair that is then written. Without one, the value is written as-is together
 * with a holder obtained once from {@code keyHolder()}. The incoming key is unused in both
 * cases.
 *
 * @param ignored  passed through to the underlying output format
 * @param job      job configuration passed through to the underlying output format
 * @param name     output name passed through to the underlying output format
 * @param progress progress callback passed through to the underlying output format
 * @return the wrapper that was stored in {@code writer}
 * @throws IOException if the underlying output format fails to create its writer
 */
@SuppressWarnings("unchecked")
public RecordWriter<JsonHolder, JsonHolder> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    if (converter != null) {
        final RecordWriter<K, V> delegate = ((OutputFormat<K, V>) oFormat).getRecordWriter(ignored, job, name,
                progress);

        // Target objects are allocated once and reused for every record.
        final K targetKey = converter.createKeyTarget();
        final V targetValue = converter.createValueTarget();

        writer = new RecordWriter<JsonHolder, JsonHolder>() {

            public void write(JsonHolder key, JsonHolder value) throws IOException {
                converter.convert(value.value, targetKey, targetValue);
                delegate.write(targetKey, targetValue);
            }

            public void close(Reporter reporter) throws IOException {
                delegate.close(reporter);
            }
        };
    } else {
        final RecordWriter<JsonHolder, JsonHolder> delegate = ((OutputFormat<JsonHolder, JsonHolder>) oFormat)
                .getRecordWriter(ignored, job, name, progress);
        final JsonHolder placeholderKey = keyHolder();

        writer = new RecordWriter<JsonHolder, JsonHolder>() {

            public void write(JsonHolder key, JsonHolder value) throws IOException {
                // The incoming key is unused; the placeholder holder is written instead.
                delegate.write(placeholderKey, value);
            }

            public void close(Reporter reporter) throws IOException {
                delegate.close(reporter);
            }
        };
    }
    return writer;
}

From source file:com.sensei.indexing.hadoop.reduce.IndexUpdateOutputFormat.java

License:Apache License

/**
 * Returns a record writer that, for every shard it receives, makes sure a marker file
 * exists below the task's work output path.
 *
 * <p>The marker is named {@code DONE + "_" + <shard directory with '/' replaced by '_'>}
 * and lives in the directory {@code <work output path>/<name>}. The value is only checked
 * by an assertion; {@code close} does nothing.
 *
 * @param fs       file system on which the marker files are created
 * @param job      job configuration used to resolve the work output path
 * @param name     name of the directory (under the work output path) holding the markers
 * @param progress not used by this method
 * @return the marker-creating record writer
 * @throws IOException if resolving the work output path fails
 */
public RecordWriter<Shard, Text> getRecordWriter(final FileSystem fs, JobConf job, String name,
        final Progressable progress) throws IOException {

    final Path perm = new Path(getWorkOutputPath(job), name);

    return new RecordWriter<Shard, Text>() {
        public void close(final Reporter reporter) throws IOException {
            // Nothing to release.
        }

        public void write(Shard key, Text value) throws IOException {
            assert (DONE.equals(value));

            // Flatten the shard directory into a single file-name component.
            final String flatShardName = key.getDirectory().replace("/", "_");
            final Path marker = new Path(perm, DONE + "_" + flatShardName);

            if (fs.exists(marker)) {
                return;
            }
            fs.createNewFile(marker);
        }
    };
}