List of usage examples for the org.apache.hadoop.mapred.RecordWriter interface (anonymous implementations created with new RecordWriter<K, V>() { ... })
RecordWriter
From source file:HiveKeyIgnoringBAMOutputFormat.java
License:Open Source License
@Override public RecordWriter<Writable, SAMRecordWritable> getRecordWriter(FileSystem fs, JobConf job, String name, Progressable progress) throws IOException { setSAMHeaderFrom(job);//from ww w . j a v a 2 s .co m final FakeTaskAttemptContext ctx = new FakeTaskAttemptContext(job); final org.apache.hadoop.mapreduce.RecordWriter<Writable, SAMRecordWritable> wrappedRecordWriter = wrappedOutputFormat .getRecordWriter(ctx, FileOutputFormat.getTaskOutputPath(job, name)); return new RecordWriter<Writable, SAMRecordWritable>() { @Override public void write(Writable ignored, SAMRecordWritable rec) throws IOException { try { wrappedRecordWriter.write(ignored, rec); } catch (InterruptedException e) { throw new RuntimeException(e); } } @Override public void close(Reporter reporter) throws IOException { try { wrappedRecordWriter.close(ctx); } catch (InterruptedException e) { throw new RuntimeException(e); } } }; }
From source file:babel.prep.corpus.MultipleXMLLangFileOutputFormat.java
License:Apache License
public RecordWriter<Text, Page> getBaseRecordWriter(final FileSystem fs, JobConf job, String name, final Progressable progress) throws IOException { final Path dumpFile = new Path(FileOutputFormat.getOutputPath(job), name); // Get the old copy out of the way if (fs.exists(dumpFile)) fs.delete(dumpFile, true);/*from w w w . j a v a 2s . co m*/ final XMLObjectWriter xmlWriter; try { xmlWriter = new XMLObjectWriter(fs.create(dumpFile), false); } catch (Exception e) { throw new RuntimeException("Failed to instantiate XMLObjectWriter."); } return new RecordWriter<Text, Page>() { public synchronized void write(Text key, Page page) throws IOException { try { xmlWriter.write(page); } catch (XMLStreamException e) { throw new RuntimeException("Error writing page XML."); } } public synchronized void close(Reporter reporter) throws IOException { try { xmlWriter.close(); } catch (XMLStreamException e) { throw new RuntimeException("Error closing XMLObjectWriter."); } } }; }
From source file:babel.prep.datedcorpus.DatedLangFilesOutputFormat.java
License:Apache License
public RecordWriter<Text, Text> getBaseRecordWriter(final FileSystem fs, JobConf job, String name, final Progressable progress) throws IOException { final Path dumpFile = new Path(FileOutputFormat.getOutputPath(job), name); // Get the old copy out of the way if (fs.exists(dumpFile)) { fs.delete(dumpFile, true);//from w w w .j av a2s . c o m } else { fs.mkdirs(dumpFile.getParent()); } return new RecordWriter<Text, Text>() { public synchronized void write(Text key, Text versText) throws IOException { try { BufferedWriter writer = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(new File(dumpFile.toUri()), true), DEFAULT_CHARSET)); writer.write(versText.toString()); writer.close(); } catch (Exception e) { throw new RuntimeException("Error writing page versions: " + e.toString()); } } public synchronized void close(Reporter reporter) throws IOException { } }; }
From source file:cn.edu.hfut.dmic.webcollectorcluster.fetcher.FetcherOutputFormat.java
@Override public org.apache.hadoop.mapred.RecordWriter<Text, WebWritable> getRecordWriter(FileSystem fs, JobConf jc, String string, Progressable p) throws IOException { Configuration conf = jc;//ww w. j a v a 2 s . c o m String outputPath = conf.get("mapred.output.dir"); Path fetchPath = new Path(outputPath, "fetch/info"); Path contentPath = new Path(outputPath, "content/info"); Path parseDataPath = new Path(outputPath, "parse_data/info"); Path parseTempPath = new Path(outputPath, "parse_temp/info"); final SequenceFile.Writer fetchOut = new SequenceFile.Writer(fs, conf, fetchPath, Text.class, CrawlDatum.class); final SequenceFile.Writer contentOut = new SequenceFile.Writer(fs, conf, contentPath, Text.class, Content.class); final SequenceFile.Writer parseDataOut = new SequenceFile.Writer(fs, conf, parseDataPath, Text.class, ParseData.class); final SequenceFile.Writer parseTempOut = new SequenceFile.Writer(fs, conf, parseTempPath, Text.class, CrawlDatum.class); return new RecordWriter<Text, WebWritable>() { @Override public void write(Text key, WebWritable value) throws IOException { Writable w = value.get(); if (w instanceof CrawlDatum) { fetchOut.append(key, w); } else if (w instanceof Content) { contentOut.append(key, w); } else if (w instanceof ParseData) { parseDataOut.append(key, w); ParseData parseData = (ParseData) w; if (parseData.getLinks() != null) { for (Link link : parseData.getLinks()) { CrawlDatum datum = new CrawlDatum(); datum.setUrl(link.getUrl()); datum.setStatus(CrawlDatum.STATUS_DB_UNFETCHED); datum.setFetchTime(CrawlDatum.FETCHTIME_UNDEFINED); parseTempOut.append(new Text(datum.getUrl()), datum); } } } } @Override public void close(Reporter rprtr) throws IOException { fetchOut.close(); contentOut.close(); parseDataOut.close(); parseTempOut.close(); } }; }
From source file:cn.spark.Case.MyMultipleOutputFormat.java
License:Apache License
/** * Create a composite record writer that can write key/value data to * different output files/* w ww. ja va 2 s . c o m*/ * * @param fs * the file system to use * @param job * the job conf for the job * @param name * the leaf file name for the output file (such as part-00000") * @param arg3 * a progressable for reporting progress. * @return a composite record writer * @throws IOException */ public RecordWriter<K, V> getRecordWriter(FileSystem fs, JobConf job, String name, Progressable arg3) throws IOException { final FileSystem myFS = fs; final String myName = generateLeafFileName(name); final JobConf myJob = job; final Progressable myProgressable = arg3; return new RecordWriter<K, V>() { // a cache storing the record writers for different output files. TreeMap<String, RecordWriter<K, V>> recordWriters = new TreeMap<String, RecordWriter<K, V>>(); public void write(K key, V value) throws IOException { // get the file name based on the key String keyBasedPath = generateFileNameForKeyValue(key, value, myName); // get the file name based on the input file name String finalPath = getInputFileBasedOutputFileName(myJob, keyBasedPath); // get the actual key //??key K actualKey = generateActualKey(null, value); V actualValue = generateActualValue(key, value); RecordWriter<K, V> rw = this.recordWriters.get(finalPath); if (rw == null) { // if we don't have the record writer yet for the final // path, create // one // and add it to the cache rw = getBaseRecordWriter(myFS, myJob, finalPath, myProgressable); this.recordWriters.put(finalPath, rw); } rw.write(actualKey, actualValue); }; public void close(Reporter reporter) throws IOException { Iterator<String> keys = this.recordWriters.keySet().iterator(); while (keys.hasNext()) { RecordWriter<K, V> rw = this.recordWriters.get(keys.next()); rw.close(reporter); } this.recordWriters.clear(); }; }; }
From source file:com.digitalpebble.behemoth.solr.LucidWorksOutputFormat.java
License:Apache License
/**
 * Opens a {@code LucidWorksWriter} for this job and exposes it as a record writer.
 * Keys are ignored; only the document values are forwarded.
 *
 * @param ignored  not used by this method
 * @param job      job configuration handed to {@code LucidWorksWriter.open}
 * @param name     output name handed to {@code LucidWorksWriter.open}
 * @param progress progress callback handed to the {@code LucidWorksWriter} constructor
 * @return a record writer delegating to the opened {@code LucidWorksWriter}
 * @throws IOException if opening the writer fails
 */
public RecordWriter<Text, BehemothDocument> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    final LucidWorksWriter delegate = new LucidWorksWriter(progress);
    delegate.open(job, name);

    return new RecordWriter<Text, BehemothDocument>() {
        public void write(Text key, BehemothDocument doc) throws IOException {
            delegate.write(doc);
        }

        public void close(Reporter reporter) throws IOException {
            delegate.close();
        }
    };
}
From source file:com.digitalpebble.behemoth.solr.SOLROutputFormat.java
License:Apache License
/**
 * Opens a {@code SOLRWriter} for this job and exposes it as a record writer.
 * Keys are ignored; only the document values are forwarded.
 *
 * @param ignored  not used by this method
 * @param job      job configuration handed to {@code SOLRWriter.open}
 * @param name     output name handed to {@code SOLRWriter.open}
 * @param progress progress callback handed to the {@code SOLRWriter} constructor
 * @return a record writer delegating to the opened {@code SOLRWriter}
 * @throws IOException if opening the writer fails
 */
public RecordWriter<Text, BehemothDocument> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    final SOLRWriter delegate = new SOLRWriter(progress);
    delegate.open(job, name);

    return new RecordWriter<Text, BehemothDocument>() {
        public void write(Text key, BehemothDocument doc) throws IOException {
            delegate.write(doc);
        }

        public void close(Reporter reporter) throws IOException {
            delegate.close();
        }
    };
}
From source file:com.ibm.jaql.io.hadoop.CompositeOutputAdapter.java
License:Apache License
@Override public RecordWriter<JsonHolder, JsonHolder> getRecordWriter(FileSystem ignored, JobConf conf, String name, Progressable progress) throws IOException { final RecordWriter<JsonHolder, JsonHolder>[] writers = new RecordWriter[outputs.length]; final JsonHolder[] outKey = new JsonHolder[outputs.length]; final JsonHolder[] outValue = new JsonHolder[outputs.length]; // final Path[] taskOutputPaths = new Path[outputs.length]; // HACK: Hadoop 0.18 for (int i = 0; i < outputs.length; i++) { // Path outputPath = FileOutputFormat.getOutputPath(subconfs[i]); // if( outputPath != null ) // { // final String TEMP_DIR_NAME = "_temporary"; // MRConstants isn't public... // taskOutputPaths[i] = new Path(outputPath, // (TEMP_DIR_NAME + Path.SEPARATOR + "_" + name)); // } writers[i] = outputs[i].getRecordWriter(ignored, subconfs[i], name, progress); outKey[i] = (JsonHolder) ReflectionUtils.newInstance(subconfs[i].getOutputKeyClass(), subconfs[i]); outValue[i] = (JsonHolder) ReflectionUtils.newInstance(subconfs[i].getOutputValueClass(), subconfs[i]); }// www . j a v a2s . 
c o m return new RecordWriter<JsonHolder, JsonHolder>() { @Override public void write(JsonHolder key, JsonHolder value) throws IOException { JsonArray pair = (JsonArray) value.value; if (pair != null) { try { JsonNumber n = (JsonNumber) pair.get(0); int i = (int) n.longValueExact(); outKey[i].value = key.value; outValue[i].value = pair.get(1); writers[i].write(outKey[i], outValue[i]); } catch (Exception e) { throw new UndeclaredThrowableException(e); } } } @Override public void close(Reporter reporter) throws IOException { for (int i = 0; i < writers.length; i++) { writers[i].close(reporter); // HACK: Hadoop 0.18 // Path taskOutput = taskOutputPaths[i]; // if(taskOutput != null) // { // FileSystem fs = taskOutput.getFileSystem(subconfs[i]); // if( fs.exists(taskOutput) ) // { // Path jobOutputPath = taskOutput.getParent().getParent(); // // // Move the task outputs to their final place // // Path finalOutputPath = getFinalPath(jobOutputPath, taskOutput); // Path finalOutputPath = new Path(jobOutputPath, taskOutput.getName()); // if( !fs.rename(taskOutput, finalOutputPath) ) // { // if( !fs.delete(finalOutputPath, true) ) // { // throw new IOException("Failed to delete earlier output of task"); // } // if( !fs.rename(taskOutput, finalOutputPath) ) // { // throw new IOException("Failed to save output of task: "); // } // } // // LOG.debug("Moved " + taskOutput + " to " + finalOutputPath); // // // Delete the temporary task-specific output directory // if (!fs.delete(taskOutput, true)) { // // LOG.info("Failed to delete the temporary output directory of task: " + // // getTaskID() + " - " + taskOutputPath); // } // // LOG.info("Saved output of task '" + getTaskID() + "' to " + jobOutputPath); // } // } } } }; }
From source file:com.ibm.jaql.io.hadoop.DefaultHadoopOutputAdapter.java
License:Apache License
@SuppressWarnings("unchecked") public RecordWriter<JsonHolder, JsonHolder> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException { if (converter == null) { final RecordWriter<JsonHolder, JsonHolder> baseWriter = ((OutputFormat<JsonHolder, JsonHolder>) oFormat) .getRecordWriter(ignored, job, name, progress); final JsonHolder nullHolder = keyHolder(); writer = new RecordWriter<JsonHolder, JsonHolder>() { public void close(Reporter reporter) throws IOException { baseWriter.close(reporter); }//from w ww. j a va 2s . co m public void write(JsonHolder key, JsonHolder value) throws IOException { baseWriter.write(nullHolder, value); // key is unused } }; } else { final RecordWriter<K, V> baseWriter = ((OutputFormat<K, V>) oFormat).getRecordWriter(ignored, job, name, progress); final K baseKey = converter.createKeyTarget(); final V baseValue = converter.createValueTarget(); writer = new RecordWriter<JsonHolder, JsonHolder>() { public void close(Reporter reporter) throws IOException { baseWriter.close(reporter); } public void write(JsonHolder key, JsonHolder value) throws IOException { converter.convert(value.value, baseKey, baseValue); baseWriter.write(baseKey, baseValue); } }; } return writer; }
From source file:com.sensei.indexing.hadoop.reduce.IndexUpdateOutputFormat.java
License:Apache License
/**
 * Returns a record writer that records each shard by creating an empty marker file for it.
 *
 * <p>The marker lives under {@code <work output path>/<name>/} and is named
 * {@code DONE + "_" + <shard directory with every "/" replaced by "_">}. A marker that
 * already exists is left alone. Closing the writer does nothing.
 *
 * @param fs       file system the marker files are created on
 * @param job      job configuration used to resolve the work output path
 * @param name     path component under the work output path
 * @param progress not used by this method
 * @return the marker-creating writer
 * @throws IOException declared by the interface; the marker I/O happens in {@code write}
 */
public RecordWriter<Shard, Text> getRecordWriter(final FileSystem fs, JobConf job, String name,
        final Progressable progress) throws IOException {
    final Path markerDir = new Path(getWorkOutputPath(job), name);

    return new RecordWriter<Shard, Text>() {
        public void write(Shard key, Text value) throws IOException {
            assert (DONE.equals(value));

            // Flatten the shard directory into a single file-name component.
            final String flatShardName = key.getDirectory().replace("/", "_");
            final Path marker = new Path(markerDir, DONE + "_" + flatShardName);

            if (fs.exists(marker)) {
                return;
            }
            fs.createNewFile(marker);
        }

        public void close(final Reporter reporter) throws IOException {
            // Nothing is held open between writes.
        }
    };
}