Example usage for org.apache.hadoop.mapred RecordWriter RecordWriter

Introduction

On this page you can find example usage of org.apache.hadoop.mapred.RecordWriter, collected from open source projects.

Prototype

public interface RecordWriter<K, V> {
    void write(K key, V value) throws IOException;
    void close(Reporter reporter) throws IOException;
}
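
All of the examples below return an anonymous implementation of this interface from an OutputFormat's getRecordWriter method. A minimal sketch of that pattern (illustrative only, not taken from any of the sources below; assumes the usual org.apache.hadoop.fs, io, mapred, and util imports):

public RecordWriter<Text, Text> getRecordWriter(FileSystem fs, JobConf job, String name,
        Progressable progress) throws IOException {
    // "name" is the leaf file name for this task (e.g. part-00000)
    final FSDataOutputStream out = fs.create(FileOutputFormat.getTaskOutputPath(job, name));

    return new RecordWriter<Text, Text>() {
        @Override
        public void write(Text key, Text value) throws IOException {
            out.writeBytes(key + "\t" + value + "\n");
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            out.close();
        }
    };
}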

Usage

From source file: HiveKeyIgnoringBAMOutputFormat.java

License: Open Source License

@Override
public RecordWriter<Writable, SAMRecordWritable> getRecordWriter(FileSystem fs, JobConf job, String name,
        Progressable progress) throws IOException {
    setSAMHeaderFrom(job);

    final FakeTaskAttemptContext ctx = new FakeTaskAttemptContext(job);

    final org.apache.hadoop.mapreduce.RecordWriter<Writable, SAMRecordWritable> wrappedRecordWriter = wrappedOutputFormat
            .getRecordWriter(ctx, FileOutputFormat.getTaskOutputPath(job, name));

    return new RecordWriter<Writable, SAMRecordWritable>() {
        @Override
        public void write(Writable ignored, SAMRecordWritable rec) throws IOException {
            try {
                wrappedRecordWriter.write(ignored, rec);
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            try {
                wrappedRecordWriter.close(ctx);
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
        }
    };
}
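
This example bridges the two Hadoop APIs: the returned mapred RecordWriter simply delegates to a wrapped org.apache.hadoop.mapreduce writer, converting the new API's checked InterruptedException into a RuntimeException in both write and close.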

From source file: babel.prep.corpus.MultipleXMLLangFileOutputFormat.java

License: Apache License

public RecordWriter<Text, Page> getBaseRecordWriter(final FileSystem fs, JobConf job, String name,
        final Progressable progress) throws IOException {
    final Path dumpFile = new Path(FileOutputFormat.getOutputPath(job), name);

    // Get the old copy out of the way
    if (fs.exists(dumpFile))
        fs.delete(dumpFile, true);

    final XMLObjectWriter xmlWriter;

    try {
        xmlWriter = new XMLObjectWriter(fs.create(dumpFile), false);
    } catch (Exception e) {
        throw new RuntimeException("Failed to instantiate XMLObjectWriter.");
    }

    return new RecordWriter<Text, Page>() {
        public synchronized void write(Text key, Page page) throws IOException {
            try {
                xmlWriter.write(page);
            } catch (XMLStreamException e) {
                throw new RuntimeException("Error writing page XML.");
            }
        }

        public synchronized void close(Reporter reporter) throws IOException {
            try {
                xmlWriter.close();
            } catch (XMLStreamException e) {
                throw new RuntimeException("Error closing XMLObjectWriter.");
            }
        }
    };
}
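
Both methods are synchronized because the anonymous writer closes over a single shared XMLObjectWriter; any pre-existing dump file is deleted up front so each run starts from a fresh file.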

From source file: babel.prep.datedcorpus.DatedLangFilesOutputFormat.java

License: Apache License

public RecordWriter<Text, Text> getBaseRecordWriter(final FileSystem fs, JobConf job, String name,
        final Progressable progress) throws IOException {
    final Path dumpFile = new Path(FileOutputFormat.getOutputPath(job), name);

    // Get the old copy out of the way
    if (fs.exists(dumpFile)) {
        fs.delete(dumpFile, true);
    } else {
        fs.mkdirs(dumpFile.getParent());
    }

    return new RecordWriter<Text, Text>() {
        public synchronized void write(Text key, Text versText) throws IOException {
            try {
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                        new FileOutputStream(new File(dumpFile.toUri()), true), DEFAULT_CHARSET));

                writer.write(versText.toString());
                writer.close();
            } catch (Exception e) {
                throw new RuntimeException("Error writing page versions: " + e.toString());
            }
        }

        public synchronized void close(Reporter reporter) throws IOException {
        }
    };
}
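
Note the design choice here: instead of holding a stream open, write reopens the dump file in append mode for every record and closes it again immediately (via java.io, so this assumes a locally accessible path), leaving close(Reporter) with nothing to do.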

From source file: cn.edu.hfut.dmic.webcollectorcluster.fetcher.FetcherOutputFormat.java

@Override
public org.apache.hadoop.mapred.RecordWriter<Text, WebWritable> getRecordWriter(FileSystem fs, JobConf jc,
        String string, Progressable p) throws IOException {
    Configuration conf = jc;
    String outputPath = conf.get("mapred.output.dir");
    Path fetchPath = new Path(outputPath, "fetch/info");
    Path contentPath = new Path(outputPath, "content/info");
    Path parseDataPath = new Path(outputPath, "parse_data/info");
    Path parseTempPath = new Path(outputPath, "parse_temp/info");
    final SequenceFile.Writer fetchOut = new SequenceFile.Writer(fs, conf, fetchPath, Text.class,
            CrawlDatum.class);
    final SequenceFile.Writer contentOut = new SequenceFile.Writer(fs, conf, contentPath, Text.class,
            Content.class);
    final SequenceFile.Writer parseDataOut = new SequenceFile.Writer(fs, conf, parseDataPath, Text.class,
            ParseData.class);
    final SequenceFile.Writer parseTempOut = new SequenceFile.Writer(fs, conf, parseTempPath, Text.class,
            CrawlDatum.class);
    return new RecordWriter<Text, WebWritable>() {
        @Override
        public void write(Text key, WebWritable value) throws IOException {
            Writable w = value.get();
            if (w instanceof CrawlDatum) {
                fetchOut.append(key, w);
            } else if (w instanceof Content) {
                contentOut.append(key, w);
            } else if (w instanceof ParseData) {
                parseDataOut.append(key, w);
                ParseData parseData = (ParseData) w;
                if (parseData.getLinks() != null) {
                    for (Link link : parseData.getLinks()) {
                        CrawlDatum datum = new CrawlDatum();
                        datum.setUrl(link.getUrl());
                        datum.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
                        datum.setFetchTime(CrawlDatum.FETCHTIME_UNDEFINED);
                        parseTempOut.append(new Text(datum.getUrl()), datum);
                    }
                }
            }
        }

        @Override
        public void close(Reporter rprtr) throws IOException {
            fetchOut.close();
            contentOut.close();
            parseDataOut.close();
            parseTempOut.close();
        }
    };
}
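
A single writer here demultiplexes on the runtime type of the value: CrawlDatum, Content, and ParseData records are appended to separate SequenceFiles, and any outlinks found in a ParseData are additionally expanded into new unfetched CrawlDatum entries in the temporary parse output.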

From source file: cn.spark.Case.MyMultipleOutputFormat.java

License: Apache License

/**
 * Create a composite record writer that can write key/value data to
 * different output files.
 * 
 * @param fs
 *            the file system to use
 * @param job
 *            the job conf for the job
 * @param name
 *            the leaf file name for the output file (such as "part-00000")
 * @param arg3
 *            a progressable for reporting progress.
 * @return a composite record writer
 * @throws IOException
 */
public RecordWriter<K, V> getRecordWriter(FileSystem fs, JobConf job, String name, Progressable arg3)
        throws IOException {

    final FileSystem myFS = fs;
    final String myName = generateLeafFileName(name);
    final JobConf myJob = job;
    final Progressable myProgressable = arg3;

    return new RecordWriter<K, V>() {

        // a cache storing the record writers for different output files.
        TreeMap<String, RecordWriter<K, V>> recordWriters = new TreeMap<String, RecordWriter<K, V>>();

        public void write(K key, V value) throws IOException {

            // get the file name based on the key
            String keyBasedPath = generateFileNameForKeyValue(key, value, myName);

            // get the file name based on the input file name
            String finalPath = getInputFileBasedOutputFileName(myJob, keyBasedPath);

            // derive the actual key and value to write (the key argument is replaced with null here)
            K actualKey = generateActualKey(null, value);
            V actualValue = generateActualValue(key, value);

            RecordWriter<K, V> rw = this.recordWriters.get(finalPath);
            if (rw == null) {
                // if we don't have the record writer yet for the final
                // path, create
                // one
                // and add it to the cache
                rw = getBaseRecordWriter(myFS, myJob, finalPath, myProgressable);
                this.recordWriters.put(finalPath, rw);
            }
            rw.write(actualKey, actualValue);
        }

        public void close(Reporter reporter) throws IOException {
            Iterator<String> keys = this.recordWriters.keySet().iterator();
            while (keys.hasNext()) {
                RecordWriter<K, V> rw = this.recordWriters.get(keys.next());
                rw.close(reporter);
            }
            this.recordWriters.clear();
        }
    };
}
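
Because writers are created lazily and cached by output path, a file is only opened once the first record is routed to it, and close(Reporter) then closes every cached writer. Assuming this class keeps the same hooks as Hadoop's org.apache.hadoop.mapred.lib.MultipleOutputFormat, a subclass would route records by overriding generateFileNameForKeyValue; a hypothetical sketch:

@Override
protected String generateFileNameForKeyValue(K key, V value, String name) {
    // hypothetical routing rule: one subdirectory per key under the job output
    return key.toString() + "/" + name;
}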

From source file: com.digitalpebble.behemoth.solr.LucidWorksOutputFormat.java

License: Apache License

public RecordWriter<Text, BehemothDocument> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {

    final LucidWorksWriter writer = new LucidWorksWriter(progress);
    writer.open(job, name);

    return new RecordWriter<Text, BehemothDocument>() {

        public void close(Reporter reporter) throws IOException {
            writer.close();
        }

        public void write(Text key, BehemothDocument doc) throws IOException {
            writer.write(doc);
        }
    };
}

From source file: com.digitalpebble.behemoth.solr.SOLROutputFormat.java

License: Apache License

public RecordWriter<Text, BehemothDocument> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {

    final SOLRWriter writer = new SOLRWriter(progress);
    writer.open(job, name);

    return new RecordWriter<Text, BehemothDocument>() {

        public void close(Reporter reporter) throws IOException {
            writer.close();
        }

        public void write(Text key, BehemothDocument doc) throws IOException {
            writer.write(doc);
        }
    };
}
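
This is the same shape as the LucidWorks example above: the FileSystem argument and the Text key are ignored, and all work is delegated to a writer opened against the job configuration. Wiring such a format into a job uses the standard old-API call; a sketch, where MyJob is a placeholder driver class:

JobConf job = new JobConf(MyJob.class);
job.setOutputFormat(SOLROutputFormat.class);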

From source file: com.ibm.jaql.io.hadoop.CompositeOutputAdapter.java

License: Apache License

@Override
public RecordWriter<JsonHolder, JsonHolder> getRecordWriter(FileSystem ignored, JobConf conf, String name,
        Progressable progress) throws IOException {
    final RecordWriter<JsonHolder, JsonHolder>[] writers = new RecordWriter[outputs.length];
    final JsonHolder[] outKey = new JsonHolder[outputs.length];
    final JsonHolder[] outValue = new JsonHolder[outputs.length];

    //    final Path[] taskOutputPaths = new Path[outputs.length]; // HACK: Hadoop 0.18
    for (int i = 0; i < outputs.length; i++) {
        //      Path outputPath = FileOutputFormat.getOutputPath(subconfs[i]);
        //      if( outputPath != null )
        //      {
        //        final String TEMP_DIR_NAME = "_temporary"; // MRConstants isn't public...
        //        taskOutputPaths[i] = new Path(outputPath,
        //            (TEMP_DIR_NAME + Path.SEPARATOR + "_" + name));
        //      }      
        writers[i] = outputs[i].getRecordWriter(ignored, subconfs[i], name, progress);
        outKey[i] = (JsonHolder) ReflectionUtils.newInstance(subconfs[i].getOutputKeyClass(), subconfs[i]);
        outValue[i] = (JsonHolder) ReflectionUtils.newInstance(subconfs[i].getOutputValueClass(), subconfs[i]);
    }

    return new RecordWriter<JsonHolder, JsonHolder>() {
        @Override
        public void write(JsonHolder key, JsonHolder value) throws IOException {
            JsonArray pair = (JsonArray) value.value;
            if (pair != null) {
                try {
                    JsonNumber n = (JsonNumber) pair.get(0);
                    int i = (int) n.longValueExact();
                    outKey[i].value = key.value;
                    outValue[i].value = pair.get(1);
                    writers[i].write(outKey[i], outValue[i]);
                } catch (Exception e) {
                    throw new UndeclaredThrowableException(e);
                }
            }
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            for (int i = 0; i < writers.length; i++) {
                writers[i].close(reporter);

                // HACK: Hadoop 0.18
                //          Path taskOutput = taskOutputPaths[i];
                //          if(taskOutput != null)
                //          {
                //            FileSystem fs = taskOutput.getFileSystem(subconfs[i]);
                //            if( fs.exists(taskOutput) )
                //            {
                //              Path jobOutputPath = taskOutput.getParent().getParent();
                //
                //              // Move the task outputs to their final place
                //              // Path finalOutputPath = getFinalPath(jobOutputPath, taskOutput);
                //              Path finalOutputPath = new Path(jobOutputPath, taskOutput.getName());
                //              if( !fs.rename(taskOutput, finalOutputPath) )
                //              {
                //                if( !fs.delete(finalOutputPath, true) )
                //                {
                //                  throw new IOException("Failed to delete earlier output of task");
                //                }
                //                if( !fs.rename(taskOutput, finalOutputPath) )
                //                {
                //                  throw new IOException("Failed to save output of task: ");
                //                }
                //              }
                //              // LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
                //
                //              // Delete the temporary task-specific output directory
                //              if (!fs.delete(taskOutput, true)) {
                //                // LOG.info("Failed to delete the temporary output directory of task: " + 
                //                //    getTaskID() + " - " + taskOutputPath);
                //              }
                //              // LOG.info("Saved output of task '" + getTaskID() + "' to " + jobOutputPath);
                //            }
                //          }
            }
        }
    };
}
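
Each incoming value is expected to be a two-element JSON array of [writerIndex, actualValue]: the index selects which wrapped writer receives the record, and close fans out to every writer. The commented-out blocks are a retained workaround for Hadoop 0.18's task-output promotion.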

From source file: com.ibm.jaql.io.hadoop.DefaultHadoopOutputAdapter.java

License: Apache License

@SuppressWarnings("unchecked")
public RecordWriter<JsonHolder, JsonHolder> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    if (converter == null) {
        final RecordWriter<JsonHolder, JsonHolder> baseWriter = ((OutputFormat<JsonHolder, JsonHolder>) oFormat)
                .getRecordWriter(ignored, job, name, progress);
        final JsonHolder nullHolder = keyHolder();
        writer = new RecordWriter<JsonHolder, JsonHolder>() {
            public void close(Reporter reporter) throws IOException {
                baseWriter.close(reporter);
            }

            public void write(JsonHolder key, JsonHolder value) throws IOException {
                baseWriter.write(nullHolder, value); // key is unused
            }
        };
    } else {
        final RecordWriter<K, V> baseWriter = ((OutputFormat<K, V>) oFormat).getRecordWriter(ignored, job, name,
                progress);

        final K baseKey = converter.createKeyTarget();
        final V baseValue = converter.createValueTarget();

        writer = new RecordWriter<JsonHolder, JsonHolder>() {

            public void close(Reporter reporter) throws IOException {
                baseWriter.close(reporter);
            }

            public void write(JsonHolder key, JsonHolder value) throws IOException {
                converter.convert(value.value, baseKey, baseValue);
                baseWriter.write(baseKey, baseValue);
            }
        };
    }
    return writer;
}
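
Two writer shapes are returned here: without a converter, records pass straight through with a fixed null key holder (the key is unused by the base format); with a converter, each JSON value is first converted into the base key/value types and then written.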

From source file: com.sensei.indexing.hadoop.reduce.IndexUpdateOutputFormat.java

License: Apache License

public RecordWriter<Shard, Text> getRecordWriter(final FileSystem fs, JobConf job, String name,
        final Progressable progress) throws IOException {

    final Path perm = new Path(getWorkOutputPath(job), name);

    return new RecordWriter<Shard, Text>() {
        public void write(Shard key, Text value) throws IOException {
            assert (DONE.equals(value));

            String shardName = key.getDirectory();
            shardName = shardName.replace("/", "_");

            Path doneFile = new Path(perm, DONE + "_" + shardName);
            if (!fs.exists(doneFile)) {
                fs.createNewFile(doneFile);
            }
        }

        public void close(final Reporter reporter) throws IOException {
        }
    };
}
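
This writer emits no records at all: write only creates an empty done-marker file per index shard (the value is asserted to be the DONE marker), and close(Reporter) is a no-op.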