List of usage examples for org.apache.hadoop.mapreduce.OutputFormat#getRecordWriter
/**
 * Returns the {@code RecordWriter} to use for the given task attempt.
 *
 * @param context the task attempt context the writer is requested for
 * @return a record writer accepting keys of type {@code K} and values of type {@code V}
 * @throws IOException declared by the signature; the failing conditions depend on the implementation
 * @throws InterruptedException declared by the signature; the conditions depend on the implementation
 */
public abstract RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException;
From source file:org.apache.mnemonic.mapreduce.MneMapreducePersonDataTest.java
License:Apache License
@Test(enabled = true) public void testWritePersonData() throws Exception { NullWritable nada = NullWritable.get(); MneDurableOutputSession<Person<Long>> sess = new MneDurableOutputSession<Person<Long>>(m_tacontext, null, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX); MneDurableOutputValue<Person<Long>> mdvalue = new MneDurableOutputValue<Person<Long>>(sess); OutputFormat<NullWritable, MneDurableOutputValue<Person<Long>>> outputFormat = new MneOutputFormat<MneDurableOutputValue<Person<Long>>>(); RecordWriter<NullWritable, MneDurableOutputValue<Person<Long>>> writer = outputFormat .getRecordWriter(m_tacontext); Person<Long> person = null; for (int i = 0; i < m_reccnt; ++i) { person = sess.newDurableObjectRecord(); person.setAge((short) m_rand.nextInt(50)); person.setName(String.format("Name: [%s]", Utils.genRandomString()), true); m_sumage += person.getAge();//from ww w. jav a2s .c o m writer.write(nada, mdvalue.of(person)); } writer.close(m_tacontext); sess.close(); }
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
@Test public void testPredicatePushdown() throws Exception { TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0); TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id); final String typeStr = "struct<i:int,s:string>"; OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr); conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString()); conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000); conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true); OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>(); RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext); // write 4000 rows with the integer and the binary string TypeDescription type = TypeDescription.fromString(typeStr); OrcStruct row = (OrcStruct) OrcStruct.createValue(type); NullWritable nada = NullWritable.get(); for (int r = 0; r < 4000; ++r) { row.setFieldValue(0, new IntWritable(r)); row.setFieldValue(1, new Text(Integer.toBinaryString(r))); writer.write(nada, row);/*from www. ja v a 2 s . co m*/ } writer.close(attemptContext); OrcInputFormat.setSearchArgument(conf, SearchArgumentFactory.newBuilder() .between("i", PredicateLeaf.Type.LONG, new Long(1500), new Long(1999)).build(), new String[] { null, "i", "s" }); FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"), 0, 1000000, new String[0]); RecordReader<NullWritable, OrcStruct> reader = new OrcInputFormat<OrcStruct>().createRecordReader(split, attemptContext); // the sarg should cause it to skip over the rows except 1000 to 2000 for (int r = 1000; r < 2000; ++r) { assertEquals(true, reader.nextKeyValue()); row = reader.getCurrentValue(); assertEquals(r, ((IntWritable) row.getFieldValue(0)).get()); assertEquals(Integer.toBinaryString(r), row.getFieldValue(1).toString()); } assertEquals(false, reader.nextKeyValue()); }
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
@Test public void testColumnSelection() throws Exception { String typeStr = "struct<i:int,j:int,k:int>"; OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr); conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString()); conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000); conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true); TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1); TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id); OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>(); RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext); // write 4000 rows with the integer and the binary string TypeDescription type = TypeDescription.fromString(typeStr); OrcStruct row = (OrcStruct) OrcStruct.createValue(type); NullWritable nada = NullWritable.get(); for (int r = 0; r < 3000; ++r) { row.setFieldValue(0, new IntWritable(r)); row.setFieldValue(1, new IntWritable(r * 2)); row.setFieldValue(2, new IntWritable(r * 3)); writer.write(nada, row);//from ww w.j av a 2 s . c o m } writer.close(attemptContext); conf.set(OrcConf.INCLUDE_COLUMNS.getAttribute(), "0,2"); FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"), 0, 1000000, new String[0]); RecordReader<NullWritable, OrcStruct> reader = new OrcInputFormat<OrcStruct>().createRecordReader(split, attemptContext); // the sarg should cause it to skip over the rows except 1000 to 2000 for (int r = 0; r < 3000; ++r) { assertEquals(true, reader.nextKeyValue()); row = reader.getCurrentValue(); assertEquals(r, ((IntWritable) row.getFieldValue(0)).get()); assertEquals(null, row.getFieldValue(1)); assertEquals(r * 3, ((IntWritable) row.getFieldValue(2)).get()); } assertEquals(false, reader.nextKeyValue()); }
From source file:org.apache.parquet.pig.PerfTest2.java
License:Apache License
/**
 * Writes {@code ROW_COUNT} tuples of {@code COLUMN_COUNT} chararray columns to {@code out}
 * using a {@code ParquetStorer}, then commits the task and the job through the output
 * format's committer.
 *
 * @param out the store location handed to the storer; resolved against the current directory
 * @throws IOException declared by the storer, output format and committer calls
 * @throws ParserException if the generated schema string cannot be parsed
 * @throws InterruptedException declared by the output format calls
 * @throws ExecException declared by {@code Tuple.set}
 */
public static void write(String out) throws IOException, ParserException, InterruptedException, ExecException {
  // Schema of the form "a0: chararray, a1: chararray, ..."
  StringBuilder schemaString = new StringBuilder("a0: chararray");
  for (int i = 1; i < COLUMN_COUNT; i++) {
    schemaString.append(", a").append(i).append(": chararray");
  }
  String schema = schemaString.toString();

  StoreFuncInterface storer = new ParquetStorer();
  Job job = new Job(conf);
  storer.setStoreFuncUDFContextSignature("sig");
  String absPath = storer.relToAbsPathForStoreLocation(out,
      new Path(new File(".").getAbsoluteFile().toURI()));
  storer.setStoreLocation(absPath, job);
  storer.checkSchema(new ResourceSchema(Utils.getSchemaFromString(schema)));

  @SuppressWarnings("unchecked") // that's how the base class is defined
  OutputFormat<Void, Tuple> outputFormat = storer.getOutputFormat();
  // it's ContextUtil.getConfiguration(job) and not just conf !
  JobContext jobContext = ContextUtil.newJobContext(ContextUtil.getConfiguration(job),
      new JobID("jt", jobid++));
  outputFormat.checkOutputSpecs(jobContext);

  // The schema string is always built above, so no null check is needed here.
  ResourceSchema resourceSchema = new ResourceSchema(Utils.getSchemaFromString(schema));
  storer.checkSchema(resourceSchema);
  if (storer instanceof StoreMetadata) {
    ((StoreMetadata) storer).storeSchema(resourceSchema, absPath, job);
  }

  TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
      ContextUtil.getConfiguration(job), new TaskAttemptID("jt", jobid, true, 1, 0));
  RecordWriter<Void, Tuple> recordWriter = outputFormat.getRecordWriter(taskAttemptContext);
  storer.prepareToWrite(recordWriter);

  for (int i = 0; i < ROW_COUNT; i++) {
    Tuple tuple = TupleFactory.getInstance().newTuple(COLUMN_COUNT);
    for (int j = 0; j < COLUMN_COUNT; j++) {
      tuple.set(j, "a" + i + "_" + j);
    }
    storer.putNext(tuple);
  }

  recordWriter.close(taskAttemptContext);
  OutputCommitter outputCommitter = outputFormat.getOutputCommitter(taskAttemptContext);
  outputCommitter.commitTask(taskAttemptContext);
  outputCommitter.commitJob(jobContext);
}
From source file:org.apache.pig.backend.hadoop.executionengine.fetch.FetchPOStoreImpl.java
License:Apache License
/**
 * Prepares the store function of {@code store} for writing: sets the output location on a
 * new job context and task attempt context, checks the output specs, sets up the job and
 * task on the output committer, and hands a new record writer to the store function.
 *
 * @param store the store operator whose store function is prepared
 * @return the store function of {@code store}, ready to receive tuples
 * @throws IOException if any setup step fails, or wrapping an {@link InterruptedException}
 *         raised during setup (the thread's interrupt flag is restored first)
 */
@Override
public StoreFuncInterface createStoreFunc(POStore store) throws IOException {
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    StoreFuncInterface storeFunc = store.getStoreFunc();
    JobContext jc = HadoopShims.createJobContext(conf, new JobID());

    OutputFormat<?, ?> outputFormat = storeFunc.getOutputFormat();
    PigOutputFormat.setLocation(jc, store);
    context = HadoopShims.createTaskAttemptContext(conf, HadoopShims.getNewTaskAttemptID());
    PigOutputFormat.setLocation(context, store);

    try {
        outputFormat.checkOutputSpecs(jc);
        outputCommitter = outputFormat.getOutputCommitter(context);
        outputCommitter.setupJob(jc);
        outputCommitter.setupTask(context);
        writer = outputFormat.getRecordWriter(context);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can still observe the interruption.
        Thread.currentThread().interrupt();
        throw new IOException(e);
    }

    storeFunc.prepareToWrite(writer);
    return storeFunc;
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReducePOStoreImpl.java
License:Apache License
@Override public StoreFuncInterface createStoreFunc(POStore store) throws IOException { StoreFuncInterface storeFunc = store.getStoreFunc(); // call the setStoreLocation on the storeFunc giving it the // Job. Typically this will result in the OutputFormat of the // storeFunc storing the output location in the Configuration // in the Job. The PigOutFormat.setLocation() method will merge // this modified Configuration into the configuration of the // Context we have PigOutputFormat.setLocation(context, store); OutputFormat<?, ?> outputFormat = storeFunc.getOutputFormat(); // create a new record writer try {// w ww.j a v a 2s .c om writer = outputFormat.getRecordWriter(context); } catch (InterruptedException e) { throw new IOException(e); } storeFunc.prepareToWrite(writer); return storeFunc; }
From source file:org.apache.pig.impl.io.PigFile.java
License:Apache License
public void store(DataBag data, FuncSpec storeFuncSpec, PigContext pigContext) throws IOException { Configuration conf = ConfigurationUtil.toConfiguration(pigContext.getProperties()); // create a simulated JobContext JobContext jc = HadoopShims.createJobContext(conf, new JobID()); StoreFuncInterface sfunc = (StoreFuncInterface) PigContext.instantiateFuncFromSpec(storeFuncSpec); OutputFormat<?, ?> of = sfunc.getOutputFormat(); POStore store = new POStore(new OperatorKey()); store.setSFile(new FileSpec(file, storeFuncSpec)); PigOutputFormat.setLocation(jc, store); OutputCommitter oc;// www .ja v a2s. c om // create a simulated TaskAttemptContext TaskAttemptContext tac = HadoopShims.createTaskAttemptContext(conf, HadoopShims.getNewTaskAttemptID()); PigOutputFormat.setLocation(tac, store); RecordWriter<?, ?> rw; try { of.checkOutputSpecs(jc); oc = of.getOutputCommitter(tac); oc.setupJob(jc); oc.setupTask(tac); rw = of.getRecordWriter(tac); sfunc.prepareToWrite(rw); for (Iterator<Tuple> it = data.iterator(); it.hasNext();) { Tuple row = it.next(); sfunc.putNext(row); } rw.close(tac); } catch (InterruptedException e) { throw new IOException(e); } if (oc.needsTaskCommit(tac)) { oc.commitTask(tac); } HadoopShims.commitOrCleanup(oc, jc); }
From source file:org.apache.pig.piggybank.squeal.backend.storm.io.StormPOStoreImpl.java
License:Apache License
@Override public StoreFuncInterface createStoreFunc(POStore store) throws IOException { StoreFuncInterface storeFunc = store.getStoreFunc(); // call the setStoreLocation on the storeFunc giving it the // Job. Typically this will result in the OutputFormat of the // storeFunc storing the output location in the Configuration // in the Job. The PigOutFormat.setLocation() method will merge // this modified Configuration into the configuration of the // Context we have PigOutputFormat.setLocation(context, store); OutputFormat outputFormat = storeFunc.getOutputFormat(); // create a new record writer try {//from www . ja v a 2 s. com writer = outputFormat.getRecordWriter(context); } catch (InterruptedException e) { throw new IOException(e); } storeFunc.prepareToWrite(writer); if (storeFunc instanceof ISignStore) { ((ISignStore) storeFunc).setSign(sign); } return storeFunc; }
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.RecordReaderWriterTest.java
License:Apache License
private static void validateFileSplits(final List<FileSplit> fileSplits, final Configuration configuration, final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass, final Optional<Class<? extends OutputFormat<NullWritable, VertexWritable>>> outFormatClass) throws Exception { final InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass, configuration); final TaskAttemptContext job = new TaskAttemptContextImpl(configuration, new TaskAttemptID(UUID.randomUUID().toString(), 0, TaskType.MAP, 0, 0)); int vertexCount = 0; int outEdgeCount = 0; int inEdgeCount = 0; final OutputFormat<NullWritable, VertexWritable> outputFormat = outFormatClass.isPresent() ? ReflectionUtils.newInstance(outFormatClass.get(), configuration) : null;/*from w ww . j a v a 2 s .c om*/ final RecordWriter<NullWritable, VertexWritable> writer = null == outputFormat ? null : outputFormat.getRecordWriter(job); boolean foundKeyValue = false; for (final FileSplit split : fileSplits) { logger.info("\treading file split {}", split.getPath().getName() + " ({}", split.getStart() + "..." 
+ (split.getStart() + split.getLength()), "{} {} bytes)"); final RecordReader reader = inputFormat.createRecordReader(split, job); float lastProgress = -1f; while (reader.nextKeyValue()) { //System.out.println("" + reader.getProgress() + "> " + reader.getCurrentKey() + ": " + reader.getCurrentValue()); final float progress = reader.getProgress(); assertTrue(progress >= lastProgress); assertEquals(NullWritable.class, reader.getCurrentKey().getClass()); final VertexWritable vertexWritable = (VertexWritable) reader.getCurrentValue(); if (null != writer) writer.write(NullWritable.get(), vertexWritable); vertexCount++; outEdgeCount = outEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.OUT)); inEdgeCount = inEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.IN)); // final Vertex vertex = vertexWritable.get(); assertEquals(Integer.class, vertex.id().getClass()); if (vertex.value("name").equals("SUGAR MAGNOLIA")) { foundKeyValue = true; assertEquals(92, IteratorUtils.count(vertex.edges(Direction.OUT))); assertEquals(77, IteratorUtils.count(vertex.edges(Direction.IN))); } lastProgress = progress; } } assertEquals(8049, outEdgeCount); assertEquals(8049, inEdgeCount); assertEquals(outEdgeCount, inEdgeCount); assertEquals(808, vertexCount); assertTrue(foundKeyValue); if (null != writer) { writer.close(new TaskAttemptContextImpl(configuration, job.getTaskAttemptID())); for (int i = 1; i < 10; i++) { final File outputDirectory = new File( new URL(configuration.get("mapreduce.output.fileoutputformat.outputdir")).toURI()); final List<FileSplit> splits = generateFileSplits( new File(outputDirectory.getAbsoluteFile() + "/_temporary/0/_temporary/" + job.getTaskAttemptID().getTaskID().toString().replace("task", "attempt") + "_0" + "/part-m-00000"), i); validateFileSplits(splits, configuration, inputFormatClass, Optional.empty()); } } }
From source file:org.elasticsearch.hadoop.mr.MultiOutputFormat.java
License:Apache License
/**
 * Builds one record writer per configured new-API output format and returns them wrapped
 * in a single {@code MultiNewRecordWriter}.
 *
 * @param context the task attempt context passed to every underlying output format
 * @return a {@code MultiNewRecordWriter} over the underlying writers, in the order the
 *         formats were returned by {@code getNewApiFormats}
 * @throws IOException propagated from an underlying output format
 * @throws InterruptedException propagated from an underlying output format
 */
@Override
public RecordWriter getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
    List<OutputFormat> delegates =
            getNewApiFormats(CompatHandler.taskAttemptContext(context).getConfiguration());

    List<RecordWriter> delegateWriters = new ArrayList<RecordWriter>();
    for (int index = 0; index < delegates.size(); index++) {
        OutputFormat delegate = delegates.get(index);
        RecordWriter delegateWriter = delegate.getRecordWriter(context);
        delegateWriters.add(delegateWriter);
    }

    return new MultiNewRecordWriter(delegateWriters);
}