List of usage examples for org.apache.hadoop.mapreduce.RecordReader.initialize
public abstract void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException;
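initialize(split, context) is called exactly once, before any call to nextKeyValue(), and gives the reader the InputSplit it should read and the TaskAttemptContext (and, through it, the job Configuration). When a RecordReader is created outside the normal mapper flow, or when one reader wraps another, the creating code has to call initialize itself, which is the pattern every example below follows. For orientation, here is a minimal sketch of a reader that does its per-split setup in initialize; the class name SimpleLineRecordReader and its fields are hypothetical and are not taken from any of the source files below.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Hypothetical example class; assumes the split is a FileSplit over a text file.
public class SimpleLineRecordReader extends RecordReader<LongWritable, Text> {
    private BufferedReader in;
    private final LongWritable key = new LongWritable();
    private final Text value = new Text();
    private long lineNumber;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The split identifies what to read; the context supplies the job Configuration.
        FileSplit fileSplit = (FileSplit) split;
        Configuration conf = context.getConfiguration();
        Path path = fileSplit.getPath();
        FileSystem fs = path.getFileSystem(conf);
        in = new BufferedReader(new InputStreamReader(fs.open(path)));
    }

    @Override
    public boolean nextKeyValue() throws IOException {
        String line = in.readLine();
        if (line == null) {
            return false;
        }
        key.set(lineNumber++);
        value.set(line);
        return true;
    }

    @Override
    public LongWritable getCurrentKey() { return key; }

    @Override
    public Text getCurrentValue() { return value; }

    @Override
    public float getProgress() { return 0.0f; }

    @Override
    public void close() throws IOException {
        if (in != null) {
            in.close();
        }
    }
}

Any code that instantiates such a reader directly is responsible for calling reader.initialize(split, context) before the first nextKeyValue(), exactly as the createRecordReader implementations in the examples below do.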
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat.java
License:Apache License
@Override
public RecordReader<NullWritable, VertexWritable> createRecordReader(final InputSplit split,
        final TaskAttemptContext context) throws IOException, InterruptedException {
    final RecordReader<NullWritable, VertexWritable> reader = new GryoRecordReader();
    reader.initialize(split, context);
    return reader;
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.script.ScriptInputFormat.java
License:Apache License
@Override
public RecordReader<NullWritable, VertexWritable> createRecordReader(final InputSplit split,
        final TaskAttemptContext context) throws IOException, InterruptedException {
    RecordReader<NullWritable, VertexWritable> reader = new ScriptRecordReader();
    reader.initialize(split, context);
    return reader;
}
From source file:org.apache.vxquery.hdfs2.HDFSFunctions.java
License:Apache License
public RecordReader getReader() {
    List<FileSplit> fileSplits = new ArrayList<FileSplit>();
    for (int i = 0; i < splits.size(); i++) {
        fileSplits.add((FileSplit) splits.get(i));
    }
    FileSplitsFactory splitsFactory;
    try {
        splitsFactory = new FileSplitsFactory(fileSplits);
        List<FileSplit> inputSplits = splitsFactory.getSplits();
        ContextFactory ctxFactory = new ContextFactory();
        int size = inputSplits.size();
        for (int i = 0; i < size; i++) {
            // read the split
            TaskAttemptContext context;
            try {
                context = ctxFactory.createContext(job.getConfiguration(), i);
                RecordReader reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                reader.initialize(inputSplits.get(i), context);
                return reader;
            } catch (HyracksDataException e) {
                if (LOGGER.isLoggable(Level.SEVERE)) {
                    LOGGER.severe(e.getMessage());
                }
            } catch (IOException e) {
                if (LOGGER.isLoggable(Level.SEVERE)) {
                    LOGGER.severe(e.getMessage());
                }
            } catch (InterruptedException e) {
                if (LOGGER.isLoggable(Level.SEVERE)) {
                    LOGGER.severe(e.getMessage());
                }
            }
        }
    } catch (HyracksDataException e) {
        if (LOGGER.isLoggable(Level.SEVERE)) {
            LOGGER.severe(e.getMessage());
        }
    }
    return null;
}
From source file:org.apache.vxquery.metadata.VXQueryCollectionOperatorDescriptor.java
License:Apache License
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions)
        throws HyracksDataException {
    final FrameTupleAccessor fta = new FrameTupleAccessor(ctx.getFrameSize(),
            recordDescProvider.getInputRecordDescriptor(getActivityId(), 0));
    final int fieldOutputCount = recordDescProvider.getOutputRecordDescriptor(getActivityId(), 0)
            .getFieldCount();
    final ByteBuffer frame = ctx.allocateFrame();
    final FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize(), fieldOutputCount);
    final short partitionId = (short) ctx.getTaskAttemptId().getTaskId().getPartition();
    final ITreeNodeIdProvider nodeIdProvider = new TreeNodeIdProvider(partitionId, dataSourceId,
            totalDataSources);
    final String nodeId = ctx.getJobletContext().getApplicationContext().getNodeId();
    final DynamicContext dCtx = (DynamicContext) ctx.getJobletContext().getGlobalJobData();
    final String collectionName = collectionPartitions[partition % collectionPartitions.length];
    final XMLParser parser = new XMLParser(false, nodeIdProvider, nodeId, frame, appender, childSeq,
            dCtx.getStaticContext());

    return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
        @Override
        public void open() throws HyracksDataException {
            appender.reset(frame, true);
            writer.open();
            hdfs = new HDFSFunctions();
        }

        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            fta.reset(buffer);
            String collectionModifiedName = collectionName.replace("${nodeId}", nodeId);
            if (!collectionModifiedName.contains("hdfs:/")) {
                File collectionDirectory = new File(collectionModifiedName);
                // check if directory is in the local file system
                if (collectionDirectory.exists()) {
                    // Go through each tuple.
                    if (collectionDirectory.isDirectory()) {
                        for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) {
                            Iterator<File> it = FileUtils.iterateFiles(collectionDirectory,
                                    new VXQueryIOFileFilter(), TrueFileFilter.INSTANCE);
                            while (it.hasNext()) {
                                File xmlDocument = it.next();
                                if (LOGGER.isLoggable(Level.FINE)) {
                                    LOGGER.fine("Starting to read XML document: "
                                            + xmlDocument.getAbsolutePath());
                                }
                                parser.parseElements(xmlDocument, writer, fta, tupleIndex);
                            }
                        }
                    } else {
                        throw new HyracksDataException("Invalid directory parameter (" + nodeId + ":"
                                + collectionDirectory.getAbsolutePath() + ") passed to collection.");
                    }
                }
            } else {
                // Else check in HDFS file system.
                // Get instance of the HDFS filesystem.
                FileSystem fs = hdfs.getFileSystem();
                if (fs != null) {
                    collectionModifiedName = collectionModifiedName.replaceAll("hdfs:/", "");
                    Path directory = new Path(collectionModifiedName);
                    Path xmlDocument;
                    if (tag != null) {
                        hdfs.setJob(directory.getName(), tag);
                        tag = "<" + tag + ">";
                        Job job = hdfs.getJob();
                        InputFormat inputFormat = hdfs.getinputFormat();
                        try {
                            hdfs.scheduleSplits();
                            ArrayList<Integer> schedule = hdfs
                                    .getScheduleForNode(InetAddress.getLocalHost().getHostName());
                            List<InputSplit> splits = hdfs.getSplits();
                            List<FileSplit> fileSplits = new ArrayList<FileSplit>();
                            for (int i : schedule) {
                                fileSplits.add((FileSplit) splits.get(i));
                            }
                            FileSplitsFactory splitsFactory = new FileSplitsFactory(fileSplits);
                            List<FileSplit> inputSplits = splitsFactory.getSplits();
                            ContextFactory ctxFactory = new ContextFactory();
                            int size = inputSplits.size();
                            InputStream stream;
                            String value;
                            RecordReader reader;
                            TaskAttemptContext context;
                            for (int i = 0; i < size; i++) {
                                // read split
                                context = ctxFactory.createContext(job.getConfiguration(), i);
                                try {
                                    reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                                    reader.initialize(inputSplits.get(i), context);
                                    while (reader.nextKeyValue()) {
                                        value = reader.getCurrentValue().toString();
                                        // Split value if it contains more than one item with the tag
                                        if (StringUtils.countMatches(value, tag) > 1) {
                                            String items[] = value.split(tag);
                                            for (String item : items) {
                                                if (item.length() > 0) {
                                                    item = START_TAG + tag + item;
                                                    stream = new ByteArrayInputStream(
                                                            item.getBytes(StandardCharsets.UTF_8));
                                                    parser.parseHDFSElements(stream, writer, fta, i);
                                                }
                                            }
                                        } else {
                                            value = START_TAG + value;
                                            // create an input stream to the file currently reading and send it to parser
                                            stream = new ByteArrayInputStream(
                                                    value.getBytes(StandardCharsets.UTF_8));
                                            parser.parseHDFSElements(stream, writer, fta, i);
                                        }
                                    }
                                } catch (InterruptedException e) {
                                    if (LOGGER.isLoggable(Level.SEVERE)) {
                                        LOGGER.severe(e.getMessage());
                                    }
                                }
                            }
                        } catch (IOException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (ParserConfigurationException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (SAXException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        }
                    } else {
                        try {
                            // check if the path exists and is a directory
                            if (fs.exists(directory) && fs.isDirectory(directory)) {
                                for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) {
                                    // read every file in the directory
                                    RemoteIterator<LocatedFileStatus> it = fs.listFiles(directory, true);
                                    while (it.hasNext()) {
                                        xmlDocument = it.next().getPath();
                                        if (fs.isFile(xmlDocument)) {
                                            if (LOGGER.isLoggable(Level.FINE)) {
                                                LOGGER.fine("Starting to read XML document: "
                                                        + xmlDocument.getName());
                                            }
                                            // create an input stream to the file currently reading and send it to parser
                                            InputStream in = fs.open(xmlDocument).getWrappedStream();
                                            parser.parseHDFSElements(in, writer, fta, tupleIndex);
                                        }
                                    }
                                }
                            } else {
                                throw new HyracksDataException("Invalid HDFS directory parameter ("
                                        + nodeId + ":" + directory + ") passed to collection.");
                            }
                        } catch (FileNotFoundException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (IOException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        }
                    }
                    try {
                        fs.close();
                    } catch (IOException e) {
                        if (LOGGER.isLoggable(Level.SEVERE)) {
                            LOGGER.severe(e.getMessage());
                        }
                    }
                }
            }
        }

        @Override
        public void fail() throws HyracksDataException {
            writer.fail();
        }

        @Override
        public void close() throws HyracksDataException {
            // Check if needed?
            fta.reset(frame);
            if (fta.getTupleCount() > 0) {
                FrameUtils.flushFrame(frame, writer);
            }
            writer.close();
        }
    };
}
From source file:org.bgi.flexlab.gaea.data.mapreduce.input.bam.GaeaBamInputFormat.java
License:Open Source License
public RecordReader<LongWritable, SamRecordWritable> createRecordReader(InputSplit split,
        TaskAttemptContext ctx) throws InterruptedException, IOException {
    RecordReader<LongWritable, SamRecordWritable> rr = new GaeaBamRecordReader();
    Configuration conf = ctx.getConfiguration();
    DEBUG_BAM_SPLITTER = conf.getBoolean("debug.bam.splitter", false);
    rr.initialize(split, ctx);
    return rr;
}
From source file:org.bgi.flexlab.gaea.data.mapreduce.input.sam.GaeaSamInputFormat.java
License:Open Source License
@Override
public RecordReader<LongWritable, SamRecordWritable> createRecordReader(InputSplit split,
        TaskAttemptContext ctx) throws InterruptedException, IOException {
    final RecordReader<LongWritable, SamRecordWritable> rr = new GaeaSamRecordReader();
    rr.initialize(split, ctx);
    return rr;
}
From source file:org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2MapTask.java
License:Open Source License
/** {@inheritDoc} */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(GridHadoopV2TaskContext taskCtx) throws GridException {
    GridHadoopInputSplit split = info().inputSplit();
    InputSplit nativeSplit;
    if (split instanceof GridHadoopFileBlock) {
        GridHadoopFileBlock block = (GridHadoopFileBlock) split;
        nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), null);
    } else
        nativeSplit = (InputSplit) taskCtx.getNativeSplit(split);
    assert nativeSplit != null;
    OutputFormat outputFormat = null;
    Exception err = null;
    JobContextImpl jobCtx = taskCtx.jobContext();
    try {
        InputFormat inFormat = ReflectionUtils.newInstance(jobCtx.getInputFormatClass(),
                hadoopContext().getConfiguration());
        RecordReader reader = inFormat.createRecordReader(nativeSplit, hadoopContext());
        reader.initialize(nativeSplit, hadoopContext());
        hadoopContext().reader(reader);
        GridHadoopJobInfo jobInfo = taskCtx.job().info();
        outputFormat = jobInfo.hasCombiner() || jobInfo.hasReducer() ? null : prepareWriter(jobCtx);
        Mapper mapper = ReflectionUtils.newInstance(jobCtx.getMapperClass(),
                hadoopContext().getConfiguration());
        try {
            mapper.run(new WrappedMapper().getMapContext(hadoopContext()));
        } finally {
            closeWriter();
        }
        commit(outputFormat);
    } catch (InterruptedException e) {
        err = e;
        Thread.currentThread().interrupt();
        throw new GridInterruptedException(e);
    } catch (Exception e) {
        err = e;
        throw new GridException(e);
    } finally {
        if (err != null)
            abort(outputFormat);
    }
}
From source file:org.kududb.mapreduce.TestKuduTableInputFormat.java
License:Apache License
private RecordReader<NullWritable, RowResult> createRecordReader(String columnProjection,
        List<ColumnRangePredicate> predicates) throws IOException, InterruptedException {
    KuduTableInputFormat input = new KuduTableInputFormat();
    Configuration conf = new Configuration();
    conf.set(KuduTableInputFormat.MASTER_ADDRESSES_KEY, getMasterAddresses());
    conf.set(KuduTableInputFormat.INPUT_TABLE_KEY, TABLE_NAME);
    if (columnProjection != null) {
        conf.set(KuduTableInputFormat.COLUMN_PROJECTION_KEY, columnProjection);
    }
    if (predicates != null) {
        String encodedPredicates = KuduTableMapReduceUtil.base64EncodePredicates(predicates);
        conf.set(KuduTableInputFormat.ENCODED_COLUMN_RANGE_PREDICATES_KEY, encodedPredicates);
    }
    input.setConf(conf);
    List<InputSplit> splits = input.getSplits(null);

    // We need to re-create the input format to reconnect the client.
    input = new KuduTableInputFormat();
    input.setConf(conf);
    RecordReader<NullWritable, RowResult> reader = input.createRecordReader(null, null);
    reader.initialize(Iterables.getOnlyElement(splits), null);
    return reader;
}
From source file:org.mrgeo.data.MrsPyramidRecordReader.java
License:Apache License
private RecordReader<TileIdWritable, RasterWritable> createRecordReader(final MrsPyramidInputSplit split,
        final TaskAttemptContext context) throws IOException {
    InputSplit initializeWithSplit;
    // The record reader needs the native split returned from
    // the data plugin.
    RecordReader<TileIdWritable, RasterWritable> recordReader = getRecordReader(split.getName(),
            context.getConfiguration());
    initializeWithSplit = split.getWrappedSplit();
    try {
        recordReader.initialize(initializeWithSplit, context);
    } catch (Exception e) {
        throw new IOException(e);
    }
    return recordReader;
}
From source file:org.mrgeo.data.MrsPyramidSimpleRecordReader.java
License:Apache License
private RecordReader<TileIdWritable, TWritable> createRecordReader(final MrsPyramidInputSplit split,
        final TaskAttemptContext context) throws DataProviderNotFound, IOException {
    InputSplit initializeWithSplit = null;
    RecordReader<TileIdWritable, TWritable> recordReader = null;
    if (ifContext.getIncludeEmptyTiles()) {
        if (split.getWrappedSplit().getWrappedSplit() == null) {
            recordReader = new AllBlankTilesRecordReader();
        } else {
            recordReader = new AllTilesRecordReader();
        }
        // The all tiles record readers need the MrsPyramidInputSplit which
        // wraps the native split returned from the data plugin.
        initializeWithSplit = split;
    } else {
        // The standard record reader needs the native split returned from
        // the data plugin.
        recordReader = getRecordReader(split.getName(), context.getConfiguration());
        initializeWithSplit = split.getWrappedSplit();
    }
    try {
        recordReader.initialize(initializeWithSplit, context);
    } catch (Throwable t) {
        throw new IOException(t);
    }
    return recordReader;
}