List of usage examples for org.apache.hadoop.mapreduce.RecordReader#getCurrentValue()
public abstract VALUEIN getCurrentValue() throws IOException, InterruptedException;
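Before the project examples below, here is a minimal sketch of the canonical consumption loop in which getCurrentValue() is called. It is not taken from any of the projects listed; the class name, the helper method, and the use of TextInputFormat are illustrative assumptions, and the split and context are presumed to be supplied by the framework or a test harness.

// A minimal sketch, not from any project below. Assumptions: `split` and `context`
// are provided by the caller; TextInputFormat is used purely for illustration.
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class GetCurrentValueSketch {
    static void dumpSplit(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        TextInputFormat inputFormat = new TextInputFormat();
        RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
        reader.initialize(split, context);
        try {
            while (reader.nextKeyValue()) {
                // getCurrentValue() returns the value of the record that the last
                // successful nextKeyValue() positioned the reader on. Many readers
                // reuse the returned object, so copy it if it must outlive the loop.
                LongWritable key = reader.getCurrentKey();
                Text value = reader.getCurrentValue();
                System.out.println(key.get() + "\t" + value.toString());
            }
        } finally {
            reader.close();
        }
    }
}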
From source file:io.ssc.trackthetrackers.extraction.hadoop.util.Compaction.java
License:Open Source License
public static void main(String[] args) throws IOException, InterruptedException {
    if (args.length != 2) {
        System.out.println("Usage: <input folder> <output file>");
        System.exit(-1);
    }

    String inputPath = args[0];
    String outputFile = args[1];

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    FileStatus[] input = fs.listStatus(new Path(inputPath), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.toString().endsWith(".parquet");
        }
    });

    Path output = new Path(outputFile);
    fs.delete(output, true);

    ProtoParquetInputFormat<ParsedPageProtos.ParsedPageOrBuilder> inputFormat =
            new ProtoParquetInputFormat<ParsedPageProtos.ParsedPageOrBuilder>();
    inputFormat.setReadSupportClass(new JobConf(conf), ProtoReadSupport.class);

    Job job = new Job(conf);
    ProtoParquetOutputFormat<ParsedPageProtos.ParsedPage> outputFormat =
            new ProtoParquetOutputFormat<ParsedPageProtos.ParsedPage>(ParsedPageProtos.ParsedPage.class);
    ProtoParquetOutputFormat.setProtobufClass(job, ParsedPageProtos.ParsedPage.class);
    ProtoParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
    ProtoParquetOutputFormat.setEnableDictionary(job, true);

    RecordWriter<Void, ParsedPageProtos.ParsedPage> recordWriter =
            outputFormat.getRecordWriter(conf, output, CompressionCodecName.SNAPPY);

    List<ParquetInputSplit> splits = new ArrayList<ParquetInputSplit>();

    for (FileStatus fileStatus : input) {
        System.out.println(fileStatus.getPath().toString());
        splits.addAll(inputFormat.getSplits(conf, ParquetFileReader.readFooters(conf, fileStatus)));
    }

    int splitIndex = 0;
    for (ParquetInputSplit split : splits) {
        System.out.println("Processing split: " + split.getPath().toString() + " (" + splitIndex + " of "
                + splits.size() + ")");

        TaskAttemptID taskAttemptID = new TaskAttemptID(new TaskID("identifier", splitIndex, true, splitIndex),
                splitIndex);
        TaskAttemptContext ctx = new org.apache.hadoop.mapreduce.TaskAttemptContext(conf, taskAttemptID);

        RecordReader<Void, ParsedPageProtos.ParsedPageOrBuilder> reader =
                inputFormat.createRecordReader(split, ctx);
        reader.initialize(split, ctx);

        while (reader.nextKeyValue()) {
            ParsedPageProtos.ParsedPageOrBuilder record = reader.getCurrentValue();

            ParsedPageProtos.ParsedPage.Builder builder = ParsedPageProtos.ParsedPage.newBuilder();

            builder.setUrl(record.getUrl());
            builder.setArchiveTime(record.getArchiveTime());

            builder.addAllScripts(record.getScriptsList());
            builder.addAllIframes(record.getIframesList());
            builder.addAllLinks(record.getLinksList());
            builder.addAllImages(record.getImagesList());

            recordWriter.write(null, builder.build());
        }

        if (reader != null) {
            reader.close();
        }
        splitIndex++;
    }

    TaskAttemptID taskAttemptID = new TaskAttemptID(new TaskID("identifier", 1, true, 1), 1);
    TaskAttemptContext ctx = new org.apache.hadoop.mapreduce.TaskAttemptContext(conf, taskAttemptID);

    if (recordWriter != null) {
        recordWriter.close(ctx);
    }
}
From source file:it.crs4.pydoop.mapreduce.pipes.PipesMapper.java
License:Apache License
@Override
public void run(Context context) throws IOException, InterruptedException {
    setup(context);
    Configuration conf = context.getConfiguration();
    InputSplit split = context.getInputSplit();
    // FIXME: do we really need to be so convoluted?
    InputFormat<K1, V1> inputFormat;
    try {
        inputFormat = (InputFormat<K1, V1>) ReflectionUtils.newInstance(context.getInputFormatClass(), conf);
    } catch (ClassNotFoundException ce) {
        throw new RuntimeException("class not found", ce);
    }
    RecordReader<K1, V1> input = inputFormat.createRecordReader(split, context);
    input.initialize(split, context);
    boolean isJavaInput = Submitter.getIsJavaRecordReader(conf);
    try {
        // FIXME: what happens for a java mapper and no java record reader?
        DummyRecordReader fakeInput = (!isJavaInput && !Submitter.getIsJavaMapper(conf))
                ? (DummyRecordReader) input
                : null;
        application = new Application<K1, V1, K2, V2>(context, fakeInput);
    } catch (InterruptedException ie) {
        throw new RuntimeException("interrupted", ie);
    }
    DownwardProtocol<K1, V1> downlink = application.getDownlink();
    // FIXME: InputSplit is not Writable, but still, this is ugly...
    downlink.runMap((FileSplit) context.getInputSplit(), context.getNumReduceTasks(), isJavaInput);

    boolean skipping = conf.getBoolean(context.SKIP_RECORDS, false);
    boolean sent_input_types = false;
    try {
        if (isJavaInput) {
            // FIXME
            while (input.nextKeyValue()) {
                if (!sent_input_types) {
                    sent_input_types = true;
                    NullWritable n = NullWritable.get();
                    String kclass_name = n.getClass().getName();
                    String vclass_name = n.getClass().getName();
                    if (input.getCurrentKey() != null) {
                        kclass_name = input.getCurrentKey().getClass().getName();
                    }
                    if (input.getCurrentValue() != null) {
                        vclass_name = input.getCurrentValue().getClass().getName();
                    }
                    downlink.setInputTypes(kclass_name, vclass_name);
                }
                downlink.mapItem(input.getCurrentKey(), input.getCurrentValue());
                if (skipping) {
                    // flush the streams on every record input if running in skip mode
                    // so that we don't buffer other records surrounding a bad record.
                    downlink.flush();
                }
            }
            downlink.endOfInput();
        }
        application.waitForFinish();
    } catch (Throwable t) {
        application.abort(t);
    } finally {
        cleanup(context);
    }
}
From source file:org.apache.avro.mapreduce.TestAvroKeyRecordReader.java
License:Apache License
/**
 * Verifies that avro records can be read and progress is reported correctly.
 */
@Test
public void testReadRecords() throws IOException, InterruptedException {
    // Create the test avro file input with two records:
    //   1. "first"
    //   2. "second"
    final SeekableInput avroFileInput = new SeekableFileInput(
            AvroFiles.createFile(new File(mTempDir.getRoot(), "myStringfile.avro"),
                    Schema.create(Schema.Type.STRING), "first", "second"));

    // Create the record reader.
    Schema readerSchema = Schema.create(Schema.Type.STRING);
    RecordReader<AvroKey<CharSequence>, NullWritable> recordReader = new AvroKeyRecordReader<CharSequence>(
            readerSchema) {
        @Override
        protected SeekableInput createSeekableInput(Configuration conf, Path path) throws IOException {
            return avroFileInput;
        }
    };

    // Set up the job configuration.
    Configuration conf = new Configuration();

    // Create a mock input split for this record reader.
    FileSplit inputSplit = createMock(FileSplit.class);
    expect(inputSplit.getPath()).andReturn(new Path("/path/to/an/avro/file")).anyTimes();
    expect(inputSplit.getStart()).andReturn(0L).anyTimes();
    expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes();

    // Create a mock task attempt context for this record reader.
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration()).andReturn(conf).anyTimes();

    // Initialize the record reader.
    replay(inputSplit);
    replay(context);
    recordReader.initialize(inputSplit, context);

    assertEquals("Progress should be zero before any records are read", 0.0f, recordReader.getProgress(), 0.0f);

    // Some variables to hold the records.
    AvroKey<CharSequence> key;
    NullWritable value;

    // Read the first record.
    assertTrue("Expected at least one record", recordReader.nextKeyValue());
    key = recordReader.getCurrentKey();
    value = recordReader.getCurrentValue();

    assertNotNull("First record had null key", key);
    assertNotNull("First record had null value", value);

    CharSequence firstString = key.datum();
    assertEquals("first", firstString.toString());

    assertTrue("getCurrentKey() returned different keys for the same record",
            key == recordReader.getCurrentKey());
    assertTrue("getCurrentValue() returned different values for the same record",
            value == recordReader.getCurrentValue());

    // Read the second record.
    assertTrue("Expected to read a second record", recordReader.nextKeyValue());
    key = recordReader.getCurrentKey();
    value = recordReader.getCurrentValue();

    assertNotNull("Second record had null key", key);
    assertNotNull("Second record had null value", value);

    CharSequence secondString = key.datum();
    assertEquals("second", secondString.toString());

    assertEquals("Progress should be complete (2 out of 2 records processed)", 1.0f,
            recordReader.getProgress(), 0.0f);

    // There should be no more records.
    assertFalse("Expected only 2 records", recordReader.nextKeyValue());

    // Close the record reader.
    recordReader.close();

    // Verify the expected calls on the mocks.
    verify(inputSplit);
    verify(context);
}
From source file:org.apache.avro.mapreduce.TestAvroKeyValueRecordReader.java
License:Apache License
/**
 * Verifies that avro records can be read and progress is reported correctly.
 */
@Test
public void testReadRecords() throws IOException, InterruptedException {
    // Create the test avro file input with two records:
    //   1. <"first", 1>
    //   2. <"second", 2>
    Schema keyValueSchema = AvroKeyValue.getSchema(Schema.create(Schema.Type.STRING),
            Schema.create(Schema.Type.INT));

    AvroKeyValue<CharSequence, Integer> firstInputRecord = new AvroKeyValue<CharSequence, Integer>(
            new GenericData.Record(keyValueSchema));
    firstInputRecord.setKey("first");
    firstInputRecord.setValue(1);

    AvroKeyValue<CharSequence, Integer> secondInputRecord = new AvroKeyValue<CharSequence, Integer>(
            new GenericData.Record(keyValueSchema));
    secondInputRecord.setKey("second");
    secondInputRecord.setValue(2);

    final SeekableInput avroFileInput = new SeekableFileInput(
            AvroFiles.createFile(new File(mTempDir.getRoot(), "myInputFile.avro"), keyValueSchema,
                    firstInputRecord.get(), secondInputRecord.get()));

    // Create the record reader over the avro input file.
    RecordReader<AvroKey<CharSequence>, AvroValue<Integer>> recordReader =
            new AvroKeyValueRecordReader<CharSequence, Integer>(
                    Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT)) {
        @Override
        protected SeekableInput createSeekableInput(Configuration conf, Path path) throws IOException {
            return avroFileInput;
        }
    };

    // Set up the job configuration.
    Configuration conf = new Configuration();

    // Create a mock input split for this record reader.
    FileSplit inputSplit = createMock(FileSplit.class);
    expect(inputSplit.getPath()).andReturn(new Path("/path/to/an/avro/file")).anyTimes();
    expect(inputSplit.getStart()).andReturn(0L).anyTimes();
    expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes();

    // Create a mock task attempt context for this record reader.
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration()).andReturn(conf).anyTimes();

    // Initialize the record reader.
    replay(inputSplit);
    replay(context);
    recordReader.initialize(inputSplit, context);

    assertEquals("Progress should be zero before any records are read", 0.0f, recordReader.getProgress(), 0.0f);

    // Some variables to hold the records.
    AvroKey<CharSequence> key;
    AvroValue<Integer> value;

    // Read the first record.
    assertTrue("Expected at least one record", recordReader.nextKeyValue());
    key = recordReader.getCurrentKey();
    value = recordReader.getCurrentValue();

    assertNotNull("First record had null key", key);
    assertNotNull("First record had null value", value);

    assertEquals("first", key.datum().toString());
    assertEquals(1, value.datum().intValue());

    assertTrue("getCurrentKey() returned different keys for the same record",
            key == recordReader.getCurrentKey());
    assertTrue("getCurrentValue() returned different values for the same record",
            value == recordReader.getCurrentValue());

    // Read the second record.
    assertTrue("Expected to read a second record", recordReader.nextKeyValue());
    key = recordReader.getCurrentKey();
    value = recordReader.getCurrentValue();

    assertNotNull("Second record had null key", key);
    assertNotNull("Second record had null value", value);

    assertEquals("second", key.datum().toString());
    assertEquals(2, value.datum().intValue());

    assertEquals("Progress should be complete (2 out of 2 records processed)", 1.0f,
            recordReader.getProgress(), 0.0f);

    // There should be no more records.
    assertFalse("Expected only 2 records", recordReader.nextKeyValue());

    // Close the record reader.
    recordReader.close();

    // Verify the expected calls on the mocks.
    verify(inputSplit);
    verify(context);
}
From source file:org.apache.carbondata.store.LocalCarbonStore.java
License:Apache License
@Override
public Iterator<CarbonRow> scan(AbsoluteTableIdentifier tableIdentifier, String[] projectColumns,
        Expression filter) throws IOException {
    Objects.requireNonNull(tableIdentifier);
    Objects.requireNonNull(projectColumns);

    CarbonTable table = getTable(tableIdentifier.getTablePath());
    if (table.isStreamingSink() || table.isHivePartitionTable()) {
        throw new UnsupportedOperationException("streaming and partition table is not supported");
    }

    // TODO: use InputFormat to prune data and read data
    final CarbonTableInputFormat format = new CarbonTableInputFormat();
    final Job job = new Job(new Configuration());
    CarbonInputFormat.setTableInfo(job.getConfiguration(), table.getTableInfo());
    CarbonInputFormat.setTablePath(job.getConfiguration(), table.getTablePath());
    CarbonInputFormat.setTableName(job.getConfiguration(), table.getTableName());
    CarbonInputFormat.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
    CarbonInputFormat.setCarbonReadSupport(job.getConfiguration(), CarbonRowReadSupport.class);
    CarbonInputFormat.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectColumns));
    if (filter != null) {
        CarbonInputFormat.setFilterPredicates(job.getConfiguration(), filter);
    }

    final List<InputSplit> splits = format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
    List<RecordReader<Void, Object>> readers = new ArrayList<>(splits.size());
    List<CarbonRow> rows = new ArrayList<>();

    try {
        for (InputSplit split : splits) {
            TaskAttemptContextImpl attempt = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader reader = format.createRecordReader(split, attempt);
            reader.initialize(split, attempt);
            readers.add(reader);
        }

        for (RecordReader<Void, Object> reader : readers) {
            while (reader.nextKeyValue()) {
                rows.add((CarbonRow) reader.getCurrentValue());
            }
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.error(e);
            }
        }
    } catch (InterruptedException e) {
        throw new IOException(e);
    } finally {
        for (RecordReader<Void, Object> reader : readers) {
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.error(e);
            }
        }
    }
    return rows.iterator();
}
From source file:org.apache.crunch.kafka.inputformat.KafkaInputFormatIT.java
License:Apache License
@Test
public void getSplitsCreateReaders() throws IOException, InterruptedException {
    List<String> keys = ClusterTest.writeData(ClusterTest.getProducerProperties(), topic, "batch", 10, 10);
    Map<TopicPartition, Long> startOffsets = getBrokerOffsets(consumerProps, OffsetRequest.EarliestTime(), topic);
    Map<TopicPartition, Long> endOffsets = getBrokerOffsets(consumerProps, OffsetRequest.LatestTime(), topic);

    Map<TopicPartition, Pair<Long, Long>> offsets = new HashMap<>();
    for (Map.Entry<TopicPartition, Long> entry : startOffsets.entrySet()) {
        Long endingOffset = endOffsets.get(entry.getKey());
        offsets.put(entry.getKey(), Pair.of(entry.getValue(), endingOffset));
    }

    KafkaInputFormat.writeOffsetsToConfiguration(offsets, config);

    KafkaInputFormat inputFormat = new KafkaInputFormat();
    inputFormat.setConf(config);
    List<InputSplit> splits = inputFormat.getSplits(null);

    assertThat(splits.size(), is(offsets.size()));

    for (InputSplit split : splits) {
        KafkaInputSplit inputSplit = (KafkaInputSplit) split;
        Pair<Long, Long> startEnd = offsets.get(inputSplit.getTopicPartition());
        assertThat(inputSplit.getStartingOffset(), is(startEnd.first()));
        assertThat(inputSplit.getEndingOffset(), is(startEnd.second()));
    }

    // create readers and consume the data
    when(taskContext.getConfiguration()).thenReturn(config);
    Set<String> keysRead = new HashSet<>();
    // read all data from all splits
    for (InputSplit split : splits) {
        KafkaInputSplit inputSplit = (KafkaInputSplit) split;
        long start = inputSplit.getStartingOffset();
        long end = inputSplit.getEndingOffset();

        RecordReader<BytesWritable, BytesWritable> recordReader = inputFormat.createRecordReader(split,
                taskContext);
        recordReader.initialize(split, taskContext);

        int numRecordsFound = 0;
        String currentKey;
        while (recordReader.nextKeyValue()) {
            currentKey = new String(recordReader.getCurrentKey().getBytes());
            keysRead.add(currentKey);
            assertThat(keys, hasItem(currentKey));
            assertThat(recordReader.getCurrentValue(), is(notNullValue()));
            numRecordsFound++;
        }
        recordReader.close();

        // assert that it encountered a partition's worth of data
        assertThat(((long) numRecordsFound), is(end - start));
    }

    // validate the same number of unique keys was read as were written.
    assertThat(keysRead.size(), is(keys.size()));
}
From source file:org.apache.druid.data.input.orc.DruidOrcInputFormatTest.java
License:Apache License
@Test
public void testRead() throws IOException, InterruptedException {
    InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    RecordReader reader = inputFormat.createRecordReader(split, context);
    InputRowParser<OrcStruct> parser = (InputRowParser<OrcStruct>) config.getParser();

    reader.initialize(split, context);
    reader.nextKeyValue();

    OrcStruct data = (OrcStruct) reader.getCurrentValue();
    MapBasedInputRow row = (MapBasedInputRow) parser.parseBatch(data).get(0);

    Assert.assertTrue(row.getEvent().keySet().size() == 4);
    Assert.assertEquals(DateTimes.of(timestamp), row.getTimestamp());
    Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
    Assert.assertEquals(col1, row.getEvent().get("col1"));
    Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));

    reader.close();
}
From source file:org.apache.druid.data.input.orc.DruidOrcInputFormatTest.java
License:Apache License
@Test
public void testReadDateColumn() throws IOException, InterruptedException {
    File testFile2 = makeOrcFileWithDate();
    Path path = new Path(testFile2.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile2.length(), null);

    InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    RecordReader reader = inputFormat.createRecordReader(split, context);
    InputRowParser<OrcStruct> parser = (InputRowParser<OrcStruct>) config.getParser();

    reader.initialize(split, context);
    reader.nextKeyValue();

    OrcStruct data = (OrcStruct) reader.getCurrentValue();
    MapBasedInputRow row = (MapBasedInputRow) parser.parseBatch(data).get(0);

    Assert.assertTrue(row.getEvent().keySet().size() == 4);
    Assert.assertEquals(DateTimes.of(timestamp), row.getTimestamp());
    Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
    Assert.assertEquals(col1, row.getEvent().get("col1"));
    Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));

    reader.close();
}
From source file:org.apache.hcatalog.pig.TestE2EScenarios.java
License:Apache License
private void copyTable(String in, String out) throws IOException, InterruptedException {
    Job ijob = new Job();
    Job ojob = new Job();
    HCatInputFormat inpy = new HCatInputFormat();
    inpy.setInput(ijob, null, in);
    HCatOutputFormat oupy = new HCatOutputFormat();
    oupy.setOutput(ojob, OutputJobInfo.create(null, out, new HashMap<String, String>()));

    // Test HCatContext
    System.err.println("HCatContext INSTANCE is present : " + HCatContext.INSTANCE.getConf().isPresent());
    if (HCatContext.INSTANCE.getConf().isPresent()) {
        System.err.println("HCatContext tinyint->int promotion says " + HCatContext.INSTANCE.getConf().get()
                .getBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
                        HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT));
    }

    HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration());
    System.err.println("Copying from [" + in + "] to [" + out + "] with schema : " + tableSchema.toString());
    oupy.setSchema(ojob, tableSchema);
    oupy.checkOutputSpecs(ojob);
    OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration()));
    oc.setupJob(ojob);

    for (InputSplit split : inpy.getSplits(ijob)) {
        TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration());
        TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration());

        RecordReader<WritableComparable, HCatRecord> rr = inpy.createRecordReader(split, rtaskContext);
        rr.initialize(split, rtaskContext);

        OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext);
        taskOc.setupTask(wtaskContext);
        RecordWriter<WritableComparable<?>, HCatRecord> rw = oupy.getRecordWriter(wtaskContext);

        while (rr.nextKeyValue()) {
            rw.write(rr.getCurrentKey(), rr.getCurrentValue());
        }
        rw.close(wtaskContext);
        taskOc.commitTask(wtaskContext);
        rr.close();
    }

    oc.commitJob(ojob);
}
From source file:org.apache.hyracks.hdfs2.dataflow.HDFSReadOperatorDescriptor.java
License:Apache License
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
        throws HyracksDataException {
    final List<FileSplit> inputSplits = splitsFactory.getSplits();

    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        private String nodeName = ctx.getJobletContext().getApplicationContext().getNodeId();
        private ContextFactory ctxFactory = new ContextFactory();

        @SuppressWarnings("unchecked")
        @Override
        public void initialize() throws HyracksDataException {
            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
            try {
                writer.open();
                Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
                Job job = confFactory.getConf();
                job.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
                InputFormat inputFormat = ReflectionUtils.newInstance(job.getInputFormatClass(),
                        job.getConfiguration());
                int size = inputSplits.size();
                for (int i = 0; i < size; i++) {
                    /**
                     * read all the partitions scheduled to the current node
                     */
                    if (scheduledLocations[i].equals(nodeName)) {
                        /**
                         * pick an unread split to read; synchronize among
                         * simultaneous partitions in the same machine
                         */
                        synchronized (executed) {
                            if (executed[i] == false) {
                                executed[i] = true;
                            } else {
                                continue;
                            }
                        }

                        /**
                         * read the split
                         */
                        TaskAttemptContext context = ctxFactory.createContext(job.getConfiguration(), i);
                        context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                        RecordReader reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                        reader.initialize(inputSplits.get(i), context);
                        while (reader.nextKeyValue() == true) {
                            parser.parse(reader.getCurrentKey(), reader.getCurrentValue(), writer,
                                    inputSplits.get(i).toString());
                        }
                    }
                }
                parser.close(writer);
            } catch (Throwable th) {
                writer.fail();
                throw new HyracksDataException(th);
            } finally {
                writer.close();
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}