Example usage for org.apache.hadoop.mapred JobConf getInputFormat

List of usage examples for org.apache.hadoop.mapred JobConf getInputFormat

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf getInputFormat.

Prototype

public InputFormat getInputFormat() 

Source Link

Document

Get the InputFormat implementation for the map-reduce job; defaults to TextInputFormat if not specified explicitly.
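
A minimal sketch of the default behavior, assuming only the Hadoop mapred API on the classpath; the /tmp/input path and the GetInputFormatExample class name are illustrative and not taken from the examples below. When no input format has been set on the JobConf, getInputFormat() returns a TextInputFormat instance, which can then compute splits for any configured input paths.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;

public class GetInputFormatExample {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf();

        // No setInputFormat(...) call was made, so the documented default applies.
        InputFormat format = job.getInputFormat();
        System.out.println(format.getClass().getName()); // org.apache.hadoop.mapred.TextInputFormat

        // The returned format can compute splits once an input path is configured.
        FileInputFormat.setInputPaths(job, new Path("/tmp/input")); // illustrative path, assumed to exist
        for (InputSplit split : format.getSplits(job, 1)) {
            System.out.println(split);
        }
    }
}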

Usage

From source file:org.apache.drill.exec.store.hive.HiveInputReader.java

License:Apache License

public static void main(String args[]) throws Exception {
    /*
        String[] columnNames = {"n_nationkey", "n_name", "n_regionkey",   "n_comment"};
        String[] columnTypes = {"bigint", "string", "bigint", "string"};
            
        List<FieldSchema> cols = Lists.newArrayList();
            
        for (int i = 0; i < columnNames.length; i++) {
          cols.add(new FieldSchema(columnNames[i], columnTypes[i], null));
        }
        String location = "file:///tmp/nation_s";
        String inputFormat = TextInputFormat.class.getCanonicalName();
        String serdeLib = LazySimpleSerDe.class.getCanonicalName();
    //    String inputFormat = HiveHBaseTableInputFormat.class.getCanonicalName();
    //    String serdeLib = HBaseSerDe.class.getCanonicalName();
        Map<String, String> serdeParams = new HashMap();
    //    serdeParams.put("serialization.format", "1");
    //    serdeParams.put("hbase.columns.mapping", ":key,f:name,f:regionkey,f:comment");
        serdeParams.put("serialization.format", "|");
        serdeParams.put("field.delim", "|");
            
            
        Map<String, String> tableParams = new HashMap();
        tableParams.put("hbase.table.name", "nation");
        SerDeInfo serDeInfo = new SerDeInfo(null, serdeLib, serdeParams);
        StorageDescriptor storageDescriptor = new StorageDescriptor(cols, location, inputFormat, null, false, -1, serDeInfo, null, null, null);
        Table table = new Table("table", "default", "sphillips", 0, 0, 0, storageDescriptor, new ArrayList<FieldSchema>(), tableParams, null, null, "MANAGED_TABLE");
        Properties properties = MetaStoreUtils.getTableMetadata(table);
        */

    HiveConf conf = new HiveConf();
    conf.set("hive.metastore.uris", "thrift://10.10.31.51:9083");
    HiveMetaStoreClient client = new HiveMetaStoreClient(conf);
    Table table = client.getTable("default", "nation");
    Properties properties = MetaStoreUtils.getTableMetadata(table);

    Path path = new Path(table.getSd().getLocation());
    JobConf job = new JobConf();
    for (Object obj : properties.keySet()) {
        job.set((String) obj, (String) properties.get(obj));
    }
    //    job.set("hbase.zookeeper.quorum", "10.10.31.51");
    //    job.set("hbase.zookeeper.property.clientPort", "5181");
    InputFormat f = (InputFormat) Class.forName(table.getSd().getInputFormat()).getConstructor().newInstance();
    job.setInputFormat(f.getClass());
    FileInputFormat.addInputPath(job, path);
    InputFormat format = job.getInputFormat();
    SerDe serde = (SerDe) Class.forName(table.getSd().getSerdeInfo().getSerializationLib()).getConstructor()
            .newInstance();
    serde.initialize(job, properties);
    ObjectInspector inspector = serde.getObjectInspector();
    ObjectInspector.Category cat = inspector.getCategory();
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(inspector);
    List<String> columns = null;
    List<TypeInfo> colTypes = null;
    List<ObjectInspector> fieldObjectInspectors = Lists.newArrayList();

    switch (typeInfo.getCategory()) {
    case STRUCT:
        columns = ((StructTypeInfo) typeInfo).getAllStructFieldNames();
        colTypes = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos();
        for (int i = 0; i < columns.size(); i++) {
            System.out.print(columns.get(i));
            System.out.print(" ");
            System.out.print(colTypes.get(i));
        }
        System.out.println("");
        for (StructField field : ((StructObjectInspector) inspector).getAllStructFieldRefs()) {
            fieldObjectInspectors.add(field.getFieldObjectInspector());
        }
    }

    for (InputSplit split : format.getSplits(job, 1)) {
        String encoded = serializeInputSplit(split);
        System.out.println(encoded);
        InputSplit newSplit = deserializeInputSplit(encoded, split.getClass().getCanonicalName());
        System.out.print("Length: " + newSplit.getLength() + " ");
        System.out.print("Locations: ");
        for (String loc : newSplit.getLocations())
            System.out.print(loc + " ");
        System.out.println();
    }

    for (InputSplit split : format.getSplits(job, 1)) {
        RecordReader reader = format.getRecordReader(split, job, Reporter.NULL);
        Object key = reader.createKey();
        Object value = reader.createValue();
        int count = 0;
        while (reader.next(key, value)) {
            List<Object> values = ((StructObjectInspector) inspector)
                    .getStructFieldsDataAsList(serde.deserialize((Writable) value));
            StructObjectInspector sInsp = (StructObjectInspector) inspector;
            Object obj = sInsp.getStructFieldData(serde.deserialize((Writable) value),
                    sInsp.getStructFieldRef("n_name"));
            System.out.println(obj);
            /*
            for (Object obj : values) {
              PrimitiveObjectInspector.PrimitiveCategory pCat = ((PrimitiveObjectInspector)fieldObjectInspectors.get(count)).getPrimitiveCategory();
              Object pObj = ((PrimitiveObjectInspector)fieldObjectInspectors.get(count)).getPrimitiveJavaObject(obj);
              System.out.print(pObj + " ");
            }
            */
            System.out.println("");
        }
    }
}

From source file:org.apache.drill.exec.store.hive.HiveMetadataProvider.java

License:Apache License

private List<InputSplitWrapper> splitInputWithUGI(final Properties properties, final StorageDescriptor sd,
        final Partition partition) throws Exception {
    watch.start();
    try {
        return ugi.doAs(new PrivilegedExceptionAction<List<InputSplitWrapper>>() {
            public List<InputSplitWrapper> run() throws Exception {
                final List<InputSplitWrapper> splits = Lists.newArrayList();
                final JobConf job = new JobConf(hiveConf);
                HiveUtilities.addConfToJob(job, properties);
                job.setInputFormat(HiveUtilities.getInputFormatClass(job, sd, hiveReadEntry.getTable()));
                final Path path = new Path(sd.getLocation());
                final FileSystem fs = path.getFileSystem(job);

                if (fs.exists(path)) {
                    FileInputFormat.addInputPath(job, path);
                    final InputFormat<?, ?> format = job.getInputFormat();
                    for (final InputSplit split : format.getSplits(job, 1)) {
                        splits.add(new InputSplitWrapper(split, partition));
                    }
                }

                return splits;
            }
        });
    } catch (final InterruptedException | IOException e) {
        final String errMsg = String.format("Failed to create input splits: %s", e.getMessage());
        logger.error(errMsg, e);
        throw new DrillRuntimeException(errMsg, e);
    } finally {
        logger.trace("Took {} s to get splits from {}", watch.elapsed(TimeUnit.NANOSECONDS) / 1000,
                sd.getLocation());
        watch.stop();
    }
}

From source file:org.apache.drill.exec.store.hive.HiveRecordReader.java

License:Apache License

private void init() throws ExecutionSetupException {
    final JobConf job = new JobConf(hiveConf);

    // Get the configured default val
    defaultPartitionValue = hiveConf.get(ConfVars.DEFAULTPARTITIONNAME.varname);

    Properties tableProperties;
    try {
        tableProperties = MetaStoreUtils.getTableMetadata(table);
        final Properties partitionProperties = (partition == null) ? tableProperties
                : HiveUtilities.getPartitionMetadata(partition, table);
        HiveUtilities.addConfToJob(job, partitionProperties);

        final SerDe tableSerDe = createSerDe(job, table.getSd().getSerdeInfo().getSerializationLib(),
                tableProperties);
        final StructObjectInspector tableOI = getStructOI(tableSerDe);

        if (partition != null) {
            partitionSerDe = createSerDe(job, partition.getSd().getSerdeInfo().getSerializationLib(),
                    partitionProperties);
            partitionOI = getStructOI(partitionSerDe);

            finalOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partitionOI, tableOI);
            partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partitionOI, finalOI);
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, partition.getSd(), table));
        } else {
            // For non-partitioned tables, there is no need to create converter as there are no schema changes expected.
            partitionSerDe = tableSerDe;
            partitionOI = tableOI;
            partTblObjectInspectorConverter = null;
            finalOI = tableOI;
            job.setInputFormat(HiveUtilities.getInputFormatClass(job, table.getSd(), table));
        }

        // Get list of partition column names
        final List<String> partitionNames = Lists.newArrayList();
        for (FieldSchema field : table.getPartitionKeys()) {
            partitionNames.add(field.getName());
        }

        // We should always get the columns names from ObjectInspector. For some of the tables (ex. avro) metastore
        // may not contain the schema, instead it is derived from other sources such as table properties or external file.
        // SerDe object knows how to get the schema with all the config and table properties passed in initialization.
        // ObjectInspector created from the SerDe object has the schema.
        final StructTypeInfo sTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(finalOI);
        final List<String> tableColumnNames = sTypeInfo.getAllStructFieldNames();

        // Select list of columns for project pushdown into Hive SerDe readers.
        final List<Integer> columnIds = Lists.newArrayList();
        if (isStarQuery()) {
            selectedColumnNames = tableColumnNames;
            for (int i = 0; i < selectedColumnNames.size(); i++) {
                columnIds.add(i);
            }
            selectedPartitionNames = partitionNames;
        } else {
            selectedColumnNames = Lists.newArrayList();
            for (SchemaPath field : getColumns()) {
                String columnName = field.getRootSegment().getPath();
                if (partitionNames.contains(columnName)) {
                    selectedPartitionNames.add(columnName);
                } else {
                    columnIds.add(tableColumnNames.indexOf(columnName));
                    selectedColumnNames.add(columnName);
                }
            }
        }
        ColumnProjectionUtils.appendReadColumns(job, columnIds, selectedColumnNames);

        for (String columnName : selectedColumnNames) {
            ObjectInspector fieldOI = finalOI.getStructFieldRef(columnName).getFieldObjectInspector();
            TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldOI.getTypeName());

            selectedColumnObjInspectors.add(fieldOI);
            selectedColumnTypes.add(typeInfo);
            selectedColumnFieldConverters.add(HiveFieldConverter.create(typeInfo, fragmentContext));
        }

        for (int i = 0; i < table.getPartitionKeys().size(); i++) {
            FieldSchema field = table.getPartitionKeys().get(i);
            if (selectedPartitionNames.contains(field.getName())) {
                TypeInfo pType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
                selectedPartitionTypes.add(pType);

                if (partition != null) {
                    selectedPartitionValues.add(HiveUtilities.convertPartitionType(pType,
                            partition.getValues().get(i), defaultPartitionValue));
                }
            }
        }
    } catch (Exception e) {
        throw new ExecutionSetupException("Failure while initializing HiveRecordReader: " + e.getMessage(), e);
    }

    if (!empty) {
        try {
            reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat()
                    .getRecordReader(inputSplit, job, Reporter.NULL);
        } catch (Exception e) {
            throw new ExecutionSetupException(
                    "Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
        }
        key = reader.createKey();
        skipRecordsInspector = new SkipRecordsInspector(tableProperties, reader);
    }
}

From source file:org.apache.drill.exec.store.hive.readers.HiveAbstractReader.java

License:Apache License

/**
 * Initializes the next reader if available; closes the previous reader if any.
 *
 * @param job map / reduce job configuration.
 * @return true if a new reader was initialized, false if no more readers are available
 * @throws ExecutionSetupException if the record reader could not be initialized
 */
protected boolean initNextReader(JobConf job) throws ExecutionSetupException {
    if (inputSplitsIterator.hasNext()) {
        if (reader != null) {
            closeReader();
        }
        InputSplit inputSplit = inputSplitsIterator.next();
        try {
            reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat()
                    .getRecordReader(inputSplit, job, Reporter.NULL);
            logger.trace("hive reader created: {} for inputSplit {}", reader.getClass().getName(),
                    inputSplit.toString());
        } catch (Exception e) {
            throw new ExecutionSetupException(
                    "Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
        }
        return true;
    }
    return false;
}

From source file:org.apache.hyracks.dataflow.hadoop.HadoopReadOperatorDescriptor.java

License:Apache License

@SuppressWarnings("deprecation")
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions)
        throws HyracksDataException {
    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        @Override
        public void initialize() throws HyracksDataException {
            try {
                JobConf conf = DatatypeHelper.map2JobConf((HashMap) jobConfMap);
                Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
                conf.setClassLoader(this.getClass().getClassLoader());
                RecordReader hadoopRecordReader;
                Object key;
                Object value;
                Object[] splits = inputSplitsProxy.toInputSplits(conf);
                Object inputSplit = splits[partition];

                if (conf.getUseNewMapper()) {
                    JobContext context = new ContextFactory().createJobContext(conf);
                    org.apache.hadoop.mapreduce.InputFormat inputFormat = (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils
                            .newInstance(context.getInputFormatClass(), conf);
                    TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(jobConf, null);
                    hadoopRecordReader = (RecordReader) inputFormat.createRecordReader(
                            (org.apache.hadoop.mapreduce.InputSplit) inputSplit, taskAttemptContext);
                } else {
                    Class inputFormatClass = conf.getInputFormat().getClass();
                    InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf);
                    hadoopRecordReader = (RecordReader) inputFormat.getRecordReader(
                            (org.apache.hadoop.mapred.InputSplit) inputSplit, conf, createReporter());
                }

                Class inputKeyClass;
                Class inputValueClass;
                if (hadoopRecordReader instanceof SequenceFileRecordReader) {
                    inputKeyClass = ((SequenceFileRecordReader) hadoopRecordReader).getKeyClass();
                    inputValueClass = ((SequenceFileRecordReader) hadoopRecordReader).getValueClass();
                } else {
                    inputKeyClass = hadoopRecordReader.createKey().getClass();
                    inputValueClass = hadoopRecordReader.createValue().getClass();
                }

                key = hadoopRecordReader.createKey();
                value = hadoopRecordReader.createValue();
                FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
                RecordDescriptor outputRecordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                        (Class<? extends Writable>) hadoopRecordReader.createKey().getClass(),
                        (Class<? extends Writable>) hadoopRecordReader.createValue().getClass());
                int nFields = outputRecordDescriptor.getFieldCount();
                ArrayTupleBuilder tb = new ArrayTupleBuilder(nFields);
                writer.open();
                try {
                    while (hadoopRecordReader.next(key, value)) {
                        tb.reset();
                        switch (nFields) {
                        case 2:
                            tb.addField(outputRecordDescriptor.getFields()[0], key);
                            // intentional fall-through: after the key, case 1 appends the value
                        case 1:
                            tb.addField(outputRecordDescriptor.getFields()[1], value);
                        }
                        FrameUtils.appendToWriter(writer, appender, tb.getFieldEndOffsets(), tb.getByteArray(),
                                0, tb.getSize());
                    }
                    appender.flush(writer, true);
                } catch (Exception e) {
                    writer.fail();
                    throw new HyracksDataException(e);
                } finally {
                    writer.close();
                }
                hadoopRecordReader.close();
            } catch (InstantiationException e) {
                throw new HyracksDataException(e);
            } catch (IllegalAccessException e) {
                throw new HyracksDataException(e);
            } catch (ClassNotFoundException e) {
                throw new HyracksDataException(e);
            } catch (InterruptedException e) {
                throw new HyracksDataException(e);
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }
    };
}

From source file:org.apache.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor.java

License:Apache License

@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
        throws HyracksDataException {
    final InputSplit[] inputSplits = splitsFactory.getSplits();

    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        private String nodeName = ctx.getJobletContext().getApplicationContext().getNodeId();

        @SuppressWarnings("unchecked")
        @Override
        public void initialize() throws HyracksDataException {
            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
            try {
                writer.open();
                Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
                JobConf conf = confFactory.getConf();
                conf.setClassLoader(ctx.getJobletContext().getClassLoader());
                IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
                try {
                    parser.open(writer);
                    InputFormat inputFormat = conf.getInputFormat();
                    for (int i = 0; i < inputSplits.length; i++) {
                        /**
                         * read all the partitions scheduled to the current node
                         */
                        if (scheduledLocations[i].equals(nodeName)) {
                            /**
                             * pick an unread split to read
                             * synchronize among simultaneous partitions in the same machine
                             */
                            synchronized (executed) {
                                if (executed[i] == false) {
                                    executed[i] = true;
                                } else {
                                    continue;
                                }
                            }

                            /**
                             * read the split
                             */
                            RecordReader reader = inputFormat.getRecordReader(inputSplits[i], conf,
                                    Reporter.NULL);
                            Object key = reader.createKey();
                            Object value = reader.createValue();
                            while (reader.next(key, value) == true) {
                                parser.parse(key, value, writer, inputSplits[i].toString());
                            }
                        }
                    }
                } finally {
                    parser.close(writer);
                }
            } catch (Throwable th) {
                writer.fail();
                throw new HyracksDataException(th);
            } finally {
                writer.close();
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}

From source file:org.apache.hyracks.imru.dataflow.Hdtest.java

License:Apache License

public static JobSpecification createJob() throws Exception {
    JobSpecification spec = new JobSpecification();
    spec.setFrameSize(4096);

    String PATH_TO_HADOOP_CONF = "/home/wangrui/a/imru/hadoop-0.20.2/conf";
    String HDFS_INPUT_PATH = "/customer/customer.tbl,/customer_result/part-0";
    JobConf conf = new JobConf();
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
    conf.setInputFormat(TextInputFormat.class);
    RecordDescriptor recordDesc = new RecordDescriptor(
            new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
    InputSplit[] splits = conf.getInputFormat().getSplits(conf, 1);
    HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(spec, recordDesc, conf, splits,
            new String[] { "NC0", "NC1" }, new IKeyValueParserFactory<LongWritable, Text>() {
                @Override
                public IKeyValueParser<LongWritable, Text> createKeyValueParser(final IHyracksTaskContext ctx) {
                    return new IKeyValueParser<LongWritable, Text>() {
                        TupleWriter tupleWriter;

                        @Override
                        public void open(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter = new TupleWriter(ctx, writer, 1);
                        }

                        @Override
                        public void parse(LongWritable key, Text value, IFrameWriter writer, String fileString)
                                throws HyracksDataException {
                            try {
                                tupleWriter.write(value.getBytes(), 0, value.getLength());
                                tupleWriter.finishField();
                                tupleWriter.finishTuple();
                            } catch (IOException e) {
                                throw new HyracksDataException(e);
                            }
                        }

                        @Override
                        public void close(IFrameWriter writer) throws HyracksDataException {
                            tupleWriter.close();
                        }
                    };
                }

            });

    // createPartitionConstraint(spec, readOperator, new String[] {"NC0"});
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, readOperator, new String[] { "NC0", "NC1" });

    IOperatorDescriptor writer = new HDFSOD(spec, null, null, null);
    // createPartitionConstraint(spec, writer, outSplits);

    spec.connect(new OneToOneConnectorDescriptor(spec), readOperator, 0, writer, 0);

    spec.addRoot(writer);
    return spec;
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1MapTask.java

License:Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJobEx job = taskCtx.job();

    HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext) taskCtx;

    if (taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();

    try {
        JobConf jobConf = taskCtx0.jobConf();

        InputFormat inFormat = jobConf.getInputFormat();

        HadoopInputSplit split = info().inputSplit();

        InputSplit nativeSplit;

        if (split instanceof HadoopFileBlock) {
            HadoopFileBlock block = (HadoopFileBlock) split;

            nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(),
                    EMPTY_HOSTS);
        } else
            nativeSplit = (InputSplit) taskCtx0.getNativeSplit(split);

        assert nativeSplit != null;

        Reporter reporter = new HadoopV1Reporter(taskCtx);

        HadoopV1OutputCollector collector = null;

        try {
            collector = collector(jobConf, taskCtx0, !job.info().hasCombiner() && !job.info().hasReducer(),
                    fileName(), taskCtx0.attemptId());

            RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

            Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

            Object key = reader.createKey();
            Object val = reader.createValue();

            assert mapper != null;

            try {
                try {
                    while (reader.next(key, val)) {
                        if (isCancelled())
                            throw new HadoopTaskCancelledException("Map task cancelled.");

                        mapper.map(key, val, collector, reporter);
                    }

                    taskCtx.onMapperFinished();
                } finally {
                    mapper.close();
                }
            } finally {
                collector.closeWriter();
            }

            collector.commit();
        } catch (Exception e) {
            if (collector != null)
                collector.abort();

            throw new IgniteCheckedException(e);
        }
    } finally {
        HadoopMapperUtils.clearMapperIndex();
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.v1.HadoopV1Splitter.java

License:Apache License

/**
 * @param jobConf Job configuration.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<HadoopInputSplit> splitJob(JobConf jobConf) throws IgniteCheckedException {
    try {
        InputFormat<?, ?> format = jobConf.getInputFormat();

        assert format != null;

        InputSplit[] splits = format.getSplits(jobConf, 0);

        Collection<HadoopInputSplit> res = new ArrayList<>(splits.length);

        for (int i = 0; i < splits.length; i++) {
            InputSplit nativeSplit = splits[i];

            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit) nativeSplit;

                res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(),
                        s.getLength()));
            } else
                res.add(HadoopUtils.wrapSplit(i, nativeSplit, nativeSplit.getLocations()));
        }

        return res;
    } catch (IOException e) {
        throw new IgniteCheckedException(e);
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.v1.GridHadoopV1MapTask.java

License:Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
@Override
public void run(GridHadoopTaskContext taskCtx) throws IgniteCheckedException {
    GridHadoopJob job = taskCtx.job();

    GridHadoopV2TaskContext ctx = (GridHadoopV2TaskContext) taskCtx;

    JobConf jobConf = ctx.jobConf();

    InputFormat inFormat = jobConf.getInputFormat();

    GridHadoopInputSplit split = info().inputSplit();

    InputSplit nativeSplit;

    if (split instanceof GridHadoopFileBlock) {
        GridHadoopFileBlock block = (GridHadoopFileBlock) split;

        nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(),
                EMPTY_HOSTS);
    } else
        nativeSplit = (InputSplit) ctx.getNativeSplit(split);

    assert nativeSplit != null;

    Reporter reporter = new GridHadoopV1Reporter(taskCtx);

    GridHadoopV1OutputCollector collector = null;

    try {
        collector = collector(jobConf, ctx, !job.info().hasCombiner() && !job.info().hasReducer(), fileName(),
                ctx.attemptId());

        RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);

        Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

        Object key = reader.createKey();
        Object val = reader.createValue();

        assert mapper != null;

        try {
            try {
                while (reader.next(key, val)) {
                    if (isCancelled())
                        throw new GridHadoopTaskCancelledException("Map task cancelled.");

                    mapper.map(key, val, collector, reporter);
                }
            } finally {
                mapper.close();
            }
        } finally {
            collector.closeWriter();
        }

        collector.commit();
    } catch (Exception e) {
        if (collector != null)
            collector.abort();

        throw new IgniteCheckedException(e);
    }
}