com.hazelcast.jet.benchmark.wordcount.HdfsToMap.java Source code

Java tutorial

Introduction

Here is the source code for com.hazelcast.jet.benchmark.wordcount.HdfsToMap.java

Source

/*
 * Copyright (c) 2008-2017, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.benchmark.wordcount;

import com.hazelcast.core.IMap;
import com.hazelcast.jet.Jet;
import com.hazelcast.jet.JetInstance;
import com.hazelcast.jet.Util;
import com.hazelcast.jet.config.JobConfig;
import com.hazelcast.jet.core.DAG;
import com.hazelcast.jet.core.Vertex;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

import java.util.Map;

import static com.hazelcast.jet.Util.entry;
import static com.hazelcast.jet.core.Edge.between;
import static com.hazelcast.jet.core.processor.Processors.mapP;
import static com.hazelcast.jet.core.processor.SinkProcessors.writeMapP;
import static com.hazelcast.jet.hadoop.HdfsProcessors.readHdfsP;

/**
 * Benchmark helper that loads text from HDFS into a Hazelcast {@link IMap}
 * by running a three-vertex Jet job (reader, mapper, writer) and prints the
 * elapsed wall-clock time of that job in milliseconds.
 *
 * <p>Command-line arguments: {@code <name> <input path> <parallelism>}.
 */
public class HdfsToMap {

    /**
     * Entry point. Clears the target map, runs the fill job and prints
     * {@code Time=<millis>} to standard output.
     *
     * @param args {@code args[0]} is the name of the target map, {@code args[1]}
     *             the HDFS input path and {@code args[2]} the local parallelism
     *             applied to every vertex of the job
     * @throws NumberFormatException if {@code args[2]} is not a valid integer
     * @throws Exception             if the job itself fails
     */
    public static void main(String[] args) throws Exception {
        // All three positional arguments are read below, so anything shorter
        // must get the usage message rather than an ArrayIndexOutOfBoundsException.
        if (args.length < 3) {
            System.out.println("Usage: hdfs-to-map <name> <input path> <parallelism>");
            return;
        }

        String name = args[0];
        String inputPath = args[1];
        int parallelism = Integer.parseInt(args[2]);

        JetInstance client = Jet.newJetClient();
        try {
            // Clearing happens inside the try so the client is shut down even
            // if the map cannot be obtained or cleared; it stays outside the
            // timed section so the reported time covers only the job.
            IMap<Long, String> map = client.getMap(name);
            map.clear();

            long begin = System.currentTimeMillis();
            fillMap(client, name, inputPath, parallelism);
            long elapsed = System.currentTimeMillis() - begin;
            System.out.println("Time=" + elapsed);
        } finally {
            client.shutdown();
        }
    }

    /**
     * Builds and runs the Jet job that copies the HDFS input into the map,
     * blocking until the job completes.
     *
     * <p>The DAG has three vertices: {@code reader} emits the entries read
     * through {@link TextInputFormat}, {@code mapper} converts each
     * {@code (LongWritable, Text)} entry to a {@code (long, String)} entry,
     * and {@code writer} puts the converted entries into the map.
     *
     * @param client      Jet client used to submit the job
     * @param name        name of the target map
     * @param inputPath   HDFS path to read from
     * @param parallelism local parallelism set on all three vertices
     * @throws Exception if the job fails
     */
    private static void fillMap(JetInstance client, String name, String inputPath, int parallelism)
            throws Exception {
        DAG dag = new DAG();
        JobConf conf = new JobConf();
        conf.setInputFormat(TextInputFormat.class);
        TextInputFormat.addInputPath(conf, new Path(inputPath));

        Vertex reader = dag.newVertex("reader", readHdfsP(conf, Util::entry));
        // Unwrap the Hadoop writables into plain Long/String values before
        // they are stored in the map.
        Vertex mapper = dag.newVertex("mapper",
                mapP((Map.Entry<LongWritable, Text> e) -> entry(e.getKey().get(), e.getValue().toString())));
        Vertex writer = dag.newVertex("writer", writeMapP(name));

        reader.localParallelism(parallelism);
        mapper.localParallelism(parallelism);
        writer.localParallelism(parallelism);

        dag.edge(between(reader, mapper));
        dag.edge(between(mapper, writer));

        // Register this class with the job configuration; presumably needed so
        // the cluster can load the mapper lambda defined here - confirm against
        // the JobConfig documentation.
        JobConfig jobConfig = new JobConfig();
        jobConfig.addClass(HdfsToMap.class);

        client.newJob(dag, jobConfig).join();
    }
}