Example usage for org.apache.hadoop.mapreduce Job getInstance

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce Job.getInstance.

Prototype

public static Job getInstance() throws IOException 

Document

Creates a new Job with no particular Cluster.
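
A minimal sketch of the basic pattern, assuming Hadoop's mapreduce API is on the classpath (the class name JobInstanceExample is a placeholder, not part of the API):

import java.io.IOException;
import org.apache.hadoop.mapreduce.Job;

public class JobInstanceExample {
    public static void main(String[] args) throws IOException {
        // Creates a new Job with no particular Cluster; a fresh
        // Configuration is allocated internally.
        Job job = Job.getInstance();
        job.setJobName("example");

        // The job's Configuration can be tuned after creation,
        // e.g. before setting mapper/reducer classes and paths.
        job.getConfiguration().set("mapreduce.job.reduces", "2");

        System.out.println(job.getJobName());
    }
}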

Usage

From source file:com.google.cloud.dataflow.contrib.hadoop.HadoopFileSource.java

License:Apache License

@Override
public long getEstimatedSizeBytes(PipelineOptions options) {
    long size = 0;
    try {
        Job job = Job.getInstance(); // new instance
        for (FileStatus st : listStatus(createFormat(job), job)) {
            size += st.getLen();
        }
    } catch (IOException | NoSuchMethodException | InvocationTargetException | IllegalAccessException
            | InstantiationException e) {
        // ignore, and return 0
    }
    return size;
}

From source file:com.google.cloud.dataflow.sdk.io.hdfs.HDFSFileSink.java

License:Apache License

private Job jobInstance() throws IOException {
    Job job = Job.getInstance();
    if (serializableConfiguration != null) {
        for (Map.Entry<String, String> entry : serializableConfiguration.get()) {
            job.getConfiguration().set(entry.getKey(), entry.getValue());
        }
    }
    job.setJobID(jobId);
    return job;
}

From source file:com.google.cloud.dataflow.sdk.io.hdfs.HDFSFileSource.java

License:Apache License

@Override
public long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
    long size = 0;
    Job job = Job.getInstance(); // new instance
    for (FileStatus st : listStatus(createFormat(job), job)) {
        size += st.getLen();
    }
    return size;
}

From source file:com.google.cloud.dataflow.sdk.io.hdfs.HDFSFileSource.java

License:Apache License

@Override
public void validate() {
    if (validate) {
        try {
            FileSystem fs = FileSystem.get(new URI(filepattern), Job.getInstance().getConfiguration());
            FileStatus[] fileStatuses = fs.globStatus(new Path(filepattern));
            checkState(fileStatuses != null && fileStatuses.length > 0, "Unable to find any files matching %s",
                    filepattern);
        } catch (IOException | URISyntaxException e) {
            throw new RuntimeException(e);
        }
    }
}

From source file:com.jbw.mutioutputformat.PatitionByStation.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path input = new Path(conf.get("input"));
    Path output = new Path(conf.get("output"));
    Job job = Job.getInstance(); // note: creates its own Configuration; conf above is only used to read the paths
    job.setJarByClass(PatitionByStation.class);
    job.setJobName("papapa");
    job.setMapperClass(StationMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setReducerClass(StationReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.jbw.taroutputformat.JobDriver.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path in = new Path(conf.get("input"));
    Path out = new Path(conf.get("output"));
    Job job = Job.getInstance();
    job.setJobName("test");
    job.setInputFormatClass(TextInputFormat.class); // FileInputFormat is abstract; a concrete InputFormat is required
    job.setOutputFormatClass(TarOutputFormat.class);
    FileInputFormat.addInputPath(job, in);
    TarOutputFormat.setOutputPath(job, out);
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonDriver.java

License:Open Source License

public static void main(String[] args) throws Exception {

    logger.info("Logger - Converting Kissmetrics Json to Valid Json files");
    System.out.println("Converting Kissmetrics Json to Valid Json files");
    System.out.println("defaultCharacterEncoding by property: " + System.getProperty("file.encoding"));
    System.out.println("defaultCharacterEncoding by code: " + getDefaultCharEncoding());
    System.out.println("defaultCharacterEncoding by charSet: " + Charset.defaultCharset());

    Job job = Job.getInstance();
    job.setJarByClass(KissmetricsJsonToEnrichedJsonDriver.class);
    job.setJobName("Kissmetrics Json to valid and enriched Json files");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Optionally override the number of reducers with the third argument
    int numberOfReducers = 2;
    if (args.length > 2 && args[2] != null) {
        numberOfReducers = Integer.parseInt(args[2]);
        if (numberOfReducers <= 0) {
            numberOfReducers = 2;
        }
    }

    job.setMapperClass(com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(
            com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonReducer.class);
    job.setNumReduceTasks(numberOfReducers);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.justgiving.raven.kissmetrics.schema.KissmetricsJsonToSchemaDriver.java

License:Open Source License

public static void main(String[] args) throws Exception {

    int numberOfReducers = 1;
    if (args.length > 2 && args[2] != null) {
        numberOfReducers = Integer.parseInt(args[2]);
        if (numberOfReducers <= 0) {
            numberOfReducers = 1;
        }
    }

    System.out.println("Kissmetrics Json Schema Extrator");

    Job job = Job.getInstance();
    job.setJarByClass(KissmetricsJsonToSchemaDriver.class);
    job.setJobName("Kissmetrics Json Schema Extrator");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(com.justgiving.raven.kissmetrics.schema.KissmetricsJsonToSchemaMapper.class);
    job.setReducerClass(com.justgiving.raven.kissmetrics.schema.KissmetricsJsonToSchemaReducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.knewton.mapreduce.SSTableRecordReaderTest.java

License:Apache License

@Before
public void setup() throws IOException {
    job = Job.getInstance();
    conf = job.getConfiguration();
    attemptId = new TaskAttemptID();
    conf.setInt("mapreduce.task.attempt.id", attemptId.getId());
    conf.set("mapreduce.cluster.temp.dir", "tempdir");

    Path inputPath = new Path(TABLE_PATH_STR);
    inputSplit = new FileSplit(inputPath, 0, 1, null);
    Descriptor desc = Descriptor.fromFilename(TABLE_PATH_STR);

    doReturn(desc).when(ssTableColumnRecordReader).getDescriptor();
    doReturn(desc).when(ssTableRowRecordReader).getDescriptor();

    doNothing().when(ssTableColumnRecordReader).copyTablesToLocal(any(FileSystem.class), any(FileSystem.class),
            any(Path.class), any(TaskAttemptContext.class));
    doNothing().when(ssTableRowRecordReader).copyTablesToLocal(any(FileSystem.class), any(FileSystem.class),
            any(Path.class), any(TaskAttemptContext.class));

    doReturn(ssTableReader).when(ssTableColumnRecordReader).openSSTableReader(any(IPartitioner.class),
            any(CFMetaData.class));
    doReturn(ssTableReader).when(ssTableRowRecordReader).openSSTableReader(any(IPartitioner.class),
            any(CFMetaData.class));
    when(ssTableReader.getScanner()).thenReturn(tableScanner);
}

From source file:com.ov.project.dev.crawler.ClientOVSocket.java

public Map<VelibStation, Prediction> jobToDo(Map<VelibKey, Integer> iDataProvided) {

    System.setProperty("hadoop.home.dir", BundelUtils.get("hadoop.home"));

    // Create a Java version of the Spark Context from the configuration
    JavaSparkContext lSparkctx = SingletonWrappers.sparkContextGetInstance();

    Job lJob;

    try {
        lJob = Job.getInstance();
        FileInputFormat.setInputPaths(lJob, new Path(BundelUtils.get("data.frame.path")));
        FileInputFormat.setInputDirRecursive(lJob, true);
    } catch (IOException e1) {
        e1.printStackTrace();
        System.exit(1);
    }

    Prediction predi = new Prediction();

    //   com.ov.PredictionsBuilder.runPredictions(BundelUtils.get("bruteData.path"), BundelUtils.get("static.path"), BundelUtils.get("output.path"), BundelUtils.get("model.path"), BundelUtils.get("hadoop.home"), iSave, Calendar.MINUTE, new TimeStamp(), new TimeStamp(), BundelUtils.get("license.path"));

    //   JavaRDD<Text> sourceData = lSparkctx
    //         .newAPIHadoopRDD(lJob.getConfiguration(), TextInputFormat.class, LongWritable.class, Text.class)
    //         .values();

    // Each line will be translated to a session defined by the IP address
    //   JavaPairRDD<VelibStation, Prediction> lsession = sourceData
    //         .mapToPair(
    //               w -> new Tuple2<VelibStation, Prediction>(LogParser.getFirstToken(w), LogParser.parseTokenz(w)))
    //         .reduceByKey((a, b) -> reduceByIP(a, b));

    // Save the word count back out to a text file, causing evaluation.
    //      FileUtils.deleteQuietly(new File(BundelUtils.get("suffix.for.result.file")));
    //      lsession.saveAsTextFile(BundelUtils.get("suffix.for.result.file"));

    return null; // the prediction pipeline above is commented out, so nothing is computed yet
}