Usage examples for org.apache.hadoop.mapreduce.Job.getConfiguration()
public Configuration getConfiguration()
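In all of the examples below, getConfiguration() returns the live Configuration backing the job, so properties set on it before submission are serialized with the job and become visible to its tasks. A minimal sketch of that pattern (the property keys here are hypothetical placeholders, not taken from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        // getConfiguration() exposes the job's underlying Configuration.
        Configuration conf = job.getConfiguration();
        conf.set("my.custom.property", "some-value"); // hypothetical key
        conf.setInt("my.custom.retries", 3);          // typed setters also exist
        System.out.println(conf.get("my.custom.property"));
    }
}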
From source file: com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorage.java
License: Apache License
/**
 * Get the avro schema from "location" and return the converted
 * PigSchema.
 */
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
    /* get avro schema */
    AvroStorageLog.funcCall("getSchema");
    if (inputAvroSchema == null) {
        Configuration conf = job.getConfiguration();
        // If, within a script, you store to one location and then read from that
        // same location using AvroStorage, getPaths will be empty. Since
        // getSchema is called during script parsing, we don't want to fail
        // here if the path is not found.
        Set<Path> paths = AvroStorageUtils.getPaths(location, conf, false);
        if (!paths.isEmpty()) {
            setInputAvroSchema(paths, conf);
        }
    }
    if (inputAvroSchema != null) {
        AvroStorageLog.details("avro input schema:" + inputAvroSchema);
        /* convert to pig schema */
        ResourceSchema pigSchema = AvroSchema2Pig.convert(inputAvroSchema);
        AvroStorageLog.details("pig input schema:" + pigSchema);
        if (pigSchema.getFields().length == 1) {
            pigSchema = pigSchema.getFields()[0].getSchema();
        }
        Properties udfProps = getUDFProperties();
        udfProps.put(AVRO_INPUT_SCHEMA_PROPERTY, inputAvroSchema.toString());
        udfProps.put(AVRO_INPUT_PIG_SCHEMA_PROPERTY, pigSchema);
        if (schemaToMergedSchemaMap != null) {
            HashMap<URI, Map<Integer, Integer>> mergedSchemaMap = new HashMap<URI, Map<Integer, Integer>>();
            for (Entry<Path, Map<Integer, Integer>> entry : schemaToMergedSchemaMap.entrySet()) {
                // Path is not serializable
                mergedSchemaMap.put(entry.getKey().toUri(), entry.getValue());
            }
            udfProps.put(AVRO_MERGED_SCHEMA_PROPERTY, ObjectSerializer.serialize(mergedSchemaMap));
        }
        return pigSchema;
    } else {
        return null;
    }
}
From source file: com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroOutputFormat.java
License: Apache License
/**
 * Enable output compression using the deflate codec and
 * specify its level.
 */
public static void setDeflateLevel(Job job, int level) {
    FileOutputFormat.setCompressOutput(job, true);
    job.getConfiguration().setInt(DEFLATE_LEVEL_KEY, level);
}
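A hedged usage sketch for the helper above, assuming deflate's usual 1-9 level range (DEFLATE_LEVEL_KEY is a constant defined elsewhere in PigAvroOutputFormat; the level chosen here is arbitrary):

Job job = Job.getInstance(new Configuration());
job.setOutputFormatClass(PigAvroOutputFormat.class);
// Level 6 trades compression ratio against CPU time; any value 1-9 is valid.
PigAvroOutputFormat.setDeflateLevel(job, 6);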
From source file: com.linkedin.cubert.utils.CubertMD.java
License: Open Source License
private static void writeMetaFile(String metaFilePath, HashMap<String, String> metaFileKeyValues)
        throws IOException {
    Job tempjob = new Job();
    Configuration tempconf = tempjob.getConfiguration();
    FileSystem fs = FileSystem.get(tempconf);
    FSDataOutputStream outStream = fs.create(new Path(metaFilePath + "/.meta"));
    for (String key : metaFileKeyValues.keySet())
        outStream.write((key + " " + metaFileKeyValues.get(key) + "\n").getBytes());
    outStream.flush();
    outStream.close();
}
From source file: com.linkedin.cubert.utils.CubertMD.java
License: Open Source License
public static HashMap<String, String> readMetafile(String metaFilePath) throws IOException {
    Job tempjob = new Job();
    Configuration tempconf = tempjob.getConfiguration();
    FileSystem fs = FileSystem.get(tempconf);
    HashMap<String, String> result = new HashMap<String, String>();
    FSDataInputStream inStream;
    try {
        inStream = fs.open(new Path(metaFilePath + "/.meta"));
        BufferedReader breader = new BufferedReader(new InputStreamReader(inStream));
        String line;
        while ((line = breader.readLine()) != null) {
            String[] splits = line.split("\\s+");
            result.put(splits[0], splits[1]);
        }
    } catch (IOException e) {
        return result;
    }
    return result;
}
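readMetafile (public, unlike the private writeMetaFile above) parses "<path>/.meta" as whitespace-separated key/value lines, one pair per line, and returns whatever it managed to read if the file is missing. A small sketch of calling it; the path is a hypothetical placeholder:

// "/data/myrelation" is hypothetical; readMetafile opens "/data/myrelation/.meta".
HashMap<String, String> meta = CubertMD.readMetafile("/data/myrelation");
for (Map.Entry<String, String> e : meta.entrySet()) {
    System.out.println(e.getKey() + " = " + e.getValue());
}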
From source file: com.linkedin.hadoop.example.WordCountCounters.java
License: Apache License
/**
 * Azkaban will look for a method named `run` to start your job. Use this method to set up all the
 * Hadoop-related configuration for your job and submit it.
 *
 * @throws Exception If there is an exception during the configuration or submission of your job
 */
public void run() throws Exception {
    _logger.info(String.format("Configuring job for the class %s", getClass().getSimpleName()));

    Job job = Job.getInstance(getConf());
    job.setJarByClass(WordCountJob.class);
    job.setJobName(_name);

    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(WordCountCombiner.class);
    job.setReducerClass(WordCountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    String inputPath = _properties.getProperty("input.path");
    String outputPath = _properties.getProperty("output.path");
    boolean forceOverwrite = Boolean.parseBoolean(_properties.getProperty("force.output.overwrite", "false"));

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Before we submit the job, remove the old output directory
    if (forceOverwrite) {
        FileSystem fs = FileSystem.get(job.getConfiguration());
        fs.delete(FileOutputFormat.getOutputPath(job), true);
    }

    // Since we have Kerberos enabled at LinkedIn, we must add the token to our configuration. If
    // you don't use Kerberos security for your Hadoop cluster, you don't need this code.
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
                System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Submit the job for execution
    _logger.info(String.format("About to submit the job named %s", _name));
    boolean succeeded = job.waitForCompletion(true);

    // Before we return, display our custom counters for the job in the Azkaban logs
    long inputWords = job.getCounters().findCounter(WordCountCounters.INPUT_WORDS).getValue();
    _logger.info(String.format("Read a total of %d input words", inputWords));

    // Azkaban will not realize the Hadoop job failed unless you specifically throw an exception
    if (!succeeded) {
        throw new Exception(String.format("Azkaban job %s failed", _name));
    }
}
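The run() method above is driven entirely by job properties. A hypothetical set of values for the three keys it reads, sketched as Java for consistency with the rest of this page (the paths are placeholders):

Properties props = new Properties();
props.setProperty("input.path", "/data/wordcount/input");   // placeholder path
props.setProperty("output.path", "/data/wordcount/output"); // placeholder path
props.setProperty("force.output.overwrite", "true");        // delete old output first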
From source file: com.linkedin.mr_kluj.GenericClojureJob.java
License: Apache License
public void run() {
    info("Starting " + getClass().getSimpleName());

    /*** Get clojure source ***/
    final String cljSource;
    if (props.getProperty(LI_CLJ_SOURCE) == null) {
        final String resourceName = props.getProperty("li.clj.source.file");
        if (resourceName == null) {
            throw new RuntimeException(
                    "Must define either li.clj.source or li.clj.source.file on the Props object.");
        }

        URL resource = getClass().getClassLoader().getResource(resourceName);

        if (resource == null) {
            // Perhaps it's a URL for a Hadoop-understood file-system
            try {
                resource = getScriptFromPath(new Configuration(), resourceName).toURI().toURL();
            } catch (Exception e) {
                // perhaps it wasn't...
            }
        }

        if (resource == null) {
            // Maybe it's a file
            File theFile = new File(resourceName);
            if (theFile.exists()) {
                try {
                    resource = theFile.toURI().toURL();
                } catch (MalformedURLException e) {
                    throw new RuntimeException("WTF?", e);
                }
            }
        }

        if (resource == null) {
            throw new RuntimeException(
                    String.format("Resource[%s] does not exist on the classpath.", resourceName));
        }

        try {
            cljSource = new String(getBytes(resource.openStream()));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        props.setProperty(LI_CLJ_SOURCE, cljSource);
    } else {
        cljSource = props.getProperty(LI_CLJ_SOURCE);
    }

    final String theActualFunction = String.format(
            "(require '[com.linkedin.mr-kluj.job :as job])\n\n" + "%s\n" + "(map job/starter the-jobs)\n",
            cljSource);

    info("--- Source: ---");
    info(theActualFunction);
    info(" --------- ");

    boolean jobCompleted;
    try {
        RT.var("clojure.core", "require").invoke(Symbol.intern("clojure.main"));
        Var.pushThreadBindings(RT.map(RT.var("clojure.core", "*warn-on-reflection*"), RT.T,
                RT.var("user", "*context*"), null, RT.var("user", "*props*"), props));

        Iterable<IFn> jobs = (Iterable<IFn>) clojure.lang.Compiler.load(new StringReader(theActualFunction),
                "start-job-input", "clj-job");

        int count = 0;
        for (IFn ifn : jobs) {
            Job job = (Job) ifn.invoke();
            job.getConfiguration().set(LI_CLJ_SOURCE, cljSource);
            job.getConfiguration().set(LI_CLJ_JOB_INDEX, String.valueOf(count));

            ByteArrayOutputStream baos = new ByteArrayOutputStream(1024 * 10);
            props.storeToXML(baos, null);
            job.getConfiguration().set(LI_CLJ_PROPERTIES, new String(baos.toByteArray()));

            info(String.format("Starting job %s[%s]", job.getJobID(), job.getJobName()));
            jobCompleted = job.waitForCompletion(true);
            ++count;

            if (!jobCompleted) {
                throw new RuntimeException(String.format("Job[%s] failed for some reason.", job.getJobID()));
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file: com.linkedin.pinot.hadoop.io.PinotOutputFormat.java
License: Apache License
public static void setTempSegmentDir(Job job, String segmentDir) {
    job.getConfiguration().set(PinotOutputFormat.TEMP_SEGMENT_DIR, segmentDir);
}
From source file: com.linkedin.pinot.hadoop.io.PinotOutputFormat.java
License: Apache License
public static void setTableName(Job job, String table) {
    job.getConfiguration().set(PinotOutputFormat.TABLE_NAME, table);
}
From source file: com.linkedin.pinot.hadoop.io.PinotOutputFormat.java
License: Apache License
public static void setSegmentName(Job job, String segmentName) {
    job.getConfiguration().set(PinotOutputFormat.SEGMENT_NAME, segmentName);
}
From source file: com.linkedin.pinot.hadoop.io.PinotOutputFormat.java
License: Apache License
public static void setSchema(Job job, Schema schema) {
    job.getConfiguration().set(PinotOutputFormat.SCHEMA, schema.getJSONSchema());
}
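The four PinotOutputFormat setters above all follow the same pattern: stash a driver-side setting in the job's Configuration under a well-known key so the output format can read it back at write time. A sketch of a driver wiring them together; every value below is a hypothetical placeholder, and schema is assumed to be a Pinot Schema instance built elsewhere:

Job job = Job.getInstance(new Configuration());
job.setOutputFormatClass(PinotOutputFormat.class);
PinotOutputFormat.setTempSegmentDir(job, "/tmp/pinot-segments"); // placeholder dir
PinotOutputFormat.setTableName(job, "myTable");                  // placeholder table
PinotOutputFormat.setSegmentName(job, "myTable_2024_01");        // placeholder segment
PinotOutputFormat.setSchema(job, schema);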