Example usage for org.apache.hadoop.mapred JobConf set

List of usage examples for org.apache.hadoop.mapred JobConf set

Introduction

This page lists usage examples for org.apache.hadoop.mapred.JobConf.set.

Prototype

public void set(String name, String value) 

Document

Set the value of the name property.
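
Before the examples, here is a minimal sketch of the call itself; the property name and value are arbitrary placeholders rather than keys Hadoop itself reads:

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // set(String, String) stores the pair in the job configuration
        conf.set("example.property", "example value");

        // get(String) returns the stored value, or null if the property was never set
        System.out.println(conf.get("example.property"));
    }
}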

Usage

From source file:cascading.tap.hadoop.HadoopMR1TapPlatformTest.java

License:Open Source License

@Test
public void testCombinedHfs() throws Exception {
    getPlatform().copyFromLocal(inputFileLower);
    getPlatform().copyFromLocal(inputFileUpper);

    Hfs sourceLower = new Hfs(new TextLine(new Fields("offset", "line")), InputData.inputFileLower);
    Hfs sourceUpper = new Hfs(new TextLine(new Fields("offset", "line")), InputData.inputFileUpper);

    // create a CombinedHfs instance on these files
    Tap source = new MultiSourceTap<Hfs, JobConf, RecordReader>(sourceLower, sourceUpper);

    FlowProcess<JobConf> process = getPlatform().getFlowProcess();
    JobConf conf = process.getConfigCopy();

    // set the combine flag
    conf.setBoolean(HfsProps.COMBINE_INPUT_FILES, true);

    conf.set("cascading.flow.platform", "hadoop"); // only supported on mr based platforms

    // test the input format and the split
    source.sourceConfInit(process, conf);

    InputFormat inputFormat = conf.getInputFormat();

    assertEquals(Hfs.CombinedInputFormat.class, inputFormat.getClass());
    InputSplit[] splits = inputFormat.getSplits(conf, 1);

    assertEquals(1, splits.length);

    validateLength(source.openForRead(process), 10);
}

From source file:cascading.tap.hadoop.io.MultiInputFormat.java

License:Open Source License

/**
 * Used to set the current JobConf with all sub-job configurations.
 *
 * @param toJob
 * @param fromJobs
 */
public static void addInputFormat(JobConf toJob, JobConf... fromJobs) {
    toJob.setInputFormat(MultiInputFormat.class);
    List<Map<String, String>> configs = new ArrayList<Map<String, String>>();
    List<Path> allPaths = new ArrayList<Path>();

    boolean isLocal = false;

    for (JobConf fromJob : fromJobs) {
        if (fromJob.get("mapred.input.format.class") == null)
            throw new CascadingException(
                    "mapred.input.format.class is required, should be set in source Scheme#sourceConfInit");

        configs.add(HadoopUtil.getConfig(toJob, fromJob));
        Collections.addAll(allPaths, FileInputFormat.getInputPaths(fromJob));

        if (!isLocal)
            isLocal = HadoopUtil.isLocal(fromJob);
    }

    if (!allPaths.isEmpty()) // it's possible there aren't any
        FileInputFormat.setInputPaths(toJob, (Path[]) allPaths.toArray(new Path[allPaths.size()]));

    try {
        toJob.set("cascading.multiinputformats", HadoopUtil.serializeBase64(configs, toJob, true));
    } catch (IOException exception) {
        throw new CascadingException("unable to pack input formats", exception);
    }

    if (isLocal)
        HadoopUtil.setLocal(toJob);
}
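
For context, a hedged sketch of how a caller might feed two pre-configured JobConf instances to this method; the input paths are placeholders, and each fromJob needs an input format set, as the check above requires:

JobConf toJob = new JobConf();

JobConf fromJobLower = new JobConf();
fromJobLower.setInputFormat(TextInputFormat.class); // populates mapred.input.format.class
FileInputFormat.setInputPaths(fromJobLower, new Path("/data/lower"));

JobConf fromJobUpper = new JobConf();
fromJobUpper.setInputFormat(TextInputFormat.class);
FileInputFormat.setInputPaths(fromJobUpper, new Path("/data/upper"));

// toJob now uses MultiInputFormat and carries both serialized sub-configurations
MultiInputFormat.addInputFormat(toJob, fromJobLower, fromJobUpper);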

From source file:cascading.tap.hadoop.MultiInputFormat.java

License:Open Source License

/**
 * Used to set the current JobConf with all sub-job configurations.
 *
 * @param toJob
 * @param fromJobs
 */
public static void addInputFormat(JobConf toJob, JobConf... fromJobs) {
    toJob.setInputFormat(MultiInputFormat.class);
    List<Map<String, String>> configs = new ArrayList<Map<String, String>>();
    List<Path> allPaths = new ArrayList<Path>();

    boolean isLocal = false;

    for (JobConf fromJob : fromJobs) {
        configs.add(getConfig(toJob, fromJob));
        Collections.addAll(allPaths, FileInputFormat.getInputPaths(fromJob));

        if (!isLocal)
            isLocal = fromJob.get("mapred.job.tracker").equalsIgnoreCase("local");
    }

    FileInputFormat.setInputPaths(toJob, (Path[]) allPaths.toArray(new Path[allPaths.size()]));

    try {
        toJob.set("cascading.multiinputformats", Util.serializeBase64(configs));
    } catch (IOException exception) {
        throw new CascadingException("unable to pack input formats", exception);
    }

    if (isLocal)
        toJob.set("mapred.job.tracker", "local");
}

From source file:cascading.tap.hadoop.MultiInputFormat.java

License:Open Source License

static JobConf mergeConf(JobConf job, Map<String, String> config, boolean directly) {
    JobConf currentConf = directly ? job : new JobConf(job);

    for (String key : config.keySet()) {
        if (LOG.isDebugEnabled())
            LOG.debug("merging key: " + key + " value: " + config.get(key));

        currentConf.set(key, config.get(key));
    }

    return currentConf;
}

From source file:cascading.tap.Hfs.java

License:Open Source License

private void makeLocal(JobConf conf, Path qualifiedPath, String infoMessage) {
    if (!conf.get("mapred.job.tracker", "").equalsIgnoreCase("local")
            && qualifiedPath.toUri().getScheme().equalsIgnoreCase("file")) {
        if (LOG.isInfoEnabled())
            LOG.info(infoMessage + toString());

        conf.set("mapred.job.tracker", "local"); // force job to run locally
    }
}

From source file:cascading.tuple.hadoop.HadoopSerializationTest.java

License:Open Source License

public void testInputOutputSerialization() throws IOException {
    long time = System.currentTimeMillis();

    JobConf jobConf = new JobConf();

    jobConf.set("io.serializations",
            TestSerialization.class.getName() + "," + WritableSerialization.class.getName()); // disable/replace WritableSerialization class
    jobConf.set("cascading.serialization.tokens",
            "1000=" + BooleanWritable.class.getName() + ",10001=" + Text.class.getName()); // not using Text, just testing parsing

    TupleSerialization tupleSerialization = new TupleSerialization(jobConf);

    File file = new File(outputPath);

    file.mkdirs();
    file = new File(file, "/test.bytes");

    TupleOutputStream output = new TupleOutputStream(new FileOutputStream(file, false),
            tupleSerialization.getElementWriter());

    for (int i = 0; i < 501; i++) // 501 is arbitrary
    {
        String aString = "string number " + i;
        double random = Math.random();

        output.writeTuple(new Tuple(i, aString, random, new TestText(aString),
                new Tuple("inner tuple", new BytesWritable("some string".getBytes())),
                new BytesWritable(Integer.toString(i).getBytes("UTF-8")), new BooleanWritable(false)));
    }

    output.close();

    assertEquals("wrong size", 89967L, file.length()); // just makes sure the file size doesnt change from expected

    TupleInputStream input = new TupleInputStream(new FileInputStream(file),
            tupleSerialization.getElementReader());

    int k = -1;
    for (int i = 0; i < 501; i++) {
        Tuple tuple = input.readTuple();
        int value = tuple.getInteger(0);
        assertTrue("wrong diff", value - k == 1);
        assertTrue("wrong type", tuple.get(3) instanceof TestText);
        assertTrue("wrong type", tuple.get(4) instanceof Tuple);
        assertTrue("wrong type", tuple.get(5) instanceof BytesWritable);

        byte[] bytes = ((BytesWritable) tuple.get(5)).getBytes();
        String string = new String(bytes, 0, bytes.length > 1 ? bytes.length - 1 : bytes.length, "UTF-8");
        assertEquals("wrong value", Integer.parseInt(string), i);
        assertTrue("wrong type", tuple.get(6) instanceof BooleanWritable);
        k = value;
    }

    input.close();

    System.out.println("time = " + (System.currentTimeMillis() - time));
}

From source file:cascading.tuple.hadoop.TupleSerialization.java

License:Open Source License

/**
 * Adds this class as a Hadoop Serialization class. This method is safe to call redundantly.
 *
 * @param jobConf of type JobConf
 */
public static void setSerializations(JobConf jobConf) {
    String serializations = getSerializations(jobConf);

    if (serializations.contains(TupleSerialization.class.getName()))
        return;

    jobConf.set("io.serializations",
            Util.join(",", Util.removeNulls(serializations, TupleSerialization.class.getName())));
}
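
A typical call site, sketched below, simply passes the job configuration before submitting the job:

JobConf jobConf = new JobConf();

// appends cascading.tuple.hadoop.TupleSerialization to io.serializations; a second call is a no-op
TupleSerialization.setSerializations(jobConf);

System.out.println(jobConf.get("io.serializations"));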

From source file:cascalog.TupleMemoryInputFormat.java

License:Open Source License

public static void setObject(JobConf conf, String key, Object o) {
    conf.set(key, StringUtils.byteToHexString(serialize(o)));
}
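
Presumably a matching read path exists; the following sketch shows how such a value could be decoded again, where getObject and deserialize are hypothetical counterparts to the helpers above:

public static Object getObject(JobConf conf, String key) {
    String hex = conf.get(key);

    if (hex == null)
        return null;

    // reverse byteToHexString, then let the (hypothetical) deserialize helper rebuild the object
    return deserialize(StringUtils.hexStringToByte(hex));
}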

From source file:co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.java

License:Apache License

/**
 * setInput() takes a custom query and a separate "bounding query" to use
 * instead of the custom "count query" used by DBInputFormat.
 */
public static void setInput(JobConf job, String inputQuery, String inputBoundingQuery, ArrayList params)
        throws IOException {
    DBInputFormat.setInput(job, GenericDBWritable.class, inputQuery, "");

    if (inputBoundingQuery != null) {
        job.set(DBConfiguration.INPUT_BOUNDING_QUERY, inputBoundingQuery);
    }
    if (params != null) {
        DefaultStringifier<ArrayList> stringifier = new DefaultStringifier<ArrayList>(job, ArrayList.class);
        job.set(HIHOConf.QUERY_PARAMS, stringifier.toString(params));
        logger.debug("Converted params and saved them into config");
    }
    job.setInputFormat(DBQueryInputFormat.class);
}
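
As a rough usage sketch, with both queries invented placeholders and no extra parameters:

JobConf job = new JobConf();

String inputQuery = "SELECT id, name FROM employee";
String inputBoundingQuery = "SELECT MIN(id), MAX(id) FROM employee";

// configures GenericDBWritable input, sets the bounding query, and selects DBQueryInputFormat
DBQueryInputFormat.setInput(job, inputQuery, inputBoundingQuery, null);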

From source file:com.acme.extensions.data.SeedingHadoopAdapter.java

License:Apache License

@Override
protected void set(JobConf conf) throws Exception {
    super.set(conf);
    //  set the input format to this adapter
    conf.setInputFormat(this.getClass());
    conf.set("seed", String.valueOf(seed));
}