Example usage for org.apache.hadoop.mapred JobConf get

List of usage examples for org.apache.hadoop.mapred JobConf get

Introduction

This page shows example usages of the get method of org.apache.hadoop.mapred.JobConf.

Prototype

public String get(String name) 

Source Link

Document

Gets the value of the name property, or null if no such property exists.

Usage

From source file:BU.MET.CS755.SpeciesIterMapper2.java

/**
 * Loads this mapper's settings from the job configuration.
 *
 * Both the "iterationNumber" and "totalLinks" properties are parsed as longs;
 * a missing or non-numeric value makes Long.parseLong throw NumberFormatException.
 *
 * @param job job configuration to read the properties from
 */
public void configure(JobConf job) {
    final String iterationValue = job.get("iterationNumber");
    iterationNumber = Long.parseLong(iterationValue);

    final String linksValue = job.get("totalLinks");
    totalLinks = Long.parseLong(linksValue);
}

From source file:BU.MET.CS755.SpeciesIterReducer2.java

/**
 * Loads this reducer's iteration counter from the job configuration.
 *
 * The "iterationNumber" property is parsed as a long; a missing or
 * non-numeric value makes Long.parseLong throw NumberFormatException.
 *
 * @param job job configuration to read the property from
 */
public void configure(JobConf job) {
    final String iterationValue = job.get("iterationNumber");
    iterationNumber = Long.parseLong(iterationValue);
}

From source file:cascading.flow.hadoop.HadoopFlowStep.java

License:Open Source License

/**
 * Removes the temporary resources used by this FlowStep instance.
 * Failures while deleting are logged as warnings and never propagated.
 *
 * @param config of type JobConf
 */
public void clean(JobConf config) {
    // 1. step state registered under "cascading.flow.step.path", if any
    final String statePath = config.get("cascading.flow.step.path");

    if (statePath != null) {
        try {
            HadoopUtil.removeStateFromDistCache(config, statePath);
        } catch (IOException ioFailure) {
            logWarn("unable to remove step state file: " + statePath, ioFailure);
        }
    }

    // 2. the intermediate sink, when one exists
    if (tempSink != null) {
        try {
            tempSink.deleteResource(config);
        } catch (Exception failure) {
            // best effort only: a failed delete must not fail the application
            logWarn("unable to remove temporary file: " + tempSink, failure);
        }
    }

    // 3. the real sink: delete it when it is temporary and the flow either
    //    succeeded or has no run id; otherwise only clean its metadata
    final boolean deleteSink = getSink().isTemporary()
            && (getFlow().getFlowStats().isSuccessful() || getFlow().getRunID() == null);

    if (!deleteSink) {
        cleanTapMetaData(config, getSink());
    } else {
        try {
            getSink().deleteResource(config);
        } catch (Exception failure) {
            // best effort only: a failed delete must not fail the application
            logWarn("unable to remove temporary file: " + getSink(), failure);
        }
    }

    // 4. metadata of every trap
    for (Tap trap : getTraps()) {
        cleanTapMetaData(config, trap);
    }
}

From source file:cascading.hbase.helper.TableInputFormat.java

License:Apache License

/**
 * Configures this input format from the job: opens the table named by the
 * INPUT_TABLE property and installs the Scan to run against it.
 *
 * When the SCAN property is present it is decoded with convertStringToScan;
 * otherwise a new Scan is assembled from the individual SCAN_* properties.
 * Every failure is logged rather than thrown, so the table may remain unset
 * and the scan handed to setScan may be null or only partially configured.
 *
 * @param job job configuration to read the table and scan settings from
 */
public void configure(JobConf job) {
    final String tableName = job.get(INPUT_TABLE);
    try {
        setHTable(new HTable(new Configuration(job), tableName));
    } catch (Exception tableFailure) {
        LOG.error(StringUtils.stringifyException(tableFailure));
    }

    Scan scan = null;
    final String serializedScan = job.get(SCAN);

    if (serializedScan == null) {
        // no pre-built scan supplied: assemble one from the discrete properties
        try {
            scan = new Scan();

            final String columns = job.get(SCAN_COLUMNS);
            if (columns != null) {
                addColumns(scan, columns);
            }

            final String family = job.get(SCAN_COLUMN_FAMILY);
            if (family != null) {
                scan.addFamily(Bytes.toBytes(family));
            }

            final String timestamp = job.get(SCAN_TIMESTAMP);
            if (timestamp != null) {
                scan.setTimeStamp(Long.parseLong(timestamp));
            }

            // a time range is applied only when both bounds are configured
            final String rangeStart = job.get(SCAN_TIMERANGE_START);
            final String rangeEnd = job.get(SCAN_TIMERANGE_END);
            if (rangeStart != null && rangeEnd != null) {
                scan.setTimeRange(Long.parseLong(rangeStart), Long.parseLong(rangeEnd));
            }

            final String maxVersions = job.get(SCAN_MAXVERSIONS);
            if (maxVersions != null) {
                scan.setMaxVersions(Integer.parseInt(maxVersions));
            }

            final String cachedRows = job.get(SCAN_CACHEDROWS);
            if (cachedRows != null) {
                scan.setCaching(Integer.parseInt(cachedRows));
            }

            // false by default, full table scans generate too much BC churn
            scan.setCacheBlocks(job.getBoolean(SCAN_CACHEBLOCKS, false));
        } catch (Exception scanFailure) {
            LOG.error(StringUtils.stringifyException(scanFailure));
        }
    } else {
        try {
            scan = convertStringToScan(serializedScan);
        } catch (IOException decodeFailure) {
            LOG.error("An error occurred.", decodeFailure);
        }
    }

    setScan(scan);
}

From source file:cascading.hbase.helper.TableInputFormat.java

License:Apache License

/**
 * Validates the job's input settings before it runs.
 *
 * @param job job configuration to validate
 * @throws IOException if more than one input path is configured, if no table
 *                     is connected, or if the COLUMN_LIST property is missing or empty
 */
public void validateInput(JobConf job) throws IOException {
    // reject a missing path array or more than one path
    // (an empty array passes this check)
    Path[] tableNames = FileInputFormat.getInputPaths(job);
    if (tableNames == null || tableNames.length > 1) {
        throw new IOException("expecting one table name");
    }

    // connected to table?
    if (getHTable() == null) {
        // the path array may be empty here, so guard the index access instead of
        // letting an ArrayIndexOutOfBoundsException mask the real problem
        String tableName = tableNames.length == 0 ? "<none>" : tableNames[0].getName();
        throw new IOException("could not connect to table '" + tableName + "'");
    }

    // expecting at least one column
    String colArg = job.get(COLUMN_LIST);
    if (colArg == null || colArg.length() == 0) {
        throw new IOException("expecting at least one column");
    }
}

From source file:cascading.jdbc.JDBCTap.java

License:Open Source License

/**
 * Prepares the job configuration for writing to this tap's table.
 * Does nothing when this tap is not a sink.
 *
 * @param conf job configuration to initialize
 * @throws IOException declared by the overridden method
 */
@Override
public void sinkInit(JobConf conf) throws IOException {
    if (!isSink()) {
        return;
    }

    if (isReplace()) {
        // do not delete if initialized from within a task
        final boolean outsideTask = conf.get("mapred.task.partition") == null;

        if (outsideTask && !deletePath(conf)) {
            throw new TapException("unable to drop table: " + tableDesc.getTableName());
        }
    }

    if (!makeDirs(conf)) {
        throw new TapException("unable to create table: " + tableDesc.getTableName());
    }

    // credentials are passed along only when a username was supplied
    if (username != null) {
        DBConfiguration.configureDB(conf, driverClassName, connectionUrl, username, password);
    } else {
        DBConfiguration.configureDB(conf, driverClassName, connectionUrl);
    }

    super.sinkInit(conf);
}

From source file:cascading.tap.hadoop.Hadoop18TapUtil.java

License:Open Source License

/**
 * should only be called if not in a Flow
 *
 * @param conf/* w w w  .j ava2 s  .co m*/
 * @throws IOException
 */
public static void setupJob(JobConf conf) throws IOException {
    Path outputPath = FileOutputFormat.getOutputPath(conf);

    if (outputPath == null)
        return;

    if (getFSSafe(conf, outputPath) == null)
        return;

    if (conf.get("mapred.task.id") == null) // need to stuff a fake id
    {
        String mapper = conf.getBoolean("mapred.task.is.map", true) ? "m" : "r";
        conf.set("mapred.task.id", String.format("attempt_%012d_0000_%s_000000_0",
                (int) Math.rint(System.currentTimeMillis()), mapper));
    }

    makeTempPath(conf);

    if (writeDirectlyToWorkingPath(conf, outputPath)) {
        LOG.info("writing directly to output path: " + outputPath);
        setWorkOutputPath(conf, outputPath);
        return;
    }

    // "mapred.work.output.dir"
    Path taskOutputPath = getTaskOutputPath(conf);
    setWorkOutputPath(conf, taskOutputPath);
}

From source file:cascading.tap.hadoop.Hadoop18TapUtil.java

License:Open Source License

/**
 * Registers one more user of the task work directory named by the
 * "mapred.work.output.dir" property by incrementing its entry in pathCounts.
 *
 * Returns without doing anything when the property is unset or when getFSSafe
 * yields no file system for that directory.
 *
 * @param conf job configuration to read the work directory and task id from
 * @throws IOException
 */
static synchronized void setupTask(JobConf conf) throws IOException {
    final String workDir = conf.get("mapred.work.output.dir");

    if (workDir == null) {
        return;
    }

    if (getFSSafe(conf, new Path(workDir)) == null) {
        return;
    }

    LOG.info("setting up task: '" + conf.get("mapred.task.id") + "' - " + workDir);

    // lazily create the usage counter for this work directory
    AtomicInteger users = pathCounts.get(workDir);

    if (users == null) {
        users = new AtomicInteger();
        pathCounts.put(workDir, users);
    }

    users.incrementAndGet();
}

From source file:cascading.tap.hadoop.Hadoop18TapUtil.java

License:Open Source License

/**
 * Reports whether the task has output waiting to be committed.
 *
 * @param conf job configuration to read the "mapred.work.output.dir" property from
 * @return true only when the property is set, getFSSafe yields a file system
 *         for that directory, and the directory exists on it
 * @throws IOException
 */
public static boolean needsTaskCommit(JobConf conf) throws IOException {
    final String workDir = conf.get("mapred.work.output.dir");

    if (workDir == null) {
        return false;
    }

    final Path taskOutput = new Path(workDir);
    final FileSystem fileSystem = getFSSafe(conf, taskOutput);

    return fileSystem != null && fileSystem.exists(taskOutput);
}

From source file:cascading.tap.hadoop.Hadoop18TapUtil.java

License:Open Source License

/**
 * Copies all files from the task output path to the job output path.
 *
 * The move only happens for the last registered user of the work directory:
 * the directory's counter in pathCounts is decremented and the method returns
 * early while it is still non-zero. Returns without doing anything when the
 * "mapred.work.output.dir" property is unset or when getFSSafe yields no
 * file system for it.
 *
 * @param conf job configuration to read the work directory and task id from
 * @throws IOException
 */
public static void commitTask(JobConf conf) throws IOException {
    String workpath = conf.get("mapred.work.output.dir");

    // no work directory configured means there is nothing to commit; without this
    // guard the Path constructor would be handed a null string
    if (workpath == null)
        return;

    Path taskOutputPath = new Path(workpath);

    FileSystem fs = getFSSafe(conf, taskOutputPath);

    if (fs == null)
        return;

    // a path with no counter entry still fails with a NullPointerException here
    AtomicInteger integer = pathCounts.get(taskOutputPath.toString());

    if (integer.decrementAndGet() != 0)
        return;

    String taskId = conf.get("mapred.task.id");

    LOG.info("committing task: '" + taskId + "' - " + taskOutputPath);

    if (writeDirectlyToWorkingPath(conf, taskOutputPath))
        return;

    if (fs.exists(taskOutputPath)) {
        // the job output directory is two levels above the task work directory
        Path jobOutputPath = taskOutputPath.getParent().getParent();
        // Move the task outputs to their final place
        moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath);

        // Delete the temporary task-specific output directory
        if (!fs.delete(taskOutputPath, true))
            LOG.info("failed to delete the temporary output directory of task: '" + taskId + "' - "
                    + taskOutputPath);

        LOG.info("saved output of task '" + taskId + "' to " + jobOutputPath);
    }
}