Example usage for org.apache.hadoop.mapred JobConf getBoolean

List of usage examples for org.apache.hadoop.mapred JobConf getBoolean

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.getBoolean.

Prototype

public boolean getBoolean(String name, boolean defaultValue) 

Document

Get the value of the name property as a boolean. If the property is not set, or its value is not a valid boolean string, the given defaultValue is returned.
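
Because a default is always supplied, getBoolean is safe to call for optional flags. Below is a minimal, self-contained sketch of the call pattern; the property name example.feature.enabled is illustrative only, not a real Hadoop key.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetBooleanExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Property not set yet: the supplied default (false) is returned.
        // The same happens if the stored value is not a valid boolean string.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("enabled = " + enabled); // enabled = false

        // Set the property, then read it back; getBoolean now returns true.
        conf.setBoolean("example.feature.enabled", true);
        System.out.println("enabled = " + conf.getBoolean("example.feature.enabled", false));
    }
}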

Usage

From source file: org.apache.oozie.action.hadoop.JavaActionExecutor.java

License: Apache License

@SuppressWarnings("unchecked")
JobConf createLauncherConf(FileSystem actionFs, Context context, WorkflowAction action, Element actionXml,
        Configuration actionConf) throws ActionExecutorException {
    try {

        // app path could be a file
        Path appPathRoot = new Path(context.getWorkflow().getAppPath());
        if (actionFs.isFile(appPathRoot)) {
            appPathRoot = appPathRoot.getParent();
        }

        // launcher job configuration
        JobConf launcherJobConf = createBaseHadoopConf(context, actionXml);
        // Cancel the delegation token on the launcher job, which stays alive until the child job(s) finish;
        // otherwise (in the mapred action) the token is not cancelled, so as not to disturb the running child job.
        launcherJobConf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", true);
        setupLauncherConf(launcherJobConf, actionXml, appPathRoot, context);

        String launcherTag = null;
        // Extract the tag and append the action name to maintain uniqueness.
        if (context.getVar(ActionStartXCommand.OOZIE_ACTION_YARN_TAG) != null) {
            launcherTag = context.getVar(ActionStartXCommand.OOZIE_ACTION_YARN_TAG);
        } else { // Kept to maintain backward compatibility with test cases.
            launcherTag = action.getId();
        }

        // Properties for when a launcher job's AM gets restarted
        if (ConfigurationService.getBoolean(HADOOP_YARN_KILL_CHILD_JOBS_ON_AMRESTART)) {
            // A launcher time filter is required to prune the search for the launcher tag.
            // The coordinator action nominal time is used as the launcher time, since the child job cannot
            // launch before the nominal time. The workflow creation time is good enough when the workflow
            // runs independently or is rerun from a failed node.
            long launcherTime = System.currentTimeMillis();
            String coordActionNominalTime = context.getProtoActionConf()
                    .get(CoordActionStartXCommand.OOZIE_COORD_ACTION_NOMINAL_TIME);
            if (coordActionNominalTime != null) {
                launcherTime = Long.parseLong(coordActionNominalTime);
            } else if (context.getWorkflow().getCreatedTime() != null) {
                launcherTime = context.getWorkflow().getCreatedTime().getTime();
            }
            LauncherMapperHelper.setupYarnRestartHandling(launcherJobConf, actionConf, launcherTag,
                    launcherTime);
        } else {
            LOG.info(MessageFormat.format("{0} is set to false, not setting YARN restart properties",
                    HADOOP_YARN_KILL_CHILD_JOBS_ON_AMRESTART));
        }

        String actionShareLibProperty = actionConf.get(ACTION_SHARELIB_FOR + getType());
        if (actionShareLibProperty != null) {
            launcherJobConf.set(ACTION_SHARELIB_FOR + getType(), actionShareLibProperty);
        }
        setLibFilesArchives(context, actionXml, appPathRoot, launcherJobConf);

        String jobName = launcherJobConf.get(HADOOP_JOB_NAME);
        if (jobName == null || jobName.isEmpty()) {
            jobName = XLog.format("oozie:launcher:T={0}:W={1}:A={2}:ID={3}", getType(),
                    context.getWorkflow().getAppName(), action.getName(), context.getWorkflow().getId());
            launcherJobConf.setJobName(jobName);
        }

        // Inject Oozie job information if enabled.
        injectJobInfo(launcherJobConf, actionConf, context, action);

        injectLauncherCallback(context, launcherJobConf);

        String jobId = context.getWorkflow().getId();
        String actionId = action.getId();
        Path actionDir = context.getActionDir();
        String recoveryId = context.getRecoveryId();

        // Getting the prepare XML from the action XML
        Namespace ns = actionXml.getNamespace();
        Element prepareElement = actionXml.getChild("prepare", ns);
        String prepareXML = "";
        if (prepareElement != null) {
            if (prepareElement.getChildren().size() > 0) {
                prepareXML = XmlUtils.prettyPrint(prepareElement).toString().trim();
            }
        }
        LauncherMapperHelper.setupLauncherInfo(launcherJobConf, jobId, actionId, actionDir, recoveryId,
                actionConf, prepareXML);

        // Set the launcher Main Class
        LauncherMapperHelper.setupMainClass(launcherJobConf, getLauncherMain(launcherJobConf, actionXml));
        LauncherMapperHelper.setupLauncherURIHandlerConf(launcherJobConf);
        LauncherMapperHelper.setupMaxOutputData(launcherJobConf, maxActionOutputLen);
        LauncherMapperHelper.setupMaxExternalStatsSize(launcherJobConf, maxExternalStatsSize);
        LauncherMapperHelper.setupMaxFSGlob(launcherJobConf, maxFSGlobMax);

        List<Element> list = actionXml.getChildren("arg", ns);
        String[] args = new String[list.size()];
        for (int i = 0; i < list.size(); i++) {
            args[i] = list.get(i).getTextTrim();
        }
        LauncherMapperHelper.setupMainArguments(launcherJobConf, args);

        // Make mapred.child.java.opts and mapreduce.map.java.opts equal, but give values from the latter priority; also append
        // <java-opt> and <java-opts> and give those highest priority
        StringBuilder opts = new StringBuilder(launcherJobConf.get(HADOOP_CHILD_JAVA_OPTS, ""));
        if (launcherJobConf.get(HADOOP_MAP_JAVA_OPTS) != null) {
            opts.append(" ").append(launcherJobConf.get(HADOOP_MAP_JAVA_OPTS));
        }
        List<Element> javaopts = actionXml.getChildren("java-opt", ns);
        for (Element opt : javaopts) {
            opts.append(" ").append(opt.getTextTrim());
        }
        Element opt = actionXml.getChild("java-opts", ns);
        if (opt != null) {
            opts.append(" ").append(opt.getTextTrim());
        }
        launcherJobConf.set(HADOOP_CHILD_JAVA_OPTS, opts.toString().trim());
        launcherJobConf.set(HADOOP_MAP_JAVA_OPTS, opts.toString().trim());

        // setting for uber mode
        if (launcherJobConf.getBoolean(HADOOP_YARN_UBER_MODE, false)) {
            if (checkPropertiesToDisableUber(launcherJobConf)) {
                launcherJobConf.setBoolean(HADOOP_YARN_UBER_MODE, false);
            } else {
                updateConfForUberMode(launcherJobConf);
            }
        }
        updateConfForJavaTmpDir(launcherJobConf);
        injectLauncherTimelineServiceEnabled(launcherJobConf, actionConf);

        // properties from action that are needed by the launcher (e.g. QUEUE NAME, ACLs)
        // maybe we should add queue to the WF schema, below job-tracker
        actionConfToLauncherConf(actionConf, launcherJobConf);

        return launcherJobConf;
    } catch (Exception ex) {
        throw convertException(ex);
    }
}

From source file: org.apache.oozie.action.hadoop.OozieActionConfiguratorForTest.java

License: Apache License

@Override
public void configure(JobConf actionConf) throws OozieActionConfiguratorException {
    if (actionConf.getBoolean("oozie.test.throw.exception", false)) {
        throw new OozieActionConfiguratorException("doh");
    }

    actionConf.set("A", "a");
    actionConf.set("B", "c");
}

From source file: org.apache.oozie.action.hadoop.TestJavaActionExecutor.java

License: Apache License

public void testUpdateConfForTimeLineServiceEnabled() throws Exception {
    Element actionXml = XmlUtils
            .parseXml("<java>" + "<job-tracker>" + getJobTrackerUri() + "</job-tracker>" + "<name-node>"
                    + getNameNodeUri() + "</name-node>" + "<main-class>MAIN-CLASS</main-class>" + "</java>");
    JavaActionExecutor ae = new JavaActionExecutor();
    XConfiguration protoConf = new XConfiguration();
    protoConf.set(WorkflowAppService.HADOOP_USER, getTestUser());
    WorkflowJobBean wf = createBaseWorkflow(protoConf, "action");
    WorkflowActionBean action = (WorkflowActionBean) wf.getActions().get(0);
    action.setType(ae.getType());
    Context context = new Context(wf, action);
    JobConf actionConf = new JobConf();

    // Test when server side setting is not enabled
    JobConf launcherConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, actionConf);
    if (HadoopShims.isYARN()) {
        assertEquals("true", launcherConf.get(JavaActionExecutor.HADOOP_YARN_TIMELINE_SERVICE_ENABLED));
    } else {
        assertNull(launcherConf.get(JavaActionExecutor.HADOOP_YARN_TIMELINE_SERVICE_ENABLED));
    }

    ConfigurationService.set("oozie.action.launcher." + JavaActionExecutor.HADOOP_YARN_TIMELINE_SERVICE_ENABLED,
            "true");

    // Test when server side setting is enabled but tez-site.xml is not in DistributedCache
    launcherConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, actionConf);
    if (HadoopShims.isYARN()) {
        assertEquals("true", launcherConf.get(JavaActionExecutor.HADOOP_YARN_TIMELINE_SERVICE_ENABLED));
    } else {
        assertNull(launcherConf.get(JavaActionExecutor.HADOOP_YARN_TIMELINE_SERVICE_ENABLED));
    }
    final Path tezSite = new Path("/tmp/tez-site.xml");
    final FSDataOutputStream out = getFileSystem().create(tezSite);
    out.close();

    // Test when server side setting is enabled and tez-site.xml is in DistributedCache
    Element actionXmlWithTez = XmlUtils.parseXml("<java>" + "<job-tracker>" + getJobTrackerUri()
            + "</job-tracker>" + "<name-node>" + getNameNodeUri() + "</name-node>"
            + "<main-class>MAIN-CLASS</main-class>" + "<file>" + tezSite + "</file>" + "</java>");
    launcherConf = ae.createLauncherConf(getFileSystem(), context, action, actionXmlWithTez, actionConf);
    assertTrue(launcherConf.getBoolean(JavaActionExecutor.HADOOP_YARN_TIMELINE_SERVICE_ENABLED, false));

    // Test when server side setting is enabled, tez-site.xml is in DistributedCache
    // but user has disabled in action configuration
    Element actionXmlATSDisabled = XmlUtils.parseXml("<java>" + "<job-tracker>" + getJobTrackerUri()
            + "</job-tracker>" + "<name-node>" + getNameNodeUri() + "</name-node>" + "<configuration>"
            + "<property><name>oozie.launcher.yarn.timeline-service.enabled</name>"
            + "<value>false</value></property>" + "</configuration>" + "<main-class>MAIN-CLASS</main-class>"
            + "<file>" + tezSite + "</file>" + "</java>");
    actionConf = ae.createBaseHadoopConf(context, actionXmlATSDisabled);
    ae.setupActionConf(actionConf, context, actionXmlATSDisabled, new Path("hdfs:///tmp/workflow"));
    launcherConf = ae.createLauncherConf(getFileSystem(), context, action, actionXmlATSDisabled, actionConf);
    assertFalse(launcherConf.getBoolean(JavaActionExecutor.HADOOP_YARN_TIMELINE_SERVICE_ENABLED, false));

    getFileSystem().delete(tezSite, true);
}

From source file: org.apache.oozie.action.hadoop.TestLauncher.java

License: Apache License

public void testSetupLauncherInfoHadoop2_0_2_alphaWorkaround() throws Exception {
    Path actionDir = getFsTestCaseDir();
    // Setting up the job configuration
    JobConf jobConf = Services.get().get(HadoopAccessorService.class)
            .createJobConf(new URI(getNameNodeUri()).getAuthority());
    jobConf.set("user.name", getTestUser());
    jobConf.set("fs.default.name", getNameNodeUri());

    Configuration actionConf = new XConfiguration();
    actionConf.set("mapreduce.job.cache.files", "a.jar,aa.jar#aa.jar");
    LauncherMapperHelper.setupLauncherInfo(jobConf, "1", "1@a", actionDir, "1@a-0", actionConf, "");
    assertFalse(jobConf.getBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", false));
    assertEquals("a.jar,aa.jar#aa.jar", actionConf.get("mapreduce.job.cache.files"));

    Services.get().getConf().setBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", true);
    actionConf = new XConfiguration();
    actionConf.set("mapreduce.job.cache.files", "a.jar,aa.jar#aa.jar");
    LauncherMapperHelper.setupLauncherInfo(jobConf, "1", "1@a", actionDir, "1@a-0", actionConf, "");
    assertTrue(jobConf.getBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", false));
    assertEquals("aa.jar#aa.jar", actionConf.get("mapreduce.job.cache.files"));
}

From source file: org.apache.oozie.action.hadoop.TestMapReduceActionExecutor.java

License: Apache License

@SuppressWarnings("unchecked")
public void testSetupMethods() throws Exception {
    MapReduceActionExecutor ae = new MapReduceActionExecutor();
    assertEquals(Arrays.asList(StreamingMain.class), ae.getLauncherClasses());

    Element actionXml = XmlUtils.parseXml("<map-reduce>" + "<job-tracker>" + getJobTrackerUri()
            + "</job-tracker>" + "<name-node>" + getNameNodeUri() + "</name-node>" + "<configuration>"
            + "<property><name>mapred.input.dir</name><value>IN</value></property>"
            + "<property><name>mapred.output.dir</name><value>OUT</value></property>" + "</configuration>"
            + "</map-reduce>");

    XConfiguration protoConf = new XConfiguration();
    protoConf.set(WorkflowAppService.HADOOP_USER, getTestUser());

    WorkflowJobBean wf = createBaseWorkflow(protoConf, "mr-action");
    WorkflowActionBean action = (WorkflowActionBean) wf.getActions().get(0);
    action.setType(ae.getType());

    Context context = new Context(wf, action);

    Configuration conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertEquals("IN", conf.get("mapred.input.dir"));
    JobConf launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf);
    assertEquals(false, launcherJobConf.getBoolean("mapreduce.job.complete.cancel.delegation.tokens", true));
    assertEquals(true, conf.getBoolean("mapreduce.job.complete.cancel.delegation.tokens", false));

    // Enable uber jars to test that MapReduceActionExecutor picks up the oozie.mapreduce.uber.jar property correctly
    Services serv = Services.get();
    boolean originalUberJarDisabled = serv.getConf().getBoolean("oozie.action.mapreduce.uber.jar.enable",
            false);
    serv.getConf().setBoolean("oozie.action.mapreduce.uber.jar.enable", true);

    actionXml = createUberJarActionXML(getNameNodeUri() + "/app/job.jar", "");
    conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertEquals(getNameNodeUri() + "/app/job.jar", conf.get("oozie.mapreduce.uber.jar")); // absolute path with namenode
    launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf);
    assertEquals(getNameNodeUri() + "/app/job.jar", launcherJobConf.getJar()); // same for launcher conf

    actionXml = createUberJarActionXML("/app/job.jar", "");
    conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertEquals(getNameNodeUri() + "/app/job.jar", conf.get("oozie.mapreduce.uber.jar")); // absolute path without namenode
    launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf);
    assertEquals(getNameNodeUri() + "/app/job.jar", launcherJobConf.getJar()); // same for launcher conf

    actionXml = createUberJarActionXML("job.jar", "");
    conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertEquals(getFsTestCaseDir() + "/job.jar", conf.get("oozie.mapreduce.uber.jar")); // relative path
    launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf);
    assertEquals(getFsTestCaseDir() + "/job.jar", launcherJobConf.getJar()); // same for launcher

    actionXml = createUberJarActionXML("job.jar", "<streaming></streaming>");
    conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertEquals("", conf.get("oozie.mapreduce.uber.jar")); // ignored for streaming
    launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf);
    assertNull(launcherJobConf.getJar()); // same for launcher conf (not set)

    actionXml = createUberJarActionXML("job.jar", "<pipes></pipes>");
    conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertEquals("", conf.get("oozie.mapreduce.uber.jar")); // ignored for pipes
    launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf);
    assertNull(launcherJobConf.getJar()); // same for launcher conf (not set)

    actionXml = XmlUtils.parseXml("<map-reduce>" + "<job-tracker>" + getJobTrackerUri() + "</job-tracker>"
            + "<name-node>" + getNameNodeUri() + "</name-node>" + "</map-reduce>");
    conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertNull(conf.get("oozie.mapreduce.uber.jar")); // doesn't resolve if not set
    launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf);
    assertNull(launcherJobConf.getJar()); // same for launcher conf

    // Disable uber jars to test that MapReduceActionExecutor won't allow the oozie.mapreduce.uber.jar property
    serv.getConf().setBoolean("oozie.action.mapreduce.uber.jar.enable", false);
    try {
        actionXml = createUberJarActionXML(getNameNodeUri() + "/app/job.jar", "");
        conf = ae.createBaseHadoopConf(context, actionXml);
        ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
        fail("ActionExecutorException expected because uber jars are disabled");
    } catch (ActionExecutorException aee) {
        assertEquals("MR003", aee.getErrorCode());
        assertEquals(ActionExecutorException.ErrorType.ERROR, aee.getErrorType());
        assertTrue(aee.getMessage().contains("oozie.action.mapreduce.uber.jar.enable"));
        assertTrue(aee.getMessage().contains("oozie.mapreduce.uber.jar"));
    }
    serv.getConf().setBoolean("oozie.action.mapreduce.uber.jar.enable", originalUberJarDisabled);

    actionXml = XmlUtils.parseXml("<map-reduce>" + "<job-tracker>" + getJobTrackerUri() + "</job-tracker>"
            + "<name-node>" + getNameNodeUri() + "</name-node>" + "<streaming>" + "<mapper>M</mapper>"
            + "<reducer>R</reducer>" + "<record-reader>RR</record-reader>"
            + "<record-reader-mapping>RRM1=1</record-reader-mapping>"
            + "<record-reader-mapping>RRM2=2</record-reader-mapping>" + "<env>e=E</env>" + "<env>ee=EE</env>"
            + "</streaming>" + "<configuration>"
            + "<property><name>mapred.input.dir</name><value>IN</value></property>"
            + "<property><name>mapred.output.dir</name><value>OUT</value></property>" + "</configuration>"
            + "</map-reduce>");

    conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertEquals("M", conf.get("oozie.streaming.mapper"));
    assertEquals("R", conf.get("oozie.streaming.reducer"));
    assertEquals("RR", conf.get("oozie.streaming.record-reader"));
    assertEquals("2", conf.get("oozie.streaming.record-reader-mapping.size"));
    assertEquals("2", conf.get("oozie.streaming.env.size"));

    actionXml = XmlUtils.parseXml("<map-reduce>" + "<job-tracker>" + getJobTrackerUri() + "</job-tracker>"
            + "<name-node>" + getNameNodeUri() + "</name-node>" + "<pipes>" + "<map>M</map>"
            + "<reduce>R</reduce>" + "<inputformat>IF</inputformat>" + "<partitioner>P</partitioner>"
            + "<writer>W</writer>" + "<program>PP</program>" + "</pipes>" + "<configuration>"
            + "<property><name>mapred.input.dir</name><value>IN</value></property>"
            + "<property><name>mapred.output.dir</name><value>OUT</value></property>" + "</configuration>"
            + "</map-reduce>");

    conf = ae.createBaseHadoopConf(context, actionXml);
    ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir());
    assertEquals("M", conf.get("oozie.pipes.map"));
    assertEquals("R", conf.get("oozie.pipes.reduce"));
    assertEquals("IF", conf.get("oozie.pipes.inputformat"));
    assertEquals("P", conf.get("oozie.pipes.partitioner"));
    assertEquals("W", conf.get("oozie.pipes.writer"));
    assertEquals(getFsTestCaseDir() + "/PP", conf.get("oozie.pipes.program"));
}

From source file: org.apache.oozie.service.HadoopAccessorService.java

License: Apache License

/**
 * Return a JobClient created with the provided user/group.
 *
 * @param user user name to create the JobClient as.
 * @param conf JobConf with all necessary information to create the
 *        JobClient.
 * @return JobClient created with the provided user/group.
 * @throws HadoopAccessorException if the client could not be created.
 */
public JobClient createJobClient(String user, final JobConf conf) throws HadoopAccessorException {
    ParamChecker.notEmpty(user, "user");
    if (!conf.getBoolean(OOZIE_HADOOP_ACCESSOR_SERVICE_CREATED, false)) {
        throw new HadoopAccessorException(ErrorCode.E0903);
    }
    String jobTracker = conf.get(JavaActionExecutor.HADOOP_JOB_TRACKER);
    validateJobTracker(jobTracker);
    try {
        UserGroupInformation ugi = getUGI(user);
        JobClient jobClient = ugi.doAs(new PrivilegedExceptionAction<JobClient>() {
            public JobClient run() throws Exception {
                return new JobClient(conf);
            }
        });
        Token<DelegationTokenIdentifier> mrdt = jobClient.getDelegationToken(getMRDelegationTokenRenewer(conf));
        conf.getCredentials().addToken(MR_TOKEN_ALIAS, mrdt);
        return jobClient;
    } catch (InterruptedException ex) {
        throw new HadoopAccessorException(ErrorCode.E0902, ex.getMessage(), ex);
    } catch (IOException ex) {
        throw new HadoopAccessorException(ErrorCode.E0902, ex.getMessage(), ex);
    }
}

From source file: org.apache.parquet.hadoop.mapred.DeprecatedParquetInputFormat.java

License: Apache License

public static boolean isTaskSideMetaData(JobConf job) {
    return job.getBoolean(ParquetInputFormat.TASK_SIDE_METADATA, TRUE);
}

From source file: org.apache.phoenix.hive.mapreduce.PhoenixInputFormat.java

License: Apache License

private List<InputSplit> generateSplits(final JobConf jobConf, final QueryPlan qplan,
        final List<KeyRange> splits, String query) throws IOException {
    Preconditions.checkNotNull(qplan);
    Preconditions.checkNotNull(splits);
    final List<InputSplit> psplits = Lists.newArrayListWithExpectedSize(splits.size());

    Path[] tablePaths = FileInputFormat
            .getInputPaths(ShimLoader.getHadoopShims().newJobContext(new Job(jobConf)));
    boolean splitByStats = jobConf.getBoolean(PhoenixStorageHandlerConstants.SPLIT_BY_STATS, false);

    setScanCacheSize(jobConf);

    // Adding Localization
    HConnection connection = HConnectionManager
            .createConnection(PhoenixConnectionUtil.getConfiguration(jobConf));
    RegionLocator regionLocator = connection
            .getRegionLocator(TableName.valueOf(qplan.getTableRef().getTable().getPhysicalName().toString()));
    RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(regionLocator, connection.getAdmin());

    for (List<Scan> scans : qplan.getScans()) {
        PhoenixInputSplit inputSplit;

        HRegionLocation location = regionLocator.getRegionLocation(scans.get(0).getStartRow(), false);
        long regionSize = sizeCalculator.getRegionSize(location.getRegionInfo().getRegionName());
        String regionLocation = PhoenixStorageHandlerUtil.getRegionLocation(location, LOG);

        if (splitByStats) {
            for (Scan aScan : scans) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Split for  scan : " + aScan + "with scanAttribute : " + aScan.getAttributesMap()
                            + " [scanCache, cacheBlock, scanBatch] : [" + aScan.getCaching() + ", "
                            + aScan.getCacheBlocks() + ", " + aScan.getBatch() + "] and  regionLocation : "
                            + regionLocation);
                }

                inputSplit = new PhoenixInputSplit(Lists.newArrayList(aScan), tablePaths[0], regionLocation,
                        regionSize);
                inputSplit.setQuery(query);
                psplits.add(inputSplit);
            }
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(
                        "Scan count[" + scans.size() + "] : " + Bytes.toStringBinary(scans.get(0).getStartRow())
                                + " ~ " + Bytes.toStringBinary(scans.get(scans.size() - 1).getStopRow()));
                LOG.debug("First scan : " + scans.get(0) + "with scanAttribute : "
                        + scans.get(0).getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : " + "["
                        + scans.get(0).getCaching() + ", " + scans.get(0).getCacheBlocks() + ", "
                        + scans.get(0).getBatch() + "] and  regionLocation : " + regionLocation);

                for (int i = 0, limit = scans.size(); i < limit; i++) {
                    LOG.debug("EXPECTED_UPPER_REGION_KEY[" + i + "] : " + Bytes.toStringBinary(
                            scans.get(i).getAttribute(BaseScannerRegionObserver.EXPECTED_UPPER_REGION_KEY)));
                }
            }

            inputSplit = new PhoenixInputSplit(scans, tablePaths[0], regionLocation, regionSize);
            inputSplit.setQuery(query);
            psplits.add(inputSplit);
        }
    }

    return psplits;
}

From source file: org.apache.phoenix.hive.query.PhoenixQueryBuilder.java

License: Apache License

private String getHint(JobConf jobConf, String tableName) {
    StringBuilder hints = new StringBuilder("/*+ ");
    if (!jobConf.getBoolean(PhoenixStorageHandlerConstants.HBASE_SCAN_CACHEBLOCKS, Boolean.FALSE)) {
        hints.append("NO_CACHE ");/*  w  w w .  ja  v  a  2 s.  c o  m*/
    }

    String queryHint = jobConf.get(tableName + PhoenixStorageHandlerConstants.PHOENIX_TABLE_QUERY_HINT);
    if (queryHint != null) {
        hints.append(queryHint);
    }
    hints.append(" */");

    return hints.toString();
}

From source file: org.apache.pig.backend.hadoop.executionengine.mapreduceExec.PigInputFormat.java

License: Apache License

@SuppressWarnings("unchecked")
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    boolean isSplittable = job.getBoolean("pig.input.splittable", true);
    ArrayList<FileSpec> inputs = (ArrayList<FileSpec>) ObjectSerializer.deserialize(job.get("pig.inputs"));
    ArrayList<EvalSpec> mapFuncs = (ArrayList<EvalSpec>) ObjectSerializer
            .deserialize(job.get("pig.mapFuncs", ""));
    ArrayList<EvalSpec> groupFuncs = (ArrayList<EvalSpec>) ObjectSerializer
            .deserialize(job.get("pig.groupFuncs", ""));

    PigContext pigContext = (PigContext) ObjectSerializer.deserialize(job.get("pig.pigContext"));
    // TODO: don't understand this code
    // added for UNION: pad group funcs with nulls so their arity matches the arity of inputs
    if (groupFuncs != null && groupFuncs.size() != inputs.size()) {
        groupFuncs = new ArrayList<EvalSpec>();
        for (int i = 0; i < inputs.size(); i++) {
            groupFuncs.add(null);
        }
    }

    if (inputs.size() != mapFuncs.size()) {
        StringBuilder sb = new StringBuilder();
        sb.append("number of inputs != number of map functions: ");
        sb.append(inputs.size());
        sb.append(" != ");
        sb.append(mapFuncs.size());
        sb.append(": ");
        sb.append(job.get("pig.mapFuncs", "missing"));
        throw new IOException(sb.toString());
    }
    if (groupFuncs != null && inputs.size() != groupFuncs.size()) {
        StringBuilder sb = new StringBuilder();
        sb.append("number of inputs != number of group functions: ");
        sb.append(inputs.size());
        sb.append(" != ");
        sb.append(groupFuncs.size());
        throw new IOException(sb.toString());
    }

    FileSystem fs = FileSystem.get(job);
    List<SliceWrapper> splits = new ArrayList<SliceWrapper>();
    for (int i = 0; i < inputs.size(); i++) {
        DataStorage store = new HDataStorage(ConfigurationUtil.toProperties(job));
        ValidatingInputFileSpec spec;
        if (inputs.get(i) instanceof ValidatingInputFileSpec) {
            spec = (ValidatingInputFileSpec) inputs.get(i);
        } else {
            spec = new ValidatingInputFileSpec(inputs.get(i), store);
        }
        EvalSpec groupBy = groupFuncs == null ? null : groupFuncs.get(i);
        if (isSplittable && (spec.getSlicer() instanceof PigSlicer)) {
            ((PigSlicer) spec.getSlicer()).setSplittable(isSplittable);
        }
        Slice[] pigs = spec.getSlicer().slice(store, spec.getFileName());
        for (Slice split : pigs) {
            splits.add(new SliceWrapper(split, pigContext, groupBy, mapFuncs.get(i), i, fs));
        }
    }
    return splits.toArray(new SliceWrapper[splits.size()]);
}