Example usage for org.apache.hadoop.util StringInterner weakIntern

List of usage examples for org.apache.hadoop.util StringInterner weakIntern

Introduction

On this page you can find example usages of org.apache.hadoop.util.StringInterner.weakIntern.

Prototype

public static String weakIntern(String sample) 

Source Link

Document

Interns and returns a reference to the representative instance for any of a collection of string instances that are equal to each other.

Usage

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit.java

License:Apache License

/**
 * Reads a class name from {@code in}, interns it, and resolves it through {@code conf}.
 *
 * @param in source from which the class name is read
 * @return the class registered under the name that was read
 * @throws IOException if reading the name fails
 * @throws RuntimeException wrapping the {@link ClassNotFoundException} when the name
 *         cannot be resolved
 */
private Class<?> readClass(DataInput in) throws IOException {
    final String internedName = StringInterner.weakIntern(Text.readString(in));
    final Class<?> resolved;
    try {
        resolved = conf.getClassByName(internedName);
    } catch (ClassNotFoundException notFound) {
        throw new RuntimeException("readObject can't find class", notFound);
    }
    return resolved;
}

From source file:com.newland.bi.bigdata.hdfs.Configuration.java

License:Apache License

/**
 * Loads configuration properties from a single wrapped resource into {@code properties}.
 *
 * <p>The wrapped object may be a {@code URL}, a {@code String} (looked up via
 * {@code getResource}), a {@code Path} (read through {@code java.io.File}), an
 * {@code InputStream}, a {@code Properties} object or a DOM {@code Element}. Nested
 * {@code <configuration>} elements are loaded recursively.
 *
 * @param properties destination for the loaded key/value pairs
 * @param wrapper    holder of the resource object and its name
 * @param quiet      when {@code true}, a resource that yields no document returns
 *                   {@code null} instead of throwing, and file parsing is not logged
 * @return a new {@code Resource} wrapping the parsed properties when the input was an
 *         {@code InputStream}; {@code null} in every other case
 * @throws RuntimeException if nothing could be parsed and {@code quiet} is false, or
 *         wrapping any IOException, DOMException, SAXException or
 *         ParserConfigurationException raised while parsing
 */
private Resource loadResource(Properties properties, Resource wrapper, boolean quiet) {
    // Placeholder so the catch blocks below can log a name even if getName() was never reached.
    String name = UNKNOWN_RESOURCE;
    try {
        Object resource = wrapper.getResource();
        name = wrapper.getName();

        DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
        //ignore all comments inside the xml file
        docBuilderFactory.setIgnoringComments(true);

        //allow includes in the xml file
        docBuilderFactory.setNamespaceAware(true);
        try {
            docBuilderFactory.setXIncludeAware(true);
        } catch (UnsupportedOperationException e) {
            // Best effort: a parser without XInclude support is logged and then used anyway.
            LOG.error("Failed to set setXIncludeAware(true) for parser " + docBuilderFactory + ":" + e, e);
        }
        DocumentBuilder builder = docBuilderFactory.newDocumentBuilder();
        Document doc = null;
        Element root = null;
        // Set only for InputStream resources; makes this method return the parsed properties.
        boolean returnCachedProperties = false;

        // Dispatch on the runtime type of the wrapped resource.
        if (resource instanceof URL) { // an URL resource
            doc = parse(builder, (URL) resource);
        } else if (resource instanceof String) { // a CLASSPATH resource
            URL url = getResource((String) resource);
            doc = parse(builder, url);
        } else if (resource instanceof Path) { // a file resource
            // Can't use FileSystem API or we get an infinite loop
            // since FileSystem uses Configuration API.  Use java.io.File instead.
            File file = new File(((Path) resource).toUri().getPath()).getAbsoluteFile();
            if (file.exists()) {
                if (!quiet) {
                    LOG.debug("parsing File " + file);
                }
                // NOTE(review): the stream is not closed here; presumably parse() closes it - confirm.
                doc = parse(builder, new BufferedInputStream(new FileInputStream(file)),
                        ((Path) resource).toString());
            }
        } else if (resource instanceof InputStream) {
            doc = parse(builder, (InputStream) resource, null);
            returnCachedProperties = true;
        } else if (resource instanceof Properties) {
            // NOTE(review): doc and root stay null on this branch, so control falls into the
            // "not found" check below after the overlay - confirm that this is intended.
            overlay(properties, (Properties) resource);
        } else if (resource instanceof Element) {
            root = (Element) resource;
        }

        // Nothing was parsed and no element was supplied directly.
        if (doc == null && root == null) {
            if (quiet)
                return null;
            throw new RuntimeException(resource + " not found");
        }

        if (root == null) {
            root = doc.getDocumentElement();
        }
        // InputStream resources are collected into a fresh Properties object that is
        // overlaid onto the caller's properties at the end and returned wrapped in a Resource.
        Properties toAddTo = properties;
        if (returnCachedProperties) {
            toAddTo = new Properties();
        }
        // A wrong top-level tag is only logged; processing continues regardless.
        if (!"configuration".equals(root.getTagName()))
            LOG.fatal("bad conf file: top-level element not <configuration>");
        NodeList props = root.getChildNodes();
        DeprecationContext deprecations = deprecationContext.get();
        for (int i = 0; i < props.getLength(); i++) {
            Node propNode = props.item(i);
            // Skip anything that is not an element node.
            if (!(propNode instanceof Element))
                continue;
            Element prop = (Element) propNode;
            // A nested <configuration> element is loaded recursively under the same resource name.
            if ("configuration".equals(prop.getTagName())) {
                loadResource(toAddTo, new Resource(prop, name), quiet);
                continue;
            }
            // An unexpected tag is only warned about; its children are still scanned below.
            if (!"property".equals(prop.getTagName()))
                LOG.warn("bad conf file: element not <property>");
            NodeList fields = prop.getChildNodes();
            String attr = null;
            String value = null;
            boolean finalParameter = false;
            LinkedList<String> source = new LinkedList<String>();
            // Collect <name>, <value>, <final> and <source> children of this element.
            for (int j = 0; j < fields.getLength(); j++) {
                Node fieldNode = fields.item(j);
                if (!(fieldNode instanceof Element))
                    continue;
                Element field = (Element) fieldNode;
                // Only the name is trimmed; value and source text are interned as read.
                if ("name".equals(field.getTagName()) && field.hasChildNodes())
                    attr = StringInterner.weakIntern(((Text) field.getFirstChild()).getData().trim());
                if ("value".equals(field.getTagName()) && field.hasChildNodes())
                    value = StringInterner.weakIntern(((Text) field.getFirstChild()).getData());
                if ("final".equals(field.getTagName()) && field.hasChildNodes())
                    finalParameter = "true".equals(((Text) field.getFirstChild()).getData());
                if ("source".equals(field.getTagName()) && field.hasChildNodes())
                    source.add(StringInterner.weakIntern(((Text) field.getFirstChild()).getData()));
            }
            // The current resource name is always the last entry of the source list.
            source.add(name);

            // NOTE(review): the original comment here said parameters already marked 'final'
            // are ignored; no such check is visible in this method, so it is presumably
            // performed inside loadProperty - confirm.
            if (attr != null) {
                if (deprecations.getDeprecatedKeyMap().containsKey(attr)) {
                    DeprecatedKeyInfo keyInfo = deprecations.getDeprecatedKeyMap().get(attr);
                    keyInfo.clearAccessed();
                    for (String key : keyInfo.newKeys) {
                        // update new keys with deprecated key's value 
                        loadProperty(toAddTo, name, key, value, finalParameter,
                                source.toArray(new String[source.size()]));
                    }
                } else {
                    loadProperty(toAddTo, name, attr, value, finalParameter,
                            source.toArray(new String[source.size()]));
                }
            }
        }

        if (returnCachedProperties) {
            overlay(properties, toAddTo);
            return new Resource(toAddTo, name);
        }
        return null;
    // Every parsing failure is logged at fatal level and rethrown unchecked with its cause.
    } catch (IOException e) {
        LOG.fatal("error parsing conf " + name, e);
        throw new RuntimeException(e);
    } catch (DOMException e) {
        LOG.fatal("error parsing conf " + name, e);
        throw new RuntimeException(e);
    } catch (SAXException e) {
        LOG.fatal("error parsing conf " + name, e);
        throw new RuntimeException(e);
    } catch (ParserConfigurationException e) {
        LOG.fatal("error parsing conf " + name, e);
        throw new RuntimeException(e);
    }
}

From source file:com.scaleoutsoftware.soss.hserver.hadoop.SubmittedJob.java

License:Apache License

/**
 * Reads one serialized split from {@code inFile}, starting at {@code offset}.
 *
 * <p>The stream is positioned at {@code offset}, the split's class name is read and
 * interned, the class is resolved through {@code configuration}, and a matching
 * deserializer reads the split from the same stream.
 *
 * @param inFile        stream containing the serialized split
 * @param offset        position in the stream at which the split record begins
 * @param configuration used to resolve the split class and to build the serialization factory
 * @return the deserialized split
 * @throws IOException if reading fails or the split class cannot be found
 */
@SuppressWarnings("unchecked")
private static <T> T getSplitDetails(FSDataInputStream inFile, long offset, Configuration configuration)
        throws IOException {
    inFile.seek(offset);
    final String splitClassName = StringInterner.weakIntern(Text.readString(inFile));

    final Class<T> splitClass;
    try {
        splitClass = (Class<T>) configuration.getClassByName(splitClassName);
    } catch (ClassNotFoundException notFound) {
        throw new IOException("Split class " + splitClassName + " not found", notFound);
    }

    final SerializationFactory serialization = new SerializationFactory(configuration);
    final Deserializer<T> reader = (Deserializer<T>) serialization.getDeserializer(splitClass);
    reader.open(inFile);
    return reader.deserialize(null);
}

From source file:eu.stratosphere.yarn.Utils.java

License:Apache License

/**
 * Copied method from org.apache.hadoop.yarn.util.Apps.
 * It was broken by YARN-1824 (2.4.0) and fixed for 2.4.1
 * by https://issues.apache.org/jira/browse/YARN-1931
 *
 * <p>Sets {@code variable} to {@code value}, or, when the variable already has a value,
 * appends {@code value} to it separated by {@link File#pathSeparator}. Key and value
 * are interned before being stored.
 *
 * @param environment map that is updated in place
 * @param variable    name of the environment variable
 * @param value       value to set or append
 */
public static void addToEnvironment(Map<String, String> environment, String variable, String value) {
    final String existing = environment.get(variable);
    final String combined = (existing == null) ? value : existing + File.pathSeparator + value;
    environment.put(StringInterner.weakIntern(variable), StringInterner.weakIntern(combined));
}

From source file:org.apache.tez.common.TezYARNUtils.java

License:Apache License

/**
 * Assembles the framework classpath string.
 *
 * <p>Entries are appended in this order, each followed by {@link File#pathSeparator}:
 * the optional user-configured prefix, the working directory and its wildcard, the
 * archive directories when {@code usingArchive} is set, and finally either the
 * configured YARN application classpath or the Hadoop configuration directory.
 *
 * @param conf         configuration supplying the prefix, the cluster-libs switch and
 *                     the YARN application classpath
 * @param usingArchive whether the archive directories should be added
 * @return the interned classpath string
 */
public static String getFrameworkClasspath(Configuration conf, boolean usingArchive) {
    final String pathSep = File.pathSeparator;
    final String dirSep = File.separator;
    final StringBuilder cp = new StringBuilder();

    // User-specified prefix goes first, when it is set and not blank.
    final String userPrefix = conf.get(TezConfiguration.TEZ_CLUSTER_ADDITIONAL_CLASSPATH_PREFIX);
    if (userPrefix != null && !userPrefix.trim().isEmpty()) {
        cp.append(userPrefix);
        cp.append(pathSep);
    }

    // PWD and PWD/*
    final String pwd = Environment.PWD.$();
    cp.append(pwd).append(pathSep);
    cp.append(pwd).append(dirSep).append("*").append(pathSep);

    if (usingArchive) {
        final String archiveDir = pwd + dirSep + TezConstants.TEZ_TAR_LR_NAME;
        // PWD/<archive>/*
        cp.append(archiveDir).append(dirSep).append("*").append(pathSep);
        // PWD/<archive>/lib/*
        cp.append(archiveDir).append(dirSep).append("lib").append(dirSep).append("*").append(pathSep);
    }

    final boolean useClusterLibs = conf.getBoolean(TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS,
            TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS_DEFAULT);
    if (!useClusterLibs) {
        // Without cluster libs only the Hadoop configuration directory is appended.
        cp.append(Environment.HADOOP_CONF_DIR.$()).append(pathSep);
    } else {
        final String[] clusterEntries = conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
                YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH);
        for (String entry : clusterEntries) {
            cp.append(entry.trim()).append(pathSep);
        }
    }

    return StringInterner.weakIntern(cp.toString());
}

From source file:org.apache.tez.common.TezYARNUtils.java

License:Apache License

/**
 * Sets {@code variable} to {@code value}, or, when the variable already has a value,
 * appends {@code value} to it separated by {@code classPathSeparator}. Key and value
 * are interned before being stored.
 *
 * @param environment        map that is updated in place
 * @param variable           name of the environment variable
 * @param value              value to set or append
 * @param classPathSeparator separator placed between the existing and the new value
 */
public static void addToEnvironment(Map<String, String> environment, String variable, String value,
        String classPathSeparator) {
    final String existing = environment.get(variable);
    final String combined = (existing == null) ? value : existing + classPathSeparator + value;
    environment.put(StringInterner.weakIntern(variable), StringInterner.weakIntern(combined));
}

From source file:org.apache.tez.common.TezYARNUtils.java

License:Apache License

/**
 * Stores the interned {@code key}/{@code value} pair in {@code env} unless the map
 * already contains {@code key}.
 */
private static void putIfAbsent(Map<String, String> env, String key, String value) {
    if (env.containsKey(key)) {
        return;
    }
    env.put(StringInterner.weakIntern(key), StringInterner.weakIntern(value));
}

From source file:org.apache.tez.common.TezYARNUtils.java

License:Apache License

/**
 * Stores the interned {@code key}/{@code value} pair in {@code env}, overwriting any
 * existing entry for {@code key}.
 */
public static void replaceInEnv(Map<String, String> env, String key, String value) {
    final String internedKey = StringInterner.weakIntern(key);
    final String internedValue = StringInterner.weakIntern(value);
    env.put(internedKey, internedValue);
}

From source file:org.apache.tez.dag.app.dag.impl.VertexImpl.java

License:Apache License

/**
 * Creates a vertex from its plan and wires up its collaborators.
 *
 * <p>The constructor copies the DAG configuration and overlays any vertex-level
 * key/values from the plan, derives resources, environment and JVM options from the
 * plan's task config, builds the container context, registers additional inputs and
 * outputs, optionally creates a speculator, and finally builds the state machine.
 *
 * @param vertexId                    identifier stored for this vertex
 * @param vertexPlan                  plan from which configuration, task config, inputs and outputs are read
 * @param vertexName                  vertex name; stored interned
 * @param dagConf                     DAG configuration, copied as the basis of the vertex configuration
 * @param eventHandler                stored event handler
 * @param taskAttemptListener         stored task attempt listener
 * @param clock                       stored clock
 * @param thh                         stored task heartbeat handler
 * @param commitVertexOutputs         stored flag
 * @param appContext                  application context; also supplies the current DAG's UGI and credentials
 * @param vertexLocationHint          passed to {@code setTaskLocationHints}
 * @param dagVertexGroups             stored vertex group map
 * @param taskSpecificLaunchCmdOption stored; consulted for task-specific log properties
 * @param entityStatusTracker         stored as the state change notifier
 */
public VertexImpl(TezVertexID vertexId, VertexPlan vertexPlan, String vertexName, Configuration dagConf,
        EventHandler eventHandler, TaskAttemptListener taskAttemptListener, Clock clock,
        TaskHeartbeatHandler thh, boolean commitVertexOutputs, AppContext appContext,
        VertexLocationHint vertexLocationHint, Map<String, VertexGroupInfo> dagVertexGroups,
        TaskSpecificLaunchCmdOption taskSpecificLaunchCmdOption, StateChangeNotifier entityStatusTracker) {
    this.vertexId = vertexId;
    this.vertexPlan = vertexPlan;
    this.vertexName = StringInterner.weakIntern(vertexName);
    // Start from a copy of the DAG configuration so vertex overrides do not touch dagConf.
    this.vertexConf = new Configuration(dagConf);
    // override dag configuration by using vertex's specified configuration
    if (vertexPlan.hasVertexConf()) {
        ConfigurationProto confProto = vertexPlan.getVertexConf();
        for (PlanKeyValuePair keyValuePair : confProto.getConfKeyValuesList()) {
            // Each key is passed to validateProperty with VERTEX scope before it is applied.
            TezConfiguration.validateProperty(keyValuePair.getKey(), Scope.VERTEX);
            vertexConf.set(keyValuePair.getKey(), keyValuePair.getValue());
        }
    }
    this.clock = clock;
    this.appContext = appContext;
    this.commitVertexOutputs = commitVertexOutputs;

    this.taskAttemptListener = taskAttemptListener;
    this.taskHeartbeatHandler = thh;
    this.eventHandler = eventHandler;
    // Read and write locks come from one shared ReentrantReadWriteLock.
    ReadWriteLock readWriteLock = new ReentrantReadWriteLock();
    this.readLock = readWriteLock.readLock();
    this.writeLock = readWriteLock.writeLock();

    if (LOG.isDebugEnabled()) {
        logLocationHints(this.vertexName, vertexLocationHint);
    }
    setTaskLocationHints(vertexLocationHint);

    this.dagUgi = appContext.getCurrentDAG().getDagUGI();

    // Resource request, processor, local resources and environment all come from the plan.
    this.taskResource = DagTypeConverters.createResourceRequestFromTaskConfig(vertexPlan.getTaskConfig());
    this.processorDescriptor = DagTypeConverters
            .convertProcessorDescriptorFromDAGPlan(vertexPlan.getProcessorDescriptor());
    this.localResources = DagTypeConverters
            .createLocalResourceMapFromDAGPlan(vertexPlan.getTaskConfig().getLocalResourceList());
    this.environment = DagTypeConverters
            .createEnvironmentMapFromDAGPlan(vertexPlan.getTaskConfig().getEnvironmentSettingList());
    this.taskSpecificLaunchCmdOpts = taskSpecificLaunchCmdOption;

    // Set up log properties, including task specific log properties.
    String javaOptsWithoutLoggerMods = vertexPlan.getTaskConfig().hasJavaOpts()
            ? vertexPlan.getTaskConfig().getJavaOpts()
            : null;
    String logString = vertexConf.get(TezConfiguration.TEZ_TASK_LOG_LEVEL,
            TezConfiguration.TEZ_TASK_LOG_LEVEL_DEFAULT);
    String[] taskLogParams = TezClientUtils.parseLogParams(logString);
    // The first parsed element feeds the JVM options; the array as a whole is applied to the env below.
    this.javaOpts = TezClientUtils.maybeAddDefaultLoggingJavaOpts(taskLogParams[0], javaOptsWithoutLoggerMods);

    if (taskSpecificLaunchCmdOpts.hasModifiedLogProperties()) {
        String[] taskLogParamsTaskSpecific = taskSpecificLaunchCmdOption.getTaskSpecificLogParams();
        this.javaOptsTaskSpecific = TezClientUtils.maybeAddDefaultLoggingJavaOpts(taskLogParamsTaskSpecific[0],
                javaOptsWithoutLoggerMods);

        // Copy the base environment before the shared map receives the default log params below.
        environmentTaskSpecific = new HashMap<String, String>(this.environment.size());
        environmentTaskSpecific.putAll(environment);
        if (taskLogParamsTaskSpecific.length == 2 && !Strings.isNullOrEmpty(taskLogParamsTaskSpecific[1])) {
            TezClientUtils.addLogParamsToEnv(environmentTaskSpecific, taskLogParamsTaskSpecific);
        }
    } else {
        this.javaOptsTaskSpecific = null;
        this.environmentTaskSpecific = null;
    }

    // env for tasks which don't have task-specific configuration. Has to be set up later to
    // optionally allow copying this for specific tasks
    TezClientUtils.addLogParamsToEnv(this.environment, taskLogParams);

    this.containerContext = new ContainerContext(this.localResources,
            appContext.getCurrentDAG().getCredentials(), this.environment, this.javaOpts, this);

    if (vertexPlan.getInputsCount() > 0) {
        setAdditionalInputs(vertexPlan.getInputsList());
    }
    if (vertexPlan.getOutputsCount() > 0) {
        setAdditionalOutputs(vertexPlan.getOutputsList());
    }
    this.stateChangeNotifier = entityStatusTracker;

    // Setup the initial parallelism early. This may be changed after
    // initialization or on a setParallelism call.
    this.numTasks = vertexPlan.getTaskConfig().getNumTasks();
    // Not sending the notifier a parallelism update since this is the initial parallelism

    this.dagVertexGroups = dagVertexGroups;

    isSpeculationEnabled = vertexConf.getBoolean(TezConfiguration.TEZ_AM_SPECULATION_ENABLED,
            TezConfiguration.TEZ_AM_SPECULATION_ENABLED_DEFAULT);
    LOG.info("isSpeculationEnabled:" + isSpeculationEnabled);
    // A speculator is created only when speculation is enabled in the vertex configuration.
    if (isSpeculationEnabled()) {
        speculator = new LegacySpeculator(vertexConf, getAppContext(), this);
    }

    logIdentifier = this.getVertexId() + " [" + this.getName() + "]";
    // This "this leak" is okay because the retained pointer is in an
    //  instance variable.

    stateMachine = new StateMachineTez<VertexState, VertexEventType, VertexEvent, VertexImpl>(
            stateMachineFactory.make(this), this);
    augmentStateMachine();
}

From source file:org.apache.tez.history.parser.datamodel.DagInfo.java

License:Apache License

/**
 * Builds the DAG model from its JSON history entity.
 *
 * <p>The entity type must match {@code Constants.TEZ_DAG_ID} (case-insensitively).
 * Start and finish times are read from the "other info" node; when the finish time
 * precedes the start time, the DAG started/finished events are consulted instead.
 * The DAG plan, if present, is parsed, and the vertex name to id mapping is filled in.
 *
 * @param jsonObject JSON representation of the DAG entity
 * @throws JSONException if a required JSON field is missing or malformed
 */
DagInfo(JSONObject jsonObject) throws JSONException {
    super(jsonObject);

    // Initialise the lookup structures.
    vertexNameMap = Maps.newHashMap();
    vertexNameIDMapping = new DualHashBidiMap();
    edgeInfoMap = Maps.newHashMap();
    basicVertexInfoMap = Maps.newHashMap();
    containerMapping = LinkedHashMultimap.create();

    final String entityType = jsonObject.getString(Constants.ENTITY_TYPE);
    Preconditions.checkArgument(entityType.equalsIgnoreCase(Constants.TEZ_DAG_ID));

    dagId = StringInterner.weakIntern(jsonObject.getString(Constants.ENTITY));

    // Everything else lives under the "other info" node.
    final JSONObject otherInfo = jsonObject.getJSONObject(Constants.OTHER_INFO);

    long begin = otherInfo.optLong(Constants.START_TIME);
    long finish = otherInfo.optLong(Constants.FINISH_TIME);
    userName = otherInfo.optString(Constants.USER);
    if (finish < begin) {
        LOG.warn("DAG has got wrong start/end values. startTime=" + begin + ", endTime=" + finish
                + ". Will check timestamps in DAG started/finished events");

        // Fall back to the timestamps carried by the DAG_STARTED / DAG_FINISHED events.
        for (Event event : eventList) {
            final HistoryEventType eventType = HistoryEventType.valueOf(event.getType());
            if (eventType == HistoryEventType.DAG_STARTED) {
                begin = event.getAbsoluteTime();
            } else if (eventType == HistoryEventType.DAG_FINISHED) {
                finish = event.getAbsoluteTime();
            }
        }

        if (finish < begin) {
            LOG.warn("DAG has got wrong start/end values in events as well. startTime=" + begin
                    + ", endTime=" + finish);
        }
    }
    startTime = begin;
    endTime = finish;

    //TODO: Not getting populated correctly for lots of jobs.  Verify
    submitTime = otherInfo.optLong(Constants.START_REQUESTED_TIME);
    diagnostics = otherInfo.optString(Constants.DIAGNOSTICS);
    failedTasks = otherInfo.optInt(Constants.NUM_FAILED_TASKS);

    final JSONObject plan = otherInfo.optJSONObject(Constants.DAG_PLAN);
    final String dagName = (plan == null) ? null : plan.optString(Constants.DAG_NAME);
    name = StringInterner.weakIntern(dagName);
    if (plan == null) {
        numVertices = 0;
    } else {
        final JSONArray vertexArray = plan.optJSONArray(Constants.VERTICES);
        numVertices = (vertexArray == null) ? 0 : vertexArray.length();
        parseDAGPlan(plan);
    }
    status = StringInterner.weakIntern(otherInfo.optString(Constants.STATUS));

    // Record vertexName --> vertexId for every known vertex.
    final JSONObject idMappingJson = otherInfo.optJSONObject(Constants.VERTEX_NAME_ID_MAPPING);
    if (idMappingJson != null) {
        for (Map.Entry<String, BasicVertexInfo> entry : basicVertexInfoMap.entrySet()) {
            final String mappedId = idMappingJson.optString(entry.getKey());
            vertexNameIDMapping.put(entry.getKey(), mappedId);
        }
    }
}