org.apache.zeppelin.python.PythonInterpreter.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.zeppelin.python.PythonInterpreter.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.zeppelin.python;

import com.google.common.io.Files;
import com.google.gson.Gson;
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.DefaultExecutor;
import org.apache.commons.exec.ExecuteException;
import org.apache.commons.exec.ExecuteResultHandler;
import org.apache.commons.exec.ExecuteWatchdog;
import org.apache.commons.exec.PumpStreamHandler;
import org.apache.commons.exec.environment.EnvironmentUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.zeppelin.interpreter.BaseZeppelinContext;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterHookRegistry.HookType;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.InterpreterResultMessage;
import org.apache.zeppelin.interpreter.InvalidHookException;
import org.apache.zeppelin.interpreter.remote.RemoteInterpreterUtils;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.interpreter.util.InterpreterOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import py4j.GatewayServer;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * Interpreter for Python. It is the first implementation of a Python interpreter, so it has
 * fewer features than IPythonInterpreter, but it also requires fewer prerequisites:
 * only a python installation is required.
 */
public class PythonInterpreter extends Interpreter implements ExecuteResultHandler {
    // Class-wide SLF4J logger.
    private static final Logger LOGGER = LoggerFactory.getLogger(PythonInterpreter.class);
    // Upper bound, in seconds, used when waiting for python initialization (interpret) and for
    // a completion reply (completion).
    private static final int MAX_TIMEOUT_SEC = 30;

    // py4j gateway server started on the JVM side; created in createGatewayServerAndStartScript().
    private GatewayServer gatewayServer;
    // commons-exec executor used to launch the python process; its watchdog is used in close().
    private DefaultExecutor executor;
    // temporary directory that receives the python scripts copied from the classpath.
    private File pythonWorkDir;
    // when true, the bundled py4j zip is appended to PYTHONPATH (see setupPythonEnv()).
    protected boolean useBuiltinPy4j = true;

    // used to forward output from python process to InterpreterOutput
    private InterpreterOutputStream outputStream;
    // set to true after the python process is launched; cleared by close() and by the
    // onProcessComplete / onProcessFailed callbacks.
    private AtomicBoolean pythonScriptRunning = new AtomicBoolean(false);
    // set to true by onPythonScriptInitialized(); also serves as the monitor interpret() waits on.
    private AtomicBoolean pythonScriptInitialized = new AtomicBoolean(false);
    // pid passed to onPythonScriptInitialized(); stays -1 until then.
    private long pythonPid = -1;
    // delegate used when IPython is available; open() resets it to null otherwise.
    private IPythonInterpreter iPythonInterpreter;
    // created lazily by getZeppelinContext().
    private BaseZeppelinContext zeppelinContext;
    private String condaPythonExec; // set by PythonCondaInterpreter
    // whether py4j authentication is used; read from "zeppelin.py4j.useAuth" in open().
    private boolean usePy4jAuth = false;

    /**
     * Creates the interpreter; the properties are handed to the {@link Interpreter} base class.
     *
     * @param property interpreter configuration properties
     */
    public PythonInterpreter(Properties property) {
        super(property);
    }

    /**
     * Opens the interpreter. IPythonInterpreter is preferred when it is enabled through
     * {@code zeppelin.python.useIPython} and its prerequisite check reports nothing; otherwise
     * the native implementation is used: the display hook is registered (when a hook registry
     * exists) and the gateway server plus the python process are started.
     *
     * @throws InterpreterException if the hook is invalid or the python process cannot be started
     */
    @Override
    public void open() throws InterpreterException {
        // try IPythonInterpreter first
        iPythonInterpreter = getIPythonInterpreter();
        boolean ipythonRequested = getProperty("zeppelin.python.useIPython", "true").equals("true");
        if (ipythonRequested) {
            String prerequisiteReport = iPythonInterpreter.checkIPythonPrerequisite(getPythonExec());
            if (StringUtils.isEmpty(prerequisiteReport)) {
                try {
                    iPythonInterpreter.open();
                    LOGGER.info("IPython is available, Use IPythonInterpreter to replace PythonInterpreter");
                    return;
                } catch (Exception e) {
                    iPythonInterpreter = null;
                    LOGGER.warn("Fail to open IPythonInterpreter", e);
                }
            }
        }

        // IPython cannot be used from here on, so make sure no delegate is kept
        iPythonInterpreter = null;
        LOGGER.info("IPython is not available, use the native PythonInterpreter");

        // Add matplotlib display hook
        InterpreterGroup group = getInterpreterGroup();
        boolean hookRegistryPresent = group != null && group.getInterpreterHookRegistry() != null;
        if (hookRegistryPresent) {
            // the guard above is presumably only relevant for unit tests (per the original author)
            try {
                registerHook(HookType.POST_EXEC_DEV.getName(), "__zeppelin__._displayhook()");
            } catch (InvalidHookException e) {
                throw new InterpreterException(e);
            }
        }

        try {
            this.usePy4jAuth = Boolean.parseBoolean(getProperty("zeppelin.py4j.useAuth", "true"));
            createGatewayServerAndStartScript();
        } catch (IOException e) {
            LOGGER.error("Fail to open PythonInterpreter", e);
            throw new InterpreterException("Fail to open PythonInterpreter", e);
        }
    }

    /**
     * Starts the py4j gateway server on the JVM side and launches the python process that is
     * given the server address and port on its command line.
     *
     * @throws IOException if the python scripts cannot be prepared or the process cannot be launched
     */
    private void createGatewayServerAndStartScript() throws IOException {
        // start gateway server in JVM side
        int port = RemoteInterpreterUtils.findRandomAvailablePortOnAllLocalInterfaces();
        // use the FQDN as the server address instead of 127.0.0.1 so that python process in docker
        // container can also connect to this gateway server.
        String serverAddress = PythonUtils.getLocalIP(properties);
        String secret = PythonUtils.createSecret(256);
        this.gatewayServer = PythonUtils.createGatewayServer(this, serverAddress, port, secret, usePy4jAuth);
        gatewayServer.start();

        // launch python process to connect to the gateway server in JVM side
        createPythonScript();
        String pythonExec = getPythonExec();
        CommandLine cmd = CommandLine.parse(pythonExec);
        if (!pythonExec.endsWith(".py")) {
            // PythonDockerInterpreter set pythonExec with script
            cmd.addArgument(pythonWorkDir + "/zeppelin_python.py", false);
        }
        cmd.addArgument(serverAddress, false);
        cmd.addArgument(Integer.toString(port), false);

        executor = new DefaultExecutor();
        outputStream = new InterpreterOutputStream(LOGGER);
        PumpStreamHandler streamHandler = new PumpStreamHandler(outputStream);
        executor.setStreamHandler(streamHandler);
        executor.setWatchdog(new ExecuteWatchdog(ExecuteWatchdog.INFINITE_TIMEOUT));
        Map<String, String> env = setupPythonEnv();
        if (usePy4jAuth) {
            env.put("PY4J_GATEWAY_SECRET", secret);
        }
        LOGGER.info("Launching Python Process Command: " + cmd.getExecutable() + " "
                + StringUtils.join(cmd.getArguments(), " "));

        // Raise the flag BEFORE the asynchronous launch. The result-handler callbacks
        // (onProcessComplete / onProcessFailed) clear it from the executor's thread; setting it
        // afterwards could overwrite their "not running" state when the process dies immediately.
        pythonScriptRunning.set(true);
        try {
            executor.execute(cmd, env, this);
        } catch (IOException | RuntimeException e) {
            // the launch itself failed, so no callback will ever clear the flag for us
            pythonScriptRunning.set(false);
            throw e;
        }
    }

    /**
     * Creates the temporary python working directory and copies the bundled python resources
     * into it.
     *
     * @throws IOException if one of the resources cannot be copied
     */
    private void createPythonScript() throws IOException {
        // set java.io.tmpdir to /tmp on MacOS, because docker can not share the /var folder which will
        // cause PythonDockerInterpreter fails.
        // https://stackoverflow.com/questions/45122459/docker-mounts-denied-the-paths-are-not-shared-
        // from-os-x-and-are-not-known
        boolean runningOnMac = System.getProperty("os.name", "").contains("Mac");
        if (runningOnMac) {
            System.setProperty("java.io.tmpdir", "/tmp");
        }

        this.pythonWorkDir = Files.createTempDir();
        this.pythonWorkDir.deleteOnExit();
        LOGGER.info("Create Python working dir: " + pythonWorkDir.getAbsolutePath());

        // {classpath resource, file name inside the working directory}, copied in this order
        String[][] bundledResources = {
            { "python/zeppelin_python.py", "zeppelin_python.py" },
            { "python/zeppelin_context.py", "zeppelin_context.py" },
            { "python/backend_zinline.py", "backend_zinline.py" },
            { "python/mpl_config.py", "mpl_config.py" },
            { "python/py4j-src-0.10.7.zip", "py4j-src-0.10.7.zip" },
        };
        for (String[] resource : bundledResources) {
            copyResourceToPythonWorkDir(resource[0], resource[1]);
        }
    }

    /**
     * @return true when an IPythonInterpreter delegate is currently set, i.e. calls such as
     *         interpret/completion/cancel are forwarded to it
     */
    protected boolean useIPython() {
        return this.iPythonInterpreter != null;
    }

    /**
     * Copies a classpath resource into the python working directory.
     *
     * @param srcResourceName name of the resource, resolved through this class's class loader
     * @param dstFileName name of the file to create inside {@code pythonWorkDir}
     * @throws IOException if the resource does not exist or the copy fails
     */
    private void copyResourceToPythonWorkDir(String srcResourceName, String dstFileName) throws IOException {
        // try-with-resources closes both streams, also when the copy fails half-way
        try (java.io.InputStream in = getClass().getClassLoader().getResourceAsStream(srcResourceName)) {
            if (in == null) {
                // getResourceAsStream signals "not found" with null instead of an exception
                throw new IOException("Resource not found on classpath: " + srcResourceName);
            }
            try (FileOutputStream out =
                    new FileOutputStream(pythonWorkDir.getAbsoluteFile() + "/" + dstFileName)) {
                IOUtils.copy(in, out);
            }
        }
    }

    /**
     * Builds the environment for the python process: the current process environment with the
     * python working directory (and, when {@code useBuiltinPy4j} is set, the bundled py4j zip)
     * appended to PYTHONPATH.
     *
     * @return the environment map to launch the python process with
     * @throws IOException if the process environment cannot be read
     */
    protected Map<String, String> setupPythonEnv() throws IOException {
        Map<String, String> processEnv = EnvironmentUtils.getProcEnvironment();
        final String workDirPath = pythonWorkDir.getAbsolutePath();

        appendToPythonPath(processEnv, workDirPath);
        if (useBuiltinPy4j) {
            String bundledPy4j = pythonWorkDir.getAbsolutePath() + "/py4j-src-0.10.7.zip";
            appendToPythonPath(processEnv, bundledPy4j);
        }

        LOGGER.info("PYTHONPATH: " + processEnv.get("PYTHONPATH"));
        return processEnv;
    }

    /**
     * Appends {@code path} to the PYTHONPATH entry of {@code env}, creating the entry when it is
     * missing or empty.
     *
     * @param env environment map that is modified in place
     * @param path entry to append
     */
    private void appendToPythonPath(Map<String, String> env, String path) {
        String current = env.get("PYTHONPATH");
        if (current == null || current.isEmpty()) {
            // an empty value must not be extended, otherwise PYTHONPATH would start with an
            // empty entry
            env.put("PYTHONPATH", path);
        } else {
            // use the platform's path-list separator (":" on Unix, ";" on Windows)
            env.put("PYTHONPATH", current + File.pathSeparator + path);
        }
    }

    /**
     * Resolves the python executable used to run the script.
     * Precedence: condaPythonExec (when set) &gt; the {@code zeppelin.python} property, which
     * falls back to {@code python}.
     *
     * @return the python command to execute
     */
    protected String getPythonExec() {
        return condaPythonExec == null
                ? getProperty("zeppelin.python", "python")
                : condaPythonExec;
    }

    /**
     * @return the python working directory; null until createPythonScript() has created it
     */
    public File getPythonWorkDir() {
        return pythonWorkDir;
    }

    /**
     * Closes the interpreter. When IPythonInterpreter is the active delegate only that delegate
     * is closed; otherwise the python process is destroyed, the gateway server is shut down and
     * the hand-off state is reset so that the interpreter can be opened again.
     *
     * @throws InterpreterException if closing the IPython delegate fails
     */
    @Override
    public void close() throws InterpreterException {
        if (iPythonInterpreter != null) {
            iPythonInterpreter.close();
            return;
        }

        pythonScriptRunning.set(false);
        pythonScriptInitialized.set(false);

        // executor and gatewayServer are only assigned while the python process is being started,
        // so both are still null when open() was never called or failed before reaching that point
        if (executor != null && executor.getWatchdog() != null) {
            executor.getWatchdog().destroyProcess();
        }
        if (gatewayServer != null) {
            gatewayServer.shutdown();
        }

        // reset these 2 monitors otherwise when you restart PythonInterpreter it would fails to execute
        // python code as these 2 objects are in incorrect state.
        // "new Integer(0)" is deliberate: each monitor needs its own identity, whereas
        // Integer.valueOf(0) / autoboxing would hand out one shared cached instance.
        statementSetNotifier = new Integer(0);
        statementFinishedNotifier = new Integer(0);
    }

    // request waiting to be fetched by the python process through getStatements(); null when
    // nothing is pending
    private PythonInterpretRequest pythonInterpretRequest = null;
    // monitor notified when a new request has been set; replaced by a fresh object in close()
    private Integer statementSetNotifier = new Integer(0);
    // monitor notified by setStatementsFinished(); replaced by a fresh object in close()
    private Integer statementFinishedNotifier = new Integer(0);
    // output reported for the last finished statement; reset to null when a request is sent
    private String statementOutput = null;
    // error flag reported together with statementOutput
    private boolean statementError = false;

    /**
     * Overrides the python executable; the value takes precedence over the
     * {@code zeppelin.python} property in getPythonExec().
     *
     * @param pythonExec python command to use
     */
    public void setPythonExec(String pythonExec) {
        this.condaPythonExec = pythonExec;
        LOGGER.info("Set Python Command : {}", pythonExec);
    }

    /**
     * Request handed to the python process: the statements to run plus two flags telling
     * whether the request is a completion request and whether hooks should be called.
     */
    public class PythonInterpretRequest {
        public String statements;
        public boolean isForCompletion;
        public boolean isCallHooks;

        /**
         * @param statements python code to execute
         * @param isForCompletion true when the request is issued for code completion
         * @param isCallHooks value of the call-hooks flag
         */
        public PythonInterpretRequest(String statements, boolean isForCompletion, boolean isCallHooks) {
            this.statements = statements;
            this.isForCompletion = isForCompletion;
            this.isCallHooks = isCallHooks;
        }

        /**
         * Same as the three-argument constructor with the call-hooks flag set to true.
         */
        public PythonInterpretRequest(String statements, boolean isForCompletion) {
            this(statements, isForCompletion, true);
        }

        /** @return the python code of this request */
        public String statements() {
            return this.statements;
        }

        /** @return true when this is a completion request */
        public boolean isForCompletion() {
            return this.isForCompletion;
        }

        /** @return the call-hooks flag */
        public boolean isCallHooks() {
            return this.isCallHooks;
        }
    }

    /**
     * Called by the python process. Blocks until a request has been set and then hands it over,
     * clearing the pending slot.
     *
     * @return the next request to execute
     */
    public PythonInterpretRequest getStatements() {
        synchronized (statementSetNotifier) {
            while (pythonInterpretRequest == null) {
                try {
                    // timed wait: the condition is re-checked at least once per second
                    statementSetNotifier.wait(1000);
                } catch (InterruptedException e) {
                    // Report through the interpreter log instead of stderr. The interrupt flag is
                    // deliberately not restored: with the flag set the next wait() would throw at
                    // once and this polling loop would degrade into a busy spin.
                    LOGGER.warn("Interrupted while waiting for the next python statement", e);
                }
            }
            PythonInterpretRequest req = pythonInterpretRequest;
            pythonInterpretRequest = null;
            return req;
        }
    }

    /**
     * Called by the python process to report the result of the last request. Stores the output
     * and the error flag and wakes up one thread waiting on the finished-monitor.
     *
     * @param out output reported for the statement
     * @param error true when the statement failed
     */
    public void setStatementsFinished(String out, boolean error) {
        synchronized (statementFinishedNotifier) {
            LOGGER.debug("Setting python statement output: " + out + ", error: " + error);
            // waiters (callPython / completion) loop until statementOutput is non-null
            statementOutput = out;
            statementError = error;
            statementFinishedNotifier.notify();
        }
    }

    /**
     * Called by the python process once it is initialized. Records the reported pid, marks the
     * script as initialized and wakes up every thread waiting for that state.
     *
     * @param pid process id reported by the python process
     */
    public void onPythonScriptInitialized(long pid) {
        this.pythonPid = pid;
        final AtomicBoolean initializedFlag = pythonScriptInitialized;
        synchronized (initializedFlag) {
            LOGGER.debug("onPythonScriptInitialized is called");
            initializedFlag.set(true);
            initializedFlag.notifyAll();
        }
    }

    /**
     * Called by the python process to append text to the interpreter output that is currently
     * attached to the output stream.
     *
     * @param message text to write
     * @throws IOException if writing to the interpreter output fails
     */
    public void appendOutput(String message) throws IOException {
        LOGGER.debug("Output from python process: " + message);
        outputStream.getInterpreterOutput().write(message);
    }

    /**
     * Hook invoked by interpret() right before the statement is sent to the python process.
     * Used by subclass such as PySparkInterpreter to set JobGroup before executing spark code.
     *
     * @param context context of the paragraph about to be executed
     */
    protected void preCallPython(InterpreterContext context) {
        // intentionally empty: the base implementation does nothing
    }

    /**
     * Blocking call: publishes {@code request} for the python process and waits until a result
     * has been reported through setStatementsFinished().
     *
     * <p>The wait also ends when the python process is no longer running (terminated, failed or
     * closed). In that case no result can arrive any more, so the statement is recorded as
     * failed instead of blocking the caller forever.
     *
     * @param request request to hand to the python process
     */
    protected void callPython(PythonInterpretRequest request) {
        // capture the monitors once so that lock and wait/notify always use the same object,
        // even if close() swaps the fields concurrently
        final Object requestMonitor = statementSetNotifier;
        final Object finishedMonitor = statementFinishedNotifier;

        synchronized (requestMonitor) {
            this.pythonInterpretRequest = request;
            statementOutput = null;
            requestMonitor.notify();
        }

        synchronized (finishedMonitor) {
            while (statementOutput == null && pythonScriptRunning.get()) {
                try {
                    // timed wait so that the running flag is re-checked at least once per second
                    finishedMonitor.wait(1000);
                } catch (InterruptedException e) {
                    // ignore this exception
                }
            }
            if (statementOutput == null) {
                // the process went away without answering; surface that as an error result
                statementError = true;
                statementOutput = "Python process is abnormally exited, please check your code and log.";
            }
        }
    }

    /**
     * Runs {@code st} in the python process (or in the IPython delegate when one is active).
     *
     * <p>Waits up to {@code MAX_TIMEOUT_SEC} seconds for the python script to report that it is
     * initialized; if that does not happen an ERROR result carrying the output collected so far
     * is returned.
     *
     * @param st python code to execute
     * @param context context of the paragraph being executed
     * @return SUCCESS, or ERROR with the output reported by the python process
     * @throws InterpreterException if the paragraph output cannot be flushed or read
     */
    @Override
    public InterpreterResult interpret(String st, InterpreterContext context) throws InterpreterException {
        if (iPythonInterpreter != null) {
            return iPythonInterpreter.interpret(st, context);
        }

        if (!pythonScriptRunning.get()) {
            // outputStream is only created when the python process is launched, so it can still
            // be null here
            String captured = outputStream == null ? "" : outputStream.toString();
            return new InterpreterResult(Code.ERROR, "python process not running " + captured);
        }

        outputStream.setInterpreterOutput(context.out);

        synchronized (pythonScriptInitialized) {
            long startTime = System.currentTimeMillis();
            if (!pythonScriptInitialized.get()) {
                // logged once; the loop below polls every 100 ms
                LOGGER.info("Wait for PythonScript initialized");
            }
            while (!pythonScriptInitialized.get()
                    && System.currentTimeMillis() - startTime < MAX_TIMEOUT_SEC * 1000L) {
                try {
                    pythonScriptInitialized.wait(100);
                } catch (InterruptedException e) {
                    // keep polling until the timeout; report through the log instead of stderr
                    LOGGER.warn("Interrupted while waiting for the python script to initialize", e);
                }
            }
        }

        List<InterpreterResultMessage> errorMessage;
        try {
            context.out.flush();
            errorMessage = context.out.toInterpreterResultMessage();
        } catch (IOException e) {
            throw new InterpreterException(e);
        }

        if (!pythonScriptInitialized.get()) {
            // timeout. didn't get initialized message
            errorMessage
                    .add(new InterpreterResultMessage(InterpreterResult.Type.TEXT, "Failed to initialize Python"));
            return new InterpreterResult(Code.ERROR, errorMessage);
        }

        BaseZeppelinContext z = getZeppelinContext();
        z.setInterpreterContext(context);
        z.setGui(context.getGui());
        z.setNoteGui(context.getNoteGui());
        InterpreterContext.set(context);

        preCallPython(context);
        callPython(new PythonInterpretRequest(st, false));

        if (statementError) {
            return new InterpreterResult(Code.ERROR, statementOutput);
        } else {
            try {
                context.out.flush();
            } catch (IOException e) {
                throw new InterpreterException(e);
            }
            return new InterpreterResult(Code.SUCCESS);
        }
    }

    /**
     * Interrupts the running statement. When a pid has been reported by the python process a
     * SIGINT is sent to it through the {@code kill} command; otherwise the interpreter is closed.
     *
     * @throws IOException if the kill command cannot be started
     * @throws InterpreterException if closing the interpreter fails
     */
    public void interrupt() throws IOException, InterpreterException {
        if (pythonPid <= -1) {
            // no pid was reported, so there is nothing to signal
            LOGGER.warn("Non UNIX/Linux system, close the interpreter");
            close();
            return;
        }
        LOGGER.info("Sending SIGINT signal to PID : " + pythonPid);
        Runtime.getRuntime().exec("kill -SIGINT " + pythonPid);
    }

    /**
     * Cancels the running paragraph, either through the IPython delegate or by interrupting the
     * python process. An IOException raised while interrupting is logged, not propagated.
     *
     * @param context context of the paragraph to cancel
     * @throws InterpreterException if the delegate or the interrupt reports a failure
     */
    @Override
    public void cancel(InterpreterContext context) throws InterpreterException {
        if (iPythonInterpreter == null) {
            try {
                interrupt();
            } catch (IOException e) {
                LOGGER.error("Error", e);
            }
        } else {
            iPythonInterpreter.cancel(context);
        }
    }

    /**
     * @return always {@code FormType.NATIVE}
     */
    @Override
    public FormType getFormType() {
        return FormType.NATIVE;
    }

    /**
     * Reports execution progress. Only the IPython delegate provides a value; the native
     * implementation always reports 0.
     *
     * @param context context of the paragraph being executed
     * @return progress reported by the delegate, or 0
     * @throws InterpreterException if the delegate fails
     */
    @Override
    public int getProgress(InterpreterContext context) throws InterpreterException {
        return iPythonInterpreter != null ? iPythonInterpreter.getProgress(context) : 0;
    }

    /**
     * Computes code-completion candidates for the text in front of the cursor.
     *
     * <p>With an active IPython delegate the call is forwarded. Otherwise a completion request
     * is sent to the python process and its reply (a JSON array of strings) is converted into
     * completion entries. An empty list is returned when the process is not running, does not
     * answer within {@code MAX_TIMEOUT_SEC} seconds, reports an error, or the waiting thread is
     * interrupted.
     *
     * @param buf paragraph text
     * @param cursor cursor position inside {@code buf}; clamped to the text length
     * @param interpreterContext context of the paragraph
     * @return completion candidates, possibly empty, never null
     * @throws InterpreterException if the IPython delegate fails
     */
    @Override
    public List<InterpreterCompletion> completion(String buf, int cursor, InterpreterContext interpreterContext)
            throws InterpreterException {
        if (iPythonInterpreter != null) {
            return iPythonInterpreter.completion(buf, cursor, interpreterContext);
        }
        if (!pythonScriptRunning.get()) {
            // nobody would consume the request; do not leave a stale one behind
            return new LinkedList<>();
        }
        if (buf.length() < cursor) {
            cursor = buf.length();
        }
        String completionString = getCompletionTargetString(buf, cursor);
        if (completionString == null) {
            // the helper returns null when the text cannot be cut at the cursor
            return new LinkedList<>();
        }
        // the target is embedded in a single-quoted python string literal, so backslashes and
        // single quotes have to be escaped to keep the generated statement well-formed
        String escapedTarget = completionString.replace("\\", "\\\\").replace("'", "\\'");
        String completionCommand = "__zeppelin_completion__.getCompletion('" + escapedTarget + "')";
        LOGGER.debug("completionCommand: " + completionCommand);

        // publish the request under the same monitor getStatements() uses to read it
        synchronized (statementSetNotifier) {
            pythonInterpretRequest = new PythonInterpretRequest(completionCommand, true);
            statementOutput = null;
            statementSetNotifier.notify();
        }

        String[] completionList = null;
        synchronized (statementFinishedNotifier) {
            long startTime = System.currentTimeMillis();
            while (statementOutput == null && pythonScriptRunning.get()) {
                try {
                    if (System.currentTimeMillis() - startTime > MAX_TIMEOUT_SEC * 1000L) {
                        LOGGER.error("Python completion didn't have response for {}sec.", MAX_TIMEOUT_SEC);
                        break;
                    }
                    statementFinishedNotifier.wait(1000);
                } catch (InterruptedException e) {
                    // not working
                    LOGGER.info("wait drop");
                    // keep the interrupt visible to the caller since we stop waiting here
                    Thread.currentThread().interrupt();
                    return new LinkedList<>();
                }
            }
            if (statementOutput == null || statementError) {
                // no reply (timeout / process gone) or the completion statement failed
                return new LinkedList<>();
            }
            Gson gson = new Gson();
            completionList = gson.fromJson(statementOutput, String[].class);
        }
        //end code for completion
        if (completionList == null) {
            return new LinkedList<>();
        }

        List<InterpreterCompletion> results = new LinkedList<>();
        for (String name : completionList) {
            results.add(new InterpreterCompletion(name, name, StringUtils.EMPTY));
        }
        return results;
    }

    /**
     * Extracts the token to complete: the part of {@code text} between the closest preceding
     * separator (space, newline or tab) and the cursor.
     *
     * <p>Only the last occurrence of each separator is looked at, and a separator that is the
     * very last character before the cursor is ignored. When no separator qualifies, the whole
     * text before the cursor is returned.
     *
     * @param text paragraph text
     * @param cursor cursor position inside {@code text}
     * @return the completion target, or null when the text cannot be cut at {@code cursor}
     */
    private String getCompletionTargetString(String text, int cursor) {
        final String beforeCursor;
        try {
            beforeCursor = text.substring(0, cursor);
        } catch (Exception e) {
            LOGGER.error(e.toString());
            return null;
        }
        final int end = beforeCursor.length();

        // distance of the closest usable separator from the end; "cursor" means none found yet
        int closestDistance = cursor;
        for (String separator : new String[] { " ", "\n", "\t" }) {
            int lastOccurrence = beforeCursor.lastIndexOf(separator);
            if (lastOccurrence < 0) {
                continue;
            }
            int distanceFromEnd = end - 1 - lastOccurrence;
            if (distanceFromEnd > 0 && distanceFromEnd < closestDistance) {
                closestDistance = distanceFromEnd;
            }
        }

        int start = (closestDistance == end) ? 0 : end - closestDistance;
        return beforeCursor.substring(start, end);
    }

    /**
     * Looks up the IPythonInterpreter of the same session by class name.
     *
     * @return the interpreter returned by the lookup
     * @throws InterpreterException if the lookup fails
     */
    protected IPythonInterpreter getIPythonInterpreter() throws InterpreterException {
        // NOTE(review): the meaning of the "false" argument is defined by the base class — confirm
        return getInterpreterInTheSameSessionByClassName(IPythonInterpreter.class, false);
    }

    /**
     * Creates the ZeppelinContext for this interpreter from the group's hook registry and the
     * {@code zeppelin.python.maxResult} property (default "1000").
     *
     * @return a new PythonZeppelinContext
     */
    protected BaseZeppelinContext createZeppelinContext() {
        // NOTE(review): assumes getInterpreterGroup() is non-null here — open() guards against
        // null before using it, this method does not
        return new PythonZeppelinContext(getInterpreterGroup().getInterpreterHookRegistry(),
                Integer.parseInt(getProperty("zeppelin.python.maxResult", "1000")));
    }

    /**
     * Returns the ZeppelinContext, creating it through createZeppelinContext() on first use.
     *
     * @return the cached ZeppelinContext
     */
    public BaseZeppelinContext getZeppelinContext() {
        if (zeppelinContext != null) {
            return zeppelinContext;
        }
        zeppelinContext = createZeppelinContext();
        return zeppelinContext;
    }

    /**
     * Runs a bootstrap script, loaded from the classpath, through interpret().
     *
     * @param resourceName name of the script resource, resolved through this class's class loader
     * @throws IOException if the resource is missing or unreadable, or the script does not
     *         finish with SUCCESS
     */
    protected void bootstrapInterpreter(String resourceName) throws IOException {
        LOGGER.info("Bootstrap interpreter via " + resourceName);
        String bootstrapCode;
        // try-with-resources makes sure the resource stream is closed after reading
        try (java.io.InputStream in = getClass().getClassLoader().getResourceAsStream(resourceName)) {
            if (in == null) {
                // getResourceAsStream signals "not found" with null instead of an exception
                throw new IOException("Bootstrap resource not found on classpath: " + resourceName);
            }
            // decode with a fixed charset instead of the platform default
            // NOTE(review): assumes the bundled scripts are UTF-8 encoded — confirm
            bootstrapCode = IOUtils.toString(in, "UTF-8");
        }
        try {
            // Add hook explicitly, otherwise python will fail to execute the statement
            InterpreterResult result = interpret(bootstrapCode + "\n" + "__zeppelin__._displayhook()",
                    InterpreterContext.get());
            if (result.code() != Code.SUCCESS) {
                throw new IOException("Fail to run bootstrap script: " + resourceName);
            }
        } catch (InterpreterException e) {
            throw new IOException(e);
        }
    }

    /**
     * Result-handler callback for process completion: logs the exit code and clears the
     * running and initialized flags.
     *
     * @param exitValue exit code of the python process
     */
    @Override
    public void onProcessComplete(int exitValue) {
        LOGGER.info("python process terminated. exit code " + exitValue);
        pythonScriptRunning.set(false);
        pythonScriptInitialized.set(false);
    }

    /**
     * Result-handler callback for process failure: logs the failure and clears the running
     * and initialized flags.
     *
     * @param e exception describing the failure
     */
    @Override
    public void onProcessFailed(ExecuteException e) {
        LOGGER.error("python process failed", e);
        pythonScriptRunning.set(false);
        pythonScriptInitialized.set(false);
    }

    /**
     * Called by Python Process, used for debugging purpose: writes the message to the
     * interpreter log at DEBUG level.
     *
     * @param message text to log
     */
    public void logPythonOutput(String message) {
        LOGGER.debug("Python Process Output: " + message);
    }
}