Example usage for org.apache.hadoop.fs FileUtil unTar

List of usage examples for org.apache.hadoop.fs FileUtil unTar

Introduction

In this page you can find the example usage for org.apache.hadoop.fs FileUtil unTar.

Prototype

public static void unTar(File inFile, File untarDir) throws IOException 

Source Link

Document

Given a tar file as input, it will untar the file into the untar directory passed as the second parameter. This utility will untar ".tar", ".tar.gz", and ".tgz" files.

Usage

From source file:com.github.sakserv.minicluster.oozie.sharelib.util.OozieShareLibUtil.java

License:Apache License

/**
 * Unpacks the given Oozie tar archive into a newly created temporary directory.
 *
 * @param fullOozieTarFilePath the tar archive to unpack
 * @return the absolute path of the temporary directory holding the extracted contents
 * @throws IOException if the temporary directory cannot be created or the archive cannot be extracted
 */
public String extractOozieTarFileToTempDir(File fullOozieTarFilePath) throws IOException {
    // Create the directory in a single step. The earlier createTempFile/delete/mkdir sequence left a
    // window in which the path could be claimed by someone else, and it ignored failures of
    // delete() and mkdir().
    File tempDir = java.nio.file.Files.createTempDirectory(SHARE_LIB_LOCAL_TEMP_PREFIX).toFile();

    // Best effort only: deletion at JVM exit cannot remove a directory that still has contents.
    tempDir.deleteOnExit();

    FileUtil.unTar(fullOozieTarFilePath, tempDir);

    return tempDir.getAbsolutePath();
}

From source file:com.github.sakserv.minicluster.oozie.sharelib.util.OozieShareLibUtil.java

License:Apache License

/**
 * Unpacks the given Oozie share lib tar archive into a newly created temporary directory and
 * removes the share lib directories of frameworks that were not requested.
 *
 * <p>Filtering only happens when {@code oozieShareLibFrameworks} is non-null and non-empty; in that
 * case every {@link Framework} value that is not contained in it has its {@code share/lib/<name>}
 * directory deleted from the extracted tree. When no frameworks are configured the extracted tree is
 * left untouched.
 *
 * @param fullOozieShareLibTarFilePath the share lib tar archive to unpack
 * @return the absolute path of the temporary directory holding the extracted contents
 * @throws IOException if the temporary directory cannot be created, the archive cannot be extracted,
 *                     or an excluded framework directory cannot be deleted
 */
public String extractOozieShareLibTarFileToTempDir(File fullOozieShareLibTarFilePath) throws IOException {
    // Create the directory in a single step. The earlier createTempFile/delete/mkdir sequence left a
    // window in which the path could be claimed by someone else, and it ignored failures of
    // delete() and mkdir().
    File tempDir = java.nio.file.Files.createTempDirectory(SHARE_LIB_LOCAL_TEMP_PREFIX).toFile();

    // Best effort only: deletion at JVM exit cannot remove a directory that still has contents.
    tempDir.deleteOnExit();

    FileUtil.unTar(fullOozieShareLibTarFilePath, tempDir);

    // Remove spark to try to get the CP down.
    // Both conditions must hold: the previous "||" dereferenced a null collection and was otherwise
    // always true, so an empty selection excluded every framework.
    if (oozieShareLibFrameworks != null && !oozieShareLibFrameworks.isEmpty()) {
        for (Framework framework : Framework.values()) {
            if (!oozieShareLibFrameworks.contains(framework)) {
                LOG.info("OOZIE: Excluding framework " + framework.getValue() + " from shared lib.");
                File removeShareLibDir = new File(
                        tempDir.getAbsolutePath() + "/share/lib/" + framework.getValue());
                if (removeShareLibDir.isDirectory()) {
                    FileUtils.deleteDirectory(removeShareLibDir);
                }
            }
        }
    }
    return tempDir.getAbsolutePath();
}

From source file:com.github.sakserv.minicluster.oozie.util.OozieShareLibUtil.java

License:Apache License

/**
 * Unpacks the given Oozie share lib tar archive into a newly created temporary directory.
 *
 * @param fullOozieShareLibTarFilePath the share lib tar archive to unpack
 * @return the absolute path of the temporary directory holding the extracted contents
 * @throws IOException if the temporary directory cannot be created or the archive cannot be extracted
 */
public String extractOozieShareLibTarFileToTempDir(File fullOozieShareLibTarFilePath) throws IOException {
    // Create the directory in a single step. The earlier createTempFile/delete/mkdir sequence left a
    // window in which the path could be claimed by someone else, and it ignored failures of
    // delete() and mkdir().
    File tempDir = java.nio.file.Files.createTempDirectory(SHARE_LIB_LOCAL_TEMP_PREFIX).toFile();

    // Best effort only: deletion at JVM exit cannot remove a directory that still has contents.
    tempDir.deleteOnExit();

    FileUtil.unTar(fullOozieShareLibTarFilePath, tempDir);

    return tempDir.getAbsolutePath();
}

From source file:io.apigee.lembos.utils.RunnerUtils.java

License:Apache License

/**
 * Resolves the configured Node.js module to a location on the local filesystem.
 *
 * <p>The module name and module path are read from the configuration. The path may be a local
 * filesystem path or a URL; either way its content is copied into a new temporary directory so the
 * module can later be archived and added to DistributedCache. If the copied item is a
 * {@code .tar}, {@code .tar.gz}, {@code .tgz} or {@code .zip} file it is extracted into that
 * directory and the module is looked up there by name plus one of the known Node.js module
 * extensions.
 *
 * @param conf the Hadoop configuration
 *
 * @return the local filesystem path to the module
 *
 * @throws IOException if anything goes wrong
 * @throws RuntimeException if the module name or path is not configured, the path does not exist, or
 *                          no module file can be found after extracting an archive
 */
public static File createLocalCopyOfModule(final Configuration conf) throws IOException {
    final String moduleName = conf.get(LembosConstants.MR_MODULE_NAME);
    final String modulePath = conf.get(LembosConstants.MR_MODULE_PATH);

    final boolean nameMissing = moduleName == null || moduleName.trim().isEmpty();
    final boolean pathMissing = modulePath == null || modulePath.trim().isEmpty();

    // Without both settings there is nothing to copy
    if (nameMissing || pathMissing) {
        throw new RuntimeException("Unable to create local copy of Node.js module from path: "
                + conf.get(LembosConstants.MR_MODULE_PATH));
    }

    // A path that does not parse as a URL is treated as a local filesystem path
    URL remoteLocation = null;

    try {
        remoteLocation = new URL(modulePath);
    } catch (MalformedURLException e) {
        // Expected whenever the configured path is a plain filesystem path
    }

    // Scratch directory that will hold the Node.js module
    final java.nio.file.Path scratchDir = Files.createTempDirectory("LembosMapReduceModule");
    final File scratchDirFile = scratchDir.toFile();

    // Request removal of the scratch directory when the JVM exits
    scratchDirFile.deleteOnExit();

    // Pick the filesystem matching the kind of path that was configured
    FileSystem fs;

    if (remoteLocation != null) {
        try {
            fs = FileSystem.get(remoteLocation.toURI(), conf);
        } catch (URISyntaxException e) {
            throw new IOException(e);
        }
    } else {
        fs = FileSystem.getLocal(conf);
    }

    final org.apache.hadoop.fs.Path source = new org.apache.hadoop.fs.Path(modulePath);

    if (!fs.exists(source)) {
        throw new RuntimeException("Unable to create/locate Node.js module locally: " + modulePath);
    }

    final org.apache.hadoop.fs.Path scratchPath = new org.apache.hadoop.fs.Path(
            scratchDir.toAbsolutePath().toString());

    // Bring the local/remote file(s) into the scratch directory
    fs.copyToLocalFile(source, scratchPath);

    final File copied = new File(new org.apache.hadoop.fs.Path(scratchPath, source.getName()).toString());

    // Anything that is not a regular file is resolved by module name inside the scratch directory
    if (!copied.isFile()) {
        return new File(scratchDirFile, moduleName);
    }

    final String copiedName = copied.getName();
    final boolean isTarball = copiedName.endsWith(".tar") || copiedName.endsWith(".tar.gz")
            || copiedName.endsWith(".tgz");

    if (isTarball) {
        FileUtil.unTar(copied, scratchDirFile);
    } else if (copiedName.endsWith(".zip")) {
        FileUtil.unZip(copied, scratchDirFile);
    } else {
        // A plain file is the module itself
        return copied;
    }

    // After extraction the first existing "<moduleName><extension>" entry is the module
    for (final String extension : KNOWN_NODE_MODULE_EXTENSIONS) {
        final File candidate = new File(scratchDirFile, moduleName + extension);

        if (candidate.exists()) {
            return candidate;
        }
    }

    throw new RuntimeException("Unable to create local copy of Node.js module from path: "
            + conf.get(LembosConstants.MR_MODULE_PATH));
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2JobResourceManager.java

License:Apache License

/**
 * Process list of resources.
 *
 * <p>Every entry is registered under the last component of its source path inside the job working
 * directory. When downloading, the entry is either copied there as-is or, for archives, first copied
 * into a {@code .cached-archives} sub-directory and then unpacked to its destination.
 *
 * @param jobLocDir Job working directory.
 * @param files Array of {@link URI} or {@link org.apache.hadoop.fs.Path} to process resources.
 * @param download {@code true}, if need to download. Process class path only else.
 * @param extract {@code true}, if need to extract archive.
 * @param clsPathUrls Collection to add resource as classpath resource.
 * @param rsrcNameProp Property for resource name array setting.
 * @throws IOException If failed.
 */
private void processFiles(File jobLocDir, @Nullable Object[] files, boolean download, boolean extract,
        @Nullable Collection<URL> clsPathUrls, @Nullable String rsrcNameProp) throws IOException {
    if (F.isEmptyOrNulls(files))
        return;

    Collection<String> res = new ArrayList<>();

    for (Object pathObj : files) {
        Path srcPath;

        if (pathObj instanceof URI) {
            URI uri = (URI) pathObj;

            srcPath = new Path(uri);
        } else
            srcPath = (Path) pathObj;

        String locName = srcPath.getName();

        File dstPath = new File(jobLocDir.getAbsolutePath(), locName);

        res.add(locName);

        rsrcSet.add(dstPath);

        if (clsPathUrls != null)
            clsPathUrls.add(dstPath.toURI().toURL());

        // Class path only mode: the resource is registered but nothing is copied.
        if (!download)
            continue;

        JobConf cfg = ctx.getJobConf();

        FileSystem dstFs = FileSystem.getLocal(cfg);

        FileSystem srcFs = job.fileSystem(srcPath.toUri(), cfg);

        if (extract) {
            File archivesPath = new File(jobLocDir.getAbsolutePath(), ".cached-archives");

            if (!archivesPath.exists() && !archivesPath.mkdir())
                throw new IOException(
                        "Failed to create directory " + "[path=" + archivesPath + ", jobId=" + jobId + ']');

            File archiveFile = new File(archivesPath, locName);

            FileUtil.copy(srcFs, srcPath, dstFs, new Path(archiveFile.toString()), false, cfg);

            // Lower-case with a fixed locale so that extension matching does not depend on the
            // JVM default locale (e.g. "I" does not map to "i" under a Turkish locale).
            String archiveNameLC = archiveFile.getName().toLowerCase(java.util.Locale.ROOT);

            if (archiveNameLC.endsWith(".jar"))
                RunJar.unJar(archiveFile, dstPath);
            else if (archiveNameLC.endsWith(".zip"))
                FileUtil.unZip(archiveFile, dstPath);
            else if (archiveNameLC.endsWith(".tar.gz") || archiveNameLC.endsWith(".tgz")
                    || archiveNameLC.endsWith(".tar"))
                FileUtil.unTar(archiveFile, dstPath);
            else
                throw new IOException("Cannot unpack archive [path=" + srcPath + ", jobId=" + jobId + ']');
        } else
            FileUtil.copy(srcFs, srcPath, dstFs, new Path(dstPath.toString()), false, cfg);
    }

    // Publish the collected local names under the given property, if one was requested.
    if (!res.isEmpty() && rsrcNameProp != null)
        ctx.getJobConf().setStrings(rsrcNameProp, res.toArray(new String[res.size()]));
}

From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2JobResourceManager.java

License:Apache License

/**
 * Process list of resources./*from  ww  w .  j  a  v  a 2s . c  o  m*/
 *
 * @param jobLocDir Job working directory.
 * @param files Array of {@link java.net.URI} or {@link org.apache.hadoop.fs.Path} to process resources.
 * @param download {@code true}, if need to download. Process class path only else.
 * @param extract {@code true}, if need to extract archive.
 * @param clsPathUrls Collection to add resource as classpath resource.
 * @param rsrcNameProp Property for resource name array setting.
 * @throws IOException If failed.
 */
private void processFiles(File jobLocDir, @Nullable Object[] files, boolean download, boolean extract,
        @Nullable Collection<URL> clsPathUrls, @Nullable String rsrcNameProp) throws IOException {
    if (F.isEmptyOrNulls(files))
        return;

    Collection<String> res = new ArrayList<>();

    for (Object pathObj : files) {
        String locName = null;
        Path srcPath;

        if (pathObj instanceof URI) {
            URI uri = (URI) pathObj;

            locName = uri.getFragment();

            srcPath = new Path(uri);
        } else
            srcPath = (Path) pathObj;

        if (locName == null)
            locName = srcPath.getName();

        File dstPath = new File(jobLocDir.getAbsolutePath(), locName);

        res.add(locName);

        rsrcSet.add(dstPath);

        if (clsPathUrls != null)
            clsPathUrls.add(dstPath.toURI().toURL());

        if (!download)
            continue;

        JobConf cfg = ctx.getJobConf();

        FileSystem dstFs = FileSystem.getLocal(cfg);

        FileSystem srcFs = srcPath.getFileSystem(cfg);

        if (extract) {
            File archivesPath = new File(jobLocDir.getAbsolutePath(), ".cached-archives");

            if (!archivesPath.exists() && !archivesPath.mkdir())
                throw new IOException(
                        "Failed to create directory " + "[path=" + archivesPath + ", jobId=" + jobId + ']');

            File archiveFile = new File(archivesPath, locName);

            FileUtil.copy(srcFs, srcPath, dstFs, new Path(archiveFile.toString()), false, cfg);

            String archiveNameLC = archiveFile.getName().toLowerCase();

            if (archiveNameLC.endsWith(".jar"))
                RunJar.unJar(archiveFile, dstPath);
            else if (archiveNameLC.endsWith(".zip"))
                FileUtil.unZip(archiveFile, dstPath);
            else if (archiveNameLC.endsWith(".tar.gz") || archiveNameLC.endsWith(".tgz")
                    || archiveNameLC.endsWith(".tar"))
                FileUtil.unTar(archiveFile, dstPath);
            else
                throw new IOException("Cannot unpack archive [path=" + srcPath + ", jobId=" + jobId + ']');
        } else
            FileUtil.copy(srcFs, srcPath, dstFs, new Path(dstPath.toString()), false, cfg);
    }

    if (!res.isEmpty() && rsrcNameProp != null)
        ctx.getJobConf().setStrings(rsrcNameProp, res.toArray(new String[res.size()]));
}

From source file:org.apache.kylin.common.util.HbaseImporter.java

License:Apache License

/**
 * Extracts an HBase export tar archive under {@code /tmp/hbase-export/} and imports the tables it
 * contains.
 *
 * <p>The import is skipped (with an error logged) when {@code JAVA_HOME} is not set or the archive
 * does not exist. The archive is expected to contain exactly one top-level folder whose children are
 * named after tables. Only entries matching the test metadata table name or starting with the shared
 * storage prefix are imported, and the loop stops at the first failed import.
 *
 * @param hbaseTarLocation path of the export tar archive on the local filesystem
 * @param conf configuration handed to the import run
 * @throws IOException if the working folder cannot be prepared, the archive cannot be extracted, or
 *                     the extracted content cannot be listed
 * @throws ClassNotFoundException propagated from the import run
 * @throws InterruptedException propagated from the import run
 * @throws IllegalStateException if the archive does not contain exactly one top-level entry
 */
public static void importHBaseData(String hbaseTarLocation, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {

    if (System.getenv("JAVA_HOME") == null) {
        logger.error(
                "Didn't find $JAVA_HOME, this will cause HBase data import failed. Please set $JAVA_HOME.");
        logger.error("Skipping table import...");
        return;
    }

    File exportFile = new File(hbaseTarLocation);
    if (!exportFile.exists()) {
        logger.error("Didn't find the export achieve file on " + exportFile.getAbsolutePath());
        return;
    }

    // Start from an empty working folder so leftovers from a previous run cannot be picked up.
    File folder = new File("/tmp/hbase-export/");
    if (folder.exists()) {
        FileUtils.deleteDirectory(folder);
    }
    if (!folder.mkdirs() && !folder.isDirectory()) {
        throw new IOException("Unable to create working folder " + folder.getAbsolutePath());
    }
    folder.deleteOnExit();

    FileUtil.unTar(exportFile, folder);

    // File.list() returns null on I/O error or when the path is not a directory.
    String[] child = folder.list();
    if (child == null) {
        throw new IOException("Unable to list extracted content of " + folder.getAbsolutePath());
    }
    Preconditions.checkState(child.length == 1);
    String backupFolderName = child[0];
    File backupFolder = new File(folder, backupFolderName);
    String[] tableNames = backupFolder.list();
    if (tableNames == null) {
        throw new IOException("Unable to list tables in " + backupFolder.getAbsolutePath());
    }

    for (String table : tableNames) {

        if (!(table.equalsIgnoreCase(HBaseMiniclusterHelper.TEST_METADATA_TABLE)
                || table.startsWith(HBaseMiniclusterHelper.SHARED_STORAGE_PREFIX))) {
            continue;
        }

        // create the htable; otherwise the import will fail.
        if (table.startsWith(HBaseMiniclusterHelper.II_STORAGE_PREFIX)) {
            HBaseConnection.createHTableIfNeeded(KylinConfig.getInstanceFromEnv().getStorageUrl(), table, "f");
        } else if (table.startsWith(HBaseMiniclusterHelper.CUBE_STORAGE_PREFIX)) {
            HBaseConnection.createHTableIfNeeded(KylinConfig.getInstanceFromEnv().getStorageUrl(), table, "F1",
                    "F2");
        }

        // directly import from local fs, no need to copy to hdfs
        String importLocation = "file://" + backupFolder.getAbsolutePath() + "/" + table;
        String[] args = new String[] { table, importLocation };
        boolean result = runImport(args, conf);
        logger.info("importing table '" + table + "' with result:" + result);

        if (!result)
            break;
    }

}

From source file:org.apache.oozie.tools.OozieSharelibCLI.java

License:Apache License

/**
 * Runs the sharelib command line: parses the sub-command, locates the local sharelib (a tar file or
 * a folder), and copies it to the system lib path on the target filesystem.
 *
 * <p>The services instance and the temporary extraction directory are released on failure as well
 * as on success.
 *
 * @param args command line arguments
 * @return {@code 0} on success or after showing help, {@code 1} on any parse or execution error
 * @throws IllegalStateException if this CLI instance has already been used
 * @throws Exception declared for callers; errors raised while executing the command are logged and
 *                   reported through the return value instead
 */
public synchronized int run(String[] args) throws Exception {
    if (used) {
        throw new IllegalStateException("CLI instance already used");
    }

    used = true;

    CLIParser parser = new CLIParser("oozie-setup.sh", HELP_INFO);
    String oozieHome = System.getProperty(OOZIE_HOME);
    parser.addCommand(HELP_CMD, "", "display usage for all commands or specified command", new Options(),
            false);
    parser.addCommand(CREATE_CMD, "", "create a new timestamped version of oozie sharelib",
            createUpgradeOptions(CREATE_CMD), false);
    parser.addCommand(UPGRADE_CMD, "",
            "[deprecated][use command \"create\" to create new version]   upgrade oozie sharelib \n",
            createUpgradeOptions(UPGRADE_CMD), false);

    // Tracked outside the try block so that they can be released on failure paths as well.
    Services services = null;
    File temp = null;
    int exitCode = 0;

    try {
        final CLIParser.Command command = parser.parse(args);
        String sharelibAction = command.getName();

        if (sharelibAction.equals(HELP_CMD)) {
            parser.showHelp(command.getCommandLine());
            // Nothing has been allocated yet, so there is nothing to clean up.
            return 0;
        }

        if (!command.getCommandLine().hasOption(FS_OPT)) {
            throw new Exception("-fs option must be specified");
        }

        int threadPoolSize = Integer.parseInt(command.getCommandLine().getOptionValue(CONCURRENCY_OPT, "1"));
        File srcFile = null;

        //Check whether user provided locallib
        if (command.getCommandLine().hasOption(LIB_OPT)) {
            srcFile = new File(command.getCommandLine().getOptionValue(LIB_OPT));
        } else {
            //Since user did not provide locallib, find the default one under oozie home dir
            Collection<File> files = FileUtils.listFiles(new File(oozieHome),
                    new WildcardFileFilter("oozie-sharelib*.tar.gz"), null);

            if (files.size() > 1) {
                throw new IOException("more than one sharelib tar found at " + oozieHome);
            }

            if (files.isEmpty()) {
                throw new IOException("default sharelib tar not found in oozie home dir: " + oozieHome);
            }

            srcFile = files.iterator().next();
        }

        temp = File.createTempFile("oozie", ".dir");
        temp.delete();
        temp.mkdir();
        temp.deleteOnExit();

        //Check whether the lib is a tar file or folder
        if (!srcFile.isDirectory()) {
            FileUtil.unTar(srcFile, temp);
            srcFile = new File(temp.toString() + "/share/lib");
        } else {
            //Get the lib directory since it's a folder
            srcFile = new File(srcFile, "lib");
        }

        String hdfsUri = command.getCommandLine().getOptionValue(FS_OPT);
        Path srcPath = new Path(srcFile.toString());

        Services startingServices = new Services();
        startingServices.getConf().set(Services.CONF_SERVICE_CLASSES,
                "org.apache.oozie.service.LiteWorkflowAppService, org.apache.oozie.service.HadoopAccessorService");
        startingServices.getConf().set(Services.CONF_SERVICE_EXT_CLASSES, "");
        startingServices.init();
        // Only a fully initialized instance is scheduled for destroy(), as in the original flow.
        services = startingServices;
        WorkflowAppService lwas = services.get(WorkflowAppService.class);
        HadoopAccessorService has = services.get(HadoopAccessorService.class);
        Path dstPath = lwas.getSystemLibPath();

        if (sharelibAction.equals(CREATE_CMD) || sharelibAction.equals(UPGRADE_CMD)) {
            dstPath = new Path(
                    dstPath.toString() + Path.SEPARATOR + SHARE_LIB_PREFIX + getTimestampDirectory());
        }

        System.out.println("the destination path for sharelib is: " + dstPath);

        URI uri = new Path(hdfsUri).toUri();
        Configuration fsConf = has.createJobConf(uri.getAuthority());
        FileSystem fs = has.createFileSystem(System.getProperty("user.name"), uri, fsConf);

        if (!srcFile.exists()) {
            throw new IOException(srcPath + " cannot be found");
        }

        if (threadPoolSize > 1) {
            concurrentCopyFromLocal(fs, threadPoolSize, srcFile, dstPath);
        } else {
            fs.copyFromLocalFile(false, srcPath, dstPath);
        }
    } catch (ParseException ex) {
        System.err.println("Invalid sub-command: " + ex.getMessage());
        System.err.println();
        System.err.println(parser.shortHelp());
        exitCode = 1;
    } catch (Exception ex) {
        logError(ex.getMessage(), ex);
        exitCode = 1;
    }

    // Cleanup runs after both success and failure. A cleanup error is logged and turns the result
    // into a failure, which matches the previous success-path behaviour.
    if (services != null) {
        try {
            services.destroy();
        } catch (Exception ex) {
            logError(ex.getMessage(), ex);
            exitCode = 1;
        }
    }

    if (temp != null) {
        try {
            FileUtils.deleteDirectory(temp);
        } catch (Exception ex) {
            logError(ex.getMessage(), ex);
            exitCode = 1;
        }
    }

    return exitCode;
}

From source file:org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2JobResourceManager.java

License:Open Source License

/**
 * Process list of resources./*from w  ww  .  j a  va 2 s  . co m*/
 *
 * @param jobLocDir Job working directory.
 * @param files Array of {@link java.net.URI} or {@link org.apache.hadoop.fs.Path} to process resources.
 * @param download {@code true}, if need to download. Process class path only else.
 * @param extract {@code true}, if need to extract archive.
 * @param clsPathUrls Collection to add resource as classpath resource.
 * @param rsrcNameProp Property for resource name array setting.
 * @throws IOException If failed.
 */
private void processFiles(File jobLocDir, @Nullable Object[] files, boolean download, boolean extract,
        @Nullable Collection<URL> clsPathUrls, @Nullable String rsrcNameProp) throws IOException {
    if (F.isEmptyOrNulls(files))
        return;

    Collection<String> res = new ArrayList<>();

    for (Object pathObj : files) {
        String locName = null;
        Path srcPath;

        if (pathObj instanceof URI) {
            URI uri = (URI) pathObj;

            locName = uri.getFragment();

            srcPath = new Path(uri);
        } else
            srcPath = (Path) pathObj;

        if (locName == null)
            locName = srcPath.getName();

        File dstPath = new File(jobLocDir.getAbsolutePath(), locName);

        res.add(locName);

        rsrcList.add(dstPath);

        if (clsPathUrls != null)
            clsPathUrls.add(dstPath.toURI().toURL());

        if (!download)
            continue;

        JobConf cfg = ctx.getJobConf();

        FileSystem dstFs = FileSystem.getLocal(cfg);

        FileSystem srcFs = srcPath.getFileSystem(cfg);

        if (extract) {
            File archivesPath = new File(jobLocDir.getAbsolutePath(), ".cached-archives");

            if (!archivesPath.exists() && !archivesPath.mkdir())
                throw new IOException(
                        "Failed to create directory " + "[path=" + archivesPath + ", jobId=" + jobId + ']');

            File archiveFile = new File(archivesPath, locName);

            FileUtil.copy(srcFs, srcPath, dstFs, new Path(archiveFile.toString()), false, cfg);

            String archiveNameLC = archiveFile.getName().toLowerCase();

            if (archiveNameLC.endsWith(".jar"))
                RunJar.unJar(archiveFile, dstPath);
            else if (archiveNameLC.endsWith(".zip"))
                FileUtil.unZip(archiveFile, dstPath);
            else if (archiveNameLC.endsWith(".tar.gz") || archiveNameLC.endsWith(".tgz")
                    || archiveNameLC.endsWith(".tar"))
                FileUtil.unTar(archiveFile, dstPath);
            else
                throw new IOException("Cannot unpack archive [path=" + srcPath + ", jobId=" + jobId + ']');
        } else
            FileUtil.copy(srcFs, srcPath, dstFs, new Path(dstPath.toString()), false, cfg);
    }

    if (!res.isEmpty() && rsrcNameProp != null)
        ctx.getJobConf().setStrings(rsrcNameProp, res.toArray(new String[res.size()]));
}