Example usage for org.apache.hadoop.fs FsShell run

Introduction

On this page you can find example usages of the org.apache.hadoop.fs.FsShell#run method, collected from several open-source projects.

Prototype

@Override
public int run(String argv[]) throws Exception 
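
Before the project-specific examples below, here is a minimal sketch of invoking FsShell#run directly. It assumes a default Configuration (so it targets whatever file system the local core-site.xml resolves to), and the "-ls /" command is only a placeholder, not taken from the examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;

public class FsShellRunExample {
    public static void main(String[] args) throws Exception {
        // Load the default Hadoop configuration (core-site.xml, hdfs-site.xml, ...).
        final Configuration conf = new Configuration();
        final FsShell shell = new FsShell(conf);

        // run() takes the same arguments as "hadoop fs" on the command line and
        // returns 0 on success, non-zero on failure.
        final int exitCode = shell.run(new String[] { "-ls", "/" });
        if (exitCode != 0) {
            System.err.println("FsShell command failed with exit code " + exitCode);
        }
        System.exit(exitCode);
    }
}

As the examples below illustrate, callers should always check the exit code returned by run(), since FsShell reports failures through its return value rather than by throwing.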

Usage

From source file: org.apache.rya.accumulo.mr.merge.CopyTool.java

License: Apache License

/**
 * Imports the files that hold the table data into the child instance.
 * @param childTableName the name of the child table to import.
 * @throws Exception
 */
public void importFilesToChildTable(final String childTableName) throws Exception {
    final Configuration childConfig = MergeToolMapper.getChildConfig(conf);
    final AccumuloRdfConfiguration childAccumuloRdfConfiguration = new AccumuloRdfConfiguration(childConfig);
    childAccumuloRdfConfiguration.setTablePrefix(childTablePrefix);
    final Connector childConnector = AccumuloRyaUtils.setupConnector(childAccumuloRdfConfiguration);
    final TableOperations childTableOperations = childConnector.tableOperations();

    final Path localWorkDir = getPath(localCopyFileImportDir, childTableName);
    final Path hdfsBaseWorkDir = getPath(baseImportDir, childTableName);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(hdfsBaseWorkDir)) {
        fs.delete(hdfsBaseWorkDir, true);
    }

    log.info("Importing from the local directory: " + localWorkDir);
    log.info("Importing to the HDFS directory: " + hdfsBaseWorkDir);
    copyLocalToHdfs(localWorkDir, hdfsBaseWorkDir);

    final Path files = getPath(hdfsBaseWorkDir.toString(), "files");
    final Path failures = getPath(hdfsBaseWorkDir.toString(), "failures");

    // With HDFS permissions on, we need to make sure the Accumulo user can read/move the files
    final FsShell shell = new FsShell(conf);
    shell.run(new String[] { "-chmod", "777", hdfsBaseWorkDir.toString() });
    if (fs.exists(failures)) {
        fs.delete(failures, true);
    }
    fs.mkdirs(failures);

    childTableOperations.importDirectory(childTableName, files.toString(), failures.toString(), false);
}

From source file: org.jd.copier.mapred.DistCp.java

License: Apache License

/** Delete the dst files/dirs which do not exist in src */
static private void deleteNonexisting(FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs,
        Path jobdir, JobConf jobconf, Configuration conf) throws IOException {
    if (!dstroot.isDir()) {
        throw new IOException("dst must be a directory when option " + Options.DELETE.cmd
                + " is set, but dst (= " + dstroot.getPath() + ") is not a directory.");
    }

    //write dst lsr results
    final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
    final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf, dstlsr, Text.class,
            dstroot.getClass(), SequenceFile.CompressionType.NONE);
    try {
        //do lsr to get all file statuses in dstroot
        final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
        for (lsrstack.push(dstroot); !lsrstack.isEmpty();) {
            final FileStatus status = lsrstack.pop();
            if (status.isDir()) {
                for (FileStatus child : dstfs.listStatus(status.getPath())) {
                    String relative = makeRelative(dstroot.getPath(), child.getPath());
                    writer.append(new Text(relative), child);
                    lsrstack.push(child);
                }
            }
        }
    } finally {
        checkAndClose(writer);
    }

    //sort lsr results
    final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class,
            FileStatus.class, jobconf);
    sorter.sort(dstlsr, sortedlsr);

    //compare lsr list and dst list  
    SequenceFile.Reader lsrin = null;
    SequenceFile.Reader dstin = null;
    try {
        lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
        dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

        //compare sorted lsr list and sorted dst list
        final Text lsrpath = new Text();
        final FileStatus lsrstatus = new FileStatus();
        final Text dstpath = new Text();
        final Text dstfrom = new Text();
        final FsShell shell = new FsShell(conf);
        final String[] shellargs = { "-rmr", null };

        boolean hasnext = dstin.next(dstpath, dstfrom);
        for (; lsrin.next(lsrpath, lsrstatus);) {
            int dst_cmp_lsr = dstpath.compareTo(lsrpath);
            for (; hasnext && dst_cmp_lsr < 0;) {
                hasnext = dstin.next(dstpath, dstfrom);
                dst_cmp_lsr = dstpath.compareTo(lsrpath);
            }

            if (dst_cmp_lsr == 0) {
                //lsrpath exists in dst, skip it
                hasnext = dstin.next(dstpath, dstfrom);
            } else {
                //lsrpath does not exist, delete it
                String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
                if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
                    shellargs[1] = s;
                    int r = 0;
                    try {
                        r = shell.run(shellargs);
                    } catch (Exception e) {
                        throw new IOException("Exception from shell.", e);
                    }
                    if (r != 0) {
                        throw new IOException(
                                "\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + r);
                    }
                }
            }
        }
    } finally {
        checkAndClose(lsrin);
        checkAndClose(dstin);
    }
}

From source file: org.oclc.firefly.hadoop.backup.Backup.java

License: Apache License

/**
 * Performs a complete copy of the source hbase to the given destination
 * @param tables The names of the tables to backup
 * @param maxTries The maximum number of times to try to copy regions.
 * @return True if successful, false otherwise
 * @throws IOException If failed to interact with Hadoop
 * @throws ClassNotFoundException
 * @throws InterruptedException 
 */
public boolean doMajorCopy(String[] tables, int maxTries)
        throws IOException, InterruptedException, ClassNotFoundException {
    boolean ret = false;
    String username = getUsername();
    short replication = (short) getInitialReplication();

    // Get a list of regions from HBase
    // Then filter out the regions we are not extracting, and group them by table
    List<CatalogRow> regions = getHBaseRegions(srcConf);
    Map<String, List<CatalogRow>> filtered = groupAndFilter(regions, tables);
    List<Pair<String, HRegionInfo>> mapperInput = new ArrayList<Pair<String, HRegionInfo>>();

    // Prepare the input for the mappers to use
    // This creates a list of region server and region pairs
    LOG.info("Exporting the following tables:");
    for (Entry<String, List<CatalogRow>> entry : filtered.entrySet()) {
        String tablename = entry.getKey();
        List<CatalogRow> rows = entry.getValue();

        LOG.info(". " + tablename);

        for (CatalogRow r : rows) {
            String regionServer = r.getHost() + ":" + r.getPort();
            HRegionInfo region = r.getHRegionInfo();
            mapperInput.add(Pair.newPair(regionServer, region));
        }
    }

    // Make sure we write to a directory that does not exist
    backupDirectoryPath = createBackupDirectory(getCurrentDateString());
    LOG.info("Starting backup path: " + backupDirectoryPath);

    // Copy the .tableinfo files for the tables we are extracting
    // These files are not copied by the MR job as it only focuses on regions
    List<FileStatus> tableInfoFiles = getTableInfoFiles(srcFs, filtered);
    for (FileStatus file : tableInfoFiles) {
        Path srcFilePath = file.getPath();
        Path relPath = new Path(BackupUtils.getFsRelativePath(srcFs, srcFilePath));
        Path dstFilePath = new Path(backupDirectoryPath.toString() + relPath.toString());
        BackupUtils.copy(srcFs, srcFilePath, dstFs, dstFilePath, buffer, username, replication);
    }

    // Dispatch MR job and monitor
    // Retry regions if necessary
    if (mapperInput.size() > 0) {
        int tries = 0;

        while (!ret && (maxTries == 0 || tries < maxTries)) {
            if (getNumMapTasks() > mapperInput.size()) {
                setNumMapTasks(mapperInput.size());
                LOG.info("Not enough regions. Reducing number of map tasks");
            }

            // Generate a list of mapper input files and create job
            List<Path> sourceFiles = createMapperInputSequenceFiles(mapperInput, getNumMapTasks(), srcFs,
                    tries);
            Job job = createMRJob(srcConf, dstConf, sourceFiles, backupDirectoryPath, tries);

            LOG.info(job.getJobName());
            LOG.info("--------------------------------------------------");
            LOG.info("Number of regions  : " + mapperInput.size());
            LOG.info("Number of map tasks: " + getNumMapTasks());
            LOG.info("Mapper input path  : " + getMapInputDirectory(tries));
            LOG.info("Mapper output path : " + FileOutputFormat.getOutputPath(job));
            LOG.info("--------------------------------------------------");

            job.waitForCompletion(true);
            if (job.isSuccessful()) {
                // Check if any regions failed
                Counters counters = job.getCounters();
                Counter failedCounter = counters.findCounter("Backup", "FailedRegions");
                long failed = failedCounter.getValue();

                if (failed > 0) {
                    LOG.info("Number of failed regions: " + failed + ".");

                    // get a fresh list of regions to copy
                    List<Pair<String, HRegionInfo>> failedRegions = getFailedRegions(srcFs, srcConf, tries);
                    addCopiedRegions(mapperInput, failedRegions);
                    mapperInput = getRemainingRegions(mapperInput, tables);

                    for (Pair<String, HRegionInfo> pair : mapperInput) {
                        LOG.info("Retry: " + pair.getSecond());
                    }

                    if (mapperInput.size() == 0) {
                        ret = true;
                        backupDirectoryPath = appendEndTime(backupDirectoryPath);

                        LOG.warn("No regions left to copy, but expected to copy more. "
                                + "Please inspect logs/files manually for errors");
                    }
                } else {
                    ret = true;

                    addCopiedRegions(mapperInput, null);
                    backupDirectoryPath = appendEndTime(backupDirectoryPath);
                    LOG.info("MR job finished successfully");
                }
            } else {
                LOG.error("An unexpected error occurred during the MR job. Please see MR logs.");
                break;
            }

            tries++;
        }

        if (ret) {
            if (verifyCopiedRegions()) {
                LOG.info("Verification passed succesfully");
            } else {
                ret = false;
                LOG.info("Verification failed. Please inspect errors manually");
            }
        } else {
            LOG.info("No attempts left. Try setting -n to a higher value, or setting it to 0");
        }
    }

    if (ret) {
        // Set replication factor of backup directory to default.
        // This may not be the best solution, but let the built-in shell take care of it
        // because it can do it recursively without us having to rediscover all the files
        short finalReplication = (short) getFinalReplication();

        if (replication != finalReplication) {
            FsShell shell = new FsShell(dstConf);
            String[] repArgs = { "-setrep", "-R", "-w", "" + finalReplication, backupDirectoryPath.toString() };

            try {
                LOG.info("Setting final replication factor of backup files to " + finalReplication);
                shell.run(repArgs);
            } catch (Exception e) {
                LOG.warn("Could not set replication factor of backup files to " + finalReplication);
            }
        }
    }

    return ret;
}

From source file: org.springframework.data.hadoop.impala.mapreduce.MapReduceCommandsTest.java

License: Apache License

/**
 * Test method for {@link org.springframework.data.hadoop.impala.mapreduce.MapReduceCommands#submit(java.lang.String)}.
 * @throws Exception
 */
@Test
public void testSubmit() throws Exception {
    Configuration jobConfig = new Configuration(false);

    Configuration hadoopConfig = mrCmds.getHadoopConfiguration();

    FsShell shell = new FsShell(hadoopConfig);
    List<String> argv = new ArrayList<String>();
    argv.add("-rmr");
    argv.add("/tmp/wc-input");
    shell.run(argv.toArray(new String[0]));

    argv = new ArrayList<String>();
    argv.add("-put");
    File f = new File("src/test/resources/wordcount-conf.xml");
    argv.add(f.getAbsolutePath());
    argv.add("/tmp/wc-input/");
    shell.run(argv.toArray(new String[0]));

    argv = new ArrayList<String>();
    argv.add("-rmr");
    argv.add("/tmp/wc-output");
    shell.run(argv.toArray(new String[0]));

    String hadoopFsName = hadoopConfig.get("fs.default.name");
    String hadoopJT = hadoopConfig.get("mapred.job.tracker");
    File jarFile = new File(hadoopExampleJarFile);

    jobConfig.set("fs.default.name", hadoopFsName);
    jobConfig.set("mapred.job.tracker", hadoopJT);
    jobConfig.set("mapred.jar", jarFile.getAbsolutePath());
    jobConfig.set("mapred.input.dir", "/tmp/wc-input");
    jobConfig.set("mapred.output.dir", "/tmp/wc-output");
    jobConfig.set("mapreduce.map.class", "org.apache.hadoop.examples.WordCount.TokenizerMapper");
    jobConfig.set("mapreduce.reduce.class", "org.apache.hadoop.examples.WordCount.IntSumReducer");

    String tmpFile = "/tmp/impala-test-wordcount-conf.xml";
    try {
        jobConfig.writeXml(new FileOutputStream(new File(tmpFile)));
    } catch (Exception e) {
        Assert.fail("fail to write temp MR configuration file");
    }

    mrCmds.submit(tmpFile);
}

From source file: org.springframework.data.hadoop.impala.mapreduce.MapReduceCommandsTest.java

License: Apache License

@Test
public void testJar() throws Exception {
    Configuration hadoopConfig = mrCmds.getHadoopConfiguration();
    FsShell shell = new FsShell(hadoopConfig);
    List<String> argv = new ArrayList<String>();
    argv.add("-rmr");
    argv.add("/tmp/wc-input2");
    shell.run(argv.toArray(new String[0]));

    argv = new ArrayList<String>();
    argv.add("-put");
    File f = new File("src/test/resources/wordcount-conf.xml");
    argv.add(f.getAbsolutePath());
    argv.add("/tmp/wc-input2/");
    shell.run(argv.toArray(new String[0]));

    argv = new ArrayList<String>();
    argv.add("-rmr");
    argv.add("/tmp/wc-output2");
    shell.run(argv.toArray(new String[0]));

    File jarFile = new File(hadoopExampleJarFile);
    mrCmds.jar(jarFile.getAbsolutePath(), "org.apache.hadoop.examples.WordCount",
            "/tmp/wc-input2 /tmp/wc-output2");
}