Example usage for org.apache.hadoop.fs FsShell run

List of usage examples for org.apache.hadoop.fs FsShell run


On this page you can find example usages of the org.apache.hadoop.fs.FsShell run method.


public int run(String argv[]) throws Exception 

Source Link




From source file:org.apache.rya.accumulo.mr.merge.CopyTool.java

License:Apache License

/**
 * Imports the files that hold the table data into the child instance.
 * @param childTableName the name of the child table to import.
 * @throws Exception
 */
public void importFilesToChildTable(final String childTableName) throws Exception {
    // Derive the child instance's configuration from this tool's conf and open a connector to it.
    final Configuration childConfig = MergeToolMapper.getChildConfig(conf);
    final AccumuloRdfConfiguration childAccumuloRdfConfiguration = new AccumuloRdfConfiguration(childConfig);
    final Connector childConnector = AccumuloRyaUtils.setupConnector(childAccumuloRdfConfiguration);
    final TableOperations childTableOperations = childConnector.tableOperations();

    // Per-table directories: where the files are copied from, and where they are staged for import.
    final Path localWorkDir = getPath(localCopyFileImportDir, childTableName);
    final Path hdfsBaseWorkDir = getPath(baseImportDir, childTableName);

    // Start from a clean staging directory: recursively delete any leftover from a previous run.
    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(hdfsBaseWorkDir)) {
        fs.delete(hdfsBaseWorkDir, true);
    // NOTE(review): the closing brace of the `if` above is missing from this excerpt.

    log.info("Importing from the local directory: " + localWorkDir);
    log.info("Importing to the HDFS directory: " + hdfsBaseWorkDir);
    copyLocalToHdfs(localWorkDir, hdfsBaseWorkDir);

    // Sub-directories of the staging directory handed to the import call below.
    final Path files = getPath(hdfsBaseWorkDir.toString(), "files");
    final Path failures = getPath(hdfsBaseWorkDir.toString(), "failures");

    // With HDFS permissions on, we need to make sure the Accumulo user can read/move the files
    // The exit code returned by shell.run(...) is not checked here, and no recursive flag is passed
    // to -chmod.
    final FsShell shell = new FsShell(conf);
    shell.run(new String[] { "-chmod", "777", hdfsBaseWorkDir.toString() });
    // Remove any existing failures directory before importing.
    if (fs.exists(failures)) {
        fs.delete(failures, true);
    // NOTE(review): the closing brace of the `if` above is missing from this excerpt; it is not
    // visible here whether the failures directory is re-created before the import - confirm
    // against the full source.

    // Import the staged files into the child table; the final argument is `false` - presumably
    // Accumulo's "setTime" flag, TODO confirm.
    childTableOperations.importDirectory(childTableName, files.toString(), failures.toString(), false);
    // NOTE(review): the method's closing brace is missing from this excerpt.

From source file:org.jd.copier.mapred.DistCp.java

License:Apache License

/**
 * Delete the dst files/dirs which do not exist in src.
 *
 * Lists everything under {@code dstroot}, sorts that listing, then walks it in step with
 * {@code dstsorted} and removes (via {@code FsShell -rmr}) every listed path that has no
 * matching entry in {@code dstsorted}.
 *
 * @param dstfs file system used to list the destination tree
 * @param dstroot status of the destination root; must be a directory
 * @param dstsorted sequence file of (Text, Text) records read as the expected destination
 *        paths - presumably already sorted by path, as the merge loop relies on ordering
 * @param jobfs file system on which the temporary listing files are written and sorted
 * @param jobdir directory in which the temporary listing files are created
 * @param jobconf configuration used for the sequence file writer, sorter and readers
 * @param conf configuration used to construct the {@code FsShell}
 * @throws IOException if {@code dstroot} is not a directory, if the shell throws, or if
 *         the shell command returns a non-zero exit code
 */
static private void deleteNonexisting(FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs,
        Path jobdir, JobConf jobconf, Configuration conf) throws IOException {
    if (!dstroot.isDir()) {
        throw new IOException("dst must be a directory when option " + Options.DELETE.cmd
                + " is set, but dst (= " + dstroot.getPath() + ") is not a directory.");
    }

    //write dst lsr results
    final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
    final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf, dstlsr, Text.class,
            dstroot.getClass(), SequenceFile.CompressionType.NONE);
    try {
        //do lsr to get all file statuses in dstroot
        // Iterative traversal driven by an explicit stack, seeded with the root.
        final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
        for (lsrstack.push(dstroot); !lsrstack.isEmpty();) {
            final FileStatus status = lsrstack.pop();
            if (status.isDir()) {
                for (FileStatus child : dstfs.listStatus(status.getPath())) {
                    // Record each child keyed by its path relative to the destination root.
                    String relative = makeRelative(dstroot.getPath(), child.getPath());
                    writer.append(new Text(relative), child);
                    // NOTE(review): this excerpt is truncated here - the push of child directories
                    // back onto the stack and the closing braces of the loops are not visible.
    } finally {
        // NOTE(review): the body of this finally block (presumably closing the writer) is
        // missing from this excerpt.

    //sort lsr results
    final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class,
            FileStatus.class, jobconf);
    sorter.sort(dstlsr, sortedlsr);

    //compare lsr list and dst list
    SequenceFile.Reader lsrin = null;
    SequenceFile.Reader dstin = null;
    try {
        lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
        dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

        //compare sorted lsr list and sorted dst list
        final Text lsrpath = new Text();
        final FileStatus lsrstatus = new FileStatus();
        final Text dstpath = new Text();
        final Text dstfrom = new Text();
        final FsShell shell = new FsShell(conf);
        // shellargs[1] is filled in with each path to delete; null means nothing deleted yet.
        final String[] shellargs = { "-rmr", null };

        boolean hasnext = dstin.next(dstpath, dstfrom);
        for (; lsrin.next(lsrpath, lsrstatus);) {
            // Advance the expected-paths reader until it is no longer behind the listed path.
            int dst_cmp_lsr = dstpath.compareTo(lsrpath);
            for (; hasnext && dst_cmp_lsr < 0;) {
                hasnext = dstin.next(dstpath, dstfrom);
                dst_cmp_lsr = dstpath.compareTo(lsrpath);
            // NOTE(review): the closing brace of the inner loop above is missing from this excerpt.

            if (dst_cmp_lsr == 0) {
                //lsrpath exists in dst, skip it
                hasnext = dstin.next(dstpath, dstfrom);
            } else {
                //lsrpath does not exist, delete it
                String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
                // Skip paths under the previously deleted one - presumably because the recursive
                // remove already took them out (isAncestorPath is defined elsewhere; confirm).
                if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
                    shellargs[1] = s;
                    int r = 0;
                    try {
                        r = shell.run(shellargs);
                    } catch (Exception e) {
                        throw new IOException("Exception from shell.", e);
                    // NOTE(review): the closing brace of the catch above is missing from this excerpt.
                    if (r != 0) {
                        throw new IOException(
                                "\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + r);
    } finally {
        // NOTE(review): the excerpt ends here - the finally body (presumably closing both
        // readers) and the remaining closing braces are not visible.

From source file:org.oclc.firefly.hadoop.backup.Backup.java

License:Apache License

/**
 * Performs a complete copy of the source hbase to the given destination
 * @param tables The names of the tables to backup
 * @param maxTries The maximum number of times to try to copy regions.
 * @return True if successful, false otherwise
 * @throws IOException If failed to interact with Hadoop
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public boolean doMajorCopy(String[] tables, int maxTries)
        throws IOException, InterruptedException, ClassNotFoundException {
    // NOTE(review): this excerpt has lost lines (closing braces and at least one partial
    // statement); the comments below describe only what is visible.
    boolean ret = false;
    String username = getUsername();
    short replication = (short) getInitialReplication();

    // Get a list of regions from HBase
    // Then filter out the regions we are not extracting, and group them by table
    List<CatalogRow> regions = getHBaseRegions(srcConf);
    Map<String, List<CatalogRow>> filtered = groupAndFilter(regions, tables);
    List<Pair<String, HRegionInfo>> mapperInput = new ArrayList<Pair<String, HRegionInfo>>();

    // Prepare the input for the mappers to use
    // This creates a list of region server and region pairs
    LOG.info("Exporting the following tables:");
    for (Entry<String, List<CatalogRow>> entry : filtered.entrySet()) {
        String tablename = entry.getKey();
        List<CatalogRow> rows = entry.getValue();

        LOG.info(". " + tablename);

        for (CatalogRow r : rows) {
            // The region server is identified as "host:port".
            String regionServer = r.getHost() + ":" + r.getPort();
            HRegionInfo region = r.getHRegionInfo();
            mapperInput.add(Pair.newPair(regionServer, region));
    // NOTE(review): the closing braces of both loops above are missing from this excerpt.

    // Make sure we write to a directory that does not exist
    backupDirectoryPath = createBackupDirectory(getCurrentDateString());
    LOG.info("Starting backup path: " + backupDirectoryPath);

    // Copy the .tableinfo files for the tables we are extracting
    // These files are not copied by the MR job as it only focuses on regions
    List<FileStatus> tableInfoFiles = getTableInfoFiles(srcFs, filtered);
    for (FileStatus file : tableInfoFiles) {
        Path srcFilePath = file.getPath();
        // Re-root the source path under the backup directory by plain string concatenation.
        Path relPath = new Path(BackupUtils.getFsRelativePath(srcFs, srcFilePath));
        Path dstFilePath = new Path(backupDirectoryPath.toString() + relPath.toString());
        BackupUtils.copy(srcFs, srcFilePath, dstFs, dstFilePath, buffer, username, replication);
    // NOTE(review): the closing brace of the loop above is missing from this excerpt.

    // Dispatch MR job and monitor
    // Retry regions if necessary
    if (mapperInput.size() > 0) {
        int tries = 0;

        // A maxTries of 0 means "retry without limit".
        // NOTE(review): no increment of `tries` is visible in this excerpt - confirm against
        // the full source.
        while (!ret && (maxTries == 0 || tries < maxTries)) {
            if (getNumMapTasks() > mapperInput.size()) {
                LOG.info("Not enough regions. Reducing number of map tasks");
                // NOTE(review): the statement that actually reduces the map task count, and the
                // closing brace of this `if`, are not visible in this excerpt.

            // Generate a list of mapper input files and create job
            // NOTE(review): the next statement is cut off mid-argument-list in this excerpt.
            List<Path> sourceFiles = createMapperInputSequenceFiles(mapperInput, getNumMapTasks(), srcFs,
            Job job = createMRJob(srcConf, dstConf, sourceFiles, backupDirectoryPath, tries);

            LOG.info("Number of regions  : " + mapperInput.size());
            LOG.info("Number of map tasks: " + getNumMapTasks());
            LOG.info("Mapper input path  : " + getMapInputDirectory(tries));
            LOG.info("Mapper output path : " + FileOutputFormat.getOutputPath(job));

            // NOTE(review): the call that submits the job and waits for it is not visible here.
            if (job.isSuccessful()) {
                // Check if any regions failed
                Counters counters = job.getCounters();
                Counter failedCounter = counters.findCounter("Backup", "FailedRegions");
                long failed = failedCounter.getValue();

                if (failed > 0) {
                    LOG.info("Number of failed regions: " + failed + ".");

                    // get a fresh list of regions to copy
                    List<Pair<String, HRegionInfo>> failedRegions = getFailedRegions(srcFs, srcConf, tries);
                    addCopiedRegions(mapperInput, failedRegions);
                    mapperInput = getRemainingRegions(mapperInput, tables);

                    for (Pair<String, HRegionInfo> pair : mapperInput) {
                        LOG.info("Retry: " + pair.getSecond());

                    // Failures were reported but nothing remains to retry: treat the copy as
                    // finished, but warn so the operator checks the result.
                    if (mapperInput.size() == 0) {
                        ret = true;
                        backupDirectoryPath = appendEndTime(backupDirectoryPath);

                        LOG.warn("No regions left to copy, but expected to copy more. "
                                + "Please inspect logs/files manually for errors");
                } else {
                    // No failed regions: the copy is complete.
                    ret = true;

                    addCopiedRegions(mapperInput, null);
                    backupDirectoryPath = appendEndTime(backupDirectoryPath);
                    LOG.info("MR job finished successfully");
            } else {
                LOG.error("An unexpected error occurred during the MR job. Please see MR logs.");


        // After the retry loop: verify on success, otherwise report that attempts ran out.
        if (ret) {
            if (verifyCopiedRegions()) {
                LOG.info("Verification passed succesfully");
            } else {
                ret = false;
                LOG.info("Verification failed. Please inspect errors manually");
        } else {
            LOG.info("No attempts left. Try setting -n to a higher value, or setting it to 0");

    if (ret) {
        // Set replication factor of backup directory to default.
        // This may not be the best solution, but let built-in shell take care of it
        // because it can do it recursively with out us having to rediscover all the files
        short finalReplication = (short) getFinalReplication();

        if (replication != finalReplication) {
            FsShell shell = new FsShell(dstConf);
            String[] repArgs = { "-setrep", "-R", "-w", "" + finalReplication, backupDirectoryPath.toString() };

            try {
                LOG.info("Setting final replication factor of backup files to " + finalReplication);
                // NOTE(review): the call that runs `repArgs` through `shell` is not visible in
                // this excerpt; as shown, `shell` and `repArgs` are never used.
            } catch (Exception e) {
                // A failure here is logged only; it does not change the returned result.
                LOG.warn("Could not set replication factor of backup files to " + finalReplication);

    return ret;

From source file:org.springframework.data.hadoop.impala.mapreduce.MapReduceCommandsTest.java

License:Apache License

/**
 * Test method for {@link org.springframework.data.hadoop.impala.mapreduce.MapReduceCommands#submit(java.lang.String)}.
 * @throws Exception
 */
public void testSubmit() throws Exception {
    // Job configuration built without loading default resources (loadDefaults = false).
    Configuration jobConfig = new Configuration(false);

    Configuration hadoopConfig = mrCmds.getHadoopConfiguration();

    // NOTE(review): each of the three shell.run(...) calls below is given an empty argument
    // array in this excerpt; the lines that populated `argv` appear to have been lost.
    FsShell shell = new FsShell(hadoopConfig);
    List<String> argv = new ArrayList<String>();
    shell.run(argv.toArray(new String[0]));

    argv = new ArrayList<String>();
    // `f` is not used anywhere in the visible code.
    File f = new File("src/test/resources/wordcount-conf.xml");
    shell.run(argv.toArray(new String[0]));

    argv = new ArrayList<String>();
    shell.run(argv.toArray(new String[0]));

    // Carry the cluster endpoints over from the Hadoop configuration into the job configuration.
    String hadoopFsName = hadoopConfig.get("fs.default.name");
    String hadoopJT = hadoopConfig.get("mapred.job.tracker");
    File jarFile = new File(hadoopExampleJarFile);

    jobConfig.set("fs.default.name", hadoopFsName);
    jobConfig.set("mapred.job.tracker", hadoopJT);
    jobConfig.set("mapred.jar", jarFile.getAbsolutePath());
    jobConfig.set("mapred.input.dir", "/tmp/wc-input");
    jobConfig.set("mapred.output.dir", "/tmp/wc-output");
    jobConfig.set("mapreduce.map.class", "org.apache.hadoop.examples.WordCount.TokenizerMapper");
    jobConfig.set("mapreduce.reduce.class", "org.apache.hadoop.examples.WordCount.IntSumReducer");

    // Write the job configuration out as XML to a temporary file.
    String tmpFile = "/tmp/impala-test-wordcount-conf.xml";
    try {
        // NOTE(review): the FileOutputStream created here is never closed in the visible code.
        jobConfig.writeXml(new FileOutputStream(new File(tmpFile)));
    } catch (Exception e) {
        Assert.fail("fail to write temp MR configuration file");
        // NOTE(review): the excerpt ends here - the closing brace of the catch, the call to the
        // method under test and the method's closing brace are not visible.


From source file:org.springframework.data.hadoop.impala.mapreduce.MapReduceCommandsTest.java

License:Apache License

public void testJar() throws Exception {
    Configuration hadoopConfig = mrCmds.getHadoopConfiguration();
    FsShell shell = new FsShell(hadoopConfig);
    // NOTE(review): the first and third shell.run(...) calls are given an empty argument array
    // in this excerpt, and the second receives only a file path with no command; the lines that
    // added the shell commands to `argv` appear to have been lost.
    List<String> argv = new ArrayList<String>();
    shell.run(argv.toArray(new String[0]));

    argv = new ArrayList<String>();
    File f = new File("src/test/resources/wordcount-conf.xml");
    argv.add(f.getAbsolutePath());
    shell.run(argv.toArray(new String[0]));

    argv = new ArrayList<String>();
    shell.run(argv.toArray(new String[0]));

    // Run the example jar's WordCount main class; the last argument is a single
    // space-separated string holding the input and output paths.
    File jarFile = new File(hadoopExampleJarFile);
    mrCmds.jar(jarFile.getAbsolutePath(), "org.apache.hadoop.examples.WordCount",
            "/tmp/wc-input2 /tmp/wc-output2");