List of usage examples for org.apache.hadoop.fs FsShell run
@Override public int run(String argv[]) throws Exception
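The signature above comes from Hadoop's Tool interface: FsShell takes a filesystem command split into a String[] and returns a shell-style exit code rather than throwing on command failure. A minimal, self-contained sketch of the pattern the examples below share (the command and path are illustrative, not drawn from any of the source files):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;

public class FsShellRunSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FsShell shell = new FsShell(conf);
        // Same as "hadoop fs -ls /" on the command line; a non-zero code means the command failed.
        int exitCode = shell.run(new String[] { "-ls", "/" });
        if (exitCode != 0) {
            throw new IllegalStateException("-ls returned non-zero exit code " + exitCode);
        }
    }
}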
From source file:org.apache.rya.accumulo.mr.merge.CopyTool.java
License:Apache License
/**
 * Imports the files that hold the table data into the child instance.
 * @param childTableName the name of the child table to import.
 * @throws Exception
 */
public void importFilesToChildTable(final String childTableName) throws Exception {
    final Configuration childConfig = MergeToolMapper.getChildConfig(conf);
    final AccumuloRdfConfiguration childAccumuloRdfConfiguration = new AccumuloRdfConfiguration(childConfig);
    childAccumuloRdfConfiguration.setTablePrefix(childTablePrefix);
    final Connector childConnector = AccumuloRyaUtils.setupConnector(childAccumuloRdfConfiguration);
    final TableOperations childTableOperations = childConnector.tableOperations();

    final Path localWorkDir = getPath(localCopyFileImportDir, childTableName);
    final Path hdfsBaseWorkDir = getPath(baseImportDir, childTableName);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(hdfsBaseWorkDir)) {
        fs.delete(hdfsBaseWorkDir, true);
    }

    log.info("Importing from the local directory: " + localWorkDir);
    log.info("Importing to the HDFS directory: " + hdfsBaseWorkDir);
    copyLocalToHdfs(localWorkDir, hdfsBaseWorkDir);

    final Path files = getPath(hdfsBaseWorkDir.toString(), "files");
    final Path failures = getPath(hdfsBaseWorkDir.toString(), "failures");

    // With HDFS permissions on, we need to make sure the Accumulo user can read/move the files
    final FsShell shell = new FsShell(conf);
    shell.run(new String[] { "-chmod", "777", hdfsBaseWorkDir.toString() });

    if (fs.exists(failures)) {
        fs.delete(failures, true);
    }
    fs.mkdirs(failures);

    childTableOperations.importDirectory(childTableName, files.toString(), failures.toString(), false);
}
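The FsShell-specific step of the example above can be reduced to a short sketch (the helper name, directory, and permission mode are illustrative, not part of CopyTool): with HDFS permissions enabled, the bulk-import working directory is opened up so the Accumulo service user can read and move the files before importDirectory runs.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.fs.Path;

// Hypothetical helper mirroring the chmod step of importFilesToChildTable.
static void openPermissionsForImport(Configuration conf, Path importDir) throws Exception {
    FsShell shell = new FsShell(conf);
    // Same effect as "hadoop fs -chmod 777 <importDir>"; run() reports failure via its exit code.
    int exitCode = shell.run(new String[] { "-chmod", "777", importDir.toString() });
    if (exitCode != 0) {
        throw new IllegalStateException("-chmod returned non-zero exit code " + exitCode);
    }
}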
From source file:org.jd.copier.mapred.DistCp.java
License:Apache License
/** Delete the dst files/dirs which do not exist in src */
static private void deleteNonexisting(FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs,
        Path jobdir, JobConf jobconf, Configuration conf) throws IOException {
    if (!dstroot.isDir()) {
        throw new IOException("dst must be a directory when option " + Options.DELETE.cmd
                + " is set, but dst (= " + dstroot.getPath() + ") is not a directory.");
    }

    //write dst lsr results
    final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
    final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf, dstlsr, Text.class,
            dstroot.getClass(), SequenceFile.CompressionType.NONE);
    try {
        //do lsr to get all file statuses in dstroot
        final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
        for (lsrstack.push(dstroot); !lsrstack.isEmpty();) {
            final FileStatus status = lsrstack.pop();
            if (status.isDir()) {
                for (FileStatus child : dstfs.listStatus(status.getPath())) {
                    String relative = makeRelative(dstroot.getPath(), child.getPath());
                    writer.append(new Text(relative), child);
                    lsrstack.push(child);
                }
            }
        }
    } finally {
        checkAndClose(writer);
    }

    //sort lsr results
    final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class,
            FileStatus.class, jobconf);
    sorter.sort(dstlsr, sortedlsr);

    //compare lsr list and dst list
    SequenceFile.Reader lsrin = null;
    SequenceFile.Reader dstin = null;
    try {
        lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
        dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

        //compare sorted lsr list and sorted dst list
        final Text lsrpath = new Text();
        final FileStatus lsrstatus = new FileStatus();
        final Text dstpath = new Text();
        final Text dstfrom = new Text();
        final FsShell shell = new FsShell(conf);
        final String[] shellargs = { "-rmr", null };

        boolean hasnext = dstin.next(dstpath, dstfrom);
        for (; lsrin.next(lsrpath, lsrstatus);) {
            int dst_cmp_lsr = dstpath.compareTo(lsrpath);
            for (; hasnext && dst_cmp_lsr < 0;) {
                hasnext = dstin.next(dstpath, dstfrom);
                dst_cmp_lsr = dstpath.compareTo(lsrpath);
            }

            if (dst_cmp_lsr == 0) {
                //lsrpath exists in dst, skip it
                hasnext = dstin.next(dstpath, dstfrom);
            } else {
                //lsrpath does not exist, delete it
                String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
                if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
                    shellargs[1] = s;
                    int r = 0;
                    try {
                        r = shell.run(shellargs);
                    } catch (Exception e) {
                        throw new IOException("Exception from shell.", e);
                    }
                    if (r != 0) {
                        throw new IOException("\"" + shellargs[0] + " " + shellargs[1]
                                + "\" returns non-zero value " + r);
                    }
                }
            }
        }
    } finally {
        checkAndClose(lsrin);
        checkAndClose(dstin);
    }
}
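The deletion idiom inside the comparison loop above can be isolated as follows (the helper name and path parameter are illustrative): FsShell.run declares a checked Exception, so code that only declares IOException wraps it, and a non-zero exit code is likewise turned into an IOException. Note that "-rmr" is the old recursive-delete flag; newer Hadoop releases spell it "-rm -r".

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;

// Hypothetical helper condensing the "-rmr" call used by deleteNonexisting.
static void recursiveDelete(Configuration conf, String path) throws IOException {
    FsShell shell = new FsShell(conf);
    String[] shellargs = { "-rmr", path };
    int r;
    try {
        r = shell.run(shellargs);
    } catch (Exception e) {
        throw new IOException("Exception from shell.", e);
    }
    if (r != 0) {
        throw new IOException("\"" + shellargs[0] + " " + shellargs[1] + "\" returned non-zero value " + r);
    }
}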
From source file:org.oclc.firefly.hadoop.backup.Backup.java
License:Apache License
/**
 * Performs a complete copy of the source hbase to the given destination
 * @param tables The names of the tables to backup
 * @param maxTries The maximum number of times to try to copy regions.
 * @return True if successful, false otherwise
 * @throws IOException If failed to interact with Hadoop
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public boolean doMajorCopy(String[] tables, int maxTries)
        throws IOException, InterruptedException, ClassNotFoundException {
    boolean ret = false;
    String username = getUsername();
    short replication = (short) getInitialReplication();

    // Get a list of regions from HBase
    // Then filter out the regions we are not extracting, and group them by table
    List<CatalogRow> regions = getHBaseRegions(srcConf);
    Map<String, List<CatalogRow>> filtered = groupAndFilter(regions, tables);
    List<Pair<String, HRegionInfo>> mapperInput = new ArrayList<Pair<String, HRegionInfo>>();

    // Prepare the input for the mappers to use
    // This creates a list of region server and region pairs
    LOG.info("Exporting the following tables:");
    for (Entry<String, List<CatalogRow>> entry : filtered.entrySet()) {
        String tablename = entry.getKey();
        List<CatalogRow> rows = entry.getValue();
        LOG.info(". " + tablename);
        for (CatalogRow r : rows) {
            String regionServer = r.getHost() + ":" + r.getPort();
            HRegionInfo region = r.getHRegionInfo();
            mapperInput.add(Pair.newPair(regionServer, region));
        }
    }

    // Make sure we write to a directory that does not exist
    backupDirectoryPath = createBackupDirectory(getCurrentDateString());
    LOG.info("Starting backup path: " + backupDirectoryPath);

    // Copy the .tableinfo files for the tables we are extracting
    // These files are not copied by the MR job as it only focuses on regions
    List<FileStatus> tableInfoFiles = getTableInfoFiles(srcFs, filtered);
    for (FileStatus file : tableInfoFiles) {
        Path srcFilePath = file.getPath();
        Path relPath = new Path(BackupUtils.getFsRelativePath(srcFs, srcFilePath));
        Path dstFilePath = new Path(backupDirectoryPath.toString() + relPath.toString());
        BackupUtils.copy(srcFs, srcFilePath, dstFs, dstFilePath, buffer, username, replication);
    }

    // Dispatch MR job and monitor
    // Retry regions if necessary
    if (mapperInput.size() > 0) {
        int tries = 0;

        while (!ret && (maxTries == 0 || tries < maxTries)) {
            if (getNumMapTasks() > mapperInput.size()) {
                setNumMapTasks(mapperInput.size());
                LOG.info("Not enough regions. Reducing number of map tasks");
            }

            // Generate a list of mapper input files and create job
            List<Path> sourceFiles = createMapperInputSequenceFiles(mapperInput, getNumMapTasks(), srcFs, tries);
            Job job = createMRJob(srcConf, dstConf, sourceFiles, backupDirectoryPath, tries);

            LOG.info(job.getJobName());
            LOG.info("--------------------------------------------------");
            LOG.info("Number of regions : " + mapperInput.size());
            LOG.info("Number of map tasks: " + getNumMapTasks());
            LOG.info("Mapper input path : " + getMapInputDirectory(tries));
            LOG.info("Mapper output path : " + FileOutputFormat.getOutputPath(job));
            LOG.info("--------------------------------------------------");

            job.waitForCompletion(true);
            if (job.isSuccessful()) {
                // Check if any regions failed
                Counters counters = job.getCounters();
                Counter failedCounter = counters.findCounter("Backup", "FailedRegions");
                long failed = failedCounter.getValue();

                if (failed > 0) {
                    LOG.info("Number of failed regions: " + failed + ".");

                    // get a fresh list of regions to copy
                    List<Pair<String, HRegionInfo>> failedRegions = getFailedRegions(srcFs, srcConf, tries);
                    addCopiedRegions(mapperInput, failedRegions);
                    mapperInput = getRemainingRegions(mapperInput, tables);
                    for (Pair<String, HRegionInfo> pair : mapperInput) {
                        LOG.info("Retry: " + pair.getSecond());
                    }

                    if (mapperInput.size() == 0) {
                        ret = true;
                        backupDirectoryPath = appendEndTime(backupDirectoryPath);
                        LOG.warn("No regions left to copy, but expected to copy more. "
                                + "Please inspect logs/files manually for errors");
                    }
                } else {
                    ret = true;
                    addCopiedRegions(mapperInput, null);
                    backupDirectoryPath = appendEndTime(backupDirectoryPath);
                    LOG.info("MR job finished successfully");
                }
            } else {
                LOG.error("An unexpected error occurred during the MR job. Please see MR logs.");
                break;
            }

            tries++;
        }

        if (ret) {
            if (verifyCopiedRegions()) {
                LOG.info("Verification passed successfully");
            } else {
                ret = false;
                LOG.info("Verification failed. Please inspect errors manually");
            }
        } else {
            LOG.info("No attempts left. Try setting -n to a higher value, or setting it to 0");
        }
    }

    if (ret) {
        // Set replication factor of backup directory to default.
        // This may not be the best solution, but let built-in shell take care of it
        // because it can do it recursively without us having to rediscover all the files
        short finalReplication = (short) getFinalReplication();

        if (replication != finalReplication) {
            FsShell shell = new FsShell(dstConf);
            String[] repArgs = { "-setrep", "-R", "-w", "" + finalReplication, backupDirectoryPath.toString() };

            try {
                LOG.info("Setting final replication factor of backup files to " + finalReplication);
                shell.run(repArgs);
            } catch (Exception e) {
                LOG.warn("Could not set replication factor of backup files to " + finalReplication);
            }
        }
    }

    return ret;
}
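The final FsShell call in doMajorCopy reduces to this sketch (the helper name, directory, and replication factor are illustrative): "-setrep -R -w" changes the replication factor recursively and waits for the blocks to reach it, which is why the example hands the whole backup directory to the built-in shell instead of re-listing the files itself.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.fs.Path;

// Hypothetical helper mirroring the replication adjustment at the end of doMajorCopy.
static void setReplicationRecursively(Configuration conf, Path dir, short replication) {
    FsShell shell = new FsShell(conf);
    String[] repArgs = { "-setrep", "-R", "-w", String.valueOf(replication), dir.toString() };
    try {
        shell.run(repArgs);
    } catch (Exception e) {
        // As in the original: failing to adjust replication is logged, not treated as fatal.
        System.err.println("Could not set replication factor of " + dir + " to " + replication);
    }
}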
From source file:org.springframework.data.hadoop.impala.mapreduce.MapReduceCommandsTest.java
License:Apache License
/**
 * Test method for {@link org.springframework.data.hadoop.impala.mapreduce.MapReduceCommands#submit(java.lang.String)}.
 * @throws Exception
 */
@Test
public void testSubmit() throws Exception {
    Configuration jobConfig = new Configuration(false);
    Configuration hadoopConfig = mrCmds.getHadoopConfiguration();
    FsShell shell = new FsShell(hadoopConfig);

    List<String> argv = new ArrayList<String>();
    argv.add("-rmr");
    argv.add("/tmp/wc-input");
    shell.run(argv.toArray(new String[0]));

    argv = new ArrayList<String>();
    argv.add("-put");
    File f = new File("src/test/resources/wordcount-conf.xml");
    argv.add(f.getAbsolutePath());
    argv.add("/tmp/wc-input/");
    shell.run(argv.toArray(new String[0]));

    argv = new ArrayList<String>();
    argv.add("-rmr");
    argv.add("/tmp/wc-output");
    shell.run(argv.toArray(new String[0]));

    String hadoopFsName = hadoopConfig.get("fs.default.name");
    String hadoopJT = hadoopConfig.get("mapred.job.tracker");
    File jarFile = new File(hadoopExampleJarFile);

    jobConfig.set("fs.default.name", hadoopFsName);
    jobConfig.set("mapred.job.tracker", hadoopJT);
    jobConfig.set("mapred.jar", jarFile.getAbsolutePath());
    jobConfig.set("mapred.input.dir", "/tmp/wc-input");
    jobConfig.set("mapred.output.dir", "/tmp/wc-output");
    jobConfig.set("mapreduce.map.class", "org.apache.hadoop.examples.WordCount.TokenizerMapper");
    jobConfig.set("mapreduce.reduce.class", "org.apache.hadoop.examples.WordCount.IntSumReducer");

    String tmpFile = "/tmp/impala-test-wordcount-conf.xml";
    try {
        jobConfig.writeXml(new FileOutputStream(new File(tmpFile)));
    } catch (Exception e) {
        Assert.fail("fail to write temp MR configuration file");
    }
    mrCmds.submit(tmpFile);
}
From source file:org.springframework.data.hadoop.impala.mapreduce.MapReduceCommandsTest.java
License:Apache License
@Test
public void testJar() throws Exception {
    Configuration hadoopConfig = mrCmds.getHadoopConfiguration();
    FsShell shell = new FsShell(hadoopConfig);

    List<String> argv = new ArrayList<String>();
    argv.add("-rmr");
    argv.add("/tmp/wc-input2");
    shell.run(argv.toArray(new String[0]));

    argv = new ArrayList<String>();
    argv.add("-put");
    File f = new File("src/test/resources/wordcount-conf.xml");
    argv.add(f.getAbsolutePath());
    argv.add("/tmp/wc-input2/");
    shell.run(argv.toArray(new String[0]));

    argv = new ArrayList<String>();
    argv.add("-rmr");
    argv.add("/tmp/wc-output2");
    shell.run(argv.toArray(new String[0]));

    File jarFile = new File(hadoopExampleJarFile);
    mrCmds.jar(jarFile.getAbsolutePath(), "org.apache.hadoop.examples.WordCount",
            "/tmp/wc-input2 /tmp/wc-output2");
}
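Both tests above drive FsShell the same way before running the MapReduce job; that shared setup can be read as the following sketch (the helper name and directory arguments are illustrative): remove any stale input directory, upload the local configuration file into it, and remove the output directory so the job can create it.

import java.io.File;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;

// Hypothetical helper condensing the setup shared by testSubmit and testJar.
static void prepareWordCountDirs(Configuration conf, String inputDir, String outputDir) throws Exception {
    FsShell shell = new FsShell(conf);
    // Exit codes are ignored here, as in the tests: the directories may not exist yet.
    shell.run(new String[] { "-rmr", inputDir });
    File localConf = new File("src/test/resources/wordcount-conf.xml");
    shell.run(new String[] { "-put", localConf.getAbsolutePath(), inputDir + "/" });
    shell.run(new String[] { "-rmr", outputDir });
}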