List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile
public void copyFromLocalFile(Path src, Path dst) throws IOException
From source file:org.apache.rya.accumulo.mr.merge.CopyTool.java
License:Apache License
/**
 * Uploads a file from the local file system into HDFS.
 *
 * @param localInputPath the {@link Path} of the source file on the local file system.
 * @param hdfsOutputPath the destination {@link Path} in HDFS.
 * @param configuration the {@link Configuration} used to look up the target {@link FileSystem}.
 * @throws IOException if the file system cannot be obtained or the copy fails.
 */
public static void copyLocalToHdfs(final Path localInputPath, final Path hdfsOutputPath,
        final Configuration configuration) throws IOException {
    // The FileSystem handle returned by FileSystem.get(...) is not closed by this method.
    FileSystem.get(configuration).copyFromLocalFile(localInputPath, hdfsOutputPath);
}
From source file:org.apache.solr.hadoop.hack.MiniMRClientClusterFactory.java
License:Apache License
public static MiniMRClientCluster create(Class<?> caller, String identifier, int noOfNMs, Configuration conf, File testWorkDir) throws IOException { if (conf == null) { conf = new Configuration(); }//from w ww .j av a 2s .c om FileSystem fs = FileSystem.get(conf); Path testRootDir = new Path(testWorkDir.getPath(), identifier + "-tmpDir").makeQualified(fs); Path appJar = new Path(testRootDir, "MRAppJar.jar"); // Copy MRAppJar and make it private. Path appMasterJar = new Path(MiniMRYarnCluster.APPJAR); fs.copyFromLocalFile(appMasterJar, appJar); fs.setPermission(appJar, new FsPermission("744")); Job job = Job.getInstance(conf); job.addFileToClassPath(appJar); Path callerJar = new Path(JarFinder.getJar(caller)); Path remoteCallerJar = new Path(testRootDir, callerJar.getName()); fs.copyFromLocalFile(callerJar, remoteCallerJar); fs.setPermission(remoteCallerJar, new FsPermission("744")); job.addFileToClassPath(remoteCallerJar); MiniMRYarnCluster miniMRYarnCluster; try { miniMRYarnCluster = new MiniMRYarnCluster(identifier, noOfNMs, testWorkDir); } catch (Exception e) { throw new RuntimeException(e); } job.getConfiguration().set("minimrclientcluster.caller.name", identifier); job.getConfiguration().setInt("minimrclientcluster.nodemanagers.number", noOfNMs); miniMRYarnCluster.init(job.getConfiguration()); miniMRYarnCluster.start(); return new MiniMRYarnClusterAdapter(miniMRYarnCluster, testWorkDir); }
From source file:org.apache.solr.hadoop.MorphlineBasicMiniMRTest.java
License:Apache License
@Test public void mrRun() throws Exception { FileSystem fs = dfsCluster.getFileSystem(); Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input")); fs.delete(inDir, true);/* www .j a va 2 s .co m*/ String DATADIR = "/user/testing/testMapperReducer/data"; Path dataDir = fs.makeQualified(new Path(DATADIR)); fs.delete(dataDir, true); Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output")); fs.delete(outDir, true); assertTrue(fs.mkdirs(inDir)); Path INPATH = new Path(inDir, "input.txt"); OutputStream os = fs.create(INPATH); Writer wr = new OutputStreamWriter(os, "UTF-8"); wr.write(DATADIR + "/" + inputAvroFile); wr.close(); assertTrue(fs.mkdirs(dataDir)); fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir); JobConf jobConf = getJobConf(); if (ENABLE_LOCAL_JOB_RUNNER) { // enable Hadoop LocalJobRunner; this enables to run in debugger and set breakpoints jobConf.set("mapred.job.tracker", "local"); } jobConf.setMaxMapAttempts(1); jobConf.setMaxReduceAttempts(1); jobConf.setJar(SEARCH_ARCHIVES_JAR); jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false); int shards = 2; int maxReducers = Integer.MAX_VALUE; if (ENABLE_LOCAL_JOB_RUNNER) { // local job runner has a couple of limitations: only one reducer is supported and the DistributedCache doesn't work. // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/ maxReducers = 1; shards = 1; } String[] args = new String[] { "--morphline-file=" + RESOURCES_DIR + "/test-morphlines/solrCellDocumentTypes.conf", "--morphline-id=morphline1", "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(), "--output-dir=" + outDir.toString(), "--shards=" + shards, "--verbose", numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(), numRuns % 3 == 0 ? "--reducers=" + shards : (numRuns % 3 == 1 ? 
"--reducers=-1" : "--reducers=" + Math.min(8, maxReducers)) }; if (numRuns % 3 == 2) { args = concat(args, new String[] { "--fanout=2" }); } if (numRuns == 0) { // force (slow) MapReduce based randomization to get coverage for that as well args = concat(new String[] { "-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1" }, args); } MapReduceIndexerTool tool = createTool(); int res = ToolRunner.run(jobConf, tool, args); assertEquals(0, res); Job job = tool.job; assertTrue(job.isComplete()); assertTrue(job.isSuccessful()); if (numRuns % 3 != 2) { // Only run this check if mtree merge is disabled. // With mtree merge enabled the BatchWriter counters aren't available anymore because // variable "job" now refers to the merge job rather than the indexing job assertEquals( "Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN, count, job.getCounters() .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString()) .getValue()); } // Check the output is as expected outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR); Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir)); System.out.println("outputfiles:" + Arrays.toString(outputFiles)); TestUtils.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards); // run again with --dryrun mode: tool = createTool(); args = concat(args, new String[] { "--dry-run" }); res = ToolRunner.run(jobConf, tool, args); assertEquals(0, res); numRuns++; }
From source file:org.apache.solr.hadoop.MorphlineGoLiveMiniMRTest.java
License:Apache License
private Path upAvroFile(FileSystem fs, Path inDir, String DATADIR, Path dataDir, String localFile) throws IOException, UnsupportedEncodingException { Path INPATH = new Path(inDir, "input.txt"); OutputStream os = fs.create(INPATH); Writer wr = new OutputStreamWriter(os, "UTF-8"); wr.write(DATADIR + File.separator + localFile); wr.close();//from w w w .j a v a2 s .c o m assertTrue(fs.mkdirs(dataDir)); fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, localFile), dataDir); return INPATH; }
From source file:org.apache.solr.hadoop.SolrOutputFormat.java
License:Apache License
public static void addSolrConfToDistributedCache(Job job, File solrHomeZip) throws IOException { // Make a reasonably unique name for the zip file in the distributed cache // to avoid collisions if multiple jobs are running. String hdfsZipName = UUID.randomUUID().toString() + '.' + ZIP_FILE_BASE_NAME; Configuration jobConf = job.getConfiguration(); jobConf.set(ZIP_NAME, hdfsZipName);/*from w w w . jav a 2 s .com*/ Path zipPath = new Path("/tmp", getZipName(jobConf)); FileSystem fs = FileSystem.get(jobConf); fs.copyFromLocalFile(new Path(solrHomeZip.toString()), zipPath); final URI baseZipUrl = fs.getUri().resolve(zipPath.toString() + '#' + getZipName(jobConf)); DistributedCache.addCacheArchive(baseZipUrl, jobConf); LOG.debug("Set Solr distributed cache: {}", Arrays.asList(job.getCacheArchives())); LOG.debug("Set zipPath: {}", zipPath); // Actually send the path for the configuration zip file jobConf.set(SETUP_OK, zipPath.toString()); }
From source file:org.apache.spark.tez.utils.HadoopUtils.java
License:Apache License
/**
 * Copies a local resource to the given file system unless the destination already exists.
 *
 * <p>This is best-effort: an {@link IOException} is logged as a warning and not propagated.
 *
 * @param fs         file system that receives the resource
 * @param sourcePath local path of the resource
 * @param destPath   destination path on {@code fs}
 */
private static synchronized void provisioinResourceToFs(FileSystem fs, Path sourcePath, Path destPath) {
    try {
        if (logger.isDebugEnabled()) {
            logger.debug("Provisioning '" + sourcePath + "' to " + destPath);
        }
        if (fs.exists(destPath)) {
            logger.debug("Skipping provisioning of " + destPath + " since it already exists.");
            return;
        }
        fs.copyFromLocalFile(sourcePath, destPath);
    } catch (IOException e) {
        logger.warn("Failed to copy local resource " + sourcePath + " to " + destPath, e);
    }
}
From source file:org.apache.tajo.LocalTajoTestingUtility.java
License:Apache License
public void setup(String[] names, String[] tablepaths, Schema[] schemas, KeyValueSet option) throws Exception { LOG.info("==================================================="); LOG.info("Starting Test Cluster."); LOG.info("==================================================="); util = new TajoTestingCluster(); util.startMiniCluster(1);//from ww w . j a v a2 s . c om conf = util.getConfiguration(); client = util.newTajoClient(); FileSystem fs = util.getDefaultFileSystem(); Path rootDir = TajoConf.getWarehouseDir(conf); fs.mkdirs(rootDir); for (int i = 0; i < tablepaths.length; i++) { Path localPath = new Path(tablepaths[i]); Path tablePath = new Path(rootDir, names[i]); fs.mkdirs(tablePath); Path dfsPath = new Path(tablePath, localPath.getName()); fs.copyFromLocalFile(localPath, dfsPath); TableMeta meta = CatalogUtil.newTableMeta(CatalogProtos.StoreType.CSV, option); // Add fake table statistic data to tables. // It gives more various situations to unit tests. TableStats stats = new TableStats(); stats.setNumBytes(TPCH.tableVolumes.get(names[i])); TableDesc tableDesc = new TableDesc( CatalogUtil.buildFQName(TajoConstants.DEFAULT_DATABASE_NAME, names[i]), schemas[i], meta, tablePath.toUri()); tableDesc.setStats(stats); util.getMaster().getCatalog().createTable(tableDesc); } LOG.info("==================================================="); LOG.info("Test Cluster ready and test table created."); LOG.info("==================================================="); }
From source file:org.apache.tajo.yarn.command.LaunchCommand.java
License:Apache License
/** * @return Destinate n/* w ww .j av a2s .c om*/ */ private Path addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath, int appId, Map<String, LocalResource> localResources, LocalResourceType type) throws IOException { String suffix = appName + "/" + appId + "/" + fileSrcPath; Path dst = new Path(fs.getHomeDirectory(), suffix); fs.copyFromLocalFile(new Path(fileSrcPath), dst); FileStatus scFileStatus = fs.getFileStatus(dst); LocalResource scRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromURI(dst.toUri()), type, LocalResourceVisibility.APPLICATION, scFileStatus.getLen(), scFileStatus.getModificationTime()); localResources.put(fileDstPath, scRsrc); return dst; }
From source file:org.apache.tez.benchmark.SessionTest.java
License:Apache License
protected Map<String, LocalResource> getLocalResources(TezConfiguration tezConf) throws IOException, URISyntaxException { Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); Path stagingDir = TezCommonUtils.getTezBaseStagingPath(tezConf); // staging dir FileSystem fs = FileSystem.get(tezConf); Path jobJar = new Path(stagingDir, "job.jar"); if (fs.exists(jobJar)) { fs.delete(jobJar, true);// ww w. j av a 2 s.co m } fs.copyFromLocalFile(getCurrentJarURL(), jobJar); localResources.put("job.jar", createLocalResource(fs, jobJar)); return localResources; }
From source file:org.apache.tez.mapreduce.examples.FilterLinesByWord.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); Credentials credentials = new Credentials(); boolean generateSplitsInClient = false; SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser(); try {// w ww . j av a 2 s. c o m generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false); otherArgs = splitCmdLineParser.getRemainingArgs(); } catch (ParseException e1) { System.err.println("Invalid options"); printUsage(); return 2; } if (otherArgs.length != 3) { printUsage(); return 2; } String inputPath = otherArgs[0]; String outputPath = otherArgs[1]; String filterWord = otherArgs[2]; FileSystem fs = FileSystem.get(conf); if (fs.exists(new Path(outputPath))) { System.err.println("Output directory : " + outputPath + " already exists"); return 2; } TezConfiguration tezConf = new TezConfiguration(conf); fs.getWorkingDirectory(); Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString()); tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString()); TezClientUtils.ensureStagingDirExists(tezConf, stagingDir); String jarPath = ClassUtil.findContainingJar(FilterLinesByWord.class); if (jarPath == null) { throw new TezUncheckedException( "Could not find any jar containing" + FilterLinesByWord.class.getName() + " in the classpath"); } Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar")); fs.copyFromLocalFile(new Path(jarPath), remoteJarPath); FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath); TokenCache.obtainTokensForNamenodes(credentials, new Path[] { remoteJarPath }, conf); Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>(); LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath), LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(), 
remoteJarStatus.getModificationTime()); commonLocalResources.put("dag_job.jar", dagJarLocalRsrc); TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources, credentials); tezSession.start(); // Why do I need to start the TezSession. Configuration stage1Conf = new JobConf(conf); stage1Conf.set(FILTER_PARAM_NAME, filterWord); Configuration stage2Conf = new JobConf(conf); stage2Conf.set(FileOutputFormat.OUTDIR, outputPath); stage2Conf.setBoolean("mapred.mapper.new-api", false); UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf); // Setup stage1 Vertex Vertex stage1Vertex = Vertex.create("stage1", ProcessorDescriptor .create(FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload)) .addTaskLocalFiles(commonLocalResources); DataSourceDescriptor dsd; if (generateSplitsInClient) { // TODO TEZ-1406. Dont' use MRInputLegacy stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath); stage1Conf.setBoolean("mapred.mapper.new-api", false); dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true); } else { dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath).groupSplits(false) .build(); } stage1Vertex.addDataSource("MRInput", dsd); // Setup stage2 Vertex Vertex stage2Vertex = Vertex.create("stage2", ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName()) .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)), 1); stage2Vertex.addTaskLocalFiles(commonLocalResources); // Configure the Output for stage2 OutputDescriptor od = OutputDescriptor.create(MROutput.class.getName()) .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)); OutputCommitterDescriptor ocd = OutputCommitterDescriptor.create(MROutputCommitter.class.getName()); stage2Vertex.addDataSink("MROutput", DataSinkDescriptor.create(od, ocd, null)); UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig .newBuilder(Text.class.getName(), 
TextLongPair.class.getName()).setFromConfiguration(tezConf) .build(); DAG dag = DAG.create("FilterLinesByWord"); Edge edge = Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultBroadcastEdgeProperty()); dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge); LOG.info("Submitting DAG to Tez Session"); DAGClient dagClient = tezSession.submitDAG(dag); LOG.info("Submitted DAG to Tez Session"); DAGStatus dagStatus = null; String[] vNames = { "stage1", "stage2" }; try { while (true) { dagStatus = dagClient.getDAGStatus(null); if (dagStatus.getState() == DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED || dagStatus.getState() == DAGStatus.State.FAILED || dagStatus.getState() == DAGStatus.State.KILLED || dagStatus.getState() == DAGStatus.State.ERROR) { break; } try { Thread.sleep(500); } catch (InterruptedException e) { // continue; } } while (dagStatus.getState() == DAGStatus.State.RUNNING) { try { ExampleDriver.printDAGStatus(dagClient, vNames); try { Thread.sleep(1000); } catch (InterruptedException e) { // continue; } dagStatus = dagClient.getDAGStatus(null); } catch (TezException e) { LOG.fatal("Failed to get application progress. Exiting"); return -1; } } dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS)); } finally { fs.delete(stagingDir, true); tezSession.stop(); } ExampleDriver.printDAGStatus(dagClient, vNames, true, true); LOG.info("Application completed. " + "FinalState=" + dagStatus.getState()); return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1; }