List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile
public void copyFromLocalFile(Path src, Path dst) throws IOException
From source file:org.apache.nutch.api.resources.SeedResource.java
License:Apache License
@POST @Path("/create") @Consumes(MediaType.APPLICATION_JSON)//from w w w .j a v a 2s . co m /** * Method creates seed list file and returns temorary directory path * @param seedList * @return */ public String createSeedFile(SeedList seedList) { if (seedList == null) { throw new WebApplicationException( Response.status(Status.BAD_REQUEST).entity("Seed list cannot be empty!").build()); } File seedFile = createSeedFile(); BufferedWriter writer = getWriter(seedFile); Collection<SeedUrl> seedUrls = seedList.getSeedUrls(); if (CollectionUtils.isNotEmpty(seedUrls)) { for (SeedUrl seedUrl : seedUrls) { writeUrl(writer, seedUrl); } } try { Configuration conf = configManager.get(ConfManager.DEFAULT); String fsName = conf.get("fs.default.name"); LOG.info("fs.default.name : " + fsName); if (fsName.contains("hdfs")) { LOG.info("Running under hadoop distributed file system"); FileSystem fs = FileSystem.get(conf); org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(seedFile.getAbsolutePath()); fs.copyFromLocalFile(path, path); } else { LOG.info("Running under local file system"); } } catch (IOException e) { LOG.error(e.toString()); } return seedFile.getParent(); }
From source file:org.apache.oozie.action.hadoop.TestLauncher.java
License:Apache License
private RunningJob _test(String... arg) throws Exception { Path actionDir = getFsTestCaseDir(); File jar = IOUtils.createJar(new File(getTestCaseDir()), "launcher.jar", LauncherMapper.class, LauncherMainException.class, LauncherSecurityManager.class, LauncherException.class, LauncherMainTester.class); FileSystem fs = getFileSystem(); Path launcherJar = new Path(actionDir, "launcher.jar"); fs.copyFromLocalFile(new Path(jar.toString()), launcherJar); JobConf jobConf = Services.get().get(HadoopAccessorService.class) .createJobConf(new URI(getNameNodeUri()).getAuthority()); // jobConf.setJar(jar.getAbsolutePath()); jobConf.set("user.name", getTestUser()); jobConf.setInt("mapred.map.tasks", 1); jobConf.setInt("mapred.map.max.attempts", 1); jobConf.setInt("mapred.reduce.max.attempts", 1); jobConf.set("mapreduce.framework.name", "yarn"); jobConf.set("mapred.job.tracker", getJobTrackerUri()); jobConf.set("fs.default.name", getNameNodeUri()); LauncherMapperHelper.setupMainClass(jobConf, LauncherMainTester.class.getName()); LauncherMapperHelper.setupMainArguments(jobConf, arg); Configuration actionConf = new XConfiguration(); LauncherMapperHelper.setupLauncherInfo(jobConf, "1", "1@a", actionDir, "1@a-0", actionConf, ""); LauncherMapperHelper.setupYarnRestartHandling(jobConf, jobConf, "1@a", System.currentTimeMillis()); assertEquals("1", actionConf.get("oozie.job.id")); assertEquals("1@a", actionConf.get("oozie.action.id")); DistributedCache.addFileToClassPath(new Path(launcherJar.toUri().getPath()), jobConf); JobClient jobClient = createJobClient(); final RunningJob runningJob = jobClient.submitJob(jobConf); System.out.println("Action Dir: " + actionDir); System.out.println("LauncherMapper ID: " + runningJob.getJobID().toString()); waitFor(180 * 1000, new Predicate() { public boolean evaluate() throws Exception { return runningJob.isComplete(); }/*from w w w .ja v a2s. c o m*/ }); assertTrue(jobConf.get("oozie.action.prepare.xml").equals("")); return runningJob; }
From source file:org.apache.oozie.service.ShareLibService.java
License:Apache License
/** * Copy jar containing classes./*from ww w . j a va2 s .c om*/ * * @param classes the classes * @param fs the FileSystem * @param executorDir is Path * @param type is sharelib key * @throws IOException Signals that an I/O exception has occurred. */ private void copyJarContainingClasses(List<Class> classes, FileSystem fs, Path executorDir, String type) throws IOException { fs.mkdirs(executorDir); Set<String> localJarSet = new HashSet<String>(); for (Class c : classes) { String localJar = findContainingJar(c); if (localJar != null) { localJarSet.add(localJar); } else { throw new IOException("No jar containing " + c + " found"); } } List<Path> listOfPaths = new ArrayList<Path>(); for (String localJarStr : localJarSet) { File localJar = new File(localJarStr); fs.copyFromLocalFile(new Path(localJar.getPath()), executorDir); Path path = new Path(executorDir, localJar.getName()); listOfPaths.add(path); LOG.info(localJar.getName() + " uploaded to " + executorDir.toString()); } launcherLibMap.put(type, listOfPaths); }
From source file:org.apache.oozie.tools.OozieSharelibCLI.java
License:Apache License
private List<Future<Void>> copyFolderRecursively(final FileSystem fs, final ExecutorService threadPool, File srcFile, final Path dstPath) throws IOException { List<Future<Void>> taskList = new ArrayList<Future<Void>>(); for (final File file : srcFile.listFiles()) { final Path trgName = new Path(dstPath, file.getName()); if (file.isDirectory()) { taskList.addAll(copyFolderRecursively(fs, threadPool, file, trgName)); } else {//from w w w.ja v a2 s. co m taskList.add(threadPool.submit(new Callable<Void>() { @Override public Void call() throws Exception { fs.copyFromLocalFile(new Path(file.toURI()), trgName); return null; } })); } } return taskList; }
From source file:org.apache.pig.backend.hadoop.executionengine.mapreduceExec.MapReduceLauncher.java
License:Apache License
private static void setupDistributedCache(PigContext pigContext, Configuration conf, Properties properties, String key, boolean shipToCluster) throws IOException { // Turn on the symlink feature DistributedCache.createSymlink(conf); // Set up the DistributedCache for this job String fileNames = properties.getProperty(key); if (fileNames != null) { String[] paths = fileNames.split(","); for (String path : paths) { path = path.trim();//from www. j a v a 2 s .co m if (path.length() != 0) { Path src = new Path(path); // Ensure that 'src' is a valid URI URI srcURI = null; try { srcURI = new URI(src.toString()); } catch (URISyntaxException ue) { throw new IOException("Invalid cache specification, " + "file doesn't exist: " + src); } // Ship it to the cluster if necessary and add to the // DistributedCache if (shipToCluster) { Path dst = new Path(FileLocalizer.getTemporaryPath(null, pigContext).toString()); FileSystem fs = dst.getFileSystem(conf); fs.copyFromLocalFile(src, dst); // Construct the dst#srcName uri for DistributedCache URI dstURI = null; try { dstURI = new URI(dst.toString() + "#" + src.getName()); } catch (URISyntaxException ue) { throw new IOException("Invalid ship specification, " + "file doesn't exist: " + dst); } DistributedCache.addCacheFile(dstURI, conf); } else { DistributedCache.addCacheFile(srcURI, conf); } } } } }
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.java
License:Apache License
private static void setupDistributedCache(PigContext pigContext, Configuration conf, String[] paths, boolean shipToCluster) throws IOException { // Turn on the symlink feature DistributedCache.createSymlink(conf); for (String path : paths) { path = path.trim();/* w w w . ja v a 2 s. co m*/ if (path.length() != 0) { Path src = new Path(path); // Ensure that 'src' is a valid URI URI srcURI = toURI(src); // Ship it to the cluster if necessary and add to the // DistributedCache if (shipToCluster) { Path dst = new Path(FileLocalizer.getTemporaryPath(pigContext).toString()); FileSystem fs = dst.getFileSystem(conf); fs.copyFromLocalFile(src, dst); fs.setReplication(dst, (short) conf.getInt(MRConfiguration.SUMIT_REPLICATION, 3)); // Construct the dst#srcName uri for DistributedCache URI dstURI = null; try { dstURI = new URI(dst.toString() + "#" + src.getName()); } catch (URISyntaxException ue) { byte errSrc = pigContext.getErrorSource(); int errCode = 0; switch (errSrc) { case PigException.REMOTE_ENVIRONMENT: errCode = 6004; break; case PigException.USER_ENVIRONMENT: errCode = 4004; break; default: errCode = 2037; break; } String msg = "Invalid ship specification. " + "File doesn't exist: " + dst; throw new ExecException(msg, errCode, errSrc); } addToDistributedCache(dstURI, conf); } else { addToDistributedCache(srcURI, conf); } } } }
From source file:org.apache.pig.piggybank.test.evaluation.string.TestLookupInFiles.java
License:Apache License
@Test public void testLookupInFiles() throws Exception { File tmpFile = File.createTempFile("test", ".txt"); PrintStream ps1 = new PrintStream(new FileOutputStream(tmpFile)); ps1.println("one"); ps1.println("notexist"); ps1.println("three"); ps1.close();//from w ww . j av a 2 s .c o m File lookupFile1 = File.createTempFile("lookup", ".txt"); PrintStream lps1 = new PrintStream(new FileOutputStream(lookupFile1)); lps1.println("one"); lps1.println("two"); lps1.println("three"); lps1.close(); File lookupFile2 = File.createTempFile("lookup", "txt"); PrintStream lps2 = new PrintStream(new FileOutputStream(lookupFile2)); lps2.println("one"); lps2.println("ten"); lps2.println("eleven"); lps2.close(); FileSystem fs = FileSystem .get(ConfigurationUtil.toConfiguration(pigServer.getPigContext().getProperties())); fs.copyFromLocalFile(new Path(lookupFile1.toString()), new Path("lookup1")); fs.copyFromLocalFile(new Path(lookupFile1.toString()), new Path("lookup2")); pigServer.registerQuery("A = LOAD '" + Util.generateURI(tmpFile.toString(), pigServer.getPigContext()) + "' AS (key:chararray);"); pigServer.registerQuery( "B = FOREACH A GENERATE org.apache.pig.piggybank.evaluation.string.LookupInFiles(key, 'lookup1', 'lookup2');"); Iterator<Tuple> iter = pigServer.openIterator("B"); int r = (Integer) iter.next().get(0); assertTrue(r == 1); r = (Integer) iter.next().get(0); assertTrue(r == 0); }
From source file:org.apache.pig.test.TestPigServer.java
License:Apache License
@Test public void testRegisterRemoteScript() throws Throwable { String scriptName = "script.py"; File scriptFile = File.createTempFile("tmp", ""); PrintWriter pw = new PrintWriter(new FileWriter(scriptFile)); pw.println("@outputSchema(\"word:chararray\")\ndef helloworld():\n return 'Hello, World'"); pw.close();/*from ww w . ja v a2 s. c o m*/ FileSystem fs = cluster.getFileSystem(); fs.copyFromLocalFile(new Path(scriptFile.getAbsolutePath()), new Path(scriptName)); // find the absolute path for the directory so that it does not // depend on configuration String absPath = fs.getFileStatus(new Path(scriptName)).getPath().toString(); Util.createInputFile(cluster, "testRegisterRemoteScript_input", new String[] { "1", "2" }); PigServer pig = new PigServer(cluster.getExecType(), properties); pig.registerCode(absPath, "jython", "pig"); pig.registerQuery("a = load 'testRegisterRemoteScript_input';"); pig.registerQuery("b = foreach a generate pig.helloworld($0);"); Iterator<Tuple> iter = pig.openIterator("b"); assertTrue(iter.hasNext()); Tuple t = iter.next(); assertTrue(t.size() > 0); assertEquals("Hello, World", t.get(0)); assertTrue(iter.hasNext()); t = iter.next(); assertTrue(t.size() > 0); assertEquals("Hello, World", t.get(0)); assertFalse(iter.hasNext()); }
From source file:org.apache.pig.test.TestPigServerWithMacros.java
License:Apache License
@Test public void testRegisterRemoteMacro() throws Throwable { PigServer pig = new PigServer(cluster.getExecType(), cluster.getProperties()); String macroName = "util.pig"; File macroFile = File.createTempFile("tmp", ""); PrintWriter pw = new PrintWriter(new FileWriter(macroFile)); pw.println("DEFINE row_count(X) RETURNS Z { Y = group $X all; $Z = foreach Y generate COUNT($X); };"); pw.close();//from w ww. j av a 2 s . co m FileSystem fs = cluster.getFileSystem(); fs.copyFromLocalFile(new Path(macroFile.getAbsolutePath()), new Path(macroName)); // find the absolute path for the directory so that it does not // depend on configuration String absPath = fs.getFileStatus(new Path(macroName)).getPath().toString(); Util.createInputFile(cluster, "testRegisterRemoteMacro_input", new String[] { "1", "2" }); pig.registerQuery("import '" + absPath + "';"); pig.registerQuery("a = load 'testRegisterRemoteMacro_input';"); pig.registerQuery("b = row_count(a);"); Iterator<Tuple> iter = pig.openIterator("b"); assertEquals(2L, ((Long) iter.next().get(0)).longValue()); pig.shutdown(); }
From source file:org.apache.reef.runtime.mesos.driver.REEFScheduler.java
License:Apache License
private String getReefTarUri(final String jobIdentifier) { try {// w ww . j a v a2 s .c om // Create REEF_TAR final FileOutputStream fileOutputStream = new FileOutputStream(REEF_TAR); final TarArchiveOutputStream tarArchiveOutputStream = new TarArchiveOutputStream( new GZIPOutputStream(fileOutputStream)); final File globalFolder = new File(this.fileNames.getGlobalFolderPath()); final DirectoryStream<Path> directoryStream = Files.newDirectoryStream(globalFolder.toPath()); for (final Path path : directoryStream) { tarArchiveOutputStream.putArchiveEntry( new TarArchiveEntry(path.toFile(), globalFolder + "/" + path.getFileName())); final BufferedInputStream bufferedInputStream = new BufferedInputStream( new FileInputStream(path.toFile())); IOUtils.copy(bufferedInputStream, tarArchiveOutputStream); bufferedInputStream.close(); tarArchiveOutputStream.closeArchiveEntry(); } directoryStream.close(); tarArchiveOutputStream.close(); fileOutputStream.close(); // Upload REEF_TAR to HDFS final FileSystem fileSystem = FileSystem.get(new Configuration()); final org.apache.hadoop.fs.Path src = new org.apache.hadoop.fs.Path(REEF_TAR); final String reefTarUri = fileSystem.getUri().toString() + "/" + jobIdentifier + "/" + REEF_TAR; final org.apache.hadoop.fs.Path dst = new org.apache.hadoop.fs.Path(reefTarUri); fileSystem.copyFromLocalFile(src, dst); return reefTarUri; } catch (IOException e) { throw new RuntimeException(e); } }