Usage examples for org.apache.hadoop.fs.FileSystem.makeQualified
public Path makeQualified(Path path)
From source file:org.apache.tez.runtime.TestReflectionUtils.java
License:Apache License
@Test public void testAddResourceToClasspath() throws IOException, TezException { String rsrcName = "dummyfile.xml"; FileSystem localFs = FileSystem.getLocal(new Configuration()); Path p = new Path(rsrcName); p = localFs.makeQualified(p); localFs.delete(p, false);//from w w w . j a v a 2 s. co m try { URL loadedUrl = null; loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName); assertNull(loadedUrl); // Add parent to classpath since we're not adding a jar assertTrue(localFs.createNewFile(p)); String urlForm = p.toUri().toURL().toString(); urlForm = urlForm.substring(0, urlForm.lastIndexOf('/') + 1); URL url = new URL(urlForm); ReflectionUtils.addResourcesToClasspath(Collections.singletonList(url)); loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName); assertNotNull(loadedUrl); } finally { localFs.delete(p, false); } }
From source file:org.apache.tez.runtime.TestRuntimeUtils.java
License:Apache License
@Test public void testAddResourceToClasspath() throws IOException, TezException { String rsrcName = "dummyfile.xml"; FileSystem localFs = FileSystem.getLocal(new Configuration()); Path p = new Path(rsrcName); p = localFs.makeQualified(p); localFs.delete(p, false);//from w w w. ja v a 2 s .c o m try { URL loadedUrl = null; loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName); assertNull(loadedUrl); // Add parent to classpath since we're not adding a jar assertTrue(localFs.createNewFile(p)); String urlForm = p.toUri().toURL().toString(); urlForm = urlForm.substring(0, urlForm.lastIndexOf('/') + 1); URL url = new URL(urlForm); RuntimeUtils.addResourcesToClasspath(Collections.singletonList(url)); loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName); assertNotNull(loadedUrl); } finally { localFs.delete(p, false); } }
From source file:org.apache.tez.test.FaultToleranceTestRunner.java
License:Apache License
/**
 * Builds the Tez configuration (layered on this runner's {@code conf} when one is
 * set), makes sure a randomly named staging directory exists under
 * {@code TEST_ROOT_DIR} on the default file system, and starts the Tez client session.
 *
 * @throws Exception if the file system, staging directory or session cannot be set up
 */
void setup() throws Exception {
  YarnConfiguration yarnConf =
      (conf == null) ? new YarnConfiguration() : new YarnConfiguration(this.conf);
  TezConfiguration tezConf = new TezConfiguration(yarnConf);

  FileSystem defaultFs = FileSystem.get(tezConf);
  // A random suffix keeps runs from sharing a staging directory.
  String stagingDirName = String.valueOf(new Random().nextInt(100000));
  Path remoteStagingDir = defaultFs.makeQualified(new Path(TEST_ROOT_DIR, stagingDirName));
  TezClientUtils.ensureStagingDirExists(tezConf, remoteStagingDir);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());

  tezSession = TezClient.create("FaultToleranceTestRunner", tezConf);
  tezSession.start();
}
From source file:org.apache.tez.test.MiniTezCluster.java
License:Apache License
@Override public void serviceInit(Configuration conf) throws Exception { conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_TEZ_FRAMEWORK_NAME); // Use libs from cluster since no build is available conf.setBoolean(TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS, true); // blacklisting disabled to prevent scheduling issues conf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false); if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) { conf.set(MRJobConfig.MR_AM_STAGING_DIR, new File(getTestWorkDir(), "apps_staging_dir" + Path.SEPARATOR).getAbsolutePath()); }/*from w ww .j av a 2 s . c om*/ if (conf.get(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC) == null) { // nothing defined. set quick delete value conf.setLong(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, 0l); } File appJarLocalFile = new File(MiniTezCluster.APPJAR); if (!appJarLocalFile.exists()) { String message = "TezAppJar " + MiniTezCluster.APPJAR + " not found. Exiting."; LOG.info(message); throw new TezUncheckedException(message); } else { LOG.info("Using Tez AppJar: " + appJarLocalFile.getAbsolutePath()); } FileSystem fs = FileSystem.get(conf); Path testRootDir = fs.makeQualified(new Path("target", getName() + "-tmpDir")); Path appRemoteJar = new Path(testRootDir, "TezAppJar.jar"); // Copy AppJar and make it public. Path appMasterJar = new Path(MiniTezCluster.APPJAR); fs.copyFromLocalFile(appMasterJar, appRemoteJar); fs.setPermission(appRemoteJar, new FsPermission("777")); conf.set(TezConfiguration.TEZ_LIB_URIS, appRemoteJar.toUri().toString()); LOG.info("Set TEZ-LIB-URI to: " + conf.get(TezConfiguration.TEZ_LIB_URIS)); // VMEM monitoring disabled, PMEM monitoring enabled. 
conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false); conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false); conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000"); try { Path stagingPath = FileContext.getFileContext(conf) .makeQualified(new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR))); /* * Re-configure the staging path on Windows if the file system is localFs. * We need to use a absolute path that contains the drive letter. The unit * test could run on a different drive than the AM. We can run into the * issue that job files are localized to the drive where the test runs on, * while the AM starts on a different drive and fails to find the job * metafiles. Using absolute path can avoid this ambiguity. */ if (Path.WINDOWS) { if (LocalFileSystem.class.isInstance(stagingPath.getFileSystem(conf))) { conf.set(MRJobConfig.MR_AM_STAGING_DIR, new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR)).getAbsolutePath()); } } FileContext fc = FileContext.getFileContext(stagingPath.toUri(), conf); if (fc.util().exists(stagingPath)) { LOG.info(stagingPath + " exists! deleting..."); fc.delete(stagingPath, true); } LOG.info("mkdir: " + stagingPath); fc.mkdir(stagingPath, null, true); //mkdir done directory as well String doneDir = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf); Path doneDirPath = fc.makeQualified(new Path(doneDir)); fc.mkdir(doneDirPath, null, true); } catch (IOException e) { throw new TezUncheckedException("Could not create staging directory. 
", e); } conf.set(MRConfig.MASTER_ADDRESS, "test"); //configure the shuffle service in NM conf.setStrings(YarnConfiguration.NM_AUX_SERVICES, new String[] { ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID }); conf.setClass( String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID), ShuffleHandler.class, Service.class); // Non-standard shuffle port conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0); conf.setClass(YarnConfiguration.NM_CONTAINER_EXECUTOR, DefaultContainerExecutor.class, ContainerExecutor.class); // TestMRJobs is for testing non-uberized operation only; see TestUberAM // for corresponding uberized tests. conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false); super.serviceInit(conf); }
From source file:org.apache.tez.test.TestFaultTolerance.java
License:Apache License
@BeforeClass public static void setup() throws Exception { LOG.info("Starting mini clusters"); FileSystem remoteFs = null; try {//from www. java 2s . c o m conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR); dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).format(true).racks(null).build(); remoteFs = dfsCluster.getFileSystem(); } catch (IOException io) { throw new RuntimeException("problem starting mini dfs cluster", io); } if (miniTezCluster == null) { miniTezCluster = new MiniTezCluster(TestFaultTolerance.class.getName(), 4, 1, 1); Configuration miniTezconf = new Configuration(conf); miniTezconf.set("fs.defaultFS", remoteFs.getUri().toString()); // use HDFS miniTezCluster.init(miniTezconf); miniTezCluster.start(); Path remoteStagingDir = remoteFs .makeQualified(new Path(TEST_ROOT_DIR, String.valueOf(new Random().nextInt(100000)))); TezClientUtils.ensureStagingDirExists(conf, remoteStagingDir); TezConfiguration tezConf = new TezConfiguration(miniTezCluster.getConfig()); tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString()); tezConf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false); tezSession = TezClient.create("TestFaultTolerance", tezConf, true); tezSession.start(); } }
From source file:org.apache.tez.tests.MiniTezClusterWithTimeline.java
License:Apache License
@Override public void serviceInit(Configuration conf) throws Exception { conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_TEZ_FRAMEWORK_NAME); // Use libs from cluster since no build is available conf.setBoolean(TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS, true); // blacklisting disabled to prevent scheduling issues conf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false); if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) { conf.set(MRJobConfig.MR_AM_STAGING_DIR, new File(getTestWorkDir(), "apps_staging_dir" + Path.SEPARATOR).getAbsolutePath()); }/*from w w w . j a va2s .c o m*/ if (conf.get(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC) == null) { // nothing defined. set quick delete value conf.setLong(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, 0l); } File appJarLocalFile = new File(MiniTezClusterWithTimeline.APPJAR); if (!appJarLocalFile.exists()) { String message = "TezAppJar " + MiniTezClusterWithTimeline.APPJAR + " not found. Exiting."; LOG.info(message); throw new TezUncheckedException(message); } else { LOG.info("Using Tez AppJar: " + appJarLocalFile.getAbsolutePath()); } FileSystem fs = FileSystem.get(conf); Path testRootDir = fs.makeQualified(new Path("target", getName() + "-tmpDir")); Path appRemoteJar = new Path(testRootDir, "TezAppJar.jar"); // Copy AppJar and make it public. Path appMasterJar = new Path(MiniTezClusterWithTimeline.APPJAR); fs.copyFromLocalFile(appMasterJar, appRemoteJar); fs.setPermission(appRemoteJar, new FsPermission("777")); conf.set(TezConfiguration.TEZ_LIB_URIS, appRemoteJar.toUri().toString()); LOG.info("Set TEZ-LIB-URI to: " + conf.get(TezConfiguration.TEZ_LIB_URIS)); // VMEM monitoring disabled, PMEM monitoring enabled. 
conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false); conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false); conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000"); try { Path stagingPath = FileContext.getFileContext(conf) .makeQualified(new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR))); /* * Re-configure the staging path on Windows if the file system is localFs. * We need to use a absolute path that contains the drive letter. The unit * test could run on a different drive than the AM. We can run into the * issue that job files are localized to the drive where the test runs on, * while the AM starts on a different drive and fails to find the job * metafiles. Using absolute path can avoid this ambiguity. */ if (Path.WINDOWS) { if (LocalFileSystem.class.isInstance(stagingPath.getFileSystem(conf))) { conf.set(MRJobConfig.MR_AM_STAGING_DIR, new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR)).getAbsolutePath()); } } FileContext fc = FileContext.getFileContext(stagingPath.toUri(), conf); if (fc.util().exists(stagingPath)) { LOG.info(stagingPath + " exists! deleting..."); fc.delete(stagingPath, true); } LOG.info("mkdir: " + stagingPath); fc.mkdir(stagingPath, null, true); //mkdir done directory as well String doneDir = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf); Path doneDirPath = fc.makeQualified(new Path(doneDir)); fc.mkdir(doneDirPath, null, true); } catch (IOException e) { throw new TezUncheckedException("Could not create staging directory. 
", e); } conf.set(MRConfig.MASTER_ADDRESS, "test"); //configure the shuffle service in NM conf.setStrings(YarnConfiguration.NM_AUX_SERVICES, new String[] { ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID }); conf.setClass( String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID), ShuffleHandler.class, Service.class); // Non-standard shuffle port conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0); conf.setClass(YarnConfiguration.NM_CONTAINER_EXECUTOR, DefaultContainerExecutor.class, ContainerExecutor.class); // TestMRJobs is for testing non-uberized operation only; see TestUberAM // for corresponding uberized tests. conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false); super.serviceInit(conf); }
From source file:org.cloudata.core.parallel.hadoop.CloudataMapReduceUtil.java
License:Apache License
/** * Configure classpath to run MapReduce job which uses Cloudata<BR> * 1. Uploads cloudata library to HDFS: cloudata-xxx-core.jar<BR> * 2. make cloudata-configure.jar includes cloudata-site.xml and uploads to HDFS<BR> * 3. run DistributedCache.addArchiveToClassPath() for each file<BR> * Must call clearMapReduce() after running job. * @param jobConf//from w w w . ja v a 2s .c o m * @param cloudataHomeDir * @return temporary directory for linrary in HDFS. When calling clearMapReduce(), use this value. * @throws IOException */ public static String initMapReduce(JobConf jobConf) throws IOException { String jarPath = "Cloudata_Lib_" + System.currentTimeMillis(); FileSystem fs = FileSystem.get(conf); Path parentPath = fs.makeQualified(new Path(jarPath)); //upload jar to dfs fs.mkdirs(parentPath); String[] classpaths = System.getProperty("java.class.path", "").split(File.pathSeparator); if (classpaths == null || classpaths.length == 0) { throw new IOException("No classpath"); } List<String> uploadedFiles = new ArrayList<String>(); for (String eachPath : classpaths) { if (eachPath.indexOf("cloudata") >= 0 || eachPath.indexOf("lib") >= 0 || eachPath.indexOf("conf") >= 0) { uploadFile(fs, parentPath, new File(eachPath), uploadedFiles); } } if (uploadedFiles.size() == 0) { throw new IOException("No lib files[cloudata-xxx-core.jar, zookeeper-xxx.jar] in classpath"); } Path rootPath = new Path("/"); String rootUri = fs.makeQualified(rootPath).toString(); for (String eachPath : uploadedFiles) { Path path = fs.makeQualified(new Path(eachPath)); String pathStr = path.toUri().toString(); if (pathStr.indexOf(rootUri) >= 0) { pathStr = pathStr.substring(pathStr.indexOf(rootUri) + rootUri.length()); } if (!pathStr.startsWith("/")) { pathStr = "/" + pathStr; } LOG.debug("DistributedCache.addArchiveToClassPath: " + pathStr); DistributedCache.addArchiveToClassPath(new Path(pathStr), jobConf); } return jarPath; }
From source file:org.deeplearning4j.iterativereduce.runtime.Utils.java
License:Apache License
/**
 * Copies a local file to the file system selected by {@code conf}, overwriting any
 * existing destination and leaving the local source in place.
 *
 * @param conf   configuration used to obtain the target file system
 * @param local  path of the local source file
 * @param remote destination path; it is qualified against the target file system
 * @throws IOException if the file system cannot be obtained or the copy fails
 */
private static void copyToFs(Configuration conf, String local, String remote) throws IOException {
  final FileSystem targetFs = FileSystem.get(conf);
  final Path source = new Path(local);
  final Path destination = targetFs.makeQualified(new Path(remote));

  LOG.debug("Copying to filesystem, src=" + source.toString() + ", dst=" + destination);

  boolean deleteSource = false;
  boolean overwrite = true;
  targetFs.copyFromLocalFile(deleteSource, overwrite, source, destination);
}
From source file:org.kitesdk.apps.spark.spi.streaming.SparkStreamingJobManager.java
License:Apache License
@Override public void start(FileSystem fs, Path appRoot) { JobConf jobConf = new JobConf(); jobConf.setJarByClass(SparkStreamingJobMain.class); String containingJar = jobConf.getJar(); Path libPath = new Path(appRoot, "lib"); Path jarPath = new Path(libPath, new File(containingJar).getName()); jarPath = fs.makeQualified(jarPath); SparkLauncher launcher = new SparkLauncher(); launcher.setMainClass(SparkStreamingJobMain.class.getName()); launcher.setAppResource(jarPath.toString()); launcher.setMaster("yarn-cluster"); try {/*from w w w . ja v a 2 s . c o m*/ // Add the library JARs from HDFS so we don't need to reload // them separately into Spark. FileStatus[] libJars = fs.listStatus(libPath); for (FileStatus jar : libJars) { launcher.addJar(jar.getPath().toString()); } // Add the sharelib JARs, since they are not visible to Spark otherwise. List<Path> shareLibJars = ShareLibs.jars(sparkJobContext.getHadoopConf(), "hive2"); for (Path sharelibJar : shareLibJars) { launcher.addJar(fs.makeQualified(sharelibJar).toString()); } } catch (IOException e) { throw new AppException(e); } launcher.addAppArgs(appRoot.toString(), description.getJobName()); // Explicitly set the metastore URI to be usable in the job. launcher.setConf("spark.hadoop.hive.metastore.uris", sparkJobContext.getHadoopConf().get("hive.metastore.uris")); // Add the Avro classes. 
List<Schema> schemas = JobReflection.getSchemas(job); StringBuilder avroClassesArg = new StringBuilder(); avroClassesArg.append("-D").append(KryoAvroRegistrator.KITE_AVRO_CLASSES).append("="); boolean first = true; for (Schema schema : schemas) { if (!first) { avroClassesArg.append(","); } avroClassesArg.append(SpecificData.get().getClass(schema).getName()); first = false; } launcher.setConf("spark.driver.extraJavaOptions", avroClassesArg.toString()); launcher.setConf("spark.executor.extraJavaOptions", avroClassesArg.toString()); try { Process process = launcher.launch(); // Redirect the spark-submit output to be visible to the reader. Thread stdoutThread = writeOutput(process.getInputStream(), System.out); Thread stderrThread = writeOutput(process.getErrorStream(), System.err); int result = process.waitFor(); stdoutThread.join(); stderrThread.join(); if (result != 0) { throw new AppException("spark-submit returned error status: " + result); } } catch (IOException e) { throw new AppException(e); } catch (InterruptedException e) { throw new AppException(e); } }
From source file:org.kitesdk.cli.commands.CSVImportCommand.java
License:Apache License
@Override @SuppressWarnings("unchecked") public int run() throws IOException { Preconditions.checkArgument(targets != null && targets.size() == 2, "CSV path and target dataset name are required."); Path source = qualifiedPath(targets.get(0)); FileSystem sourceFS = source.getFileSystem(getConf()); Preconditions.checkArgument(sourceFS.exists(source), "CSV path does not exist: " + source); if (header != null) { // if a header is given on the command line, do assume one is in the file noHeader = true;//from w w w . ja va 2 s . com } CSVProperties props = new CSVProperties.Builder().delimiter(delimiter).escape(escape).quote(quote) .header(header).hasHeader(!noHeader).linesToSkip(linesToSkip).charset(charsetName).build(); String dataset = targets.get(1); View<Record> target = load(dataset, Record.class); Schema datasetSchema = target.getDataset().getDescriptor().getSchema(); // TODO: replace this with a temporary Dataset from a FS repo // TODO: CDK-92: always use GenericRecord? DatasetDescriptor csvDescriptor = new DatasetDescriptor.Builder().location(source.toUri()) .schema(ColumnMappingParser .removeEmbeddedMapping(PartitionStrategyParser.removeEmbeddedStrategy(datasetSchema))) .format("csv").build(); csvDescriptor = props.addToDescriptor(csvDescriptor); TemporaryFileSystemDatasetRepository repo = new TemporaryFileSystemDatasetRepository(getConf(), // ensure the same FS as the file source is used sourceFS.makeQualified(new Path("/tmp/" + UUID.randomUUID().toString())), target.getDataset().getNamespace(), UUID.randomUUID().toString()); try { FileSystemDataset<Record> csvDataset = (FileSystemDataset) repo.create("default", "csv", csvDescriptor); Iterator<Path> iter = csvDataset.pathIterator().iterator(); Preconditions.checkArgument(iter.hasNext(), "CSV path has no data files: " + source); TaskUtil.configure(getConf()).addJars(jars); TransformTask task; if (transform != null) { DoFn<Record, Record> transformFn; try { DynConstructors.Ctor<DoFn<Record, Record>> ctor = new 
DynConstructors.Builder(DoFn.class) .loader(loaderForJars(jars)).impl(transform).buildChecked(); transformFn = ctor.newInstance(); } catch (NoSuchMethodException e) { throw new DatasetException("Cannot find no-arg constructor for class: " + transform, e); } task = new TransformTask<Record, Record>(csvDataset, target, transformFn); } else { task = new CopyTask<Record>(csvDataset, target); } task.setConf(getConf()); if (noCompaction) { task.noCompaction(); } if (numWriters >= 0) { task.setNumWriters(numWriters); } PipelineResult result = task.run(); if (result.succeeded()) { long count = task.getCount(); if (count > 0) { console.info("Added {} records to \"{}\"", count, dataset); } return 0; } else { return 1; } } finally { // clean up the temporary repository repo.delete(); } }