List of usage examples for org.apache.hadoop.fs.FileSystem#makeQualified
public Path makeQualified(Path path)
From source file:org.notmysock.tez.BroadcastTest.java
License:Apache License
public boolean run(Configuration conf, boolean doLocalityCheck) throws Exception { System.out.println("Running BroadcastTest"); // conf and UGI TezConfiguration tezConf;// w ww . j a v a2 s . com if (conf != null) { tezConf = new TezConfiguration(conf); } else { tezConf = new TezConfiguration(); } tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); UserGroupInformation.setConfiguration(tezConf); String user = UserGroupInformation.getCurrentUser().getShortUserName(); // staging dir FileSystem fs = FileSystem.get(tezConf); String stagingDirStr = Path.SEPARATOR + "user" + Path.SEPARATOR + user + Path.SEPARATOR + ".staging" + Path.SEPARATOR + Path.SEPARATOR + Long.toString(System.currentTimeMillis()); Path stagingDir = new Path(stagingDirStr); tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr); stagingDir = fs.makeQualified(stagingDir); Path jobJar = new Path(stagingDir, "job.jar"); fs.copyFromLocalFile(getCurrentJarURL(), jobJar); Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); localResources.put("job.jar", createLocalResource(fs, jobJar)); TezClient tezSession = null; // needs session or else TaskScheduler does not hold onto containers tezSession = TezClient.create("BroadcastTest", tezConf); tezSession.addAppMasterLocalFiles(localResources); tezSession.start(); DAGClient dagClient = null; try { DAG dag = createDAG(fs, tezConf, stagingDir, localResources); dag.addTaskLocalFiles(localResources); tezSession.waitTillReady(); dagClient = tezSession.submitDAG(dag); // monitoring DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(null); if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) { System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics()); return false; } return true; } finally { fs.delete(stagingDir, true); tezSession.stop(); } }
From source file:org.pentaho.di.job.entries.hadooptransjobexecutor.DistributedCacheUtil.java
License:Apache License
/** * Add an file path to the current set of classpath entries. It adds the file * to cache as well./*w ww .j a v a 2 s.co m*/ * * This is copied from Hadoop 0.20.2 o.a.h.filecache.DistributedCache so we can inject the correct path separator * for the environment the cluster is executing in. See {@link #getClusterPathSeparator()}. * * @param file Path of the file to be added * @param conf Configuration that contains the classpath setting */ public void addFileToClassPath(Path file, Configuration conf) throws IOException { // TODO Replace this with a Hadoop shim if we end up having version-specific implementations scattered around if (VersionInfo.getVersion().contains("0.21")) { DistributedCache.addFileToClassPath(file, conf); } else { String classpath = conf.get("mapred.job.classpath.files"); conf.set("mapred.job.classpath.files", classpath == null ? file.toString() : classpath + getClusterPathSeparator() + file.toString()); FileSystem fs = FileSystem.get(conf); URI uri = fs.makeQualified(file).toUri(); DistributedCache.addCacheFile(uri, conf); } }
From source file:org.pentaho.hadoop.shim.cdh50.HadoopShim.java
License:Apache License
@Override public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception { fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider()); setDistributedCacheUtil(new DistributedCacheUtilImpl(config) { public void addFileToClassPath(Path file, Configuration conf) throws IOException { String classpath = conf.get("mapred.job.classpath.files"); conf.set("mapred.job.classpath.files", classpath == null ? file.toString() : classpath + getClusterPathSeparator() + file.toString()); FileSystem fs = FileSystem.get(conf); URI uri = fs.makeQualified(file).toUri(); DistributedCache.addCacheFile(uri, conf); }//from w ww . j ava2 s .c o m public String getClusterPathSeparator() { // Use a comma rather than an OS-specific separator (see https://issues.apache.org/jira/browse/HADOOP-4864) return System.getProperty("hadoop.cluster.path.separator", ","); } }); }
From source file:org.pentaho.hadoop.shim.common.DistributedCacheUtilImpl.java
License:Apache License
/** * Add an file path to the current set of classpath entries. It adds the file to cache as well. * <p/>/*from w w w . j a v a 2s .c o m*/ * This is copied from Hadoop 0.20.2 o.a.h.filecache.DistributedCache so we can inject the correct path separator for * the environment the cluster is executing in. See {@link #getClusterPathSeparator()}. * * @param file Path of the file to be added * @param conf Configuration that contains the classpath setting */ public void addFileToClassPath(Path file, Configuration conf) throws IOException { // TODO Replace this with a Hadoop shim if we end up having version-specific implementations scattered around // Save off the classloader, to make sure the version info can be loaded successfully from the hadoop-common JAR ClassLoader cl = Thread.currentThread().getContextClassLoader(); Thread.currentThread().setContextClassLoader(VersionInfo.class.getClassLoader()); // Get the version string or set to a default value String version; try { version = VersionInfo.getVersion(); } catch (Throwable t) { version = "unknown"; } // Restore the original classloader Thread.currentThread().setContextClassLoader(cl); String classpath = conf.get("mapred.job.classpath.files"); conf.set("mapred.job.classpath.files", classpath == null ? file.toString() : classpath + getClusterPathSeparator() + file.toString()); FileSystem fs = FileSystem.get(conf); URI uri = fs.makeQualified(file).toUri(); DistributedCache.addCacheFile(uri, conf); }
From source file:org.pentaho.hadoop.shim.common.HadoopShimImpl.java
License:Apache License
@Override public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception { validateHadoopHomeWithWinutils();/*from w w w . j a v a2 s . co m*/ fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider()); setDistributedCacheUtil(new DistributedCacheUtilImpl(config) { public void addFileToClassPath(Path file, Configuration conf) throws IOException { String classpath = conf.get("mapred.job.classpath.files"); conf.set("mapred.job.classpath.files", classpath == null ? file.toString() : classpath + getClusterPathSeparator() + file.toString()); FileSystem fs = FileSystem.get(file.toUri(), conf); URI uri = fs.makeQualified(file).toUri(); DistributedCache.addCacheFile(uri, conf); } public String getClusterPathSeparator() { // Use a comma rather than an OS-specific separator (see https://issues.apache.org/jira/browse/HADOOP-4864) return System.getProperty("hadoop.cluster.path.separator", ","); } }); }
From source file:org.pentaho.hadoop.shim.emr32.HadoopShim.java
License:Apache License
@Override public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception { fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider()); setDistributedCacheUtil(new DistributedCacheUtilImpl(config) { public void addFileToClassPath(Path file, Configuration conf) throws IOException { String classpath = conf.get("mapred.job.classpath.files"); conf.set("mapred.job.classpath.files", classpath == null ? file.toString() : classpath + getClusterPathSeparator() + file.toString()); FileSystem fs = FileSystem.get(conf); URI uri = fs.makeQualified(file).toUri(); //Job.getInstance( conf ).addCacheFile( uri ); DistributedCache.addCacheFile(uri, conf); }//from www.j a v a 2s . c o m public String getClusterPathSeparator() { // Use a comma rather than an OS-specific separator (see https://issues.apache.org/jira/browse/HADOOP-4864) return System.getProperty("hadoop.cluster.path.separator", ","); } }); if (!fsm.hasProvider("s3n")) { fsm.addProvider(config, "s3n", config.getIdentifier(), new HDFSFileProvider()); } }
From source file:org.pentaho.hadoop.shim.hdp20.HadoopShim.java
License:Apache License
@Override public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception { fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider()); setDistributedCacheUtil(new DistributedCacheUtilImpl(config) { public void addFileToClassPath(Path file, Configuration conf) throws IOException { String classpath = conf.get("mapred.job.classpath.files"); conf.set("mapred.job.classpath.files", classpath == null ? file.toString() : classpath + getClusterPathSeparator() + file.toString()); FileSystem fs = FileSystem.get(conf); URI uri = fs.makeQualified(file).toUri(); DistributedCache.addCacheFile(uri, conf); }// www .j a v a 2s . c om public String getClusterPathSeparator() { return System.getProperty("hadoop.cluster.path.separator", ","); } }); }
From source file:org.pentaho.hadoop.shim.hsp101.HadoopShim.java
License:Apache License
@Override public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception { fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider()); setDistributedCacheUtil(new DistributedCacheUtilImpl(config) { /**/* w w w . java2 s . co m*/ * Default permission for cached files * <p/> * Not using FsPermission.createImmutable due to EOFExceptions when using it with Hadoop 0.20.2 */ private final FsPermission CACHED_FILE_PERMISSION = new FsPermission((short) 0755); public void addFileToClassPath(Path file, Configuration conf) throws IOException { String classpath = conf.get("mapred.job.classpath.files"); conf.set("mapred.job.classpath.files", classpath == null ? file.toString() : classpath + getClusterPathSeparator() + file.toString()); FileSystem fs = FileSystem.get(conf); URI uri = fs.makeQualified(file).toUri(); DistributedCache.addCacheFile(uri, conf); } /** * Stages the source file or folder to a Hadoop file system and sets their permission and replication * value appropriately to be used with the Distributed Cache. WARNING: This will delete the contents of * dest before staging the archive. * * @param source File or folder to copy to the file system. If it is a folder all contents will be * copied into dest. * @param fs Hadoop file system to store the contents of the archive in * @param dest Destination to copy source into. If source is a file, the new file name will be * exactly dest. If source is a folder its contents will be copied into dest. For more * info see {@link FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path, * org.apache.hadoop.fs.Path)}. * @param overwrite Should an existing file or folder be overwritten? If not an exception will be * thrown. * @throws IOException Destination exists is not a directory * @throws KettleFileException Source does not exist or destination exists and overwrite is false. 
*/ public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite) throws IOException, KettleFileException { if (!source.exists()) { throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class, "DistributedCacheUtil.SourceDoesNotExist", source)); } if (fs.exists(dest)) { if (overwrite) { // It is a directory, clear it out fs.delete(dest, true); } else { throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class, "DistributedCacheUtil.DestinationExists", dest.toUri().getPath())); } } // Use the same replication we'd use for submitting jobs short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10); copyFile(source, fs, dest, overwrite); fs.setReplication(dest, replication); } private void copyFile(FileObject source, FileSystem fs, Path dest, boolean overwrite) throws IOException { if (source.getType() == FileType.FOLDER) { fs.mkdirs(dest); fs.setPermission(dest, CACHED_FILE_PERMISSION); for (FileObject fileObject : source.getChildren()) { copyFile(fileObject, fs, new Path(dest, fileObject.getName().getBaseName()), overwrite); } } else { try (FSDataOutputStream fsDataOutputStream = fs.create(dest, overwrite)) { IOUtils.copy(source.getContent().getInputStream(), fsDataOutputStream); fs.setPermission(dest, CACHED_FILE_PERMISSION); } } } public String getClusterPathSeparator() { return System.getProperty("hadoop.cluster.path.separator", ","); } }); }
From source file:org.pentaho.hadoop.shim.mapr31.MapR3DistributedCacheUtilImpl.java
License:Apache License
/**
 * Adds a file path to the current set of classpath entries and adds the file to the
 * distributed cache as well.
 *
 * This is copied from Hadoop 0.20.2 o.a.h.filecache.DistributedCache so we can inject the correct path separator
 * for the environment the cluster is executing in. See {@link #getClusterPathSeparator()}.
 *
 * @param file Path of the file to be added
 * @param conf Configuration that contains the classpath setting
 * @throws IOException if the file system for {@code file} cannot be obtained
 */
@Override
public void addFileToClassPath(Path file, Configuration conf) throws IOException {
    String classpath = conf.get("mapred.job.classpath.files");
    conf.set("mapred.job.classpath.files",
            classpath == null ? file.toString() : classpath + getClusterPathSeparator() + file.toString());

    // Resolve the file system from the path itself so that a path qualified for a non-default
    // file system is accepted; an unqualified path still resolves to the default file system.
    FileSystem fs = FileSystem.get(file.toUri(), conf);
    URI uri = fs.makeQualified(file).toUri();

    DistributedCache.addCacheFile(uri, conf);
}
From source file:org.qcri.pca.CompositeJob.java
/** * Computes XtX and YtX/*from w w w. j av a 2s . c om*/ * * Xc = (Y - Ym) * MEM = Y * MEM - Ym * MEM = X - Xm * * XtX = (X - Xm)' * (X - Xm) YtX = (Y - Ym)' * (Y - Ym) * * @param conf * the configuration * @param matrixInputPath * Y * @param inMemMatrixDir * MEM, where X = Y * MEM * @param inMemMatrixNumRows * MEM.rows * @param inMemMatrixNumCols * MEM.cols * @param ymPath * Ym * @param xmPath * Xm * @param matrixOutputPath * YtX * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void run(Configuration conf, Path matrixInputPath, String inMemMatrixDir, int inMemMatrixNumRows, int inMemMatrixNumCols, String ymPath, String xmPath, Path matrixOutputPath) throws IOException, InterruptedException, ClassNotFoundException { conf.set(MATRIXINMEMORY, inMemMatrixDir); conf.setInt(MATRIXINMEMORYROWS, inMemMatrixNumRows); conf.setInt(MATRIXINMEMORYCOLS, inMemMatrixNumCols); conf.set(YMPATH, ymPath); conf.set(XMPATH, xmPath); Path xtxOutputPath = getXtXPathBasedOnYm(new Path(ymPath)); conf.set(XTXPATH, xtxOutputPath.toString()); Job job = new Job(conf); job.setJobName("CompositeJob-" + matrixInputPath.getName()); job.setJarByClass(CompositeJob.class); FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf); matrixInputPath = fs.makeQualified(matrixInputPath); matrixOutputPath = fs.makeQualified(matrixOutputPath); FileInputFormat.addInputPath(job, matrixInputPath); job.setInputFormatClass(SequenceFileInputFormat.class); FileOutputFormat.setOutputPath(job, matrixOutputPath); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(CompositeWritable.class); job.setMapOutputValueClass(VectorWritable.class); job.setSortComparatorClass(CompositeWritable.class); job.setGroupingComparatorClass(CompositeWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.submit(); 
job.waitForCompletion(true); }