List of usage examples for org.apache.hadoop.fs.FileContext#getFileContext
public static FileContext getFileContext(final Configuration aConf) throws UnsupportedFileSystemException
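Before the project-specific examples below, here is a minimal, self-contained sketch (not taken from any of the source files listed) of the Configuration-based overload: it resolves the default file system named by the configuration, then probes and creates a directory. The path "/tmp/example-dir" and the class name are illustrative placeholders only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;

public class FileContextGetFileContextExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Resolves the default file system (fs.defaultFS) from the configuration;
        // throws UnsupportedFileSystemException if no matching AbstractFileSystem exists.
        FileContext fc = FileContext.getFileContext(conf);

        Path dir = fc.makeQualified(new Path("/tmp/example-dir"));
        if (!fc.util().exists(dir)) {
            // null permission uses the file system default; true creates missing parents.
            fc.mkdir(dir, null, true);
        }
        System.out.println("Qualified path: " + dir);
    }
}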
From source file:org.apache.gobblin.writer.FsDataWriter.java
License:Apache License
public FsDataWriter(FsDataWriterBuilder<?, ?> builder, State properties) throws IOException {
    this.properties = properties;
    this.id = builder.getWriterId();
    this.numBranches = builder.getBranches();
    this.branchId = builder.getBranch();
    this.fileName = builder.getFileName(properties);
    this.writerAttemptIdOptional = Optional.fromNullable(builder.getWriterAttemptId());
    this.encoders = builder.getEncoders();

    Configuration conf = new Configuration();
    // Add all job configuration properties so they are picked up by Hadoop
    JobConfigurationUtils.putStateIntoConfiguration(properties, conf);
    this.fs = WriterUtils.getWriterFS(properties, this.numBranches, this.branchId);
    this.fileContext = FileContext.getFileContext(conf);

    // Initialize staging/output directory
    Path writerStagingDir = this.writerAttemptIdOptional.isPresent()
        ? WriterUtils.getWriterStagingDir(properties, this.numBranches, this.branchId,
            this.writerAttemptIdOptional.get())
        : WriterUtils.getWriterStagingDir(properties, this.numBranches, this.branchId);
    this.stagingFile = new Path(writerStagingDir, this.fileName);

    this.outputFile = new Path(WriterUtils.getWriterOutputDir(properties, this.numBranches, this.branchId),
        this.fileName);
    this.allOutputFilesPropName = ForkOperatorUtils.getPropertyNameForBranch(
        ConfigurationKeys.WRITER_FINAL_OUTPUT_FILE_PATHS, this.numBranches, this.branchId);

    // Deleting the staging file if it already exists, which can happen if the
    // task failed and the staging file didn't get cleaned up for some reason.
    // Deleting the staging file prevents the task retry from being blocked.
    if (this.fs.exists(this.stagingFile)) {
        LOG.warn(String.format("Task staging file %s already exists, deleting it", this.stagingFile));
        HadoopUtils.deletePath(this.fs, this.stagingFile, false);
    }

    this.shouldIncludeRecordCountInFileName = properties.getPropAsBoolean(
        ForkOperatorUtils.getPropertyNameForBranch(WRITER_INCLUDE_RECORD_COUNT_IN_FILE_NAMES,
            this.numBranches, this.branchId),
        false);

    this.bufferSize = properties.getPropAsInt(ForkOperatorUtils
        .getPropertyNameForBranch(ConfigurationKeys.WRITER_BUFFER_SIZE, this.numBranches, this.branchId),
        ConfigurationKeys.DEFAULT_BUFFER_SIZE);

    this.replicationFactor = properties.getPropAsShort(
        ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_REPLICATION_FACTOR,
            this.numBranches, this.branchId),
        this.fs.getDefaultReplication(this.outputFile));

    this.blockSize = properties.getPropAsLong(
        ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_BLOCK_SIZE,
            this.numBranches, this.branchId),
        this.fs.getDefaultBlockSize(this.outputFile));

    this.filePermission = HadoopUtils.deserializeWriterFilePermissions(properties, this.numBranches,
        this.branchId);
    this.dirPermission = HadoopUtils.deserializeWriterDirPermissions(properties, this.numBranches,
        this.branchId);
    this.group = Optional.fromNullable(properties.getProp(ForkOperatorUtils
        .getPropertyNameForBranch(ConfigurationKeys.WRITER_GROUP_NAME, this.numBranches, this.branchId)));

    // Create the parent directory of the output file if it does not exist
    WriterUtils.mkdirsWithRecursivePermission(this.fs, this.outputFile.getParent(), this.dirPermission);

    this.bytesWritten = Optional.absent();

    this.defaultMetadata = new GlobalMetadata();
    for (StreamCodec c : getEncoders()) {
        this.defaultMetadata.addTransferEncoding(c.getTag());
    }

    this.partitionKey = builder.getPartitionPath(properties);
    if (builder.getPartitionPath(properties) != null) {
        properties.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + "_" + builder.getWriterId(),
            partitionKey);
    }
}
From source file:org.apache.oozie.tools.diag.OozieLauncherLogFetcher.java
License:Apache License
public int dumpAllContainersLogs(ApplicationId appId, String appOwner, PrintStream out) throws IOException {
    Path remoteRootLogDir = new Path(hadoopConfig.get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
        YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
    String logDirSuffix = getRemoteNodeLogDirSuffix(hadoopConfig);
    Path remoteAppLogDir = getRemoteAppLogDir(remoteRootLogDir, appId, appOwner, logDirSuffix);

    RemoteIterator nodeFiles;
    try {
        Path qualifiedLogDir = FileContext.getFileContext(hadoopConfig).makeQualified(remoteAppLogDir);
        nodeFiles = FileContext.getFileContext(qualifiedLogDir.toUri(), hadoopConfig)
            .listStatus(remoteAppLogDir);
    } catch (FileNotFoundException fileNotFoundException) {
        logDirNotExist(remoteAppLogDir.toString());
        return -1;
    }

    boolean foundAnyLogs = false;
    while (true) {
        FileStatus thisNodeFile;
        do {
            if (!nodeFiles.hasNext()) {
                if (!foundAnyLogs) {
                    emptyLogDir(remoteAppLogDir.toString());
                    return -1;
                }
                return 0;
            }
            thisNodeFile = (FileStatus) nodeFiles.next();
        } while (thisNodeFile.getPath().getName().endsWith(TMP_FILE_SUFFIX));

        AggregatedLogFormat.LogReader reader = new AggregatedLogFormat.LogReader(hadoopConfig,
            thisNodeFile.getPath());
        try {
            AggregatedLogFormat.LogKey key = new AggregatedLogFormat.LogKey();
            DataInputStream valueStream = reader.next(key);

            while (valueStream != null) {
                String containerString = "\n\nContainer: " + key + " on " + thisNodeFile.getPath().getName();
                out.println(containerString);
                out.println(StringUtils.repeat("=", containerString.length()));

                while (true) {
                    try {
                        AggregatedLogFormat.LogReader.readAContainerLogsForALogType(valueStream, out,
                            thisNodeFile.getModificationTime());
                        foundAnyLogs = true;
                    } catch (EOFException eofException) {
                        key = new AggregatedLogFormat.LogKey();
                        valueStream = reader.next(key);
                        break;
                    }
                }
            }
        } finally {
            reader.close();
        }
    }
}
From source file:org.apache.reef.runtime.yarn.client.uploader.JobFolder.java
License:Apache License
/**
 * Creates a LocalResource instance for the JAR file referenced by the given Path.
 */
public LocalResource getLocalResourceForPath(final Path jarPath) throws IOException {
    final LocalResource localResource = Records.newRecord(LocalResource.class);

    final FileStatus status = FileContext.getFileContext(fileSystem.getUri()).getFileStatus(jarPath);
    localResource.setType(LocalResourceType.ARCHIVE);
    localResource.setVisibility(LocalResourceVisibility.APPLICATION);
    localResource.setResource(ConverterUtils.getYarnUrlFromPath(status.getPath()));
    localResource.setTimestamp(status.getModificationTime());
    localResource.setSize(status.getLen());

    return localResource;
}
From source file:org.apache.reef.runtime.yarn.client.YarnJobSubmissionHandler.java
License:Apache License
/**
 * Creates a LocalResource instance for the JAR file referenced by the given Path.
 */
private LocalResource makeLocalResourceForJarFile(final Path path) throws IOException {
    final LocalResource localResource = Records.newRecord(LocalResource.class);

    final FileStatus status = FileContext.getFileContext(fileSystem.getUri()).getFileStatus(path);
    localResource.setType(LocalResourceType.ARCHIVE);
    localResource.setVisibility(LocalResourceVisibility.APPLICATION);
    localResource.setResource(ConverterUtils.getYarnUrlFromPath(status.getPath()));
    localResource.setTimestamp(status.getModificationTime());
    localResource.setSize(status.getLen());

    return localResource;
}
From source file:org.apache.reef.runtime.yarn.driver.UploaderToJobFolder.java
License:Apache License
/**
 * Creates a LocalResource instance for the JAR file referenced by the given Path.
 *
 * @param path
 * @return
 * @throws IOException
 */
LocalResource makeLocalResourceForJarFile(final Path path) throws IOException {
    final LocalResource localResource = Records.newRecord(LocalResource.class);

    final FileStatus status = FileContext.getFileContext(this.fileSystem.getUri()).getFileStatus(path);
    localResource.setType(LocalResourceType.ARCHIVE);
    localResource.setVisibility(LocalResourceVisibility.APPLICATION);
    localResource.setResource(ConverterUtils.getYarnUrlFromPath(status.getPath()));
    localResource.setTimestamp(status.getModificationTime());
    localResource.setSize(status.getLen());

    return localResource;
}
From source file:org.apache.samza.system.hdfs.reader.AvroFileHdfsReader.java
License:Apache License
@Override
public void open(String pathStr, String singleFileOffset) {
    LOG.info(String.format("%s: Open file [%s] with file offset [%s] for read", systemStreamPartition,
        pathStr, singleFileOffset));

    Path path = new Path(pathStr);
    try {
        AvroFSInput input = new AvroFSInput(FileContext.getFileContext(path.toUri()), path);
        fileReader = new DataFileReader<>(input, new GenericDatumReader<>());
        seek(singleFileOffset);
    } catch (IOException e) {
        throw new SamzaException(e);
    }
}
From source file:org.apache.solr.hadoop.hack.MiniMRYarnCluster.java
License:Apache License
@Override
public void serviceInit(Configuration conf) throws Exception {
    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
    if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) {
        conf.set(MRJobConfig.MR_AM_STAGING_DIR,
            new File(getTestWorkDir(), "apps_staging_dir/").getAbsolutePath());
    }

    // By default, VMEM monitoring disabled, PMEM monitoring enabled.
    if (!conf.getBoolean(MRConfig.MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING,
        MRConfig.DEFAULT_MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING)) {
        conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
        conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
    }

    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000");

    try {
        Path stagingPath = FileContext.getFileContext(conf)
            .makeQualified(new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR)));
        /*
         * Re-configure the staging path on Windows if the file system is localFs.
         * We need to use a absolute path that contains the drive letter. The unit
         * test could run on a different drive than the AM. We can run into the
         * issue that job files are localized to the drive where the test runs on,
         * while the AM starts on a different drive and fails to find the job
         * metafiles. Using absolute path can avoid this ambiguity.
         */
        if (Path.WINDOWS) {
            if (LocalFileSystem.class.isInstance(stagingPath.getFileSystem(conf))) {
                conf.set(MRJobConfig.MR_AM_STAGING_DIR,
                    new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR)).getAbsolutePath());
            }
        }

        FileContext fc = FileContext.getFileContext(stagingPath.toUri(), conf);
        if (fc.util().exists(stagingPath)) {
            LOG.info(stagingPath + " exists! deleting...");
            fc.delete(stagingPath, true);
        }
        LOG.info("mkdir: " + stagingPath);
        // mkdir the staging directory so that right permissions are set while running as proxy user
        fc.mkdir(stagingPath, null, true);

        // mkdir done directory as well
        String doneDir = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
        Path doneDirPath = fc.makeQualified(new Path(doneDir));
        fc.mkdir(doneDirPath, null, true);
    } catch (IOException e) {
        throw new YarnRuntimeException("Could not create staging directory. ", e);
    }

    conf.set(MRConfig.MASTER_ADDRESS, "test"); // The default is local because of
                                               // which shuffle doesn't happen

    // configure the shuffle service in NM
    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES,
        new String[] { ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID });
    conf.setClass(String.format(Locale.ENGLISH, YarnConfiguration.NM_AUX_SERVICE_FMT,
        ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID), ShuffleHandler.class, Service.class);

    // Non-standard shuffle port
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);

    conf.setClass(YarnConfiguration.NM_CONTAINER_EXECUTOR, DefaultContainerExecutor.class,
        ContainerExecutor.class);

    // TestMRJobs is for testing non-uberized operation only; see TestUberAM
    // for corresponding uberized tests.
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);

    super.serviceInit(conf);
}
From source file:org.apache.tajo.master.TaskRunnerLauncherImpl.java
License:Apache License
private ContainerLaunchContext createCommonContainerLaunchContext() {
    TajoConf conf = (TajoConf) getConfig();

    ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
    try {
        ctx.setUser(UserGroupInformation.getCurrentUser().getShortUserName());
    } catch (IOException e) {
        e.printStackTrace();
    }

    ////////////////////////////////////////////////////////////////////////////
    // Set the env variables to be setup
    ////////////////////////////////////////////////////////////////////////////
    LOG.info("Set the environment for the application master");

    Map<String, String> environment = new HashMap<String, String>();
    //String initialClassPath = getInitialClasspath(conf);
    environment.put(Environment.SHELL.name(), "/bin/bash");
    environment.put(Environment.JAVA_HOME.name(), System.getenv(Environment.JAVA_HOME.name()));

    // TODO - to be improved with org.apache.tajo.sh shell script
    Properties prop = System.getProperties();
    if (prop.getProperty("tajo.test", "FALSE").equalsIgnoreCase("TRUE")) {
        environment.put(Environment.CLASSPATH.name(), prop.getProperty("java.class.path", null));
    } else {
        // Add AppMaster.jar location to classpath
        // At some point we should not be required to add
        // the hadoop specific classpaths to the env.
        // It should be provided out of the box.
        // For now setting all required classpaths including
        // the classpath to "." for the application jar
        StringBuilder classPathEnv = new StringBuilder("./");
        //for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH)) {
        for (String c : YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH) {
            classPathEnv.append(':');
            classPathEnv.append(c.trim());
        }

        classPathEnv.append(":" + System.getenv("TAJO_BASE_CLASSPATH"));
        classPathEnv.append(":./log4j.properties:./*");

        environment.put("HADOOP_HOME", System.getenv("HADOOP_HOME"));
        environment.put(Environment.HADOOP_COMMON_HOME.name(), System.getenv("HADOOP_HOME"));
        environment.put(Environment.HADOOP_HDFS_HOME.name(), System.getenv("HADOOP_HOME"));
        environment.put(Environment.HADOOP_YARN_HOME.name(), System.getenv("HADOOP_HOME"));
        environment.put("TAJO_BASE_CLASSPATH", System.getenv("TAJO_BASE_CLASSPATH"));
        environment.put(Environment.CLASSPATH.name(), classPathEnv.toString());
    }

    ctx.setEnvironment(environment);

    ////////////////////////////////////////////////////////////////////////////
    // Set the local resources
    ////////////////////////////////////////////////////////////////////////////
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    FileSystem fs = null;
    LOG.info("defaultFS: " + conf.get("fs.default.name"));
    LOG.info("defaultFS: " + conf.get("fs.defaultFS"));
    try {
        fs = FileSystem.get(conf);
    } catch (IOException e) {
        e.printStackTrace();
    }

    FileContext fsCtx = null;
    try {
        fsCtx = FileContext.getFileContext(getConfig());
    } catch (UnsupportedFileSystemException e) {
        e.printStackTrace();
    }

    LOG.info("Writing a QueryConf to HDFS and add to local environment");
    Path queryConfPath = new Path(fs.getHomeDirectory(), QueryConf.FILENAME);
    try {
        writeConf(conf, queryConfPath);

        LocalResource queryConfSrc = createApplicationResource(fsCtx, queryConfPath, LocalResourceType.FILE);
        localResources.put(QueryConf.FILENAME, queryConfSrc);

        ctx.setLocalResources(localResources);
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Add shuffle token
    Map<String, ByteBuffer> serviceData = new HashMap<String, ByteBuffer>();
    try {
        //LOG.info("Putting shuffle token in serviceData");
        serviceData.put(PullServerAuxService.PULLSERVER_SERVICEID, PullServerAuxService.serializeMetaData(0));
    } catch (IOException ioe) {
        LOG.error(ioe);
    }
    ctx.setServiceData(serviceData);

    return ctx;
}
From source file:org.apache.tajo.master.YarnContainerProxy.java
License:Apache License
public static ContainerLaunchContext createCommonContainerLaunchContext(Configuration config, String queryId,
        boolean isMaster) {
    TajoConf conf = (TajoConf) config;

    ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
    try {
        ByteBuffer userToken = ByteBuffer
            .wrap(UserGroupInformation.getCurrentUser().getShortUserName().getBytes());
        ctx.setTokens(userToken);
    } catch (IOException e) {
        e.printStackTrace();
    }

    ////////////////////////////////////////////////////////////////////////////
    // Set the env variables to be setup
    ////////////////////////////////////////////////////////////////////////////
    LOG.info("Set the environment for the application master");

    Map<String, String> environment = new HashMap<String, String>();
    //String initialClassPath = getInitialClasspath(conf);
    environment.put(ApplicationConstants.Environment.SHELL.name(), "/bin/bash");
    if (System.getenv(ApplicationConstants.Environment.JAVA_HOME.name()) != null) {
        environment.put(ApplicationConstants.Environment.JAVA_HOME.name(),
            System.getenv(ApplicationConstants.Environment.JAVA_HOME.name()));
    }

    // TODO - to be improved with org.apache.tajo.sh shell script
    Properties prop = System.getProperties();
    if (prop.getProperty("tajo.test", "FALSE").equalsIgnoreCase("TRUE")
        || (System.getenv("tajo.test") != null && System.getenv("tajo.test").equalsIgnoreCase("TRUE"))) {
        LOG.info("tajo.test is TRUE");
        environment.put(ApplicationConstants.Environment.CLASSPATH.name(),
            prop.getProperty("java.class.path", null));
        environment.put("tajo.test", "TRUE");
    } else {
        // Add AppMaster.jar location to classpath
        // At some point we should not be required to add
        // the hadoop specific classpaths to the env.
        // It should be provided out of the box.
        // For now setting all required classpaths including
        // the classpath to "." for the application jar
        StringBuilder classPathEnv = new StringBuilder("./");
        //for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH)) {
        for (String c : YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH) {
            classPathEnv.append(':');
            classPathEnv.append(c.trim());
        }

        classPathEnv.append(":" + System.getenv("TAJO_BASE_CLASSPATH"));
        classPathEnv.append(":./log4j.properties:./*");

        if (System.getenv("HADOOP_HOME") != null) {
            environment.put("HADOOP_HOME", System.getenv("HADOOP_HOME"));
            environment.put(ApplicationConstants.Environment.HADOOP_COMMON_HOME.name(),
                System.getenv("HADOOP_HOME"));
            environment.put(ApplicationConstants.Environment.HADOOP_HDFS_HOME.name(),
                System.getenv("HADOOP_HOME"));
            environment.put(ApplicationConstants.Environment.HADOOP_YARN_HOME.name(),
                System.getenv("HADOOP_HOME"));
        }

        if (System.getenv("TAJO_BASE_CLASSPATH") != null) {
            environment.put("TAJO_BASE_CLASSPATH", System.getenv("TAJO_BASE_CLASSPATH"));
        }
        environment.put(ApplicationConstants.Environment.CLASSPATH.name(), classPathEnv.toString());
    }

    ctx.setEnvironment(environment);

    if (LOG.isDebugEnabled()) {
        LOG.debug("=================================================");
        for (Map.Entry<String, String> entry : environment.entrySet()) {
            LOG.debug(entry.getKey() + "=" + entry.getValue());
        }
        LOG.debug("=================================================");
    }

    ////////////////////////////////////////////////////////////////////////////
    // Set the local resources
    ////////////////////////////////////////////////////////////////////////////
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    LOG.info("defaultFS: " + conf.get(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY));

    try {
        FileSystem fs = FileSystem.get(conf);
        FileContext fsCtx = FileContext.getFileContext(conf);

        Path systemConfPath = TajoConf.getSystemConfPath(conf);
        if (!fs.exists(systemConfPath)) {
            LOG.error("system_conf.xml (" + systemConfPath.toString() + ") Not Found");
        }
        LocalResource systemConfResource = createApplicationResource(fsCtx, systemConfPath,
            LocalResourceType.FILE);
        localResources.put(TajoConstants.SYSTEM_CONF_FILENAME, systemConfResource);
        ctx.setLocalResources(localResources);
    } catch (IOException e) {
        LOG.error(e.getMessage(), e);
    }

    Map<String, ByteBuffer> serviceData = new HashMap<String, ByteBuffer>();
    try {
        serviceData.put(PullServerAuxService.PULLSERVER_SERVICEID, PullServerAuxService.serializeMetaData(0));
    } catch (IOException ioe) {
        LOG.error(ioe);
    }
    ctx.setServiceData(serviceData);

    return ctx;
}
From source file:org.apache.tajo.MiniTajoYarnCluster.java
License:Apache License
@Override
public void init(Configuration conf) {
    conf.setSocketAddr(YarnConfiguration.RM_ADDRESS, new InetSocketAddress("127.0.0.1", 0));
    conf.setSocketAddr(YarnConfiguration.RM_SCHEDULER_ADDRESS, new InetSocketAddress("127.0.0.1", 0));

    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
    if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) {
        conf.set(MRJobConfig.MR_AM_STAGING_DIR,
            new File(getTestWorkDir(), "apps_staging_dir/").getAbsolutePath());
    }

    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000");

    try {
        Path stagingPath = FileContext.getFileContext(conf)
            .makeQualified(new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR)));
        FileContext fc = FileContext.getFileContext(stagingPath.toUri(), conf);
        if (fc.util().exists(stagingPath)) {
            LOG.info(stagingPath + " exists! deleting...");
            fc.delete(stagingPath, true);
        }
        LOG.info("mkdir: " + stagingPath);

        // mkdir the staging directory so that right permissions are set while running as proxy user
        fc.mkdir(stagingPath, null, true);

        // mkdir done directory as well
        String doneDir = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
        Path doneDirPath = fc.makeQualified(new Path(doneDir));
        fc.mkdir(doneDirPath, null, true);
    } catch (IOException e) {
        throw new YarnRuntimeException("Could not create staging directory. ", e);
    }

    conf.set(MRConfig.MASTER_ADDRESS, "test"); // The default is local because of
                                               // which shuffle doesn't happen

    // configure the shuffle service in NM
    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES, PullServerAuxService.PULLSERVER_SERVICEID);
    conf.setClass(
        String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, PullServerAuxService.PULLSERVER_SERVICEID),
        PullServerAuxService.class, Service.class);

    // Non-standard shuffle port
    conf.setInt(TajoConf.ConfVars.PULLSERVER_PORT.name(), 0);

    // local directory
    conf.set(TajoConf.ConfVars.WORKER_TEMPORAL_DIR.name(), "/tmp/tajo-localdir");

    conf.setClass(YarnConfiguration.NM_CONTAINER_EXECUTOR, DefaultContainerExecutor.class,
        ContainerExecutor.class);

    // TestMRJobs is for testing non-uberized operation only; see TestUberAM
    // for corresponding uberized tests.
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);

    conf.setInt("yarn.nodemanager.delete.debug-delay-sec", 600);

    super.init(conf);
}