List of usage examples for org.apache.hadoop.fs Path toString
@Override
public String toString()
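Before the project examples, here is a minimal, self-contained sketch of how Path.toString() is typically used; the class name and path strings below are illustrative and not taken from the projects listed.

import org.apache.hadoop.fs.Path;

public class PathToStringExample
{
    public static void main(String[] args)
    {
        // Build a path from a parent and a child; the path strings here are illustrative.
        Path base = new Path("hdfs://namenode:8020/user/example");
        Path file = new Path(base, "data/part-00000");

        // toString() returns the full path as a String, e.g. for logging or storing as a key.
        String asString = file.toString();
        System.out.println(asString); // hdfs://namenode:8020/user/example/data/part-00000
    }
}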
From source file:com.datatorrent.flume.storage.HDFSStorage.java
License:Open Source License
@Override
public byte[] retrieve(byte[] identifier)
{
    skipFile = -1;
    skipOffset = 0;
    logger.debug("retrieve with address {}", Arrays.toString(identifier));
    // flushing the last incomplete flushed file
    closeUnflushedFiles();

    retrievalOffset = byteArrayToLong(identifier, 0);
    retrievalFile = byteArrayToLong(identifier, offset);

    if (retrievalFile == 0 && retrievalOffset == 0 && currentWrittenFile == 0 && fileWriteOffset == 0) {
        skipOffset = 0;
        return null;
    }

    // making sure that the deleted address is not requested again
    if (retrievalFile != 0 || retrievalOffset != 0) {
        long cleanedFile = byteArrayToLong(cleanedOffset, offset);
        if (retrievalFile < cleanedFile
                || (retrievalFile == cleanedFile && retrievalOffset < byteArrayToLong(cleanedOffset, 0))) {
            logger.warn("The address asked has been deleted retrievalFile={}, cleanedFile={}, retrievalOffset={}, "
                    + "cleanedOffset={}", retrievalFile, cleanedFile, retrievalOffset,
                    byteArrayToLong(cleanedOffset, 0));
            closeFs();
            throw new IllegalArgumentException(String.format("The data for address %s has already been deleted",
                    Arrays.toString(identifier)));
        }
    }

    // we have just started
    if (retrievalFile == 0 && retrievalOffset == 0) {
        retrievalFile = byteArrayToLong(cleanedOffset, offset);
        retrievalOffset = byteArrayToLong(cleanedOffset, 0);
    }

    if ((retrievalFile > flushedFileCounter)) {
        skipFile = retrievalFile;
        skipOffset = retrievalOffset;
        retrievalFile = -1;
        return null;
    }
    if ((retrievalFile == flushedFileCounter && retrievalOffset >= flushedFileWriteOffset)) {
        skipFile = retrievalFile;
        skipOffset = retrievalOffset - flushedFileWriteOffset;
        retrievalFile = -1;
        return null;
    }

    try {
        if (readStream != null) {
            readStream.close();
            readStream = null;
        }
        Path path = new Path(basePath, String.valueOf(retrievalFile));
        if (!fs.exists(path)) {
            retrievalFile = -1;
            closeFs();
            throw new RuntimeException(String.format("File %s does not exist", path.toString()));
        }

        byte[] flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
        flushedLong = Server.readLong(flushedOffset, 0);
        while (retrievalOffset >= flushedLong && retrievalFile < flushedFileCounter) {
            retrievalOffset -= flushedLong;
            retrievalFile++;
            flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
            flushedLong = Server.readLong(flushedOffset, 0);
        }

        if (retrievalOffset >= flushedLong) {
            logger.warn("data not flushed for the given identifier");
            retrievalFile = -1;
            return null;
        }
        synchronized (HDFSStorage.this) {
            if (nextReadStream != null) {
                nextReadStream.close();
                nextReadStream = null;
            }
        }
        currentData = null;
        path = new Path(basePath, String.valueOf(retrievalFile));
        //readStream = new FSDataInputStream(fs.open(path));
        currentData = readData(path);
        //readStream.seek(retrievalOffset);
        storageExecutor.submit(getNextStream());
        return retrieveHelper();
    } catch (IOException e) {
        closeFs();
        throw new RuntimeException(e);
    }
}
From source file:com.datatorrent.lib.io.fs.AbstractFileOutputOperator.java
License:Open Source License
@Override
public void setup(Context.OperatorContext context)
{
    LOG.debug("setup initiated");
    rollingFile = maxLength < Long.MAX_VALUE;

    //Getting required file system instance.
    try {
        fs = getFSInstance();
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }

    if (replication <= 0) {
        replication = fs.getDefaultReplication(new Path(filePath));
    }

    LOG.debug("FS class {}", fs.getClass());

    //When an entry is removed from the cache, removal listener is notified and it closes the output stream.
    RemovalListener<String, FSDataOutputStream> removalListener = new RemovalListener<String, FSDataOutputStream>()
    {
        @Override
        public void onRemoval(RemovalNotification<String, FSDataOutputStream> notification)
        {
            FSDataOutputStream value = notification.getValue();
            if (value != null) {
                try {
                    LOG.debug("closing {}", notification.getKey());
                    value.close();
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
        }
    };

    //Define cache
    CacheLoader<String, FSDataOutputStream> loader = new CacheLoader<String, FSDataOutputStream>()
    {
        @Override
        public FSDataOutputStream load(String filename)
        {
            String partFileName = getPartFileNamePri(filename);
            Path lfilepath = new Path(filePath + Path.SEPARATOR + partFileName);

            FSDataOutputStream fsOutput;
            boolean sawThisFileBefore = endOffsets.containsKey(filename);

            try {
                if (fs.exists(lfilepath)) {
                    if (sawThisFileBefore) {
                        FileStatus fileStatus = fs.getFileStatus(lfilepath);
                        MutableLong endOffset = endOffsets.get(filename);

                        if (endOffset != null) {
                            endOffset.setValue(fileStatus.getLen());
                        } else {
                            endOffsets.put(filename, new MutableLong(fileStatus.getLen()));
                        }

                        fsOutput = fs.append(lfilepath);
                        LOG.debug("appending to {}", lfilepath);
                    }
                    //We never saw this file before and we don't want to append
                    else {
                        //If the file is rolling we need to delete all its parts.
                        if (rollingFile) {
                            int part = 0;

                            while (true) {
                                Path seenPartFilePath = new Path(filePath + Path.SEPARATOR
                                        + getPartFileName(filename, part));
                                if (!fs.exists(seenPartFilePath)) {
                                    break;
                                }

                                fs.delete(seenPartFilePath, true);
                                part = part + 1;
                            }

                            fsOutput = fs.create(lfilepath, (short) replication);
                        }
                        //Not rolling is easy, just delete the file and create it again.
                        else {
                            fs.delete(lfilepath, true);
                            fsOutput = fs.create(lfilepath, (short) replication);
                        }
                    }
                } else {
                    fsOutput = fs.create(lfilepath, (short) replication);
                }

                //Get the end offset of the file.
                LOG.info("opened: {}", fs.getFileStatus(lfilepath).getPath());
                return fsOutput;
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    };

    streamsCache = CacheBuilder.newBuilder().maximumSize(maxOpenFiles).removalListener(removalListener)
            .build(loader);

    try {
        LOG.debug("File system class: {}", fs.getClass());
        LOG.debug("end-offsets {}", endOffsets);

        //Restore the files in case they were corrupted and the operator
        Path writerPath = new Path(filePath);
        if (fs.exists(writerPath)) {
            for (String seenFileName : endOffsets.keySet()) {
                String seenFileNamePart = getPartFileNamePri(seenFileName);
                LOG.debug("seenFileNamePart: {}", seenFileNamePart);

                Path seenPartFilePath = new Path(filePath + Path.SEPARATOR + seenFileNamePart);
                if (fs.exists(seenPartFilePath)) {
                    LOG.debug("file exists {}", seenFileNamePart);
                    long offset = endOffsets.get(seenFileName).longValue();
                    FSDataInputStream inputStream = fs.open(seenPartFilePath);
                    FileStatus status = fs.getFileStatus(seenPartFilePath);

                    if (status.getLen() != offset) {
                        LOG.info("file corrupted {} {} {}", seenFileNamePart, offset, status.getLen());
                        byte[] buffer = new byte[COPY_BUFFER_SIZE];

                        Path tmpFilePath = new Path(filePath + Path.SEPARATOR + seenFileNamePart + TMP_EXTENSION);
                        FSDataOutputStream fsOutput = fs.create(tmpFilePath, (short) replication);
                        while (inputStream.getPos() < offset) {
                            long remainingBytes = offset - inputStream.getPos();
                            int bytesToWrite = remainingBytes < COPY_BUFFER_SIZE ? (int) remainingBytes
                                    : COPY_BUFFER_SIZE;
                            inputStream.read(buffer);
                            fsOutput.write(buffer, 0, bytesToWrite);
                        }

                        flush(fsOutput);
                        fsOutput.close();
                        inputStream.close();

                        FileContext fileContext = FileContext.getFileContext(fs.getUri());
                        LOG.debug("temp file path {}, rolling file path {}", tmpFilePath.toString(),
                                status.getPath().toString());
                        fileContext.rename(tmpFilePath, status.getPath(), Options.Rename.OVERWRITE);
                    } else {
                        inputStream.close();
                    }
                }
            }
        }

        //delete the left over future rolling files produced from the previous crashed instance
        //of this operator.
        if (rollingFile) {
            for (String seenFileName : endOffsets.keySet()) {
                try {
                    Integer part = openPart.get(seenFileName).getValue() + 1;

                    while (true) {
                        Path seenPartFilePath = new Path(filePath + Path.SEPARATOR
                                + getPartFileName(seenFileName, part));
                        if (!fs.exists(seenPartFilePath)) {
                            break;
                        }

                        fs.delete(seenPartFilePath, true);
                        part = part + 1;
                    }

                    Path seenPartFilePath = new Path(filePath + Path.SEPARATOR
                            + getPartFileName(seenFileName, openPart.get(seenFileName).intValue()));

                    //Handle the case when restoring to a checkpoint where the current rolling file
                    //already has a length greater than max length.
                    if (fs.getFileStatus(seenPartFilePath).getLen() > maxLength) {
                        LOG.debug("rotating file at setup.");
                        rotate(seenFileName);
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                } catch (ExecutionException e) {
                    throw new RuntimeException(e);
                }
            }
        }

        LOG.debug("setup completed");
        LOG.debug("end-offsets {}", endOffsets);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    this.context = context;
    lastTimeStamp = System.currentTimeMillis();

    fileCounters.setCounter(Counters.TOTAL_BYTES_WRITTEN, new MutableLong());
    fileCounters.setCounter(Counters.TOTAL_TIME_ELAPSED, new MutableLong());
}
From source file:com.datatorrent.lib.io.fs.AbstractFSDirectoryInputOperator.java
License:Open Source License
@Override
public void emitTuples()
{
    //emit will be true if the operator is not idempotent. If the operator is
    //idempotent then emit will be true the first time emitTuples is called
    //within a window and false the other times emit tuples is called within a
    //window
    if (emit) {
        if (inputStream == null) {
            try {
                if (!unfinishedFiles.isEmpty()) {
                    retryFailedFile(unfinishedFiles.poll());
                } else if (!pendingFiles.isEmpty()) {
                    String newPathString = pendingFiles.iterator().next();
                    pendingFiles.remove(newPathString);
                    this.inputStream = openFile(new Path(newPathString));
                } else if (!failedFiles.isEmpty()) {
                    retryFailedFile(failedFiles.poll());
                } else {
                    if (System.currentTimeMillis() - scanIntervalMillis >= lastScanMillis) {
                        Set<Path> newPaths = scanner.scan(fs, filePath, processedFiles);

                        for (Path newPath : newPaths) {
                            String newPathString = newPath.toString();
                            pendingFiles.add(newPathString);
                            processedFiles.add(newPathString);
                        }

                        lastScanMillis = System.currentTimeMillis();
                    }
                }
            } catch (IOException ex) {
                if (maxRetryCount <= 0) {
                    throw new RuntimeException(ex);
                }
                LOG.error("FS reader error", ex);
                addToFailedList();
            }
        }

        if (inputStream != null) {
            try {
                int counterForTuple = 0;
                while (counterForTuple++ < emitBatchSize) {
                    T line = readEntity();
                    if (line == null) {
                        LOG.info("done reading file ({} entries).", offset);
                        closeFile(inputStream);
                        break;
                    }

                    // If skipCount is non zero, then failed file recovery is going on, skipCount is
                    // used to prevent already emitted records from being emitted again during recovery.
                    // When failed file is open, skipCount is set to the last read offset for that file.
                    if (skipCount == 0) {
                        offset++;
                        emit(line);
                    } else {
                        skipCount--;
                    }
                }
            } catch (IOException e) {
                if (maxRetryCount <= 0) {
                    throw new RuntimeException(e);
                }
                LOG.error("FS reader error", e);
                addToFailedList();
            }
        }

        //If the operator is idempotent, do nothing on other calls to emittuples
        //within the same window
        if (idempotentEmit) {
            emit = false;
        }
    }
}
From source file:com.datatorrent.lib.io.fs.AbstractFSDirectoryInputOperator.java
License:Open Source License
protected InputStream openFile(Path path) throws IOException
{
    LOG.info("opening file {}", path);
    InputStream input = fs.open(path);
    currentFile = path.toString();
    offset = 0;
    retryCount = 0;
    skipCount = 0;
    return input;
}
From source file:com.datatorrent.lib.io.fs.AbstractFSWriter.java
License:Open Source License
@Override
public void setup(Context.OperatorContext context)
{
    rollingFile = maxLength < Long.MAX_VALUE;

    //Getting required file system instance.
    try {
        fs = getFSInstance();
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }

    LOG.debug("FS class {}", fs.getClass());

    //Setting listener for debugging
    LOG.debug("setup initiated");
    RemovalListener<String, FSDataOutputStream> removalListener = new RemovalListener<String, FSDataOutputStream>()
    {
        @Override
        public void onRemoval(RemovalNotification<String, FSDataOutputStream> notification)
        {
            FSDataOutputStream value = notification.getValue();
            if (value != null) {
                try {
                    LOG.debug("closing {}", notification.getKey());
                    value.close();
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
        }
    };

    //Define cache
    CacheLoader<String, FSDataOutputStream> loader = new CacheLoader<String, FSDataOutputStream>()
    {
        @Override
        public FSDataOutputStream load(String filename)
        {
            String partFileName = getPartFileNamePri(filename);
            Path lfilepath = new Path(filePath + File.separator + partFileName);

            FSDataOutputStream fsOutput;
            if (replication <= 0) {
                replication = fs.getDefaultReplication(lfilepath);
            }

            boolean sawThisFileBefore = endOffsets.containsKey(filename);

            try {
                if (fs.exists(lfilepath)) {
                    if (sawThisFileBefore || append) {
                        FileStatus fileStatus = fs.getFileStatus(lfilepath);
                        MutableLong endOffset = endOffsets.get(filename);

                        if (endOffset != null) {
                            endOffset.setValue(fileStatus.getLen());
                        } else {
                            endOffsets.put(filename, new MutableLong(fileStatus.getLen()));
                        }

                        fsOutput = fs.append(lfilepath);
                        LOG.debug("appending to {}", lfilepath);
                    }
                    //We never saw this file before and we don't want to append
                    else {
                        //If the file is rolling we need to delete all its parts.
                        if (rollingFile) {
                            int part = 0;

                            while (true) {
                                Path seenPartFilePath = new Path(filePath + "/" + getPartFileName(filename, part));
                                if (!fs.exists(seenPartFilePath)) {
                                    break;
                                }

                                fs.delete(seenPartFilePath, true);
                                part = part + 1;
                            }

                            fsOutput = fs.create(lfilepath, (short) replication);
                        }
                        //Not rolling is easy, just delete the file and create it again.
                        else {
                            fs.delete(lfilepath, true);
                            fsOutput = fs.create(lfilepath, (short) replication);
                        }
                    }
                } else {
                    fsOutput = fs.create(lfilepath, (short) replication);
                }

                //Get the end offset of the file.
                LOG.debug("full path: {}", fs.getFileStatus(lfilepath).getPath());
                return fsOutput;
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    };

    streamsCache = CacheBuilder.newBuilder().maximumSize(maxOpenFiles).removalListener(removalListener)
            .build(loader);

    try {
        LOG.debug("File system class: {}", fs.getClass());
        LOG.debug("end-offsets {}", endOffsets);

        //Restore the files in case they were corrupted and the operator
        Path writerPath = new Path(filePath);
        if (fs.exists(writerPath)) {
            for (String seenFileName : endOffsets.keySet()) {
                String seenFileNamePart = getPartFileNamePri(seenFileName);
                LOG.debug("seenFileNamePart: {}", seenFileNamePart);

                Path seenPartFilePath = new Path(filePath + "/" + seenFileNamePart);
                if (fs.exists(seenPartFilePath)) {
                    LOG.debug("file exists {}", seenFileNamePart);
                    long offset = endOffsets.get(seenFileName).longValue();
                    FSDataInputStream inputStream = fs.open(seenPartFilePath);
                    FileStatus status = fs.getFileStatus(seenPartFilePath);

                    if (status.getLen() != offset) {
                        LOG.info("file corrupted {} {} {}", seenFileNamePart, offset, status.getLen());
                        byte[] buffer = new byte[COPY_BUFFER_SIZE];

                        String tmpFileName = seenFileNamePart + TMP_EXTENSION;
                        FSDataOutputStream fsOutput = streamsCache.get(tmpFileName);
                        while (inputStream.getPos() < offset) {
                            long remainingBytes = offset - inputStream.getPos();
                            int bytesToWrite = remainingBytes < COPY_BUFFER_SIZE ? (int) remainingBytes
                                    : COPY_BUFFER_SIZE;
                            inputStream.read(buffer);
                            fsOutput.write(buffer, 0, bytesToWrite);
                        }

                        flush(fsOutput);
                        FileContext fileContext = FileContext.getFileContext(fs.getUri());
                        String tempTmpFilePath = getPartFileNamePri(filePath + File.separator + tmpFileName);

                        Path tmpFilePath = new Path(tempTmpFilePath);
                        tmpFilePath = fs.getFileStatus(tmpFilePath).getPath();
                        LOG.debug("temp file path {}, rolling file path {}", tmpFilePath.toString(),
                                status.getPath().toString());

                        fileContext.rename(tmpFilePath, status.getPath(), Options.Rename.OVERWRITE);
                    }
                }
            }
        }

        //delete the left over future rolling files produced from the previous crashed instance
        //of this operator.
        if (rollingFile) {
            for (String seenFileName : endOffsets.keySet()) {
                try {
                    Integer part = openPart.get(seenFileName).getValue() + 1;

                    while (true) {
                        Path seenPartFilePath = new Path(filePath + "/" + getPartFileName(seenFileName, part));
                        if (!fs.exists(seenPartFilePath)) {
                            break;
                        }

                        fs.delete(seenPartFilePath, true);
                        part = part + 1;
                    }

                    Path seenPartFilePath = new Path(filePath + "/"
                            + getPartFileName(seenFileName, openPart.get(seenFileName).intValue()));

                    //Handle the case when restoring to a checkpoint where the current rolling file
                    //already has a length greater than max length.
                    if (fs.getFileStatus(seenPartFilePath).getLen() > maxLength) {
                        LOG.debug("rotating file at setup.");
                        rotate(seenFileName);
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                } catch (ExecutionException e) {
                    throw new RuntimeException(e);
                }
            }
        }

        LOG.debug("setup completed");
        LOG.debug("end-offsets {}", endOffsets);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (ExecutionException e) {
        throw new RuntimeException(e);
    }

    this.context = context;
    lastTimeStamp = System.currentTimeMillis();

    fileCounters.setCounter(Counters.TOTAL_BYTES_WRITTEN, new MutableLong());
    fileCounters.setCounter(Counters.TOTAL_TIME_ELAPSED, new MutableLong());
}
From source file:com.datatorrent.lib.io.fs.AbstractThroughputHashFSDirectoryInputOperator.java
License:Open Source License
@Override
public void emitTuples()
{
    if (System.currentTimeMillis() - scanIntervalMillis >= lastScanMillis) {
        Set<Path> newPaths = scanner.scan(fs, filePath, processedFiles);

        for (Path newPath : newPaths) {
            String newPathString = newPath.toString();
            pendingFiles.add(newPathString);
            processedFiles.add(newPathString);
        }

        lastScanMillis = System.currentTimeMillis();
    }

    super.emitTuples();
}
From source file:com.datatorrent.stram.client.StramAppLauncher.java
License:Apache License
/**
 * Submit application to the cluster and return the app id.
 * Sets the context class loader for application dependencies.
 *
 * @param appConfig
 * @return ApplicationId
 * @throws Exception
 */
public ApplicationId launchApp(AppFactory appConfig) throws Exception
{
    loadDependencies();
    Configuration conf = propertiesBuilder.conf;
    conf.setEnum(StreamingApplication.ENVIRONMENT, StreamingApplication.Environment.CLUSTER);
    LogicalPlan dag = appConfig.createApp(propertiesBuilder);

    String hdfsTokenMaxLifeTime = conf.get(StramClientUtils.HDFS_TOKEN_MAX_LIFE_TIME);
    if (hdfsTokenMaxLifeTime != null && hdfsTokenMaxLifeTime.trim().length() > 0) {
        dag.setAttribute(LogicalPlan.HDFS_TOKEN_LIFE_TIME, Long.parseLong(hdfsTokenMaxLifeTime));
    }
    String rmTokenMaxLifeTime = conf.get(StramClientUtils.RM_TOKEN_MAX_LIFE_TIME);
    if (rmTokenMaxLifeTime != null && rmTokenMaxLifeTime.trim().length() > 0) {
        dag.setAttribute(LogicalPlan.RM_TOKEN_LIFE_TIME, Long.parseLong(rmTokenMaxLifeTime));
    }

    if (conf.get(StramClientUtils.KEY_TAB_FILE) != null) {
        dag.setAttribute(LogicalPlan.KEY_TAB_FILE, conf.get(StramClientUtils.KEY_TAB_FILE));
    } else if (conf.get(StramUserLogin.DT_AUTH_KEYTAB) != null) {
        Path localKeyTabPath = new Path(conf.get(StramUserLogin.DT_AUTH_KEYTAB));
        FileSystem fs = StramClientUtils.newFileSystemInstance(conf);
        try {
            Path destPath = new Path(StramClientUtils.getDTDFSRootDir(fs, conf), localKeyTabPath.getName());
            if (!fs.exists(destPath)) {
                fs.copyFromLocalFile(false, false, localKeyTabPath, destPath);
            }
            dag.setAttribute(LogicalPlan.KEY_TAB_FILE, destPath.toString());
        } finally {
            fs.close();
        }
    }

    String tokenRefreshFactor = conf.get(StramClientUtils.TOKEN_ANTICIPATORY_REFRESH_FACTOR);
    if (tokenRefreshFactor != null && tokenRefreshFactor.trim().length() > 0) {
        dag.setAttribute(LogicalPlan.TOKEN_REFRESH_ANTICIPATORY_FACTOR, Double.parseDouble(tokenRefreshFactor));
    }

    StramClient client = new StramClient(conf, dag);
    try {
        client.start();
        LinkedHashSet<String> libjars = Sets.newLinkedHashSet();
        String libjarsCsv = conf.get(LIBJARS_CONF_KEY_NAME);
        if (libjarsCsv != null) {
            String[] jars = StringUtils.splitByWholeSeparator(libjarsCsv, StramClient.LIB_JARS_SEP);
            libjars.addAll(Arrays.asList(jars));
        }
        if (deployJars != null) {
            for (File deployJar : deployJars) {
                libjars.add(deployJar.getAbsolutePath());
            }
        }
        client.setResources(libjars);
        client.setFiles(conf.get(FILES_CONF_KEY_NAME));
        client.setArchives(conf.get(ARCHIVES_CONF_KEY_NAME));
        client.setOriginalAppId(conf.get(ORIGINAL_APP_ID));
        client.setQueueName(conf.get(QUEUE_NAME));
        client.startApplication();
        return client.getApplicationReport().getApplicationId();
    } finally {
        client.stop();
    }
}
From source file:com.datatorrent.stram.StramClient.java
License:Apache License
private String copyFromLocal(FileSystem fs, Path basePath, String[] files) throws IOException
{
    StringBuilder csv = new StringBuilder(files.length * (basePath.toString().length() + 16));
    for (String localFile : files) {
        Path src = new Path(localFile);
        String filename = src.getName();
        Path dst = new Path(basePath, filename);
        URI localFileURI = null;
        try {
            localFileURI = new URI(localFile);
        } catch (URISyntaxException e) {
            throw new IOException(e);
        }
        if (localFileURI.getScheme() == null || localFileURI.getScheme().startsWith("file")) {
            LOG.info("Copy {} from local filesystem to {}", localFile, dst);
            fs.copyFromLocalFile(false, true, src, dst);
        } else {
            LOG.info("Copy {} from DFS to {}", localFile, dst);
            FileUtil.copy(fs, src, fs, dst, false, true, conf);
        }
        if (csv.length() > 0) {
            csv.append(LIB_JARS_SEP);
        }
        csv.append(dst.toString());
    }
    return csv.toString();
}
From source file:com.datatorrent.stram.StramClient.java
License:Apache License
public void copyInitialState(Path origAppDir) throws IOException
{
    // locate previous snapshot
    String newAppDir = this.dag.assertAppPath();

    FSRecoveryHandler recoveryHandler = new FSRecoveryHandler(origAppDir.toString(), conf);
    // read snapshot against new dependencies
    Object snapshot = recoveryHandler.restore();
    if (snapshot == null) {
        throw new IllegalArgumentException("No previous application state found in " + origAppDir);
    }
    InputStream logIs = recoveryHandler.getLog();

    // modify snapshot state to switch app id
    ((StreamingContainerManager.CheckpointState) snapshot).setApplicationId(this.dag, conf);
    Path checkpointPath = new Path(newAppDir, LogicalPlan.SUBDIR_CHECKPOINTS);

    FileSystem fs = FileSystem.newInstance(origAppDir.toUri(), conf);
    // remove the path that was created by the storage agent during deserialization and replacement
    fs.delete(checkpointPath, true);

    // write snapshot to new location
    recoveryHandler = new FSRecoveryHandler(newAppDir, conf);
    recoveryHandler.save(snapshot);
    OutputStream logOs = recoveryHandler.rotateLog();
    IOUtils.copy(logIs, logOs);
    logOs.flush();
    logOs.close();
    logIs.close();

    // copy sub directories that are not present in target
    FileStatus[] lFiles = fs.listStatus(origAppDir);
    for (FileStatus f : lFiles) {
        if (f.isDirectory()) {
            String targetPath = f.getPath().toString().replace(origAppDir.toString(), newAppDir);
            if (!fs.exists(new Path(targetPath))) {
                LOG.debug("Copying {} to {}", f.getPath(), targetPath);
                FileUtil.copy(fs, f.getPath(), fs, new Path(targetPath), false, conf);
                //FSUtil.copy(fs, f, fs, new Path(targetPath), false, false, conf);
            } else {
                LOG.debug("Ignoring {} as it already exists under {}", f.getPath(), targetPath);
                //FSUtil.setPermission(fs, new Path(targetPath), new FsPermission((short)0777));
            }
        }
    }
}
From source file:com.datatorrent.stram.StramClient.java
License:Apache License
/**
 * Launch application for the dag represented by this client.
 *
 * @throws YarnException
 * @throws IOException
 */
public void startApplication() throws YarnException, IOException
{
    Class<?>[] defaultClasses;

    if (applicationType.equals(YARN_APPLICATION_TYPE)) {
        //TODO restrict the security check to only check if security is enabled for webservices.
        if (UserGroupInformation.isSecurityEnabled()) {
            defaultClasses = DATATORRENT_SECURITY_CLASSES;
        } else {
            defaultClasses = DATATORRENT_CLASSES;
        }
    } else {
        throw new IllegalStateException(applicationType + " is not a valid application type.");
    }

    LinkedHashSet<String> localJarFiles = findJars(dag, defaultClasses);

    if (resources != null) {
        localJarFiles.addAll(resources);
    }

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    //GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class);
    //GetClusterNodesResponse clusterNodesResp = rmClient.clientRM.getClusterNodes(clusterNodesReq);
    //LOG.info("Got Cluster node info from ASM");
    //for (NodeReport node : clusterNodesResp.getNodeReports()) {
    //  LOG.info("Got node report from ASM for"
    //           + ", nodeId=" + node.getNodeId()
    //           + ", nodeAddress" + node.getHttpAddress()
    //           + ", nodeRackName" + node.getRackName()
    //           + ", nodeNumContainers" + node.getNumContainers()
    //           + ", nodeHealthStatus" + node.getHealthReport());
    //}
    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication newApp = yarnClient.createApplication();
    appId = newApp.getNewApplicationResponse().getApplicationId();

    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = newApp.getNewApplicationResponse().getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);
    int amMemory = dag.getMasterMemoryMB();
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory
                + ", max=" + maxMem);
        amMemory = maxMem;
    }

    if (dag.getAttributes().get(LogicalPlan.APPLICATION_ID) == null) {
        dag.setAttribute(LogicalPlan.APPLICATION_ID, appId.toString());
    }

    // Create launch context for app master
    LOG.info("Setting up application submission context for ASM");
    ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);

    // set the application id
    appContext.setApplicationId(appId);

    // set the application name
    appContext.setApplicationName(dag.getValue(LogicalPlan.APPLICATION_NAME));
    appContext.setApplicationType(this.applicationType);
    if (YARN_APPLICATION_TYPE.equals(this.applicationType)) {
        //appContext.setMaxAppAttempts(1); // no retries until Stram is HA
    }

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // Setup security tokens
    // If security is enabled get ResourceManager and NameNode delegation tokens.
    // Set these tokens on the container so that they are sent as part of application submission.
    // This also sets them up for renewal by ResourceManager. The NameNode delegation rmToken
    // is also used by ResourceManager to fetch the jars from HDFS and set them up for the
    // application master launch.
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        FileSystem fs = StramClientUtils.newFileSystemInstance(conf);
        try {
            final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
            if (tokens != null) {
                for (Token<?> token : tokens) {
                    LOG.info("Got dt for " + fs.getUri() + "; " + token);
                }
            }
        } finally {
            fs.close();
        }

        addRMDelegationToken(tokenRenewer, credentials);

        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    // copy required jar files to dfs, to be localized for containers
    FileSystem fs = StramClientUtils.newFileSystemInstance(conf);
    try {
        Path appsBasePath = new Path(StramClientUtils.getDTDFSRootDir(fs, conf), StramClientUtils.SUBDIR_APPS);
        Path appPath = new Path(appsBasePath, appId.toString());

        String libJarsCsv = copyFromLocal(fs, appPath, localJarFiles.toArray(new String[] {}));

        LOG.info("libjars: {}", libJarsCsv);
        dag.getAttributes().put(LogicalPlan.LIBRARY_JARS, libJarsCsv);
        LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.FILE, libJarsCsv, localResources, fs);

        if (archives != null) {
            String[] localFiles = archives.split(",");
            String archivesCsv = copyFromLocal(fs, appPath, localFiles);
            LOG.info("archives: {}", archivesCsv);
            dag.getAttributes().put(LogicalPlan.ARCHIVES, archivesCsv);
            LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.ARCHIVE, archivesCsv, localResources,
                    fs);
        }

        if (files != null) {
            String[] localFiles = files.split(",");
            String filesCsv = copyFromLocal(fs, appPath, localFiles);
            LOG.info("files: {}", filesCsv);
            dag.getAttributes().put(LogicalPlan.FILES, filesCsv);
            LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.FILE, filesCsv, localResources, fs);
        }

        dag.getAttributes().put(LogicalPlan.APPLICATION_PATH, appPath.toString());
        if (dag.getAttributes().get(OperatorContext.STORAGE_AGENT) == null) { /* which would be the most likely case */
            Path checkpointPath = new Path(appPath, LogicalPlan.SUBDIR_CHECKPOINTS);
            // use conf client side to pickup any proxy settings from dt-site.xml
            dag.setAttribute(OperatorContext.STORAGE_AGENT, new FSStorageAgent(checkpointPath.toString(), conf));
        }

        if (dag.getAttributes().get(LogicalPlan.CONTAINER_OPTS_CONFIGURATOR) == null) {
            dag.setAttribute(LogicalPlan.CONTAINER_OPTS_CONFIGURATOR, new BasicContainerOptConfigurator());
        }

        // Set the log4j properties if needed
        if (!log4jPropFile.isEmpty()) {
            Path log4jSrc = new Path(log4jPropFile);
            Path log4jDst = new Path(appPath, "log4j.props");
            fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
            FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
            LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
            log4jRsrc.setType(LocalResourceType.FILE);
            log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
            log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
            log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
            log4jRsrc.setSize(log4jFileStatus.getLen());
            localResources.put("log4j.properties", log4jRsrc);
        }

        if (originalAppId != null) {
            Path origAppPath = new Path(appsBasePath, this.originalAppId);
            LOG.info("Restart from {}", origAppPath);
            copyInitialState(origAppPath);
        }

        // push logical plan to DFS location
        Path cfgDst = new Path(appPath, LogicalPlan.SER_FILE_NAME);
        FSDataOutputStream outStream = fs.create(cfgDst, true);
        LogicalPlan.write(this.dag, outStream);
        outStream.close();

        Path launchConfigDst = new Path(appPath, LogicalPlan.LAUNCH_CONFIG_FILE_NAME);
        outStream = fs.create(launchConfigDst, true);
        conf.writeXml(outStream);
        outStream.close();

        FileStatus topologyFileStatus = fs.getFileStatus(cfgDst);
        LocalResource topologyRsrc = Records.newRecord(LocalResource.class);
        topologyRsrc.setType(LocalResourceType.FILE);
        topologyRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        topologyRsrc.setResource(ConverterUtils.getYarnUrlFromURI(cfgDst.toUri()));
        topologyRsrc.setTimestamp(topologyFileStatus.getModificationTime());
        topologyRsrc.setSize(topologyFileStatus.getLen());
        localResources.put(LogicalPlan.SER_FILE_NAME, topologyRsrc);

        // Set local resource info into app master container launch context
        amContainer.setLocalResources(localResources);

        // Set the necessary security tokens as needed
        //amContainer.setContainerTokens(containerToken);

        // Set the env variables to be setup in the env where the application master will be run
        LOG.info("Set the environment for the application master");
        Map<String, String> env = new HashMap<String, String>();

        // Add application jar(s) location to classpath
        // At some point we should not be required to add
        // the hadoop specific classpaths to the env.
        // It should be provided out of the box.
        // For now setting all required classpaths including
        // the classpath to "." for the application jar(s)
        // including ${CLASSPATH} will duplicate the class path in app master, removing it for now
        //StringBuilder classPathEnv = new StringBuilder("${CLASSPATH}:./*");
        StringBuilder classPathEnv = new StringBuilder("./*");
        String classpath = conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH);
        for (String c : StringUtils.isBlank(classpath) ? YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH
                : classpath.split(",")) {
            if (c.equals("$HADOOP_CLIENT_CONF_DIR")) {
                // SPOI-2501
                continue;
            }
            classPathEnv.append(':');
            classPathEnv.append(c.trim());
        }
        env.put("CLASSPATH", classPathEnv.toString());
        // propagate to replace node managers user name (effective in non-secure mode)
        env.put("HADOOP_USER_NAME", UserGroupInformation.getLoginUser().getUserName());

        amContainer.setEnvironment(env);

        // Set the necessary command to execute the application master
        ArrayList<CharSequence> vargs = new ArrayList<CharSequence>(30);

        // Set java executable command
        LOG.info("Setting up app master command");
        vargs.add(javaCmd);
        if (dag.isDebug()) {
            vargs.add("-agentlib:jdwp=transport=dt_socket,server=y,suspend=n");
        }
        // Set Xmx based on am memory size
        // default heap size 75% of total memory
        if (dag.getMasterJVMOptions() != null) {
            vargs.add(dag.getMasterJVMOptions());
        }
        vargs.add("-Xmx" + (amMemory * 3 / 4) + "m");
        vargs.add("-XX:+HeapDumpOnOutOfMemoryError");
        vargs.add("-XX:HeapDumpPath=/tmp/dt-heap-" + appId.getId() + ".bin");
        vargs.add("-Dhadoop.root.logger=" + (dag.isDebug() ? "DEBUG" : "INFO") + ",RFA");
        vargs.add("-Dhadoop.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
        vargs.add(String.format("-D%s=%s", StreamingContainer.PROP_APP_PATH, dag.assertAppPath()));
        if (dag.isDebug()) {
            vargs.add("-Dlog4j.debug=true");
        }

        String loggersLevel = conf.get(DTLoggerFactory.DT_LOGGERS_LEVEL);
        if (loggersLevel != null) {
            vargs.add(String.format("-D%s=%s", DTLoggerFactory.DT_LOGGERS_LEVEL, loggersLevel));
        }
        vargs.add(StreamingAppMaster.class.getName());
        vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
        vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

        // Get final command
        StringBuilder command = new StringBuilder(9 * vargs.size());
        for (CharSequence str : vargs) {
            command.append(str).append(" ");
        }

        LOG.info("Completed setting up app master command " + command.toString());
        List<String> commands = new ArrayList<String>();
        commands.add(command.toString());
        amContainer.setCommands(commands);

        // Set up resource type requirements
        // For now, only memory is supported so we set memory requirements
        Resource capability = Records.newRecord(Resource.class);
        capability.setMemory(amMemory);
        appContext.setResource(capability);

        // Service data is a binary blob that can be passed to the application
        // Not needed in this scenario
        // amContainer.setServiceData(serviceData);

        appContext.setAMContainerSpec(amContainer);

        // Set the priority for the application master
        Priority pri = Records.newRecord(Priority.class);
        pri.setPriority(amPriority);
        appContext.setPriority(pri);
        // Set the queue to which this application is to be submitted in the RM
        appContext.setQueue(queueName);

        // Submit the application to the applications manager
        // SubmitApplicationResponse submitResp = rmClient.submitApplication(appRequest);
        // Ignore the response as either a valid response object is returned on success
        // or an exception thrown to denote some form of a failure
        String specStr = Objects.toStringHelper("Submitting application: ")
                .add("name", appContext.getApplicationName()).add("queue", appContext.getQueue())
                .add("user", UserGroupInformation.getLoginUser()).add("resource", appContext.getResource())
                .toString();
        LOG.info(specStr);
        if (dag.isDebug()) {
            //LOG.info("Full submission context: " + appContext);
        }
        yarnClient.submitApplication(appContext);
    } finally {
        fs.close();
    }
}