List of usage examples for the org.apache.hadoop.io.DataOutputBuffer constructor:
public DataOutputBuffer()
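DataOutputBuffer is a reusable, in-memory implementation of DataOutput: callers write with the usual DataOutput methods, then read the serialized bytes back via getData() and getLength(). Note that getData() exposes the backing array, which is typically larger than the valid content, so it must always be paired with getLength(). Before the real-world examples, a minimal sketch of the basic round trip (class name is illustrative):

import java.nio.ByteBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

public class DataOutputBufferBasics {
    public static void main(String[] args) throws Exception {
        DataOutputBuffer dob = new DataOutputBuffer();
        dob.writeInt(42);      // any DataOutput method works
        dob.writeUTF("hello");
        // Only the first getLength() bytes of getData() are valid;
        // the backing array may be larger.
        ByteBuffer wrapped = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        System.out.println("wrote " + dob.getLength() + " bytes");
        dob.reset();           // rewind for reuse; the backing array is kept
    }
}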
From source file:com.flyhz.avengers.framework.AvengersAppMaster.java
License:Apache License
/**
 * Main run function for the application master.
 *
 * @throws YarnException
 * @throws IOException
 */
public boolean run() throws IOException, YarnException {
    LOG.info("Starting AvengersAppMaster");

    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    // Setup local RPC Server to accept status requests directly from clients;
    // the RPC server sends requests to this app master.
    // Register self with ResourceManager. This will start heartbeating to the RM.
    appMasterHostname = NetUtils.getHostname();
    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);

    // Dump out information about cluster capability as seen by the resource manager.
    int maxMem = response.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }

    initJar();
    crawl();
    fetch();
    finish();
    return success;
}
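This token-serialization idiom recurs in every YARN example on this page: serialize the process's Credentials with writeTokenStorageToStream, then hand the raw bytes to YARN as a ByteBuffer. One subtlety: writeTokenStorageToStream snapshots the tokens at call time, so removing the AM->RM token after serializing (as the code above does) does not remove it from the bytes already written to the buffer. The condensed sketch below removes it first; the class and method names are illustrative, not from any of the quoted projects:

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Iterator;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;

public final class ContainerTokensSketch {
    // Returns the current user's tokens, minus the AM->RM token, as the
    // ByteBuffer shape expected by ContainerLaunchContext.setTokens().
    static ByteBuffer serializeContainerTokens() throws IOException {
        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
        while (iter.hasNext()) {
            if (iter.next().getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
                iter.remove(); // strip before serializing, so the bytes omit it
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        return ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
    }
}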
From source file:com.flyhz.avengers.framework.AvengersClient.java
License:Apache License
/**
 * Main run function for the client.
 *
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
private boolean run(String appName, List<String> commands) throws IOException, YarnException {
    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress="
                + node.getHttpAddress() + ", nodeRackName=" + node.getRackName() + ", nodeNumContainers="
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();

    // TODO: get min/max resource capabilities from RM and change memory ask if needed.
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master.
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager.
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    // Set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();
    appContext.setApplicationName(appName);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // Set local resources for the application master: local files or archives as needed.
    // In this scenario, the jar file for the application master is part of the local resources.
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    FileSystem fs = DistributedFileSystem.get(conf);
    Path src = new Path(appJar);
    Path dst = new Path(fs.getHomeDirectory(), "avengers/" + batchId + "/avengers.jar");
    if (copy) {
        LOG.info("copy local jar to hdfs");
        fs.copyFromLocalFile(false, true, src, dst);
        copy = false;
    }
    this.hdfsPath = dst.toUri().toString();
    LOG.info("hdfs hdfsPath = {}", dst);
    FileStatus destStatus = fs.getFileStatus(dst);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
    amJarRsrc.setType(LocalResourceType.FILE);
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    LOG.info("YarnURLFromPath ->{}", ConverterUtils.getYarnUrlFromPath(dst));
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put("avengers.jar", amJarRsrc);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        Path log4jSrc = new Path(log4jPropFile);
        Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props");
        fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
        FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
        LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
        log4jRsrc.setType(LocalResourceType.FILE);
        log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
        log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
        log4jRsrc.setSize(log4jFileStatus.getLen());
        localResources.put("log4j.properties", log4jRsrc);
    }

    // The shell script has to be made available on the final container(s) where it will
    // be executed. To do this, we need to first copy it into the filesystem that is
    // visible to the YARN framework. We do not need to set it as a local resource for
    // the application master, as the application master does not need it.

    // Set local resource info into app master container launch context
    amContainer.setLocalResources(localResources);

    // Set the necessary security tokens as needed
    // amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar);
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
        classPathEnv.append(File.pathSeparatorChar);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties");

    // Add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }
    LOG.info("CLASSPATH -> " + classPathEnv);
    env.put("CLASSPATH", classPathEnv.toString());
    amContainer.setEnvironment(env);

    for (String cmd : commands) {
        LOG.info("run command {},appId {}", cmd, appId.getId());
    }
    amContainer.setCommands(commands);

    // Set up resource type requirements.
    // For now, only memory is supported, so we set memory requirements.
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application.
    // Not needed in this scenario.
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }
        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager.
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response, as either a valid response object is returned on success
    // or an exception is thrown to denote some form of failure.
    LOG.info("Submitting application to ASM");
    yarnClient.submitApplication(appContext);

    // Try submitting the same request again -- app submission failure?

    // Monitor the application
    return monitorApplication(appId);
}
From source file:com.github.hdl.tensorflow.yarn.app.ApplicationMaster.java
License:Apache License
/**
 * Main run function for the application master.
 *
 * @throws YarnException
 * @throws IOException
 */
@SuppressWarnings({ "unchecked" })
public void run() throws YarnException, IOException, InterruptedException {
    LOG.info("Starting ApplicationMaster");

    // Note: Credentials, Token, UserGroupInformation, DataOutputBuffer class
    // are marked as LimitedPrivate
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    LOG.info("Executing with tokens:");
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.info(token);
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Create appSubmitterUgi and add original tokens to it
    String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
    appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
    appSubmitterUgi.addCredentials(credentials);

    AMRMClientAsync.AbstractCallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    appMasterHostname = System.getenv(Environment.NM_HOST.name());
    TFApplicationRpcServer rpcServer = new TFApplicationRpcServer(appMasterHostname, new RpcForClient());
    appMasterRpcPort = rpcServer.getRpcPort();
    rpcServer.startRpcServiceThread();

    // Register self with ResourceManager. This will start heartbeating to the RM.
    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);

    // Dump out information about cluster capability as seen by the resource manager
    long maxMem = response.getMaximumResourceCapability().getMemorySize();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);
    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capability of resources in this cluster " + maxVCores);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }
    if (containerVirtualCores > maxVCores) {
        LOG.info("Container virtual cores specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerVirtualCores + ", max=" + maxVCores);
        containerVirtualCores = maxVCores;
    }

    List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
    LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
            + " previous attempts' running containers on AM registration.");
    for (Container container : previousAMRunningContainers) {
        launchedContainers.add(container.getId());
    }
    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

    int numTotalContainersToRequest = numTotalContainers - previousAMRunningContainers.size();
    // Setup ask for containers from RM and send the requests.
    // Until we get our fully allocated quota, we keep on polling RM for containers.
    // Keep looping until all the containers are launched and the shell script is
    // executed on them (regardless of success/failure).
    for (int i = 0; i < numTotalContainersToRequest; ++i) {
        ContainerRequest containerAsk = setupContainerAskForRM();
        amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainers);
}
From source file:com.github.hdl.tensorflow.yarn.app.Client.java
License:Apache License
/**
 * Main run function for the client.
 *
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress="
                + node.getHttpAddress() + ", nodeRackName=" + node.getRackName() + ", nodeNumContainers="
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();

    // TODO get min/max resource capabilities from RM and change memory ask if needed
    long maxMem = appResponse.getMaximumResourceCapability().getMemorySize();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capability of resources in this cluster " + maxVCores);
    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();
    appContext.setApplicationName(appName);

    if (attemptFailuresValidityInterval >= 0) {
        appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
    }

    Set<String> tags = new HashSet<String>();
    appContext.setApplicationTags(tags);

    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    TFAmContainer tfAmContainer = new TFAmContainer(this);

    // Copy the application jar to the filesystem
    FileSystem fs = FileSystem.get(conf);
    String dstJarPath = copyLocalFileToDfs(fs, appId.toString(), appMasterJar, TFContainer.SERVER_JAR_PATH);
    tfAmContainer.addToLocalResources(fs, new Path(dstJarPath), TFAmContainer.APPMASTER_JAR_PATH,
            localResources);

    String jniSoDfsPath = "";
    if (jniSoFile != null && !jniSoFile.equals("")) {
        jniSoDfsPath = copyLocalFileToDfs(fs, appId.toString(), jniSoFile, "libbridge.so");
    }

    // Set the log4j properties if needed
    /*
    if (!log4jPropFile.isEmpty()) {
        tfAmContainer.addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources,
                null);
    }
    */

    // Set the necessary security tokens as needed
    // amContainer.setContainerTokens(containerToken);

    Map<String, String> env = tfAmContainer.setJavaEnv(conf);

    if (null != nodeLabelExpression) {
        appContext.setNodeLabelExpression(nodeLabelExpression);
    }

    StringBuilder command = tfAmContainer.makeCommands(amMemory, appMasterMainClass, containerMemory,
            containerVirtualCores, workerNum, psNum, dstJarPath, containerRetryOptions, jniSoDfsPath);

    LOG.info("AppMaster command: " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands,
            null, null, null);

    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application.
    // Not needed in this scenario.
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = YarnClientUtils.getRmPrincipal(conf);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }
        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    // TODO - what is the range for priority? how to decide?
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    appContext.setQueue(amQueue);

    LOG.info("Submitting application to ASM");
    yarnClient.submitApplication(appContext);
    handleSignal(appId);
    return monitorApplication(appId);
}
From source file:com.hadoop.compression.fourmc.FourMcOutputStream.java
License:BSD License
protected static void write4mcHeader(OutputStream out) throws IOException {
    DataOutputBuffer dob = new DataOutputBuffer();
    try {
        dob.writeInt(FourMcCodec.FOURMC_MAGIC);
        dob.writeInt(FourMcCodec.FOURMC_VERSION);
        int checksum = Lz4Compressor.xxhash32(dob.getData(), 0, 8, 0);
        dob.writeInt(checksum);
        out.write(dob.getData(), 0, dob.getLength());
    } finally {
        dob.close();
    }
}
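The header writers in this and the following examples all rely on the same trick: serialize the fixed fields into a DataOutputBuffer first, so the checksum can be computed over the exact bytes that will go on the wire, then append the checksum and flush the whole buffer in one write. A minimal sketch of that idiom, using java.util.zip.CRC32 as a stand-in for the codec-specific xxhash32, with illustrative MAGIC/VERSION constants (not the real 4mc values):

import java.io.IOException;
import java.io.OutputStream;
import java.util.zip.CRC32;
import org.apache.hadoop.io.DataOutputBuffer;

public final class ChecksummedHeaderSketch {
    static final int MAGIC = 0xCAFEBABE; // illustrative constant
    static final int VERSION = 1;        // illustrative constant

    static void writeHeader(OutputStream out) throws IOException {
        DataOutputBuffer dob = new DataOutputBuffer();
        try {
            dob.writeInt(MAGIC);
            dob.writeInt(VERSION);
            CRC32 crc = new CRC32();
            crc.update(dob.getData(), 0, dob.getLength()); // checksum the serialized bytes
            dob.writeInt((int) crc.getValue());
            out.write(dob.getData(), 0, dob.getLength());  // fields + trailing checksum
        } finally {
            dob.close();
        }
    }
}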
From source file:com.hadoop.compression.fourmc.FourMcOutputStream.java
License:BSD License
/**
 * Before closing the stream, the 4mc footer must be written.
 */
@Override
public void close() throws IOException {
    if (closed)
        return;

    finish();

    // write last block marker
    rawWriteInt(0);
    rawWriteInt(0);
    rawWriteInt(0);

    // time to write footer with block index
    int footerSize = 20 + blockOffsets.size() * 4;
    DataOutputBuffer dob = new DataOutputBuffer();
    dob.writeInt(footerSize);
    dob.writeInt(FourMcCodec.FOURMC_VERSION);

    // write block deltas
    for (int i = 0; i < blockOffsets.size(); ++i) {
        long blockDelta = i == 0 ? (blockOffsets.get(i)) : (blockOffsets.get(i) - blockOffsets.get(i - 1));
        dob.writeInt((int) blockDelta);
    }

    // tail of footer and checksum
    dob.writeInt(footerSize);
    dob.writeInt(FourMcCodec.FOURMC_MAGIC);
    int checksum = Lz4Compressor.xxhash32(dob.getData(), 0, dob.getLength(), 0);
    dob.writeInt(checksum);

    out.write(dob.getData(), 0, dob.getLength());
    out.close();
    closed = true;

    // force release compressor and related direct buffers
    ((Lz4Compressor) compressor).releaseDirectBuffers();
    compressor = null;
}
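A design note on the footer above: blockOffsets holds long positions, but each entry in the index is written as a 4-byte delta from the previous offset, so the footer stays compact (20 fixed bytes plus 4 per block) and remains usable for files larger than 2 GiB, provided no single gap between consecutive block offsets exceeds the int range.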
From source file:com.hadoop.compression.fourmc.FourMzOutputStream.java
License:BSD License
protected static void write4mzHeader(OutputStream out) throws IOException {
    DataOutputBuffer dob = new DataOutputBuffer();
    try {
        dob.writeInt(FourMzCodec.FOURMZ_MAGIC);
        dob.writeInt(FourMzCodec.FOURMZ_VERSION);
        int checksum = ZstdCompressor.xxhash32(dob.getData(), 0, 8, 0);
        dob.writeInt(checksum);
        out.write(dob.getData(), 0, dob.getLength());
    } finally {
        dob.close();
    }
}
From source file:com.hadoop.compression.fourmc.FourMzOutputStream.java
License:BSD License
/**
 * Before closing the stream, the 4mc footer must be written.
 */
@Override
public void close() throws IOException {
    if (closed)
        return;

    finish();

    // write last block marker
    rawWriteInt(0);
    rawWriteInt(0);
    rawWriteInt(0);

    // time to write footer with block index
    int footerSize = 20 + blockOffsets.size() * 4;
    DataOutputBuffer dob = new DataOutputBuffer();
    dob.writeInt(footerSize);
    dob.writeInt(FourMzCodec.FOURMZ_VERSION);

    // write block deltas
    for (int i = 0; i < blockOffsets.size(); ++i) {
        long blockDelta = i == 0 ? (blockOffsets.get(i)) : (blockOffsets.get(i) - blockOffsets.get(i - 1));
        dob.writeInt((int) blockDelta);
    }

    // tail of footer and checksum
    dob.writeInt(footerSize);
    dob.writeInt(FourMzCodec.FOURMZ_MAGIC);
    int checksum = ZstdCompressor.xxhash32(dob.getData(), 0, dob.getLength(), 0);
    dob.writeInt(checksum);

    out.write(dob.getData(), 0, dob.getLength());
    out.close();
    closed = true;

    // force release compressor and related direct buffers
    ((ZstdCompressor) compressor).releaseDirectBuffers();
    compressor = null;
}
From source file:com.hadoop.compression.lzo.LzopOutputStream.java
License:Open Source License
/**
 * Write an lzop-compatible header to the OutputStream provided.
 */
protected static void writeLzopHeader(OutputStream out, LzoCompressor.CompressionStrategy strategy)
        throws IOException {
    DataOutputBuffer dob = new DataOutputBuffer();
    try {
        dob.writeShort(LzopCodec.LZOP_VERSION);
        dob.writeShort(LzoCompressor.LZO_LIBRARY_VERSION);
        dob.writeShort(LzopCodec.LZOP_COMPAT_VERSION);
        switch (strategy) {
        case LZO1X_1:
            dob.writeByte(1);
            dob.writeByte(5);
            break;
        case LZO1X_15:
            dob.writeByte(2);
            dob.writeByte(1);
            break;
        case LZO1X_999:
            dob.writeByte(3);
            dob.writeByte(9);
            break;
        default:
            throw new IOException("Incompatible lzop strategy: " + strategy);
        }
        dob.writeInt(0); // all flags 0
        dob.writeInt(0x81A4); // mode
        dob.writeInt((int) (System.currentTimeMillis() / 1000)); // mtime
        dob.writeInt(0); // gmtdiff ignored
        dob.writeByte(0); // no filename
        Adler32 headerChecksum = new Adler32();
        headerChecksum.update(dob.getData(), 0, dob.getLength());
        int hc = (int) headerChecksum.getValue();
        dob.writeInt(hc);
        out.write(LzopCodec.LZO_MAGIC);
        out.write(dob.getData(), 0, dob.getLength());
    } finally {
        dob.close();
    }
}
From source file:com.ibm.jaql.lang.expr.hadoop.ChainedMapFn.java
License:Apache License
public JsonValue eval(final Context context) throws Exception {
    JsonRecord args = baseSetup(context);
    JsonValue state = args.getRequired(new JsonString("init"));
    Function mapFn = (Function) args.getRequired(new JsonString("map"));
    JsonValue schema = args.get(new JsonString("schema"));
    JaqlUtil.enforceNonNull(mapFn);

    conf.setNumReduceTasks(0);
    conf.setMapRunnerClass(MapEval.class);

    // setup serialization
    setupSerialization(false);
    if (schema != null) {
        conf.set(SCHEMA_NAME, schema.toString());
    }
    prepareFunction("map", 2, mapFn, 0);

    InputSplit[] splits = conf.getInputFormat().getSplits(conf, conf.getNumMapTasks());

    // Override the input format to select one partition
    int targetSplits = conf.getNumMapTasks();
    String oldFormat = conf.get("mapred.input.format.class");
    conf.set(SelectSplitInputFormat.INPUT_FORMAT, oldFormat);

    // It would be nice to know how many splits we are generating to avoid
    // using an exception to quit...
    // int numSplits = oldFormat.getSplits(conf, ??); // This parameter is avoided in the new API

    conf.setInputFormat(SelectSplitInputFormat.class);
    conf.setNumMapTasks(1);

    DataOutputBuffer buffer = new DataOutputBuffer();
    for (int i = 0; i < splits.length; i++) {
        // TODO: we should move the model around using hdfs files instead of serializing
        conf.setClass(SelectSplitInputFormat.SPLIT_CLASS, splits[i].getClass(), InputSplit.class);
        conf.set(SelectSplitInputFormat.STATE, state.toString());
        buffer.reset();
        splits[i].write(buffer);
        ConfUtil.writeBinary(conf, SelectSplitInputFormat.SPLIT, buffer.getData(), 0, buffer.getLength());
        conf.setJobName("chainedMap " + (i + 1) + "/" + splits.length);

        // This causes the output file to be deleted.
        HadoopOutputAdapter outAdapter = (HadoopOutputAdapter) JaqlUtil.getAdapterStore().output
                .getAdapter(outArgs);
        outAdapter.setParallel(conf);

        try {
            JobClient.runJob(conf);
        } catch (EOFException ex) {
            // Thrown when we've processed all of the splits
            break;
        }

        // Read the new state
        final InputAdapter adapter = (InputAdapter) JaqlUtil.getAdapterStore().input.getAdapter(outArgs);
        adapter.open();
        ClosableJsonIterator reader = adapter.iter();
        state = null;
        if (reader.moveNext()) {
            state = reader.current();
        }
        reader.close();
    }
    return state;
}
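The loop above reuses one DataOutputBuffer across all splits: reset() rewinds the write position without discarding the backing array, so each split is serialized without a fresh allocation. The same idiom works for any Writable; a small self-contained sketch (IntWritable stands in for the InputSplit, and the class name is illustrative):

import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;

public final class ReuseBufferSketch {
    public static void main(String[] args) throws Exception {
        DataOutputBuffer buffer = new DataOutputBuffer();
        IntWritable value = new IntWritable();
        for (int i = 0; i < 3; i++) {
            buffer.reset();      // rewind; the backing array is kept and reused
            value.set(i);
            value.write(buffer); // a Writable serializes itself into any DataOutput
            // Only the first getLength() bytes of getData() are valid for this record.
            System.out.println("record " + i + " -> " + buffer.getLength() + " bytes");
        }
    }
}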