Example usage for org.apache.hadoop.io DataOutputBuffer DataOutputBuffer

Introduction

This page collects example usages of the no-argument constructor of org.apache.hadoop.io.DataOutputBuffer.

Prototype

public DataOutputBuffer() 

Document

Constructs a new empty buffer.
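
A minimal, hedged sketch of typical usage (the class name Demo is illustrative, not from any source file below): DataOutputBuffer extends DataOutputStream, so the usual write methods apply. Note that getData() exposes the internal backing array, which may be longer than the valid data, so reads must be bounded by getLength().

import org.apache.hadoop.io.DataOutputBuffer;

public class Demo {
    public static void main(String[] args) throws Exception {
        DataOutputBuffer dob = new DataOutputBuffer(); // constructs a new empty buffer
        dob.writeInt(42);
        dob.writeUTF("hello");
        byte[] backing = dob.getData(); // internal array, possibly over-allocated
        int valid = dob.getLength();    // number of valid bytes written
        System.out.println("wrote " + valid + " bytes; backing array holds " + backing.length);
        dob.reset();                    // rewind for reuse without reallocating
        dob.close();
    }
}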

Usage

From source file:com.flyhz.avengers.framework.AvengersAppMaster.java

License:Apache License

/**
 * Main run function for the application master
 * @throws YarnException
 * @throws IOException
 */
public boolean run() throws IOException, YarnException {
    LOG.info("Starting AvengersAppMaster");

    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();

    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();
    // Setup local RPC server to accept status requests directly from
    // clients, which send requests to this app master

    // Register self with ResourceManager
    // This will start heartbeating to the RM
    appMasterHostname = NetUtils.getHostname();
    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);
    // Dump out information about cluster capability as seen by the
    // resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }

    initJar();

    crawl();

    fetch();

    finish();

    return success;
}
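
The pattern above (write a Credentials object into a DataOutputBuffer, then wrap the valid region in a ByteBuffer) recurs in several examples below. As a minimal, hedged sketch (the helper name readTokensBack is illustrative), the same bytes can be read back with DataInputBuffer, which extends DataInputStream:

import java.io.IOException;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.security.Credentials;

static Credentials readTokensBack(DataOutputBuffer dob) throws IOException {
    DataInputBuffer dib = new DataInputBuffer();
    // bound the read by getLength(); getData() returns the whole backing array
    dib.reset(dob.getData(), dob.getLength());
    Credentials restored = new Credentials();
    restored.readTokenStorageStream(dib);
    return restored;
}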

From source file:com.flyhz.avengers.framework.AvengersClient.java

License:Apache License

/**
 * Main run function for the client
 * 
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
private boolean run(String appName, List<String> commands) throws IOException, YarnException {

    LOG.info("Running Client");

    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource
    // manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();
    appContext.setApplicationName(appName);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of
    // the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    FileSystem fs = DistributedFileSystem.get(conf);
    Path src = new Path(appJar);
    Path dst = new Path(fs.getHomeDirectory(), "avengers/" + batchId + "/avengers.jar");
    if (copy) {
        LOG.info("copy local jar to hdfs");
        fs.copyFromLocalFile(false, true, src, dst);
        copy = false;
    }
    this.hdfsPath = dst.toUri().toString();
    LOG.info("hdfs hdfsPath = {}", dst);
    FileStatus destStatus = fs.getFileStatus(dst);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);

    amJarRsrc.setType(LocalResourceType.FILE);
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    LOG.info("YarnURLFromPath ->{}", ConverterUtils.getYarnUrlFromPath(dst));
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put("avengers.jar", amJarRsrc);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        Path log4jSrc = new Path(log4jPropFile);
        Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props");
        fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
        FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
        LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
        log4jRsrc.setType(LocalResourceType.FILE);
        log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
        log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
        log4jRsrc.setSize(log4jFileStatus.getLen());
        localResources.put("log4j.properties", log4jRsrc);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.

    // Set local resource info into app master container launch context
    amContainer.setLocalResources(localResources);

    // Set the necessary security tokens as needed
    // amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application
    // master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar);
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
        classPathEnv.append(File.pathSeparatorChar);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }
    LOG.info("CLASSPATH -> " + classPathEnv);
    env.put("CLASSPATH", classPathEnv.toString());

    amContainer.setEnvironment(env);

    for (String cmd : commands) {
        LOG.info("run command {},appId {}", cmd, appId.getId());
    }

    amContainer.setCommands(commands);

    // Set up resource type requirements
    // For now, only memory is supported so we set memory requirements
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp =
    // applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on
    // success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);

}

From source file:com.github.hdl.tensorflow.yarn.app.ApplicationMaster.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws YarnException
 * @throws IOException
 */
@SuppressWarnings({ "unchecked" })
public void run() throws YarnException, IOException, InterruptedException {
    LOG.info("Starting ApplicationMaster");

    // Note: Credentials, Token, UserGroupInformation, DataOutputBuffer class
    // are marked as LimitedPrivate
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    LOG.info("Executing with tokens:");
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.info(token);
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Create appSubmitterUgi and add original tokens to it
    String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
    appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
    appSubmitterUgi.addCredentials(credentials);

    AMRMClientAsync.AbstractCallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    appMasterHostname = System.getenv(Environment.NM_HOST.name());
    TFApplicationRpcServer rpcServer = new TFApplicationRpcServer(appMasterHostname, new RpcForClient());
    appMasterRpcPort = rpcServer.getRpcPort();
    rpcServer.startRpcServiceThread();

    // Register self with ResourceManager
    // This will start heartbeating to the RM

    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);
    // Dump out information about cluster capability as seen by the
    // resource manager
    long maxMem = response.getMaximumResourceCapability().getMemorySize();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capability of resources in this cluster " + maxVCores);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }

    if (containerVirtualCores > maxVCores) {
        LOG.info("Container virtual cores specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerVirtualCores + ", max=" + maxVCores);
        containerVirtualCores = maxVCores;
    }

    List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
    LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
            + " previous attempts' running containers on AM registration.");
    for (Container container : previousAMRunningContainers) {
        launchedContainers.add(container.getId());
    }
    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

    int numTotalContainersToRequest = numTotalContainers - previousAMRunningContainers.size();
    // Setup ask for containers from RM
    // Send request for containers to RM
    // Until we get our fully allocated quota, we keep on polling RM for
    // containers
    // Keep looping until all the containers are launched and shell script
    // executed on them ( regardless of success/failure).
    for (int i = 0; i < numTotalContainersToRequest; ++i) {
        ContainerRequest containerAsk = setupContainerAskForRM();
        amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainers);

}

From source file:com.github.hdl.tensorflow.yarn.app.Client.java

License:Apache License

/**
 * Main run function for the client
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {

    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress="
                + node.getHttpAddress() + ", nodeRackName=" + node.getRackName() + ", nodeNumContainers="
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed

    long maxMem = appResponse.getMaximumResourceCapability().getMemorySize();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capability of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setApplicationName(appName);

    if (attemptFailuresValidityInterval >= 0) {
        appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
    }

    Set<String> tags = new HashSet<String>();
    appContext.setApplicationTags(tags);

    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    TFAmContainer tfAmContainer = new TFAmContainer(this);

    // Copy the application jar to the filesystem
    FileSystem fs = FileSystem.get(conf);
    String dstJarPath = copyLocalFileToDfs(fs, appId.toString(), appMasterJar, TFContainer.SERVER_JAR_PATH);
    tfAmContainer.addToLocalResources(fs, new Path(dstJarPath), TFAmContainer.APPMASTER_JAR_PATH,
            localResources);

    String jniSoDfsPath = "";
    if (jniSoFile != null && !jniSoFile.equals("")) {
        jniSoDfsPath = copyLocalFileToDfs(fs, appId.toString(), jniSoFile, "libbridge.so");
    }
    // Set the log4j properties if needed
    /*    if (!log4jPropFile.isEmpty()) {
          tfAmContainer.addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(),
              localResources, null);
        }*/

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    Map<String, String> env = tfAmContainer.setJavaEnv(conf);

    if (null != nodeLabelExpression) {
        appContext.setNodeLabelExpression(nodeLabelExpression);
    }

    StringBuilder command = tfAmContainer.makeCommands(amMemory, appMasterMainClass, containerMemory,
            containerVirtualCores, workerNum, psNum, dstJarPath, containerRetryOptions, jniSoDfsPath);

    LOG.info("AppMaster command: " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null,
            null, null);

    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = YarnClientUtils.getRmPrincipal(conf);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    // TODO - what is the range for priority? how to decide?
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    appContext.setQueue(amQueue);

    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);
    handleSignal(appId);
    return monitorApplication(appId);

}

From source file:com.hadoop.compression.fourmc.FourMcOutputStream.java

License:BSD License

protected static void write4mcHeader(OutputStream out) throws IOException {
    DataOutputBuffer dob = new DataOutputBuffer();
    try {
        dob.writeInt(FourMcCodec.FOURMC_MAGIC);
        dob.writeInt(FourMcCodec.FOURMC_VERSION);
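        // the two ints above occupy the first 8 bytes; the checksum below covers exactly those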
        int checksum = Lz4Compressor.xxhash32(dob.getData(), 0, 8, 0);
        dob.writeInt(checksum);
        out.write(dob.getData(), 0, dob.getLength());
    } finally {
        dob.close();
    }
}

From source file:com.hadoop.compression.fourmc.FourMcOutputStream.java

License:BSD License

/**
 * Before closing the stream, 4mc footer must be written.
 */
@Override
public void close() throws IOException {
    if (closed)
        return;

    finish();

    // write last block marker
    rawWriteInt(0);
    rawWriteInt(0);
    rawWriteInt(0);

    // time to write footer with block index
    int footerSize = 20 + blockOffsets.size() * 4;
    DataOutputBuffer dob = new DataOutputBuffer();
    dob.writeInt(footerSize);
    dob.writeInt(FourMcCodec.FOURMC_VERSION);

    // write block deltas
    for (int i = 0; i < blockOffsets.size(); ++i) {
        long blockDelta = i == 0 ? (blockOffsets.get(i)) : (blockOffsets.get(i) - blockOffsets.get(i - 1));
        dob.writeInt((int) blockDelta);
    }

    // tail of footer and checksum
    dob.writeInt(footerSize);
    dob.writeInt(FourMcCodec.FOURMC_MAGIC);
    int checksum = Lz4Compressor.xxhash32(dob.getData(), 0, dob.getLength(), 0);
    dob.writeInt(checksum);
    out.write(dob.getData(), 0, dob.getLength());

    out.close();
    closed = true;

    // force release compressor and related direct buffers
    ((Lz4Compressor) compressor).releaseDirectBuffers();
    compressor = null;
}

From source file:com.hadoop.compression.fourmc.FourMzOutputStream.java

License:BSD License

protected static void write4mzHeader(OutputStream out) throws IOException {
    DataOutputBuffer dob = new DataOutputBuffer();
    try {
        dob.writeInt(FourMzCodec.FOURMZ_MAGIC);
        dob.writeInt(FourMzCodec.FOURMZ_VERSION);
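        // the two ints above occupy the first 8 bytes; the checksum below covers exactly those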
        int checksum = ZstdCompressor.xxhash32(dob.getData(), 0, 8, 0);
        dob.writeInt(checksum);
        out.write(dob.getData(), 0, dob.getLength());
    } finally {
        dob.close();
    }
}

From source file:com.hadoop.compression.fourmc.FourMzOutputStream.java

License:BSD License

/**
 * Before closing the stream, 4mc footer must be written.
 */
@Override
public void close() throws IOException {
    if (closed)
        return;

    finish();

    // write last block marker
    rawWriteInt(0);
    rawWriteInt(0);
    rawWriteInt(0);

    // time to write footer with block index
    int footerSize = 20 + blockOffsets.size() * 4;
    DataOutputBuffer dob = new DataOutputBuffer();
    dob.writeInt(footerSize);
    dob.writeInt(FourMzCodec.FOURMZ_VERSION);

    // write block deltas
    for (int i = 0; i < blockOffsets.size(); ++i) {
        long blockDelta = i == 0 ? (blockOffsets.get(i)) : (blockOffsets.get(i) - blockOffsets.get(i - 1));
        dob.writeInt((int) blockDelta);
    }

    // tail of footer and checksum
    dob.writeInt(footerSize);
    dob.writeInt(FourMzCodec.FOURMZ_MAGIC);
    int checksum = ZstdCompressor.xxhash32(dob.getData(), 0, dob.getLength(), 0);
    dob.writeInt(checksum);
    out.write(dob.getData(), 0, dob.getLength());

    out.close();
    closed = true;

    // force release compressor and related direct buffers
    ((ZstdCompressor) compressor).releaseDirectBuffers();
    compressor = null;
}

From source file:com.hadoop.compression.lzo.LzopOutputStream.java

License:Open Source License

/**
 * Write an lzop-compatible header to the OutputStream provided.
 */
protected static void writeLzopHeader(OutputStream out, LzoCompressor.CompressionStrategy strategy)
        throws IOException {
    DataOutputBuffer dob = new DataOutputBuffer();
    try {
        dob.writeShort(LzopCodec.LZOP_VERSION);
        dob.writeShort(LzoCompressor.LZO_LIBRARY_VERSION);
        dob.writeShort(LzopCodec.LZOP_COMPAT_VERSION);
        switch (strategy) {
        case LZO1X_1:
            dob.writeByte(1);
            dob.writeByte(5);
            break;
        case LZO1X_15:
            dob.writeByte(2);
            dob.writeByte(1);
            break;
        case LZO1X_999:
            dob.writeByte(3);
            dob.writeByte(9);
            break;
        default:
            throw new IOException("Incompatible lzop strategy: " + strategy);
        }
        dob.writeInt(0); // all flags 0
        dob.writeInt(0x81A4); // mode
        dob.writeInt((int) (System.currentTimeMillis() / 1000)); // mtime
        dob.writeInt(0); // gmtdiff ignored
        dob.writeByte(0); // no filename
        Adler32 headerChecksum = new Adler32();
        headerChecksum.update(dob.getData(), 0, dob.getLength());
        int hc = (int) headerChecksum.getValue();
        dob.writeInt(hc);
        out.write(LzopCodec.LZO_MAGIC);
        out.write(dob.getData(), 0, dob.getLength());
    } finally {
        dob.close();
    }
}

From source file:com.ibm.jaql.lang.expr.hadoop.ChainedMapFn.java

License:Apache License

public JsonValue eval(final Context context) throws Exception {
    JsonRecord args = baseSetup(context);

    JsonValue state = args.getRequired(new JsonString("init"));
    Function mapFn = (Function) args.getRequired(new JsonString("map"));
    JsonValue schema = args.get(new JsonString("schema"));

    JaqlUtil.enforceNonNull(mapFn);

    conf.setNumReduceTasks(0);
    conf.setMapRunnerClass(MapEval.class);

    // setup serialization
    setupSerialization(false);
    if (schema != null) {
        conf.set(SCHEMA_NAME, schema.toString());
    }

    prepareFunction("map", 2, mapFn, 0);

    InputSplit[] splits = conf.getInputFormat().getSplits(conf, conf.getNumMapTasks());

    // Override the input format to select one partition
    int targetSplits = conf.getNumMapTasks();
    String oldFormat = conf.get("mapred.input.format.class");
    conf.set(SelectSplitInputFormat.INPUT_FORMAT, oldFormat);
    // It would be nice to know how many splits we are generating to avoid 
    // using an exception to quit...
    // int numSplits = oldFormat.getSplits(conf, ??);
    // This parameter is avoided in the new API
    conf.setInputFormat(SelectSplitInputFormat.class);
    conf.setNumMapTasks(1);

    DataOutputBuffer buffer = new DataOutputBuffer();
    for (int i = 0; i < splits.length; i++) {
        // TODO: we should move the model around using hdfs files instead of serializing
        conf.setClass(SelectSplitInputFormat.SPLIT_CLASS, splits[i].getClass(), InputSplit.class);
        conf.set(SelectSplitInputFormat.STATE, state.toString());
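        // reuse the same DataOutputBuffer across splits; reset() rewinds the valid length to zero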
        buffer.reset();
        splits[i].write(buffer);
        ConfUtil.writeBinary(conf, SelectSplitInputFormat.SPLIT, buffer.getData(), 0, buffer.getLength());
        conf.setJobName("chainedMap " + (i + 1) + "/" + splits.length);

        // This causes the output file to be deleted.
        HadoopOutputAdapter outAdapter = (HadoopOutputAdapter) JaqlUtil.getAdapterStore().output
                .getAdapter(outArgs);
        outAdapter.setParallel(conf);

        try {
            JobClient.runJob(conf);
        } catch (EOFException ex) {
            // Thrown when we've processed all of the splits
            break;
        }

        // Read the new state
        final InputAdapter adapter = (InputAdapter) JaqlUtil.getAdapterStore().input.getAdapter(outArgs);
        adapter.open();
        ClosableJsonIterator reader = adapter.iter();
        state = null;
        if (reader.moveNext()) {
            state = reader.current();
        }
        reader.close();
    }

    return state;
}