Example usage for org.apache.hadoop.io DataOutputBuffer DataOutputBuffer

Introduction

This page lists example usages of the no-argument constructor of org.apache.hadoop.io.DataOutputBuffer, collected from open-source projects.

Prototype

public DataOutputBuffer() 

Document

Constructs a new empty buffer.
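
Before the project-specific examples below, here is a minimal, self-contained sketch of the typical write / read-back / reuse cycle. It relies only on methods DataOutputBuffer actually provides (it extends java.io.DataOutputStream and adds getData(), getLength() and reset()); the class name and values are illustrative.

import java.util.Arrays;

import org.apache.hadoop.io.DataOutputBuffer;

public class DataOutputBufferSketch {
    public static void main(String[] args) throws Exception {
        DataOutputBuffer out = new DataOutputBuffer(); // empty, growable buffer

        out.writeInt(42);      // all DataOutput methods are inherited
        out.writeUTF("hello");

        // getData() exposes the raw backing array, which is usually larger
        // than the number of valid bytes; always pair it with getLength().
        byte[] trimmed = Arrays.copyOf(out.getData(), out.getLength());
        System.out.println("wrote " + trimmed.length + " bytes");

        out.reset(); // rewind to zero so the buffer can be reused
    }
}

Most of the examples that follow are variations on this pattern: serialize something into the buffer, then hand off exactly getLength() bytes of getData().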

Usage

From source file:com.ibm.jaql.lang.expr.index.HashtableServer.java

License:Apache License

@Override
public void run() {
    JsonValue readKey = null;
    JsonValue[] keys = new JsonValue[0];

    try {
        while (true) {
            byte command = in.readByte();
            switch (command) {
            // GET Key -> FOUND Value | NOT_FOUND
            case GET_CMD: {
                readKey = table.keySerializer.read(in, readKey);
                byte[] value = table.table.get(readKey);
                if (value == null) {
                    out.write(NOT_FOUND_CMD);
                } else {
                    out.write(FOUND_CMD);
                    out.write(value);
                }
                break;
            }
            // GETN n, [Key]*n -> OK n [FOUND Value | NOT_FOUND]*n  OK
            case GETN_CMD: {
                int n = BaseUtil.readVUInt(in);
                if (n > keys.length || // bigger array required
                        3 * n < keys.length) // array is way too big
                {
                    keys = new JsonValue[n];
                }
                for (int i = 0; i < n; i++) {
                    keys[i] = table.keySerializer.read(in, keys[i]);
                }
                out.write(OK_CMD);
                BaseUtil.writeVUInt(out, n);
                for (int i = 0; i < n; i++) {
                    byte[] value = table.table.get(keys[i]);
                    if (value == null) {
                        out.write(NOT_FOUND_CMD);
                    } else {
                        out.write(FOUND_CMD);
                        out.write(value);
                    }
                }
                out.write(OK_CMD);
                break;
            }
            // USE tableId string, age msec, lease msec
            //   -> OK lease, schema [ Key, Value ], 
            //    | BUILD 
            case USE_CMD: {
                if (table != null) {
                    HashMapCache.instance.release(table);
                    table = null;
                }
                JsonString tableId = (JsonString) defaultSerializer.read(in, null);
                long ageMS = BaseUtil.readVSLong(in);
                long leaseMS = BaseUtil.readVSLong(in);

                table = HashMapCache.instance.get(tableId.toString(), ageMS, leaseMS);
                if (table.isBuilt()) // The table is good to go
                {
                    out.write(OK_CMD);
                    BaseUtil.writeVSLong(out, 0); // TODO: implement leases
                    defaultSerializer.write(out, table.schema);
                } else // We need to build the table
                {
                    out.write(BUILD_CMD);
                    out.flush();

                    // SCHEMA schema [Key,Value] (PUT key, value)* OK -> OK
                    command = in.readByte();
                    if (command == RELEASE_CMD) {
                        // The client couldn't build the table, so just release it
                        HashMapCache.instance.release(table);
                        break;
                    }
                    if (command != SCHEMA_CMD) {
                        throw new ProtocolException("expected SCHEMA");
                    }
                    table.setSchema((JsonSchema) defaultSerializer.read(in, null));
                    DataOutputBuffer buf = new DataOutputBuffer();

                    System.err.println("building hashtable " + table.tableId);

                    while ((command = in.readByte()) == PUT_CMD) {
                        // TODO: we need to use a spilling hashtable to avoid memory overflows...
                        // TODO: we could at least pack the values more tightly 
                        buf.reset();
                        JsonValue key = table.keySerializer.read(in, null); // Be sure NOT to reuse the key here!
                        table.valueSerializer.copy(in, buf);
                        byte[] val = new byte[buf.getLength()];
                        System.arraycopy(buf.getData(), 0, val, 0, val.length);
                        table.table.put(key, val);
                    }
                    if (command != OK_CMD) {
                        throw new ProtocolException("expected OK");
                    }
                    HashMapCache.instance.doneBuilding(table);
                    out.write(OK_CMD);
                    System.err.println("built hashtable " + table.tableId);
                }
                break;
            }
            // RELEASE -> OK
            case RELEASE_CMD: {
                if (table != null) {
                    HashMapCache.instance.release(table);
                    table = null;
                }
                out.write(OK_CMD);
                break;
            }
            // LIST_TABLES -> (FOUND tableId built age lease schema numEntries)* OK
            // GET_ALL -> (FOUND key value)* OK
            // UNDEFINE tableId -> OK | NOT_FOUND
            // UNDEFINE_ALL -> OK
            default:
                throw new ProtocolException("invalid command code");
            }
            out.flush();
        }
    } catch (EOFException e) {
        // ignored
    } catch (Exception e) {
        // log and exit thread
        e.printStackTrace();
    } finally {
        if (table != null) {
            HashMapCache.instance.release(table);
        }
        try {
            socket.close();
        } catch (Exception e) {
            // log and exit thread
            e.printStackTrace();
        }
    }
}
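
Note the buffer handling inside the PUT loop above: a single DataOutputBuffer is allocated outside the loop, reset() before each value, and then trimmed with System.arraycopy so that exactly getLength() bytes are stored per entry. Storing getData() directly would both alias the same buffer across entries and retain its full capacity in the hashtable.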

From source file:com.ibm.jaql.lang.expr.io.InputSplitsFn.java

License:Apache License

@Override
public JsonIterator iter(Context context) throws Exception {
    JsonValue iod = exprs[0].eval(context);

    Adapter adapter = JaqlUtil.getAdapterStore().input.getAdapter(iod);
    if (!(adapter instanceof HadoopInputAdapter)) {
        throw new ClassCastException("i/o descriptor must be for an input format");
    }
    HadoopInputAdapter hia = (HadoopInputAdapter) adapter;
    JobConf conf = new JobConf(); // TODO: allow configuration
    hia.setParallel(conf); // right thing to do?
    hia.configure(conf); // right thing to do?
    int numSplits = conf.getNumMapTasks(); // TODO: allow override
    final InputSplit[] splits = hia.getSplits(conf, numSplits);
    final MutableJsonString className = new MutableJsonString();
    final MutableJsonBinary rawSplit = new MutableJsonBinary();
    final BufferedJsonRecord rec = new BufferedJsonRecord(3);
    final BufferedJsonArray locArr = new BufferedJsonArray();
    rec.add(CLASS_TAG, className);
    rec.add(SPLIT_TAG, rawSplit);
    rec.add(LOCATIONS_TAG, locArr);

    return new JsonIterator(rec) {
        DataOutputBuffer out = new DataOutputBuffer();
        int i = 0;

        @Override
        public boolean moveNext() throws Exception {
            if (i >= splits.length) {
                return false;
            }
            InputSplit split = splits[i++];
            className.setCopy(split.getClass().getName());
            out.reset();
            split.write(out);
            rawSplit.setCopy(out.getData(), out.getLength());
            locArr.clear();
            String[] locs = split.getLocations();
            if (locs != null) {
                for (String loc : locs) {
                    locArr.add(new JsonString(loc));
                }
            }
            return true;
        }
    };
}
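
Here the buffer bridges Hadoop's Writable serialization and Jaql's JSON values: each InputSplit writes itself into the reused buffer, and setCopy(out.getData(), out.getLength()) copies exactly the valid bytes into the binary JSON value. Because setCopy copies, the single buffer can safely be reset() and reused on the next moveNext() call.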

From source file:com.ibm.jaql.lang.expr.io.MakeFileSplitFn.java

License:Apache License

@Override
public JsonRecord eval(Context context) throws Exception {
    if (out == null) {
        out = new DataOutputBuffer();
        className = new MutableJsonString();
        rawSplit = new MutableJsonBinary();
        values = new JsonValue[] { className, rawSplit, null };
        resultRec = new BufferedJsonRecord();
        resultRec.set(NAMES, values, NAMES.length);
    }

    JsonString jfile = (JsonString) exprs[0].eval(context);
    JsonNumber jstart = (JsonNumber) exprs[1].eval(context);
    JsonNumber jlength = (JsonNumber) exprs[2].eval(context);
    JsonArray jhosts = (JsonArray) exprs[3].eval(context);

    String file = jfile.toString();
    long start = jstart.longValueExact();
    long length = jlength.longValueExact();
    if (jhosts == null)
        jhosts = JsonArray.EMPTY;
    String[] hosts = new String[(int) jhosts.count()];
    JsonIterator iter = jhosts.iter();
    for (int i = 0; i < hosts.length; i++) {
        iter.moveNext();
        JsonString jhost = (JsonString) iter.current();
        hosts[i] = jhost.toString();
    }

    FileSplit split = new FileSplit(new Path(file), start, length, hosts);
    className.setCopy(split.getClass().getCanonicalName());
    out.reset();
    split.write(out);
    rawSplit.set(out.getData(), out.getLength());
    values[2] = jhosts;

    return resultRec;
}
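
This is the same lazily-initialized, reused-buffer pattern as the previous example, with one difference worth noting: rawSplit.set(out.getData(), out.getLength()) appears to hand over the backing array rather than copying it (contrast setCopy above), which is only safe as long as the consumer is done with the record before the buffer's next reset().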

From source file:com.inforefiner.hdata.ApplicationMaster.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws YarnException
 * @throws IOException
 */
@SuppressWarnings({ "unchecked" })
public void run() throws YarnException, IOException, InterruptedException {
    LOG.info("Starting ApplicationMaster");

    // Note: Credentials, Token, UserGroupInformation, DataOutputBuffer class
    // are marked as LimitedPrivate
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    LOG.info("Executing with tokens:");
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.info(token);
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Create appSubmitterUgi and add original tokens to it
    String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
    appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
    appSubmitterUgi.addCredentials(credentials);

    AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    startTimelineClient(conf);
    if (timelineClient != null) {
        publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_START,
                domainId, appSubmitterUgi);
    }

    // Setup local RPC Server to accept status requests directly from clients
    // TODO need to setup a protocol for client to be able to communicate to
    // the RPC server
    // TODO use the rpc port info to register with the RM for the client to
    // send requests to this app master

    // Register self with ResourceManager
    // This will start heartbeating to the RM
    appMasterHostname = NetUtils.getHostname();
    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);
    // Dump out information about cluster capability as seen by the
    // resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capabililty of resources in this cluster " + maxVCores);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }

    if (containerVirtualCores > maxVCores) {
        LOG.info("Container virtual cores specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerVirtualCores + ", max=" + maxVCores);
        containerVirtualCores = maxVCores;
    }

    List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
    LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
            + " previous attempts' running containers on AM registration.");
    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

    int numTotalContainersToRequest = numTotalContainers - previousAMRunningContainers.size();
    // Setup ask for containers from RM
    // Send request for containers to RM
    // Until we get our fully allocated quota, we keep on polling RM for
    // containers
    // Keep looping until all the containers are launched and shell script
    // executed on them ( regardless of success/failure).
    for (int i = 0; i < numTotalContainersToRequest; ++i) {
        ContainerRequest containerAsk = setupContainerAskForRM();
        amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainers);
}
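
The token handling at the top of run() is the standard YARN idiom for shipping security tokens to containers: write the Credentials into a fresh DataOutputBuffer, then wrap only the valid region into a ByteBuffer. A condensed sketch of just that idiom, with the AMRM-token filtering and error handling omitted, might look like this:

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;

public final class TokenBufferSketch {
    /** Serialize the current user's tokens into a container-ready buffer. */
    public static ByteBuffer currentUserTokens() throws IOException {
        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        // Wrap only the valid region; the backing array may be larger.
        return ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
    }
}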

From source file:com.inforefiner.hdata.SubmitClient.java

License:Apache License

/**
 * Main run function for the client
 *
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    if (domainId != null && domainId.length() > 0 && toCreateDomain) {
        prepareTimelineDomain();
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    if (attemptFailuresValidityInterval >= 0) {
        appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
    }

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH;
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    if (!shellCommand.isEmpty()) {
        addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand);
    }

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources,
                StringUtils.join(shellArgs, " "));
    }

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));
    if (domainId != null && domainId.length() > 0) {
        env.put(DSConstants.DISTRIBUTEDSHELLTIMELINEDOMAIN, domainId);
    }

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    if (null != nodeLabelExpression) {
        appContext.setNodeLabelExpression(nodeLabelExpression);
    }
    vargs.add("--priority " + String.valueOf(shellCmdPriority));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null,
            null, null);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    // TODO - what is the range for priority? how to decide?
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?
    Thread t = new Thread(new LogReceiver());
    t.start();
    // Monitor the application
    return monitorApplication(appId);
}
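
When security is enabled, this client uses the same DataOutputBuffer idiom as the ApplicationMaster above: delegation tokens are collected into a Credentials object, written out with writeTokenStorageToStream(), and the valid region of the buffer is wrapped into the ByteBuffer that ContainerLaunchContext's setTokens() expects.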

From source file:com.mellanox.hadoop.mapred.UdaShuffleHandler.java

License:Apache License

public static ByteBuffer serializeServiceData(Token<JobTokenIdentifier> jobToken) throws IOException {
    //TODO these bytes should be versioned
    DataOutputBuffer jobToken_dob = new DataOutputBuffer();
    jobToken.write(jobToken_dob);
    return ByteBuffer.wrap(jobToken_dob.getData(), 0, jobToken_dob.getLength());
}
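
This is the wrap-a-Writable pattern at its smallest: any Writable, here the job token, can be flattened by writing it into a fresh DataOutputBuffer and wrapping getLength() bytes of the backing array.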

From source file:com.mellanox.r4h.DFSClient.java

License:Apache License

/**
 * Get the checksum of the whole file or a range of the file. Note that the
 * range always starts from the beginning of the file.
 *
 * @param src
 *            The file path
 * @param length
 *            the length of the range, i.e., the range is [0, length]
 * @return The checksum
 * @see DistributedFileSystem#getFileChecksum(Path)
 */
public MD5MD5CRC32FileChecksum getFileChecksum(String src, long length) throws IOException {
    checkOpen();
    Preconditions.checkArgument(length >= 0);
    // get block locations for the file range
    LocatedBlocks blockLocations = callGetBlockLocations(namenode, src, 0, length);
    if (null == blockLocations) {
        throw new FileNotFoundException("File does not exist: " + src);
    }
    List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks();
    final DataOutputBuffer md5out = new DataOutputBuffer();
    int bytesPerCRC = -1;
    DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
    long crcPerBlock = 0;
    boolean refetchBlocks = false;
    int lastRetriedIndex = -1;

    // get block checksum for each block
    long remaining = length;
    if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) {
        remaining = Math.min(length, blockLocations.getFileLength());
    }
    for (int i = 0; i < locatedblocks.size() && remaining > 0; i++) {
        if (refetchBlocks) { // refetch to get fresh tokens
            blockLocations = callGetBlockLocations(namenode, src, 0, length);
            if (null == blockLocations) {
                throw new FileNotFoundException("File does not exist: " + src);
            }
            locatedblocks = blockLocations.getLocatedBlocks();
            refetchBlocks = false;
        }
        LocatedBlock lb = locatedblocks.get(i);
        final ExtendedBlock block = lb.getBlock();
        if (remaining < block.getNumBytes()) {
            block.setNumBytes(remaining);
        }
        remaining -= block.getNumBytes();
        final DatanodeInfo[] datanodes = lb.getLocations();

        // try each datanode location of the block
        final int timeout = 3000 * datanodes.length + dfsClientConf.socketTimeout();
        boolean done = false;
        for (int j = 0; !done && j < datanodes.length; j++) {
            DataOutputStream out = null;
            DataInputStream in = null;

            try {
                // connect to a datanode
                IOStreamPair pair = connectToDN(datanodes[j], timeout, lb);
                out = new DataOutputStream(new BufferedOutputStream(pair.out, HdfsConstants.SMALL_BUFFER_SIZE));
                in = new DataInputStream(pair.in);

                if (LOG.isDebugEnabled()) {
                    LOG.debug("write to " + datanodes[j] + ": " + Op.BLOCK_CHECKSUM + ", block=" + block);
                }
                // get block MD5
                new Sender(out).blockChecksum(block, lb.getBlockToken());

                final BlockOpResponseProto reply = BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));

                String logInfo = "for block " + block + " from datanode " + datanodes[j];
                DataTransferProtoUtil.checkBlockOpStatus(reply, logInfo);

                OpBlockChecksumResponseProto checksumData = reply.getChecksumResponse();

                // read byte-per-checksum
                final int bpc = checksumData.getBytesPerCrc();
                if (i == 0) { // first block
                    bytesPerCRC = bpc;
                } else if (bpc != bytesPerCRC) {
                    throw new IOException(
                            "Byte-per-checksum not matched: bpc=" + bpc + " but bytesPerCRC=" + bytesPerCRC);
                }

                // read crc-per-block
                final long cpb = checksumData.getCrcPerBlock();
                if (locatedblocks.size() > 1 && i == 0) {
                    crcPerBlock = cpb;
                }

                // read md5
                final MD5Hash md5 = new MD5Hash(checksumData.getMd5().toByteArray());
                md5.write(md5out);

                // read crc-type
                final DataChecksum.Type ct;
                if (checksumData.hasCrcType()) {
                    ct = PBHelper.convert(checksumData.getCrcType());
                } else {
                    LOG.debug("Retrieving checksum from an earlier-version DataNode: "
                            + "inferring checksum by reading first byte");
                    ct = inferChecksumTypeByReading(lb, datanodes[j]);
                }

                if (i == 0) { // first block
                    crcType = ct;
                } else if (crcType != DataChecksum.Type.MIXED && crcType != ct) {
                    // if crc types are mixed in a file
                    crcType = DataChecksum.Type.MIXED;
                }

                done = true;

                if (LOG.isDebugEnabled()) {
                    if (i == 0) {
                        LOG.debug("set bytesPerCRC=" + bytesPerCRC + ", crcPerBlock=" + crcPerBlock);
                    }
                    LOG.debug("got reply from " + datanodes[j] + ": md5=" + md5);
                }
            } catch (InvalidBlockTokenException ibte) {
                if (i > lastRetriedIndex) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Got access token error in response to OP_BLOCK_CHECKSUM " + "for file " + src
                                + " for block " + block + " from datanode " + datanodes[j]
                                + ". Will retry the block once.");
                    }
                    lastRetriedIndex = i;
                    done = true; // actually it's not done; but we'll retry
                    i--; // repeat at i-th block
                    refetchBlocks = true;
                    break;
                }
            } catch (IOException ie) {
                LOG.warn("src=" + src + ", datanodes[" + j + "]=" + datanodes[j], ie);
            } finally {
                IOUtils.closeStream(in);
                IOUtils.closeStream(out);
            }
        }

        if (!done) {
            throw new IOException("Fail to get block MD5 for " + block);
        }
    }

    // compute file MD5
    final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData());
    switch (crcType) {
    case CRC32:
        return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    case CRC32C:
        return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    default:
        // If there is no block allocated for the file,
        // return one with the magic entry that matches what previous
        // hdfs versions return.
        if (locatedblocks.size() == 0) {
            return new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
        }

        // we should never get here since the validity was checked
        // when getCrcType() was called above.
        return null;
    }
}
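
In this example the buffer is a pure accumulator: each datanode's per-block MD5 is appended to md5out, and the file-level checksum is the MD5 of the concatenated block digests. Note that the final digest is computed over getData() alone; pairing it with getLength(), as the other examples do, would be the defensive choice if the backing array could outgrow the written bytes.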

From source file:com.mongodb.hadoop.io.BSONWritable.java

License:Apache License

/**
 * Used by child copy constructors.
 */
protected synchronized void copy(final Writable other) {
    if (other != null) {
        try {
            DataOutputBuffer out = new DataOutputBuffer();
            other.write(out);
            DataInputBuffer in = new DataInputBuffer();
            in.reset(out.getData(), out.getLength());
            readFields(in);

        } catch (IOException e) {
            throw new IllegalArgumentException("map cannot be copied: " + e.getMessage());
        }

    } else {
        throw new IllegalArgumentException("source map cannot be null");
    }
}
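
The copy above is a generic Writable round trip: serialize through a DataOutputBuffer, then rehydrate through a DataInputBuffer pointed at the valid region. A standalone sketch of the same pattern, as an illustrative helper that is not part of the quoted project:

import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Writable;

public final class WritableCopy {
    /** Copy src into dst by serializing and deserializing it in memory. */
    public static void copy(Writable src, Writable dst) throws IOException {
        DataOutputBuffer out = new DataOutputBuffer();
        src.write(out);                           // serialize the source
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength()); // expose only the valid bytes
        dst.readFields(in);                       // rebuild the destination
    }
}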

From source file:com.netflix.suro.input.thrift.MessageSetSerDe.java

License:Apache License

@Override
public byte[] serialize(TMessageSet payload) {
    DataOutputBuffer outBuffer = new DataOutputBuffer();

    try {
        outBuffer.reset();

        outBuffer.writeUTF(payload.getApp());
        outBuffer.writeInt(payload.getNumMessages());
        outBuffer.writeByte(payload.getCompression());
        outBuffer.writeLong(payload.getCrc());
        outBuffer.writeInt(payload.getMessages().length);
        outBuffer.write(payload.getMessages());

        return ByteBuffer.wrap(outBuffer.getData(), 0, outBuffer.getLength()).array();
    } catch (Exception e) {
        throw new RuntimeException("Failed to serialize TMessageSet: " + e.getMessage(), e);
    } finally {
        Closeables.closeQuietly(outBuffer);
    }
}
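
One caveat in this example: ByteBuffer.array() returns the entire backing array regardless of the offset and length passed to wrap(), so the payload returned here includes any unused capacity of the DataOutputBuffer. Assuming callers want exactly the written bytes, a trimmed copy would be the safer return value:

// Hypothetical replacement for the return statement above: copy exactly
// the valid region instead of exposing the buffer's backing array.
return java.util.Arrays.copyOf(outBuffer.getData(), outBuffer.getLength());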

From source file:com.salesforce.phoenix.client.TestClientKeyValueLocal.java

License:Apache License

private void validate(KeyValue kv, byte[] row, byte[] family, byte[] qualifier, long ts, Type type,
        byte[] value) throws IOException {
    DataOutputBuffer out = new DataOutputBuffer();
    kv.write(out);
    out.close();
    byte[] data = out.getData();
    // read it back in
    KeyValue read = new KeyValue();
    DataInputBuffer in = new DataInputBuffer();
    in.reset(data, data.length);
    read.readFields(in);
    in.close();

    // validate that its the same
    assertTrue("Row didn't match!", Bytes.equals(row, read.getRow()));
    assertTrue("Family didn't match!", Bytes.equals(family, read.getFamily()));
    assertTrue("Qualifier didn't match!", Bytes.equals(qualifier, read.getQualifier()));
    assertTrue("Value didn't match!", Bytes.equals(value, read.getValue()));
    assertEquals("Timestamp didn't match", ts, read.getTimestamp());
    assertEquals("Type didn't match", type.getCode(), read.getType());
}