Example usage for org.apache.hadoop.io DataOutputBuffer DataOutputBuffer

Introduction

This page lists example usages of the no-argument constructor of org.apache.hadoop.io.DataOutputBuffer, collected from open-source projects.

Prototype

public DataOutputBuffer() 

Document

Constructs a new empty buffer.
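
Before the project-specific examples below, here is a minimal, self-contained sketch of the typical write / read-back / reuse cycle. It relies only on methods DataOutputBuffer actually provides (it extends java.io.DataOutputStream and adds getData(), getLength() and reset()); the class name and values are illustrative.

import java.util.Arrays;

import org.apache.hadoop.io.DataOutputBuffer;

public class DataOutputBufferSketch {
    public static void main(String[] args) throws Exception {
        DataOutputBuffer out = new DataOutputBuffer(); // empty, growable buffer

        out.writeInt(42);      // all DataOutput methods are inherited
        out.writeUTF("hello");

        // getData() exposes the raw backing array, which is usually larger
        // than the number of valid bytes; always pair it with getLength().
        byte[] trimmed = Arrays.copyOf(out.getData(), out.getLength());
        System.out.println("wrote " + trimmed.length + " bytes");

        out.reset(); // rewind to zero so the buffer can be reused
    }
}

Most of the examples that follow are variations on this pattern: serialize something into the buffer, then hand off exactly getLength() bytes of getData().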

Usage

From source file:com.ibm.jaql.lang.expr.index.HashtableServer.java

License:Apache License

@Override
public void run() {
    JsonValue readKey = null;
    JsonValue[] keys = new JsonValue[0];

    try {
        while (true) {
            byte command = in.readByte();
            switch (command) {
            // GET Key -> FOUND Value | NOT_FOUND
            case GET_CMD: {
                readKey = table.keySerializer.read(in, readKey);
                byte[] value = table.table.get(readKey);
                if (value == null) {
                    out.write(NOT_FOUND_CMD);
                } else {
                    out.write(FOUND_CMD);
                    out.write(value);
                }
                break;
            }
            // GETN n, [Key]*n -> OK n [FOUND Value | NOT_FOUND]*n  OK
            case GETN_CMD: {
                int n = BaseUtil.readVUInt(in);
                if (n > keys.length || // bigger array required
                        3 * n < keys.length) // array is way too big
                {
                    keys = new JsonValue[n];
                }
                for (int i = 0; i < n; i++) {
                    keys[i] = table.keySerializer.read(in, keys[i]);
                }
                out.write(OK_CMD);
                BaseUtil.writeVUInt(out, n);
                for (int i = 0; i < n; i++) {
                    byte[] value = table.table.get(keys[i]);
                    if (value == null) {
                        out.write(NOT_FOUND_CMD);
                    } else {
                        out.write(FOUND_CMD);
                        out.write(value);
                    }
                }
                out.write(OK_CMD);
                break;
            }
            // USE tableId string, age msec, lease msec
            //   -> OK lease, schema [ Key, Value ], 
            //    | BUILD 
            case USE_CMD: {
                if (table != null) {
                    HashMapCache.instance.release(table);
                    table = null;
                }
                JsonString tableId = (JsonString) defaultSerializer.read(in, null);
                long ageMS = BaseUtil.readVSLong(in);
                long leaseMS = BaseUtil.readVSLong(in);

                table = HashMapCache.instance.get(tableId.toString(), ageMS, leaseMS);
                if (table.isBuilt()) // The table is good to go
                {
                    out.write(OK_CMD);
                    BaseUtil.writeVSLong(out, 0); // TODO: implement leases
                    defaultSerializer.write(out, table.schema);
                } else // We need to build the table
                {
                    out.write(BUILD_CMD);
                    out.flush();

                    // SCHEMA schema [Key,Value] (PUT key, value)* OK -> OK
                    command = in.readByte();
                    if (command == RELEASE_CMD) {
                        // The client couldn't build the table, so just release it
                        HashMapCache.instance.release(table);
                        break;
                    }
                    if (command != SCHEMA_CMD) {
                        throw new ProtocolException("expected SCHEMA");
                    }
                    table.setSchema((JsonSchema) defaultSerializer.read(in, null));
                    DataOutputBuffer buf = new DataOutputBuffer();

                    System.err.println("building hashtable " + table.tableId);

                    while ((command = in.readByte()) == PUT_CMD) {
                        // TODO: we need to use a spilling hashtable to avoid memory overflows...
                        // TODO: we could at least pack the values more tightly 
                        buf.reset();
                        JsonValue key = table.keySerializer.read(in, null); // Be sure NOT to reuse the key here!
                        table.valueSerializer.copy(in, buf);
                        byte[] val = new byte[buf.getLength()];
                        System.arraycopy(buf.getData(), 0, val, 0, val.length);
                        table.table.put(key, val);
                    }
                    if (command != OK_CMD) {
                        throw new ProtocolException("expected OK");
                    }
                    HashMapCache.instance.doneBuilding(table);
                    out.write(OK_CMD);
                    System.err.println("built hashtable " + table.tableId);
                }
                break;
            }
            // RELEASE -> OK
            case RELEASE_CMD: {
                if (table != null) {
                    HashMapCache.instance.release(table);
                    table = null;
                }
                out.write(OK_CMD);
                break;
            }
            // LIST_TABLES -> (FOUND tableId built age lease schema numEntries)* OK
            // GET_ALL -> (FOUND key value)* OK
            // UNDEFINE tableId -> OK | NOT_FOUND
            // UNDEFINE_ALL -> OK
            default:
                throw new ProtocolException("invalid command code");
            }
            out.flush();
        }
    } catch (EOFException e) {
        // ignored
    } catch (Exception e) {
        // log and exit thread
        e.printStackTrace();
    } finally {
        if (table != null) {
            HashMapCache.instance.release(table);
        }
        try {
            socket.close();
        } catch (Exception e) {
            // log and exit thread
            e.printStackTrace();
        }
    }
}
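
Note the buffer handling inside the PUT loop above: a single DataOutputBuffer is allocated outside the loop, reset() before each value, and then trimmed with System.arraycopy so that exactly getLength() bytes are stored per entry. Storing getData() directly would both alias the same buffer across entries and retain its full capacity in the hashtable.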

From source file:com.ibm.jaql.lang.expr.io.InputSplitsFn.java

License:Apache License

@Override
public JsonIterator iter(Context context) throws Exception {
    JsonValue iod = exprs[0].eval(context);

    Adapter adapter = JaqlUtil.getAdapterStore().input.getAdapter(iod);
    if (!(adapter instanceof HadoopInputAdapter)) {
        throw new ClassCastException("i/o descriptor must be for an input format");
    }
    HadoopInputAdapter hia = (HadoopInputAdapter) adapter;
    JobConf conf = new JobConf(); // TODO: allow configuration
    hia.setParallel(conf); // right thing to do?
    hia.configure(conf); // right thing to do?
    int numSplits = conf.getNumMapTasks(); // TODO: allow override
    final InputSplit[] splits = hia.getSplits(conf, numSplits);
    final MutableJsonString className = new MutableJsonString();
    final MutableJsonBinary rawSplit = new MutableJsonBinary();
    final BufferedJsonRecord rec = new BufferedJsonRecord(3);
    final BufferedJsonArray locArr = new BufferedJsonArray();
    rec.add(CLASS_TAG, className);
    rec.add(SPLIT_TAG, rawSplit);
    rec.add(LOCATIONS_TAG, locArr);

    return new JsonIterator(rec) {
        DataOutputBuffer out = new DataOutputBuffer();
        int i = 0;

        @Override
        public boolean moveNext() throws Exception {
            if (i >= splits.length) {
                return false;
            }
            InputSplit split = splits[i++];
            className.setCopy(split.getClass().getName());
            out.reset();
            split.write(out);
            rawSplit.setCopy(out.getData(), out.getLength());
            locArr.clear();
            String[] locs = split.getLocations();
            if (locs != null) {
                for (String loc : locs) {
                    locArr.add(new JsonString(loc));
                }
            }
            return true;
        }
    };
}
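
Here the buffer bridges Hadoop's Writable serialization and Jaql's JSON values: each InputSplit writes itself into the reused buffer, and setCopy(out.getData(), out.getLength()) copies exactly the valid bytes into the binary JSON value. Because setCopy copies, the single buffer can safely be reset() and reused on the next moveNext() call.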

From source file:com.ibm.jaql.lang.expr.io.MakeFileSplitFn.java

License:Apache License

@Override
public JsonRecord eval(Context context) throws Exception {
    if (out == null) {
        out = new DataOutputBuffer();
        className = new MutableJsonString();
        rawSplit = new MutableJsonBinary();
        values = new JsonValue[] { className, rawSplit, null };
        resultRec = new BufferedJsonRecord();
        resultRec.set(NAMES, values, NAMES.length);
    }

    JsonString jfile = (JsonString) exprs[0].eval(context);
    JsonNumber jstart = (JsonNumber) exprs[1].eval(context);
    JsonNumber jlength = (JsonNumber) exprs[2].eval(context);
    JsonArray jhosts = (JsonArray) exprs[3].eval(context);

    String file = jfile.toString();
    long start = jstart.longValueExact();
    long length = jlength.longValueExact();
    if (jhosts == null)
        jhosts = JsonArray.EMPTY;
    String[] hosts = new String[(int) jhosts.count()];
    JsonIterator iter = jhosts.iter();
    for (int i = 0; i < hosts.length; i++) {
        iter.moveNext();
        JsonString jhost = (JsonString) iter.current();
        hosts[i] = jhost.toString();
    }

    FileSplit split = new FileSplit(new Path(file), start, length, hosts);
    className.setCopy(split.getClass().getCanonicalName());
    out.reset();
    split.write(out);
    rawSplit.set(out.getData(), out.getLength());
    values[2] = jhosts;

    return resultRec;
}
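
This is the same lazily-initialized, reused-buffer pattern as the previous example, with one difference worth noting: rawSplit.set(out.getData(), out.getLength()) appears to hand over the backing array rather than copying it (contrast setCopy above), which is only safe as long as the consumer is done with the record before the buffer's next reset().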

From source file:com.inforefiner.hdata.ApplicationMaster.java

License:Apache License

/**
 * Main run function for the application master
 *
 * @throws YarnException
 * @throws IOException
 */
@SuppressWarnings({ "unchecked" })
public void run() throws YarnException, IOException, InterruptedException {
    LOG.info("Starting ApplicationMaster");

    // Note: Credentials, Token, UserGroupInformation, DataOutputBuffer class
    // are marked as LimitedPrivate
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    LOG.info("Executing with tokens:");
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        LOG.info(token);
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Create appSubmitterUgi and add original tokens to it
    String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
    appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
    appSubmitterUgi.addCredentials(credentials);

    AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    startTimelineClient(conf);
    if (timelineClient != null) {
        publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_START,
                domainId, appSubmitterUgi);
    }

    // Setup local RPC Server to accept status requests directly from clients
    // TODO need to setup a protocol for client to be able to communicate to
    // the RPC server
    // TODO use the rpc port info to register with the RM for the client to
    // send requests to this app master

    // Register self with ResourceManager
    // This will start heartbeating to the RM
    appMasterHostname = NetUtils.getHostname();
    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);
    // Dump out information about cluster capability as seen by the
    // resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capabililty of resources in this cluster " + maxVCores);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }

    if (containerVirtualCores > maxVCores) {
        LOG.info("Container virtual cores specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerVirtualCores + ", max=" + maxVCores);
        containerVirtualCores = maxVCores;
    }

    List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
    LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
            + " previous attempts' running containers on AM registration.");
    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());

    int numTotalContainersToRequest = numTotalContainers - previousAMRunningContainers.size();
    // Setup ask for containers from RM
    // Send request for containers to RM
    // Until we get our fully allocated quota, we keep on polling RM for
    // containers
    // Keep looping until all the containers are launched and shell script
    // executed on them ( regardless of success/failure).
    for (int i = 0; i < numTotalContainersToRequest; ++i) {
        ContainerRequest containerAsk = setupContainerAskForRM();
        amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainers);
}
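
The token handling at the top of run() is the standard YARN idiom for shipping security tokens to containers: write the Credentials into a fresh DataOutputBuffer, then wrap only the valid region into a ByteBuffer. A condensed sketch of just that idiom, with the AMRM-token filtering and error handling omitted, might look like this:

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;

public final class TokenBufferSketch {
    /** Serialize the current user's tokens into a container-ready buffer. */
    public static ByteBuffer currentUserTokens() throws IOException {
        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        // Wrap only the valid region; the backing array may be larger.
        return ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
    }
}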

From source file:com.inforefiner.hdata.SubmitClient.java

License:Apache License

/**
 * Main run function for the client
 *
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    if (domainId != null && domainId.length() > 0 && toCreateDomain) {
        prepareTimelineDomain();
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    if (attemptFailuresValidityInterval >= 0) {
        appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
    }

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH;
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    if (!shellCommand.isEmpty()) {
        addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand);
    }

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources,
                StringUtils.join(shellArgs, " "));
    }

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));
    if (domainId != null && domainId.length() > 0) {
        env.put(DSConstants.DISTRIBUTEDSHELLTIMELINEDOMAIN, domainId);
    }

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    if (null != nodeLabelExpression) {
        appContext.setNodeLabelExpression(nodeLabelExpression);
    }
    vargs.add("--priority " + String.valueOf(shellCmdPriority));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null,
            null, null);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    // TODO - what is the range for priority? how to decide?
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?
    Thread t = new Thread(new LogReceiver());
    t.start();
    // Monitor the application
    return monitorApplication(appId);
}
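
When security is enabled, this client uses the same DataOutputBuffer idiom as the ApplicationMaster above: delegation tokens are collected into a Credentials object, written out with writeTokenStorageToStream(), and the valid region of the buffer is wrapped into the ByteBuffer that ContainerLaunchContext's setTokens() expects.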

From source file:com.mellanox.hadoop.mapred.UdaShuffleHandler.java

License:Apache License

public static ByteBuffer serializeServiceData(Token<JobTokenIdentifier> jobToken) throws IOException {
    //TODO these bytes should be versioned
    DataOutputBuffer jobToken_dob = new DataOutputBuffer();
    jobToken.write(jobToken_dob);
    return ByteBuffer.wrap(jobToken_dob.getData(), 0, jobToken_dob.getLength());
}
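
This is the wrap-a-Writable pattern at its smallest: any Writable, here the job token, can be flattened by writing it into a fresh DataOutputBuffer and wrapping getLength() bytes of the backing array.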

From source file:com.mellanox.r4h.DFSClient.java

License:Apache License

/**
 * Get the checksum of the whole file or a range of the file. Note that the
 * range always starts from the beginning of the file.
 *
 * @param src
 *            The file path
 * @param length
 *            the length of the range, i.e., the range is [0, length]
 * @return The checksum
 * @see DistributedFileSystem#getFileChecksum(Path)
 */
public MD5MD5CRC32FileChecksum getFileChecksum(String src, long length) throws IOException {
    checkOpen();
    Preconditions.checkArgument(length >= 0);
    // get block locations for the file range
    LocatedBlocks blockLocations = callGetBlockLocations(namenode, src, 0, length);
    if (null == blockLocations) {
        throw new FileNotFoundException("File does not exist: " + src);
    }
    List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks();
    final DataOutputBuffer md5out = new DataOutputBuffer();
    int bytesPerCRC = -1;
    DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
    long crcPerBlock = 0;
    boolean refetchBlocks = false;
    int lastRetriedIndex = -1;

    // get block checksum for each block
    long remaining = length;
    if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) {
        remaining = Math.min(length, blockLocations.getFileLength());
    }
    for (int i = 0; i < locatedblocks.size() && remaining > 0; i++) {
        if (refetchBlocks) { // refetch to get fresh tokens
            blockLocations = callGetBlockLocations(namenode, src, 0, length);
            if (null == blockLocations) {
                throw new FileNotFoundException("File does not exist: " + src);
            }
            locatedblocks = blockLocations.getLocatedBlocks();
            refetchBlocks = false;
        }
        LocatedBlock lb = locatedblocks.get(i);
        final ExtendedBlock block = lb.getBlock();
        if (remaining < block.getNumBytes()) {
            block.setNumBytes(remaining);
        }
        remaining -= block.getNumBytes();
        final DatanodeInfo[] datanodes = lb.getLocations();

        // try each datanode location of the block
        final int timeout = 3000 * datanodes.length + dfsClientConf.socketTimeout();
        boolean done = false;
        for (int j = 0; !done && j < datanodes.length; j++) {
            DataOutputStream out = null;
            DataInputStream in = null;

            try {
                // connect to a datanode
                IOStreamPair pair = connectToDN(datanodes[j], timeout, lb);
                out = new DataOutputStream(new BufferedOutputStream(pair.out, HdfsConstants.SMALL_BUFFER_SIZE));
                in = new DataInputStream(pair.in);

                if (LOG.isDebugEnabled()) {
                    LOG.debug("write to " + datanodes[j] + ": " + Op.BLOCK_CHECKSUM + ", block=" + block);
                }
                // get block MD5
                new Sender(out).blockChecksum(block, lb.getBlockToken());

                final BlockOpResponseProto reply = BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));

                String logInfo = "for block " + block + " from datanode " + datanodes[j];
                DataTransferProtoUtil.checkBlockOpStatus(reply, logInfo);

                OpBlockChecksumResponseProto checksumData = reply.getChecksumResponse();

                // read byte-per-checksum
                final int bpc = checksumData.getBytesPerCrc();
                if (i == 0) { // first block
                    bytesPerCRC = bpc;
                } else if (bpc != bytesPerCRC) {
                    throw new IOException(
                            "Byte-per-checksum not matched: bpc=" + bpc + " but bytesPerCRC=" + bytesPerCRC);
                }

                // read crc-per-block
                final long cpb = checksumData.getCrcPerBlock();
                if (locatedblocks.size() > 1 && i == 0) {
                    crcPerBlock = cpb;
                }

                // read md5
                final MD5Hash md5 = new MD5Hash(checksumData.getMd5().toByteArray());
                md5.write(md5out);

                // read crc-type
                final DataChecksum.Type ct;
                if (checksumData.hasCrcType()) {
                    ct = PBHelper.convert(checksumData.getCrcType());
                } else {
                    LOG.debug("Retrieving checksum from an earlier-version DataNode: "
                            + "inferring checksum by reading first byte");
                    ct = inferChecksumTypeByReading(lb, datanodes[j]);
                }

                if (i == 0) { // first block
                    crcType = ct;
                } else if (crcType != DataChecksum.Type.MIXED && crcType != ct) {
                    // if crc types are mixed in a file
                    crcType = DataChecksum.Type.MIXED;
                }

                done = true;

                if (LOG.isDebugEnabled()) {
                    if (i == 0) {
                        LOG.debug("set bytesPerCRC=" + bytesPerCRC + ", crcPerBlock=" + crcPerBlock);
                    }
                    LOG.debug("got reply from " + datanodes[j] + ": md5=" + md5);
                }
            } catch (InvalidBlockTokenException ibte) {
                if (i > lastRetriedIndex) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Got access token error in response to OP_BLOCK_CHECKSUM " + "for file " + src
                                + " for block " + block + " from datanode " + datanodes[j]
                                + ". Will retry the block once.");
                    }
                    lastRetriedIndex = i;
                    done = true; // actually it's not done; but we'll retry
                    i--; // repeat at i-th block
                    refetchBlocks = true;
                    break;
                }
            } catch (IOException ie) {
                LOG.warn("src=" + src + ", datanodes[" + j + "]=" + datanodes[j], ie);
            } finally {
                IOUtils.closeStream(in);
                IOUtils.closeStream(out);
            }
        }

        if (!done) {
            throw new IOException("Fail to get block MD5 for " + block);
        }
    }

    // compute file MD5
    final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData());
    switch (crcType) {
    case CRC32:
        return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    case CRC32C:
        return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    default:
        // If there is no block allocated for the file,
        // return one with the magic entry that matches what previous
        // hdfs versions return.
        if (locatedblocks.size() == 0) {
            return new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
        }

        // we should never get here since the validity was checked
        // when getCrcType() was called above.
        return null;
    }
}
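
In this example the buffer is a pure accumulator: each datanode's per-block MD5 is appended to md5out, and the file-level checksum is the MD5 of the concatenated block digests. Note that the final digest is computed over getData() alone; pairing it with getLength(), as the other examples do, would be the defensive choice if the backing array could outgrow the written bytes.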

From source file:com.mongodb.hadoop.io.BSONWritable.java

License:Apache License

/**
 * Used by child copy constructors.
 */
protected synchronized void copy(final Writable other) {
    if (other != null) {
        try {
            DataOutputBuffer out = new DataOutputBuffer();
            other.write(out);
            DataInputBuffer in = new DataInputBuffer();
            in.reset(out.getData(), out.getLength());
            readFields(in);

        } catch (IOException e) {
            throw new IllegalArgumentException("map cannot be copied: " + e.getMessage());
        }

    } else {
        throw new IllegalArgumentException("source map cannot be null");
    }
}
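
The copy above is a generic Writable round trip: serialize through a DataOutputBuffer, then rehydrate through a DataInputBuffer pointed at the valid region. A standalone sketch of the same pattern, as an illustrative helper that is not part of the quoted project:

import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Writable;

public final class WritableCopy {
    /** Copy src into dst by serializing and deserializing it in memory. */
    public static void copy(Writable src, Writable dst) throws IOException {
        DataOutputBuffer out = new DataOutputBuffer();
        src.write(out);                           // serialize the source
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength()); // expose only the valid bytes
        dst.readFields(in);                       // rebuild the destination
    }
}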

From source file:com.netflix.suro.input.thrift.MessageSetSerDe.java

License:Apache License

@Override
public byte[] serialize(TMessageSet payload) {
    DataOutputBuffer outBuffer = new DataOutputBuffer();

    try {
        outBuffer.reset();

        outBuffer.writeUTF(payload.getApp());
        outBuffer.writeInt(payload.getNumMessages());
        outBuffer.writeByte(payload.getCompression());
        outBuffer.writeLong(payload.getCrc());
        outBuffer.writeInt(payload.getMessages().length);
        outBuffer.write(payload.getMessages());

        return ByteBuffer.wrap(outBuffer.getData(), 0, outBuffer.getLength()).array();
    } catch (Exception e) {
        throw new RuntimeException("Failed to serialize TMessageSet: " + e.getMessage(), e);
    } finally {
        Closeables.closeQuietly(outBuffer);
    }
}
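
One caveat in this example: ByteBuffer.array() returns the entire backing array regardless of the offset and length passed to wrap(), so the payload returned here includes any unused capacity of the DataOutputBuffer. Assuming callers want exactly the written bytes, a trimmed copy would be the safer return value:

// Hypothetical replacement for the return statement above: copy exactly
// the valid region instead of exposing the buffer's backing array.
return java.util.Arrays.copyOf(outBuffer.getData(), outBuffer.getLength());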

From source file:com.salesforce.phoenix.client.TestClientKeyValueLocal.java

License:Apache License

private void validate(KeyValue kv, byte[] row, byte[] family, byte[] qualifier, long ts, Type type,
        byte[] value) throws IOException {
    DataOutputBuffer out = new DataOutputBuffer();
    kv.write(out);
    out.close();
    byte[] data = out.getData();
    // read it back in
    KeyValue read = new KeyValue();
    DataInputBuffer in = new DataInputBuffer();
    in.reset(data, data.length);
    read.readFields(in);
    in.close();

    // validate that its the same
    assertTrue("Row didn't match!", Bytes.equals(row, read.getRow()));
    assertTrue("Family didn't match!", Bytes.equals(family, read.getFamily()));
    assertTrue("Qualifier didn't match!", Bytes.equals(qualifier, read.getQualifier()));
    assertTrue("Value didn't match!", Bytes.equals(value, read.getValue()));
    assertEquals("Timestamp didn't match", ts, read.getTimestamp());
    assertEquals("Type didn't match", type.getCode(), read.getType());
}