Example usage for java.io DataInputStream read

List of usage examples for java.io DataInputStream read

Introduction

In this page you can find the example usage for java.io DataInputStream read.

Prototype

public final int read(byte b[], int off, int len) throws IOException 

Source Link

Document

Reads up to len bytes of data from the contained input stream into an array of bytes.

Usage

From source file:com.splout.db.dnode.HttpFileExchanger.java

@Override
public void handle(HttpExchange exchange) throws IOException {
    DataInputStream iS = null;
    FileOutputStream writer = null;
    File dest = null;//from   w  ww .j  av  a2s  .  c o  m

    String tablespace = null;
    Integer partition = null;
    Long version = null;

    try {
        iS = new DataInputStream(new GZIPInputStream(exchange.getRequestBody()));
        String fileName = exchange.getRequestHeaders().getFirst("filename");
        tablespace = exchange.getRequestHeaders().getFirst("tablespace");
        partition = Integer.valueOf(exchange.getRequestHeaders().getFirst("partition"));
        version = Long.valueOf(exchange.getRequestHeaders().getFirst("version"));

        dest = new File(
                new File(tempDir,
                        DNodeHandler.getLocalStoragePartitionRelativePath(tablespace, partition, version)),
                fileName);

        // just in case, avoid copying the same file concurrently
        // (but we also shouldn't avoid this in other levels of the app)
        synchronized (currentTransfersMonitor) {
            if (currentTransfers.containsKey(dest.toString())) {
                throw new IOException("Incoming file already being transferred - " + dest);
            }
            currentTransfers.put(dest.toString(), new Object());
        }

        if (!dest.getParentFile().exists()) {
            dest.getParentFile().mkdirs();
        }
        if (dest.exists()) {
            dest.delete();
        }

        writer = new FileOutputStream(dest);
        byte[] buffer = new byte[config.getInt(FetcherProperties.DOWNLOAD_BUFFER)];

        Checksum checkSum = new CRC32();

        // 1- Read file size
        long fileSize = iS.readLong();
        log.debug("Going to read file [" + fileName + "] of size: " + fileSize);
        // 2- Read file contents
        long readSoFar = 0;

        do {
            long missingBytes = fileSize - readSoFar;
            int bytesToRead = (int) Math.min(missingBytes, buffer.length);
            int read = iS.read(buffer, 0, bytesToRead);
            checkSum.update(buffer, 0, read);
            writer.write(buffer, 0, read);
            readSoFar += read;
            callback.onProgress(tablespace, partition, version, dest, fileSize, readSoFar);
        } while (readSoFar < fileSize);

        // 3- Read CRC
        long expectedCrc = iS.readLong();
        if (expectedCrc == checkSum.getValue()) {
            log.info("File [" + dest.getAbsolutePath() + "] received -> Checksum -- " + checkSum.getValue()
                    + " matches expected CRC [OK]");
            callback.onFileReceived(tablespace, partition, version, dest);
        } else {
            log.error("File received [" + dest.getAbsolutePath() + "] -> Checksum -- " + checkSum.getValue()
                    + " doesn't match expected CRC: " + expectedCrc);
            callback.onBadCRC(tablespace, partition, version, dest);
            dest.delete();
        }
    } catch (Throwable t) {
        log.error(t);
        callback.onError(t, tablespace, partition, version, dest);
        if (dest != null && dest.exists()
                && !t.getMessage().contains("Incoming file already being transferred")) {
            dest.delete();
        }
    } finally {
        if (writer != null) {
            writer.close();
        }
        if (iS != null) {
            iS.close();
        }
        if (dest != null) {
            currentTransfers.remove(dest.toString());
        }
    }
}

From source file:edu.cmu.lemurproject.WarcRecord.java

private static byte[] readNextRecord(DataInputStream in, StringBuffer headerBuffer) throws IOException {
    if (in == null) {
        return null;
    }/*  w w  w  .j av a  2  s  .  c  o  m*/
    if (headerBuffer == null) {
        return null;
    }

    String line = null;
    boolean foundMark = false;
    byte[] retContent = null;

    // cannot be using a buffered reader here!!!!
    // just read the header
    // first - find our WARC header
    while ((!foundMark) && ((line = readLineFromInputStream(in)) != null)) {
        if (line.startsWith(WARC_VERSION)) {
            WARC_VERSION_LINE = line;
            foundMark = true;
        }
    }

    // no WARC mark?
    if (!foundMark) {
        return null;
    }

    //LOG.info("Found WARC_VERSION");

    int contentLength = -1;
    // read until we see contentLength then an empty line
    // (to handle malformed ClueWeb09 headers that have blank lines)
    // get the content length and set our retContent
    for (line = readLineFromInputStream(in).trim(); line.length() > 0
            || contentLength < 0; line = readLineFromInputStream(in).trim()) {

        if (line.length() > 0) {
            headerBuffer.append(line);
            headerBuffer.append(LINE_ENDING);

            // find the content length designated by Content-Length: <length>
            String[] parts = line.split(":", 2);
            if (parts.length == 2 && parts[0].equals("Content-Length")) {
                try {
                    contentLength = Integer.parseInt(parts[1].trim());
                    //LOG.info("WARC record content length: " + contentLength);
                } catch (NumberFormatException nfEx) {
                    contentLength = -1;
                }
            }
        }
    }

    // now read the bytes of the content
    retContent = new byte[contentLength];
    int totalWant = contentLength;
    int totalRead = 0;
    //
    // LOOP TO REMOVE LEADING CR * LF 
    // To prevent last few characters from being cut off of the content
    // when reading
    //
    while ((totalRead == 0) && (totalRead < contentLength)) {
        byte CR = in.readByte();
        byte LF = in.readByte();
        if ((CR != 13) && (LF != 10)) {
            retContent[0] = CR;

            // Minor change to process Common Crawl WET files.
            // Handle conversion records with single character line endings.
            if (retContent.length > 1) {
                retContent[1] = LF;
                totalRead = 2;
            } else
                totalRead = 1;

            totalWant = contentLength - totalRead;
        }
    }
    //
    //
    //
    while (totalRead < contentLength) {
        try {
            int numRead = in.read(retContent, totalRead, totalWant);
            if (numRead < 0) {
                return null;
            } else {
                totalRead += numRead;
                totalWant = contentLength - totalRead;
            } // end if (numRead < 0) / else
        } catch (EOFException eofEx) {
            // resize to what we have
            if (totalRead > 0) {
                byte[] newReturn = new byte[totalRead];
                System.arraycopy(retContent, 0, newReturn, 0, totalRead);
                return newReturn;
            } else {
                return null;
            }
        } // end try/catch (EOFException)
    } // end while (totalRead < contentLength)

    return retContent;
}

From source file:org.lemurproject.galago.core.parse.WARCRecord.java

private static byte[] readNextRecord(DataInputStream in, StringBuffer headerBuffer) throws IOException {
    if (in == null) {
        return null;
    }//from www  .  j  a  va  2 s  .co  m
    if (headerBuffer == null) {
        return null;
    }

    String line = null;
    //    boolean foundMark = false;
    byte[] retContent = null;

    boolean foundMark = findNextWARCRecord(in);

    //    // cannot be using a buffered reader here!!!!
    //    // just read the header
    //    // first - find our WARC header
    //    while ((!foundMark) && ((line = readLineFromInputStream(in)) != null)) {
    //      if (line.startsWith(WARC_VERSION)) {
    //        WARC_VERSION_LINE = line;
    //        foundMark = true;
    //      }
    //    }

    // no WARC mark?
    if (!foundMark) {
        return null;
    }

    // LOG.info("Found WARC_VERSION");

    int contentLength = -1;
    // read until we see contentLength then an empty line
    // (to handle malformed ClueWeb09 headers that have blank lines)
    // get the content length and set our retContent
    for (line = readLineFromInputStream(in).trim(); line.length() > 0
            || contentLength < 0; line = readLineFromInputStream(in).trim()) {

        if (line.length() > 0) {
            headerBuffer.append(line);
            headerBuffer.append(LINE_ENDING);

            // find the content length designated by Content-Length: <length>
            String[] parts = line.split(":", 2);
            if (parts.length == 2 && parts[0].equals("Content-Length")) {
                try {
                    contentLength = Integer.parseInt(parts[1].trim());
                    // LOG.info("WARC record content length: " + contentLength);

                    // if this document is too long
                    if (contentLength > MAX_CONTENT_LENGTH) {
                        in.skip(contentLength);
                        if (!findNextWARCRecord(in)) {
                            return null;
                        }
                        headerBuffer.delete(0, headerBuffer.length());
                    }

                } catch (NumberFormatException nfEx) {
                    contentLength = -1;
                }
            }
        }
    }

    // now read the bytes of the content
    retContent = new byte[contentLength];
    int totalWant = contentLength;
    int totalRead = 0;
    //
    // LOOP TO REMOVE LEADING CR * LF 
    // To prevent last few characters from being cut off of the content
    // when reading
    //
    while ((totalRead == 0) && (totalRead < contentLength)) {
        byte CR = in.readByte();
        byte LF = in.readByte();
        if ((CR != 13) && (LF != 10)) {
            retContent[0] = CR;
            retContent[1] = LF;
            totalRead = 2;
            totalWant = contentLength - totalRead;
        }
    }
    //
    //
    //
    while (totalRead < contentLength) {
        try {
            int numRead = in.read(retContent, totalRead, totalWant);
            if (numRead < 0) {
                return null;
            } else {
                totalRead += numRead;
                totalWant = contentLength - totalRead;
            } // end if (numRead < 0) / else
        } catch (EOFException eofEx) {
            // resize to what we have
            if (totalRead > 0) {
                byte[] newReturn = new byte[totalRead];
                System.arraycopy(retContent, 0, newReturn, 0, totalRead);
                return newReturn;
            } else {
                return null;
            }
        } // end try/catch (EOFException)
    } // end while (totalRead < contentLength)

    return retContent;
}

From source file:com.chinamobile.bcbsp.partition.HashWithBalancerWritePartition.java

/**
 * This method is used to partition graph vertexes. Writing Each vertex to the
 * corresponding partition. In this method calls recordParse method to create
 * an HeadNode object. The last call partitioner's getPartitionId method to
 * calculate the HeadNode belongs to partition's id. If the HeadNode belongs
 * local partition then written to the local partition or send it to the
 * appropriate partition./*from  w  w w. j  av a  2 s. c om*/
 * @param recordReader The recordreader of the split.
 * @throws IOException The io exception
 * @throws InterruptedException The Interrupted Exception
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
    int headNodeNum = 0;
    int local = 0;
    int send = 0;
    int lost = 0;
    ThreadPool tpool = new ThreadPool(this.sendThreadNum);
    int staffNum = this.staff.getStaffNum();
    BytesWritable kbytes = new BytesWritable();
    int ksize = 0;
    BytesWritable vbytes = new BytesWritable();
    int vsize = 0;
    DataOutputBuffer bb = new DataOutputBuffer();
    int bufferSize = (int) ((this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER) * PART);
    int dataBufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
            / (this.staff.getStaffNum() + this.sendThreadNum);
    byte[] buffer = new byte[bufferSize];
    int bufindex = 0;
    SerializationFactory sFactory = new SerializationFactory(new Configuration());
    Serializer<IntWritable> psserializer = sFactory.getSerializer(IntWritable.class);
    byte[] pidandsize = new byte[TIME * CONTAINERNUMBER * CONTAINERNUMBER];
    int psindex = 0;
    BytesWritable pidbytes = new BytesWritable();
    int psize = 0;
    BytesWritable sizebytes = new BytesWritable();
    int ssize = 0;
    try {
        this.keyserializer.open(bb);
        this.valueserializer.open(bb);
        psserializer.open(bb);
    } catch (IOException e) {
        throw e;
    }
    String path = "/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID();
    File dir = new File("/tmp/bcbsp/" + this.staff.getJobID());
    dir.mkdir();
    dir = new File("/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID());
    dir.mkdir();
    ArrayList<File> files = new ArrayList<File>();
    try {
        File file = new File(path + "/" + "data" + ".txt");
        files.add(file);
        DataOutputStream dataWriter = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(path + "/" + "data" + ".txt", true)));
        DataInputStream dataReader = new DataInputStream(
                new BufferedInputStream(new FileInputStream(path + "/" + "data" + ".txt")));
        File filet = new File(path + "/" + "pidandsize" + ".txt");
        files.add(filet);
        DataOutputStream psWriter = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(path + "/" + "pidandsize" + ".txt", true)));
        DataInputStream psReader = new DataInputStream(
                new BufferedInputStream(new FileInputStream(path + "/" + "pidandsize" + ".txt")));
        while (recordReader != null && recordReader.nextKeyValue()) {
            headNodeNum++;
            Text key = new Text(recordReader.getCurrentKey().toString());
            Text value = new Text(recordReader.getCurrentValue().toString());
            int pid = -1;
            Text vertexID = this.recordParse.getVertexID(key);
            if (vertexID != null) {
                pid = this.partitioner.getPartitionID(vertexID);
            } else {
                lost++;
                continue;
            }
            if (this.counter.containsKey(pid)) {
                this.counter.put(pid, (this.counter.get(pid) + 1));
            } else {
                this.counter.put(pid, 1);
            }
            bb.reset();
            this.keyserializer.serialize(key);
            kbytes.set(bb.getData(), 0, bb.getLength());
            ksize = kbytes.getLength();
            bb.reset();
            this.valueserializer.serialize(value);
            vbytes.set(bb.getData(), 0, bb.getLength());
            vsize = vbytes.getLength();
            bb.reset();
            psserializer.serialize(new IntWritable(ksize + vsize));
            sizebytes.set(bb.getData(), 0, bb.getLength());
            ssize = sizebytes.getLength();
            bb.reset();
            psserializer.serialize(new IntWritable(pid));
            pidbytes.set(bb.getData(), 0, bb.getLength());
            psize = pidbytes.getLength();
            if ((pidandsize.length - psindex) > (ssize + psize)) {
                System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
                psindex += ssize;
                System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
                psindex += psize;
            } else {
                psWriter.write(pidandsize, 0, psindex);
                psindex = 0;
                System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
                psindex += ssize;
                System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
                psindex += psize;
            }
            if ((buffer.length - bufindex) > (ksize + vsize)) {
                System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
                bufindex += ksize;
                System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
                bufindex += vsize;
            } else if (buffer.length < (ksize + vsize)) {
                dataWriter.write(buffer, 0, bufindex);
                bufindex = 0;
                LOG.info("This is a super record");
                dataWriter.write(kbytes.getBytes(), 0, ksize);
                dataWriter.write(vbytes.getBytes(), 0, vsize);
            } else {
                dataWriter.write(buffer, 0, bufindex);
                bufindex = 0;
                System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
                bufindex += ksize;
                System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
                bufindex += vsize;
            }
        }
        if (psindex != 0) {
            psWriter.write(pidandsize, 0, psindex);
        }
        if (bufindex != 0) {
            dataWriter.write(buffer, 0, bufindex);
            bufindex = 0;
        }
        dataWriter.close();
        dataWriter = null;
        psWriter.close();
        psWriter = null;
        buffer = null;
        pidandsize = null;
        this.ssrc.setDirFlag(new String[] { "3" });
        this.ssrc.setCounter(this.counter);
        HashMap<Integer, Integer> hashBucketToPartition = this.sssc.loadDataInBalancerBarrier(ssrc,
                Constants.PARTITION_TYPE.HASH);
        this.staff.setHashBucketToPartition(hashBucketToPartition);
        byte[][] databuf = new byte[staffNum][dataBufferSize];
        int[] databufindex = new int[staffNum];
        try {
            IntWritable pid = new IntWritable();
            IntWritable size = new IntWritable();
            int belongPid = 0;
            while (true) {
                size.readFields(psReader);
                pid.readFields(psReader);
                belongPid = hashBucketToPartition.get(pid.get());
                if (belongPid != this.staff.getPartition()) {
                    send++;
                } else {
                    local++;
                }
                if ((databuf[belongPid].length - databufindex[belongPid]) > size.get()) {
                    dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
                    databufindex[belongPid] += size.get();
                } else if (databuf[belongPid].length < size.get()) {
                    LOG.info("This is a super record");
                    byte[] tmp = new byte[size.get()];
                    dataReader.read(tmp, 0, size.get());
                    if (belongPid == this.staff.getPartition()) {
                        DataInputStream reader = new DataInputStream(
                                new BufferedInputStream(new ByteArrayInputStream(tmp)));
                        try {
                            boolean stop = true;
                            while (stop) {
                                Text key = new Text();
                                key.readFields(reader);
                                Text value = new Text();
                                value.readFields(reader);
                                if (key.getLength() > 0 && value.getLength() > 0) {
                                    Vertex vertex = this.recordParse.recordParse(key.toString(),
                                            value.toString());
                                    if (vertex == null) {
                                        lost++;
                                        continue;
                                    }
                                    this.staff.getGraphData().addForAll(vertex);
                                } else {
                                    stop = false;
                                }
                            }
                        } catch (IOException e) {
                            LOG.info("IO exception: " + e.getStackTrace());
                        }
                    } else {
                        ThreadSignle t = tpool.getThread();
                        while (t == null) {
                            t = tpool.getThread();
                        }
                        t.setWorker(
                                this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
                        t.setJobId(staff.getJobID());
                        t.setTaskId(staff.getStaffID());
                        t.setBelongPartition(belongPid);
                        BytesWritable data = new BytesWritable();
                        data.set(tmp, 0, size.get());
                        t.setData(data);
                        LOG.info("Using Thread is: " + t.getThreadNumber());
                        t.setStatus(true);
                    }
                    tmp = null;
                } else {
                    if (belongPid == this.staff.getPartition()) {
                        DataInputStream reader = new DataInputStream(new BufferedInputStream(
                                new ByteArrayInputStream(databuf[belongPid], 0, databufindex[belongPid])));
                        try {
                            boolean stop = true;
                            while (stop) {
                                Text key = new Text();
                                key.readFields(reader);
                                Text value = new Text();
                                value.readFields(reader);
                                if (key.getLength() > 0 && value.getLength() > 0) {
                                    Vertex vertex = this.recordParse.recordParse(key.toString(),
                                            value.toString());
                                    if (vertex == null) {
                                        lost++;
                                        continue;
                                    }
                                    this.staff.getGraphData().addForAll(vertex);
                                } else {
                                    stop = false;
                                }
                            }
                        } catch (IOException e) {
                            LOG.info("IO exception: " + e.getStackTrace());
                        }
                    } else {
                        ThreadSignle t = tpool.getThread();
                        while (t == null) {
                            t = tpool.getThread();
                        }
                        t.setWorker(
                                this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
                        t.setJobId(staff.getJobID());
                        t.setTaskId(staff.getStaffID());
                        t.setBelongPartition(belongPid);
                        BytesWritable data = new BytesWritable();
                        data.set(databuf[belongPid], 0, databufindex[belongPid]);
                        t.setData(data);
                        LOG.info("Using Thread is: " + t.getThreadNumber());
                        t.setStatus(true);
                    }
                    databufindex[belongPid] = 0;
                    dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
                    databufindex[belongPid] += size.get();
                }
            }
        } catch (EOFException ex) {
            LOG.error("[write]", ex);
        }
        for (int i = 0; i < staffNum; i++) {
            if (databufindex[i] != 0) {
                if (i == this.staff.getPartition()) {
                    DataInputStream reader = new DataInputStream(
                            new BufferedInputStream(new ByteArrayInputStream(databuf[i], 0, databufindex[i])));
                    try {
                        boolean stop = true;
                        while (stop) {
                            Text key = new Text();
                            key.readFields(reader);
                            Text value = new Text();
                            value.readFields(reader);
                            if (key.getLength() > 0 && value.getLength() > 0) {
                                Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                                if (vertex == null) {
                                    lost++;
                                    continue;
                                }
                                this.staff.getGraphData().addForAll(vertex);
                            } else {
                                stop = false;
                            }
                        }
                    } catch (IOException e) {
                        LOG.info("IO exception: " + e.getStackTrace());
                    }
                } else {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(i);
                    BytesWritable data = new BytesWritable();
                    data.set(databuf[i], 0, databufindex[i]);
                    t.setData(data);
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    t.setStatus(true);
                }
            }
        }
        dataReader.close();
        dataReader = null;
        psReader.close();
        psReader = null;
        for (File f : files) {
            f.delete();
        }
        dir.delete();
        dir = new File(path.substring(0, path.lastIndexOf('/')));
        dir.delete();
        tpool.cleanup();
        tpool = null;
        databuf = null;
        databufindex = null;
        this.counter = null;
        LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
        LOG.info("The number of vertices that were put into the partition: " + local);
        LOG.info("The number of vertices that were sent to other partitions: " + send);
        LOG.info("The number of verteices in the partition that cound not be " + "parsed:" + lost);
    } catch (IOException e) {
        throw e;
    } catch (InterruptedException e) {
        throw e;
    } finally {
        for (File f : files) {
            f.delete();
        }
        dir.delete();
        dir = new File(path.substring(0, path.lastIndexOf('/')));
        dir.delete();
    }
}