Example usage for java.io RandomAccessFile getFilePointer

Introduction

On this page you can find example usage for java.io RandomAccessFile getFilePointer.

Prototype

public native long getFilePointer() throws IOException;

Document

Returns the current offset in this file.
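For orientation, here is a minimal self-contained sketch of the call (the file name "example.dat" and the 16-byte read are placeholders, not part of any library): it records the offset returned by getFilePointer(), reads a few bytes, and seeks back to the saved position.

import java.io.IOException;
import java.io.RandomAccessFile;

public class FilePointerDemo {
    public static void main(String[] args) throws IOException {
        // open an existing file read-only; "example.dat" is just a placeholder
        try (RandomAccessFile raf = new RandomAccessFile("example.dat", "r")) {
            long start = raf.getFilePointer();   // 0 right after opening
            byte[] buffer = new byte[16];
            int read = raf.read(buffer);         // reading advances the file pointer
            System.out.println("Read " + read + " bytes, pointer now at " + raf.getFilePointer());
            raf.seek(start);                     // rewind to the saved offset
            System.out.println("After seek(), pointer is back at " + raf.getFilePointer());
        }
    }
}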

Usage

From source file:big.BigZip.java

/**
 * Given a position inside our knowledge base, retrieve the data up to
 * the next file indicator.
 * @param targetFile    The new file that will be created
 * @param startPosition The position from where we start to read the data
 * @param endPosition The position at which reading stops
 * @return true if the data was extracted successfully, false otherwise
 */
public boolean extractBytes(final File targetFile, final long startPosition, final Long endPosition) {
    /**
     * This is a tricky method. We will be extracting data from the BIG
     * archive onto a new file somewhere on disk. The biggest challenge here
     * is to find exactly when the data for the file ends and still do the
     * file copy with a wonderful performance.
     */
    try {
        // enable random access to the BIG file (fast as heck)
        RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r");
        // if the target file exists, try to delete it
        if (targetFile.exists()) {
            targetFile.delete();
            if (targetFile.exists()) {
                // we failed completely
                System.out.println("BIG405 - Failed to delete: " + targetFile.getAbsolutePath());
                return false;
            }
        }
        // we need to create a temporary zip file holder
        File fileZip = new File("temp.zip");
        // delete the zip file if it already exists
        if (fileZip.exists()) {
            fileZip.delete();
            if (fileZip.exists()) {
                // we failed completely
                System.out.println("BIG416 - Failed to delete: " + fileZip.getAbsolutePath());
                return false;
            }
        }

        // create a new file
        RandomAccessFile dataNew = new RandomAccessFile(fileZip, "rw");
        // jump directly to the position where the file is positioned
        dataBIG.seek(startPosition);
        // now we start reading bytes during the mentioned interval
        while (dataBIG.getFilePointer() < endPosition) {
            // read a byte from our BIG archive
            int data = dataBIG.read();
            // write the same byte on the target file
            dataNew.write(data);
        }

        // close the file streams
        dataBIG.close();
        dataNew.close();

        // extract the file
        zip.extract(fileZip, new File("."));
        // delete the temp zip file
        fileZip.delete();

    } catch (FileNotFoundException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    } catch (IOException ex) {
        Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    }

    return true;
}
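A note on the copy loop above: reading and writing one byte at a time works, but for large entries a buffered copy is much faster. A possible buffered variant, still bounded by getFilePointer() (the helper name and the 64 KB buffer size are illustrative, not part of BigZip):

private static void copyRange(RandomAccessFile source, RandomAccessFile target,
        long startPosition, long endPosition) throws IOException {
    // copy [startPosition, endPosition) from source to target in chunks
    byte[] buffer = new byte[64 * 1024];
    source.seek(startPosition);
    while (source.getFilePointer() < endPosition) {
        long remaining = endPosition - source.getFilePointer();
        int toRead = (int) Math.min(buffer.length, remaining);
        int read = source.read(buffer, 0, toRead);
        if (read < 0) {
            break; // hit end of file earlier than expected
        }
        target.write(buffer, 0, read);
    }
}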

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

/** resubmit failed items 
 *
 * @param loader
 */
public void requeueFailedItems(CrawlQueueLoader loader) throws IOException {
    synchronized (this) {
        _queueState = QueueState.QUEUEING;
    }
    RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {

        OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
        URLFP fingerprint = new URLFP();

        while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
            item.deserialize(fixedDataReader);
            boolean queueItem = false;
            if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS)) {

                if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS)) {
                    queueItem = (item._redirectStatus != 0);

                    if (!queueItem) {
                        if (item._redirectHttpResult != 200 && item._redirectHttpResult != 404) {
                            queueItem = true;
                        }
                    }
                } else {
                    queueItem = (item._crawlStatus != 0);

                    if (!queueItem) {
                        if (item._httpResultCode != 200 && item._httpResultCode != 404) {
                            queueItem = true;
                        }
                    }
                }

                if (queueItem) {
                    // seek to string data 
                    stringDataReader.seek(item._stringsOffset);
                    // and skip buffer length 
                    WritableUtils.readVInt(stringDataReader);
                    // and read primary string 
                    String url = stringDataReader.readUTF();
                    // and spill
                    fingerprint.setDomainHash(item._domainHash);
                    fingerprint.setUrlHash(item._urlFingerprint);

                    loader.queueURL(fingerprint, url);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                + CCStringUtils.stringifyException(e));
        _queueState = QueueState.QUEUED;
    } finally {
        fixedDataReader.close();
        stringDataReader.close();
    }
}

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

/** queue uncrawled urls via the CrawlQueueLoader
 *
 * @param loader
 */
public void queueUnCrawledItems(CrawlQueueLoader loader) throws IOException {
    _queueState = QueueState.QUEUEING;

    int metadataVersion = getMetadata().getVersion();

    synchronized (_metadata) {
        // reset metadata PERIOD  
        int urlCount = _metadata.getUrlCount();
        _metadata.clear();
        _metadata.setUrlCount(urlCount);
    }

    RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {

        OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
        URLFP fingerprint = new URLFP();

        while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {

            long position = fixedDataReader.getFilePointer();

            //LOG.info("*** TRYING READ LOCK FOR OFFSET:" + position);
            while (true) {
                // get read lock on position ... 
                try {
                    FileLock lock = fixedDataReader.getChannel().tryLock(position,
                            OnDiskCrawlHistoryItem.ON_DISK_SIZE, false);

                    try {
                        //LOG.info("*** GOT READ LOCK FOR OFFSET:" + position);
                        item.deserialize(fixedDataReader);
                        break;
                    } finally {
                        lock.release();
                        //LOG.info("*** RELEASED READ LOCK FOR OFFSET:" + position);
                    }
                } catch (OverlappingFileLockException e) {
                    LOG.error("*** LOCK CONTENTION AT:" + position + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
            }

            // seek to string data 
            stringDataReader.seek(item._stringsOffset);
            // and skip buffer length 
            WritableUtils.readVInt(stringDataReader);
            // and read primary string 
            String url = stringDataReader.readUTF();
            // setup fingerprint 
            fingerprint.setDomainHash(item._domainHash);
            fingerprint.setUrlHash(item._urlFingerprint);

            // first, if it has not been crawled ever, crawl it no matter what ... 
            boolean crawlItem = !item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS);

            // if it has been crawled ... check list metadata version ... 
            if (!crawlItem && metadataVersion >= 1) {
                // ok this is newer version of the list ... 
                // check refresh time if specified ...
                int refreshIntervalInSeconds = DEFAULT_REFRESH_INTERVAL_IN_SECS;

                if (getMetadata().getRefreshInterval() != 0) {
                    refreshIntervalInSeconds = getMetadata().getRefreshInterval();
                }

                if (item._updateTimestamp > 0) {
                    long timeSinceLastCrawl = item._updateTimestamp;
                    if (System.currentTimeMillis() - timeSinceLastCrawl >= (refreshIntervalInSeconds * 1000)) {
                        crawlItem = true;
                    }
                }
            }

            if (crawlItem) {

                loader.queueURL(fingerprint, url);

                synchronized (_metadata) {
                    // update queued item count 
                    _metadata.setQueuedItemCount(_metadata.getQueuedItemCount() + 1);
                }
            } else {
                updateMetadata(item, _metadata, 0);
            }
            // ok update subdomain stats 
            updateSubDomainMetadataForItemDuringLoad(item, url, fingerprint, crawlItem);
        }

        flushCachedSubDomainMetadata();

        loader.flush();

        _queueState = QueueState.QUEUED;
    } catch (IOException e) {
        LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                + CCStringUtils.stringifyException(e));
        _queueState = QueueState.ERROR;
    } finally {
        fixedDataReader.close();
        stringDataReader.close();
    }
}

From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptorTest.java

private int getVepAnnotationBatch(RandomAccessFile raf, int nVariantsToRead,
        Set<AnnotationComparisonObject> vepAnnotationSet) throws IOException {
    /**
     * Loads VEP annotation
     */
    String newLine;
    int nNonRegulatoryAnnotations = 0;
    int nReadVariants = 0;
    String previousChr = "";
    String previousPosition = "";
    String previousAlt = "";
    String alt;
    long filePointer = 0;

    if (nVariantsToRead > 0) {
        while (((newLine = raf.readLine()) != null) && nReadVariants <= nVariantsToRead) {
            String[] lineFields = newLine.split("\t");
            String[] coordinatesParts = lineFields[1].split(":");
            if (lineFields[2].equals("deletion")) {
                alt = "-";
            } else {
                alt = lineFields[2];
            }
            if (!previousChr.equals(coordinatesParts[0]) || !previousPosition.equals(coordinatesParts[1])
                    || !previousAlt.equals(alt)) {
                nReadVariants++;
            }
            if (nReadVariants <= nVariantsToRead) {
                for (String SOname : lineFields[6].split(",")) {
                    if (SOname.equals("nc_transcript_variant")) {
                        SOname = "non_coding_transcript_variant";
                    }
                    if (!SOname.equals("regulatory_region_variant")) {
                        nNonRegulatoryAnnotations++;
                    }
                    vepAnnotationSet.add(new AnnotationComparisonObject(coordinatesParts[0],
                            coordinatesParts[1], alt, lineFields[3], lineFields[4], SOname));
                }
                previousChr = coordinatesParts[0];
                previousPosition = coordinatesParts[1];
                previousAlt = alt;
                filePointer = raf.getFilePointer();
            }
        }

        raf.seek(filePointer);
    }

    return nNonRegulatoryAnnotations;
}
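The idiom worth noting in this test helper: after each fully consumed variant it remembers the offset returned by getFilePointer(), and once a line belonging to the next batch has been read it calls seek() to rewind to that offset, so the next invocation resumes on the first unconsumed line. A stripped-down sketch of the same pattern (readBatch and the line handling are hypothetical, and batching here is simply by line count):

private static int readBatch(RandomAccessFile raf, int batchSize) throws IOException {
    long resumeOffset = raf.getFilePointer();
    int consumed = 0;
    String line;
    while ((line = raf.readLine()) != null) {
        if (consumed == batchSize) {
            break;                            // this line belongs to the next batch
        }
        // ... process the line here ...
        consumed++;
        resumeOffset = raf.getFilePointer();  // offset just past the consumed line
    }
    raf.seek(resumeOffset);                   // undo the read of the extra line
    return consumed;
}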

From source file:org.commoncrawl.service.listcrawler.HDFSFlusherThread.java

private long generateSequenceFileAndIndex(int itemFlushLimit, RandomAccessFile sourceLogFile, long startPos,
        long endPos, byte[] syncBytes, SequenceFile.Writer writer, DataOutput indexStreamOut,
        ArrayList<FingerprintAndOffsetTuple> tupleListOut) throws IOException {

    byte[] syncCheck = new byte[syncBytes.length];

    // and create a list to hold fingerprint / offset information
    Vector<FingerprintAndOffsetTuple> fpOffsetList = new Vector<FingerprintAndOffsetTuple>();

    long currentPos = startPos;

    LOG.info("Flushing Entries Starting up to offset:" + endPos);
    CacheItemHeader itemHeader = new CacheItemHeader();

    int itemsProcessed = 0;

    boolean ignoreFlushLimit = false;

    // start read 
    while (currentPos < endPos) {

        if ((endPos - currentPos) < LocalLogFileHeader.SYNC_BYTES_SIZE)
            break;

        // seek to current position ... 
        sourceLogFile.seek(currentPos);

        boolean headerLoadFailed = false;

        try {
            // read the item header ... assuming things are good so far ... 
            itemHeader.readHeader(sourceLogFile);
        } catch (IOException e) {
            CacheManager.LOG.error("### Item Header Load At Position:" + currentPos + " Failed With Exception:"
                    + CCStringUtils.stringifyException(e));
            headerLoadFailed = true;
        }

        if (headerLoadFailed) {
            CacheManager.LOG
                    .error("### Item File Corrupt at position:" + currentPos + " Seeking Next Sync Point");
            currentPos += LocalLogFileHeader.SYNC_BYTES_SIZE;
        }

        // if header sync bytes don't match .. then seek to next sync position ... 
        if (headerLoadFailed || !Arrays.equals(itemHeader._sync, syncBytes)) {

            CacheManager.LOG
                    .error("### Item File Corrupt at position:" + currentPos + " Seeking Next Sync Point");

            // reseek to current pos 
            sourceLogFile.seek(currentPos);
            // read in a sync.length buffer amount 
            sourceLogFile.readFully(syncCheck);

            int syncLen = syncBytes.length;

            // start scan for next sync position ...
            for (int i = 0; sourceLogFile.getFilePointer() < endPos; i++) {
                int j = 0;
                for (; j < syncLen; j++) {
                    if (syncBytes[j] != syncCheck[(i + j) % syncLen])
                        break;
                }
                if (j == syncLen) {
                    sourceLogFile.seek(sourceLogFile.getFilePointer() - LocalLogFileHeader.SYNC_BYTES_SIZE); // position before sync
                    break;
                }
                syncCheck[i % syncLen] = sourceLogFile.readByte();
            }
            // whatever happened, the file pointer is now at the current position 
            currentPos = sourceLogFile.getFilePointer();

            if (currentPos < endPos) {
                CacheManager.LOG.info("### Item Loader Found another sync point at:" + currentPos);
            } else {
                CacheManager.LOG.error("### No more sync points found!");
            }
        } else {
            CacheManager.LOG
                    .info("WritingItem with FP:" + itemHeader._fingerprint + " Pos Is:" + writer.getLength());
            // track offset information for index building purposes   
            fpOffsetList.add(new FingerprintAndOffsetTuple(itemHeader._fingerprint, writer.getLength()));
            // read item data ...
            CacheItem cacheItem = new CacheItem();
            cacheItem.readFields(sourceLogFile);
            // now read content length 
            int contentLength = sourceLogFile.readInt();
            // and if content present... allocate buffer 
            if (contentLength != 0) {
                // allocate content buffer 
                byte[] contentBuffer = new byte[contentLength];
                // read it from disk 
                sourceLogFile.readFully(contentBuffer);
                // and set content into cache item 
                cacheItem.setContent(new Buffer(contentBuffer));
            }
            CacheManager.LOG.info("Adding to Sequence File Item with URL:" + cacheItem.getUrl());
            // write to sequence file ... 
            writer.append(new Text(cacheItem.getUrl()), cacheItem);
            // now seek past data
            currentPos += CacheItemHeader.SIZE + itemHeader._dataLength
                    + CacheManager.ITEM_RECORD_TRAILING_BYTES;
            // increment item count 
            itemsProcessed++;

        }

        if (!ignoreFlushLimit && itemsProcessed >= itemFlushLimit) {
            // ok this gets tricky now ...
            // figure out how many bytes of data were required to get to flush limit 
            long approxCheckpointSize = currentPos - startPos;
            // compute a  threshold number 
            long bytesThreshold = (long) (approxCheckpointSize * .70);
            // compute bytes remaining in checkpoint file ... 
            long bytesRemaining = endPos - currentPos;

            // ok if bytes remaining are less than the threshold then go ahead and gobble
            // everything up in a single pass (to prevent a smaller subsequent index file)
            if (bytesRemaining <= bytesThreshold) {
                // ignore the flush limit and keep on rolling to the end ...  
                ignoreFlushLimit = true;
                LOG.warn("*****Bytes Remaining:" + bytesRemaining + " less than % of last whole chkpt size:"
                        + approxCheckpointSize + ". Bypassing Flush Limit");
            } else {
                LOG.info("Reached Flush Item Limit:" + itemsProcessed + " Breaking Out");
                break;
            }

        }
    }

    LOG.info("Writing Index");
    // ok now build the index file ... 
    HDFSFileIndex.writeIndex(fpOffsetList, indexStreamOut);
    LOG.info("Done Writing Index. Total Items Written:" + fpOffsetList.size());
    // copy offset list into tuple list
    tupleListOut.addAll(fpOffsetList);

    return currentPos;
}

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

void resetSubDomainCounts() throws IOException {

    LOG.info("*** LIST:" + getListId() + " Reset SubDomain Queued Counts.");

    if (_subDomainMetadataFile.exists()) {

        LOG.info("*** LIST:" + getListId() + " FILE EXISTS .");

        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        DataInputBuffer inputBuffer = new DataInputBuffer();
        DataOutputBuffer outputBuffer = new DataOutputBuffer(CrawlListMetadata.Constants.FixedDataSize);

        try {
            // skip version 
            file.read();
            // read item count 
            int itemCount = file.readInt();

            LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

            CrawlListMetadata newMetadata = new CrawlListMetadata();

            for (int i = 0; i < itemCount; ++i) {

                long orignalPos = file.getFilePointer();
                file.readFully(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
                inputBuffer.reset(outputBuffer.getData(), CrawlListMetadata.Constants.FixedDataSize);
                try {
                    newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                } catch (Exception e) {
                    LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
                // ok reset everything except hashes and first/last url pointers 
                int urlCount = newMetadata.getUrlCount();
                long firstRecordOffset = newMetadata.getFirstRecordOffset();
                long lastRecordOffset = newMetadata.getLastRecordOffset();
                String domainName = newMetadata.getDomainName();
                long domainHash = newMetadata.getDomainHash();

                // reset 
                newMetadata.clear();
                // restore 
                newMetadata.setUrlCount(urlCount);
                newMetadata.setFirstRecordOffset(firstRecordOffset);
                newMetadata.setLastRecordOffset(lastRecordOffset);
                newMetadata.setDomainName(domainName);
                newMetadata.setDomainHash(domainHash);

                // serialize it ... 
                outputBuffer.reset();
                newMetadata.serialize(outputBuffer, new BinaryProtocol());
                // write it back to disk 
                file.seek(orignalPos);
                // and rewrite it ... 
                file.write(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
            }
        } finally {
            file.close();
        }
        LOG.info("*** LIST:" + getListId() + " DONE RESETTIGN SUBDOMAIN METADATA QUEUE COUNTS");
    }
}

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

void loadSubDomainMetadataFromDisk() throws IOException {
    LOG.info("*** LIST:" + getListId() + " LOAD SUBDOMAIN METADATA FROM DISK ...  ");
    if (_subDomainMetadataFile.exists()) {

        LOG.info("*** LIST:" + getListId() + " FILE EXISTS LOADING SUBDOMAIN DATA FROM DISK.");

        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        DataInputBuffer inputBuffer = new DataInputBuffer();
        byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize];

        try {
            // skip version 
            file.read();
            // read item count 
            int itemCount = file.readInt();

            LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

            CrawlListMetadata newMetadata = new CrawlListMetadata();

            TreeMap<Long, Integer> idToOffsetMap = new TreeMap<Long, Integer>();
            for (int i = 0; i < itemCount; ++i) {

                long orignalPos = file.getFilePointer();
                file.readFully(fixedDataBlock, 0, fixedDataBlock.length);
                inputBuffer.reset(fixedDataBlock, fixedDataBlock.length);
                try {
                    newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                } catch (Exception e) {
                    LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
                idToOffsetMap.put(newMetadata.getDomainHash(), (int) orignalPos);
            }

            // write lookup table 
            _offsetLookupTable = new DataOutputBuffer(idToOffsetMap.size() * OFFSET_TABLE_ENTRY_SIZE);
            for (Map.Entry<Long, Integer> entry : idToOffsetMap.entrySet()) {
                _offsetLookupTable.writeLong(entry.getKey());
                _offsetLookupTable.writeInt(entry.getValue());
            }
        } finally {
            file.close();
        }
        LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK");
    } else {

        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA DOES NOT EXIST! LOADING FROM SCRATCH");

        RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
        RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");

        try {

            //ok rebuild top level metadata as well 
            _metadata.clear();

            OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();

            int processedCount = 0;
            while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {

                long position = fixedDataReader.getFilePointer();

                // store offset in item 
                item._fileOffset = position;
                // load from disk 
                item.deserialize(fixedDataReader);
                try {
                    // seek to string data 
                    stringDataReader.seek(item._stringsOffset);
                    // and skip buffer length 
                    WritableUtils.readVInt(stringDataReader);
                    // and read primary string 
                    String url = stringDataReader.readUTF();

                    // get metadata object for subdomain 
                    CrawlListMetadata subDomainMetadata = getTransientSubDomainMetadata(url);

                    // increment url count 
                    subDomainMetadata.setUrlCount(subDomainMetadata.getUrlCount() + 1);

                    // increment top level metadata count 
                    _metadata.setUrlCount(_metadata.getUrlCount() + 1);

                    // update top level metadata ..
                    updateMetadata(item, _metadata, 0);

                    // update sub-domain metadata object  from item data
                    updateMetadata(item, subDomainMetadata, 0);

                    ++processedCount;
                } catch (IOException e) {
                    LOG.error("Exception Reading String Data For Item:" + (processedCount + 1));
                    LOG.error("Exception:" + CCStringUtils.stringifyException(e));
                    LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:"
                            + stringDataReader.getFilePointer());
                }

                if (processedCount % 10000 == 0) {
                    LOG.info("*** LIST:" + getListId() + " Processed:" + processedCount + " Items");
                }
            }

            // ok commit top level metadata to disk as well 
            writeMetadataToDisk();

        } catch (IOException e) {
            LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                    + CCStringUtils.stringifyException(e));
            LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:"
                    + stringDataReader.getFilePointer());
            _queueState = QueueState.QUEUED;
        } finally {
            fixedDataReader.close();
            stringDataReader.close();
        }
        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA . WRITING TO DISK");

        // write metadata to disk 
        writeInitialSubDomainMetadataToDisk();

        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA . WRITE COMPLETE");
    }
}

From source file:org.opencb.cellbase.lib.db.VariantAnnotationCalculatorTest.java

private int getVepAnnotationBatch(RandomAccessFile raf, int nVariantsToRead,
        Set<AnnotationComparisonObject> vepAnnotationSet) throws IOException {
    /**
     * Loads VEP annotation
     */
    String newLine;
    int nNonRegulatoryAnnotations = 0;
    int nReadVariants = 0;
    String previousChr = "";
    String previousPosition = "";
    String previousAlt = "";
    String alt;
    long filePointer = 0;

    if (nVariantsToRead > 0) {
        while (((newLine = raf.readLine()) != null) && nReadVariants <= nVariantsToRead) {
            String[] lineFields = newLine.split("\t");
            String[] coordinatesParts = lineFields[1].split(":");
            if (lineFields[2].equals("deletion")) {
                alt = "-";
            } else {
                alt = lineFields[2];
            }
            // TODO: Remove this if as refactoring implements consequence types for other variant types
            //                if(!alt.equals("-") && coordinatesParts[1].split("-").length==1) {
            if (!previousChr.equals(coordinatesParts[0]) || !previousPosition.equals(coordinatesParts[1])
                    || !previousAlt.equals(alt)) {
                nReadVariants++;
            }
            if (nReadVariants <= nVariantsToRead) {
                for (String SOname : lineFields[6].split(",")) {
                    if (SOname.equals("nc_transcript_variant")) {
                        SOname = "non_coding_transcript_variant";
                    }
                    if (!SOname.equals("regulatory_region_variant")) {
                        nNonRegulatoryAnnotations++;
                    }
                    vepAnnotationSet.add(new AnnotationComparisonObject(coordinatesParts[0],
                            coordinatesParts[1], alt, lineFields[3], lineFields[4], SOname));
                }
                previousChr = coordinatesParts[0];
                previousPosition = coordinatesParts[1];
                previousAlt = alt;
                filePointer = raf.getFilePointer();
            }
            //                }
        }

        raf.seek(filePointer);
    }

    return nNonRegulatoryAnnotations;
}

From source file:io.minio.MinioClient.java

/**
 * Skips data of up to given length in given input stream.
 *
 * @param inputStream  Input stream which is an instance of {@link RandomAccessFile} or {@link BufferedInputStream}.
 * @param n            Length of bytes to skip.
 */
private void skipStream(Object inputStream, long n) throws IOException, InsufficientDataException {
    RandomAccessFile file = null;
    BufferedInputStream stream = null;
    if (inputStream instanceof RandomAccessFile) {
        file = (RandomAccessFile) inputStream;
    } else if (inputStream instanceof BufferedInputStream) {
        stream = (BufferedInputStream) inputStream;
    } else {
        throw new IllegalArgumentException("unsupported input stream object");
    }

    if (file != null) {
        file.seek(file.getFilePointer() + n);
        return;
    }

    long bytesSkipped;
    long totalBytesSkipped = 0;

    while ((bytesSkipped = stream.skip(n - totalBytesSkipped)) >= 0) {
        totalBytesSkipped += bytesSkipped;
        if (totalBytesSkipped == n) {
            return;
        }
    }

    throw new InsufficientDataException(
            "Insufficient data.  bytes skipped " + totalBytesSkipped + " expected " + n);
}

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

public ArrayList<CrawlListDomainItem> getSubDomainList(int offset, int count) {
    synchronized (_metadata) {

        ArrayList<CrawlListDomainItem> itemsOut = new ArrayList<CrawlListDomainItem>();

        try {
            synchronized (_subDomainMetadataFile) {
                RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
                DataInputBuffer inputBuffer = new DataInputBuffer();
                byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize];

                try {
                    // skip version 
                    file.read();
                    // read item count 
                    int itemCount = file.readInt();

                    int i = offset;
                    int end = Math.min(i + count, itemCount);

                    LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

                    if (i < itemCount) {

                        file.seek(5 + (CrawlListMetadata.Constants.FixedDataSize * offset));

                        CrawlListMetadata newMetadata = new CrawlListMetadata();

                        for (; i < end; ++i) {

                            long orignalPos = file.getFilePointer();
                            file.readFully(fixedDataBlock, 0, fixedDataBlock.length);
                            inputBuffer.reset(fixedDataBlock, fixedDataBlock.length);
                            newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                            itemsOut.add(buildSubDomainSummary(newMetadata.getDomainName(), newMetadata));
                        }
                    }
                } finally {
                    file.close();
                }
            }
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
        }
        LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK");

        return itemsOut;
    }
}