Example usage for java.io RandomAccessFile readUTF

List of usage examples for java.io RandomAccessFile readUTF

Introduction

In this page you can find the example usage for java.io RandomAccessFile readUTF.

Prototype

public final String readUTF() throws IOException 

Source Link

Document

Reads in a string from this file.

Usage

From source file:Main.java

public static void main(String[] args) {
    try {// ww  w .j ava 2  s.  c o  m
        // create a new RandomAccessFile with filename Example
        RandomAccessFile raf = new RandomAccessFile("c:/test.txt", "rw");

        // write something in the file
        raf.writeUTF("java2s.com Hello World");

        // set the file pointer at 0 position
        raf.seek(0);

        // read and print the contents of the file
        System.out.println(raf.readUTF());

        // return the file pointer
        System.out.println(raf.getFilePointer());

        // change the position of the file pointer
        raf.seek(5);

        // return the file pointer
        System.out.println(raf.getFilePointer());

        // close the strea and release resources
        raf.close();
    } catch (IOException ex) {
        ex.printStackTrace();
    }

}

From source file:Main.java

public static void main(String[] args) throws Exception {
    RandomAccessFile raf = new RandomAccessFile("books.dat", "rw");

    String books[] = new String[5];
    books[0] = "A";
    books[1] = "B";
    books[2] = "C";
    books[3] = "D";
    books[4] = "E";

    for (int i = 0; i < books.length; i++) {
        raf.writeUTF(books[i]);//from   w ww.j  a v a2  s .c om
    }
    raf.seek(raf.length());
    raf.writeUTF("Servlet & JSP Programming");

    raf.seek(0);

    while (raf.getFilePointer() < raf.length()) {
        System.out.println(raf.readUTF());
    }
}

From source file:Main.java

public static void readFile(String fileName) throws IOException {
    RandomAccessFile raf = new RandomAccessFile(fileName, "rw");
    int counter = raf.readInt();
    String msg = raf.readUTF();

    System.out.println(counter);/* w  ww .ja  v  a  2  s  .c om*/
    System.out.println(msg);
    incrementReadCounter(raf);
    raf.close();
}

From source file:net.ontopia.infoset.content.FileContentStore.java

private void allocateNewBlock() throws ContentStoreException {
    RandomAccessFile out = null;
    boolean exception_thrown = false;
    try {//from   ww w .  ja  v  a2s .  co m
        out = new RandomAccessFile(key_file, "rws");

        for (int i = 0; i < MAX_SPINS; i++) {
            // acquire exclusive lock
            FileLock l = out.getChannel().tryLock();

            if (l == null) {
                // wait a little before trying again
                try {
                    Thread.sleep(SPIN_TIMEOUT);
                } catch (InterruptedException e) {
                }
                continue;

            } else {
                try {
                    // allocate new key
                    int old_key;
                    int new_key;
                    String content = null;

                    if (out.length() == 0) {
                        old_key = 0;
                        new_key = old_key + KEY_BLOCK_SIZE;

                    } else {
                        try {
                            content = out.readUTF();
                            old_key = Integer.parseInt(content);
                            new_key = old_key + KEY_BLOCK_SIZE;
                        } catch (NumberFormatException e) {
                            if (content.length() > 100)
                                content = content.substring(0, 100) + "...";
                            throw new ContentStoreException(
                                    "Content store key file corrupted. Contained: '" + content + "'");
                        }
                    }

                    // truncate key file and write out new key
                    out.seek(0);
                    out.writeUTF(Integer.toString(new_key));

                    end_of_key_block = new_key;
                    last_key = old_key;
                    return;
                } finally {
                    // release file lock
                    try {
                        l.release();
                    } catch (Throwable t) {
                        throw new ContentStoreException("Could not release key file lock.", t);
                    }
                }
            }
        }
        throw new ContentStoreException("Block allocation timed out.");

    } catch (ContentStoreException e) {
        exception_thrown = true;
        throw e;

    } catch (Throwable t) {
        exception_thrown = true;
        throw new ContentStoreException(t);

    } finally {
        if (out != null) {
            try {
                out.close();
            } catch (IOException e) {
                if (!exception_thrown)
                    throw new ContentStoreException("Problems occurred when closing content store.", e);
            }
        }
    }
}

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

/** queue uncrawled urls via the CrawlQueueLoader
 * //  w  w w.j a  va  2s.  co  m
 * @param loader
 */
public void queueUnCrawledItems(CrawlQueueLoader loader) throws IOException {
    _queueState = QueueState.QUEUEING;

    int metadataVersion = getMetadata().getVersion();

    synchronized (_metadata) {
        // reset metadata PERIOD  
        int urlCount = _metadata.getUrlCount();
        _metadata.clear();
        _metadata.setUrlCount(urlCount);
    }

    RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {

        OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
        URLFP fingerprint = new URLFP();

        while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {

            long position = fixedDataReader.getFilePointer();

            //LOG.info("*** TRYING READ LOCK FOR OFFSET:" + position);
            while (true) {
                // get read lock on position ... 
                try {
                    FileLock lock = fixedDataReader.getChannel().tryLock(position,
                            OnDiskCrawlHistoryItem.ON_DISK_SIZE, false);

                    try {
                        //LOG.info("*** GOT READ LOCK FOR OFFSET:" + position);
                        item.deserialize(fixedDataReader);
                        break;
                    } finally {
                        lock.release();
                        //LOG.info("*** RELEASED READ LOCK FOR OFFSET:" + position);
                    }
                } catch (OverlappingFileLockException e) {
                    LOG.error("*** LOCK CONTENTION AT:" + position + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
            }

            // seek to string data 
            stringDataReader.seek(item._stringsOffset);
            // and skip buffer length 
            WritableUtils.readVInt(stringDataReader);
            // and read primary string 
            String url = stringDataReader.readUTF();
            // setup fingerprint 
            fingerprint.setDomainHash(item._domainHash);
            fingerprint.setUrlHash(item._urlFingerprint);

            // first, if it has not been crawled ever, crawl it not matter what ... 
            boolean crawlItem = !item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS);

            // if it has been crawled ... check list metadata version ... 
            if (!crawlItem && metadataVersion >= 1) {
                // ok this is newer version of the list ... 
                // check refresh time if specified ...
                int refreshIntervalInSeconds = DEFAULT_REFRESH_INTERVAL_IN_SECS;

                if (getMetadata().getRefreshInterval() != 0) {
                    refreshIntervalInSeconds = getMetadata().getRefreshInterval();
                }

                if (item._updateTimestamp > 0) {
                    long timeSinceLastCrawl = item._updateTimestamp;
                    if (System.currentTimeMillis() - timeSinceLastCrawl >= (refreshIntervalInSeconds * 1000)) {
                        crawlItem = true;
                    }
                }
            }

            if (crawlItem) {

                loader.queueURL(fingerprint, url);

                synchronized (_metadata) {
                    // update queued item count 
                    _metadata.setQueuedItemCount(_metadata.getQueuedItemCount() + 1);
                }
            } else {
                updateMetadata(item, _metadata, 0);
            }
            // ok update subdomain stats 
            updateSubDomainMetadataForItemDuringLoad(item, url, fingerprint, crawlItem);
        }

        flushCachedSubDomainMetadata();

        loader.flush();

        _queueState = QueueState.QUEUED;
    } catch (IOException e) {
        LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                + CCStringUtils.stringifyException(e));
        _queueState = QueueState.ERROR;
    } finally {
        fixedDataReader.close();
        stringDataReader.close();
    }
}

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

/** resubmit failed items 
 * //from   w w  w .  ja  v  a2s  .co  m
 * @param loader
 */
public void requeueFailedItems(CrawlQueueLoader loader) throws IOException {
    synchronized (this) {
        _queueState = QueueState.QUEUEING;
    }
    RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {

        OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
        URLFP fingerprint = new URLFP();

        while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
            item.deserialize(fixedDataReader);
            boolean queueItem = false;
            if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS)) {

                if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS)) {
                    queueItem = (item._redirectStatus != 0);

                    if (!queueItem) {
                        if (item._redirectHttpResult != 200 && item._redirectHttpResult != 404) {
                            queueItem = true;
                        }
                    }
                } else {
                    queueItem = (item._crawlStatus != 0);

                    if (!queueItem) {
                        if (item._httpResultCode != 200 && item._httpResultCode != 404) {
                            queueItem = true;
                        }
                    }
                }

                if (queueItem) {
                    // seek to string data 
                    stringDataReader.seek(item._stringsOffset);
                    // and skip buffer length 
                    WritableUtils.readVInt(stringDataReader);
                    // and read primary string 
                    String url = stringDataReader.readUTF();
                    // and spill
                    fingerprint.setDomainHash(item._domainHash);
                    fingerprint.setUrlHash(item._urlFingerprint);

                    loader.queueURL(fingerprint, url);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                + CCStringUtils.stringifyException(e));
        _queueState = QueueState.QUEUED;
    } finally {
        fixedDataReader.close();
        stringDataReader.close();
    }
}

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

private ProxyCrawlHistoryItem getHistoryItemFromOnDiskItem(OnDiskCrawlHistoryItem item) throws IOException {

    ProxyCrawlHistoryItem itemOut = new ProxyCrawlHistoryItem();

    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS) != 0)
        itemOut.setCrawlStatus(item._crawlStatus);
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_ORIGINAL_RESULT_CODE) != 0)
        itemOut.setHttpResultCode(item._httpResultCode);
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS) != 0)
        itemOut.setRedirectStatus(item._redirectStatus);
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_RESULT_CODE) != 0)
        itemOut.setRedirectHttpResult(item._redirectHttpResult);
    if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_LASTMODIFIED_TIME) != 0)
        itemOut.setLastModifiedTime(item._updateTimestamp);
    // now attept to get the string offset 
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {/*from ww w  .ja va  2 s.c o m*/
        // seek to string data 
        stringDataReader.seek(item._stringsOffset);
        // and skip buffer length 
        WritableUtils.readVInt(stringDataReader);
        // now populate original url ... 
        itemOut.setOriginalURL(stringDataReader.readUTF());
        // now if redirect url is present 
        if ((item._flags & OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_URL) != 0) {
            itemOut.setRedirectURL(stringDataReader.readUTF());
        }
    } finally {
        stringDataReader.close();
    }
    return itemOut;
}

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

void loadSubDomainMetadataFromDisk() throws IOException {
    LOG.info("*** LIST:" + getListId() + " LOAD SUBDOMAIN METADATA FROM DISK ...  ");
    if (_subDomainMetadataFile.exists()) {

        LOG.info("*** LIST:" + getListId() + " FILE EXISTS LOADING SUBDOMAIN DATA FROM DISK.");

        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        DataInputBuffer inputBuffer = new DataInputBuffer();
        byte fixedDataBlock[] = new byte[CrawlListMetadata.Constants.FixedDataSize];

        try {//  w  w  w  .  j a  v  a  2 s.co m
            // skip version 
            file.read();
            // read item count 
            int itemCount = file.readInt();

            LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

            CrawlListMetadata newMetadata = new CrawlListMetadata();

            TreeMap<Long, Integer> idToOffsetMap = new TreeMap<Long, Integer>();
            for (int i = 0; i < itemCount; ++i) {

                long orignalPos = file.getFilePointer();
                file.readFully(fixedDataBlock, 0, fixedDataBlock.length);
                inputBuffer.reset(fixedDataBlock, fixedDataBlock.length);
                try {
                    newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                } catch (Exception e) {
                    LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
                idToOffsetMap.put(newMetadata.getDomainHash(), (int) orignalPos);
            }

            // write lookup table 
            _offsetLookupTable = new DataOutputBuffer(idToOffsetMap.size() * OFFSET_TABLE_ENTRY_SIZE);
            for (Map.Entry<Long, Integer> entry : idToOffsetMap.entrySet()) {
                _offsetLookupTable.writeLong(entry.getKey());
                _offsetLookupTable.writeInt(entry.getValue());
            }
        } finally {
            file.close();
        }
        LOG.info("*** LIST:" + getListId() + " DONE LOADING SUBDOMAIN DATA FROM DISK");
    } else {

        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA DOES NOT EXIST! LOADING FROM SCRATCH");

        RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
        RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");

        try {

            //ok rebuild top level metadata as well 
            _metadata.clear();

            OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();

            int processedCount = 0;
            while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {

                long position = fixedDataReader.getFilePointer();

                // store offset in item 
                item._fileOffset = position;
                // load from disk 
                item.deserialize(fixedDataReader);
                try {
                    // seek to string data 
                    stringDataReader.seek(item._stringsOffset);
                    // and skip buffer length 
                    WritableUtils.readVInt(stringDataReader);
                    // and read primary string 
                    String url = stringDataReader.readUTF();

                    // get metadata object for subdomain 
                    CrawlListMetadata subDomainMetadata = getTransientSubDomainMetadata(url);

                    // increment url count 
                    subDomainMetadata.setUrlCount(subDomainMetadata.getUrlCount() + 1);

                    // increment top level metadata count 
                    _metadata.setUrlCount(_metadata.getUrlCount() + 1);

                    // update top level metadata ..
                    updateMetadata(item, _metadata, 0);

                    // update sub-domain metadata object  from item data
                    updateMetadata(item, subDomainMetadata, 0);

                    ++processedCount;
                } catch (IOException e) {
                    LOG.error("Exception Reading String Data For Item:" + (processedCount + 1));
                    LOG.error("Exception:" + CCStringUtils.stringifyException(e));
                    LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:"
                            + stringDataReader.getFilePointer());
                }

                if (processedCount % 10000 == 0) {
                    LOG.info("*** LIST:" + getListId() + " Processed:" + processedCount + " Items");
                }
            }

            // ok commit top level metadata to disk as well 
            writeMetadataToDisk();

        } catch (IOException e) {
            LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                    + CCStringUtils.stringifyException(e));
            LOG.error("File Position:" + fixedDataReader.getFilePointer() + " StringsPointer:"
                    + stringDataReader.getFilePointer());
            _queueState = QueueState.QUEUED;
        } finally {
            fixedDataReader.close();
            stringDataReader.close();
        }
        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA . WRITING TO DISK");

        // write metadat to disk 
        writeInitialSubDomainMetadataToDisk();

        LOG.info("*** LIST:" + getListId() + " SUBDOMAIN METADATA REBUILT FROM LIST DATA . WRITE COMPLETE");
    }
}

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

public static void dumpUnCrawledItems(File dataDir, long listId, File outputFilePath,
        boolean includeRobotsExcludedItems) throws IOException {

    File fixedDataFile = new File(dataDir, LIST_VALUE_MAP_PREFIX + Long.toString(listId));
    File variableDataFile = new File(dataDir, LIST_STRING_MAP_PREFIX + Long.toString(listId));

    LOG.info("FixedDataFile is:" + fixedDataFile);
    LOG.info("VariableDataFile is:" + variableDataFile);

    RandomAccessFile fixedDataReader = new RandomAccessFile(fixedDataFile, "r");
    RandomAccessFile stringDataReader = new RandomAccessFile(variableDataFile, "r");

    JsonWriter writer = new JsonWriter(new BufferedWriter(new FileWriter(outputFilePath), 1024 * 1024 * 10));

    writer.setIndent(" ");

    try {/*from  www  .  ja  v a2 s.  c o m*/
        writer.beginObject();
        writer.name("urls");
        writer.beginArray();
        try {

            OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
            URLFP fingerprint = new URLFP();

            while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {

                long position = fixedDataReader.getFilePointer();

                item.deserialize(fixedDataReader);

                // seek to string data 
                stringDataReader.seek(item._stringsOffset);
                // and skip buffer length 
                WritableUtils.readVInt(stringDataReader);
                // and read primary string 
                String url = stringDataReader.readUTF();
                // setup fingerprint 
                fingerprint.setDomainHash(item._domainHash);
                fingerprint.setUrlHash(item._urlFingerprint);

                // any item that has not been crawled needs to be queued 
                boolean queueItem = !item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS);

                // if item is not queued, check to see if we need to retry the item 
                if (!queueItem && item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS)) {

                    if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS)) {

                        queueItem = (item._redirectStatus != 0);

                        if (!queueItem) {
                            if (item._redirectHttpResult != 200 && item._redirectHttpResult != 404) {
                                queueItem = true;
                            }
                        }
                    } else {
                        queueItem = (item._crawlStatus != 0);

                        if (!queueItem) {
                            if (item._httpResultCode != 200 && item._httpResultCode != 404) {
                                queueItem = true;
                            }
                        }
                    }
                }

                if (queueItem) {
                    // ok if queue item is set ... 
                    writer.beginObject();
                    writer.name("url");
                    writer.value(url);
                    writer.name("redirected");
                    writer.value((boolean) item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS));
                    writer.name("lastStatus");
                    if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS)) {
                        if (item._redirectStatus == 0) {
                            writer.value("HTTP-" + item._redirectHttpResult);
                        } else {
                            writer.value(CrawlURL.FailureReason.toString(item._redirectHttpResult));
                        }
                    } else {
                        if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS)) {
                            if (item._crawlStatus == 0) {
                                writer.value("HTTP-" + item._httpResultCode);
                            } else {
                                writer.value(CrawlURL.FailureReason.toString(item._crawlStatus));
                            }
                        } else {
                            writer.value("UNCRAWLED");
                        }
                    }
                    writer.name("updateTime");
                    writer.value(item._updateTimestamp);
                    writer.endObject();
                }
            }
        } catch (IOException e) {
            LOG.error("Encountered Exception Queueing Items for List:" + listId + " Exception:"
                    + CCStringUtils.stringifyException(e));
        } finally {
            fixedDataReader.close();
            stringDataReader.close();
        }

        writer.endArray();
        writer.endObject();
    } catch (Exception e) {
        LOG.error(CCStringUtils.stringifyException(e));
        throw new IOException(e);
    } finally {
        writer.flush();
        writer.close();
    }

}