Example usage for java.io RandomAccessFile close

List of usage examples for java.io RandomAccessFile close

Introduction

On this page you can find example usages of java.io RandomAccessFile close.

Prototype

public void close() throws IOException 

Document

Closes this random access file stream and releases any system resources associated with the stream.
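
As a minimal sketch of typical usage (the file name "data.bin" is only a placeholder), close() is either invoked explicitly in a finally block or handled implicitly by try-with-resources, since RandomAccessFile implements Closeable:

import java.io.IOException;
import java.io.RandomAccessFile;

public class CloseExample {
    public static void main(String[] args) throws IOException {
        // Java 7+: try-with-resources closes the file automatically,
        // even if an exception is thrown inside the block.
        try (RandomAccessFile raf = new RandomAccessFile("data.bin", "rw")) {
            raf.writeInt(42);
        }

        // Pre-Java 7 style, as used in most of the examples below:
        // close explicitly in a finally block, guarding against null.
        RandomAccessFile file = null;
        try {
            file = new RandomAccessFile("data.bin", "r");
            System.out.println("Read: " + file.readInt());
        } finally {
            if (file != null) {
                file.close();
            }
        }
    }
}

Closing the file also closes any FileChannel obtained from it, and closing an already-closed file has no effect.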

Usage

From source file:org.commoncrawl.service.crawler.CrawlList.java

private static int readTargetsFromLogFile(CrawlList domain, File logFileName, int desiredReadAmount,
        IntrusiveList<CrawlTarget> targetsOut) throws IOException {

    int itemsRead = 0;

    if (logFileName.exists()) {

        RandomAccessFile file = new RandomAccessFile(logFileName, "rw");

        LogFileHeader header = new LogFileHeader();

        try {

            long headerOffset = readLogFileHeader(file, header);

            // seek to the appropriate read position 
            if (header._readPos != 0)
                file.seek(header._readPos);

            int itemsToRead = Math.min(desiredReadAmount, header._itemCount);

            PersistentCrawlTarget persistentTarget = new PersistentCrawlTarget();
            CRC32 crc = new CRC32();
            CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 16);

            for (int i = 0; i < itemsToRead; ++i) {
                // read length ... 
                int urlDataLen = file.readInt();
                long urlDataCRC = file.readLong();

                buffer.reset();

                if (urlDataLen > buffer.getBuffer().length) {
                    buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                }
                file.read(buffer.getBuffer(), 0, urlDataLen);
                crc.reset();
                crc.update(buffer.getBuffer(), 0, urlDataLen);

                long computedValue = crc.getValue();

                // validate crc values ... 
                if (computedValue != urlDataCRC) {
                    throw new IOException("Crawl Target Log File Corrupt");
                } else {
                    //populate a persistentTarget from the (in memory) data stream
                    DataInputStream bufferReader = new DataInputStream(
                            new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));

                    persistentTarget.clear();
                    persistentTarget.readFields(bufferReader);

                    //populate a new crawl target structure ... 
                    CrawlTarget newTarget = new CrawlTarget(domain, persistentTarget);

                    targetsOut.addTail(newTarget);
                }
            }

            itemsRead = itemsToRead;

            // now update header ... 
            header._itemCount -= itemsRead;
            // now if item count is non zero ... 
            if (header._itemCount != 0) {
                // set read cursor to next record location 
                header._readPos = file.getFilePointer();
            }
            // otherwise ... 
            else {
                // reset both cursors ... 
                header._readPos = 0;
                header._writePos = 0;
            }

            // now write out header anew ... 
            writeLogFileHeader(file, header);
        } finally {
            if (file != null) {
                file.close();
            }
        }
    }
    return itemsRead;
}

From source file:gate.util.reporting.DocTimeReporter.java

/**
 * Provides the functionality to separate out pipeline specific benchmark
 * entries in separate temporary benchmark files in a temporary folder in the
 * current working directory.
 *
 * @param benchmarkFile
 *          An object of type File representing the input benchmark file.
 * @param report
 *          A file handle to the report file to be written.
 * @throws BenchmarkReportFileAccessException
 *           if any error occurs while accessing the input benchmark file or
 *           while splitting it.
 * @throws BenchmarkReportExecutionException
 *           if the given input benchmark file is modified while generating
 *           the report.
 */
private void splitBenchmarkFile(File benchmarkFile, File report)
        throws BenchmarkReportFileAccessException, BenchmarkReportInputFileFormatException {
    File dir = temporaryDirectory;
    // If the folder already exists, delete all files in the temporary folder
    if (dir.isDirectory()) {
        File files[] = dir.listFiles();
        for (int count = 0; count < files.length; count++) {
            if (!files[count].delete()) {
                throw new BenchmarkReportFileAccessException(
                        "Could not delete files in the folder \"" + temporaryDirectory + "\"");
            }
        }
    } else if (!dir.mkdir()) {
        throw new BenchmarkReportFileAccessException(
                "Could not create  temporary folder \"" + temporaryDirectory + "\"");
    }

    // delete report2 from the filesystem
    if (getPrintMedia().equalsIgnoreCase(MEDIA_TEXT)) {
        deleteFile(new File(report.getAbsolutePath() + ".txt"));
    } else if (getPrintMedia().equalsIgnoreCase(MEDIA_HTML)) {
        deleteFile(new File(report.getAbsolutePath() + ".html"));
    }

    RandomAccessFile in = null;
    BufferedWriter out = null;
    try {
        String logEntry = "";
        long fromPos = 0;

        // File benchmarkFileName;
        if (getLogicalStart() != null) {
            fromPos = tail(benchmarkFile, FILE_CHUNK_SIZE);
        }
        in = new RandomAccessFile(benchmarkFile, "r");

        if (getLogicalStart() != null) {
            in.seek(fromPos);
        }
        ArrayList<String> startTokens = new ArrayList<String>();
        String lastStart = "";
        Pattern pattern = Pattern.compile("(\\d+) (\\d+) (.*) (.*) \\{(.*)\\}");
        Matcher matcher = null;
        File benchmarkFileName = null;
        while ((logEntry = in.readLine()) != null) {
            matcher = pattern.matcher(logEntry);
            String startToken = "";
            if (logEntry.matches(".*START.*")) {
                String[] splittedStartEntry = logEntry.split("\\s");
                if (splittedStartEntry.length > 2) {
                    startToken = splittedStartEntry[2];
                } else {
                    throw new BenchmarkReportInputFileFormatException(getBenchmarkFile() + " is invalid.");
                }

                if (startToken.endsWith("Start")) {
                    continue;
                }
                if (!startTokens.contains(startToken)) {
                    // create a new file for the new pipeline
                    startTokens.add(startToken);
                    benchmarkFileName = new File(temporaryDirectory, startToken + "_benchmark.txt");
                    if (!benchmarkFileName.createNewFile()) {
                        throw new BenchmarkReportFileAccessException("Could not create \"" + startToken
                                + "_benchmark.txt" + "\" in directory named \"" + temporaryDirectory + "\"");
                    }
                    out = new BufferedWriter(new FileWriter(benchmarkFileName));
                    out.write(logEntry);
                    out.newLine();
                }
            }
            // if a valid benchmark entry then write it to the pipeline specific
            // file
            if (matcher != null && matcher.matches() && (validateLogEntry(matcher.group(3), startTokens)
                    || logEntry.matches(".*documentLoaded.*"))) {
                startToken = matcher.group(3).split("\\.")[0];
                if (!(lastStart.equals(startToken))) {
                    if (out != null) {
                        out.close();
                    }
                    benchmarkFileName = new File(temporaryDirectory, startToken + "_benchmark.txt");
                    out = new BufferedWriter(new FileWriter(benchmarkFileName, true));
                }
                if (out != null) {
                    out.write(logEntry);
                    out.newLine();
                }
                lastStart = startToken;
            }
        }

    } catch (IOException e) {
        e.printStackTrace();

    } finally {
        try {
            if (in != null) {
                in.close();
            }
            if (out != null) {
                out.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

/** resubmit failed items 
 *
 * @param loader
 */
public void requeueFailedItems(CrawlQueueLoader loader) throws IOException {
    synchronized (this) {
        _queueState = QueueState.QUEUEING;
    }
    RandomAccessFile fixedDataReader = new RandomAccessFile(_fixedDataFile, "rw");
    RandomAccessFile stringDataReader = new RandomAccessFile(_variableDataFile, "rw");
    try {

        OnDiskCrawlHistoryItem item = new OnDiskCrawlHistoryItem();
        URLFP fingerprint = new URLFP();

        while (fixedDataReader.getFilePointer() != fixedDataReader.length()) {
            item.deserialize(fixedDataReader);
            boolean queueItem = false;
            if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_CRAWL_STATUS)) {

                if (item.isFlagSet(OnDiskCrawlHistoryItem.FLAG_HAS_REDIRECT_STATUS)) {
                    queueItem = (item._redirectStatus != 0);

                    if (!queueItem) {
                        if (item._redirectHttpResult != 200 && item._redirectHttpResult != 404) {
                            queueItem = true;
                        }
                    }
                } else {
                    queueItem = (item._crawlStatus != 0);

                    if (!queueItem) {
                        if (item._httpResultCode != 200 && item._httpResultCode != 404) {
                            queueItem = true;
                        }
                    }
                }

                if (queueItem) {
                    // seek to string data 
                    stringDataReader.seek(item._stringsOffset);
                    // and skip buffer length 
                    WritableUtils.readVInt(stringDataReader);
                    // and read primary string 
                    String url = stringDataReader.readUTF();
                    // and spill
                    fingerprint.setDomainHash(item._domainHash);
                    fingerprint.setUrlHash(item._urlFingerprint);

                    loader.queueURL(fingerprint, url);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Encountered Exception Queueing Items for List:" + _listId + " Exception:"
                + CCStringUtils.stringifyException(e));
        _queueState = QueueState.QUEUED;
    } finally {
        fixedDataReader.close();
        stringDataReader.close();
    }
}

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

void writeInitialSubDomainMetadataToDisk() throws IOException {

    RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");

    try {

        file.writeByte(0); // version
        file.writeInt(_transientSubDomainStats.size());

        ArrayList<CrawlListMetadata> sortedMetadata = new ArrayList<CrawlListMetadata>();
        sortedMetadata.addAll(_transientSubDomainStats.values());
        _transientSubDomainStats = null;
        CrawlListMetadata metadataArray[] = sortedMetadata.toArray(new CrawlListMetadata[0]);
        Arrays.sort(metadataArray, new Comparator<CrawlListMetadata>() {

            @Override
            public int compare(CrawlListMetadata o1, CrawlListMetadata o2) {
                int result = ((Integer) o2.getUrlCount()).compareTo(o1.getUrlCount());
                if (result == 0) {
                    result = o1.getDomainName().compareTo(o2.getDomainName());
                }
                return result;
            }
        });

        DataOutputBuffer outputBuffer = new DataOutputBuffer(CrawlListMetadata.Constants.FixedDataSize);

        TreeMap<Long, Integer> idToOffsetMap = new TreeMap<Long, Integer>();

        for (CrawlListMetadata entry : metadataArray) {
            // reset output buffer 
            outputBuffer.reset();
            // write item to disk 
            entry.serialize(outputBuffer, new BinaryProtocol());

            if (outputBuffer.getLength() > CrawlListMetadata.Constants.FixedDataSize) {
                LOG.fatal("Metadata Serialization for List:" + getListId() + " SubDomain:"
                        + entry.getDomainName());
                System.out.println("Metadata Serialization for List:" + getListId() + " SubDomain:"
                        + entry.getDomainName());
            }
            // save offset 
            idToOffsetMap.put(entry.getDomainHash(), (int) file.getFilePointer());
            // write out fixed data size 
            file.write(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
        }

        // write lookup table 
        _offsetLookupTable = new DataOutputBuffer(idToOffsetMap.size() * OFFSET_TABLE_ENTRY_SIZE);

        for (Map.Entry<Long, Integer> entry : idToOffsetMap.entrySet()) {
            _offsetLookupTable.writeLong(entry.getKey());
            _offsetLookupTable.writeInt(entry.getValue());
        }
    } finally {
        file.close();
    }
    _transientSubDomainStats = null;
}

From source file:edu.umass.cs.gigapaxos.SQLPaxosLogger.java

private ArrayList<byte[]> getJournaledMessage(FileOffsetLength[] fols) throws IOException {
    ArrayList<byte[]> logStrings = new ArrayList<byte[]>();
    RandomAccessFile raf = null;
    String filename = null;
    for (FileOffsetLength fol : fols) {
        try {
            if (raf == null) {
                raf = new RandomAccessFile(filename = fol.file, "r");
            } else if (!filename.equals(fol.file)) {
                raf.close();
                raf = new RandomAccessFile(filename = fol.file, "r");
            }
            logStrings.add(this.getJournaledMessage(fol.file, fol.offset, fol.length, raf));
        } catch (IOException e) {
            if (raf != null)
                raf.close();
            raf = null;
        }
    }
    return logStrings;
}

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

void resetSubDomainCounts() throws IOException {

    LOG.info("*** LIST:" + getListId() + " Reset SubDomain Queued Counts.");

    if (_subDomainMetadataFile.exists()) {

        LOG.info("*** LIST:" + getListId() + " FILE EXISTS .");

        RandomAccessFile file = new RandomAccessFile(_subDomainMetadataFile, "rw");
        DataInputBuffer inputBuffer = new DataInputBuffer();
        DataOutputBuffer outputBuffer = new DataOutputBuffer(CrawlListMetadata.Constants.FixedDataSize);

        try {
            // skip version 
            file.read();
            // read item count 
            int itemCount = file.readInt();

            LOG.info("*** LIST:" + getListId() + " SUBDOMAIN ITEM COUNT:" + itemCount);

            CrawlListMetadata newMetadata = new CrawlListMetadata();

            for (int i = 0; i < itemCount; ++i) {

                long originalPos = file.getFilePointer();
                file.readFully(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
                inputBuffer.reset(outputBuffer.getData(), CrawlListMetadata.Constants.FixedDataSize);
                try {
                    newMetadata.deserialize(inputBuffer, new BinaryProtocol());
                } catch (Exception e) {
                    LOG.error("-----Failed to Deserialize Metadata at Index:" + i + " Exception:"
                            + CCStringUtils.stringifyException(e));
                }
                // ok reset everything except hashes and first/last url pointers 
                int urlCount = newMetadata.getUrlCount();
                long firstRecordOffset = newMetadata.getFirstRecordOffset();
                long lastRecordOffset = newMetadata.getLastRecordOffset();
                String domainName = newMetadata.getDomainName();
                long domainHash = newMetadata.getDomainHash();

                // reset 
                newMetadata.clear();
                // restore 
                newMetadata.setUrlCount(urlCount);
                newMetadata.setFirstRecordOffset(firstRecordOffset);
                newMetadata.setLastRecordOffset(lastRecordOffset);
                newMetadata.setDomainName(domainName);
                newMetadata.setDomainHash(domainHash);

                // serialize it ... 
                outputBuffer.reset();
                newMetadata.serialize(outputBuffer, new BinaryProtocol());
                // write it back to disk 
                file.seek(originalPos);
                // and rewrite it ... 
                file.write(outputBuffer.getData(), 0, CrawlListMetadata.Constants.FixedDataSize);
            }
        } finally {
            file.close();
        }
        LOG.info("*** LIST:" + getListId() + " DONE RESETTIGN SUBDOMAIN METADATA QUEUE COUNTS");
    }
}

From source file:org.commoncrawl.service.crawler.CrawlLog.java

public static void walkCrawlLogFile(File crawlLogPath, long startOffset) throws IOException {

    // and open the crawl log file ...
    RandomAccessFile inputStream = null;

    IOException exception = null;

    CRC32 crc = new CRC32();
    CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 17);
    byte[] syncBytesBuffer = new byte[SYNC_BYTES_SIZE];

    // save position for potential debug output.
    long lastReadPosition = 0;

    try {
        inputStream = new RandomAccessFile(crawlLogPath, "rw");

        // and a data input stream ...
        RandomAccessFile reader = inputStream;
        // seek to zero
        reader.seek(0L);

        // read the header ...
        LogFileHeader header = readLogFileHeader(reader);

        System.out.println("Header ItemCount:" + header._itemCount + " FileSize:" + header._fileSize);

        if (startOffset != 0L) {
            System.out.println("Preseeking to:" + startOffset);
            reader.seek(startOffset);
        }

        Configuration conf = new Configuration();

        // read a crawl url from the stream...

        long recordCount = 0;
        while (inputStream.getFilePointer() < header._fileSize) {

            // System.out.println("PRE-SYNC SeekPos:"+
            // inputStream.getFilePointer());
            if (seekToNextSyncBytesPos(syncBytesBuffer, reader, header._fileSize)) {

                // System.out.println("POST-SYNC SeekPos:"+
                // inputStream.getFilePointer());

                lastReadPosition = inputStream.getFilePointer();

                // skip sync
                inputStream.skipBytes(SYNC_BYTES_SIZE);

                // read length ...
                int urlDataLen = reader.readInt();
                long urlDataCRC = reader.readLong();

                if (urlDataLen > buffer.getBuffer().length) {
                    buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                }
                reader.read(buffer.getBuffer(), 0, urlDataLen);
                crc.reset();
                crc.update(buffer.getBuffer(), 0, urlDataLen);

                long computedValue = crc.getValue();

                // validate crc values ...
                if (computedValue != urlDataCRC) {
                    LOG.error("CRC Mismatch Detected during HDFS transfer in CrawlLog:"
                            + crawlLogPath.getAbsolutePath() + " FilePosition:" + lastReadPosition);
                    inputStream.seek(lastReadPosition + 1);
                } else {
                    if (recordCount++ % 10000 == 0) {
                        // allocate a crawl url data structure
                        CrawlURL url = new CrawlURL();
                        DataInputStream bufferReader = new DataInputStream(
                                new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));
                        // populate it from the (in memory) data stream
                        url.readFields(bufferReader);

                        System.out.println("Record:" + recordCount + " At:" + lastReadPosition + " URL:"
                                + url.getUrl() + " BuffSize:" + urlDataLen + " ContentLen:"
                                + url.getContentRaw().getCount() + " LastModified:"
                                + new Date(url.getLastAttemptTime()).toString());
                    }
                }
            } else {
                break;
            }
        }
    } catch (EOFException e) {
        LOG.error("Caught EOF Exception during read of local CrawlLog:" + crawlLogPath.getAbsolutePath()
                + " FilePosition:" + lastReadPosition);
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        exception = e;
        throw e;
    } finally {
        if (inputStream != null)
            inputStream.close();
    }
}

From source file:org.commoncrawl.service.listcrawler.CrawlList.java

private OnDiskCrawlHistoryItem loadOnDiskItemForURLFP(URLFP fingerprint) throws IOException {

    // see if state is cached in memory ...
    boolean loadedFromMemory = false;

    synchronized (this) {
        if (_tempFixedDataBuffer != null) {

            loadedFromMemory = true;

            int low = 0;
            int high = (int) (_tempFixedDataBufferSize / OnDiskCrawlHistoryItem.ON_DISK_SIZE) - 1;

            OnDiskCrawlHistoryItem itemOut = new OnDiskCrawlHistoryItem();
            DataInputBuffer inputBuffer = new DataInputBuffer();

            int iterationNumber = 0;

            while (low <= high) {

                ++iterationNumber;

                int mid = low + ((high - low) / 2);

                inputBuffer.reset(_tempFixedDataBuffer, 0, _tempFixedDataBufferSize);
                inputBuffer.skip(mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE);

                // deserialize 
                itemOut.deserialize(inputBuffer);

                // now compare it against desired hash value ...
                int comparisonResult = itemOut.compareFingerprints(fingerprint);

                if (comparisonResult > 0)
                    high = mid - 1;
                else if (comparisonResult < 0)
                    low = mid + 1;
                else {

                    // cache offset 
                    itemOut._fileOffset = mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE;

                    // LOG.info("Found Match. Took:"+ iterationNumber + " iterations");
                    // and return item 
                    return itemOut;
                }
            }
            //LOG.error("Did Not Find Match For Domain:" + fingerprint.getDomainHash() + " URLFP:" + fingerprint.getUrlHash() + " Took:" + iterationNumber + " iterations");
        }
    }

    if (!loadedFromMemory) {
        //load from disk 

        //LOG.info("Opening Data File for OnDiskItem load for Fingerprint:" + fingerprint.getUrlHash());

        RandomAccessFile file = new RandomAccessFile(_fixedDataFile, "rw");

        // allocate buffer upfront 
        byte[] onDiskItemBuffer = new byte[OnDiskCrawlHistoryItem.ON_DISK_SIZE];
        DataInputBuffer inputStream = new DataInputBuffer();

        //LOG.info("Opened Data File. Searching for match");
        try {

            int low = 0;
            int high = (int) (file.length() / OnDiskCrawlHistoryItem.ON_DISK_SIZE) - 1;

            OnDiskCrawlHistoryItem itemOut = new OnDiskCrawlHistoryItem();

            int iterationNumber = 0;

            while (low <= high) {

                ++iterationNumber;

                int mid = low + ((high - low) / 2);

                // seek to proper location 
                file.seek(mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE);
                // read the data structure 
                file.readFully(onDiskItemBuffer, 0, onDiskItemBuffer.length);
                // map location in file 
                //MappedByteBuffer memoryBuffer = file.getChannel().map(MapMode.READ_ONLY,mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE,OnDiskCrawlHistoryItem.ON_DISK_SIZE);
                //DataInputStream  inputStream = new DataInputStream(new ByteBufferInputStream(memoryBuffer));
                inputStream.reset(onDiskItemBuffer, 0, OnDiskCrawlHistoryItem.ON_DISK_SIZE);

                // deserialize 
                itemOut.deserialize(inputStream);

                // memoryBuffer = null;
                //inputStream = null;

                // now compare it against desired hash value ...
                int comparisonResult = itemOut.compareFingerprints(fingerprint);

                if (comparisonResult > 0)
                    high = mid - 1;
                else if (comparisonResult < 0)
                    low = mid + 1;
                else {

                    // cache offset 
                    itemOut._fileOffset = mid * OnDiskCrawlHistoryItem.ON_DISK_SIZE;

                    // LOG.info("Found Match. Took:"+ iterationNumber + " iterations");
                    // and return item 
                    return itemOut;
                }
            }
            //LOG.error("******Did Not Find Match For Domain:" + fingerprint.getDomainHash() + " URLFP:" + fingerprint.getUrlHash() + " Took:" + iterationNumber + " iterations");

            //DEBUG ONLY !
            // dumpFixedDataFile();
        } finally {
            file.close();
        }
    }
    return null;
}

From source file:com.portfolio.rest.RestServicePortfolio.java

@Path("/portfolios/portfolio/{portfolio-id}")
@GET
@Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML, "application/zip",
        MediaType.APPLICATION_OCTET_STREAM })
public Object getPortfolio(@CookieParam("user") String user, @CookieParam("credential") String token,
        @QueryParam("group") int groupId, @PathParam("portfolio-id") String portfolioUuid,
        @Context ServletConfig sc, @Context HttpServletRequest httpServletRequest,
        @HeaderParam("Accept") String accept, @QueryParam("user") Integer userId,
        @QueryParam("group") Integer group, @QueryParam("resources") String resource,
        @QueryParam("files") String files, @QueryParam("export") String export,
        @QueryParam("lang") String lang) {
    UserInfo ui = checkCredential(httpServletRequest, user, token, null);

    Response response = null;
    try {
        String portfolio = dataProvider.getPortfolio(new MimeType("text/xml"), portfolioUuid, ui.userId, 0,
                this.label, resource, "", ui.subId).toString();

        if ("faux".equals(portfolio)) {
            response = Response.status(403).build();
        }

        if (response == null) {
            Date time = new Date();
            Document doc = DomUtils.xmlString2Document(portfolio, new StringBuffer());
            NodeList codes = doc.getDocumentElement().getElementsByTagName("code");
            // The first one is the root's
            Node codenode = codes.item(0);
            String code = "";
            if (codenode != null)
                code = codenode.getTextContent();

            if (export != null) {
                response = Response.ok(portfolio).header("content-disposition",
                        "attachment; filename = \"" + code + "-" + time + ".xml\"").build();
            } else if (resource != null && files != null) {
                //// Case where a ZIP is returned

                /// Temp file in temp directory
                File tempDir = new File(System.getProperty("java.io.tmpdir", null));
                File tempZip = File.createTempFile(portfolioUuid, ".zip", tempDir);

                FileOutputStream fos = new FileOutputStream(tempZip);
                ZipOutputStream zos = new ZipOutputStream(fos);
                //               BufferedOutputStream bos = new BufferedOutputStream(zos);

                /// zos.setComment("Some comment");

                /// Write xml file to zip
                ZipEntry ze = new ZipEntry(portfolioUuid + ".xml");
                zos.putNextEntry(ze);

                byte[] bytes = portfolio.getBytes("UTF-8");
                zos.write(bytes);

                zos.closeEntry();

                /// Find all fileid/filename
                XPath xPath = XPathFactory.newInstance().newXPath();
                String filterRes = "//asmResource/fileid";
                NodeList nodelist = (NodeList) xPath.compile(filterRes).evaluate(doc, XPathConstants.NODESET);

                /// Direct link to data
                // String urlTarget = "http://"+ server + "/user/" + user +"/file/" + uuid +"/"+ lang+ "/ptype/fs";

                /*
                String langatt = "";
                if( lang != null )
                   langatt = "?lang="+lang;
                else
                   langatt = "?lang=fr";
                //*/

                /// Fetch all files
                for (int i = 0; i < nodelist.getLength(); ++i) {
                    Node res = nodelist.item(i);
                    Node p = res.getParentNode(); // resource -> container
                    Node gp = p.getParentNode(); // container -> context
                    Node uuidNode = gp.getAttributes().getNamedItem("id");
                    String uuid = uuidNode.getTextContent();

                    String filterName = "./filename[@lang and text()]";
                    NodeList textList = (NodeList) xPath.compile(filterName).evaluate(p,
                            XPathConstants.NODESET);
                    String filename = "";
                    if (textList.getLength() != 0) {
                        Element fileNode = (Element) textList.item(0);
                        filename = fileNode.getTextContent();
                        lang = fileNode.getAttribute("lang");
                        if ("".equals(lang))
                            lang = "fr";
                    }

                    String servlet = httpServletRequest.getRequestURI();
                    servlet = servlet.substring(0, servlet.indexOf("/", 7));
                    String server = httpServletRequest.getServerName();
                    int port = httpServletRequest.getServerPort();
                    //                  "http://"+ server + /resources/resource/file/ uuid ? lang= size=
                    // String urlTarget = "http://"+ server + "/user/" + user +"/file/" + uuid +"/"+ lang+ "/ptype/fs";
                    String url = "http://" + server + ":" + port + servlet + "/resources/resource/file/" + uuid
                            + "?lang=" + lang;
                    HttpGet get = new HttpGet(url);

                    // Transfer sessionid so that local request still get security checked
                    HttpSession session = httpServletRequest.getSession(true);
                    get.addHeader("Cookie", "JSESSIONID=" + session.getId());

                    // Send request
                    CloseableHttpClient client = HttpClients.createDefault();
                    CloseableHttpResponse ret = client.execute(get);
                    HttpEntity entity = ret.getEntity();

                    // Put specific name for later recovery
                    if ("".equals(filename))
                        continue;
                    int lastDot = filename.lastIndexOf(".");
                    if (lastDot < 0)
                        lastDot = 0;
                    String filenameext = filename.substring(0); /// find extension
                    int extindex = filenameext.lastIndexOf(".");
                    filenameext = uuid + "_" + lang + filenameext.substring(extindex);

                    // Save it to zip file
                    //                  int length = (int) entity.getContentLength();
                    InputStream content = entity.getContent();

                    //                  BufferedInputStream bis = new BufferedInputStream(entity.getContent());

                    ze = new ZipEntry(filenameext);
                    try {
                        int totalread = 0;
                        zos.putNextEntry(ze);
                        int inByte;
                        byte[] buf = new byte[4096];
                        //                     zos.write(bytes,0,inByte);
                        while ((inByte = content.read(buf)) != -1) {
                            totalread += inByte;
                            zos.write(buf, 0, inByte);
                        }
                        System.out.println("FILE: " + filenameext + " -> " + totalread);
                        content.close();
                        //                     bis.close();
                        zos.closeEntry();
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                    EntityUtils.consume(entity);
                    ret.close();
                    client.close();
                }

                zos.close();
                fos.close();

                /// Return zip file
                RandomAccessFile f = new RandomAccessFile(tempZip.getAbsoluteFile(), "r");
                byte[] b = new byte[(int) f.length()];
                f.read(b);
                f.close();

                response = Response.ok(b, MediaType.APPLICATION_OCTET_STREAM)
                        .header("content-disposition", "attachment; filename = \"" + code + "-" + time + ".zip")
                        .build();

                // Temp file cleanup
                tempZip.delete();
            } else {
                //try { this.userId = userId; } catch(Exception ex) { this.userId = -1; };
                //              String returnValue = dataProvider.getPortfolio(new MimeType("text/xml"),portfolioUuid,this.userId, this.groupId, this.label, resource, files).toString();
                if (portfolio.equals("faux")) {

                    throw new RestWebApplicationException(Status.FORBIDDEN,
                            "Vous n'avez pas les droits necessaires");
                }

                if (accept.equals(MediaType.APPLICATION_JSON)) {
                    portfolio = XML.toJSONObject(portfolio).toString();
                    response = Response.ok(portfolio).type(MediaType.APPLICATION_JSON).build();
                } else
                    response = Response.ok(portfolio).type(MediaType.APPLICATION_XML).build();

                logRestRequest(httpServletRequest, null, portfolio, Status.OK.getStatusCode());
            }
        }
    } catch (RestWebApplicationException ex) {
        throw new RestWebApplicationException(Status.FORBIDDEN, ex.getResponse().getEntity().toString());
    } catch (SQLException ex) {
        logRestRequest(httpServletRequest, null, "Portfolio " + portfolioUuid + " not found",
                Status.NOT_FOUND.getStatusCode());

        throw new RestWebApplicationException(Status.NOT_FOUND, "Portfolio " + portfolioUuid + " not found");
    } catch (Exception ex) {
        ex.printStackTrace();
        logRestRequest(httpServletRequest, null, ex.getMessage() + "\n\n" + ex.getStackTrace(),
                Status.INTERNAL_SERVER_ERROR.getStatusCode());

        throw new RestWebApplicationException(Status.INTERNAL_SERVER_ERROR, ex.getMessage());
    } finally {
        if (dataProvider != null)
            dataProvider.disconnect();
    }

    return response;
}

From source file:org.commoncrawl.service.crawler.CrawlLog.java

private static void transferLocalCheckpointLog(File crawlLogPath, HDFSCrawlURLWriter writer, long checkpointId)
        throws IOException {

    // and open the crawl log file ...
    RandomAccessFile inputStream = null;

    IOException exception = null;

    CRC32 crc = new CRC32();
    CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 17);
    byte[] syncBytesBuffer = new byte[SYNC_BYTES_SIZE];

    // save position for potential debug output.
    long lastReadPosition = 0;

    try {
        inputStream = new RandomAccessFile(crawlLogPath, "rw");
        // and a data input stream ...
        RandomAccessFile reader = inputStream;
        // seek to zero
        reader.seek(0L);

        // read the header ...
        LogFileHeader header = readLogFileHeader(reader);

        // read a crawl url from the stream...

        while (inputStream.getFilePointer() < header._fileSize) {

            if (seekToNextSyncBytesPos(syncBytesBuffer, reader, header._fileSize)) {

                try {
                    lastReadPosition = inputStream.getFilePointer();

                    // skip sync
                    inputStream.skipBytes(SYNC_BYTES_SIZE);

                    // read length ...
                    int urlDataLen = reader.readInt();
                    long urlDataCRC = reader.readLong();

                    if (urlDataLen > buffer.getBuffer().length) {
                        buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                    }
                    reader.read(buffer.getBuffer(), 0, urlDataLen);
                    crc.reset();
                    crc.update(buffer.getBuffer(), 0, urlDataLen);

                    long computedValue = crc.getValue();

                    // validate crc values ...
                    if (computedValue != urlDataCRC) {
                        LOG.error("CRC Mismatch Detected during HDFS transfer in CrawlLog:"
                                + crawlLogPath.getAbsolutePath() + " Checkpoint Id:" + checkpointId
                                + " FilePosition:" + lastReadPosition);
                        inputStream.seek(lastReadPosition + 1);
                    } else {
                        // allocate a crawl url data structure
                        CrawlURL url = new CrawlURL();
                        DataInputStream bufferReader = new DataInputStream(
                                new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));
                        // populate it from the (in memory) data stream
                        url.readFields(bufferReader);
                        try {
                            // and write out appropriate sequence file entries ...
                            writer.writeCrawlURLItem(new Text(url.getUrl()), url);
                        } catch (IOException e) {
                            LOG.error("Failed to write CrawlURL to SequenceFileWriter with Exception:"
                                    + CCStringUtils.stringifyException(e));
                            throw new URLWriterException();
                        }
                    }
                } catch (URLWriterException e) {
                    LOG.error("Caught URLRewriter Exception! - Throwing to outer layer!");
                    throw e;
                } catch (Exception e) {
                    LOG.error("Ignoring Error Processing CrawlLog Entry at Position:" + lastReadPosition
                            + " Exception:" + CCStringUtils.stringifyException(e));
                }
            } else {
                break;
            }
        }
    } catch (EOFException e) {
        LOG.error("Caught EOF Exception during read of local CrawlLog:" + crawlLogPath.getAbsolutePath()
                + " Checkpoint Id:" + checkpointId + " FilePosition:" + lastReadPosition);
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        exception = e;
        throw e;
    } finally {
        if (inputStream != null)
            inputStream.close();
    }
}