Source code

Java tutorial


Here is the source code for big/BigZip.java


/*
 * SPDXVersion: SPDX-1.1
 * Creator: Person: Nuno Brito
 * Creator: Organization: TripleCheck
 * Created: 2014-07-07T13:49:34Z
 * LicenseName: EUPL-1.1-without-appendix
 * FileName: BigZip.java
 * FileType: SOURCE
 * FileCopyrightText: <text> Copyright 2014 Nuno Brito, TripleCheck </text>
 * FileComment: <text>
 * This class provides the methods to add and read files from a BIG archive.
 * There is a common set of variables that are used for opening the stream of
 * data globally across the class. This is more efficient than opening the BIG
 * file every time we need to add something new, but at the same time it brings
 * more complexity in ensuring that each related file is ready for operation.
 * The risk of data corruption is very high. One single byte misplaced and the
 * whole archive is lost. Therefore, we write a line on the log stating that an
 * operation is in progress. When the operation ends, another line on the log
 * signals that everything was done with success. Sometimes the process will be
 * interrupted before completing. When this is the case, the next operation
 * will notice that a pending operation exists.
 * The pending operation will be discarded completely to ensure that the archive
 * remains usable.
 * A second measure to prevent data corruption is the magic signature that also
 * serves as individual file separator. On one hand it permits other tools to
 * identify the type of data stored in the archive. On the other hand, if the
 * index is not available then you lose the path/name information of the files
 * but the data remains usable.
 * </text>
 */

package big;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.ArchiveOutputStream;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.utils.IOUtils;
import utils.files;
import tools.header;

/**
 * @author Nuno Brito, 7th of July 2014 in Darmstadt, Germany
 */
public class BigZip {

    // settings
    private int
    // how big a file do we accept for storing?
    maxFileSize = 1000000 * 100; // default max size = 100Mb 

    // variables
    private Boolean isReady = false;
    private OutputStream outputStream = null;
    private BufferedWriter writerFileIndex = null, writerFileLog = null;
    // the main file associated to this object
    private File fileLogBIG = null, fileMainBIG = null, fileIndexBIG = null;

    private long currentPosition = 0, getNextFileCounter = 0;

    // defines the magic number and recovery trigger for each stored file
    private final String magicSignature = "BIG81nb", tagStart = "start: ", tagEnded = "ended: ";

    private String basePath = "";

    // variables used during the "next file" iterator
    private BufferedReader readerNextFile;
    private FileReader fileReaderNext;
    private long currentGetNextPosition = 0;

    private String readerNextFileName, lastReadLine, currentLine;

    /**
     * Initialises a BIG archive with initialisation messages enabled. If the
     * archive file doesn't exist yet then it will be created. You should check
     * the isReady() method to verify that the archive is ready to be used.
     *
     * @param fileTarget    the file that we want to open
     */
    public BigZip(final File fileTarget) {
        Start(fileTarget, false);
    }

     /**
      * Initialises a BIG archive. If the archive file doesn't exist yet then
      * it will be created. You should check the isReady() method to verify
      * that the archive is ready to be used.
      *
      * @param fileTarget    the file that we want to open
      * @param silent        when true, no initialisation messages are output
      */
    public BigZip(final File fileTarget, boolean silent) {
        Start(fileTarget, silent);
    }

     * Initialises a BIG archive. If the archive file doesn't exist yet then 
     * it will be created. You should check the isReady() method to verify
     * that the archive is ready to be used.
     * @param fileTarget    the file that we want to open 
     * @param silent        No initialisation messages are output
    private void Start(final File fileTarget, boolean silent) {
        // do the proper assignments
        this.fileMainBIG = fileTarget;
        this.fileLogBIG = getNewFile("log");
        this.fileIndexBIG = getNewFile("index");

        // ensure these files exist        
        existOrTouch(fileMainBIG, "");
        existOrTouch(fileLogBIG, "log");
        existOrTouch(fileIndexBIG, "index");

        // prepare the initial message
        String message = "Archive is ready to be used: " + fileMainBIG.getName();

        // shall we add the file size if above a given value?
        if (fileMainBIG.length() > 0) {
            // add the size then
            message += " (" + utils.files.humanReadableSize(fileMainBIG.length()) + ")";
        // output the message
        if (silent == false) {
        // all done
        isReady = true;

     * Provides a new file based on the main BIG file
     * @param name  The name to append on the extension
     * @return      A file pointer
    private File getNewFile(final String name) {
        return new File(fileMainBIG.getParentFile(), fileMainBIG.getName() + "-" + name);

     * Check if a file exists, if doesn't exist try to create one.
     * @param file  the file to create
     * @return      true if it exists or was created, false if we fail to create one
    private boolean existOrTouch(final File file, final String designation) {
        // first check for the folder
        File folder = file.getParentFile();

        // does it exist?
        if (folder.exists() == false) {
            // then create one

        // does our archive already exists?
        if (file.exists() == false) {
            // then create a new one
            if (designation.isEmpty()) {
            } else {
                utils.files.SaveStringToFile(file, header.create(magicSignature + "-" + designation,
                        "TripleCheck at"));
            // did this worked?
            if (file.exists() == false) {
                // we failed to create our file
                System.err.println("BIG201 - Error creating file: " + file.getAbsolutePath());
                return false;
        return true;

     * Is this object initialised and ready to be used?
     * @return True if ready, return False when something went wrong 
    public Boolean isReady() {
        return isReady;

     * Add all files from a given folder inside our archive
     * @param folderToAdd The folder whose files we want to add
     * @throws
    // Adds the files found under folderToAdd to the archive by delegating to
    // addFiles with a maximum crawl depth of 25.
    // NOTE(review): this extract has lost lines here. The preflight branch has
    // no visible exit or closing brace, and the statements that belong to the
    // "open the index files" and "close all the pointers" comments are missing
    // (presumably operationStart/operationEnd -- confirm against the original).
    public void addFolder(final File folderToAdd) throws IOException {
        // preflight checks
        if (isReady == false) {
            System.err.println("BIG137 - Error, Archive is not ready");
        // open the index files
        // call the iteration to go through all files
        addFiles(folderToAdd, folderToAdd, 25);
        // now close all the pointers

     * Add all files from a given folder inside our archive
     * @param fileToAdd The file we want to add
     * @param baseFolder
     * @throws
    // Adds a single file to the archive. baseFolder becomes the new basePath,
    // i.e. the prefix that writeFile removes from the absolute path before it
    // writes the index line.
    // NOTE(review): this extract has lost lines here. The preflight branch has
    // no visible exit or closing brace, and the three statements described by
    // the trailing comments (open, write, close) are missing -- restore them
    // from the original source rather than guessing.
    public synchronized void addFile(final File fileToAdd, final String baseFolder) throws IOException {
        // preflight checks
        if (isReady == false) {
            System.err.println("BIG241 - Error, Archive is not ready");

        // overwrites any base path set earlier (e.g. through setBasePath)
        basePath = baseFolder;

        // open the index files
        // call the iteration to go through all files
        // now close all the pointers

     * Opens the BIG file and respective index
    public void quickStart() {
        try {
            // open the BIG file where the binary data is stored
            currentPosition = fileMainBIG.length();
            // open our archive file
            outputStream = new FileOutputStream(fileMainBIG, true);
            // open the file where we list the data, signatures and positions
            writerFileIndex = new BufferedWriter(new FileWriter(fileIndexBIG, true), 8192);
            // open the log file
            writerFileLog = new BufferedWriter(new FileWriter(fileLogBIG, true), 8192);

        } catch (IOException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);

    * Closes the pointers of our work files
    public void quickEnd() {
        try {
            // flush all the remaining data onto the files

            // close the streams

        } catch (IOException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);

     * Opens the BIG file and respective index
     * @param folderToAdd
    public void operationStart(final File folderToAdd) {
        try {
            // if the base path is already set, don't change it
            if (basePath.isEmpty()) {
                basePath = folderToAdd.getAbsolutePath();

            // open the BIG file where the binary data is stored
            currentPosition = fileMainBIG.length();
            // do we have any operation left incomplete?
            // open our archive file
            outputStream = new FileOutputStream(fileMainBIG, true);
            // open the file where we list the data, signatures and positions
            writerFileIndex = new BufferedWriter(new FileWriter(fileIndexBIG, true), 8192);
            writerFileLog = new BufferedWriter(new FileWriter(fileLogBIG, true), 8192);

        } catch (IOException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);

     * Checks if we have a restore point that hasn't terminated with success
     * on a previous operation. If something went wrong on the previous run,
     * we will try to clean up things now. Results without success are discarded
     * from the BIG archive.
    // Rolls back an unfinished previous operation, then logs the start of a new one.
    // If the last log line begins with tagStart, the number that follows the tag
    // is parsed as the archive size before that operation and the main file is
    // resized to it. A new tagStart line (position, timestamp, folder name) is
    // appended to the log afterwards.
    // NOTE(review): this extract has lost lines here. The closing braces, whatever
    // followed the "Failed to restore" message, and the statement that belongs to
    // "time to delete this info from the index" are missing (presumably a call to
    // deleteIndexDataAfterPosition(lastPosition) -- confirm against the original).
    private void pointRestoreAndSave(final File folderToAdd) throws IOException {
        // get the last line from our log textWeird file
        String lastLine = utils.files.getLastLineFast(fileLogBIG);
        // are we detecting that something went wrong?
        if ((lastLine.isEmpty() == false) && (lastLine.startsWith(tagStart))) {
            System.out.println("BIG290 Something went wrong last time, we need to restore the last saved point!");
            // we need to restore the last saved point
            // drop the tag, then keep the text up to the next space: the position
            final String snippet = lastLine.substring(lastLine.indexOf(" ") + 1);
            // NOTE(review): throws StringIndexOutOfBoundsException when the
            // snippet has no space, i.e. on a log line without a timestamp part
            final String number = snippet.substring(0, snippet.indexOf(" "));
            long lastPosition = Long.parseLong(number);

            // try to return our knowledge base to the previous state
            utils.files.changeSize(fileMainBIG, lastPosition);
            // was the size change successful?
            if (lastPosition != fileMainBIG.length()) {
                System.out.println("BIG197 - Failed to restore last saved point");
            // we had success so, time to delete this info from the index
            // update our index
            currentPosition = lastPosition;
        // now add a line to record what we are doing
        utils.files.addTextToFile(fileLogBIG, "\n" + tagStart + utils.files.getPrettyFileSize(currentPosition) + " "
                + utils.time.getDateTimeISO() + "->" + folderToAdd.getName());

     * Looks at the data inside the index file, when we reach a file that
     * is bigger than the value specified as last position then we delete
     * all lines that come after that position, effectively deleting them.
    private void deleteIndexDataAfterPosition(final long lastPosition) {
        // prepare the keyword that we want to delete
        final String prettyNumber = utils.files.getPrettyFileSize(lastPosition);
        // cut the log file after the mentioned position
        utils.files.cutTextFileAfter(fileLogBIG, tagStart + prettyNumber);
        // cut the index file after the mentioned position
        utils.files.cutTextFileAfter(fileIndexBIG, prettyNumber + " ");

     * Closes the pointers of our work files
    // Counterpart of operationStart: per the comments below it records the end of
    // the operation in the log, then flushes and closes the work files.
    // NOTE(review): every statement inside the try block is missing from this
    // extract; only the comments and the IOException handler survive. Restore
    // the body from the original source.
    public void operationEnd() {
        try {

            // now add a line to record what we are doing

            // flush all the remaining data onto the files

            // close the streams

        } catch (IOException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);

     * Add a line to the log file
     * @param title 
    // Builds the log line that marks the start of an operation:
    // newline, tagStart, formatted current position, ISO timestamp, "->" and title.
    // NOTE(review): the statement that writes `line` to the log and the closing
    // brace are missing from this extract; as shown, `line` is never used.
    private void addTagStarted(final String title) throws IOException {
        // now add a line to record what we are doing
        final String line = "\n" + tagStart + utils.files.getPrettyFileSize(currentPosition) + " "
                + utils.time.getDateTimeISO() + "->" + title;

     * Marks the commit of a file with success
     * @throws
    // Builds the log line that marks a successful commit:
    // newline, tagEnded, formatted current position and ISO timestamp.
    // NOTE(review): the statement that writes `line` to the log and the closing
    // brace are missing from this extract; as shown, `line` is never used.
    private void addTagEnded() throws IOException {
        final String line = "\n" + tagEnded + utils.files.getPrettyFileSize(currentPosition) + " "
                + utils.time.getDateTimeISO();

     * Find all files in a given folder and respective sub-folders
     * @param where A file object of the start folder
     * @param maxDeep How deep is the crawl allowed to proceed
     * @return An array containing all the found files, returns null if none is
     * found
    private void addFiles(final File baseFolder, final File where, int maxDeep) throws IOException {
        // list the files on the current directory 
        File[] files = where.listFiles();
        // no need to continue if nothing was found
        if (files == null) {
        // go through each file
        for (final File file : files) {
            if (file.isFile()) {
                // Add the file to our archive
                addFile(file, file.getParentFile().getAbsolutePath());
            } else if (maxDeep - 1 > 0) {
                // do the recursive crawling
                addFiles(baseFolder, file, maxDeep - 1);

      * Copies one file into the big archive
      * @param fileToCopy
      * @return 
    // Copies one file into the BIG archive.
    // The file is zipped in memory as a single entry named after the file, the
    // zipped bytes are copied to outputStream, and the index line is built from
    // the formatted position, the SHA-1 of the original file and its path with
    // basePath removed. currentPosition then advances by the bytes copied plus
    // the magic signature length. Any exception is reported on stderr and turned
    // into a false return.
    // NOTE(review): this extract has lost lines and tokens: the zip entry/stream
    // finalisation, the magic signature write, the read call inside the while
    // condition, the flush, the start of the index write statement, the body of
    // the finally block and all closing braces. Restore from the original source.
    public boolean writeFile(final File fileToCopy) {

        // declare
        ByteArrayOutputStream outputZipStream = new ByteArrayOutputStream();
        try {
            /* Create Archive Output Stream that attaches File Output Stream / and specifies type of compression */
            ArchiveOutputStream logical_zip = new ArchiveStreamFactory()
                    .createArchiveOutputStream(ArchiveStreamFactory.ZIP, outputZipStream);
            /* Create Archieve entry - write header information*/
            logical_zip.putArchiveEntry(new ZipArchiveEntry(fileToCopy.getName()));
            /* Copy input file */
            // NOTE(review): no close of this FileInputStream is visible in the extract
            IOUtils.copy(new FileInputStream(fileToCopy), logical_zip);

            // get the bytes
            final ByteArrayInputStream byteInput = new ByteArrayInputStream(outputZipStream.toByteArray());

            byte[] buffer = new byte[8192];
            // counter accumulates the number of zipped bytes copied to the archive
            int length, counter = 0;
            // add the magic number to this file block
            // now copy the whole file into the BIG archive
            while ((length = > 0) {
                outputStream.write(buffer, 0, length);
                counter += length;
            // if there is something else to be flushed, do it now

            // calculate the base path
            // replace() removes every occurrence of basePath, not only a leading one
            final String resultingPath = fileToCopy.getAbsolutePath().replace(basePath, "");

            // calculate the SHA1 signature
            final String output = utils.hashing.checksum.generateFileChecksum("SHA-1", fileToCopy);

            // write a new line in our index file
                    "\n" + utils.files.getPrettyFileSize(currentPosition) + " " + output + " " + resultingPath);
            // increase the position counter
            currentPosition += counter + magicSignature.length();
        } catch (Exception e) {
            System.err.println("BIG346 - Error copying file: " + fileToCopy.getAbsolutePath());
            return false;

        finally {
        return true;

     * Copies one file into the big archive
     * @param fileToCopy
     * @param SHA1
     * @param filePathToWriteInTextLine
     * @return 
    // Copies one file into the BIG archive using a checksum and index path that
    // the caller supplies.
    // The file is zipped in memory as a single entry named after the file and the
    // zipped bytes are copied to outputStream. The index line is built from the
    // formatted position, SHA1 and filePathToWriteInTextLine. currentPosition
    // then advances by the bytes copied plus the magic signature length. Any
    // exception is reported on stderr and turned into a false return.
    // NOTE(review): this extract has lost lines and tokens: the log "start" call,
    // the zip entry/stream finalisation, the magic signature write, the read call
    // inside the while condition, the flush, the statement that writes `line`,
    // the log "ended" call, the finally body and all closing braces.
    public boolean quickWrite(final File fileToCopy, final String SHA1, final String filePathToWriteInTextLine) {
        // declare
        ByteArrayOutputStream outputZipStream = new ByteArrayOutputStream();
        try {
            // save this operation on the log of commits

            /* Create Archive Output Stream that attaches File Output Stream / and specifies type of compression */
            ArchiveOutputStream logical_zip = new ArchiveStreamFactory()
                    .createArchiveOutputStream(ArchiveStreamFactory.ZIP, outputZipStream);
            /* Create Archieve entry - write header information*/
            logical_zip.putArchiveEntry(new ZipArchiveEntry(fileToCopy.getName()));
            /* Copy input file */
            // NOTE(review): no close of this FileInputStream is visible in the extract
            IOUtils.copy(new FileInputStream(fileToCopy), logical_zip);

            // get the bytes
            final ByteArrayInputStream byteInput = new ByteArrayInputStream(outputZipStream.toByteArray());

            byte[] buffer = new byte[8192];
            // counter accumulates the number of zipped bytes copied to the archive
            int length, counter = 0;
            // add the magic number to this file block
            // now copy the whole file into the BIG archive
            while ((length = > 0) {
                outputStream.write(buffer, 0, length);
                counter += length;
            // if there is something else to be flushed, do it now

            // calculate the base path
            //final String resultingPath = fileToCopy.getAbsolutePath().replace(rootFolder, "");

            // index entry: position, checksum and the path chosen by the caller
            final String line = "\n" + utils.files.getPrettyFileSize(currentPosition) + " " + SHA1 + " "
                    + filePathToWriteInTextLine;

            // write a new line in our index file
            // increase the position counter
            currentPosition += counter + magicSignature.length();

            // close the log with success
        } catch (Exception e) {
            System.err.println("BIG600 - Error copying file: " + fileToCopy.getAbsolutePath());
            return false;
        } finally {
        return true;

     * Copies one file into the big archive
     * @param textToCopy
     * @param SHA1
     * @param filePathToWriteInTextLine
     * @return 
     * @throws 
    public boolean quickWrite(final String textToCopy, final String SHA1, final String filePathToWriteInTextLine)
            throws IOException {
        return quickWriteGenericStream(new ByteArrayInputStream(textToCopy.getBytes()), SHA1,

     * Copies one file into the big archive
     * @param stream
     * @param SHA1
     * @param filePathToWriteInTextLine
     * @return 
     * @throws 
    // Copies the content of an input stream into the BIG archive.
    // The stream is zipped in memory with filePathToWriteInTextLine as the entry
    // name and the zipped bytes are copied to outputStream. The index line is
    // built from the formatted position, SHA1 and filePathToWriteInTextLine.
    // currentPosition then advances by the bytes copied plus the magic signature
    // length. Any exception is reported on stderr and turned into a false return,
    // even though the signature declares IOException.
    // NOTE(review): this extract has lost lines and tokens: the log "start" call,
    // the putArchiveEntry call and zip finalisation, the magic signature write,
    // the read call inside the while condition, the statement that writes `line`,
    // the log "ended" call, the close inside the finally block and all closing
    // braces.
    public boolean quickWriteGenericStream(final InputStream stream, final String SHA1,
            final String filePathToWriteInTextLine) throws IOException {
        // declare
        ByteArrayOutputStream outputZipStream = new ByteArrayOutputStream();
        ByteArrayInputStream byteInput = null;
        try {
            // save this operation on the log of commits
            // Create Archive Output Stream that attaches File Output Stream / and specifies type of compression
            ArchiveOutputStream logical_zip = new ArchiveStreamFactory()
                    .createArchiveOutputStream(ArchiveStreamFactory.ZIP, outputZipStream);
            // Create Archive entry - write header information
            // NOTE(review): as shown, this entry is never registered on logical_zip
            ZipArchiveEntry zipArchiveEntry = new ZipArchiveEntry(filePathToWriteInTextLine);
            // Copy input file

            IOUtils.copy(stream, logical_zip);


            // get the bytes
            byteInput = new ByteArrayInputStream(outputZipStream.toByteArray());

            byte[] buffer = new byte[8192];
            // counter accumulates the number of zipped bytes copied to the archive
            int length, counter = 0;
            // add the magic number to this file block
            // now copy the whole file into the BIG archive
            while ((length = > 0) {
                outputStream.write(buffer, 0, length);
                counter += length;

            // index entry: position, checksum and the path chosen by the caller
            final String line = "\n" + utils.files.getPrettyFileSize(currentPosition) + " " + SHA1 + " "
                    + filePathToWriteInTextLine;

            // write a new line in our index file
            // increase the position counter
            currentPosition += counter + magicSignature.length();

            // close the log with success
        } catch (Exception e) {
            System.err.println("BIG600 - Error copying file: " + filePathToWriteInTextLine);
            return false;
        } finally {
            if (byteInput != null) {
        return true;

     * Requires an InputStream, it will calculate the SHA1 checksum at the same
     * time that it writes data onto the big file. The input stream is expected
     * to be closed outside of this method.
     * @param stream
     * @param filePathToWriteInTextLine
     * @throws 
    // Zips the content of an input stream in memory while computing its SHA1 in
    // the same loop, then copies the zipped bytes to outputStream.
    // The index line is built from the formatted position, the computed SHA1 and
    // filePathToWriteInTextLine. currentPosition then advances by the bytes
    // copied plus the magic signature length. Unlike the other write methods,
    // nothing is caught here: every failure propagates to the caller.
    // NOTE(review): this extract has lost lines and tokens: the putArchiveEntry
    // call, the read calls inside both while conditions, the zip finalisation
    // before `logical_zip = null`, the magic signature write, the statement that
    // writes `line`, the final close calls and all closing braces.
    public void quickWriteStreamStandalone(final InputStream stream, final String filePathToWriteInTextLine)
            throws Exception {
        // declare
        ByteArrayOutputStream outputZipStream = new ByteArrayOutputStream();
        ByteArrayInputStream byteInput = null;
        // Create Archive Output Stream that attaches File Output Stream / and specifies type of compression
        ArchiveOutputStream logical_zip = new ArchiveStreamFactory()
                .createArchiveOutputStream(ArchiveStreamFactory.ZIP, outputZipStream);
        // Create Archive entry - write header information
        // NOTE(review): as shown, this entry is never registered on logical_zip
        ZipArchiveEntry zipArchiveEntry = new ZipArchiveEntry(filePathToWriteInTextLine);
        // prepare the SHA1 signature generation
        final MessageDigest hash = MessageDigest.getInstance("SHA1");

        // Copy input file
        // this buffer is reused below for the copy into the BIG archive
        byte[] buffer = new byte[16384];
        int length;

        // decompress from the original zip file, compress to our zip format
        // calculate the SHA1 signature on the same loop to save resource
        while ((length = > 0) {
            logical_zip.write(buffer, 0, length);
            hash.update(buffer, 0, length);

        // compute the file signature
        byte[] digest = hash.digest();
        final String SHA1 = utils.hashing.checksum.convertHash(digest);

        // close the zip related objects
        logical_zip = null;

        // define the line that will be written on the index file
        final String line = "\n".concat(utils.files.getPrettyFileSize(currentPosition)).concat(" ").concat(SHA1)
                .concat(" ").concat(filePathToWriteInTextLine);

        // get the bytes
        byteInput = new ByteArrayInputStream(outputZipStream.toByteArray());
        // counter accumulates the number of zipped bytes copied to the archive
        int counter = 0;

        // add the magic number to this file block
        // now copy the whole file into the BIG archive
        while ((length = > 0) {
            outputStream.write(buffer, 0, length);
            counter += length;
        // write a new line in our index file
        // increase the position counter
        currentPosition += counter + magicSignature.length();
        // close the streams that were created

     * Define basePath, this is useful for cases where we want to index
     * files with several sublevels of folders to preserve URL information.
     * @param basePath 
    public void setBasePath(String basePath) {
        this.basePath = basePath;

     * Looks inside our BIG archive to extract a specific file using the
     * path/name portion
     * @param fileToExtract The full path and name of the file
     * @param targetFile    The where we will be writing the result
     * @return True if we created a new file, false if we didn't found one or
     * something else went wrong.
    public boolean getFile(final String fileToExtract, final File targetFile) {
        // get the line where the file is located on our index
        long[] coordinates = getFileCoordinates(fileIndexBIG, fileToExtract);
        // did we found something?
        if (coordinates == null) {
            return false;
        // now extract the mentioned bytes from our BIG archive
        boolean result = extractBytes(targetFile, coordinates[0], coordinates[1]);
        System.out.println("BIG 799: Grabbing file from " + coordinates[0] + " to " + coordinates[1]);
        // all done
        return result;

     * Extracts a file from the archive based on its file path/name information
     * @param fileToExtract
     * @return null if not found, otherwise it contains the file data
    public String getFileAsText(final String fileToExtract) {
        // get the line where the file is located on our index
        long[] coordinates = getFileCoordinates(fileIndexBIG, fileToExtract);
        // did we found something?
        if (coordinates == null) {
            return null;
        // now extract the mentioned bytes from our BIG archive
        String result = extractBytesToRAM(coordinates[0], coordinates[1]);
        // all done
        return result;

     * Given a position inside our knowledge base, retrieve the data up to
     * the next file indicator.
     * @param targetFile    The new file that will be created
     * @param startPosition The position from where we start to read the data
     * @param endPosition
     * @return 
    // Copies the bytes between startPosition and endPosition of the BIG file into
    // a temporary zip file, one byte at a time, and then unpacks that zip.
    // Returns false when the target or the temporary file could not be removed or
    // when an I/O error occurs (the error is logged), true otherwise.
    // NOTE(review): this extract has lost lines and tokens: the delete calls
    // before both "Failed to delete" checks, the name of the temporary zip file
    // (only new File("") is left), the seek to startPosition, the read/write
    // calls in the copy loop, the close calls, the delete of the temporary file
    // and all closing braces. The four "*" lines below the signature are the
    // remains of a block comment whose delimiters were stripped.
    // NOTE(review): as shown, targetFile is only checked and never written; the
    // zip is extracted into new File(".") -- confirm against the original.
    public boolean extractBytes(final File targetFile, final long startPosition, final Long endPosition) {
         * This is a tricky method. We will be extracting data from a the BIG
         * archive onto a new file somewhere on disk. The biggest challenge here
         * is to find exactly when the data for the file ends and still do the
         * file copy with a wonderful performance.
        try {
            // enable random access to the BIG file (fast as heck)
            RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r");
            // if the target file exists, try to delete it
            if (targetFile.exists()) {
                if (targetFile.exists()) {
                    // we failed completely
                    System.out.println("BIG405 - Failed to delete: " + targetFile.getAbsolutePath());
                    return false;
            // we need to create a temporary zip file holder
            File fileZip = new File("");
            // delete the zip file if it already exists
            if (fileZip.exists()) {
                if (fileZip.exists()) {
                    // we failed completely
                    System.out.println("BIG416 - Failed to delete: " + fileZip.getAbsolutePath());
                    return false;

            // create a new file
            RandomAccessFile dataNew = new RandomAccessFile(fileZip, "rw");
            // jump directly to the position where the file is positioned
            // now we start reading bytes during the mentioned interval
            while (dataBIG.getFilePointer() < endPosition) {
                // read a byte from our BIG archive
                int data =;
                // write the same byte on the target file

            // close the file streams

            // extract the file
            zip.extract(fileZip, new File("."));
            // delete the temp zip file

        } catch (FileNotFoundException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
            return false;
        } catch (IOException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
            return false;

        return true;

     * Version 2 that permits to extract the text from a compressed file without
     * creating any file on the disk.
     * @param startPosition Offset where the file begins
     * @param endPosition   Offset where the file ends
     * @return      The source code of the compressed file
    // Reads the bytes between startPosition and endPosition of the BIG file into
    // memory, treats them as a zip stream, decompresses the first entry and
    // returns it as a String. No file is created on disk.
    // Returns null on FileNotFoundException or IOException (both are logged). An
    // ArchiveException is logged and then falls through to `return result`.
    // NOTE(review): this extract has lost lines and tokens: the seek to
    // startPosition, the read call and the write into byteOutput inside the
    // loop, the flush calls, the close calls and all closing braces.
    public String extractBytesToRAM(final long startPosition, final Long endPosition) {

        String result = null;

        try {
            // enable random access to the BIG file (fast as heck)
            RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r");
            // jump directly to the position where the file is positioned
            // create a byte array
            ByteArrayOutputStream byteOutput = new ByteArrayOutputStream();

            // now we start reading bytes during the mentioned interval
            while (dataBIG.getFilePointer() < endPosition) {
                // read a byte from our BIG archive
                int data =;
            // flush data at this point
            // now convert the stream from input into an output (to feed the zip stream)
            ByteArrayInputStream byteInput = new ByteArrayInputStream(byteOutput.toByteArray());
            // where we place the decompressed bytes
            ByteArrayOutputStream textOutput = new ByteArrayOutputStream();
            // create the zip streamer
            final ArchiveInputStream archiveStream;
            archiveStream = new ArchiveStreamFactory().createArchiveInputStream("zip", byteInput);
            // positions the stream on the first entry; `entry` itself is not used
            final ZipArchiveEntry entry = (ZipArchiveEntry) archiveStream.getNextEntry();
            // copy all bytes from one location to the other (and decompress the data)
            IOUtils.copy(archiveStream, textOutput);
            // flush the results
            // we've got the result right here!
            // toString() decodes with the platform default charset
            result = textOutput.toString();
            // now close all the streams that we have open

        } catch (FileNotFoundException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
            return null;
        } catch (IOException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
            return null;
        } catch (ArchiveException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);

        return result;

     * Version 2 that permits to extract the text from a compressed file without
     * creating any file on the disk.
     * @param filePosition
     * @return      The source code of the compressed file
    // Variant that only knows where an entry starts. It skips the magic signature
    // at filePosition, scans forward for the start of the next signature to find
    // the end of the entry, then reads the bytes in between, decompresses the
    // first zip entry and returns it as a String.
    // Returns null on FileNotFoundException or IOException (both are logged). An
    // ArchiveException is logged and then falls through to `return result`.
    // NOTE(review): this extract has lost lines and tokens: both seek calls, the
    // statements inside `if (value != 73)`, the exit from the scan loop, the read
    // call and the write into byteOutput, the flush/close calls and all closing
    // braces.
    public String extractBytesToRAM(final long filePosition) {

        String result = null;

        try {

            // add the signature bytes to our start position
            long startPosition = filePosition + magicSignature.length();

            // enable random access to the BIG file (fast as heck)
            RandomAccessFile dataBIG = new RandomAccessFile(fileMainBIG, "r");
            // jump directly to the position where the file is positioned
            // create a byte array
            ByteArrayOutputStream byteOutput = new ByteArrayOutputStream();

            // get the end of this file entry (by brute-force)
            // NOTE(review): a char is never equal to -1, so this condition is
            // always true; the loop can only end through a break or an exception
            char test = 0;
            long endPosition = -1;
            while (test != -1) {
                // NOTE(review): readChar() consumes two bytes per call, so a
                // signature that starts on an odd offset would be stepped over
                test = dataBIG.readChar();
                // if the magic devil number was found..
                // 66 is 'B', the first character of the magic signature
                if (test == 66) {
                    // read the next value for confirmation
                    // 73 is 'I', the second character of the magic signature
                    byte value = dataBIG.readByte();
                    if (value != 73) {
                    // we found the next entry
                    endPosition = dataBIG.getFilePointer() - 1;

            // rewind back to the start position

            // now we start reading bytes during the mentioned interval
            while (dataBIG.getFilePointer() < endPosition) {
                // read a byte from our BIG archive
                int data =;
            // flush data at this point
            // now convert the stream from input into an output (to feed the zip stream)
            ByteArrayInputStream byteInput = new ByteArrayInputStream(byteOutput.toByteArray());
            // where we place the decompressed bytes
            ByteArrayOutputStream textOutput = new ByteArrayOutputStream();
            // create the zip streamer
            final ArchiveInputStream archiveStream;
            archiveStream = new ArchiveStreamFactory().createArchiveInputStream("zip", byteInput);
            // positions the stream on the first entry; `entry` itself is not used
            final ZipArchiveEntry entry = (ZipArchiveEntry) archiveStream.getNextEntry();
            // copy all bytes from one location to the other (and decompress the data)
            IOUtils.copy(archiveStream, textOutput);
            // flush the results
            // we've got the result right here!
            // toString() decodes with the platform default charset
            result = textOutput.toString();
            // now close all the streams that we have open

        } catch (FileNotFoundException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
            return null;
        } catch (IOException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
            return null;
        } catch (ArchiveException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);

        return result;

    /**
     * Looks inside a text file to discover the line that contains a given
     * keyword. When the line is discovered then it returns an array where
     * the first long represents the start of data and the second represents
     * its end.
     * @param file      A file on disk
     * @param keyword   A keyword that must be present on the file
     * @return          An array with the start and end position of a given
     * file inside our BIG archive. If we don't have a match, the result is NULL
     */
    private long[] getFileCoordinates(final File file, final String keyword) {
        // what we provide as answer
        long[] result = null;
        BufferedReader reader;
        try {
            FileReader fileReader = new FileReader(file);
            reader = new BufferedReader(fileReader);
            String line = "";
            while (line != null) {
                // do we have a match? Yes, let's proceed
                if (line.endsWith(keyword)) {
                    // an example of what we are reading:
                    // 000000000180411 3f1f0990b8200b5e9b5de461a7fa7f7640ae16f7 /C/HappyNuno.txt
                    final String startValue = line.substring(0, 15);
                    // get the coordinate and ignore the magic signature size to get the raw binary contents
                    final long val1 = Long.parseLong(startValue) + magicSignature.length();
                    // now read the next line to get the end value
                    line = reader.readLine();
                    final String endValue = line.substring(0, 15);
                    final long val2 = Long.parseLong(endValue);
                    // deliver the value
                    result = new long[] { val1, val2 };
                line = reader.readLine();
        } catch (IOException ex) {
            Logger.getLogger(files.class.getName()).log(Level.SEVERE, null, ex);
        // all done    
        return result;

    /**
     * Given a specific SHA1 signature, go through the BIG archive and
     * find the files that have a matching value. This method will search
     * across the whole knowledge base. If more than one match is found, it will
     * be included on the list.
     * @param idSHA1    The SHA1 identifier to find
     * @return          A list of files found with this SHA1
     */
    public ArrayList<String> findFilesWithSpecificSHA1(final String idSHA1) {
        // prepare the variable where we place the results
        ArrayList<String> result = new ArrayList();

        // open the file for reading

        String line;
        try {
            while ((line = readerNextFile.readLine()) != null) {
                // get the SHA1 signature
                final String SHA1 = line.substring(16, 56);
                // no need to continue if no match exists
                if (utils.text.equals(SHA1, idSHA1) == false) {
                // ge the file name details after coordinate 57
                final String fileName = line.substring(57);
                // add this data to our result list

        } catch (IOException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            // close the files for reading
        //System.out.println("BIG662 SHA1 search concluded");
        // all done
        return result;

    public String findFileWithSpecificSHA1(final String signatureSHA1) {
        // open the file for reading
        String result = null;
        String line;
        try {
            while ((line = readerNextFile.readLine()) != null) {
                // get the SHA1 signature
                final String SHA1 = line.substring(16, 56);
                // no need to continue if no match exists
                if (utils.text.equals(SHA1, signatureSHA1) == false) {
                // ge the file name details after coordinate 57
                final String fileName = line.substring(57);
                // add this data to our result list
                result = fileName;

        } catch (IOException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            // close the files for reading
        // all done
        return result;

    /**
     * Prepares this archive to iterate all files sequentially
     */
    public void getNextFileInitiate() {
        // we start by initiating the file readers
        try {
            fileReaderNext = new FileReader(fileIndexBIG);
            readerNextFile = new BufferedReader(fileReaderNext);
            // avoid the header line
            // now avoid the first file because we know its offset is 0000
            lastReadLine = readerNextFile.readLine();
            currentLine = lastReadLine;
            readerNextFileName = getFileNameOutOfLine(lastReadLine);

        } catch (FileNotFoundException ex) {
            Logger.getLogger(files.class.getName()).log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);

    /**
     * Releases the allocated resources required for running this operation
     */
    public void getNextFileConclude() {
        // closes the streams previously open
        try {
            if (fileReaderNext != null)
            if (readerNextFile != null)
        } catch (IOException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);

    /**
     * Starting from the first file, this method permits to iterate over all
     * the files inside a big archive.
     * @return a pointer to the extracted file on disk
     * @throws IOException when the file had some error
     */
    public File getNextFile() throws IOException {
        lastReadLine = currentLine;
        // now get the next line
        currentLine = readerNextFile.readLine();
        // increase the counter
        // get the new coordinate
        final long newValue = getValueOutOfLine(currentLine);

        // define the file pointer that we will be using
        final File file = new File(readerNextFileName);
        // now extract the mentioned bytes from our BIG archive
        extractBytes(file, currentGetNextPosition + magicSignature.length(), newValue);

        // now update the marker for the present offset
        currentGetNextPosition = newValue;
        readerNextFileName = getFileNameOutOfLine(currentLine);
        // all done
        return file;

    /**
     * Returns the last line that was read while iterating the files inside
     * a big archive in sequential mode
     * @return The full line as available on the text file
     */
    public String getLastLine() {
        return lastReadLine;

    /**
     * How many files were indexed with this sequential processing?
     * @return the current value of the sequential file counter
     */
    public long getGetNextFileCounter() {
        return getNextFileCounter;

    /**
     * Given a line describing a file, get the file name portion
     * @param line  A line from our index file
     * @return      The file name. Errors are ignored intentionally to permit
     *              scale and faster processing speed.
     */
    private String getFileNameOutOfLine(final String line) {
        // get the last path indicator
        final int i1 = line.lastIndexOf("/");
        // provide the name portion of the file
        return line.substring(i1 + 1);

    /**
     * Given a line describing a file in our big archive, get the coordinate value
     * @param line  A line from our index file
     * @return      A long with the value where the data can be found
     */
    private long getValueOutOfLine(final String line) {
        // get the first values with the coordinate
        final String startValue = line.substring(0, 15);
        // get the coordinate and ignore the magic signature
        return Long.parseLong(startValue);

    /**
     * Tries to jump directly to the last position from where processing took place
     * @param offsetPosition    the position to jump to
     * @param linesProcessed    the counter value to resume with
     */
    public void moveToOffsetPosition(final long offsetPosition, final long linesProcessed) {
        try {
            // attempt to skip a given number of bytes
            getNextFileCounter = linesProcessed;
        } catch (IOException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);

    /**
     * Skip a given number of files until we get the next pointer to reader.
     * @param currentLine   The line number that was counted up to that point
     */
    public void moveToLinePosition(final long currentLine) {
        //        try {
        //            // attempt to skip a given number of bytes
        //            readerNextFile.skip(nextPosition);
        //            getNextFileCounter = currentLine;
        //            this.currentGetNextPosition = currentGetNextPosition;
        //        } catch (IOException ex) {
        //            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);
        //        }

        try {
            // ignore all these lines until we can resume
            while (getNextFileCounter < currentLine) {
        } catch (IOException ex) {
            Logger.getLogger(BigZip.class.getName()).log(Level.SEVERE, null, ex);


    /**
     * Does a simple line read to advance the counters
     */
    private void emptyLineRead() throws IOException {
        // NOTE(review): no statement of this method is visible in this listing
        // and its closing brace is missing. The loop in moveToLinePosition()
        // only terminates if each call increases getNextFileCounter, and the
        // throws clause suggests a read from the index reader - restore the
        // body from the original file and confirm.

    /**
     * Starting from the first file, this method permits to iterate over all
     * the files inside a big archive, keeping the extracted content in memory.
     * @return the text returned by extractBytesToRAM for the next file
     * @throws IOException when the file had some error
     */
    public String getNextSourceCodeFile() throws IOException {
        lastReadLine = currentLine;
        // now get the next line
        currentLine = readerNextFile.readLine();
        // increase the counter
        // get the new coordinate
        final long newValue = getValueOutOfLine(currentLine);

        // now extract the mentioned bytes from our BIG archive
        final String result = extractBytesToRAM(currentGetNextPosition + magicSignature.length(), newValue);

        // now update the marker for the present offset
        currentGetNextPosition = newValue;
        readerNextFileName = getFileNameOutOfLine(currentLine);
        // all done
        return result;

    /**
     * Close the big archive and all open files associated with it
     */
    public void close() {
        // NOTE(review): no statement of this method is visible in this listing
        // and its closing brace is missing; which streams it releases cannot
        // be confirmed from here - restore the body from the original file.

    public File getFileLog() {
        return fileLogBIG;

    public File getFile() {
        return fileMainBIG;

    public File getFileIndex() {
        return fileIndexBIG;

    public long getCurrentGetNextPosition() {
        return currentGetNextPosition;

    public String getCurrentLine() {
        return currentLine;

    /**
     * Sets the maximum size accepted as a file for storage.
     * @param maxFileSizeBigZip the new size limit
     */
    public void setFileSizeLimit(final int maxFileSizeBigZip) {
        maxFileSize = maxFileSizeBigZip;
