Example usage for java.util.zip GZIPOutputStream GZIPOutputStream

List of usage examples for java.util.zip GZIPOutputStream GZIPOutputStream

Introduction

On this page you can find example usages of the java.util.zip GZIPOutputStream constructor GZIPOutputStream(OutputStream, int).

Prototype

public GZIPOutputStream(OutputStream out, int size) throws IOException 

Document

Creates a new output stream with the specified buffer size.
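
A minimal, self-contained sketch of this constructor in isolation, assuming an illustrative file name and an 8 KiB buffer; closing the stream finishes the gzip trailer:

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPOutputStream;

public class GzipBufferSizeExample {
    public static void main(String[] args) throws IOException {
        // Wrap a file stream with an 8 KiB deflater buffer (the file name is illustrative).
        try (OutputStream file = new FileOutputStream("example.txt.gz");
                GZIPOutputStream gzip = new GZIPOutputStream(file, 8 * 1024)) {
            gzip.write("hello, gzip".getBytes(StandardCharsets.UTF_8));
        } // try-with-resources closes the stream, which writes the gzip trailer
    }
}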

Usage

From source file:com.fdwills.external.http.JsonStreamerEntity.java

@Override
public void writeTo(final OutputStream out) throws IOException {
    if (out == null) {
        throw new IllegalStateException("Output stream cannot be null.");
    }

    // Record the time when uploading started.
    long now = System.currentTimeMillis();

    // Use GZIP compression when a content encoding has been set; otherwise
    // write directly to the provided stream.
    OutputStream os = null != contentEncoding ? new GZIPOutputStream(out, BUFFER_SIZE) : out;

    // Always send a JSON object.
    os.write('{');

    // Keys used by the HashMaps.
    Set<String> keys = jsonParams.keySet();

    boolean isFileWrapper;

    // Go over all keys and handle each's value.
    for (String key : keys) {
        // Evaluate the value (which cannot be null).
        Object value = jsonParams.get(key);

        // Bail out prematurely if value's null.
        if (value == null) {
            continue;
        }

        // Write the JSON object's key.
        os.write(escape(key));
        os.write(':');

        // Check if this is a FileWrapper.
        isFileWrapper = value instanceof RequestParams.FileWrapper;

        // If a file should be uploaded.
        if (isFileWrapper || value instanceof RequestParams.StreamWrapper) {
            // All uploads are sent as an object containing the file's details.
            os.write('{');

            // Determine how to handle this entry.
            if (isFileWrapper) {
                writeToFromFile(os, (RequestParams.FileWrapper) value);
            } else {
                writeToFromStream(os, (RequestParams.StreamWrapper) value);
            }

            // End the file's object and prepare for next one.
            os.write('}');
        } else if (value instanceof JsonValueInterface) {
            os.write(((JsonValueInterface) value).getEscapedJsonValue());
        } else if (value instanceof org.json.JSONObject) {
            os.write(((org.json.JSONObject) value).toString().getBytes());
        } else if (value instanceof org.json.JSONArray) {
            os.write(((org.json.JSONArray) value).toString().getBytes());
        } else if (value instanceof Boolean) {
            os.write((Boolean) value ? JSON_TRUE : JSON_FALSE);
        } else if (value instanceof Long) {
            os.write((((Number) value).longValue() + "").getBytes());
        } else if (value instanceof Double) {
            os.write((((Number) value).doubleValue() + "").getBytes());
        } else if (value instanceof Float) {
            os.write((((Number) value).floatValue() + "").getBytes());
        } else if (value instanceof Integer) {
            os.write((((Number) value).intValue() + "").getBytes());
        } else {
            os.write(escape(value.toString()));
        }

        os.write(',');
    }

    // Include the elapsed time taken to upload everything.
    // This might be useful for somebody, but it serves us well since
    // there will almost always be a ',' as the last sent character.
    os.write(STREAM_ELAPSED);
    os.write(':');
    long elapsedTime = System.currentTimeMillis() - now;
    os.write((elapsedTime + "}").getBytes());

    Log.i(LOG_TAG, "Uploaded JSON in " + Math.floor(elapsedTime / 1000) + " seconds");

    // Flush the contents up the stream.
    os.flush();
    AsyncHttpClient.silentCloseOutputStream(os);
}
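
The wrapping decision above can be condensed into a small helper: gzip when a content encoding is set, otherwise plain buffering. This is a sketch; BUFFER_SIZE and the helper name are illustrative and not part of the library.

import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.zip.GZIPOutputStream;

final class RequestStreamWrapper {
    private static final int BUFFER_SIZE = 4096; // assumed; the entity defines its own constant

    // Compress only when a gzip Content-Encoding header will accompany the request.
    static OutputStream wrap(OutputStream out, boolean gzipEncoding) throws IOException {
        return gzipEncoding
                ? new GZIPOutputStream(out, BUFFER_SIZE)
                : new BufferedOutputStream(out, BUFFER_SIZE);
    }
}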

From source file:com.lfrj.diancan.http.JsonStreamerEntity.java

@Override
public void writeTo(final OutputStream out) throws IOException {
    if (out == null) {
        throw new IllegalStateException("Output stream cannot be null.");
    }

    // Record the time when uploading started.
    long now = System.currentTimeMillis();

    // Use GZIP compression when a content encoding has been set; otherwise
    // write directly to the provided stream.
    OutputStream os = null != contentEncoding ? new GZIPOutputStream(out, BUFFER_SIZE) : out;

    // Always send a JSON object.
    os.write('{');

    // Keys used by the HashMaps.
    Set<String> keys = jsonParams.keySet();

    boolean isFileWrapper;

    // Go over all keys and handle each's value.
    for (String key : keys) {
        // Evaluate the value (which cannot be null).
        Object value = jsonParams.get(key);

        // Bail out prematurely if value's null.
        if (value == null) {
            continue;
        }

        // Write the JSON object's key.
        os.write(escape(key));
        os.write(':');

        // Check if this is a FileWrapper.
        isFileWrapper = value instanceof RequestParams.FileWrapper;

        // If a file should be uploaded.
        if (isFileWrapper || value instanceof RequestParams.StreamWrapper) {
            // All uploads are sent as an object containing the file's
            // details.
            os.write('{');

            // Determine how to handle this entry.
            if (isFileWrapper) {
                writeToFromFile(os, (RequestParams.FileWrapper) value);
            } else {
                writeToFromStream(os, (RequestParams.StreamWrapper) value);
            }

            // End the file's object and prepare for next one.
            os.write('}');
        } else if (value instanceof JsonValueInterface) {
            os.write(((JsonValueInterface) value).getEscapedJsonValue());
        } else if (value instanceof org.json.JSONObject) {
            os.write(((org.json.JSONObject) value).toString().getBytes());
        } else if (value instanceof org.json.JSONArray) {
            os.write(((org.json.JSONArray) value).toString().getBytes());
        } else if (value instanceof Boolean) {
            os.write((Boolean) value ? JSON_TRUE : JSON_FALSE);
        } else if (value instanceof Long) {
            os.write((((Number) value).longValue() + "").getBytes());
        } else if (value instanceof Double) {
            os.write((((Number) value).doubleValue() + "").getBytes());
        } else if (value instanceof Float) {
            os.write((((Number) value).floatValue() + "").getBytes());
        } else if (value instanceof Integer) {
            os.write((((Number) value).intValue() + "").getBytes());
        } else {
            os.write(escape(value.toString()));
        }

        os.write(',');
    }

    // Include the elapsed time taken to upload everything.
    // This might be useful for somebody, but it serves us well since
    // there will almost always be a ',' as the last sent character.
    os.write(STREAM_ELAPSED);
    os.write(':');
    long elapsedTime = System.currentTimeMillis() - now;
    os.write((elapsedTime + "}").getBytes());

    Log.i(LOG_TAG, "Uploaded JSON in " + Math.floor(elapsedTime / 1000) + " seconds");

    // Flush the contents up the stream.
    os.flush();
    AsyncHttpClient.silentCloseOutputStream(os);
}

From source file:com.wen.security.http.JsonStreamerEntity.java

@Override
public void writeTo(final OutputStream outstream) throws IOException {
    if (outstream == null) {
        throw new IllegalStateException("Output stream cannot be null.");
    }

    // Record the time when uploading started.
    long now = System.currentTimeMillis();

    // Keys used by the HashMaps.
    Set<String> keys;

    // Use GZIP compression when sending streams, otherwise just use
    // a buffered output stream to speed things up a bit.
    OutputStream upload;
    if (null != contentEncoding) {
        upload = new GZIPOutputStream(new BufferedOutputStream(outstream), BUFFER_SIZE);
    } else {
        upload = new BufferedOutputStream(outstream);
    }

    // Always send a JSON object.
    upload.write('{');

    // Send the K/V values.
    keys = kvParams.keySet();
    for (String key : keys) {
        // Write the JSON object's key.
        upload.write(escape(key));
        upload.write(':');

        // Evaluate the value (which cannot be null).
        Object value = kvParams.get(key);

        if (value instanceof Boolean) {
            upload.write((Boolean) value ? JSON_TRUE : JSON_FALSE);
        } else if (value instanceof Long) {
            upload.write((((Number) value).longValue() + "").getBytes());
        } else if (value instanceof Double) {
            upload.write((((Number) value).doubleValue() + "").getBytes());
        } else if (value instanceof Float) {
            upload.write((((Number) value).floatValue() + "").getBytes());
        } else if (value instanceof Integer) {
            upload.write((((Number) value).intValue() + "").getBytes());
        } else {
            upload.write(value.toString().getBytes());
        }

        upload.write(',');
    }

    // Buffer used for reading from input streams.
    byte[] buffer = new byte[BUFFER_SIZE];

    // Send the stream params.
    keys = streamParams.keySet();
    for (String key : keys) {
        RequestParams.StreamWrapper entry = streamParams.get(key);

        // Write the JSON object's key.
        upload.write(escape(key));

        // All uploads are sent as an object containing the file's details.
        upload.write(':');
        upload.write('{');

        // Send the streams's name.
        upload.write(STREAM_NAME);
        upload.write(':');
        upload.write(escape(entry.name));
        upload.write(',');

        // Send the streams's content type.
        upload.write(STREAM_TYPE);
        upload.write(':');
        upload.write(escape(entry.contentType));
        upload.write(',');

        // Prepare the file content's key.
        upload.write(STREAM_CONTENTS);
        upload.write(':');
        upload.write('"');

        // Upload the file's contents in Base64.
        Base64OutputStream outputStream = new Base64OutputStream(upload, Base64.NO_CLOSE | Base64.NO_WRAP);

        // Read from input stream until no more data's left to read.
        int bytesRead;
        while ((bytesRead = entry.inputStream.read(buffer)) != -1) {
            outputStream.write(buffer, 0, bytesRead);
        }

        // Close the Base64 output stream.
        outputStream.close();

        // End the file's object and prepare for next one.
        upload.write('"');
        upload.write('}');
        upload.write(',');
    }

    // Include the elapsed time taken to upload everything.
    // This might be useful for somebody, but it serves us well since
    // there will almost always be a ',' as the last sent character.
    upload.write(STREAM_ELAPSED);
    upload.write(':');
    long elapsedTime = System.currentTimeMillis() - now;
    upload.write((elapsedTime + "}").getBytes());

    // Flush the contents up the stream.
    upload.flush();
    upload.close();
}

From source file:com.aoeng.degu.utils.net.asyncthhpclient.JsonStreamerEntity.java

@Override
public void writeTo(final OutputStream outstream) throws IOException {
    if (outstream == null) {
        throw new IllegalStateException("Output stream cannot be null.");
    }

    // Record the time when uploading started.
    long now = System.currentTimeMillis();

    // Keys used by the HashMaps.
    Set<String> keys;

    // Use GZIP compression when sending streams, otherwise just use
    // a buffered output stream to speed things up a bit.
    OutputStream upload;
    if (null != contentEncoding) {
        upload = new GZIPOutputStream(new BufferedOutputStream(outstream), BUFFER_SIZE);
    } else {
        upload = new BufferedOutputStream(outstream);
    }

    // Always send a JSON object.
    upload.write('{');

    // Send the K/V values.
    keys = kvParams.keySet();
    for (String key : keys) {
        // Write the JSON object's key.
        upload.write(escape(key));
        upload.write(':');

        // Evaluate the value (which cannot be null).
        Object value = kvParams.get(key);

        if (value instanceof Boolean) {
            upload.write((Boolean) value ? JSON_TRUE : JSON_FALSE);
        } else if (value instanceof Long) {
            upload.write((((Number) value).longValue() + "").getBytes());
        } else if (value instanceof Double) {
            upload.write((((Number) value).doubleValue() + "").getBytes());
        } else if (value instanceof Float) {
            upload.write((((Number) value).floatValue() + "").getBytes());
        } else if (value instanceof Integer) {
            upload.write((((Number) value).intValue() + "").getBytes());
        } else {
            upload.write(value.toString().getBytes());
        }

        upload.write(',');
    }

    // Buffer used for reading from input streams.
    byte[] buffer = new byte[BUFFER_SIZE];

    // Send the stream params.
    keys = streamParams.keySet();
    for (String key : keys) {
        RequestParams.StreamWrapper entry = streamParams.get(key);

        // Write the JSON object's key.
        upload.write(escape(key));

        // All uploads are sent as an object containing the file's details.
        upload.write(':');
        upload.write('{');

        // Send the streams's name.
        upload.write(STREAM_NAME);
        upload.write(':');
        upload.write(escape(entry.name));
        upload.write(',');

        // Send the streams's content type.
        upload.write(STREAM_TYPE);
        upload.write(':');
        upload.write(escape(entry.contentType));
        upload.write(',');

        // Prepare the file content's key.
        upload.write(STREAM_CONTENTS);
        upload.write(':');
        upload.write('"');

        // Upload the file's contents in Base64.
        Base64OutputStream outputStream = new Base64OutputStream(upload, Base64.NO_CLOSE | Base64.NO_WRAP);

        // Read from input stream until no more data's left to read.
        int bytesRead;
        while ((bytesRead = entry.inputStream.read(buffer)) != -1) {
            outputStream.write(buffer, 0, bytesRead);
        }

        // Close the Base64 output stream.
        outputStream.close();

        // End the file's object and prepare for next one.
        upload.write('"');
        upload.write('}');
        upload.write(',');
    }

    // Include the elapsed time taken to upload everything.
    // This might be useful for somebody, but it serves us well since
    // there will almost always be a ',' as the last sent character.
    upload.write(STREAM_ELAPSED);
    upload.write(':');
    long elapsedTime = System.currentTimeMillis() - now;
    upload.write((elapsedTime + "}").getBytes());

    Log.i(LOG_TAG, "Uploaded JSON in " + Math.floor(elapsedTime / 1000) + " seconds");

    // Flush the contents up the stream.
    upload.flush();
    upload.close();
}
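
In the two variants above, the effective chain when gzip is enabled is: caller writes, then GZIPOutputStream compresses, then BufferedOutputStream coalesces small writes before the HTTP entity stream, with Base64OutputStream layered on top only while file contents are copied. A sketch of just the chain construction; the class name and buffer size are illustrative.

import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.zip.GZIPOutputStream;

final class UploadStreamChain {
    private static final int BUFFER_SIZE = 8192; // assumed buffer size

    // Buffer the raw entity stream first, then compress on top of it when requested.
    static OutputStream open(OutputStream entityStream, boolean gzip) throws IOException {
        OutputStream buffered = new BufferedOutputStream(entityStream, BUFFER_SIZE);
        return gzip ? new GZIPOutputStream(buffered, BUFFER_SIZE) : buffered;
    }
}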

From source file:io.ecarf.core.compress.NxGzipProcessor.java

/**
 * Wrap the provided output stream with a compressing stream (gzip or bzip2)
 * chosen from the input file's extension.
 * @param output the output stream to wrap
 * @return the wrapped output stream, or the original stream if the input file is not compressed
 * @throws IOException if the compressed stream cannot be created
 */
private OutputStream getInflatedOutputStream(OutputStream output) throws IOException {
    OutputStream inflated = output;

    // gzip
    if (GzipUtils.isCompressedFilename(this.inputFile)) {
        inflated = new GZIPOutputStream(output, Constants.GZIP_BUF_SIZE);

    }
    // bz2
    else if (BZip2Utils.isCompressedFilename(this.inputFile)) {
        inflated = new BZip2CompressorOutputStream(new BufferedOutputStream(output));
    }

    return inflated;
}
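
A hedged usage sketch of the same idea, keyed on an output file name instead of the input file: the compressor is chosen from the extension. It assumes Apache Commons Compress is on the classpath; the factory name and the 64 KiB buffer are illustrative.

import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2Utils;
import org.apache.commons.compress.compressors.gzip.GzipUtils;

final class CompressedOutputFactory {
    static OutputStream open(String fileName) throws IOException {
        OutputStream raw = new BufferedOutputStream(new FileOutputStream(fileName));
        if (GzipUtils.isCompressedFilename(fileName)) {
            return new GZIPOutputStream(raw, 64 * 1024); // gzip with a 64 KiB buffer
        }
        if (BZip2Utils.isCompressedFilename(fileName)) {
            return new BZip2CompressorOutputStream(raw);
        }
        return raw; // no recognised compression extension
    }
}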

From source file:io.ecarf.core.cloud.task.processor.reason.phase0.DoReasonTask3.java

@Override
public void run() throws IOException {

    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    //String table = metadata.getValue(EcarfMetaData.ECARF_TABLE);
    //Set<String> terms = metadata.getTerms();
    //String schemaFile = metadata.getValue(EcarfMetaData.ECARF_SCHEMA);
    //String bucket = metadata.getBucket();
    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file
        //String termsFile = metadata.getValue(EcarfMetaData.ECARF_TERMS_FILE);
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = FileUtils.jsonFileToSet(localTermsFile);

    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<String, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaNTriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    Map<Term, Set<Triple>> schemaTerms = new HashMap<>();

    for (String term : termsSet) {
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(new Term(term), allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);

    // timestamp loop
    do {

        //List<String> inferredFiles = new ArrayList<>();

        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {

            Term term = entry.getKey();

            // add table decoration to table name
            String query = GenericRule.getQuery(entry.getValue(), decoratedTable);

            log.info("\nQuery: " + query);

            String jobId = cloud.startBigDataQuery(query);
            String encodedTerm = FileUtils.encodeFilename(term.getTerm());
            String filename = Utils.TEMP_FOLDER + encodedTerm + Constants.DOT_TERMS;

            // remember the filename and the jobId for this query
            term.setFilename(filename).setJobId(jobId).setEncodedTerm(encodedTerm);

        }

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + start + Constants.DOT_INF;

        List<String> productiveTerms = new ArrayList<>();

        int interimInferredTriples = 0;

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {

                Term term = entry.getKey();
                log.info("Reasoning for Term: " + term);

                Set<Triple> schemaTriples = entry.getValue();
                log.info("Schema Triples: " + Joiner.on('\n').join(schemaTriples));

                List<String> select = GenericRule.getSelect(schemaTriples);

                // block and wait for each job to complete then save results to a file
                BigInteger rows = BigInteger.ZERO;

                try {
                    rows = cloud.saveBigQueryResultsToFile(term.getJobId(), term.getFilename()).getTotalRows();

                } catch (IOException ioe) {
                    // transient backend errors
                    log.warn("failed to save query results to file, jobId: " + term.getJobId());
                }

                log.info("Query found " + rows + ", rows");

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {

                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(term, select, schemaTriples, rows,
                            decoratedTable, writer);

                    productiveTerms.add(term.getTerm());

                    interimInferredTriples += inferredTriplesCount;

                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {

            //TODO stream smaller numbers of inferred triples
            //TODO try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold",
                    100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data

                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, false);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryTripleTable(table));

                log.info("All inferred triples are streamed into Big Data table");

            } else {

                // directly upload the data
                List<String> jobIds = cloud.loadLocalFilesIntoBigData(Lists.newArrayList(inferredTriplesFile),
                        TableUtils.getBigQueryTripleTable(table), false);
                log.info("All inferred triples are directly loaded into Big Data table, completed jobIds: "
                        + jobIds);
            }

            // reset empty retries
            emptyRetries = 0;

        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);

        ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

        // FIXME move into the particular cloud implementation service
        long elapsed = System.currentTimeMillis() - start;
        decoratedTable = "[" + table + "@-" + elapsed + "-]";

        log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);

    } while (!(emptyRetries == maxRetries)); // end timestamp loop

    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
}
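
The compression-relevant part of this task is the try-with-resources around the writer: text rows go through a PrintWriter layered on a GZIPOutputStream, so the inferred-triples file is compressed as it is produced. A stripped-down sketch follows; the file name, buffer size, and row contents are illustrative.

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.zip.GZIPOutputStream;

public class CompressedTriplesWriter {
    public static void main(String[] args) throws IOException {
        int gzipBufSize = 64 * 1024; // assumed; the task uses Constants.GZIP_BUF_SIZE
        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream("inferred.inf"), gzipBufSize))) {
            writer.println("<s1>,<p1>,<o1>");
            writer.println("<s2>,<p2>,<o2>");
        } // closing the writer closes the gzip stream and finishes the trailer
    }
}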

From source file:io.ecarf.core.cloud.task.processor.reason.phase0.DoReasonTask4.java

@Override
public void run() throws IOException {

    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    //String table = metadata.getValue(EcarfMetaData.ECARF_TABLE);
    //Set<String> terms = metadata.getTerms();
    //String schemaFile = metadata.getValue(EcarfMetaData.ECARF_SCHEMA);
    //String bucket = metadata.getBucket();
    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file
        //String termsFile = metadata.getValue(EcarfMetaData.ECARF_TERMS_FILE);
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = FileUtils.jsonFileToSet(localTermsFile);

    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<String, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaNTriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    Map<Term, Set<Triple>> schemaTerms = new HashMap<>();

    for (String term : termsSet) {
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(new Term(term), allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    String instanceId = cloud.getInstanceId();

    // timestamp loop
    do {

        //List<String> inferredFiles = new ArrayList<>();

        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {

            Term term = entry.getKey();

            // add table decoration to table name
            String query = GenericRule.getQuery(entry.getValue(), decoratedTable);

            log.info("\nQuery: " + query);

            String jobId = cloud.startBigDataQuery(query);
            String encodedTerm = FileUtils.encodeFilename(term.getTerm());
            String filename = Utils.TEMP_FOLDER + encodedTerm + Constants.DOT_TERMS;

            // remember the filename and the jobId for this query
            term.setFilename(filename).setJobId(jobId).setEncodedTerm(encodedTerm);

        }

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        List<String> productiveTerms = new ArrayList<>();

        int interimInferredTriples = 0;

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {

                Term term = entry.getKey();
                log.info("Reasoning for Term: " + term);

                Set<Triple> schemaTriples = entry.getValue();
                log.info("Schema Triples: " + Joiner.on('\n').join(schemaTriples));

                List<String> select = GenericRule.getSelect(schemaTriples);

                // block and wait for each job to complete then save results to a file
                BigInteger rows = BigInteger.ZERO;

                try {
                    rows = cloud.saveBigQueryResultsToFile(term.getJobId(), term.getFilename()).getTotalRows();

                } catch (IOException ioe) {
                    // transient backend errors
                    log.warn("failed to save query results to file, jobId: " + term.getJobId());
                }

                log.info("Query found " + rows + ", rows");

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {

                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(term, select, schemaTriples, rows,
                            decoratedTable, writer);

                    productiveTerms.add(term.getTerm());

                    interimInferredTriples += inferredTriplesCount;

                    this.totalRows = this.totalRows.add(rows);

                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {

            //TODO stream smaller numbers of inferred triples
            //TODO try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold",
                    100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data

                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, false);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryTripleTable(table));

                log.info("All inferred triples are streamed into Big Data table");

            } else {

                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryTripleTable(table), false);
                log.info(
                        "All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                                + jobId);

            }

            // reset empty retries
            emptyRetries = 0;

        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);

        ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

        // FIXME move into the particular cloud implementation service
        long elapsed = System.currentTimeMillis() - start;
        decoratedTable = "[" + table + "@-" + elapsed + "-]";

        log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);

    } while (!(emptyRetries == maxRetries)); // end timestamp loop

    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
}

From source file:io.ecarf.core.cloud.task.processor.reason.phase1.DoReasonTask5.java

@Override
public void run() throws IOException {

    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    //String table = metadata.getValue(EcarfMetaData.ECARF_TABLE);
    //Set<String> terms = metadata.getTerms();
    //String schemaFile = metadata.getValue(EcarfMetaData.ECARF_SCHEMA);
    //String bucket = metadata.getBucket();
    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();
    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file
        //String termsFile = metadata.getValue(EcarfMetaData.ECARF_TERMS_FILE);
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);

    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<String, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaNTriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    Map<Term, Set<Triple>> schemaTerms = new HashMap<>();

    for (String term : termsSet) {
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(new Term(term), allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    String instanceId = cloud.getInstanceId();

    // timestamp loop
    do {

        List<String> productiveTerms = new ArrayList<>();
        int interimInferredTriples = 0;

        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        List<Callable<Void>> queryTasks = new ArrayList<>();
        List<Callable<Void>> saveTasks = new ArrayList<>();

        for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {

            Term term = entry.getKey();
            Set<Triple> triples = entry.getValue();

            QuerySubTask queryTask = new QuerySubTask(term, triples, decoratedTable, cloud);
            queryTasks.add(queryTask);

            SaveResultsSubTask saveTask = new SaveResultsSubTask(term, cloud);
            saveTasks.add(saveTask);
        }

        // invoke all the queries in parallel
        this.invokeAll(queryTasks);

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        // save all the query results in files in parallel
        this.invokeAll(saveTasks);

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {

                Term term = entry.getKey();

                BigInteger rows = term.getRows();

                this.totalBytes = this.totalBytes + term.getBytes();

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {

                    stopwatch1.start();

                    log.info("Reasoning for Term: " + term);

                    Set<Triple> schemaTriples = entry.getValue();
                    log.info("Schema Triples: " + Joiner.on('\n').join(schemaTriples));

                    List<String> select = GenericRule.getSelect(schemaTriples);

                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(term, select, schemaTriples, rows,
                            decoratedTable, writer);

                    productiveTerms.add(term.getTerm());

                    interimInferredTriples += inferredTriplesCount;

                    this.totalRows = this.totalRows.add(rows);

                    stopwatch1.stop();

                } else {
                    log.info("Skipping term as no data found: " + term);
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {

            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold",
                    100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data

                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, false);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryTripleTable(table));

                log.info("All inferred triples are streamed into Big Data table");

            } else {

                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryTripleTable(table), false);
                log.info(
                        "All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                                + jobId);

            }

            // reset empty retries
            emptyRetries = 0;

            stopwatch2.reset();

        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;

            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);

        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";

            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }

    } while (emptyRetries < maxRetries); // end timestamp loop

    executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}

From source file:com.thoughtworks.go.server.websocket.ConsoleLogSender.java

byte[] maybeGzipIfLargeEnough(byte[] input) {
    if (input.length < 512) {
        return input;
    }
    // To avoid having to re-allocate the internal byte array, allocate an initial buffer assuming a safe 10:1 compression ratio
    final ByteArrayOutputStream gzipBytes = new ByteArrayOutputStream(input.length / 10);
    try {
        final GZIPOutputStream gzipOutputStream = new GZIPOutputStream(gzipBytes, 1024 * 8);
        gzipOutputStream.write(input);
        gzipOutputStream.close();
    } catch (IOException e) {
        LOGGER.error("Could not gzip {}", input);
    }
    return gzipBytes.toByteArray();
}
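
A possible companion for exercising maybeGzipIfLargeEnough in a test: decompress the returned bytes with GZIPInputStream and compare them against the original input. The helper below is a sketch and not part of the class above.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.GZIPInputStream;

final class GzipRoundTrip {
    // Inflate gzip-compressed bytes back into a plain byte array.
    static byte[] gunzip(byte[] gzipped) throws IOException {
        try (GZIPInputStream in = new GZIPInputStream(new ByteArrayInputStream(gzipped))) {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            byte[] buffer = new byte[8 * 1024];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            return out.toByteArray();
        }
    }
}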