Example usage for org.apache.hadoop.io.compress SnappyCodec isNativeCodeLoaded

List of usage examples for org.apache.hadoop.io.compress SnappyCodec isNativeCodeLoaded

Introduction

In this page you can find the example usage for org.apache.hadoop.io.compress SnappyCodec isNativeCodeLoaded.

Prototype

public static boolean isNativeCodeLoaded() 

Source Link

Usage

From source file:com.cloudera.cdk.data.filesystem.ParquetFileSystemDatasetWriter.java

License:Apache License

@Override
public void open() {
    Preconditions.checkState(state.equals(ReaderWriterState.NEW), "Unable to open a writer from state:%s",
            state);/*from ww w  .j  a  va2s .c  o m*/

    logger.debug("Opening data file with pathTmp:{} (final path will be path:{})", pathTmp, path);

    try {
        CompressionCodecName codecName = CompressionCodecName.UNCOMPRESSED;
        if (enableCompression) {
            if (SnappyCodec.isNativeCodeLoaded()) {
                codecName = CompressionCodecName.SNAPPY;
            } else {
                logger.warn("Compression enabled, but Snappy native code not loaded. "
                        + "Parquet file will not be compressed.");
            }
        }
        avroParquetWriter = new AvroParquetWriter<E>(fileSystem.makeQualified(pathTmp), schema, codecName,
                DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE);
    } catch (IOException e) {
        throw new DatasetWriterException("Unable to create writer to path:" + pathTmp, e);
    }

    state = ReaderWriterState.OPEN;
}

From source file:com.msd.gin.halyard.tools.HalyardBulkLoad.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: bulkload [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D"
                + SKIP_INVALID_PROPERTY + "=true] [-D" + SPLIT_BITS_PROPERTY + "=8] [-D"
                + DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D" + OVERRIDE_CONTEXT_PROPERTY
                + "=true] <input_path(s)> <output_path> <table_name>");
        return -1;
    }//from www  .j a v  a2s . c  om
    TableMapReduceUtil.addDependencyJars(getConf(), NTriplesUtil.class, Rio.class, AbstractRDFHandler.class,
            RDFFormat.class, RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    if (SnappyCodec.isNativeCodeLoaded()) {
        getConf().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
        getConf().setClass(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);
    }
    getConf().setDouble(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 1.0);
    getConf().setLong(MRJobConfig.TASK_TIMEOUT, 3600000l);
    getConf().setInt(MRJobConfig.IO_SORT_FACTOR, 100);
    getConf().setInt(MRJobConfig.IO_SORT_MB, 1000);
    getConf().setInt(FileInputFormat.SPLIT_MAXSIZE, 1000000000);
    getConf().setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY, 2048);
    Job job = Job.getInstance(getConf(), "HalyardBulkLoad -> " + args[1] + " -> " + args[2]);
    job.setJarByClass(HalyardBulkLoad.class);
    job.setMapperClass(RDFMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    Map<String, Integer> contextSplitsMap = new HashMap<>();
    for (Map.Entry<String, String> me : getConf().getValByRegex(CONTEXT_SPLIT_REGEXP).entrySet()) {
        int splits = Integer.parseInt(me.getKey().substring(me.getKey().lastIndexOf('.') + 1));
        StringTokenizer stk = new StringTokenizer(me.getValue(), ",");
        while (stk.hasMoreTokens()) {
            contextSplitsMap.put(stk.nextToken(), splits);
        }
    }
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], true,
            getConf().getInt(SPLIT_BITS_PROPERTY, 3), contextSplitsMap)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(),
                hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}

From source file:com.msd.gin.halyard.tools.HalyardBulkUpdate.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: bulkupdate [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D"
                + DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D" + OVERRIDE_CONTEXT_PROPERTY
                + "=true] <input_file_with_SPARQL_queries> <output_path> <table_name>");
        return -1;
    }//from  www . ja  v  a2  s.c  om
    TableMapReduceUtil.addDependencyJars(getConf(), NTriplesUtil.class, Rio.class, RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    if (SnappyCodec.isNativeCodeLoaded()) {
        getConf().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
        getConf().setClass(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);
    }
    getConf().setDouble(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 1.0);
    getConf().setLong(MRJobConfig.TASK_TIMEOUT, 3600000l);
    getConf().setInt(MRJobConfig.IO_SORT_FACTOR, 100);
    getConf().setInt(MRJobConfig.IO_SORT_MB, 1000);
    getConf().setInt(FileInputFormat.SPLIT_MAXSIZE, 1000000000);
    getConf().setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY, 2048);
    getConf().setStrings(TABLE_NAME_PROPERTY, args[2]);
    Job job = Job.getInstance(getConf(), "HalyardBulkUpdate -> " + args[1] + " -> " + args[2]);
    NLineInputFormat.setNumLinesPerSplit(job, 1);
    job.setJarByClass(HalyardBulkUpdate.class);
    job.setMapperClass(SPARQLMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(NLineInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], false, 0, null)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(),
                hTable.getRegionLocator());
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Update Completed..");
            return 0;
        }
    }
    return -1;
}

From source file:com.msd.gin.halyard.tools.HalyardHiveLoad.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: hiveload -D" + RDF_MIME_TYPE_PROPERTY + "='application/ld+json' [-D"
                + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D" + HIVE_DATA_COLUMN_INDEX_PROPERTY + "=3] [-D"
                + BASE_URI_PROPERTY + "='http://my_base_uri/'] [-D" + HalyardBulkLoad.SPLIT_BITS_PROPERTY
                + "=8] [-D" + HalyardBulkLoad.DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D"
                + HalyardBulkLoad.OVERRIDE_CONTEXT_PROPERTY
                + "=true] <hive_table_name> <output_path> <hbase_table_name>");
        return -1;
    }// ww w .  j  a  v a  2 s  .  c o  m
    TableMapReduceUtil.addDependencyJars(getConf(), NTriplesUtil.class, Rio.class, AbstractRDFHandler.class,
            RDFFormat.class, RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    if (SnappyCodec.isNativeCodeLoaded()) {
        getConf().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
        getConf().setClass(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);
    }
    getConf().setDouble(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 1.0);
    getConf().setLong(MRJobConfig.TASK_TIMEOUT, 3600000l);
    getConf().setInt(MRJobConfig.IO_SORT_FACTOR, 100);
    getConf().setInt(MRJobConfig.IO_SORT_MB, 1000);
    getConf().setInt(FileInputFormat.SPLIT_MAXSIZE, 1000000000);
    getConf().setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY, 2048);
    Job job = Job.getInstance(getConf(), "HalyardHiveLoad -> " + args[1] + " -> " + args[2]);
    int i = args[0].indexOf('.');
    HCatInputFormat.setInput(job, i > 0 ? args[0].substring(0, i) : null, args[0].substring(i + 1));
    job.setJarByClass(HalyardHiveLoad.class);
    job.setMapperClass(HiveMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    Map<String, Integer> contextSplitsMap = new HashMap<>();
    for (Map.Entry<String, String> me : getConf().getValByRegex(HalyardBulkLoad.CONTEXT_SPLIT_REGEXP)
            .entrySet()) {
        int splits = Integer.parseInt(me.getKey().substring(me.getKey().lastIndexOf('.') + 1));
        StringTokenizer stk = new StringTokenizer(me.getValue(), ",");
        while (stk.hasMoreTokens()) {
            contextSplitsMap.put(stk.nextToken(), splits);
        }
    }
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], true,
            getConf().getInt(HalyardBulkLoad.SPLIT_BITS_PROPERTY, 3), contextSplitsMap)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(),
                hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}

From source file:org.apache.tajo.storage.TestCompressionStorages.java

License:Apache License

@Test
public void testSnappyCodecCompressionData() throws IOException {
    if (SnappyCodec.isNativeCodeLoaded()) {
        storageCompressionTest(storeType, SnappyCodec.class);
    }/* w  w w .j  av a2s  . com*/
}

From source file:org.icgc.dcc.release.core.util.JavaRDDs.java

License:Open Source License

@NonNull
public static <K, V> void saveAsSequenceFile(JavaPairRDD<K, V> rdd, Class<K> keyClass, Class<V> valueClass,
        String path, JobConf conf) {

    // Compress//from   ww  w . j a  v  a  2 s .c  o  m
    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressionType(conf, CompressionType.BLOCK);
    if (SnappyCodec.isNativeCodeLoaded()) {
        log.debug("Saving output with Snappy compression");
        SequenceFileOutputFormat.setOutputCompressorClass(conf, SnappyCodec.class);
    }

    rdd.saveAsHadoopFile(path, keyClass, valueClass, SequenceFileOutputFormat.class, conf);
}

From source file:org.pentaho.hadoop.shim.cdh4.SnappyShim.java

License:Apache License

/**
 * Tests whether hadoop-snappy (not to be confused with other java-based
 * snappy implementations such as jsnappy or snappy-java) plus the 
 * native snappy libraries are available.
 * /*from   w ww.  j  a v a2 s . c o m*/
 * @return true if hadoop-snappy is available on the classpath
 */
public boolean isHadoopSnappyAvailable() {
    ClassLoader cl = Thread.currentThread().getContextClassLoader();
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
    try {
        return SnappyCodec.isNativeCodeLoaded();
    } catch (Throwable t) {
        return false;
    } finally {
        Thread.currentThread().setContextClassLoader(cl);
    }
}

From source file:org.pentaho.hadoop.shim.mapr410.SnappyShim.java

License:Apache License

/**
 * Tests whether hadoop-snappy (not to be confused with other java-based snappy implementations such as jsnappy or
 * snappy-java) plus the native snappy libraries are available.
 *
 * @return true if hadoop-snappy is available on the classpath
 *//*from   w  w w.  java2s . c  o  m*/
@Override
public boolean isHadoopSnappyAvailable() {
    ClassLoader cl = Thread.currentThread().getContextClassLoader();
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
    try {
        return SnappyCodec.isNativeCodeLoaded();
    } catch (Throwable t) {
        return false;
    } finally {
        Thread.currentThread().setContextClassLoader(cl);
    }
}