List of usage examples for org.apache.hadoop.mapred JobConf getBoolean
public boolean getBoolean(String name, boolean defaultValue)
Gets the value of the name property as a boolean. If no such property is specified, or if the specified value is not a valid boolean, then defaultValue is returned.
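For orientation, here is a minimal, self-contained sketch of the call before the real-world examples below; it is not taken from any of the listed source files, and the property name example.feature.enabled is purely hypothetical.

import org.apache.hadoop.mapred.JobConf;

public class GetBooleanExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // The property is not set yet, so the supplied default (false) is returned.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("before set: " + enabled);

        // setBoolean stores the value as a string; getBoolean parses it back into a boolean.
        conf.setBoolean("example.feature.enabled", true);
        System.out.println("after set: " + conf.getBoolean("example.feature.enabled", false));
    }
}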
. From source file:StorageEngineClient.ColumnStorageHiveOutputFormat.java
License:Open Source License
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class<? extends Writable> valueClass,
        boolean isCompressed, Properties tbl, Progressable progress) throws IOException {
    boolean usenewformat = jc.getBoolean("fdf.newformat", false);
    IHead head = new IHead(usenewformat ? ConstVar.NewFormatFile : ConstVar.OldFormatFile);
    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
    IFieldMap map = new IFieldMap();
    ArrayList<TypeInfo> types;
    if (columnTypeProperty == null) {
        types = new ArrayList<TypeInfo>();
        map.addFieldType(new IRecord.IFType(ConstVar.FieldType_Int, 0));
    } else
        types = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    String compress = tbl.getProperty(ConstVar.Compress);
    if (compress != null && compress.equalsIgnoreCase("true"))
        head.setCompress((byte) 1);
    int i = 0;
    for (TypeInfo type : types) {
        byte fdftype = 0;
        String name = type.getTypeName();
        if (name.equals(Constants.TINYINT_TYPE_NAME))
            fdftype = ConstVar.FieldType_Byte;
        else if (name.equals(Constants.SMALLINT_TYPE_NAME))
            fdftype = ConstVar.FieldType_Short;
        else if (name.equals(Constants.INT_TYPE_NAME))
            fdftype = ConstVar.FieldType_Int;
        else if (name.equals(Constants.BIGINT_TYPE_NAME))
            fdftype = ConstVar.FieldType_Long;
        else if (name.equals(Constants.FLOAT_TYPE_NAME))
            fdftype = ConstVar.FieldType_Float;
        else if (name.equals(Constants.DOUBLE_TYPE_NAME))
            fdftype = ConstVar.FieldType_Double;
        else if (name.equals(Constants.STRING_TYPE_NAME))
            fdftype = ConstVar.FieldType_String;
        map.addFieldType(new IRecord.IFType(fdftype, i++));
    }
    head.setFieldMap(map);

    ArrayList<ArrayList<Integer>> columnprojects = null;
    String projectionString = jc.get(ConstVar.Projection);
    if (projectionString != null) {
        columnprojects = new ArrayList<ArrayList<Integer>>();
        String[] projectionList = projectionString.split(ConstVar.RecordSplit);
        for (String str : projectionList) {
            ArrayList<Integer> cp = new ArrayList<Integer>();
            String[] item = str.split(ConstVar.FieldSplit);
            for (String s : item) {
                cp.add(Integer.valueOf(s));
            }
            columnprojects.add(cp);
        }
    }

    if (!jc.getBoolean(ConstVar.NeedPostfix, true)) {
        final Configuration conf = new Configuration(jc);
        final IFormatDataFile ifdf = new IFormatDataFile(conf);
        ifdf.create(finalOutPath.toString(), head);
        return new RecordWriter() {
            @Override
            public void write(Writable w) throws IOException {
            }

            @Override
            public void close(boolean abort) throws IOException {
                ifdf.close();
            }
        };
    }

    final IColumnDataFile icdf = new IColumnDataFile(jc);
    icdf.create(finalOutPath.toString(), head, columnprojects);
    LOG.info(finalOutPath.toString());
    LOG.info("output file compress?\t" + compress);
    LOG.info("head:\t" + head.toStr());
    return new RecordWriter() {
        @Override
        public void write(Writable w) throws IOException {
            icdf.addRecord((IRecord) w);
        }

        @Override
        public void close(boolean abort) throws IOException {
            icdf.close();
        }
    };
}
From source file:StorageEngineClient.CombineFileInputFormat.java
License:Open Source License
private void processsplit(JobConf job, Map.Entry<String, List<OneBlockInfo>> one,
        HashMap<OneBlockInfo, String[]> blockToNodes, long maxSize, long minSizeNode, long minSizeRack,
        List<CombineFileSplit> splits, String type) {
    ArrayList<OneBlockInfo> validBlocks = new ArrayList<OneBlockInfo>();
    ArrayList<String> nodes = new ArrayList<String>();
    long curSplitSize = 0;
    if (type.equals("node"))
        nodes.add(one.getKey());
    List<OneBlockInfo> blocks = null;
    if (!type.equals("all")) {
        blocks = one.getValue();
    } else {
        blocks = new ArrayList<OneBlockInfo>();
        blocks.addAll(blockToNodes.keySet());
    }
    OneBlockInfo[] blocksInNodeArr = blocks.toArray(new OneBlockInfo[blocks.size()]);
    if (job.getBoolean("hive.merge.inputfiles.sort", true)) {
        Arrays.sort(blocksInNodeArr, new Comparator<OneBlockInfo>() {
            @Override
            public int compare(OneBlockInfo o1, OneBlockInfo o2) {
                return (int) (o2.length - o1.length);
            }
        });
    }
    if (job.getBoolean("hive.merge.inputfiles.rerange", false)) {
        Random r = new Random(123456);
        OneBlockInfo tmp = null;
        for (int i = 0; i < blocksInNodeArr.length; i++) {
            int idx = r.nextInt(blocksInNodeArr.length);
            tmp = blocksInNodeArr[i];
            blocksInNodeArr[i] = blocksInNodeArr[idx];
            blocksInNodeArr[idx] = tmp;
        }
    }
    int maxFileNumPerSplit = job.getInt("hive.merge.inputfiles.maxFileNumPerSplit", 1000);
    for (int i = 0; i < blocksInNodeArr.length; i++) {
        if (blockToNodes.containsKey(blocksInNodeArr[i])) {
            if (!type.equals("node")) {
                nodes.clear();
            }
            curSplitSize = blocksInNodeArr[i].length;
            validBlocks.clear();
            validBlocks.add(blocksInNodeArr[i]);
            blockToNodes.remove(blocksInNodeArr[i]);
            if (maxSize != 0 && curSplitSize >= maxSize) {
                addCreatedSplit(job, splits, nodes, validBlocks);
            } else {
                int filenum = 1;
                for (int j = i + 1; j < blocksInNodeArr.length; j++) {
                    if (blockToNodes.containsKey(blocksInNodeArr[j])) {
                        long size1 = blocksInNodeArr[j].length;
                        if (maxSize != 0 && curSplitSize + size1 <= maxSize) {
                            curSplitSize += size1;
                            filenum++;
                            validBlocks.add(blocksInNodeArr[j]);
                            blockToNodes.remove(blocksInNodeArr[j]);
                            if (!type.equals("node"))
                                for (int k = 0; k < blocksInNodeArr[j].hosts.length; k++) {
                                    nodes.add(blocksInNodeArr[j].hosts[k]);
                                }
                        }
                        if (filenum >= maxFileNumPerSplit) {
                            break;
                        }
                    }
                }
                if (minSizeNode != 0 && curSplitSize >= minSizeNode) {
                    addCreatedSplit(job, splits, nodes, validBlocks);
                } else {
                    for (OneBlockInfo oneblock : validBlocks) {
                        blockToNodes.put(oneblock, oneblock.hosts);
                    }
                    break;
                }
            }
        }
    }
}
From source file:StorageEngineClient.CombineFileInputFormat.java
License:Open Source License
private void processsplitForUnsplit(JobConf job, Map.Entry<String, List<OneBlockInfo>> one,
        HashMap<OneBlockInfo, String[]> blockToNodes, long maxSize, long minSizeNode, long minSizeRack,
        List<CombineFileSplit> splits, String type) {
    ArrayList<OneBlockInfo> validBlocks = new ArrayList<OneBlockInfo>();
    ArrayList<String> nodes = new ArrayList<String>();
    long curSplitSize = 0;
    if (type.equals("node"))
        nodes.add(one.getKey());
    List<OneBlockInfo> blocks = null;
    if (!type.equals("all")) {
        blocks = one.getValue();
    } else {
        blocks = new ArrayList<OneBlockInfo>();
        blocks.addAll(blockToNodes.keySet());
    }
    OneBlockInfo[] blocksInNodeArr = blocks.toArray(new OneBlockInfo[blocks.size()]);
    if (job.getBoolean("hive.merge.inputfiles.sort", true)) {
        Arrays.sort(blocksInNodeArr, new Comparator<OneBlockInfo>() {
            @Override
            public int compare(OneBlockInfo o1, OneBlockInfo o2) {
                long comparereuslt = o2.length - o1.length;
                int result = 0;
                if (comparereuslt > 0)
                    result = 1;
                if (comparereuslt < 0)
                    result = -1;
                return result;
            }
        });
    }
    if (job.getBoolean("hive.merge.inputfiles.rerange", false)) {
        Random r = new Random(123456);
        OneBlockInfo tmp = null;
        for (int i = 0; i < blocksInNodeArr.length; i++) {
            int idx = r.nextInt(blocksInNodeArr.length);
            tmp = blocksInNodeArr[i];
            blocksInNodeArr[i] = blocksInNodeArr[idx];
            blocksInNodeArr[idx] = tmp;
        }
    }
    int maxFileNumPerSplit = job.getInt("hive.merge.inputfiles.maxFileNumPerSplit", 1000);
    for (int i = 0; i < blocksInNodeArr.length; i++) {
        if (blockToNodes.containsKey(blocksInNodeArr[i])) {
            if (!type.equals("node")) {
                nodes.clear();
            }
            curSplitSize = blocksInNodeArr[i].length;
            validBlocks.clear();
            validBlocks.add(blocksInNodeArr[i]);
            blockToNodes.remove(blocksInNodeArr[i]);
            if (maxSize != 0 && curSplitSize >= maxSize) {
                if (!type.equals("node")) {
                    for (int k = 0; k < blocksInNodeArr[i].hosts.length; k++) {
                        nodes.add(blocksInNodeArr[i].hosts[k]);
                    }
                }
                addCreatedSplit(job, splits, nodes, validBlocks);
            } else {
                int filenum = 1;
                for (int j = i + 1; j < blocksInNodeArr.length; j++) {
                    if (blockToNodes.containsKey(blocksInNodeArr[j])) {
                        long size1 = blocksInNodeArr[j].length;
                        if (maxSize != 0 && curSplitSize < maxSize) {
                            curSplitSize += size1;
                            filenum++;
                            validBlocks.add(blocksInNodeArr[j]);
                            blockToNodes.remove(blocksInNodeArr[j]);
                        }
                        if (filenum >= maxFileNumPerSplit) {
                            break;
                        }
                        if (curSplitSize >= maxSize) {
                            break;
                        }
                    }
                }
                if (minSizeNode != 0 && curSplitSize >= minSizeNode) {
                    if (!type.equals("node")) {
                        generateNodesInfo(validBlocks, nodes);
                    }
                    addCreatedSplit(job, splits, nodes, validBlocks);
                } else {
                    for (OneBlockInfo oneblock : validBlocks) {
                        blockToNodes.put(oneblock, oneblock.hosts);
                    }
                    break;
                }
            }
        }
    }
    HashSet<OneBlockInfo> hs = new HashSet<OneBlockInfo>();
    while (blockToNodes.size() > 0) {
        validBlocks = new ArrayList<OneBlockInfo>();
        nodes = new ArrayList<String>();
        int filenum = 0;
        hs.clear();
        for (OneBlockInfo blockInfo : blockToNodes.keySet()) {
            filenum++;
            validBlocks.add(blockInfo);
            hs.add(blockInfo);
            if (filenum >= maxFileNumPerSplit) {
                break;
            }
        }
        for (OneBlockInfo blockInfo : hs) {
            blockToNodes.remove(blockInfo);
        }
        generateNodesInfo(validBlocks, nodes);
        this.addCreatedSplit(job, splits, nodes, validBlocks);
    }
}
From source file:StorageEngineClient.FormatStorageHiveOutputFormat.java
License:Open Source License
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class<? extends Writable> valueClass,
        boolean isCompressed, Properties tbl, Progressable progress) throws IOException {
    boolean usenewformat = jc.getBoolean("fdf.newformat", false);
    IHead head = new IHead(usenewformat ? ConstVar.NewFormatFile : ConstVar.OldFormatFile);
    boolean flag = true;
    if (flag) {
        String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
        ArrayList<TypeInfo> types;
        if (columnTypeProperty == null)
            types = new ArrayList<TypeInfo>();
        else
            types = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
        String compress = tbl.getProperty(ConstVar.Compress);
        String ifdfCompressionOn = jc.get(ConstVar.CompressionConfName);
        if ((ifdfCompressionOn != null && ifdfCompressionOn.equalsIgnoreCase("true"))
                || (compress != null && compress.equalsIgnoreCase("true"))) {
            head.setCompress((byte) 1);
            String strCompressionMethod = jc.get(ConstVar.CompressionMethodConfName);
            if (strCompressionMethod != null) {
                try {
                    byte compressionMethod = Byte.valueOf(strCompressionMethod).byteValue();
                    head.setCompressStyle(compressionMethod);
                } catch (NumberFormatException e) {
                }
            }
            String compressionLevel = jc.get(ConstVar.ZlibCompressionLevelConfName);
            if (compressionLevel != null) {
                if (compressionLevel.equalsIgnoreCase("bestSpeed")) {
                    jc.set("zlib.compress.level", ZlibCompressor.CompressionLevel.BEST_SPEED.toString());
                } else if (compressionLevel.equalsIgnoreCase("bestCompression")) {
                    jc.set("zlib.compress.level", ZlibCompressor.CompressionLevel.BEST_COMPRESSION.toString());
                }
            }
        }
        IFieldMap map = new IFieldMap();
        int i = 0;
        for (TypeInfo type : types) {
            byte fdftype = 0;
            String name = type.getTypeName();
            if (name.equals(Constants.TINYINT_TYPE_NAME))
                fdftype = ConstVar.FieldType_Byte;
            else if (name.equals(Constants.SMALLINT_TYPE_NAME))
                fdftype = ConstVar.FieldType_Short;
            else if (name.equals(Constants.INT_TYPE_NAME))
                fdftype = ConstVar.FieldType_Int;
            else if (name.equals(Constants.BIGINT_TYPE_NAME))
                fdftype = ConstVar.FieldType_Long;
            else if (name.equals(Constants.FLOAT_TYPE_NAME))
                fdftype = ConstVar.FieldType_Float;
            else if (name.equals(Constants.DOUBLE_TYPE_NAME))
                fdftype = ConstVar.FieldType_Double;
            else if (name.equals(Constants.STRING_TYPE_NAME))
                fdftype = ConstVar.FieldType_String;
            else if (name.equals(Constants.TIMESTAMP_TYPE_NAME))
                fdftype = ConstVar.FieldType_String;
            map.addFieldType(new IRecord.IFType(fdftype, i++));
        }
        head.setFieldMap(map);
    }
    final IFormatDataFile ifdf = new IFormatDataFile(jc);
    ifdf.create(finalOutPath.toString(), head);
    return new RecordWriter() {
        @Override
        public void write(Writable w) throws IOException {
            ifdf.addRecord((IRecord) w);
        }

        @Override
        public void close(boolean abort) throws IOException {
            ifdf.close();
        }
    };
}
From source file:uk.bl.wa.hadoop.mapred.ByteBlockRecordReader.java
License:Open Source License
/**
 * @param inputSplit
 * @param conf
 * @throws IOException
 */
public ByteBlockRecordReader(InputSplit inputSplit, JobConf conf) throws IOException {
    if (inputSplit instanceof FileSplit) {
        FileSplit fs = (FileSplit) inputSplit;
        path = fs.getPath();
        FileSystem fSys = path.getFileSystem(conf);
        file_length = fSys.getContentSummary(path).getLength();
        fsdis = fSys.open(path);

        // Support auto-decompression of compressed files:
        boolean autoDecompress = conf.getBoolean("mapreduce.unsplittableinputfileformat.autodecompress", false);
        if (autoDecompress) {
            log.warn("Enabling auto-decompression of this file.");
            compressionCodecs = new CompressionCodecFactory(conf);
            final CompressionCodec codec = compressionCodecs.getCodec(path);
            if (codec != null) {
                fsdis = codec.createInputStream(fsdis);
            }
        } else {
            log.info("Auto-decompression is not enabled.");
        }
    } else {
        log.error("Only FileSplit supported!");
        throw new IOException("Need FileSplit input...");
    }
}
From source file:voldemort.store.readonly.disk.HadoopStoreWriterPerBucket.java
License:Apache License
@Override
public void conf(JobConf job) {
    JobConf conf = job;
    try {
        this.cluster = new ClusterMapper().readCluster(new StringReader(conf.get("cluster.xml")));
        List<StoreDefinition> storeDefs = new StoreDefinitionsMapper()
                .readStoreList(new StringReader(conf.get("stores.xml")));
        if (storeDefs.size() != 1)
            throw new IllegalStateException("Expected to find only a single store, but found multiple!");
        this.storeDef = storeDefs.get(0);

        this.numChunks = conf.getInt("num.chunks", -1);
        if (this.numChunks < 1)
            throw new VoldemortException("num.chunks not specified in the job conf.");

        this.saveKeys = conf.getBoolean("save.keys", false);
        this.reducerPerBucket = conf.getBoolean("reducer.per.bucket", false);
        this.conf = job;
        this.outputDir = job.get("final.output.dir");
        this.taskId = job.get("mapred.task.id");
        this.checkSumType = CheckSum.fromString(job.get("checksum.type"));

        this.checkSumDigestIndex = new CheckSum[getNumChunks()];
        this.checkSumDigestValue = new CheckSum[getNumChunks()];
        this.position = new int[getNumChunks()];
        this.taskIndexFileName = new Path[getNumChunks()];
        this.taskValueFileName = new Path[getNumChunks()];
        this.indexFileStream = new DataOutputStream[getNumChunks()];
        this.valueFileStream = new DataOutputStream[getNumChunks()];

        for (int chunkId = 0; chunkId < getNumChunks(); chunkId++) {
            this.checkSumDigestIndex[chunkId] = CheckSum.getInstance(checkSumType);
            this.checkSumDigestValue[chunkId] = CheckSum.getInstance(checkSumType);
            this.position[chunkId] = 0;

            this.taskIndexFileName[chunkId] = new Path(FileOutputFormat.getOutputPath(job),
                    getStoreName() + "." + Integer.toString(chunkId) + "_" + this.taskId + ".index");
            this.taskValueFileName[chunkId] = new Path(FileOutputFormat.getOutputPath(job),
                    getStoreName() + "." + Integer.toString(chunkId) + "_" + this.taskId + ".data");

            if (this.fs == null)
                this.fs = this.taskIndexFileName[chunkId].getFileSystem(job);

            this.indexFileStream[chunkId] = fs.create(this.taskIndexFileName[chunkId]);
            fs.setPermission(this.taskIndexFileName[chunkId],
                    new FsPermission(HadoopStoreBuilder.HADOOP_FILE_PERMISSION));
            logger.info("Setting permission to 755 for " + this.taskIndexFileName[chunkId]);

            this.valueFileStream[chunkId] = fs.create(this.taskValueFileName[chunkId]);
            fs.setPermission(this.taskValueFileName[chunkId],
                    new FsPermission(HadoopStoreBuilder.HADOOP_FILE_PERMISSION));
            logger.info("Setting permission to 755 for " + this.taskValueFileName[chunkId]);

            logger.info("Opening " + this.taskIndexFileName[chunkId] + " and " + this.taskValueFileName[chunkId]
                    + " for writing.");
        }
    } catch (IOException e) {
        // throw new RuntimeException("Failed to open Input/OutputStream", e);
        e.printStackTrace();
    }
}
From source file:voldemort.store.readonly.mr.AbstractStoreBuilderConfigurable.java
License:Apache License
public void configure(JobConf conf) {
    this.cluster = new ClusterMapper().readCluster(new StringReader(conf.get("cluster.xml")));
    List<StoreDefinition> storeDefs = new StoreDefinitionsMapper()
            .readStoreList(new StringReader(conf.get("stores.xml")));
    if (storeDefs.size() != 1)
        throw new IllegalStateException("Expected to find only a single store, but found multiple!");
    this.storeDef = storeDefs.get(0);

    this.numChunks = conf.getInt("num.chunks", -1);
    if (this.numChunks < 1)
        throw new VoldemortException("num.chunks not specified in the job conf.");

    this.saveKeys = conf.getBoolean("save.keys", false);
    this.reducerPerBucket = conf.getBoolean("reducer.per.bucket", false);
}
From source file:voldemort.store.readonly.mr.AvroStoreBuilderMapper.java
License:Apache License
@Override
public void configure(JobConf conf) {
    super.setConf(conf); // from parent code
    md5er = ByteUtils.getDigest("md5");
    this.cluster = new ClusterMapper().readCluster(new StringReader(conf.get("cluster.xml")));
    List<StoreDefinition> storeDefs = new StoreDefinitionsMapper()
            .readStoreList(new StringReader(conf.get("stores.xml")));
    if (storeDefs.size() != 1)
        throw new IllegalStateException("Expected to find only a single store, but found multiple!");
    this.storeDef = storeDefs.get(0);

    this.numChunks = conf.getInt("num.chunks", -1);
    if (this.numChunks < 1)
        throw new VoldemortException("num.chunks not specified in the job conf.");

    this.saveKeys = conf.getBoolean("save.keys", true);
    this.reducerPerBucket = conf.getBoolean("reducer.per.bucket", false);

    keySerializerDefinition = getStoreDef().getKeySerializer();
    valueSerializerDefinition = getStoreDef().getValueSerializer();

    try {
        SerializerFactory factory = new DefaultSerializerFactory();
        if (conf.get("serializer.factory") != null) {
            factory = (SerializerFactory) Class.forName(conf.get("serializer.factory")).newInstance();
        }

        keySerializer = factory.getSerializer(keySerializerDefinition);
        valueSerializer = factory.getSerializer(valueSerializerDefinition);

        keyField = conf.get("avro.key.field");
        valField = conf.get("avro.value.field");
        keySchema = conf.get("avro.key.schema");
        valSchema = conf.get("avro.val.schema");

        if (keySerializerDefinition.getName().equals("avro-generic")) {
            keySerializer = new AvroGenericSerializer(keySchema);
            valueSerializer = new AvroGenericSerializer(valSchema);
        } else {
            if (keySerializerDefinition.hasVersion()) {
                Map<Integer, String> versions = new HashMap<Integer, String>();
                for (Map.Entry<Integer, String> entry : keySerializerDefinition.getAllSchemaInfoVersions()
                        .entrySet())
                    versions.put(entry.getKey(), entry.getValue());
                keySerializer = new AvroVersionedGenericSerializer(versions);
            } else
                keySerializer = new AvroVersionedGenericSerializer(
                        keySerializerDefinition.getCurrentSchemaInfo());

            if (valueSerializerDefinition.hasVersion()) {
                Map<Integer, String> versions = new HashMap<Integer, String>();
                for (Map.Entry<Integer, String> entry : valueSerializerDefinition.getAllSchemaInfoVersions()
                        .entrySet())
                    versions.put(entry.getKey(), entry.getValue());
                valueSerializer = new AvroVersionedGenericSerializer(versions);
            } else
                valueSerializer = new AvroVersionedGenericSerializer(
                        valueSerializerDefinition.getCurrentSchemaInfo());
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    keyCompressor = new CompressionStrategyFactory().get(keySerializerDefinition.getCompression());
    valueCompressor = new CompressionStrategyFactory().get(valueSerializerDefinition.getCompression());
    routingStrategy = new ConsistentRoutingStrategy(getCluster().getNodes(),
            getStoreDef().getReplicationFactor());

    Props props = HadoopUtils.getPropsFromJob(conf);
}
From source file:voldemort.store.readonly.mr.AvroStoreBuilderPartitioner.java
License:Apache License
@Override
public void configure(JobConf conf) {
    this.cluster = new ClusterMapper().readCluster(new StringReader(conf.get("cluster.xml")));
    List<StoreDefinition> storeDefs = new StoreDefinitionsMapper()
            .readStoreList(new StringReader(conf.get("stores.xml")));
    if (storeDefs.size() != 1)
        throw new IllegalStateException("Expected to find only a single store, but found multiple!");
    this.storeDef = storeDefs.get(0);

    this.numChunks = conf.getInt("num.chunks", -1);
    if (this.numChunks < 1)
        throw new VoldemortException("num.chunks not specified in the job conf.");

    this.saveKeys = conf.getBoolean("save.keys", false);
    this.reducerPerBucket = conf.getBoolean("reducer.per.bucket", false);
}
From source file:voldemort.store.readonly.mr.serialization.JsonMapper.java
License:Apache License
public void configure(JobConf conf) {
    setInputKeySerializer(getSchemaFromJob(conf, "mapper.input.key.schema"));
    setInputValueSerializer(getSchemaFromJob(conf, "mapper.input.value.schema"));
    setOutputKeySerializer(getSchemaFromJob(conf, "mapper.output.key.schema"));
    setOutputValueSerializer(getSchemaFromJob(conf, "mapper.output.value.schema"));

    // set comparator for input Key Schema
    if (conf.getBoolean("use.json.comparator", false)) {
        conf.setOutputKeyComparatorClass(JsonDeserializerComparator.class);
        conf.set("json.schema", conf.get("mapper.output.key.schema"));
    }
    setConfigured(true);
}