List of usage examples for org.apache.hadoop.mapred JobConf getInt
public int getInt(String name, int defaultValue)

Gets the value of the name property as an int; if no such property exists, the provided defaultValue is returned.
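Before the project examples below, here is a minimal sketch of the typical set/get pattern. The property names and values are hypothetical, chosen only to illustrate the default-value behavior; they do not belong to any of the projects listed.

import org.apache.hadoop.mapred.JobConf;

public class GetIntSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();

        // Hypothetical property name, used only for illustration.
        job.setInt("example.buffer.size", 4096);

        // Property is set, so the configured value (4096) is returned.
        int bufferSize = job.getInt("example.buffer.size", 1024);

        // Property is absent, so getInt falls back to the supplied default (1024).
        int missing = job.getInt("example.missing.key", 1024);

        System.out.println(bufferSize + " " + missing); // prints: 4096 1024
    }
}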
From source file:org.hypertable.hadoop.mapred.RowOutputFormat.java
License:Open Source License
/**
 * Create a record writer
 */
public RecordWriter<NullWritable, Row> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    String namespace = job.get(RowOutputFormat.NAMESPACE);
    String table = job.get(RowOutputFormat.TABLE);
    int flags = job.getInt(RowOutputFormat.MUTATOR_FLAGS, 0);
    int flush_interval = job.getInt(RowOutputFormat.MUTATOR_FLUSH_INTERVAL, 0);
    int buffer_size = job.getInt(RowOutputFormat.BUFFER_SIZE, msDefaultSerializedCellBufferSize);
    try {
        return new HypertableRecordWriter(namespace, table, flags, flush_interval, buffer_size, 0);
    } catch (Exception e) {
        log.error(e);
        throw new IOException("Unable to access RecordWriter - " + e.toString());
    }
}
From source file:org.hypertable.hadoop.mapred.TextTableInputFormat.java
License:Open Source License
/**
 * Gets the ThriftBroker framesize.
 * Obtains the framesize by first reading the
 * "hypertable.mapreduce.thriftbroker.framesize" property and if that is zero,
 * reads the deprecated "hypertable.mapreduce.thriftclient.framesize"
 * @param job Job configuration
 * @return ThriftBroker frame size
 */
private int getThriftFramesize(JobConf job) {
    int framesize = job.getInt(THRIFT_FRAMESIZE, 0);
    if (framesize == 0)
        framesize = job.getInt(THRIFT_FRAMESIZE2, 0);
    return framesize;
}
From source file:org.hypertable.hadoop.mapred.TextTableInputFormat.java
License:Open Source License
public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
        throws IOException {
    try {
        TableSplit ts = (TableSplit) split;
        if (m_namespace == null) {
            m_namespace = job.get(INPUT_NAMESPACE);
            if (m_namespace == null)
                m_namespace = job.get(NAMESPACE);
        }
        if (m_tablename == null)
            m_tablename = job.get(TABLE);
        ScanSpec scan_spec = ts.createScanSpec(m_base_spec);
        System.err.println(scan_spec);
        if (m_client == null) {
            String host = getThriftHost(job);
            int port = job.getInt(THRIFT_PORT, 15867);
            int framesize = getThriftFramesize(job);
            if (framesize != 0)
                m_client = ThriftClient.create(host, port, 1600000, true, framesize);
            else
                m_client = ThriftClient.create(host, port);
        }
        return new HypertableRecordReader(m_client, m_namespace, m_tablename, scan_spec, m_include_timestamps,
                m_no_escape);
    } catch (TTransportException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } catch (TException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } catch (ParseException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    }
}
From source file:org.hypertable.hadoop.mapred.TextTableInputFormat.java
License:Open Source License
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    long ns = 0;
    try {
        RowInterval ri = null;
        if (m_client == null) {
            String host = getThriftHost(job);
            int port = job.getInt(THRIFT_PORT, 15867);
            int framesize = getThriftFramesize(job);
            if (framesize != 0)
                m_client = ThriftClient.create(host, port, 1600000, true, framesize);
            else
                m_client = ThriftClient.create(host, port);
        }
        String tablename = job.get(TABLE);
        String namespace = job.get(INPUT_NAMESPACE);
        if (namespace == null)
            namespace = job.get(NAMESPACE);
        java.util.Iterator<RowInterval> iter = m_base_spec.getRow_intervalsIterator();
        if (iter != null && iter.hasNext()) {
            ri = iter.next();
            if (iter.hasNext()) {
                System.err.println("InputFormat only allows a single ROW interval");
                System.exit(-1);
            }
        }
        ns = m_client.namespace_open(namespace);
        List<org.hypertable.thriftgen.TableSplit> tsplits = m_client.table_get_splits(ns, tablename);
        List<InputSplit> splits = new ArrayList<InputSplit>(tsplits.size());
        ByteBuffer riStartRow;
        ByteBuffer riEndRow;
        Charset charset = Charset.forName("UTF-8");
        CharsetEncoder encoder = charset.newEncoder();
        for (final org.hypertable.thriftgen.TableSplit ts : tsplits) {
            riStartRow = (ri != null && ri.isSetStart_row())
                    ? encoder.encode(CharBuffer.wrap(ri.getStart_row()))
                    : null;
            riEndRow = (ri != null && ri.isSetEnd_row())
                    ? encoder.encode(CharBuffer.wrap(ri.getEnd_row()))
                    : null;
            if (ri == null
                    || ((riStartRow == null || ts.end_row == null || ts.end_row.compareTo(riStartRow) > 0
                            || (ts.end_row.compareTo(riStartRow) == 0 && ri.isStart_inclusive()))
                            && (riEndRow == null || ts.start_row == null
                                    || ts.start_row.compareTo(riEndRow) <= 0))) {
                TableSplit split = new TableSplit(tablename.getBytes("UTF-8"), ts.start_row, ts.end_row,
                        ts.hostname);
                splits.add(split);
            }
        }
        InputSplit[] isplits = new InputSplit[splits.size()];
        return splits.toArray(isplits);
    } catch (TTransportException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } catch (TException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } catch (ParseException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } finally {
        if (ns != 0) {
            try {
                m_client.namespace_close(ns);
            } catch (Exception e) {
                e.printStackTrace();
                throw new IOException(e.getMessage());
            }
        }
    }
}
From source file:org.hypertable.hadoop.mapred.TextTableOutputFormat.java
License:Open Source License
/**
 * Create a record writer
 */
public RecordWriter<Text, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    String namespace = job.get(TextTableOutputFormat.OUTPUT_NAMESPACE);
    if (namespace == null)
        namespace = job.get(TextTableOutputFormat.NAMESPACE);
    String table = job.get(TextTableOutputFormat.TABLE);
    int flags = job.getInt(TextTableOutputFormat.MUTATOR_FLAGS, 0);
    int flush_interval = job.getInt(TextTableOutputFormat.MUTATOR_FLUSH_INTERVAL, 0);
    int framesize = job.getInt(TextTableOutputFormat.THRIFT_FRAMESIZE, 0);
    if (framesize == 0)
        framesize = job.getInt(TextTableOutputFormat.THRIFT_FRAMESIZE2, 0);
    try {
        String host = getThriftHost(job);
        int port = job.getInt(THRIFT_PORT, 15867);
        ThriftClient client;
        if (framesize != 0)
            client = ThriftClient.create(host, port, 1600000, true, framesize);
        else
            client = ThriftClient.create(host, port);
        return new HypertableRecordWriter(client, namespace, table, flags, flush_interval);
    } catch (Exception e) {
        log.error(e);
        throw new IOException("Unable to access RecordWriter - " + e.toString());
    }
}
From source file:org.mitre.bio.mapred.io.FastaRecordReader.java
License:Open Source License
public FastaRecordReader(FileSplit split, JobConf job) throws IOException {
    this.pushBackString = null;
    this.pushBackSize = 0;
    this.maxLineLength = job.getInt("io.file.buffer.size", // mapred.linereader.maxlength
            Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
        this.end = Long.MAX_VALUE;
    } else {
        /**
         * From LineRecordReader, what is this doing?
         */
        if (this.start != 0) {
            LOG.info("Skipping first line in split");
            skipFirstLine = true;
            --this.start;
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) {
        /**
         * Skipping first line to re-established "start".
         */
        this.start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
From source file:org.terrier.utility.io.HadoopUtility.java
License:Mozilla Public License
protected static Path makeTemporaryFile(JobConf jobConf, String filename) throws IOException {
    final int randomKey = jobConf.getInt("terrier.tempfile.id", random.nextInt());
    jobConf.setInt("terrier.tempfile.id", randomKey);
    FileSystem defFS = FileSystem.get(jobConf);
    final Path tempFile = new Path(HADOOP_TMP_PATH + "/" + (randomKey) + "-" + filename);
    defFS.deleteOnExit(tempFile);
    return tempFile;
}
From source file:org.vilcek.hive.kv.KVHiveInputFormat.java
License:Apache License
@Override
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    String kvHostPort = conf.get(ConfigProperties.KV_HOST_PORT);
    Pattern pattern = Pattern.compile(",");
    kvHelperHosts = pattern.split(kvHostPort);
    kvStoreName = conf.get(ConfigProperties.KV_NAME);
    Topology topology = null;
    try {
        topology = TopologyLocator.get(kvHelperHosts, 0);
    } catch (KVStoreException KVSE) {
        KVSE.printStackTrace();
        return null;
    }
    RegistryUtils regUtils = new RegistryUtils(topology);
    PartitionMap partitionMap = topology.getPartitionMap();
    int nParts = partitionMap.getNPartitions();
    List<InputSplit> ret = new ArrayList<InputSplit>(nParts);
    Map<Object, RepNodeStatus> statuses = new HashMap<Object, RepNodeStatus>();
    Path[] tablePaths = FileInputFormat.getInputPaths(conf);
    for (int i = 1; i <= nParts; i++) {
        PartitionId partId = new PartitionId(i);
        RepGroupId repGroupId = topology.getRepGroupId(partId);
        RepGroup repGroup = topology.get(repGroupId);
        Collection<RepNode> repNodes = repGroup.getRepNodes();
        List<String> repNodeNames = new ArrayList<String>();
        List<String> repNodeNamesAndPorts = new ArrayList<String>();
        for (RepNode rn : repNodes) {
            RepNodeStatus rnStatus = null;
            try {
                if (statuses.containsKey(rn.getResourceId())) {
                    rnStatus = statuses.get(rn.getResourceId());
                } else {
                    RepNodeAdminAPI rna = regUtils.getRepNodeAdmin(rn.getResourceId());
                    rnStatus = rna.ping();
                    statuses.put(rn.getResourceId(), rnStatus);
                }
            } catch (RemoteException re) {
                System.err.println("Ping failed for " + rn.getResourceId() + ": " + re.getMessage());
                re.printStackTrace();
                statuses.put(rn.getResourceId(), null);
            } catch (NotBoundException e) {
                System.err.println(
                        "No RMI service for RN: " + rn.getResourceId() + " message: " + e.getMessage());
            }
            if (rnStatus == null) {
                continue;
            }
            /*
             * com.sleepycat.je.rep.ReplicatedEnvironment.State state = rnStatus.getReplicationState();
             * if (!state.isActive() || (consistency == Consistency.ABSOLUTE && !state.isMaster())) {
             *     continue;
             * }
             */
            StorageNodeId snid = rn.getStorageNodeId();
            StorageNode sn = topology.get(snid);
            repNodeNames.add(sn.getHostname());
            repNodeNamesAndPorts.add(sn.getHostname() + ":" + sn.getRegistryPort());
        }
        Key parentKey = null;
        String parentKeyValue = conf.get("oracle.kv.parentKey");
        if (parentKeyValue != null && parentKeyValue.length() > 0) {
            parentKey = Key.fromString(parentKeyValue);
        }
        KeyRange subRange = null;
        String subRangeValue = conf.get("oracle.kv.subRange");
        if (subRangeValue != null && subRangeValue.length() > 0) {
            subRange = KeyRange.fromString(subRangeValue);
        }
        int batchSize = conf.getInt("oracle.kv.batchSize", 0);
        ret.add(new KVHiveInputSplit(tablePaths[0])
                .setKVHelperHosts(repNodeNamesAndPorts.toArray(new String[0])).setKVStoreName(kvStoreName)
                .setKVPart(i).setLocations(repNodeNames.toArray(new String[0])).setDirection(direction)
                .setBatchSize(batchSize).setParentKey(parentKey).setSubRange(subRange).setDepth(depth)
                .setConsistency(consistency).setTimeout(timeout).setTimeoutUnit(timeoutUnit));
    }
    return ret.toArray(new InputSplit[ret.size()]);
}
From source file:org.warcbase.index.IndexerMapper.java
License:Apache License
@Override
public void configure(JobConf job) {
    try {
        LOG.info("Configuring WARCIndexer.");
        Config config = ConfigFactory.parseString(job.get(IndexerRunner.CONFIG_PROPERTIES));
        this.indexer = new WARCIndexer(config);
        numShards = job.getInt(NUM_SHARDS, 10);
        LOG.info("Number of shards: " + numShards);
        mapTaskId = job.get("mapred.task.id");
        inputFile = job.get("map.input.file");
        LOG.info("Got task.id " + mapTaskId + " and input.file " + inputFile);
    } catch (NoSuchAlgorithmException e) {
        LOG.error("IndexerMapper.configure(): " + e.getMessage());
    }
}
From source file:pathmerge.linear.MergePathH1Mapper.java
License:Apache License
public void configure(JobConf job) {
    KMER_SIZE = job.getInt("sizeKmer", 0);
    outputKmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
    outputValue = new MergePathValueWritable();
    tmpKmer = new VKmerBytesWritable(KMER_SIZE);
    outputKmer = new VKmerBytesWritable(KMER_SIZE);
}