Example usage for org.apache.hadoop.mapred JobConf getInt

List of usage examples for org.apache.hadoop.mapred JobConf getInt

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf getInt.

Prototype

public int getInt(String name, int defaultValue) 

Document

Get the value of the name property as an int.
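The following is a minimal, self-contained sketch (using hypothetical property names chosen only for illustration) of the behavior described above: when the property is set, getInt returns its parsed value; when it is missing, the supplied default is returned.

import org.apache.hadoop.mapred.JobConf;

public class GetIntSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // "example.max.retries" is a hypothetical property name used only for illustration.
        conf.setInt("example.max.retries", 5);

        int retries = conf.getInt("example.max.retries", 3);    // 5: property is present
        int missing = conf.getInt("example.unset.property", 3); // 3: property is absent, default used

        System.out.println(retries + " " + missing);
    }
}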

Usage

From source file:org.hypertable.hadoop.mapred.RowOutputFormat.java

License:Open Source License

/**
 * Create a record writer.
 */
public RecordWriter<NullWritable, Row> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {

    String namespace = job.get(RowOutputFormat.NAMESPACE);
    String table = job.get(RowOutputFormat.TABLE);
    int flags = job.getInt(RowOutputFormat.MUTATOR_FLAGS, 0);
    int flush_interval = job.getInt(RowOutputFormat.MUTATOR_FLUSH_INTERVAL, 0);
    int buffer_size = job.getInt(RowOutputFormat.BUFFER_SIZE, msDefaultSerializedCellBufferSize);

    try {
        return new HypertableRecordWriter(namespace, table, flags, flush_interval, buffer_size, 0);
    } catch (Exception e) {
        log.error(e);
        throw new IOException("Unable to access RecordWriter - " + e.toString());
    }
}

From source file:org.hypertable.hadoop.mapred.TextTableInputFormat.java

License:Open Source License

/**
 * Gets the ThriftBroker framesize.
 * Obtains the framesize by first reading the
 * "hypertable.mapreduce.thriftbroker.framesize" property and, if that is zero,
 * falling back to the deprecated "hypertable.mapreduce.thriftclient.framesize" property.
 * @param job Job configuration
 * @return ThriftBroker frame size
 */
private int getThriftFramesize(JobConf job) {
    int framesize = job.getInt(THRIFT_FRAMESIZE, 0);
    if (framesize == 0)
        framesize = job.getInt(THRIFT_FRAMESIZE2, 0);
    return framesize;
}
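For context, a driver configuring a job for this input format would set one of the two frame-size properties named in the Javadoc above; the helper then returns the first non-zero value. A minimal sketch of that configuration (the numeric value is only an example):

JobConf job = new JobConf();
// Preferred property; the deprecated "hypertable.mapreduce.thriftclient.framesize"
// is consulted by getThriftFramesize() only when this one is unset or zero.
job.setInt("hypertable.mapreduce.thriftbroker.framesize", 16 * 1024 * 1024);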

From source file:org.hypertable.hadoop.mapred.TextTableInputFormat.java

License:Open Source License

public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
        throws IOException {
    try {
        TableSplit ts = (TableSplit) split;
        if (m_namespace == null) {
            m_namespace = job.get(INPUT_NAMESPACE);
            if (m_namespace == null)
                m_namespace = job.get(NAMESPACE);
        }
        if (m_tablename == null)
            m_tablename = job.get(TABLE);
        ScanSpec scan_spec = ts.createScanSpec(m_base_spec);
        System.err.println(scan_spec);

        if (m_client == null) {
            String host = getThriftHost(job);
            int port = job.getInt(THRIFT_PORT, 15867);
            int framesize = getThriftFramesize(job);
            if (framesize != 0)
                m_client = ThriftClient.create(host, port, 1600000, true, framesize);
            else
                m_client = ThriftClient.create(host, port);
        }
        return new HypertableRecordReader(m_client, m_namespace, m_tablename, scan_spec, m_include_timestamps,
                m_no_escape);
    } catch (TTransportException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } catch (TException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } catch (ParseException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    }
}

From source file:org.hypertable.hadoop.mapred.TextTableInputFormat.java

License:Open Source License

public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    long ns = 0;

    try {
        RowInterval ri = null;

        if (m_client == null) {
            String host = getThriftHost(job);
            int port = job.getInt(THRIFT_PORT, 15867);
            int framesize = getThriftFramesize(job);
            if (framesize != 0)
                m_client = ThriftClient.create(host, port, 1600000, true, framesize);
            else
                m_client = ThriftClient.create(host, port);
        }

        String tablename = job.get(TABLE);
        String namespace = job.get(INPUT_NAMESPACE);
        if (namespace == null)
            namespace = job.get(NAMESPACE);

        java.util.Iterator<RowInterval> iter = m_base_spec.getRow_intervalsIterator();
        if (iter != null && iter.hasNext()) {
            ri = iter.next();
            if (iter.hasNext()) {
                System.err.println("InputFormat only allows a single ROW interval");
                System.exit(-1);
            }
        }

        ns = m_client.namespace_open(namespace);
        List<org.hypertable.thriftgen.TableSplit> tsplits = m_client.table_get_splits(ns, tablename);
        List<InputSplit> splits = new ArrayList<InputSplit>(tsplits.size());

        ByteBuffer riStartRow;
        ByteBuffer riEndRow;
        Charset charset = Charset.forName("UTF-8");
        CharsetEncoder encoder = charset.newEncoder();
        for (final org.hypertable.thriftgen.TableSplit ts : tsplits) {
            riStartRow = (ri != null && ri.isSetStart_row())
                    ? encoder.encode(CharBuffer.wrap(ri.getStart_row()))
                    : null;
            riEndRow = (ri != null && ri.isSetEnd_row()) ? encoder.encode(CharBuffer.wrap(ri.getEnd_row()))
                    : null;
            if (ri == null || ((riStartRow == null || ts.end_row == null || ts.end_row.compareTo(riStartRow) > 0
                    || (ts.end_row.compareTo(riStartRow) == 0 && ri.isStart_inclusive()))
                    && (riEndRow == null || ts.start_row == null || ts.start_row.compareTo(riEndRow) <= 0))) {
                TableSplit split = new TableSplit(tablename.getBytes("UTF-8"), ts.start_row, ts.end_row,
                        ts.hostname);
                splits.add(split);
            }
        }

        InputSplit[] isplits = new InputSplit[splits.size()];
        return splits.toArray(isplits);
    } catch (TTransportException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } catch (TException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } catch (ParseException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } finally {
        if (ns != 0) {
            try {
                m_client.namespace_close(ns);
            } catch (Exception e) {
                e.printStackTrace();
                throw new IOException(e.getMessage());
            }
        }
    }
}

From source file:org.hypertable.hadoop.mapred.TextTableOutputFormat.java

License:Open Source License

/**
 * Create a record writer.
 */
public RecordWriter<Text, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    String namespace = job.get(TextTableOutputFormat.OUTPUT_NAMESPACE);
    if (namespace == null)
        namespace = job.get(TextTableOutputFormat.NAMESPACE);
    String table = job.get(TextTableOutputFormat.TABLE);
    int flags = job.getInt(TextTableOutputFormat.MUTATOR_FLAGS, 0);
    int flush_interval = job.getInt(TextTableOutputFormat.MUTATOR_FLUSH_INTERVAL, 0);
    int framesize = job.getInt(TextTableOutputFormat.THRIFT_FRAMESIZE, 0);
    if (framesize == 0)
        framesize = job.getInt(TextTableOutputFormat.THRIFT_FRAMESIZE2, 0);

    try {
        String host = getThriftHost(job);
        int port = job.getInt(THRIFT_PORT, 15867);
        ThriftClient client;
        if (framesize != 0)
            client = ThriftClient.create(host, port, 1600000, true, framesize);
        else
            client = ThriftClient.create(host, port);
        return new HypertableRecordWriter(client, namespace, table, flags, flush_interval);
    } catch (Exception e) {
        log.error(e);
        throw new IOException("Unable to access RecordWriter - " + e.toString());
    }
}

From source file:org.mitre.bio.mapred.io.FastaRecordReader.java

License:Open Source License

public FastaRecordReader(FileSplit split, JobConf job) throws IOException {
    this.pushBackString = null;
    this.pushBackSize = 0;

    this.maxLineLength = job.getInt("io.file.buffer.size", // mapred.linereader.maxlength
            Integer.MAX_VALUE);

    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();

    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
        this.end = Long.MAX_VALUE;
    } else {
        /**
         * As in LineRecordReader: if the split does not begin at the start of
         * the file, back up one byte and skip the first (possibly partial)
         * line, since it belongs to the previous split.
         */
        if (this.start != 0) {
            LOG.info("Skipping first line in split");
            skipFirstLine = true;
            --this.start;
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) {
        /**
         * Skipping the first line to re-establish "start".
         */
        this.start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}

From source file:org.terrier.utility.io.HadoopUtility.java

License:Mozilla Public License

protected static Path makeTemporaryFile(JobConf jobConf, String filename) throws IOException {
    final int randomKey = jobConf.getInt("terrier.tempfile.id", random.nextInt());
    jobConf.setInt("terrier.tempfile.id", randomKey);
    FileSystem defFS = FileSystem.get(jobConf);
    final Path tempFile = new Path(HADOOP_TMP_PATH + "/" + (randomKey) + "-" + filename);
    defFS.deleteOnExit(tempFile);
    return tempFile;
}

From source file:org.vilcek.hive.kv.KVHiveInputFormat.java

License:Apache License

@Override
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    String kvHostPort = conf.get(ConfigProperties.KV_HOST_PORT);
    Pattern pattern = Pattern.compile(",");
    kvHelperHosts = pattern.split(kvHostPort);
    kvStoreName = conf.get(ConfigProperties.KV_NAME);

    Topology topology = null;
    try {
        topology = TopologyLocator.get(kvHelperHosts, 0);
    } catch (KVStoreException KVSE) {
        KVSE.printStackTrace();
        return null;
    }
    RegistryUtils regUtils = new RegistryUtils(topology);
    PartitionMap partitionMap = topology.getPartitionMap();
    int nParts = partitionMap.getNPartitions();
    List<InputSplit> ret = new ArrayList<InputSplit>(nParts);

    Map<Object, RepNodeStatus> statuses = new HashMap<Object, RepNodeStatus>();
    Path[] tablePaths = FileInputFormat.getInputPaths(conf);
    for (int i = 1; i <= nParts; i++) {
        PartitionId partId = new PartitionId(i);
        RepGroupId repGroupId = topology.getRepGroupId(partId);
        RepGroup repGroup = topology.get(repGroupId);
        Collection<RepNode> repNodes = repGroup.getRepNodes();
        List<String> repNodeNames = new ArrayList<String>();
        List<String> repNodeNamesAndPorts = new ArrayList<String>();
        for (RepNode rn : repNodes) {
            RepNodeStatus rnStatus = null;
            try {
                if (statuses.containsKey(rn.getResourceId())) {
                    rnStatus = statuses.get(rn.getResourceId());
                } else {
                    RepNodeAdminAPI rna = regUtils.getRepNodeAdmin(rn.getResourceId());
                    rnStatus = rna.ping();
                    statuses.put(rn.getResourceId(), rnStatus);
                }
            } catch (RemoteException re) {
                System.err.println("Ping failed for " + rn.getResourceId() + ": " + re.getMessage());
                re.printStackTrace();
                statuses.put(rn.getResourceId(), null);
            } catch (NotBoundException e) {
                System.err.println(
                        "No RMI service for RN: " + rn.getResourceId() + " message: " + e.getMessage());
            }

            if (rnStatus == null) {
                continue;
            }

            /*
             * com.sleepycat.je.rep.ReplicatedEnvironment.State state = rnStatus.getReplicationState(); if (!state.isActive() ||
             * (consistency == Consistency.ABSOLUTE && !state.isMaster())) { continue; }
             */

            StorageNodeId snid = rn.getStorageNodeId();
            StorageNode sn = topology.get(snid);

            repNodeNames.add(sn.getHostname());
            repNodeNamesAndPorts.add(sn.getHostname() + ":" + sn.getRegistryPort());
        }

        Key parentKey = null;
        String parentKeyValue = conf.get("oracle.kv.parentKey");
        if (parentKeyValue != null && parentKeyValue.length() > 0) {
            parentKey = Key.fromString(parentKeyValue);
        }
        KeyRange subRange = null;
        String subRangeValue = conf.get("oracle.kv.subRange");
        if (subRangeValue != null && subRangeValue.length() > 0) {
            subRange = KeyRange.fromString(subRangeValue);
        }

        int batchSize = conf.getInt("oracle.kv.batchSize", 0);

        ret.add(new KVHiveInputSplit(tablePaths[0])
                .setKVHelperHosts(repNodeNamesAndPorts.toArray(new String[0])).setKVStoreName(kvStoreName)
                .setKVPart(i).setLocations(repNodeNames.toArray(new String[0])).setDirection(direction)
                .setBatchSize(batchSize).setParentKey(parentKey).setSubRange(subRange).setDepth(depth)
                .setConsistency(consistency).setTimeout(timeout).setTimeoutUnit(timeoutUnit));

    }

    return ret.toArray(new InputSplit[ret.size()]);
}

From source file:org.warcbase.index.IndexerMapper.java

License:Apache License

@Override
public void configure(JobConf job) {
    try {
        LOG.info("Configuring WARCIndexer.");
        Config config = ConfigFactory.parseString(job.get(IndexerRunner.CONFIG_PROPERTIES));
        this.indexer = new WARCIndexer(config);

        numShards = job.getInt(NUM_SHARDS, 10);
        LOG.info("Number of shards: " + numShards);

        mapTaskId = job.get("mapred.task.id");
        inputFile = job.get("map.input.file");
        LOG.info("Got task.id " + mapTaskId + " and input.file " + inputFile);
    } catch (NoSuchAlgorithmException e) {
        LOG.error("IndexerMapper.configure(): " + e.getMessage());
    }
}

From source file:pathmerge.linear.MergePathH1Mapper.java

License:Apache License

public void configure(JobConf job) {
    KMER_SIZE = job.getInt("sizeKmer", 0);
    outputKmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
    outputValue = new MergePathValueWritable();
    tmpKmer = new VKmerBytesWritable(KMER_SIZE);
    outputKmer = new VKmerBytesWritable(KMER_SIZE);
}