Example usage for org.apache.hadoop.io Text toString

Introduction

On this page you can find example usage of org.apache.hadoop.io.Text.toString().

Prototype

@Override
public String toString() 

Document

Convert text back to string
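
Each usage example below ultimately does the same thing: decode a Text's UTF-8 bytes into a java.lang.String via toString(). As a quick orientation, here is a minimal, self-contained sketch (not taken from any of the source files below); the sample line and its tab-separated layout are arbitrary placeholders:

import org.apache.hadoop.io.Text;

public class TextToStringExample {
    public static void main(String[] args) {
        // Text stores its contents as UTF-8 encoded bytes.
        Text text = new Text("hello\tworld");

        // toString() decodes those bytes back into a java.lang.String.
        String decoded = text.toString();

        // A pattern used throughout the examples below: split the decoded
        // line into fields before further processing.
        String[] fields = decoded.split("\t");
        System.out.println(fields.length); // prints 2
    }
}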

Usage

From source file:com.ifeng.vdn.loggroup.tool.VideologFilter.java

License:Apache License

public static final VideologPair filerByErrCode(Text value) {
    String result = "";
    VideologPair pair = null;

    if (value == null) {
        pair = new VideologPair("");
        pair.setValue("");

        return pair;
    }

    String errCode = "";
    String[] items = value.toString().split("\t");

    if (items != null && items.length == 14) {
        // extract the err code (8th column, index 7):
        errCode = items[7];

        // Check whether the err code is valid; if not, ignore the record.
        // (The two prefix literals below were non-ASCII in the original source
        // and were lost to mis-encoding; startsWith("") as shown always
        // returns true.)
        if (VALID_ERR_CODE.contains(errCode) || errCode.startsWith("") || errCode.startsWith("")) {
            pair = new VideologPair(items[0] + "|" + items[4]);

            result = value.toString();

            pair.setValue(result);
        }
    } else {
        pair = new VideologPair("");
        pair.setValue("");
    }

    return pair;
}

From source file:com.ifeng.vdn.logparser.mapper.VideoLogMapper.java

License:Apache License

@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
    if (value != null) {
        String[] items = value.toString().split("\t");
        if (items.length == 24) {
            log.info("Key[{}]    Value>>>>{}", key, value.toString());

            if (items[20].endsWith("zhvp1.0.16") || items[20].endsWith("nsvp1.0.18")) {
                // Use the player-version field (column 21) as the output key.
                context.write(new Text(items[20]), value);
            }
        }
    }
}

From source file:com.ifeng.vdn.logparser.mapper.VideoLogReducer.java

License:Apache License

@Override
protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {

    for (Text value : values) {
        log.info("Key = {}  Value = {}", key.toString(), value.toString());
        context.write(key, value);
    }
}

From source file:com.ifeng.vdn.parser.VideoLogParseMapper.java

License:Apache License

/**
 * <code>
 * -----------------------------------------------------------------------------------------------------
 * -- Log field layout. The original Chinese column descriptions were lost to
 * -- mis-encoding; the glosses below are inferred from field names and sample values.
 * -----------------------------------------------------------------------------------------------------
 * 16   err       EventRetCode = EventCode(1) + ActionCode(2) + Data(3)   err=100000
 * 3    ip        client IP
 * 4    ref       referrer URL          ref=http://v.ifeng.com/v/news/djmdnz/index.shtml#01c92b9c-37c7-4510-ac87-519a1224c263
 * 5    sid       session cookie[sid]   sid=3232F65C8864C995D82D087D8A15FF05kzzxc1
 * 6    uid       user ID               uid=1395896719356_cqf3nr8244
 * 9    loc       location
 * 12   tm        timestamp             tm=1424048309234
 * 13   url       video URL             url=http://ips.ifeng.com/video19.ifeng.com/video09/2015/02/15/2999516-102-2028.mp4
 * 15   dur       duration (from XML)   dur=155
 * 17   bt        total bytes (B)       bt=12451187
 * 18   bl        loaded bytes (B)      bl=12451187
 * 19   lt        load time             lt=139059
 * 21   vid       player ID             vid=vNsPlayer_nsvp1.0.18
 * 23   cdnId     CDN ID (Sooner / Chinanet / Chinacache / ...)   cdnId=ifengP2P
 * 24   netname   network name          netname=
 * -----------------------------------------------------------------------------------------------------
 * </code>
 */
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {

    if (value != null) {
        String ds = "";
        FileSplit split = (FileSplit) context.getInputSplit();
        String parentPath = split.getPath().getParent().toString();

        String fileName = split.getPath().getName();
        fileName = fileName.substring(0, 4);

        String[] parents = parentPath.split("/");
        if (parents.length > 0) {
            ds = parents[parents.length - 1];
        }

        VideologPair pair = VideologFilter.filte(value.toString(), ds, fileName);

        if (pair != null && pair.getKey() != null && pair.getValue() != null) {
            context.write(new Text(pair.getKey()), new Text(pair.getValue()));
        }
    }
}

From source file:com.ifeng.vdn.videolog.VideologFilter.java

License:Apache License

public static final VideologPair filerByErrCode(Text value) {
    String result = "";
    VideologPair pair = null;

    if (value == null) {
        pair = new VideologPair("");
        pair.setValue("");

        return pair;
    }

    String errCode = "";
    String[] items = value.toString().split("\t");

    if (items != null && items.length == 14) {
        // extract the err code (8th column, index 7):
        errCode = items[7];

        // Check whether the err code is valid; if not, ignore the record.
        // (The prefix literal below was non-ASCII in the original source and
        // was lost to mis-encoding; startsWith("") as shown always returns true.)
        if (VALID_ERR_CODE.contains(errCode) || errCode.startsWith("")) {
            pair = new VideologPair(items[0] + "|" + items[4]);

            result = value.toString();

            pair.setErrId(errCode);
            pair.setValue(result);
        }
    } else {
        pair = new VideologPair("");
        pair.setValue("");
    }

    return pair;
}

From source file:com.ikanow.aleph2.analytics.hadoop.assets.ObjectNodeWritableComparable.java

License:Apache License

@Override
public void readFields(DataInput in) throws IOException {
    final Text text = new Text();
    text.readFields(in);

    _object_node = (ObjectNode) _mapper.readTree(text.toString()); //(object node by construction)
}

From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java

License:Open Source License

public static BasicDBList getBsonFromSequenceFile(CustomMapReduceJobPojo cmr, int nLimit, String fields)
        throws SAXException, IOException, ParserConfigurationException {

    BasicDBList dbl = new BasicDBList();

    PropertiesManager props = new PropertiesManager();
    Configuration conf = getConfiguration(props);

    Path pathDir = HadoopUtils.getPathForJob(cmr, conf, false);

    @SuppressWarnings({ "unchecked", "rawtypes" })
    SequenceFileDirIterable<? extends Writable, ? extends Writable> seqFileDir = new SequenceFileDirIterable(
            pathDir, PathType.LIST, PathFilters.logsCRCFilter(), conf);

    // Very basic, only allow top level, 1 level of nesting, and field removal
    HashSet<String> fieldLookup = null;
    if (null != fields) {
        fieldLookup = new HashSet<String>();
        String[] fieldArray = fields.split(",");
        for (String field : fieldArray) {
            String[] fieldDecomp = field.split(":");
            fieldLookup.add(fieldDecomp[0]);
        }
    } //TOTEST

    int nRecords = 0;
    for (Pair<? extends Writable, ? extends Writable> record : seqFileDir) {
        BasicDBObject element = new BasicDBObject();

        // KEY

        Writable key = record.getFirst();
        if (key instanceof org.apache.hadoop.io.Text) {
            org.apache.hadoop.io.Text writable = (org.apache.hadoop.io.Text) key;
            element.put("key", writable.toString());
        } else if (key instanceof org.apache.hadoop.io.DoubleWritable) {
            org.apache.hadoop.io.DoubleWritable writable = (org.apache.hadoop.io.DoubleWritable) key;
            element.put("key", Double.toString(writable.get()));
        } else if (key instanceof org.apache.hadoop.io.IntWritable) {
            org.apache.hadoop.io.IntWritable writable = (org.apache.hadoop.io.IntWritable) key;
            element.put("key", Integer.toString(writable.get()));
        } else if (key instanceof org.apache.hadoop.io.LongWritable) {
            org.apache.hadoop.io.LongWritable writable = (org.apache.hadoop.io.LongWritable) key;
            element.put("key", Long.toString(writable.get()));
        } else if (key instanceof BSONWritable) {
            element.put("key", MongoDbUtil.convert((BSONWritable) key));
        }

        // VALUE

        Writable value = record.getSecond();
        if (value instanceof org.apache.hadoop.io.Text) {
            org.apache.hadoop.io.Text writable = (org.apache.hadoop.io.Text) value;
            element.put("value", writable.toString());
        } else if (value instanceof org.apache.hadoop.io.DoubleWritable) {
            org.apache.hadoop.io.DoubleWritable writable = (org.apache.hadoop.io.DoubleWritable) value;
            element.put("value", Double.toString(writable.get()));
        } else if (value instanceof org.apache.hadoop.io.IntWritable) {
            org.apache.hadoop.io.IntWritable writable = (org.apache.hadoop.io.IntWritable) value;
            element.put("value", Integer.toString(writable.get()));
        } else if (value instanceof org.apache.hadoop.io.LongWritable) {
            org.apache.hadoop.io.LongWritable writable = (org.apache.hadoop.io.LongWritable) value;
            element.put("value", Long.toString(writable.get()));
        } else if (value instanceof BSONWritable) {
            element.put("value", MongoDbUtil.convert((BSONWritable) value));
        } else if (value instanceof org.apache.mahout.math.VectorWritable) {
            Vector vec = ((org.apache.mahout.math.VectorWritable) value).get();
            BasicDBList dbl2 = listFromMahoutVector(vec, "value", element);
            element.put("value", dbl2);
        } else if (value instanceof org.apache.mahout.clustering.classify.WeightedVectorWritable) {
            org.apache.mahout.clustering.classify.WeightedVectorWritable vecW = (org.apache.mahout.clustering.classify.WeightedVectorWritable) value;
            element.put("valueWeight", vecW.getWeight());
            BasicDBList dbl2 = listFromMahoutVector(vecW.getVector(), "value", element);
            element.put("value", dbl2);
        } else if (value instanceof org.apache.mahout.clustering.iterator.ClusterWritable) {
            Cluster cluster = ((org.apache.mahout.clustering.iterator.ClusterWritable) value).getValue();
            BasicDBObject clusterVal = new BasicDBObject();
            clusterVal.put("center", listFromMahoutVector(cluster.getCenter(), "center", clusterVal));
            clusterVal.put("radius", listFromMahoutVector(cluster.getRadius(), "radius", clusterVal));
            element.put("value", clusterVal);
        } else {
            element.put("unknownValue", value.getClass().toString());
        }

        // Check the fields settings:
        // Only handle a few...
        if (null != fieldLookup) {
            for (String fieldToRemove : fieldLookup) {
                if (fieldToRemove.startsWith("value.")) {
                    fieldToRemove = fieldToRemove.substring(6);
                    BasicDBObject nested = (BasicDBObject) element.get("value");
                    if (null != nested) {
                        nested.remove(fieldToRemove);
                    }
                } else {
                    element.remove(fieldToRemove);
                }
            } //TOTEST
        }

        dbl.add(element);
        nRecords++;
        if ((nLimit > 0) && (nRecords >= nLimit)) {
            break;
        }
    }

    return dbl;
}

From source file:com.impetus.code.examples.hadoop.mapred.earthquake.EarthQuakeMapper.java

License:Apache License

@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    if (key.get() > 0) {

        String[] parsedData = value.toString().split(",");
        String date = DateCoverter.convertDate(parsedData[0]);

        if (date != null) {
            context.write(new Text(date), new IntWritable(1));
        }

    }
}

From source file:com.impetus.code.examples.hadoop.mapred.weather.MaxTempMapper.java

License:Apache License

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    String line = value.toString();
    String year = line.substring(15, 19);
    int airTemperature;
    if (line.charAt(87) == '+') { // parseInt doesn't like leading plus signs
        airTemperature = Integer.parseInt(line.substring(88, 92));
    } else {
        airTemperature = Integer.parseInt(line.substring(87, 92));
    }
    String quality = line.substring(92, 93);
    if (airTemperature != MISSING && quality.matches("[01459]")) {
        context.write(new Text(year), new IntWritable(airTemperature));
    }

}

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java

License:Apache License

/**
 * Implementation of the Mapper<>::map(). Does the copy.
 * @param relPath the file's path relative to the root, appended to the target work path
 * @param sourceFileStatus the FileStatus of the source file
 * @throws IOException
 */
@Override
public void map(Text relPath, FileStatus sourceFileStatus, Context context)
        throws IOException, InterruptedException {
    Path sourcePath = sourceFileStatus.getPath();
    Map<Long, Long> received = null;
    if (context.getConfiguration().getBoolean(ConduitConstants.AUDIT_ENABLED_KEY, true)) {
        received = new HashMap<Long, Long>();
    }
    if (LOG.isDebugEnabled())
        LOG.debug("DistCpMapper::map(): Received " + sourcePath + ", " + relPath);

    Path target = new Path(targetWorkPath.makeQualified(targetFS) + relPath.toString());

    EnumSet<DistCpOptions.FileAttribute> fileAttributes = getFileAttributeSettings(context);

    final String description = "Copying " + sourcePath + " to " + target;
    context.setStatus(description);

    LOG.info(description);

    try {
        FileStatus sourceCurrStatus;
        FileSystem sourceFS;
        try {
            sourceFS = sourcePath.getFileSystem(conf);
            sourceCurrStatus = sourceFS.getFileStatus(sourcePath);
        } catch (FileNotFoundException e) {
            throw new IOException(new RetriableFileCopyCommand.CopyReadException(e));
        }

        FileStatus targetStatus = null;

        try {
            targetStatus = targetFS.getFileStatus(target);
        } catch (FileNotFoundException ignore) {
        }

        if (targetStatus != null && (targetStatus.isDir() != sourceCurrStatus.isDir())) {
            throw new IOException("Can't replace " + target + ". Target is " + getFileType(targetStatus)
                    + ", Source is " + getFileType(sourceCurrStatus));
        }

        if (sourceCurrStatus.isDir()) {
            createTargetDirsWithRetry(description, target, context);
            return;
        }

        if (skipFile(sourceFS, sourceCurrStatus, target)) {
            LOG.info("Skipping copy of " + sourceCurrStatus.getPath() + " to " + target);
            updateSkipCounters(context, sourceCurrStatus);
        } else {
            String streamName = null;
            if (!relPath.toString().isEmpty()) {
                Path relativePath = new Path(relPath.toString());
                if (relativePath.depth() > 2) {
                    // path is for mirror service and is of format
                    // /conduit/streams/<streamName>/2013/09/12
                    Path tmpPath = relativePath;
                    while (tmpPath.getParent() != null && !tmpPath.getParent().getName().equals("streams")) {
                        tmpPath = tmpPath.getParent();
                    }
                    streamName = tmpPath.getName();
                } else {
                    // path is for merge service and of form /<stream name>/filename.gz
                    streamName = relativePath.getParent().getName();
                }
            }
            copyFileWithRetry(description, sourceCurrStatus, target, context, fileAttributes, received);
            // generate audit counters
            if (received != null) {
                for (Entry<Long, Long> entry : received.entrySet()) {
                    String counterNameValue = getCounterNameValue(streamName, sourcePath.getName(),
                            entry.getKey(), entry.getValue());
                    context.write(NullWritable.get(), new Text(counterNameValue));
                }
            }
        }

        DistCpUtils.preserve(target.getFileSystem(conf), target, sourceCurrStatus, fileAttributes);

    } catch (IOException exception) {
        handleFailures(exception, sourceFileStatus, target, context);
    }
}