Example usage for org.apache.hadoop.io Text toString

List of usage examples for org.apache.hadoop.io Text toString

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.toString().

Prototype

@Override
public String toString() 

Source Link

Document

Convert text back to string

Usage

From source file:crimeScoreMapper.java

/**
 * Maps one comma-separated record to a (key, weight) pair.
 *
 * <p>The crime type is read from field index 2 and translated to a weight by
 * {@link #crimeWeight(String)}. The output key is field index 3 when it starts with
 * {@code "60"}, otherwise field index 4. Lines that contain no tokens, or too few
 * fields to supply both the type and the output key, are emitted under a fixed marker
 * key with a value of 1 instead of failing the task with an
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @param key      input key supplied by the framework; not used
 * @param value    one input line, fields separated by commas
 * @param output   collector receiving the (key, weight) pair
 * @param reporter progress reporter; not used
 * @throws IOException if the collector fails
 */
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {

    // StringTokenizer skips empty fields, so indices refer to non-empty tokens only.
    StringTokenizer tokenizer = new StringTokenizer(value.toString(), ",");
    String[] data = new String[tokenizer.countTokens()];
    for (int i = 0; i < data.length; i++) {
        data[i] = tokenizer.nextToken();
    }

    // Indices 2 (type) and 3 (first key candidate) are always read.
    if (data.length < 4) {
        output.collect(new Text("ProBLEMMMMMMMMMMMMMMMMMMMMM"), new IntWritable(1));
        return;
    }

    String outKey = data[3].trim();
    if (!outKey.startsWith("60")) {
        // Fall back to the next field; it must exist for the record to be usable.
        if (data.length < 5) {
            output.collect(new Text("ProBLEMMMMMMMMMMMMMMMMMMMMM"), new IntWritable(1));
            return;
        }
        outKey = data[4].trim();
    }

    output.collect(new Text(outKey), new IntWritable(crimeWeight(data[2].trim())));
}

/** Returns the weight assigned to a crime type (case-insensitive), or 0 for unknown types. */
private static int crimeWeight(String type) {
    if (matchesAny(type, "battery", "homicide", "crim sexual assault")) {
        return 4;
    }
    if (matchesAny(type, "theft", "robbery", "burglary", "kidnapping", "sex offense", "human trafficking",
            "offense involving children")) {
        return 3;
    }
    if (matchesAny(type, "arson", "assault", "narcotics", "intimidation", "prostitution", "criminal damage",
            "public indecency", "criminal trespass", "weapons violation", "deceptive practice",
            "motor vehicle theft", "concealed carry license violation")) {
        return 2;
    }
    if (matchesAny(type, "gambling", "stalking", "obscenity", "non - criminal", "other offense", "non-criminal",
            "liquor law violation", "public peace violation", "other narcotic violation",
            "interference with public officer")) {
        return 1;
    }
    return 0;
}

/** Returns true when {@code type} equals any of {@code names}, ignoring case. */
private static boolean matchesAny(String type, String... names) {
    for (String name : names) {
        if (type.equalsIgnoreCase(name)) {
            return true;
        }
    }
    return false;
}

From source file:FriendsReducer.java

License:Apache License

/**
 * Emits the entries common to the first two comma-separated value lists received for a key.
 *
 * <p>The first two values are each split on commas; every entry of the second list that
 * also occurs in the first is kept (in the second list's order) and the result is written
 * as a single comma-joined value under the incoming key. Any values beyond the second are
 * ignored. If fewer than two values arrive, nothing is emitted; previously this case
 * failed with a {@link java.util.NoSuchElementException}.
 *
 * @param anagramKey    the key shared by the incoming values
 * @param anagramValues the values for the key; only the first two are consumed
 * @param results       collector receiving the (key, common entries) pair
 * @param reporter      progress reporter; not used
 * @throws IOException if the collector fails
 */
public void reduce(Text anagramKey, Iterator<Text> anagramValues, OutputCollector<Text, Text> results,
        Reporter reporter) throws IOException {
    if (!anagramValues.hasNext()) {
        return;
    }
    List<String> values1 = Arrays.asList(anagramValues.next().toString().split(","));

    // An intersection needs a second list; a lone value has nothing to compare against.
    if (!anagramValues.hasNext()) {
        return;
    }
    String[] values2 = anagramValues.next().toString().split(",");

    List<String> r = new ArrayList<String>();
    for (String v2 : values2) {
        if (values1.contains(v2)) {
            r.add(v2);
        }
    }

    outputKey.set(anagramKey.toString());
    outputValue.set(String.join(",", r));
    results.collect(outputKey, outputValue);
}

From source file:MinMaxCountReducer.java

/**
 * Folds every tuple received for a key into one overall minimum, maximum and total
 * count, then writes them as a single {@code Put} with qualifiers {@code min},
 * {@code max} and {@code count} in column family {@code cf}. A summary line is also
 * printed to standard output.
 *
 * @param key     row key; its string form becomes the row of the {@code Put}
 * @param values  the tuples to aggregate
 * @param context job context the {@code Put} is written to (with a null key)
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void reduce(Text key, Iterable<MinMaxCountTupple> values, Context context)
        throws IOException, InterruptedException {
    // Seed the running extremes with fixed sentinels.
    // NOTE(review): inputs whose min is above 100 or max is below -100 would leave the
    // sentinel in place; presumably the data is known to stay inside that range. Confirm.
    result.setMax(-100);
    result.setMin(100);

    int total = 0;
    for (MinMaxCountTupple tuple : values) {
        if (result.getMin() > tuple.getMin()) {
            result.setMin(tuple.getMin());
        }
        if (result.getMax() < tuple.getMax()) {
            result.setMax(tuple.getMax());
        }
        total += tuple.getCount();
    }
    result.setCount(total);

    String rowKey = key.toString();
    Put row = new Put(toBytes(rowKey));
    row.add(Bytes.toBytes("cf"), toBytes("min"), toBytes(result.getMin()));
    row.add(Bytes.toBytes("cf"), toBytes("max"), toBytes(result.getMax()));
    row.add(Bytes.toBytes("cf"), toBytes("count"), toBytes(result.getCount()));

    String summary = "R-> " + rowKey + " " + result.getMin() + " " + result.getMax() + " " + result.getCount();
    System.out.println(summary);

    context.write(null, row);
}

From source file:WikipediaForwardIndexBuilder.java

License:Apache License

/**
 * Parses the command line, runs a single-reducer job over the input, and converts the
 * job's text output into a binary index file.
 *
 * <p>The index file starts with the canonical name of {@code WikipediaForwardIndex},
 * the input path and the block count, followed by one (docno, offset, fileno) record per
 * line of the job output. The temporary job output directory is deleted before the job
 * runs and again after the index has been written.
 *
 * @param args command-line arguments; the input and index-file options are required,
 *             the language option is optional and must be two characters long
 * @return 0 on success, -1 if the arguments are missing or invalid
 * @throws Exception if the job or any file-system operation fails, or if the number of
 *                   records written does not match the block counter
 */
@SuppressWarnings("static-access")
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("index file").create(INDEX_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg()
            .withDescription("two-letter language code").create(LANGUAGE_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_FILE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path inputPath = new Path(cmdline.getOptionValue(INPUT_OPTION));
    String indexFile = cmdline.getOptionValue(INDEX_FILE_OPTION);

    String tmpPath = "tmp-" + WikipediaForwardIndexBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000);

    if (!inputPath.isAbsolute()) {
        System.err.println("Error: " + INPUT_OPTION + " must be an absolute path!");
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    JobConf conf = new JobConf(getConf(), WikipediaForwardIndexBuilder.class);
    FileSystem fs = FileSystem.get(conf);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - index file: " + indexFile);
    LOG.info(" - language: " + language);
    LOG.info("Note: This tool only works on block-compressed SequenceFiles!");

    conf.setJobName(String.format("BuildWikipediaForwardIndex[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
            INDEX_FILE_OPTION, indexFile, LANGUAGE_OPTION, language));

    // A single reducer means the whole job output lands in one part file (read below).
    conf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(tmpPath));
    FileOutputFormat.setCompressOutput(conf, false);

    if (language != null) {
        conf.set("wiki.language", language);
    }

    conf.setInputFormat(NoSplitSequenceFileInputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(IdentityReducer.class);

    // Delete the output directory if it exists already.
    fs.delete(new Path(tmpPath), true);

    RunningJob job = JobClient.runJob(conf);

    Counters counters = job.getCounters();
    int blocks = (int) counters.getCounter(Blocks.Total);

    LOG.info("number of blocks: " + blocks);

    LOG.info("Writing index file...");
    int cnt = 0;
    LineReader reader = new LineReader(fs.open(new Path(tmpPath + "/part-00000")));
    try {
        FSDataOutputStream out = fs.create(new Path(indexFile), true);
        try {
            // Header: index implementation class, collection path, number of blocks.
            out.writeUTF(edu.umd.cloud9.collection.wikipedia.WikipediaForwardIndex.class.getCanonicalName());
            out.writeUTF(inputPath.toString());
            out.writeInt(blocks);

            Text line = new Text();
            while (reader.readLine(line) > 0) {
                // Each job output line holds: docno, offset, fileno (whitespace separated).
                String[] arr = line.toString().split("\\s+");

                int docno = Integer.parseInt(arr[0]);
                int offset = Integer.parseInt(arr[1]);
                short fileno = Short.parseShort(arr[2]);

                out.writeInt(docno);
                out.writeInt(offset);
                out.writeShort(fileno);

                cnt++;

                if (cnt % 100000 == 0) {
                    LOG.info(cnt + " blocks written");
                }
            }
        } finally {
            // Close even when reading or parsing fails so the output stream is not leaked.
            out.close();
        }
    } finally {
        reader.close();
    }

    if (cnt != blocks) {
        throw new RuntimeException("Error: mismatch in block count!");
    }

    // Clean up.
    fs.delete(new Path(tmpPath), true);

    return 0;
}

From source file:DistribCountingMapper.java

License:Apache License

/**
 * Emits every non-empty subset of the distinct items on one input line with a count of 1.
 *
 * <p>The line is split on whitespace and de-duplicated. For each non-empty subset the
 * items are sorted and written as the output key, each item followed by a single space
 * (so the key ends with a trailing space).
 *
 * @param lineNum  input key supplied by the framework; not used
 * @param value    one input line of whitespace-separated items
 * @param output   collector receiving one (itemset, 1) pair per non-empty subset
 * @param reporter progress reporter; not used
 * @throws IOException if the collector fails
 */
@Override
public void map(LongWritable lineNum, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
    IntWritable one = new IntWritable(1);
    HashSet<String> transactionItems = new HashSet<String>();
    StringTokenizer st = new StringTokenizer(value.toString());
    while (st.hasMoreTokens()) {
        transactionItems.add(st.nextToken());
    }

    // The number of subsets doubles with every additional distinct item on the line.
    for (Set<String> itemset : Sets.powerSet(transactionItems)) {
        if (itemset.isEmpty()) {
            continue;
        }
        String[] itemsetArr = itemset.toArray(new String[itemset.size()]);
        // Sort so the same itemset always produces the same key regardless of set order.
        Arrays.sort(itemsetArr);

        StringBuilder itemsetStr = new StringBuilder();
        for (String item : itemsetArr) {
            itemsetStr.append(item).append(' ');
        }
        output.collect(new Text(itemsetStr.toString()), one);
    }
}

From source file:Text2FormatStorageMR.java

License:Open Source License

/**
 * Prints record keys from the first input split of {@code inputPath} to standard output,
 * stopping after {@code lineNum} records or at the end of the split.
 *
 * <p>At least one record is printed when the split is non-empty, even if
 * {@code lineNum} is zero or negative, because the limit is checked after printing.
 * The record reader is always closed before returning.
 *
 * @param conf      job configuration; its {@code mapred.input.dir} property is overwritten
 * @param inputPath path to read from
 * @param lineNum   maximum number of records to print
 * @return 0 after reading, or -1 if the input produced no splits
 * @throws Exception if computing splits or reading records fails
 */
@SuppressWarnings("unchecked")
public static int readFormatFile(JobConf conf, String inputPath, int lineNum) throws Exception {

    conf.set("mapred.input.dir", inputPath);

    InputFormat inputFormat = new FormatStorageInputFormat();
    InputSplit[] inputSplits = inputFormat.getSplits(conf, 1);
    if (inputSplits.length == 0) {
        System.out.println("inputSplits is empty");
        return -1;
    }

    RecordReader<WritableComparable, Writable> currRecReader =
            inputFormat.getRecordReader(inputSplits[0], conf, Reporter.NULL);
    try {
        WritableComparable key = currRecReader.createKey();
        Writable value = currRecReader.createValue();

        int num = 0;
        while (currRecReader.next(key, value)) {
            // The key (not the value) is what gets printed for each record.
            Text line = (Text) key;
            System.out.println(line.toString());
            num++;
            if (num >= lineNum) {
                break;
            }
        }
    } finally {
        // Release the reader even if reading or the cast above fails.
        currRecReader.close();
    }

    return 0;
}

From source file:AnagramMapper.java

License:Apache License

/**
 * Emits the input line keyed by its own characters in sorted order, so that all lines
 * made of the same characters end up under the same key.
 *
 * @param key             input key supplied by the framework; not used
 * @param value           the input line
 * @param outputCollector collector receiving the (sorted characters, original line) pair
 * @param reporter        progress reporter; not used
 * @throws IOException if the collector fails
 */
public void map(LongWritable key, Text value, OutputCollector<Text, Text> outputCollector, Reporter reporter)
        throws IOException {

    final String original = value.toString();

    // Sorting the characters gives the same result for any rearrangement of them.
    final char[] letters = original.toCharArray();
    Arrays.sort(letters);

    sortedText.set(String.valueOf(letters));
    orginalText.set(original);
    outputCollector.collect(sortedText, orginalText);
}

From source file:MedianMaper.java

/**
 * Emits (year * 100 + week of year, price in hundredths) for records of the configured carrier.
 *
 * <p>The carrier to keep is read from the {@code Cheapest_Carrier} configuration
 * property. Lines the parser rejects, and records of any other carrier, are skipped.
 * If the property is not set, every record is skipped.
 *
 * <p>The carrier is compared by content with {@code equals}; the previous reference
 * comparison ({@code !=}) does not compare string contents.
 *
 * @param Key     input key supplied by the framework; not used
 * @param Value   one input line, handed to {@code FlightPriceParser}
 * @param context job context providing the configuration and receiving the output
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
public void map(LongWritable Key, Text Value, Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String cheapestCarrier = conf.get("Cheapest_Carrier");

    FlightPriceParser parser = new FlightPriceParser();
    if (!parser.map(Value.toString())) {
        return;
    }
    if (cheapestCarrier == null || !cheapestCarrier.equals(parser.Carrier)) {
        return;
    }

    // Calendar months are zero-based, hence the "- 1".
    Calendar cal = Calendar.getInstance();
    cal.set(parser.Year, parser.Month - 1, parser.DayOfMonth);
    int yearWeek = parser.Year * 100 + cal.get(Calendar.WEEK_OF_YEAR);

    context.write(new IntWritable(yearWeek), new IntWritable((int) Math.round(parser.Price * 100)));
}

From source file:FriendsMapper.java

License:Apache License

/**
 * For a line of the form {@code A:B,C,D,E}, emits one record per entry after the colon.
 *
 * <p>Each record's key is the pair formed by {@code A} and that entry, ordered
 * lexicographically and joined with a comma; the value is the full list after the colon
 * ({@code B,C,D,E}). Lines without a colon, or with nothing after it, are skipped
 * instead of failing with an {@link ArrayIndexOutOfBoundsException}.
 *
 * @param key             input key supplied by the framework; not used
 * @param value           one input line
 * @param outputCollector collector receiving the (pair, list) records
 * @param reporter        progress reporter; not used
 * @throws IOException if the collector fails
 */
public void map(LongWritable key, Text value, OutputCollector<Text, Text> outputCollector, Reporter reporter)
        throws IOException {

    // value => A:B,C,D,E
    String[] kv = value.toString().split(":");
    if (kv.length < 2) {
        // No list part to pair the owner with.
        return;
    }

    // k = A, values = B,C,D,E
    String k = kv[0];
    String values = kv[1];

    orginalText.set(values);

    for (String v : values.split(",")) {
        // Order the pair so that (A,v) and (v,A) produce the same key.
        String pair = k.compareTo(v) <= 0 ? k + "," + v : v + "," + k;

        sortedText.set(pair);
        outputCollector.collect(sortedText, orginalText);
    }
}

From source file:TestString.java

License:Apache License

/**
 * Times 100,000,000 rounds of converting a {@code Text} to a {@code String}, taking two
 * substrings of it, and storing them back into two other {@code Text} objects; prints
 * the final contents and the elapsed time in seconds.
 */
@Test
public void testStringSubstring() throws Exception {
    Text source = new Text("string");
    Text prefix = new Text();
    Text suffix = new Text();

    final long begin = System.nanoTime();
    for (int round = 0; round < 100000000; round++) {
        String whole = source.toString();
        // First two characters, and everything from index 3 onwards.
        String head = whole.substring(0, 2);
        String tail = whole.substring(3);
        prefix.set(head);
        suffix.set(tail);
    }
    final long finish = System.nanoTime();

    float seconds = (finish - begin) / 1000000000f;
    System.out.println("TextStringSubString");
    System.out.println("text1: " + prefix.toString());
    System.out.println("text2: " + suffix.toString());
    System.out.println("Elapsed Time: " + seconds + " seconds.");
}