Example usage for org.apache.hadoop.io Text Text

List of usage examples for org.apache.hadoop.io Text Text

Introduction

This page collects example usages of the Text constructor of the org.apache.hadoop.io.Text class.

Prototype

public Text(byte[] utf8) 

Source Link

Document

Construct from a byte array.

Usage

From source file:crimeScoreMapper.java

/**
 * Crime types grouped by severity weight. Row {@code n} holds the types whose
 * weight is {@code n + 1}; a type that appears in no row has weight 0.
 */
private static final String[][] CRIME_TYPES_BY_WEIGHT = {
        // weight 1
        { "gambling", "stalking", "obscenity", "non - criminal", "other offense", "non-criminal",
                "liquor law violation", "public peace violation", "other narcotic violation",
                "interference with public officer" },
        // weight 2
        { "arson", "assault", "narcotics", "intimidation", "prostitution", "criminal damage",
                "public indecency", "criminal trespass", "weapons violation", "deceptive practice",
                "motor vehicle theft", "concealed carry license violation" },
        // weight 3
        { "theft", "robbery", "burglary", "kidnapping", "sex offense", "human trafficking",
                "offense involving children" },
        // weight 4
        { "battery", "homicide", "crim sexual assault" } };

/** Key under which records that cannot be processed are emitted. */
private static final String PROBLEM_KEY = "ProBLEMMMMMMMMMMMMMMMMMMMMM";

/**
 * Looks up the severity weight of a crime type, ignoring case.
 *
 * @param type the already-trimmed crime type
 * @return the weight (1-4), or 0 when the type is not in the table
 */
private static int crimeWeight(String type) {
    for (int row = 0; row < CRIME_TYPES_BY_WEIGHT.length; row++) {
        for (String known : CRIME_TYPES_BY_WEIGHT[row]) {
            if (type.equalsIgnoreCase(known)) {
                return row + 1;
            }
        }
    }
    return 0;
}

/**
 * Splits a comma-separated record and emits one (key, weight) pair for it.
 *
 * <p>The third field is the crime type, which determines the weight. The output
 * key is the fourth field when it starts with "60", otherwise the fifth field.
 * Records that are empty or too short to contain the required fields are emitted
 * under {@link #PROBLEM_KEY} with a count of 1 instead of failing the task.
 *
 * <p>NOTE(review): {@link StringTokenizer} never returns empty tokens, so an empty
 * column shifts every later field one position to the left. The "60" prefix test
 * presumably compensates for that; confirm against the input format.
 *
 * @param key      input key; not used
 * @param value    one comma-separated input record
 * @param output   collector receiving the (key, weight) pair
 * @param reporter not used
 * @throws IOException if the collector fails
 */
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {

    StringTokenizer tokenizer = new StringTokenizer(value.toString(), ",");
    int fieldCount = tokenizer.countTokens();
    String[] data = new String[fieldCount];
    for (int i = 0; i < fieldCount; i++) {
        data[i] = tokenizer.nextToken();
    }

    // The crime type (index 2) and the first key candidate (index 3) are always read.
    if (fieldCount < 4) {
        output.collect(new Text(PROBLEM_KEY), new IntWritable(1));
        return;
    }

    int weight = crimeWeight(data[2].trim());

    String outputKey = data[3].trim();
    if (!outputKey.startsWith("60")) {
        // Falling back to index 4 requires a fifth field to exist.
        if (fieldCount < 5) {
            output.collect(new Text(PROBLEM_KEY), new IntWritable(1));
            return;
        }
        outputKey = data[4].trim();
    }
    output.collect(new Text(outputKey), new IntWritable(weight));
}

From source file:TorrentWebExtracter.java

License:Apache License

/**
 * Emits (hex hash, content) for a WARC record when {@code Record2Torrent} yields
 * a non-empty hex hash for it; records without a hash are skipped.
 *
 * @param key     record key; its value is reported in the task status
 * @param value   the WARC record to process
 * @param context task context used for status, counters and output
 * @throws IOException          if writing the output fails
 * @throws InterruptedException if writing the output is interrupted
 */
@Override
public void map(LongWritable key, WarcRecord value, Context context) throws IOException, InterruptedException {
    context.setStatus(Counters.CURRENT_RECORD + ": " + key.get());

    Record2Torrent torrent = new Record2Torrent(value);
    String hexHash = torrent.getHEXhash();

    // Nothing to emit when no hash was produced.
    if (hexHash == null || "".equals(hexHash)) {
        return;
    }

    context.getCounter(Counters.NUM_HTTP_RESPONSE_RECORDS).increment(1);
    context.write(new Text(hexHash), new Text(torrent.getContent()));
}

From source file:DistribCountingReducer.java

License:Apache License

/**
 * Sums the counts received for an itemset and, when the total reaches the
 * threshold {@code datasetSize * minFreqPercent / 100}, emits the itemset with
 * its frequency ({@code total / datasetSize}) rendered as a string.
 *
 * @param itemset  the itemset key
 * @param values   partial counts for the itemset
 * @param output   collector receiving (itemset, frequency) pairs
 * @param reporter not used
 * @throws IOException if the collector fails
 */
@Override
public void reduce(Text itemset, Iterator<IntWritable> values, OutputCollector<Text, Text> output,
        Reporter reporter) throws IOException {
    int support = 0;
    while (values.hasNext()) {
        IntWritable partialCount = values.next();
        support += partialCount.get();
    }

    if (support >= datasetSize * minFreqPercent / 100) {
        double frequency = (double) support / datasetSize;
        output.collect(itemset, new Text(Double.toString(frequency)));
    }
}

From source file:ParallelSplitter.java

License:Apache License

/**
 * @param args/*from w  ww. ja  v a2s  .com*/
 */
public static void main(String[] args) throws Exception {
    if (args.length != 7) {
        System.err.println("Usage : " + ParallelSplitter.class.getName()
                + " <instance> <zoo keepers> <table> <user> <pass> <num threads> <file>");
        System.exit(-1);
    }

    String instance = args[0];
    String zooKeepers = args[1];
    String table = args[2];
    String user = args[3];
    String pass = args[4];
    int numThreads = Integer.parseInt(args[5]);
    String file = args[6];

    TreeSet<Text> splits = new TreeSet<Text>();

    Scanner scanner = new Scanner(new File(file));
    while (scanner.hasNextLine()) {
        splits.add(new Text(scanner.nextLine()));
    }

    ZooKeeperInstance zki = new ZooKeeperInstance(instance, zooKeepers);
    Connector conn = zki.getConnector(user, pass);

    addSplits(conn, table, splits, numThreads);

}

From source file:TestGapDeduce.java

License:Apache License

/**
 * Feeds the mapper the pair ("sanford", "sage") and expects the swapped pair
 * ("sage", "sanford") as its only output.
 */
@Test
public void testMapper() {
    Text inputKey = new Text("sanford");
    Text inputValue = new Text("sage");
    mapDriver.withInput(inputKey, inputValue);

    Text expectedKey = new Text("sage");
    Text expectedValue = new Text("sanford");
    mapDriver.withOutput(expectedKey, expectedValue);

    mapDriver.runTest();
}

From source file:TestGapDeduce.java

License:Apache License

/**
 * Feeds the reducer the key "sanford" with the values "sage", "ian", "sage" and
 * expects a single output whose value is "[ian, sage]", with the duplicate
 * appearing only once.
 */
@Test
public void testReducer() {
    String[] rawValues = { "sage", "ian", "sage" };
    List<Text> values = new ArrayList<Text>();
    for (String raw : rawValues) {
        values.add(new Text(raw));
    }

    reduceDriver.withInput(new Text("sanford"), values);
    reduceDriver.withOutput(new Text("sanford"), new Text("[ian, sage]"));
    reduceDriver.runTest();
}

From source file:DistribCountingMapper.java

License:Apache License

/**
 * Emits a count of 1 for every non-empty subset of the distinct items in a
 * transaction.
 *
 * <p>The transaction is split on whitespace and duplicate items are dropped. Each
 * subset is emitted as its items in sorted order, every item followed by a single
 * space (so the key ends with a trailing space).
 *
 * <p>NOTE(review): the number of subsets doubles with each distinct item, and
 * Guava's {@code Sets.powerSet} is documented to reject input sets with more than
 * 30 elements. Confirm that transactions are bounded well below that.
 *
 * @param lineNum  input key; not used
 * @param value    one transaction, items separated by whitespace
 * @param output   collector receiving (itemset, 1) pairs
 * @param reporter not used
 * @throws IOException if the collector fails
 */
@Override
public void map(LongWritable lineNum, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
    IntWritable one = new IntWritable(1);
    HashSet<String> transactionItems = new HashSet<String>();
    StringTokenizer st = new StringTokenizer(value.toString());
    while (st.hasMoreTokens()) {
        transactionItems.add(st.nextToken());
    }

    Set<Set<String>> powerSet = Sets.powerSet(transactionItems);
    for (Set<String> itemset : powerSet) {
        if (itemset.isEmpty()) {
            continue;
        }
        String[] itemsetArr = itemset.toArray(new String[itemset.size()]);
        // Sorting gives every subset one canonical key regardless of set iteration order.
        Arrays.sort(itemsetArr);
        StringBuilder itemsetStr = new StringBuilder();
        for (String item : itemsetArr) {
            itemsetStr.append(item).append(' ');
        }
        output.collect(new Text(itemsetStr.toString()), one);
    }
}

From source file:FlintHadoopTest.java

License:Apache License

/**
 * Test the Map class/*from ww w.  ja v a2s. c om*/
 * @throws IOException
 * @throws InstantiationException
 * @throws IllegalAccessException
 */
@Test
public void testMap() throws IOException, InstantiationException, IllegalAccessException {
    mapDriver.withInput(new LongWritable(0), new Text(testPdf1Path));
    assertOutputMatchesRecord(mapDriver.run().get(0), testPdf1CheckResult, testPdf1Name);
}

From source file:Importer.java

License:Open Source License

/**
 * Builds an identifier of the form {@code post_<md5>} for the given content,
 * where {@code <md5>} is the MD5 digest as 32 lowercase hexadecimal digits.
 *
 * <p>Only the first {@code content.getLength()} bytes of the array returned by
 * {@code content.getBytes()} are hashed.
 *
 * @param content the text to hash
 * @return a new {@code Text} holding the identifier
 * @throws Exception if the MD5 algorithm is unavailable
 */
public static Text hash(Text content) throws Exception {
    final char[] hexDigits = "0123456789abcdef".toCharArray();

    MessageDigest md5 = MessageDigest.getInstance("MD5");
    md5.update(content.getBytes(), 0, content.getLength());
    byte[] digest = md5.digest();

    StringBuilder id = new StringBuilder("post_");
    for (byte b : digest) {
        // High nibble first, then low nibble: always two digits per byte.
        id.append(hexDigits[(b >> 4) & 0x0F]);
        id.append(hexDigits[b & 0x0F]);
    }
    return new Text(id.toString());
}

From source file:FIMReducer.java

License:Apache License

/**
 * Collects all transactions for a key, mines frequent itemsets from them with
 * {@code FPgrowth.mineFrequentItemsets}, and records timing counters.
 *
 * <p>Counter bookkeeping: on the first call both "FIMReducerStart" and
 * "FIMReducerEnd" are incremented by the start timestamp; every call then adds
 * its elapsed time to "FIMReducerEnd".
 *
 * @param key      reduce key; not used
 * @param values   the transactions to mine
 * @param output   collector handed to the miner
 * @param reporter used for the timing counters
 * @throws IOException declared by the reducer contract
 */
@Override
public void reduce(IntWritable key, Iterator<Text> values, OutputCollector<Text, DoubleWritable> output,
        Reporter reporter) throws IOException {
    final long startTime = System.currentTimeMillis();
    if (!set) {
        final String counterName = String.valueOf(id);
        reporter.incrCounter("FIMReducerStart", counterName, startTime);
        reporter.incrCounter("FIMReducerEnd", counterName, startTime);
        set = true;
    }

    // Buffer every transaction so the received count can be compared with the
    // expected sample size before mining. Each value is copied into a fresh Text
    // (presumably because the framework may reuse the value instance; confirm).
    ArrayList<Text> transactions = new ArrayList<Text>(sampleSize);
    while (values.hasNext()) {
        transactions.add(new Text(values.next().toString()));
    }

    final int received = transactions.size();
    if (received != sampleSize) {
        System.out.println("WRONG NUMBER OF TRANSACTIONS!");
    }
    System.out.println("samplesize: " + sampleSize + " received: " + received);

    FPgrowth.mineFrequentItemsets(transactions.iterator(), received, minFreqPercent - (epsilon * 50),
            output);

    final long elapsed = System.currentTimeMillis() - startTime;
    reporter.incrCounter("FIMReducerEnd", String.valueOf(id), elapsed);
}