List of usage examples for the org.apache.hadoop.io.Text constructor
public Text(byte[] utf8)
From source file:crimeScoreMapper.java
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { StringTokenizer tokenizer = new StringTokenizer(value.toString(), ","); if (tokenizer.countTokens() != 0) { String[] data = new String[tokenizer.countTokens()]; int i = 0; while (tokenizer.hasMoreTokens()) { data[i] = tokenizer.nextToken(); i++;// ww w. j a v a 2 s. com } String type = data[2]; type = type.trim(); int weight = 0; if (type.equalsIgnoreCase("arson")) { weight = 2; } else if (type.equalsIgnoreCase("theft")) { weight = 3; } else if (type.equalsIgnoreCase("assault")) { weight = 2; } else if (type.equalsIgnoreCase("battery")) { weight = 4; } else if (type.equalsIgnoreCase("robbery")) { weight = 3; } else if (type.equalsIgnoreCase("burglary")) { weight = 3; } else if (type.equalsIgnoreCase("gambling")) { weight = 1; } else if (type.equalsIgnoreCase("homicide")) { weight = 4; } else if (type.equalsIgnoreCase("stalking")) { weight = 1; } else if (type.equalsIgnoreCase("narcotics")) { weight = 2; } else if (type.equalsIgnoreCase("obscenity")) { weight = 1; } else if (type.equalsIgnoreCase("kidnapping")) { weight = 3; } else if (type.equalsIgnoreCase("sex offense")) { weight = 3; } else if (type.equalsIgnoreCase("intimidation")) { weight = 2; } else if (type.equalsIgnoreCase("non - criminal")) { weight = 1; } else if (type.equalsIgnoreCase("prostitution")) { weight = 2; } else if (type.equalsIgnoreCase("other offense")) { weight = 1; } else if (type.equalsIgnoreCase("non-criminal")) { weight = 1; } else if (type.equalsIgnoreCase("criminal damage")) { weight = 2; } else if (type.equalsIgnoreCase("public indecency")) { weight = 2; } else if (type.equalsIgnoreCase("criminal trespass")) { weight = 2; } else if (type.equalsIgnoreCase("human trafficking")) { weight = 3; } else if (type.equalsIgnoreCase("weapons violation")) { weight = 2; } else if (type.equalsIgnoreCase("deceptive practice")) { weight = 2; } else if 
(type.equalsIgnoreCase("crim sexual assault")) { weight = 4; } else if (type.equalsIgnoreCase("motor vehicle theft")) { weight = 2; } else if (type.equalsIgnoreCase("liquor law violation")) { weight = 1; } else if (type.equalsIgnoreCase("public peace violation")) { weight = 1; } else if (type.equalsIgnoreCase("other narcotic violation")) { weight = 1; } else if (type.equalsIgnoreCase("offense involving children")) { weight = 3; } else if (type.equalsIgnoreCase("interference with public officer")) { weight = 1; } else if (type.equalsIgnoreCase("concealed carry license violation")) { weight = 2; } if (data[3].trim().startsWith("60")) output.collect(new Text(data[3].trim()), new IntWritable(weight)); else output.collect(new Text(data[4].trim()), new IntWritable(weight)); } else { output.collect(new Text("ProBLEMMMMMMMMMMMMMMMMMMMMM"), new IntWritable(1)); } }
From source file:TorrentWebExtracter.java
License:Apache License
@Override public void map(LongWritable key, WarcRecord value, Context context) throws IOException, InterruptedException { context.setStatus(Counters.CURRENT_RECORD + ": " + key.get()); //Record2Hashcode r1 = new Record2Hashcode(); Record2Torrent rt = new Record2Torrent(value); String hex = rt.getHEXhash(); if (!"".equals(hex) && hex != null) { context.getCounter(Counters.NUM_HTTP_RESPONSE_RECORDS).increment(1); context.write(new Text(hex), new Text(rt.getContent())); }//from w ww . j a va 2 s .c om }
From source file:DistribCountingReducer.java
License:Apache License
/**
 * Sums the counts received for an itemset and emits its relative frequency when the total
 * reaches the minimum-frequency threshold.
 *
 * <p>The threshold is {@code datasetSize * minFreqPercent / 100}; the emitted value is the sum
 * divided by {@code datasetSize}, rendered as a decimal string.
 *
 * @param itemset  the itemset key, passed through unchanged as the output key
 * @param values   partial counts for the itemset
 * @param output   collector receiving (itemset, frequency) for frequent itemsets
 * @param reporter progress reporter; not used
 * @throws IOException if the collector fails
 */
@Override
public void reduce(Text itemset, Iterator<IntWritable> values, OutputCollector<Text, Text> output,
        Reporter reporter) throws IOException {
    int support = 0;
    while (values.hasNext()) {
        IntWritable partial = values.next();
        support += partial.get();
    }

    boolean frequent = support >= datasetSize * minFreqPercent / 100;
    if (frequent) {
        double frequency = ((double) support) / datasetSize;
        output.collect(itemset, new Text(Double.toString(frequency)));
    }
}
From source file:ParallelSplitter.java
License:Apache License
/** * @param args/*from w ww. ja v a2s .com*/ */ public static void main(String[] args) throws Exception { if (args.length != 7) { System.err.println("Usage : " + ParallelSplitter.class.getName() + " <instance> <zoo keepers> <table> <user> <pass> <num threads> <file>"); System.exit(-1); } String instance = args[0]; String zooKeepers = args[1]; String table = args[2]; String user = args[3]; String pass = args[4]; int numThreads = Integer.parseInt(args[5]); String file = args[6]; TreeSet<Text> splits = new TreeSet<Text>(); Scanner scanner = new Scanner(new File(file)); while (scanner.hasNextLine()) { splits.add(new Text(scanner.nextLine())); } ZooKeeperInstance zki = new ZooKeeperInstance(instance, zooKeepers); Connector conn = zki.getConnector(user, pass); addSplits(conn, table, splits, numThreads); }
From source file:TestGapDeduce.java
License:Apache License
/**
 * Feeds the mapper the pair ("sanford", "sage") and expects the swapped pair ("sage", "sanford").
 */
@Test
public void testMapper() {
    Text inputKey = new Text("sanford");
    Text inputValue = new Text("sage");
    mapDriver.withInput(inputKey, inputValue);

    Text expectedKey = new Text("sage");
    Text expectedValue = new Text("sanford");
    mapDriver.withOutput(expectedKey, expectedValue);

    mapDriver.runTest();
}
From source file:TestGapDeduce.java
License:Apache License
@Test public void testReducer() { List<Text> values = new ArrayList<Text>(); values.add(new Text("sage")); values.add(new Text("ian")); values.add(new Text("sage")); reduceDriver.withInput(new Text("sanford"), values); reduceDriver.withOutput(new Text("sanford"), new Text("[ian, sage]")); reduceDriver.runTest();// ww w .java 2s . c o m }
From source file:DistribCountingMapper.java
License:Apache License
/**
 * Emits a count of one for every non-empty subset of the distinct items in a transaction.
 *
 * <p>The line is split on whitespace into a set of items. For each non-empty member of the
 * power set of that set, the items are sorted and joined with a single space after each item
 * (so the key ends with a trailing space), and the pair (key, 1) is collected.
 *
 * @param lineNum  input key; not used
 * @param value    one transaction as whitespace-separated items
 * @param output   collector receiving (itemset string, 1)
 * @param reporter progress reporter; not used
 * @throws IOException if the collector fails
 */
@Override
public void map(LongWritable lineNum, Text value, OutputCollector<Text, IntWritable> output,
        Reporter reporter) throws IOException {
    IntWritable one = new IntWritable(1);

    HashSet<String> items = new HashSet<String>();
    for (StringTokenizer tokens = new StringTokenizer(value.toString()); tokens.hasMoreTokens();) {
        items.add(tokens.nextToken());
    }

    for (Set<String> subset : Sets.powerSet(items)) {
        if (subset.isEmpty()) {
            continue;
        }

        String[] sortedItems = subset.toArray(new String[subset.size()]);
        Arrays.sort(sortedItems);

        StringBuilder joined = new StringBuilder();
        for (String item : sortedItems) {
            joined.append(item).append(" ");
        }
        output.collect(new Text(joined.toString()), one);
    }
}
From source file:FlintHadoopTest.java
License:Apache License
/** * Test the Map class/*from ww w. ja v a2s. c om*/ * @throws IOException * @throws InstantiationException * @throws IllegalAccessException */ @Test public void testMap() throws IOException, InstantiationException, IllegalAccessException { mapDriver.withInput(new LongWritable(0), new Text(testPdf1Path)); assertOutputMatchesRecord(mapDriver.run().get(0), testPdf1CheckResult, testPdf1Name); }
From source file:Importer.java
License:Open Source License
/**
 * Builds an identifier of the form {@code post_<md5>} from the given content.
 *
 * <p>Only the first {@code content.getLength()} bytes of {@code content.getBytes()} are
 * digested. The MD5 digest is rendered as lowercase hexadecimal, two characters per byte.
 *
 * @param content the text to digest
 * @return a new {@code Text} holding "post_" followed by the hex digest
 * @throws Exception if the MD5 algorithm is unavailable
 */
public static Text hash(Text content) throws Exception {
    MessageDigest md5 = MessageDigest.getInstance("MD5");
    md5.update(content.getBytes(), 0, content.getLength());
    byte[] digest = md5.digest();

    StringBuilder id = new StringBuilder("post_");
    for (byte b : digest) {
        int unsigned = b & 0xFF;
        if (unsigned < 0x10) {
            // toHexString drops the leading zero of a single-digit value; restore it.
            id.append('0');
        }
        id.append(Integer.toHexString(unsigned));
    }
    return new Text(id.toString());
}
From source file:FIMReducer.java
License:Apache License
@Override public void reduce(IntWritable key, Iterator<Text> values, OutputCollector<Text, DoubleWritable> output, Reporter reporter) throws IOException { long startTime = System.currentTimeMillis(); if (!set) {//w ww . j a va2 s. c o m reporter.incrCounter("FIMReducerStart", String.valueOf(id), startTime); reporter.incrCounter("FIMReducerEnd", String.valueOf(id), startTime); set = true; } // This is a very crappy way of checking whether we got the // right number of transactions. It may not be too inefficient // though. ArrayList<Text> transactions = new ArrayList<Text>(sampleSize); while (values.hasNext()) { Text trans = new Text(values.next().toString()); transactions.add(trans); } if (sampleSize != transactions.size()) { System.out.println("WRONG NUMBER OF TRANSACTIONS!"); } System.out.println("samplesize: " + sampleSize + " received: " + transactions.size()); FPgrowth.mineFrequentItemsets(transactions.iterator(), transactions.size(), minFreqPercent - (epsilon * 50), output); long endTime = System.currentTimeMillis(); reporter.incrCounter("FIMReducerEnd", String.valueOf(id), endTime - startTime); }