List of usage examples for the org.apache.hadoop.io.Text constructor Text
public Text(byte[] utf8)
From source file:com.alexholmes.hadooputils.sort.SortRecordReaderTest.java
License:Apache License
@Test public void testSeparator() throws IOException { assertEquals("asd~def", SortRecordReader.extractKey(new Text("asd~def"), 1, // start key 2, // end key "~", // separator false // ignore case ).toString());/*from ww w . ja v a2 s . c o m*/ assertEquals("asd", SortRecordReader.extractKey(new Text("asd~def"), 1, // start key 1, // end key "~", // separator false // ignore case ).toString()); }
From source file:com.alexholmes.hadooputils.sort.SortRecordReaderTest.java
License:Apache License
@Test public void testIgnoreCase() throws IOException { assertEquals("aaabbb", SortRecordReader.extractKey(new Text("aaaBBB"), 1, // start key 1, // end key "~", // separator true // ignore case ).toString());/*from w w w .j a v a 2 s.c o m*/ }
From source file:com.alimama.quanjingmonitor.kmeans.KMeansClusterCombiner.java
License:Apache License
/**
 * Forwards at most {@code limit} values for the given key, keeping the ones
 * that sort lowest according to {@code cmp}.
 *
 * <p>The limit is derived from the cluster registered under the key
 * (capped at 50000) and is never lower than 1000.
 *
 * @param key     cluster identifier
 * @param values  candidate records for the cluster
 * @param context output sink
 */
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    Cluster cluster = clusterMap.get(key.toString());
    int capacity = (cluster == null)
            ? 1
            : Math.min(cluster.getNumselect() * this.rep * 10, 50000);
    capacity = Math.max(capacity, 1000);

    // Reversed ordering: the head of the queue is the largest retained element.
    PriorityQueue<Text> retained = new PriorityQueue<Text>(capacity, Collections.reverseOrder(cmp));
    for (Text value : values) {
        // Copy the value; the incoming instance is not stored directly.
        Text copy = new Text(value.toString());
        if (retained.size() < capacity) {
            retained.add(copy);
            continue;
        }
        if (cmp.compare(retained.peek(), copy) > 0) {
            // The newcomer sorts before the current largest: swap it in.
            retained.add(copy);
            retained.poll();
        }
    }

    for (Text survivor : retained) {
        context.write(key, survivor);
    }
}
From source file:com.alimama.quanjingmonitor.kmeans.KMeansClusterMapper.java
License:Apache License
@Override protected void map(WritableComparable<?> key, Text point, Context context) throws IOException, InterruptedException { Cluster nearestCluster = null;// w w w . j av a 2 s. co m double nearestDistance = Integer.MAX_VALUE; Vector pointv = parse.parseVector(point.toString()); if (pointv == null) { return; } pointv.setNumPoints(1); for (Cluster cluster : clusters) { Vector clusterCenter = cluster.getCenter(); boolean isDeny = clusterCenter.Deny(pointv); if (isDeny) { continue; } double distance = clusterCenter.distiance(pointv); context.getCounter("Clustering", "similar").increment(1); if (distance <= nearestDistance || nearestCluster == null) { nearestCluster = cluster; nearestDistance = distance; } } if (nearestCluster != null) { context.write(new Text(String.valueOf(nearestCluster.getId())), new Text(String.valueOf(nearestDistance) + "@abtest@" + point.toString())); } }
From source file:com.alimama.quanjingmonitor.kmeans.KMeansClusterReduce.java
License:Apache License
/**
 * Selects candidate records for one cluster key, greedily groups them into
 * pairs (s1/s2) and writes each completed pair as two output records.
 *
 * <p>Steps, as visible in this method:
 * <ol>
 *   <li>Keep at most {@code limit} values that sort lowest under {@code cmp}.</li>
 *   <li>Sort them and walk through them, filling up to {@code numberSelect}
 *       pair slots: the first record reaching an empty slot becomes its s1;
 *       later records may take over s2 when they are not denied and are
 *       closer than the slot's current distance.</li>
 *   <li>Run a second pass over {@code left} to fill slots still missing s2.</li>
 *   <li>Emit every slot that has both s1 and s2, labelling the two members
 *       rep0/rep1 in random order.</li>
 * </ol>
 *
 * <p>Each value is expected to look like {@code <prefix>@abtest@<vector line>};
 * a value without the separator would fail on {@code cols[1]}.
 */
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    Cluster clu = clusterMap.get(key.toString());
    int numberSelect = 1;
    int limit = 1;
    if (clu != null) {
        limit = Math.min(clu.getNumselect() * this.rep * 100, 100000);
        numberSelect = clu.getNumselect();
        System.out.println("key:" + key + "," + numberSelect);
    } else {
        System.out.println("can nott found key:" + key);
    }
    // Never keep fewer than 5000 candidates.
    if (limit < 5000) {
        limit = 5000;
    }
    // Reversed ordering: peek() returns the largest retained element under cmp,
    // so the queue ends up holding the `limit` smallest values.
    PriorityQueue<String> res = new PriorityQueue<String>(limit, Collections.reverseOrder(cmp));
    for (Text value : values) {
        if (res.size() < limit) {
            res.add(value.toString());
        } else if (cmp.compare(res.peek(), value.toString()) > 0) {
            res.add(value.toString());
            res.poll();
        }
    }
    ArrayList<String> list = new ArrayList<String>(res);
    Collections.sort(list, cmp);
    // One slot per pair to be produced for this cluster.
    comPair[] writelist = new comPair[numberSelect];
    int end = list.size();
    // NOTE(review): `left` starts as a copy of ALL retained records, not an empty
    // list, so the second pass below also revisits records already placed in a
    // pair — confirm this is intended.
    ArrayList<String> left = new ArrayList<String>(res);
    // First pass: walk the sorted candidates and place each one.
    for (int i = 0; i < end; i++) {
        String s = list.get(i);
        System.out.println("111>>" + s);
        String[] cols = s.split("@abtest@");
        String line = cols[1];
        Vector group = parse.parseVector(line);
        for (int j = 0; j < writelist.length; j++) {
            if (writelist[j] == null) {
                // Empty slot: the current record becomes its first member.
                comPair p = new comPair();
                p.s1 = s;
                p.v1 = group;
                writelist[j] = p;
                s = null;
                break;
            }
            boolean deny = writelist[j].v1.Deny(group);
            double dis = writelist[j].v1.distiance(group);
            System.out.println("222>>" + dis);
            // NOTE(review): relies on comPair.distance starting high enough for the
            // first comparison to succeed — its initial value is not visible here.
            if (!deny && writelist[j].distance > dis) {
                // Closer partner found: take over s2 and carry the displaced
                // record (if any) on to the following slots.
                writelist[j].distance = dis;
                String s_tmp = writelist[j].s2;
                Vector group_tmp = writelist[j].v2;
                writelist[j].s2 = s;
                writelist[j].v2 = group;
                s = s_tmp;
                group = group_tmp;
                if (s_tmp == null) {
                    break;
                }
            }
        }
        // Whatever is still in hand was not placed anywhere.
        if (s != null) {
            left.add(s);
        }
    }
    int end2 = left.size();
    // Second pass: try to complete slots that still lack a second member.
    // Unlike the first pass, Deny is not consulted here.
    for (int i = 0; i < end2; i++) {
        String s = left.get(i);
        String[] cols = s.split("@abtest@");
        String line = cols[1];
        Vector group = parse.parseVector(line);
        boolean isset = false;
        for (int j = 0; j < writelist.length; j++) {
            // Only slots that have s1 but no s2 are considered.
            if (writelist[j] == null || writelist[j].s2 != null) {
                continue;
            }
            double dis = writelist[j].v1.distiance(group);
            if (writelist[j].distance > dis) {
                System.out.println("333>>" + s);
                isset = true;
                writelist[j].distance = dis;
                // s2 is known to be null at this point (see the filter above),
                // so s_tmp is null and the inner loop exits right below.
                String s_tmp = writelist[j].s2;
                Vector group_tmp = writelist[j].v2;
                writelist[j].s2 = s;
                writelist[j].v2 = group;
                if (s_tmp == null) {
                    break;
                }
                s = s_tmp;
                group = group_tmp;
            }
        }
        // Stop the whole pass at the first record that could not be placed.
        if (!isset) {
            break;
        }
    }
    // Emit completed pairs only.
    for (int i = 0; i < writelist.length; i++) {
        if (writelist[i] != null && writelist[i].s2 != null) {
            // rrr is 0 or 1 at random; rrr2 is the other value.
            int rrr = (int) ((Math.random() * 10000) % 2);
            int rrr2 = (rrr + 1) % 2;
            System.out.println(writelist[i].toString());
            context.write(key, new Text(writelist[i].distance + "\t" + i + "\trep" + rrr + "_1\t" + writelist[i].s1));
            context.write(key, new Text(writelist[i].distance + "\t" + i + "\trep" + rrr2 + "_2\t" + writelist[i].s2));
        }
    }
}
From source file:com.alimama.quanjingmonitor.kmeans.KMeansGroupMapper.java
License:Apache License
@Override protected void map(WritableComparable<?> key, Text point, Context context) throws IOException, InterruptedException { String line = point.toString(); Vector pointv = parse.parseVector(line); pointv.setNumPoints(1);//from ww w.j a va2 s . co m Cluster clu = new Cluster(pointv, 0); context.write(new Text(String.valueOf(parse.parseKey(line))), clu); }
From source file:com.alimama.quanjingmonitor.kmeans.KMeansMapper.java
License:Apache License
protected void cleanup(Context context) throws IOException, InterruptedException { for (Cluster cluster : clusters) { Vector clusterCenter = cluster.getCenter(); clusterCenter.setNumPoints(0);// w ww . ja v a 2 s .com context.write(new Text(String.valueOf(cluster.getId())), clusterCenter); } }
From source file:com.alimama.quanjingmonitor.kmeans.KMeansMapper.java
License:Apache License
@Override protected void map(WritableComparable<?> key, Text point, Context context) throws IOException, InterruptedException { Cluster nearestCluster = null;// ww w. j a v a2 s . co m double nearestDistance = Double.MAX_VALUE; Vector pointv = parse.parseVector(point.toString()); if (pointv == null) { return; } pointv.setNumPoints(1); for (Cluster cluster : clusters) { Vector clusterCenter = cluster.getCenter(); boolean isDeny = pointv.Deny(clusterCenter); if (isDeny) { continue; } double distance = clusterCenter.distiance(pointv); context.getCounter("Clustering", "similar").increment(1); if (distance <= nearestDistance || nearestCluster == null) { nearestCluster = cluster; nearestDistance = distance; } } if (nearestCluster != null) { context.write(new Text(String.valueOf(nearestCluster.getId())), pointv); } }
From source file:com.asakusafw.dag.compiler.internalio.InternalInputAdapterGeneratorTest.java
License:Apache License
/**
 * Writes the given strings to a temporary file, generates an input adapter
 * for that file, reads everything back through it and verifies that the
 * same strings come out (order-insensitive).
 *
 * @param values strings to round-trip
 */
private void check(String... values) {
    File target = new File(temporary.getRoot(), "temp.bin");
    Path path = new Path(target.toURI());
    Configuration conf = configurations.newInstance();

    // Prepare the input file.
    try (ModelOutput<Text> writer = TemporaryStorage.openOutput(conf, Text.class, path)) {
        for (int i = 0; i < values.length; i++) {
            writer.write(new Text(values[i]));
        }
    } catch (IOException e) {
        throw new AssertionError(e);
    }

    // Generate the adapter class for that single path.
    ClassGeneratorContext gc = context();
    Spec spec = new Spec("testing", Collections.singleton(path.toString()), Descriptions.typeOf(Text.class));
    ClassDescription generated = add(c -> new InternalInputAdapterGenerator().generate(gc, spec, c));

    // Load it and collect everything it reads.
    List<String> seen = new ArrayList<>();
    loading(generated, c -> {
        VertexProcessorContext vc = new MockVertexProcessorContext()
                .with(c)
                .withResource(conf)
                .withResource(new StageInfo("u", "b", "f", "s", "e", Collections.emptyMap()));
        input(vc, c, o -> {
            seen.add(o.toString());
        });
    });

    assertThat(seen, containsInAnyOrder((Object[]) values));
}
From source file:com.asakusafw.dag.compiler.internalio.InternalOutputAdapterGeneratorTest.java
License:Apache License
/**
 * Generates an output adapter, pushes the given strings through its sink,
 * then reads back every file matching the output pattern and verifies that
 * the same strings were written (order-insensitive).
 *
 * @param values strings to write through the adapter
 */
private void check(String... values) {
    File pattern = new File(temporary.getRoot(), "part-*");
    Path path = new Path(pattern.toURI());
    Configuration conf = configurations.newInstance();

    // Generate the adapter class for a single output named "o".
    ClassGeneratorContext gc = context();
    Spec spec = new Spec("o", path.toString(), Descriptions.typeOf(Text.class));
    ClassDescription generated = add(c -> new InternalOutputAdapterGenerator().generate(gc, Arrays.asList(spec), c));

    // Load it and write every value inside one task session.
    loading(generated, c -> {
        VertexProcessorContext vc = new MockVertexProcessorContext()
                .with(c)
                .withResource(conf)
                .withResource(new StageInfo("u", "b", "f", "s", "e", Collections.emptyMap()));
        try (OutputAdapter adapter = adapter(c, vc)) {
            adapter.initialize();
            OutputHandler<? super TaskProcessorContext> handler = adapter.newHandler();
            Result<Text> sink = handler.getSink(Text.class, "o");
            try (Session session = handler.start(new MockTaskProcessorContext("t"))) {
                for (int i = 0; i < values.length; i++) {
                    sink.add(new Text(values[i]));
                }
            }
        }
    });

    // Read back all produced part files.
    Set<String> found = new LinkedHashSet<>();
    try {
        List<Path> parts = TemporaryStorage.list(conf, path);
        Text buffer = new Text();
        for (Path part : parts) {
            try (ModelInput<Text> reader = TemporaryStorage.openInput(conf, Text.class, part)) {
                while (reader.readTo(buffer)) {
                    found.add(buffer.toString());
                }
            }
        }
    } catch (IOException e) {
        throw new AssertionError(e);
    }

    assertThat(found, containsInAnyOrder(values));
}