Example usage for the org.apache.hadoop.io.Text constructor

List of usage examples for the org.apache.hadoop.io.Text constructor

Introduction

On this page you can find example usages of the org.apache.hadoop.io.Text constructor.

Prototype

public Text(byte[] utf8) 

Source Link

Document

Construct from a byte array.

Usage

From source file:com.alexholmes.hadooputils.sort.SortRecordReaderTest.java

License:Apache License

/**
 * Verifies key extraction on a line that contains the separator: once with
 * start key 1 / end key 2, and once with start key 1 / end key 1.
 */
@Test
public void testSeparator() throws IOException {
    String keysOneToTwo = SortRecordReader.extractKey(
            new Text("asd~def"),
            1,      // start key
            2,      // end key
            "~",    // separator
            false   // ignore case
    ).toString();
    assertEquals("asd~def", keysOneToTwo);

    String keyOneOnly = SortRecordReader.extractKey(
            new Text("asd~def"),
            1,      // start key
            1,      // end key
            "~",    // separator
            false   // ignore case
    ).toString();
    assertEquals("asd", keyOneOnly);
}

From source file:com.alexholmes.hadooputils.sort.SortRecordReaderTest.java

License:Apache License

/**
 * Verifies that the extracted key is lower-cased when the ignore-case flag is set.
 */
@Test
public void testIgnoreCase() throws IOException {
    String extracted = SortRecordReader.extractKey(
            new Text("aaaBBB"),
            1,      // start key
            1,      // end key
            "~",    // separator
            true    // ignore case
    ).toString();
    assertEquals("aaabbb", extracted);
}

From source file:com.alimama.quanjingmonitor.kmeans.KMeansClusterCombiner.java

License:Apache License

/**
 * Forwards a bounded subset of the values seen for {@code key}.
 *
 * <p>At most {@code capacity} values are retained in a priority queue ordered by the
 * reverse of {@code cmp}; once the queue is full, a new value is admitted only if
 * {@code cmp} ranks the current queue head above it, in which case the head is evicted.
 *
 * @param key     cluster key; its string form is looked up in {@code clusterMap}
 * @param values  values grouped under {@code key}
 * @param context output sink
 */
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    // Capacity is derived from the cluster's select count (capped at 50000) when the
    // cluster is known, and is never allowed to drop below 1000.
    Cluster cluster = clusterMap.get(key.toString());
    int capacity = (cluster == null) ? 1 : Math.min(cluster.getNumselect() * this.rep * 10, 50000);
    capacity = Math.max(capacity, 1000);

    PriorityQueue<Text> kept = new PriorityQueue<Text>(capacity, Collections.reverseOrder(cmp));
    for (Text value : values) {
        // Copy the value: the queue must hold its own instance, not the iterated one.
        Text candidate = new Text(value.toString());
        if (kept.size() < capacity) {
            kept.add(candidate);
            continue;
        }
        if (cmp.compare(kept.peek(), candidate) > 0) {
            kept.add(candidate);
            kept.poll();
        }
    }

    for (Text survivor : kept) {
        context.write(key, survivor);
    }
}

From source file:com.alimama.quanjingmonitor.kmeans.KMeansClusterMapper.java

License:Apache License

/**
 * Assigns one input point to the nearest cluster that does not deny it and emits
 * {@code (clusterId, "<distance>@abtest@<original line>")}.
 *
 * <p>Points that cannot be parsed ({@code parseVector} returns {@code null}) and points
 * denied by every cluster produce no output.
 *
 * @param key     input key (unused)
 * @param point   raw input line describing the point
 * @param context output sink and counter access
 */
@Override
protected void map(WritableComparable<?> key, Text point, Context context)
        throws IOException, InterruptedException {

    // Decode the line once; it is needed both for parsing and for the output value.
    String rawPoint = point.toString();
    Vector pointv = parse.parseVector(rawPoint);
    if (pointv == null) {
        return;
    }
    pointv.setNumPoints(1);

    Cluster nearestCluster = null;
    // Sentinel for a double-valued distance; kept in line with KMeansMapper.map.
    double nearestDistance = Double.MAX_VALUE;
    for (Cluster cluster : clusters) {
        Vector clusterCenter = cluster.getCenter();
        boolean isDeny = clusterCenter.Deny(pointv);
        if (isDeny) {
            continue;
        }
        double distance = clusterCenter.distiance(pointv);
        context.getCounter("Clustering", "similar").increment(1);

        // The null check guarantees the first non-denied cluster is always accepted,
        // whatever its distance; ties go to the later cluster because of "<=".
        if (distance <= nearestDistance || nearestCluster == null) {
            nearestCluster = cluster;
            nearestDistance = distance;
        }
    }
    if (nearestCluster != null) {
        context.write(new Text(String.valueOf(nearestCluster.getId())),
                new Text(String.valueOf(nearestDistance) + "@abtest@" + rawPoint));
    }

}

From source file:com.alimama.quanjingmonitor.kmeans.KMeansClusterReduce.java

License:Apache License

/**
 * Builds pairs of records for one cluster key and writes each completed pair as two
 * output lines labelled {@code rep0}/{@code rep1}.
 *
 * <p>Steps, as implemented below:
 * <ol>
 *   <li>Retain at most {@code limit} value strings in a priority queue ordered by the
 *       reverse of {@code cmp}.</li>
 *   <li>Sort the retained strings with {@code cmp} and walk them, seeding up to
 *       {@code numberSelect} slots ({@code s1}/{@code v1}) and trying to attach a partner
 *       ({@code s2}/{@code v2}) to already seeded slots.</li>
 *   <li>Make a second pass over the {@code left} list to fill slots that still have no
 *       partner.</li>
 *   <li>Emit every slot that has both members.</li>
 * </ol>
 *
 * <p>Each value is expected to look like {@code <prefix>@abtest@<line>}; the part after
 * the marker is parsed with {@code parse.parseVector}.
 *
 * @param key     cluster key; its string form is looked up in {@code clusterMap}
 * @param values  candidate records for this cluster
 * @param context output sink
 */
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    Cluster clu = clusterMap.get(key.toString());
    int numberSelect = 1;
    int limit = 1;
    if (clu != null) {
        // Candidate budget scales with the cluster's select count, capped at 100000.
        limit = Math.min(clu.getNumselect() * this.rep * 100, 100000);
        numberSelect = clu.getNumselect();
        System.out.println("key:" + key + "," + numberSelect);

    } else {
        System.out.println("can nott found key:" + key);
    }

    // Never keep fewer than 5000 candidates, even for unknown clusters.
    if (limit < 5000) {
        limit = 5000;
    }

    // Reverse ordering puts the greatest element (per cmp) at the head, so peek()/poll()
    // operate on the current "worst" retained candidate.
    PriorityQueue<String> res = new PriorityQueue<String>(limit, Collections.reverseOrder(cmp));
    for (Text value : values) {

        if (res.size() < limit) {
            res.add(value.toString());
        } else if (cmp.compare(res.peek(), value.toString()) > 0) {
            // New value ranks below the head: admit it, then evict the head.
            res.add(value.toString());
            res.poll();
        }
    }

    // Retained candidates in ascending cmp order.
    ArrayList<String> list = new ArrayList<String>(res);
    Collections.sort(list, cmp);

    // One slot per pair to be produced; slots start out null.
    comPair[] writelist = new comPair[numberSelect];
    int end = list.size();

    // NOTE(review): left starts as a copy of ALL retained candidates (in queue iteration
    // order), and unplaced candidates are appended to it again below — confirm that the
    // duplication is intended rather than an empty list.
    ArrayList<String> left = new ArrayList<String>(res);

    // Pass 1: seed slots and greedily attach partners.
    for (int i = 0; i < end; i++) {
        String s = list.get(i);
        System.out.println("111>>" + s);
        // NOTE(review): cols[1] throws ArrayIndexOutOfBoundsException if the value lacks
        // the "@abtest@" marker — presumably every input carries it; verify upstream.
        String[] cols = s.split("@abtest@");
        String line = cols[1];
        Vector group = parse.parseVector(line);
        for (int j = 0; j < writelist.length; j++) {
            if (writelist[j] == null) {
                // Empty slot: the current record becomes its first member.
                comPair p = new comPair();
                p.s1 = s;
                p.v1 = group;
                writelist[j] = p;
                s = null;
                break;
            }

            // Both calls run for every seeded slot, including when deny is true.
            boolean deny = writelist[j].v1.Deny(group);
            double dis = writelist[j].v1.distiance(group);
            System.out.println("222>>" + dis);

            // NOTE(review): the initial value of comPair.distance is not visible here;
            // this comparison only works for a fresh slot if it starts out large.
            if (!deny && writelist[j].distance > dis) {
                // Current record is closer than the slot's existing partner: take its
                // place and carry the displaced partner on to the remaining slots.
                writelist[j].distance = dis;
                String s_tmp = writelist[j].s2;
                Vector group_tmp = writelist[j].v2;
                writelist[j].s2 = s;
                writelist[j].v2 = group;
                s = s_tmp;
                group = group_tmp;
                if (s_tmp == null) {
                    // Nothing was displaced, so there is nothing left to place.
                    break;
                }

            }
        }

        // A record (original or displaced) that found no slot is queued for pass 2.
        if (s != null) {
            left.add(s);
        }
    }

    int end2 = left.size();

    // Pass 2: try to give a partner to slots that are seeded but still unpaired.
    for (int i = 0; i < end2; i++) {
        String s = left.get(i);
        String[] cols = s.split("@abtest@");
        String line = cols[1];
        Vector group = parse.parseVector(line);
        boolean isset = false;
        for (int j = 0; j < writelist.length; j++) {
            // Only slots that have a first member and no partner are considered.
            if (writelist[j] == null || writelist[j].s2 != null) {
                continue;
            }

            // Unlike pass 1, no Deny check is applied here.
            double dis = writelist[j].v1.distiance(group);
            if (writelist[j].distance > dis) {
                System.out.println("333>>" + s);
                isset = true;
                writelist[j].distance = dis;
                String s_tmp = writelist[j].s2;
                Vector group_tmp = writelist[j].v2;
                writelist[j].s2 = s;
                writelist[j].v2 = group;
                // s2 was null for every slot that reaches this point (see the continue
                // above), so s_tmp is null and this break is always taken.
                if (s_tmp == null) {
                    break;
                }
                s = s_tmp;
                group = group_tmp;
            }
        }

        // Stop the whole pass at the first record that could not be placed anywhere.
        if (!isset) {
            break;
        }
    }

    // Emit completed pairs only; slots without a partner are dropped.
    for (int i = 0; i < writelist.length; i++) {
        if (writelist[i] != null && writelist[i].s2 != null) {
            // rrr is randomly 0 or 1 and rrr2 is the other value, so the two members of
            // a pair always receive different "rep" labels.
            int rrr = (int) ((Math.random() * 10000) % 2);
            int rrr2 = (rrr + 1) % 2;
            System.out.println(writelist[i].toString());
            context.write(key,
                    new Text(writelist[i].distance + "\t" + i + "\trep" + rrr + "_1\t" + writelist[i].s1));
            context.write(key,
                    new Text(writelist[i].distance + "\t" + i + "\trep" + rrr2 + "_2\t" + writelist[i].s2));
        }
    }

}

From source file:com.alimama.quanjingmonitor.kmeans.KMeansGroupMapper.java

License:Apache License

/**
 * Wraps one input point in a single-point {@code Cluster} and emits it under the key
 * parsed from the same line.
 *
 * <p>Lines that {@code parseVector} cannot turn into a vector (it returns {@code null})
 * are skipped, matching how the other k-means mappers treat unparsable input.
 *
 * @param key     input key (unused)
 * @param point   raw input line describing the point
 * @param context output sink
 */
@Override
protected void map(WritableComparable<?> key, Text point, Context context)
        throws IOException, InterruptedException {

    String line = point.toString();
    Vector pointv = parse.parseVector(line);
    if (pointv == null) {
        // Unparsable record: skip it instead of failing the task with a NullPointerException.
        return;
    }
    pointv.setNumPoints(1);

    Cluster clu = new Cluster(pointv, 0);
    context.write(new Text(String.valueOf(parse.parseKey(line))), clu);

}

From source file:com.alimama.quanjingmonitor.kmeans.KMeansMapper.java

License:Apache License

/**
 * Emits every known cluster center once, keyed by cluster id, with its point count
 * reset to zero.
 *
 * @param context output sink
 */
protected void cleanup(Context context) throws IOException, InterruptedException {
    for (Cluster current : clusters) {
        Vector center = current.getCenter();
        // Zero the count before emitting the center.
        center.setNumPoints(0);
        Text clusterId = new Text(String.valueOf(current.getId()));
        context.write(clusterId, center);
    }
}

From source file:com.alimama.quanjingmonitor.kmeans.KMeansMapper.java

License:Apache License

/**
 * Emits the parsed point under the id of the nearest cluster that the point does not deny.
 *
 * <p>Nothing is written when the line cannot be parsed or when every cluster is denied.
 *
 * @param key     input key (unused)
 * @param point   raw input line describing the point
 * @param context output sink and counter access
 */
@Override
protected void map(WritableComparable<?> key, Text point, Context context)
        throws IOException, InterruptedException {

    Vector candidate = parse.parseVector(point.toString());
    if (candidate == null) {
        return;
    }
    candidate.setNumPoints(1);

    Cluster best = null;
    double bestDistance = Double.MAX_VALUE;
    for (Cluster current : clusters) {
        Vector center = current.getCenter();
        if (!candidate.Deny(center)) {
            double d = center.distiance(candidate);
            context.getCounter("Clustering", "similar").increment(1);

            // The first eligible cluster always wins; afterwards ties go to the later one.
            if (best == null || d <= bestDistance) {
                best = current;
                bestDistance = d;
            }
        }
    }

    if (best == null) {
        return;
    }
    context.write(new Text(String.valueOf(best.getId())), candidate);
}

From source file:com.asakusafw.dag.compiler.internalio.InternalInputAdapterGeneratorTest.java

License:Apache License

/**
 * Round-trip helper: writes {@code values} to a temporary file, generates an input
 * adapter for that file, reads everything back through it, and asserts that the same
 * values come out in any order.
 *
 * @param values the strings to write and expect back
 */
private void check(String... values) {
    File target = new File(temporary.getRoot(), "temp.bin");
    Path path = new Path(target.toURI());
    Configuration conf = configurations.newInstance();

    // Prepare the input file.
    try (ModelOutput<Text> writer = TemporaryStorage.openOutput(conf, Text.class, path)) {
        for (String value : values) {
            writer.write(new Text(value));
        }
    } catch (IOException e) {
        throw new AssertionError(e);
    }

    // Generate the adapter class for a single input pointing at that file.
    ClassGeneratorContext gc = context();
    String location = path.toString();
    Spec spec = new Spec("testing", Collections.singleton(location), Descriptions.typeOf(Text.class));
    ClassDescription gen = add(c -> new InternalInputAdapterGenerator().generate(gc, spec, c));

    // Load the generated class and collect everything it reads.
    List<String> results = new ArrayList<>();
    loading(gen, c -> {
        VertexProcessorContext vc = new MockVertexProcessorContext()
                .with(c)
                .withResource(conf)
                .withResource(new StageInfo("u", "b", "f", "s", "e", Collections.emptyMap()));
        input(vc, c, o -> {
            results.add(o.toString());
        });
    });

    assertThat(results, containsInAnyOrder((Object[]) values));
}

From source file:com.asakusafw.dag.compiler.internalio.InternalOutputAdapterGeneratorTest.java

License:Apache License

/**
 * Round-trip helper: generates an output adapter targeting {@code part-*} under the
 * temporary folder, pushes {@code values} through its sink, reads all produced files
 * back, and asserts that the same values are present in any order.
 *
 * @param values the strings to write and expect back
 */
private void check(String... values) {
    File pattern = new File(temporary.getRoot(), "part-*");
    Path path = new Path(pattern.toURI());
    Configuration conf = configurations.newInstance();

    // Generate the adapter class for a single output named "o".
    ClassGeneratorContext gc = context();
    Spec spec = new Spec("o", path.toString(), Descriptions.typeOf(Text.class));
    ClassDescription gen = add(c -> new InternalOutputAdapterGenerator().generate(gc, Arrays.asList(spec), c));

    // Load it and write every value through the sink inside one task session.
    loading(gen, c -> {
        VertexProcessorContext vc = new MockVertexProcessorContext()
                .with(c)
                .withResource(conf)
                .withResource(new StageInfo("u", "b", "f", "s", "e", Collections.emptyMap()));
        try (OutputAdapter adapter = adapter(c, vc)) {
            adapter.initialize();
            OutputHandler<? super TaskProcessorContext> handler = adapter.newHandler();
            Result<Text> sink = handler.getSink(Text.class, "o");
            try (Session session = handler.start(new MockTaskProcessorContext("t"))) {
                for (String value : values) {
                    sink.add(new Text(value));
                }
            }
        }
    });

    // Read back every file matching the pattern; the buffer is reused across records.
    Set<String> results = new LinkedHashSet<>();
    try {
        List<Path> produced = TemporaryStorage.list(conf, path);
        Text buffer = new Text();
        for (Path file : produced) {
            try (ModelInput<Text> reader = TemporaryStorage.openInput(conf, Text.class, file)) {
                while (reader.readTo(buffer)) {
                    results.add(buffer.toString());
                }
            }
        }
    } catch (IOException e) {
        throw new AssertionError(e);
    }

    assertThat(results, containsInAnyOrder(values));
}