Example usage for the org.apache.hadoop.io.Text constructor Text(...)

List of usage examples for the org.apache.hadoop.io.Text constructor Text(...)

Introduction

On this page you can find example usages of the org.apache.hadoop.io.Text constructor Text(...).

Prototype

public Text(byte[] utf8) 

Source Link

Document

Construct from a byte array.

Usage

From source file:com.asakusafw.dmdl.directio.csv.driver.CsvFormatEmitterTest.java

License:Apache License

/**
 * with force header.
 * <p>
 * Emits one record through the CSV format generated from {@code force_header} and
 * compares the parsed output with a {@code "title"} row followed by the data row.
 * </p>
 * @throws Exception if failed
 */
@Test
public void force_header() throws Exception {
    ModelLoader loader = generateJava("force_header");
    ModelWrapper record = loader.newModel("Model");
    BinaryStreamFormat<Object> format = unsafe(loader.newObject("csv", "ModelCsvFormat"));

    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    try (ModelOutput<Object> out = format.createOutput(record.unwrap().getClass(), "hello", sink)) {
        Text greeting = new Text("Hello, world!");
        record.set("value", greeting);
        out.write(record.unwrap());
    }

    // decode what was written and split it into rows/cells
    String csv = new String(sink.toByteArray(), "UTF-8");
    String[][] actualRows = parse(1, csv);

    String[][] expectedRows = {
            { "title" },
            { "Hello, world!" },
    };
    assertThat(actualRows, is(expectedRows));
}

From source file:com.asakusafw.dmdl.directio.csv.driver.CsvFormatEmitterTest.java

License:Apache License

/**
 * with implicit field./* w  w  w . j  ava 2  s.c  o m*/
 * @throws Exception if failed
 */
@Test
public void implicit_field_name() throws Exception {
    ModelLoader loaded = generateJava("implicit_field_name");
    ModelWrapper model = loaded.newModel("Model");
    BinaryStreamFormat<Object> support = unsafe(loaded.newObject("csv", "ModelCsvFormat"));

    ByteArrayOutputStream output = new ByteArrayOutputStream();
    try (ModelOutput<Object> writer = support.createOutput(model.unwrap().getClass(), "hello", output)) {
        model.set("value", new Text("Hello, world!"));
        writer.write(model.unwrap());
    }

    String[][] results = parse(1, new String(output.toByteArray(), "UTF-8"));
    assertThat(results, is(new String[][] { { "value" }, { "Hello, world!" }, }));
}

From source file:com.asakusafw.dmdl.directio.csv.driver.CsvFormatEmitterTest.java

License:Apache License

/**
 * with file name./*w w  w.ja  v a2  s . c  om*/
 * @throws Exception if failed
 */
@Test
public void file_name() throws Exception {
    ModelLoader loaded = generateJava("file_name");
    ModelWrapper model = loaded.newModel("Model");
    ModelWrapper buffer = loaded.newModel("Model");
    BinaryStreamFormat<Object> support = unsafe(loaded.newObject("csv", "ModelCsvFormat"));
    assertThat(support.getMinimumFragmentSize(), is(greaterThan(0L)));

    ByteArrayOutputStream output = new ByteArrayOutputStream();
    try (ModelOutput<Object> writer = support.createOutput(model.unwrap().getClass(), "hello", output)) {
        model.set("value", new Text("Hello, world!"));
        writer.write(model.unwrap());
    }

    try (ModelInput<Object> reader = support.createInput(model.unwrap().getClass(), "testing", in(output), 0,
            size(output))) {
        assertThat(reader.readTo(buffer.unwrap()), is(true));
        assertThat(buffer.getOption("value"), is((Object) new StringOption("Hello, world!")));
        assertThat(buffer.getOption("path"), is((Object) new StringOption("testing")));
        assertThat(reader.readTo(buffer.unwrap()), is(false));
    }
}

From source file:com.asakusafw.dmdl.directio.csv.driver.CsvFormatEmitterTest.java

License:Apache License

/**
 * with line number.
 * <p>
 * The same record, whose value contains a line break, is written twice. When read back,
 * the {@code number} property is expected to be {@code 1} for the first record and
 * {@code 3} for the second.
 * </p>
 * @throws Exception if failed
 */
@Test
public void line_number() throws Exception {
    ModelLoader loader = generateJava("line_number");
    ModelWrapper source = loader.newModel("Model");
    source.set("value", new Text("Hello\nworld!"));
    ModelWrapper restored = loader.newModel("Model");
    BinaryStreamFormat<Object> format = unsafe(loader.newObject("csv", "ModelCsvFormat"));

    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    try (ModelOutput<Object> out = format.createOutput(source.unwrap().getClass(), "hello", sink)) {
        for (int count = 0; count < 2; count++) {
            out.write(source.unwrap());
        }
    }

    // line numbers expected for the first and the second record, in reading order
    int[] expectedLines = { 1, 3 };
    try (ModelInput<Object> input = format.createInput(
            source.unwrap().getClass(), "testing", in(sink), 0, size(sink))) {
        for (int expectedLine : expectedLines) {
            assertThat(input.readTo(restored.unwrap()), is(true));
            assertThat(restored.getOption("value"), is((Object) new StringOption("Hello\nworld!")));
            assertThat(restored.getOption("number"), is((Object) new IntOption(expectedLine)));
        }
        assertThat(input.readTo(restored.unwrap()), is(false));
    }
}

From source file:com.asakusafw.dmdl.directio.csv.driver.CsvFormatEmitterTest.java

License:Apache License

/**
 * with record number./*from w  w  w.j a v a  2  s. c o m*/
 * @throws Exception if failed
 */
@Test
public void record_number() throws Exception {
    ModelLoader loaded = generateJava("record_number");
    ModelWrapper model = loaded.newModel("Model");
    model.set("value", new Text("Hello\nworld!"));
    ModelWrapper buffer = loaded.newModel("Model");
    BinaryStreamFormat<Object> support = unsafe(loaded.newObject("csv", "ModelCsvFormat"));

    ByteArrayOutputStream output = new ByteArrayOutputStream();
    try (ModelOutput<Object> writer = support.createOutput(model.unwrap().getClass(), "hello", output)) {
        writer.write(model.unwrap());
        writer.write(model.unwrap());
    }
    try (ModelInput<Object> reader = support.createInput(model.unwrap().getClass(), "testing", in(output), 0,
            size(output))) {
        assertThat(reader.readTo(buffer.unwrap()), is(true));
        assertThat(buffer.getOption("value"), is((Object) new StringOption("Hello\nworld!")));
        assertThat(buffer.getOption("number"), is((Object) new LongOption(1)));
        assertThat(reader.readTo(buffer.unwrap()), is(true));
        assertThat(buffer.getOption("value"), is((Object) new StringOption("Hello\nworld!")));
        assertThat(buffer.getOption("number"), is((Object) new LongOption(2)));
        assertThat(reader.readTo(buffer.unwrap()), is(false));
    }
}

From source file:com.asakusafw.dmdl.directio.csv.driver.CsvFormatEmitterTest.java

License:Apache License

/**
 * with ignored property./*from  w  w  w.j a va2s  .  c om*/
 * @throws Exception if failed
 */
@Test
public void ignore() throws Exception {
    ModelLoader loaded = generateJava("ignore");
    ModelWrapper model = loaded.newModel("Model");
    model.set("value", new Text("Hello, world!"));
    model.set("ignored", new Text("ignored"));
    ModelWrapper buffer = loaded.newModel("Model");
    BinaryStreamFormat<Object> support = unsafe(loaded.newObject("csv", "ModelCsvFormat"));

    ByteArrayOutputStream output = new ByteArrayOutputStream();
    try (ModelOutput<Object> writer = support.createOutput(model.unwrap().getClass(), "hello", output)) {
        writer.write(model.unwrap());
    }
    try (ModelInput<Object> reader = support.createInput(model.unwrap().getClass(), "testing", in(output), 0,
            size(output))) {
        assertThat(reader.readTo(buffer.unwrap()), is(true));
        assertThat(buffer.getOption("value"), is((Object) new StringOption("Hello, world!")));
        assertThat(buffer.getOption("ignored"), is((Object) new StringOption()));
        assertThat(reader.readTo(buffer.unwrap()), is(false));
    }
}

From source file:com.asakusafw.dmdl.directio.csv.driver.CsvFormatEmitterTest.java

License:Apache License

/**
 * Writes 100 {@code Tuple} records, then repeatedly reads them back in randomly sized
 * fragments and checks that the fragments together yield every record, in order.
 * @param loaded the loader providing the {@code Tuple} model and {@code TupleCsvFormat}
 * @param random the source of record contents and fragment boundaries
 * @throws Exception if failed; a {@code CsvFormatException} raised while reading is
 *     rethrown as an {@code IOException} describing the failing fragment
 */
private void fragmentation_attempt(ModelLoader loaded, Random random) throws Exception {
    ModelWrapper model = loaded.newModel("Tuple");
    BinaryStreamFormat<?> support = (BinaryStreamFormat<?>) loaded.newObject("csv", "TupleCsvFormat");

    assertThat(support.getSupportedType(), is((Object) model.unwrap().getClass()));

    BinaryStreamFormat<Object> unsafe = unsafe(support);

    // build the reference data: every written record is also kept in 'expected'
    List<Object> expected = new ArrayList<>();
    ByteArrayOutputStream output = new ByteArrayOutputStream();
    try (ModelOutput<Object> writer = unsafe.createOutput(model.unwrap().getClass(), "hello", output)) {
        for (int line = 0; line < 100; line++) {
            ModelWrapper buffer = loaded.newModel("Tuple");
            buffer.set("f1", new Text("f1:" + (line * 1)));
            buffer.set("f2", new Text("f2:" + random.nextInt()));
            buffer.set("f3", new Text("f3:" + random.nextInt()));
            writer.write(buffer.unwrap());
            expected.add(buffer.unwrap());
        }
    }
    byte[] bytes = output.toByteArray();

    for (int attempt = 0; attempt < 100; attempt++) {
        List<Object> actual = new ArrayList<>();
        // fragment end positions: the total size plus 1..100 random positions in [0, size];
        // after sorting, the last entry is always the total size, so the whole file is covered
        int[] fragment = new int[random.nextInt(100) + 2];
        fragment[0] = output.size();
        for (int i = 1; i < fragment.length; i++) {
            fragment[i] = random.nextInt(output.size() + 1);
        }
        Arrays.sort(fragment);
        int start = 0;
        for (int i = 0; i < fragment.length; i++) {
            // the current fragment is [start, fragment[i]); it may be empty
            int offset = start;
            int length = fragment[i] - offset;
            // the stream begins at the fragment offset but extends to the end of the data
            InputStream in = new ByteArrayInputStream(bytes, offset, bytes.length - offset);
            // NOTE(review): the mark is never reset in this method - purpose unclear, confirm
            in.mark(bytes.length - offset);
            try (ModelInput<Object> reader = unsafe.createInput(model.unwrap().getClass(), "hello", in, offset,
                    length)) {
                // drain every record this fragment yields
                while (true) {
                    Object buffer = loaded.newModel("Tuple").unwrap();
                    if (reader.readTo(buffer) == false) {
                        break;
                    }
                    actual.add(buffer);
                }
            } catch (CsvFormatException e) {
                // diagnostics: re-open the same range and print the first byte it yields
                try (InputStream reIn = new ByteArrayInputStream(bytes, offset, bytes.length - offset);
                        InputStream copy = new DelimiterRangeInputStream(reIn, '\n', length, offset > 0)) {
                    System.out.println(copy.read());
                }
                // rethrow with the fragment coordinates and its raw text, keeping the cause
                throw new IOException(MessageFormat.format(
                        "attempt={0}, f-offset={1}, f-size={2}, total={3}: [[{4}]]", attempt, offset, length,
                        bytes.length, new String(bytes, offset, length, "UTF-8")), e);
            }
            start = fragment[i];
        }
        // all fragments together must reproduce the written records exactly
        assertThat(actual, is(expected));
    }
}

From source file:com.asakusafw.dmdl.directio.csv.driver.CsvFormatEmitterTest.java

License:Apache License

/**
 * fragmentation is restricted./*  w w w .j  a  v  a  2  s.  c o m*/
 * @throws Exception if failed
 */
@Test
public void fragmentation_restricted() throws Exception {
    ModelLoader loaded = generateJava("fragmentation_restricted");
    ModelWrapper model = loaded.newModel("Tuple");
    BinaryStreamFormat<?> support = (BinaryStreamFormat<?>) loaded.newObject("csv", "TupleCsvFormat");
    BinaryStreamFormat<Object> unsafe = unsafe(support);

    model.set("f1", new Text("Hello1"));
    model.set("f2", new Text("Hello1"));
    model.set("f3", new Text("Hello1"));

    ByteArrayOutputStream output = new ByteArrayOutputStream();
    try (ModelOutput<Object> writer = unsafe.createOutput(model.unwrap().getClass(), "hello", output)) {
        writer.write(model.unwrap());
    }
    try {
        unsafe.createInput(model.unwrap().getClass(), "hello", in(output), 1, size(output));
        fail();
    } catch (Exception e) {
        // ok.
    }
}

From source file:com.asakusafw.dmdl.directio.line.driver.LineFormatEmitterTest.java

License:Apache License

/**
 * w/ file name.
 * <p>
 * Reads three lines through a line format whose model declares a file-name property and
 * checks that each record carries its line as {@code value} and the input path as
 * {@code path}.
 * </p>
 * @throws Exception if failed
 */
@Test
public void file_name() throws Exception {
    String[] script = new String[] {
            "@directio.line",
            "complex = {",
            "    @directio.line.file_name",
            "    path : TEXT;",
            "    @directio.line.body",
            "    value : TEXT;",
            "};",
    };
    ModelLoader loader = generateJavaFromLines(script);
    BinaryStreamFormat<Object> format = unsafe(loader.newObject("line", "ComplexLineFormat"));
    assertThat(format, is(splittable()));

    String[] lines = new String[] { "Hello1", "Hello2", "Hello3", };
    byte[] contents = contents(lines).getBytes("UTF-8");
    try (ModelInput<Object> reader = format.createInput(format.getSupportedType(), "testing",
            new ByteArrayInputStream(contents), 0, contents.length)) {
        ModelWrapper model = loader.newModel("Complex");
        // every input line must come back as one record, in order
        for (String line : lines) {
            assertThat(reader.readTo(model.unwrap()), is(true));
            assertThat(model.get("value"), is((Object) new Text(line)));
            assertThat(model.get("path"), is((Object) new Text("testing")));
            model.reset();
        }
        assertThat(reader.readTo(model.unwrap()), is(false));
    }
}

From source file:com.asakusafw.dmdl.directio.line.driver.LineFormatEmitterTest.java

License:Apache License

/**
 * w/ line number.
 * <p>
 * Reads three lines through a line format whose model declares a line-number property;
 * the format must not be splittable, and the records must be numbered 1 to 3.
 * </p>
 * @throws Exception if failed
 */
@Test
public void line_number() throws Exception {
    String[] script = new String[] {
            "@directio.line",
            "complex = {",
            "    @directio.line.line_number",
            "    line_num : INT;",
            "    @directio.line.body",
            "    value : TEXT;",
            "};",
    };
    ModelLoader loader = generateJavaFromLines(script);
    BinaryStreamFormat<Object> format = unsafe(loader.newObject("line", "ComplexLineFormat"));
    assertThat(format, is(not(splittable())));

    String[] lines = new String[] { "Hello1", "Hello2", "Hello3", };
    byte[] contents = contents(lines).getBytes("UTF-8");
    try (ModelInput<Object> reader = format.createInput(format.getSupportedType(), "testing",
            new ByteArrayInputStream(contents), 0, contents.length)) {
        ModelWrapper model = loader.newModel("Complex");
        for (int index = 0; index < lines.length; index++) {
            // line numbers are 1-based
            int lineNumber = index + 1;
            assertThat(reader.readTo(model.unwrap()), is(true));
            assertThat(model.get("value"), is((Object) new Text(lines[index])));
            assertThat(model.get("line_num"), is((Object) lineNumber));
            model.reset();
        }
        assertThat(reader.readTo(model.unwrap()), is(false));
    }
}