Example usage for the org.apache.hadoop.io.Text constructor

List of usage examples for the org.apache.hadoop.io.Text constructor

Introduction

On this page you can find example usages of the org.apache.hadoop.io.Text constructor.

Prototype

public Text(byte[] utf8) 

Source Link

Document

Construct from a byte array.

Usage

From source file:com.asakusafw.dmdl.directio.line.driver.LineFormatEmitterTest.java

License:Apache License

/**
 * Reads three lines through the generated {@code ComplexLineFormat}, whose model
 * declares a {@code LONG} line-number property next to the text body.
 * @throws Exception if failed
 */
@Test
public void line_number_long() throws Exception {
    String[] dmdl = {
            "@directio.line",
            "complex = {",
            "    @directio.line.line_number",
            "    line_num : LONG;",
            "    @directio.line.body",
            "    value : TEXT;",
            "};",
    };
    ModelLoader loader = generateJavaFromLines(dmdl);
    BinaryStreamFormat<Object> format = unsafe(loader.newObject("line", "ComplexLineFormat"));
    assertThat(format, is(not(splittable())));

    String[] lines = { "Hello1", "Hello2", "Hello3", };
    byte[] bytes = contents(lines).getBytes("UTF-8");
    try (ModelInput<Object> input = format.createInput(
            format.getSupportedType(), "testing", new ByteArrayInputStream(bytes), 0, bytes.length)) {
        ModelWrapper model = loader.newModel("Complex");
        long lineNumber = 0L;
        for (String line : lines) {
            // line numbers asserted by this test start at 1
            lineNumber++;
            assertThat(input.readTo(model.unwrap()), is(true));
            assertThat(model.get("value"), is((Object) new Text(line)));
            assertThat(model.get("line_num"), is((Object) Long.valueOf(lineNumber)));
            model.reset();
        }
        // nothing is left after the last line
        assertThat(input.readTo(model.unwrap()), is(false));
    }
}

From source file:com.asakusafw.dmdl.directio.line.driver.LineFormatEmitterTest.java

License:Apache License

/**
 * Writes three text values through the generated {@code SimpleLineFormat} and
 * compares the UTF-8 decoded output with the expected contents.
 * @throws Exception if failed
 */
@Test
public void writer() throws Exception {
    String[] dmdl = { "@directio.line", "simple = { value : TEXT; };", };
    ModelLoader loader = generateJavaFromLines(dmdl);
    BinaryStreamFormat<Object> format = unsafe(loader.newObject("line", "SimpleLineFormat"));

    String[] lines = { "Hello1", "Hello2", "Hello3", };
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    try (ModelOutput<Object> output = writer(format, sink)) {
        ModelWrapper model = loader.newModel("Simple");
        for (String line : lines) {
            model.set("value", new Text(line));
            output.write(model.unwrap());
        }
    }

    String written = new String(sink.toByteArray(), "UTF-8");
    assertThat(written, is(contents(lines)));
}

From source file:com.asakusafw.dmdl.directio.line.driver.LineFormatEmitterTest.java

License:Apache License

/**
 * test for reader./*from ww  w  .  j  a v a  2  s  . c o  m*/
 * @throws Exception if failed
 */
@Test
public void reader() throws Exception {
    ModelLoader loader = generateJavaFromLines(
            new String[] { "@directio.line", "simple = { value : TEXT; };", });
    BinaryStreamFormat<Object> format = unsafe(loader.newObject("line", "SimpleLineFormat"));
    ModelInput<Object> reader = reader(format,
            contents(new String[] { "Hello1", "Hello2", "Hello3", }).getBytes("UTF-8"));

    ModelWrapper model = loader.newModel("Simple");
    assertThat(reader.readTo(model.unwrap()), is(true));
    assertThat(model.get("value"), is((Object) new Text("Hello1")));
    assertThat(reader.readTo(model.unwrap()), is(true));
    assertThat(model.get("value"), is((Object) new Text("Hello2")));
    assertThat(reader.readTo(model.unwrap()), is(true));
    assertThat(model.get("value"), is((Object) new Text("Hello3")));
    assertThat(reader.readTo(model.unwrap()), is(false));
}

From source file:com.asakusafw.dmdl.directio.line.driver.LineFormatEmitterTest.java

License:Apache License

/**
 * Writes three Japanese text values through a {@code SimpleLineFormat} generated
 * with {@code charset="MS932"} and compares the MS932 decoded output with the
 * expected contents.
 * @throws Exception if failed
 */
@Test
public void writer_ms932() throws Exception {
    String[] dmdl = { "@directio.line(charset=\"MS932\")", "simple = { value : TEXT; };", };
    ModelLoader loader = generateJavaFromLines(dmdl);
    BinaryStreamFormat<Object> format = unsafe(loader.newObject("line", "SimpleLineFormat"));

    String[] lines = { HELLO_JP + "1", HELLO_JP + "2", HELLO_JP + "3", };
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    try (ModelOutput<Object> output = writer(format, sink)) {
        ModelWrapper model = loader.newModel("Simple");
        for (String line : lines) {
            model.set("value", new Text(line));
            output.write(model.unwrap());
        }
    }

    String written = new String(sink.toByteArray(), "MS932");
    assertThat(written, is(contents(lines)));
}

From source file:com.asakusafw.dmdl.directio.line.driver.LineFormatEmitterTest.java

License:Apache License

/**
 * test for reader w/ MS932./*  ww w  . ja v  a 2  s  .c  o m*/
 * @throws Exception if failed
 */
@Test
public void reader_ms932() throws Exception {
    ModelLoader loader = generateJavaFromLines(
            new String[] { "@directio.line(charset=\"MS932\")", "simple = { value : TEXT; };", });
    BinaryStreamFormat<Object> format = unsafe(loader.newObject("line", "SimpleLineFormat"));
    ModelInput<Object> reader = reader(format,
            contents(new String[] { HELLO_JP + "1", HELLO_JP + "2", HELLO_JP + "3", }).getBytes("MS932"));

    ModelWrapper model = loader.newModel("Simple");
    assertThat(reader.readTo(model.unwrap()), is(true));
    assertThat(model.get("value"), is((Object) new Text(HELLO_JP + "1")));
    assertThat(reader.readTo(model.unwrap()), is(true));
    assertThat(model.get("value"), is((Object) new Text(HELLO_JP + "2")));
    assertThat(reader.readTo(model.unwrap()), is(true));
    assertThat(model.get("value"), is((Object) new Text(HELLO_JP + "3")));
    assertThat(reader.readTo(model.unwrap()), is(false));
}

From source file:com.asakusafw.dmdl.directio.line.driver.LineFormatEmitterTest.java

License:Apache License

/**
 * Writes three lines through a {@code SimpleLineFormat} generated with
 * {@code compression="gzip"}, then decompresses the raw bytes with
 * {@link GZIPInputStream} and checks the restored lines.
 * @throws Exception if failed
 */
@Test
public void compression_restore() throws Exception {
    String[] dmdl = { "@directio.line(compression=\"gzip\")", "simple = { value : TEXT; };", };
    ModelLoader loader = generateJavaFromLines(dmdl);
    BinaryStreamFormat<Object> format = unsafe(loader.newObject("line", "SimpleLineFormat"));

    String[] lines = { "Hello1", "Hello2", "Hello3", };
    ByteArrayOutputStream compressed = new ByteArrayOutputStream();
    try (ModelOutput<Object> output = writer(format, compressed)) {
        ModelWrapper model = loader.newModel("Simple");
        for (String line : lines) {
            model.set("value", new Text(line));
            output.write(model.unwrap());
        }
    }

    try (InputStream raw = new GZIPInputStream(new ByteArrayInputStream(compressed.toByteArray()));
            Scanner scanner = new Scanner(new InputStreamReader(raw, "UTF-8"))) {
        for (String line : lines) {
            assertThat(scanner.hasNextLine(), is(true));
            assertThat(scanner.nextLine(), is(line));
        }
        // no trailing content after the last written line
        assertThat(scanner.hasNextLine(), is(false));
    }
}

From source file:com.asakusafw.dmdl.directio.line.driver.LineFormatEmitterTest.java

License:Apache License

/**
 * Round-trips a single model object through the given format: checks the
 * supported type, writes one object with {@code value = "Hello, world!"},
 * reads it back, and verifies that the read buffer equals the written object.
 * @param loader the loader used to create model instances
 * @param name the model name passed to {@code loader.newModel}
 * @param format the format under test
 * @throws IOException if failed by I/O error
 * @throws InterruptedException if interrupted
 */
private void check(ModelLoader loader, String name, BinaryStreamFormat<Object> format)
        throws IOException, InterruptedException {
    ModelWrapper source = loader.newModel(name);
    assertThat(format.getSupportedType(), equalTo((Object) source.getModelClass()));

    source.set("value", new Text("Hello, world!"));

    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    try (ModelOutput<Object> out = writer(format, sink)) {
        out.write(source.unwrap());
    }

    Object target = loader.newModel(name).unwrap();
    try (ModelInput<Object> in = reader(format, sink.toByteArray())) {
        assertThat(in.readTo(target), is(true));
        assertThat(target, is(source.unwrap()));
        // a failed read at end of input must leave the buffer unchanged
        assertThat(in.readTo(target), is(false));
        assertThat(target, is(source.unwrap()));
    }
}

From source file:com.asakusafw.dmdl.directio.sequencefile.driver.SequenceFileFormatEmitterTest.java

License:Apache License

/**
 * A simple case./*from  w w  w  . j a va  2 s  .c  o  m*/
 * @throws Exception if failed
 */
@Test
public void simple() throws Exception {
    File tempFile = folder.newFile("tempfile");
    Path path = new Path(tempFile.toURI());

    ModelLoader loaded = generateJava("simple");
    ModelWrapper model = loaded.newModel("Simple");
    DataFormat<?> support = (DataFormat<?>) loaded.newObject("sequencefile", "SimpleSequenceFileFormat");
    assertThat(support, is(instanceOf(Configurable.class)));
    Thread.currentThread().setContextClassLoader(support.getClass().getClassLoader());

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(tempFile.toURI(), conf);
    if (support instanceof Configurable) {
        ((Configurable) support).setConf(conf);
    }

    assertThat(support.getSupportedType(), is((Object) model.unwrap().getClass()));

    HadoopFileFormat<Object> unsafe = unsafe(support);

    model.set("value", new Text("Hello, world!"));

    try (ModelOutput<Object> writer = unsafe.createOutput(model.unwrap().getClass(), fs, path, new Counter())) {
        writer.write(model.unwrap());
    }

    try (ModelInput<Object> reader = unsafe.createInput(model.unwrap().getClass(), fs, path, 0,
            fs.getFileStatus(path).getLen(), new Counter())) {
        Object buffer = loaded.newModel("Simple").unwrap();
        assertThat(reader.readTo(buffer), is(true));
        assertThat(buffer, is(buffer));
        assertThat(reader.readTo(buffer), is(false));
    }
}

From source file:com.asakusafw.dmdl.directio.tsv.driver.TsvFormatEmitterTest.java

License:Apache License

/**
 * simple testing./*from  w ww .  j  av a2s .  com*/
 * @throws Exception if failed
 */
@Test
public void simple() throws Exception {
    ModelLoader loaded = generateJava("simple");
    ModelWrapper model = loaded.newModel("Simple");
    BinaryStreamFormat<?> support = (BinaryStreamFormat<?>) loaded.newObject("tsv", "SimpleTsvFormat");

    assertThat(support.getSupportedType(), is((Object) model.unwrap().getClass()));

    BinaryStreamFormat<Object> unsafe = unsafe(support);
    assertThat(unsafe, is(not(instanceOf(Configurable.class))));
    assertThat(unsafe.getMinimumFragmentSize(), greaterThan(0L));

    model.set("value", new Text("Hello, world!"));

    ByteArrayOutputStream output = new ByteArrayOutputStream();
    try (ModelOutput<Object> writer = unsafe.createOutput(model.unwrap().getClass(), "hello", output)) {
        writer.write(model.unwrap());
    }

    Object buffer = loaded.newModel("Simple").unwrap();
    try (ModelInput<Object> reader = unsafe.createInput(model.unwrap().getClass(), "hello", in(output), 0,
            size(output))) {
        assertThat(reader.readTo(buffer), is(true));
        assertThat(buffer, is(model.unwrap()));
        assertThat(reader.readTo(buffer), is(false));
    }
}

From source file:com.asakusafw.dmdl.directio.tsv.driver.TsvFormatEmitterTest.java

License:Apache License

/**
 * All types./*from   w  w  w. j a  v a 2 s . c  o  m*/
 * @throws Exception if failed
 */
@Test
public void types() throws Exception {
    ModelLoader loaded = generateJava("types");
    ModelWrapper model = loaded.newModel("Types");
    BinaryStreamFormat<?> support = (BinaryStreamFormat<?>) loaded.newObject("tsv", "TypesTsvFormat");
    assertThat(support.getSupportedType(), is((Object) model.unwrap().getClass()));

    ModelWrapper empty = loaded.newModel("Types");

    ModelWrapper all = loaded.newModel("Types");
    all.set("c_int", 100);
    all.set("c_text", new Text("Hello, DMDL world!"));
    all.set("c_boolean", true);
    all.set("c_byte", (byte) 64);
    all.set("c_short", (short) 1023);
    all.set("c_long", 100000L);
    all.set("c_float", 1.5f);
    all.set("c_double", 2.5f);
    all.set("c_decimal", new BigDecimal("3.1415"));
    all.set("c_date", new Date(2011, 9, 1));
    all.set("c_datetime", new DateTime(2011, 12, 31, 23, 59, 59));

    BinaryStreamFormat<Object> unsafe = unsafe(support);

    ByteArrayOutputStream output = new ByteArrayOutputStream();
    try (ModelOutput<Object> writer = unsafe.createOutput(model.unwrap().getClass(), "hello", output)) {
        writer.write(empty.unwrap());
        writer.write(all.unwrap());
    }

    Object buffer = loaded.newModel("Types").unwrap();
    try (ModelInput<Object> reader = unsafe.createInput(model.unwrap().getClass(), "hello", in(output), 0,
            size(output))) {
        assertThat(reader.readTo(buffer), is(true));
        assertThat(buffer, is(empty.unwrap()));
        assertThat(reader.readTo(buffer), is(true));
        assertThat(buffer, is(all.unwrap()));
        assertThat(reader.readTo(buffer), is(false));
    }
}