Example usage for org.apache.hadoop.io NullWritable get


Introduction

On this page you can find example usage of org.apache.hadoop.io.NullWritable.get().

Prototype

public static NullWritable get() 

Document

Returns the single instance of this class.
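
Because NullWritable is a zero-byte singleton, get() is the standard way to obtain a placeholder wherever the MapReduce APIs require a key or value that carries no data. A minimal sketch of this (the output path is hypothetical) writing value-only records to a SequenceFile:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class NullWritableExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/null-key-example.seq"); // hypothetical path

        // get() always returns the same shared instance; NullWritable reads
        // and writes zero bytes, so it is a free placeholder key.
        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(path),
                SequenceFile.Writer.keyClass(NullWritable.class),
                SequenceFile.Writer.valueClass(Text.class))) {
            writer.append(NullWritable.get(), new Text("value-only record"));
        }
    }
}

Since the instance is shared and stateless, it is safe to pass the result of NullWritable.get() repeatedly, as the examples below do.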

Usage

From source file: com.hotels.corc.mapred.CorcInputFormatTest.java

License: Apache License

@Test
public void readFullyReadSchemaFromSplit() throws IOException {
    StructTypeInfo typeInfo = new StructTypeInfoBuilder().add("a", TypeInfoFactory.stringTypeInfo)
            .add("b", TypeInfoFactory.stringTypeInfo).build();
    CorcInputFormat.setTypeInfo(conf, typeInfo);
    CorcInputFormat.setConverterFactoryClass(conf, DefaultConverterFactory.class);

    RecordReader<NullWritable, Corc> reader = inputFormat.getRecordReader(split, conf, reporter);

    Corc corc = reader.createValue();

    reader.next(NullWritable.get(), corc);
    assertThat(corc.get("a"), is((Object) "A1"));
    assertThat(corc.get("b"), is((Object) "B1"));
    reader.close();
}

From source file: com.hotels.corc.mapred.CorcInputFormatTest.java

License: Apache License

@Test
public void readFullyDeclaredSchema() throws IOException {
    StructTypeInfo typeInfo = new StructTypeInfoBuilder().add("a", TypeInfoFactory.stringTypeInfo)
            .add("b", TypeInfoFactory.stringTypeInfo).build();
    CorcInputFormat.setTypeInfo(conf, typeInfo);
    CorcInputFormat.setSchemaTypeInfo(conf, typeInfo);
    CorcInputFormat.setConverterFactoryClass(conf, DefaultConverterFactory.class);

    RecordReader<NullWritable, Corc> reader = inputFormat.getRecordReader(split, conf, reporter);

    Corc corc = reader.createValue();

    reader.next(NullWritable.get(), corc);
    assertThat(corc.get("a"), is((Object) "A1"));
    assertThat(corc.get("b"), is((Object) "B1"));
    reader.close();
}

From source file: com.hotels.corc.mapred.CorcOutputFormatTest.java

License: Apache License

@Test
public void writer() throws IOException {
    File root = temporaryFolder.getRoot();
    conf.set("mapreduce.output.fileoutputformat.outputdir", root.getCanonicalPath());
    conf.set("mapreduce.task.attempt.id", "attempt_x_0001_m_000001_1");

    String name = "name";
    RecordWriter<NullWritable, Corc> writer = outputFormat.getRecordWriter(fileSystem, conf, name, progress);

    StructTypeInfo typeInfo = new StructTypeInfoBuilder().add("a", TypeInfoFactory.stringTypeInfo).build();

    Corc corc = new Corc(typeInfo, new DefaultConverterFactory());
    corc.set("a", "value");

    writer.write(NullWritable.get(), corc);

    writer.close(reporter);

    Path path = new Path(root.getCanonicalPath() + "/_temporary/0/_temporary/attempt_x_0001_m_000001_1/name");
    try (OrcReader reader = new OrcReader(conf, path)) {
        List<Object> next = reader.next();
        assertThat(next.size(), is(1));
        assertThat(next.get(0), is((Object) "value"));
        assertFalse(reader.hasNext());
    }
}

From source file: com.hotels.corc.mapred.CorcRecordReaderTest.java

License: Apache License

@Test
public void readerNext() throws IOException {
    @SuppressWarnings("unchecked")
    RecordReader<NullWritable, OrcStruct> recordReader = mock(RecordReader.class);
    CorcRecordReader reader = new CorcRecordReader(typeInfo, recordReader, factory);

    Corc corc = mock(Corc.class);

    when(recordReader.next(any(NullWritable.class), any(OrcStruct.class))).thenReturn(true);

    boolean next = reader.next(NullWritable.get(), corc);

    assertTrue(next);

    verify(corc, never()).setRecordIdentifier(any(RecordIdentifier.class));
}

From source file: com.hotels.corc.mapred.CorcRecordReaderTest.java

License: Apache License

@Test
public void readerNoNext() throws IOException {
    @SuppressWarnings("unchecked")
    RecordReader<NullWritable, OrcStruct> recordReader = mock(RecordReader.class);
    CorcRecordReader reader = new CorcRecordReader(typeInfo, recordReader, factory);

    Corc corc = mock(Corc.class);

    when(recordReader.next(any(NullWritable.class), any(OrcStruct.class))).thenReturn(false);

    boolean next = reader.next(NullWritable.get(), corc);

    assertFalse(next);

    verify(corc, never()).setRecordIdentifier(any(RecordIdentifier.class));
}

From source file: com.hotels.corc.mapred.CorcRecordReaderTest.java

License: Apache License

@Test
public void readerNextTransactional() throws IOException {
    @SuppressWarnings("unchecked")
    AcidRecordReader<NullWritable, OrcStruct> recordReader = mock(AcidRecordReader.class);
    CorcRecordReader reader = new CorcRecordReader(typeInfo, recordReader, factory);

    Corc corc = mock(Corc.class);

    when(recordReader.next(any(NullWritable.class), any(OrcStruct.class))).thenReturn(true);

    boolean next = reader.next(NullWritable.get(), corc);

    assertTrue(next);

    verify(corc).setRecordIdentifier(any(RecordIdentifier.class));
}

From source file: com.hotels.corc.mapred.CorcRecordReaderTest.java

License: Apache License

@Test
public void readerNoNextTransactional() throws IOException {
    @SuppressWarnings("unchecked")
    AcidRecordReader<NullWritable, OrcStruct> recordReader = mock(AcidRecordReader.class);
    CorcRecordReader reader = new CorcRecordReader(typeInfo, recordReader, factory);

    Corc corc = mock(Corc.class);

    when(recordReader.next(any(NullWritable.class), any(OrcStruct.class))).thenReturn(false);

    boolean next = reader.next(NullWritable.get(), corc);

    assertFalse(next);

    verify(corc, never()).setRecordIdentifier(any(RecordIdentifier.class));
}

From source file: com.htuple.SecondarySortTest.java

License: Apache License

@Test
public void testSecondarySort() throws IOException {

    Configuration conf = mapReduceDriver.getConfiguration();

    SecondarySort.setupSecondarySort(conf);

    LongWritable dummyMapKey = new LongWritable(1);

    mapReduceDriver.withInput(dummyMapKey, new Text("Smith\tJohn\n"))
            .withInput(dummyMapKey, new Text("Smith\tAnne\n")).withInput(dummyMapKey, new Text("Smith\tKen\n"))
            .withOutput(new Text("Smith\tAnne\n"), NullWritable.get())
            .withOutput(new Text("Smith\tJohn\n"), NullWritable.get())
            .withOutput(new Text("Smith\tKen\n"), NullWritable.get()).runTest(true);
}

From source file: com.hyperiongray.ccmr.s3wordcount.WordCountOnlyMapper.java

License: Apache License

public void map(Object key, Text value, OutputCollector<NullWritable, Text> outputCollector, Reporter reporter)
        throws IOException {

    // We're accessing a publicly available bucket, so we don't need to
    // fill in our credentials
    ArchiveReader ar;
    try {
        S3Service s3s = new RestS3Service(null);

        // Let's grab a file out of the CommonCrawl S3 bucket
        String fn = value.toString();
        logger.info(fn);

        S3Object f = s3s.getObject("aws-publicdatasets", fn, null, null, null, null, null, null);

        // The file name identifies the ArchiveReader and indicates if it
        // should be decompressed
        ar = WARCReaderFactory.get(fn, f.getDataInputStream(), true);

    } catch (ServiceException e) {
        logger.error("S3 connection Failed", e);
        throw new RuntimeException(e);
    }

    // Once we have an ArchiveReader, we can work through each of the
    // records it contains
    int i = 0;
    logger.info("Started" + new Date());
    for (ArchiveRecord r : ar) {

        reporter.progress();
        String url = "";
        try {

            // The header file contains information such as the type of
            // record, size, creation time, and URL
            url = r.getHeader().getUrl();
            String crawledDate = r.getHeader().getDate();
            if (url == null)
                continue;

            // If we want to read the contents of the record, we can use the
            // ArchiveRecord as an InputStream
            // Create a byte array as long as the record's stated length
            ByteArrayOutputStream os = new ByteArrayOutputStream();
            try {
                r.dump(os);
            } finally {
                try {
                    if (r != null)
                        r.close();
                } catch (Exception e) {
                    logger.error("reading inputstream Failed", e);
                }
            }
            // Note: a potential optimization would be to allocate one large
            // buffer up front and reuse it

            // Convert the contents to a string so we can match against it
            String content = os.toString();

            Map<String, Integer> matches = contentMatcher.matchContent(content);
            int score = contentMatcher.score(matches);

            if (score > LOWER_SCORE_THRESHOLD) {

                logger.info("****************************************");
                logger.info("URL: " + url + " Score: " + score + " Detail: " + matches);
                // outputCollector.collect(new IntWritable(score), new Text(url));
                outputCollector.collect(NullWritable.get(), new Text(outputParser
                        .parse(contentMatcher.getTitle(content), url, crawledDate, score, matches)));
            }

            logger.debug(Integer.toString(i));

            if (i++ > sampleSize) {
                logger.info("Finished " + new Date());
                break;
            }

        } catch (Exception e) {
            logger.error("url failed " + url, e);
        }
    }

}

From source file: com.ibm.bi.dml.runtime.matrix.sort.SamplingSortMRInputFormat.java

License: Open Source License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param conf the job to sample
 * @param partFile where to write the output file to
 * @throws IOException if something goes wrong
 * @throws IllegalAccessException
 * @throws InstantiationException
 */
@SuppressWarnings({ "unchecked", "unused", "deprecation" })
public static int writePartitionFile(JobConf conf, Path partFile)
        throws IOException, InstantiationException, IllegalAccessException {
    SamplingSortMRInputFormat inFormat = new SamplingSortMRInputFormat();
    Sampler sampler = new Sampler();

    Class<? extends WritableComparable> targetKeyClass;
    targetKeyClass = (Class<? extends WritableComparable>) conf.getClass(TARGET_KEY_CLASS,
            WritableComparable.class);
    //get input converter information
    int brlen = MRJobConfiguration.getNumRowsPerBlock(conf, (byte) 0);
    int bclen = MRJobConfiguration.getNumColumnsPerBlock(conf, (byte) 0);

    //indicate whether the matrix value in this mapper is a matrix cell or a matrix block
    int partitions = conf.getNumReduceTasks();

    long sampleSize = conf.getLong(SAMPLE_SIZE, 1000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;
    // take N samples from different parts of the input

    int totalcount = 0;
    for (int i = 0; i < samples; ++i) {
        SequenceFileRecordReader reader = (SequenceFileRecordReader) inFormat
                .getRecordReader(splits[sampleStep * i], conf, null);
        int count = 0;
        WritableComparable key = (WritableComparable) reader.createKey();
        Writable value = (Writable) reader.createValue();
        while (reader.next(key, value) && count < recordsPerSample) {
            Converter inputConverter = MRJobConfiguration.getInputConverter(conf, (byte) 0);
            inputConverter.setBlockSize(brlen, bclen);
            inputConverter.convert(key, value);
            while (inputConverter.hasNext()) {
                Pair pair = inputConverter.next();
                if (pair.getKey() instanceof DoubleWritable) {
                    sampler.addValue(new DoubleWritable(((DoubleWritable) pair.getKey()).get()));
                } else if (pair.getValue() instanceof MatrixCell) {
                    sampler.addValue(new DoubleWritable(((MatrixCell) pair.getValue()).getValue()));
                } else
                    throw new IOException("SamplingSortMRInputFormat unsupported key/value class: "
                            + pair.getKey().getClass() + ":" + pair.getValue().getClass());

                count++;
            }
            key = (WritableComparable) reader.createKey();
            value = (Writable) reader.createValue();
        }
        totalcount += count;
    }

    if (totalcount == 0) //empty input files
        sampler.addValue(new DoubleWritable(0));

    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }

    //note: key value always double/null as expected by partitioner
    SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, DoubleWritable.class,
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    int index0 = -1, i = 0;
    boolean lessthan0 = true;
    for (WritableComparable splitValue : sampler.createPartitions(partitions)) {
        writer.append(splitValue, nullValue);
        if (lessthan0 && ((DoubleWritable) splitValue).get() >= 0) {
            index0 = i;
            lessthan0 = false;
        }
        i++;
    }
    if (lessthan0)
        index0 = partitions - 1;
    writer.close();

    return index0;
}
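
The javadoc above describes the essence of the sampling approach: sort the sampled keys and pick N-1 evenly spaced cut points to define N roughly equal partitions. A stand-alone sketch of that idea (a hypothetical helper, not part of the library above):

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class PartitionBoundaries {
    // Pick N-1 boundaries from a sample, mirroring the "sort the samples,
    // take evenly spaced ranks" strategy the javadoc describes.
    static double[] pick(List<Double> samples, int partitions) {
        Collections.sort(samples);
        double[] boundaries = new double[partitions - 1];
        for (int i = 1; i < partitions; i++) {
            // Evenly spaced ranks in the sorted sample give roughly
            // equal-sized partitions.
            boundaries[i - 1] = samples.get(i * samples.size() / partitions);
        }
        return boundaries;
    }

    public static void main(String[] args) {
        List<Double> samples = Arrays.asList(5.0, 1.0, 9.0, 3.0, 7.0, 2.0, 8.0, 4.0);
        // With 4 partitions, 3 boundaries are picked: [3.0, 5.0, 8.0]
        System.out.println(Arrays.toString(pick(samples, 4)));
    }
}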