List of usage examples for org.apache.hadoop.io.NullWritable.get()
public static NullWritable get()
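Before the examples from real projects below, here is a minimal standalone sketch (not taken from any of the sources listed; the file name example.seq is hypothetical). NullWritable is a zero-length singleton Writable, and get() returns that shared instance; it is typically used as a placeholder key or value when only one side of a key/value pair carries data, as in a SequenceFile keyed by nothing:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class NullWritableExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path("example.seq"); // hypothetical path for illustration

    // Write a SequenceFile whose keys are NullWritable placeholders.
    SequenceFile.Writer writer = SequenceFile.createWriter(path.getFileSystem(conf), conf, path,
        NullWritable.class, Text.class);
    writer.append(NullWritable.get(), new Text("value with no meaningful key"));
    writer.close();

    // Read it back, reusing the same singleton as the key buffer.
    SequenceFile.Reader reader = new SequenceFile.Reader(path.getFileSystem(conf), path, conf);
    NullWritable key = NullWritable.get();
    Text value = new Text();
    while (reader.next(key, value)) {
      System.out.println(value);
    }
    reader.close();
  }
}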
From source file:org.apache.hama.pipes.util.SequenceFileDumper.java
License:Apache License
public static void main(String[] args) throws Exception {
  CommandLineParser cli = new CommandLineParser();
  if (args.length == 0) {
    cli.printUsage();
    return;
  }

  // Add arguments
  cli.addOption("file", false, "The Sequence File containing the Clusters", "path");
  cli.addOption("output", false, "The output file. If not specified, dumps to the console", "path");
  cli.addOption("substring", false, "The number of chars of the FormatString() to print", "number");
  cli.addOption("count", false, "Report the count only", "number");

  Parser parser = cli.createParser();
  try {
    HamaConfiguration conf = new HamaConfiguration();
    CommandLine cmdLine = parser.parse(cli.options, args);
    if (cmdLine.hasOption("file")) {
      Path path = new Path(cmdLine.getOptionValue("file"));
      FileSystem fs = FileSystem.get(path.toUri(), conf);
      if (!fs.isFile(path)) {
        System.out.println("File does not exist: " + path.toString());
        return;
      }
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);

      Writer writer;
      if (cmdLine.hasOption("output")) {
        writer = new FileWriter(cmdLine.getOptionValue("output"));
      } else {
        writer = new OutputStreamWriter(System.out);
      }
      writer.append("Input Path: ").append(String.valueOf(path)).append(LINE_SEP);

      int sub = Integer.MAX_VALUE;
      if (cmdLine.hasOption("substring")) {
        sub = Integer.parseInt(cmdLine.getOptionValue("substring"));
      }

      Writable key;
      if (reader.getKeyClass() != NullWritable.class) {
        key = (Writable) reader.getKeyClass().newInstance();
      } else {
        key = NullWritable.get();
      }
      Writable value;
      if (reader.getValueClass() != NullWritable.class) {
        value = (Writable) reader.getValueClass().newInstance();
      } else {
        value = NullWritable.get();
      }

      writer.append("Key class: ").append(String.valueOf(reader.getKeyClass())).append(" Value Class: ")
          .append(String.valueOf(value.getClass())).append(LINE_SEP);
      writer.flush();

      long count = 0;
      boolean countOnly = cmdLine.hasOption("count");
      if (countOnly == false) {
        while (reader.next(key, value)) {
          writer.append("Key: ").append(String.valueOf(key));
          String str = value.toString();
          writer.append(": Value: ").append(str.length() > sub ? str.substring(0, sub) : str);
          writer.write(LINE_SEP);
          writer.flush();
          count++;
        }
        writer.append("Count: ").append(String.valueOf(count)).append(LINE_SEP);
      } else {
        // count only
        while (reader.next(key, value)) {
          count++;
        }
        writer.append("Count: ").append(String.valueOf(count)).append(LINE_SEP);
      }
      writer.flush();

      if (cmdLine.hasOption("output")) {
        writer.close();
      }
      reader.close();
    } else {
      cli.printUsage();
    }
  } catch (ParseException e) {
    LOG.error(e.getMessage());
    cli.printUsage();
    return;
  }
}
From source file:org.apache.hawq.pxf.plugins.hdfs.AvroFileAccessor.java
License:Apache License
/**
 * readNextObject
 * The AVRO accessor is currently the only specialized accessor that
 * overrides this method. This happens because of the special
 * AvroRecordReader.next() semantics (use of the AvroWrapper), so it
 * cannot use the RecordReader's default implementation in
 * SplittableFileAccessor.
 */
@Override
public OneRow readNextObject() throws IOException {
  // Reset the datum to null, to avoid stale bytes being padded from the previous row's datum.
  avroWrapper.datum(null);
  if (reader.next(avroWrapper, NullWritable.get())) {
    // There is one more record in the current split.
    return new OneRow(null, avroWrapper.datum());
  } else if (getNextSplit()) {
    // The current split is exhausted; try to move to the next split.
    return reader.next(avroWrapper, NullWritable.get()) ? new OneRow(null, avroWrapper.datum()) : null;
  }
  // If neither condition was met, we already read all the records in all the splits, and
  // in this call the record variable was not set, so we return null to signal the end of the
  // record sequence - in this case avroWrapper.datum() will be null.
  return null;
}
From source file:org.apache.hcatalog.mapreduce.FileRecordWriterContainer.java
License:Apache License
@Override
public void write(WritableComparable<?> key, HCatRecord value) throws IOException, InterruptedException {
  org.apache.hadoop.mapred.RecordWriter localWriter;
  ObjectInspector localObjectInspector;
  SerDe localSerDe;
  OutputJobInfo localJobInfo = null;

  if (dynamicPartitioningUsed) {
    // calculate which writer to use from the remaining values - this needs to be done before we delete cols
    List<String> dynamicPartValues = new ArrayList<String>();
    for (Integer colToAppend : dynamicPartCols) {
      dynamicPartValues.add(value.get(colToAppend).toString());
    }

    String dynKey = dynamicPartValues.toString();
    if (!baseDynamicWriters.containsKey(dynKey)) {
      if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() > maxDynamicPartitions)) {
        throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
            "Number of dynamic partitions being created "
                + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                + "] if needed.");
      }

      org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil
          .createTaskAttemptContext(context);
      configureDynamicStorageHandler(currTaskContext, dynamicPartValues);
      localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext);

      // setup serDe
      SerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
          currTaskContext.getJobConf());
      try {
        InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo);
      } catch (SerDeException e) {
        throw new IOException("Failed to initialize SerDe", e);
      }

      // create base OutputFormat
      org.apache.hadoop.mapred.OutputFormat baseOF = ReflectionUtils
          .newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf());

      // We are skipping calling checkOutputSpecs() for each partition
      // as it can throw a FileAlreadyExistsException when more than one mapper is writing to a partition.
      // See HCATALOG-490; this also avoids contacting the namenode for each new FileOutputFormat instance.
      // In general this should be ok for most FileOutputFormat implementations
      // but may become an issue for cases when the method is used to perform other setup tasks.

      // get Output Committer
      org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf()
          .getOutputCommitter();
      // create currJobContext the latest so it gets all the config changes
      org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil.createJobContext(currTaskContext);
      // setupJob()
      baseOutputCommitter.setupJob(currJobContext);
      // recreate to refresh jobConf of currTask context
      currTaskContext = HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(),
          currTaskContext.getTaskAttemptID(), currTaskContext.getProgressible());
      // set temp location
      currTaskContext.getConfiguration().set("mapred.work.output.dir",
          new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath()
              .toString());
      // setupTask()
      baseOutputCommitter.setupTask(currTaskContext);

      Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir"));
      Path childPath = new Path(parentDir, FileOutputFormat.getUniqueFile(currTaskContext, "part", ""));

      org.apache.hadoop.mapred.RecordWriter baseRecordWriter = baseOF.getRecordWriter(
          parentDir.getFileSystem(currTaskContext.getConfiguration()), currTaskContext.getJobConf(),
          childPath.toString(), InternalUtil.createReporter(currTaskContext));

      baseDynamicWriters.put(dynKey, baseRecordWriter);
      baseDynamicSerDe.put(dynKey, currSerDe);
      baseDynamicCommitters.put(dynKey, baseOutputCommitter);
      dynamicContexts.put(dynKey, currTaskContext);
      dynamicObjectInspectors.put(dynKey, InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()));
      dynamicOutputJobInfo.put(dynKey, HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey)));
    }

    localJobInfo = dynamicOutputJobInfo.get(dynKey);
    localWriter = baseDynamicWriters.get(dynKey);
    localSerDe = baseDynamicSerDe.get(dynKey);
    localObjectInspector = dynamicObjectInspectors.get(dynKey);
  } else {
    localJobInfo = jobInfo;
    localWriter = getBaseRecordWriter();
    localSerDe = serDe;
    localObjectInspector = objectInspector;
  }

  for (Integer colToDel : partColsToDel) {
    value.remove(colToDel);
  }

  // The key given by user is ignored
  try {
    localWriter.write(NullWritable.get(), localSerDe.serialize(value.getAll(), localObjectInspector));
  } catch (SerDeException e) {
    throw new IOException("Failed to serialize object", e);
  }
}
From source file:org.apache.hcatalog.mapreduce.TestHCatHiveThriftCompatibility.java
License:Apache License
@Before
@Override
public void setUp() throws Exception {
  super.setUp();
  if (setUpComplete) {
    return;
  }

  ByteArrayOutputStream out = new ByteArrayOutputStream();
  TIOStreamTransport transport = new TIOStreamTransport(out);
  TBinaryProtocol protocol = new TBinaryProtocol(transport);

  IntString intString = new IntString(1, "one", 1);
  intString.write(protocol);
  BytesWritable bytesWritable = new BytesWritable(out.toByteArray());

  intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq");
  LOG.info("Creating data file: " + intStringSeq);

  SequenceFile.Writer seqFileWriter = SequenceFile.createWriter(intStringSeq.getFileSystem(hiveConf),
      hiveConf, intStringSeq, NullWritable.class, BytesWritable.class);
  seqFileWriter.append(NullWritable.get(), bytesWritable);
  seqFileWriter.close();

  setUpComplete = true;
}
From source file:org.apache.hcatalog.mapreduce.TestHCatInputFormat.java
License:Apache License
/**
 * Create an input sequence file with 100 records; every 10th record is bad.
 * Load this table into Hive.
 */
@Before
@Override
public void setUp() throws Exception {
  super.setUp();
  if (setUpComplete) {
    return;
  }

  Path intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq");
  LOG.info("Creating data file: " + intStringSeq);

  SequenceFile.Writer seqFileWriter = SequenceFile.createWriter(intStringSeq.getFileSystem(hiveConf),
      hiveConf, intStringSeq, NullWritable.class, BytesWritable.class);

  ByteArrayOutputStream out = new ByteArrayOutputStream();
  TIOStreamTransport transport = new TIOStreamTransport(out);
  TBinaryProtocol protocol = new TBinaryProtocol(transport);

  for (int i = 1; i <= 100; i++) {
    if (i % 10 == 0) {
      seqFileWriter.append(NullWritable.get(), new BytesWritable("bad record".getBytes()));
    } else {
      out.reset();
      IntString intString = new IntString(i, Integer.toString(i), i);
      intString.write(protocol);
      BytesWritable bytesWritable = new BytesWritable(out.toByteArray());
      seqFileWriter.append(NullWritable.get(), bytesWritable);
    }
  }

  seqFileWriter.close();

  // Now let's load this file into a new Hive table.
  Assert.assertEquals(0, driver.run("drop table if exists test_bad_records").getResponseCode());
  Assert.assertEquals(0, driver
      .run("create table test_bad_records "
          + "row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' "
          + "with serdeproperties ( "
          + "  'serialization.class'='org.apache.hadoop.hive.serde2.thrift.test.IntString', "
          + "  'serialization.format'='org.apache.thrift.protocol.TBinaryProtocol') "
          + "stored as"
          + " inputformat 'org.apache.hadoop.mapred.SequenceFileInputFormat'"
          + " outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'")
      .getResponseCode());
  Assert.assertEquals(0,
      driver.run("load data local inpath '" + intStringSeq.getParent() + "' into table test_bad_records")
          .getResponseCode());

  setUpComplete = true;
}
From source file:org.apache.hive.hcatalog.mapreduce.FileRecordWriterContainer.java
License:Apache License
@Override
public void write(WritableComparable<?> key, HCatRecord value) throws IOException, InterruptedException {
  LocalFileWriter localFileWriter = getLocalFileWriter(value);
  RecordWriter localWriter = localFileWriter.getLocalWriter();
  ObjectInspector localObjectInspector = localFileWriter.getLocalObjectInspector();
  SerDe localSerDe = localFileWriter.getLocalSerDe();
  OutputJobInfo localJobInfo = localFileWriter.getLocalJobInfo();

  for (Integer colToDel : partColsToDel) {
    value.remove(colToDel);
  }

  // The key given by user is ignored
  try {
    localWriter.write(NullWritable.get(), localSerDe.serialize(value.getAll(), localObjectInspector));
  } catch (SerDeException e) {
    throw new IOException("Failed to serialize object", e);
  }
}
From source file:org.apache.hive.jdbc.BaseJdbcWithMiniLlap.java
License:Apache License
private int processQuery(String currentDatabase, String query, int numSplits, RowProcessor rowProcessor)
    throws Exception {
  String url = miniHS2.getJdbcURL();
  String user = System.getProperty("user.name");
  String pwd = user;
  String handleId = UUID.randomUUID().toString();

  InputFormat<NullWritable, Row> inputFormat = getInputFormat();

  // Get splits
  JobConf job = new JobConf(conf);
  job.set(LlapBaseInputFormat.URL_KEY, url);
  job.set(LlapBaseInputFormat.USER_KEY, user);
  job.set(LlapBaseInputFormat.PWD_KEY, pwd);
  job.set(LlapBaseInputFormat.QUERY_KEY, query);
  job.set(LlapBaseInputFormat.HANDLE_ID, handleId);
  if (currentDatabase != null) {
    job.set(LlapBaseInputFormat.DB_KEY, currentDatabase);
  }

  InputSplit[] splits = inputFormat.getSplits(job, numSplits);
  assertTrue(splits.length > 0);

  // Fetch rows from splits
  boolean first = true;
  int rowCount = 0;
  for (InputSplit split : splits) {
    System.out.println("Processing split " + split.getLocations());

    int numColumns = 2;
    RecordReader<NullWritable, Row> reader = inputFormat.getRecordReader(split, job, null);
    Row row = reader.createValue();
    while (reader.next(NullWritable.get(), row)) {
      rowProcessor.process(row);
      ++rowCount;
    }
    // In arrow-mode this will throw an exception unless all buffers have been released.
    // See org.apache.hadoop.hive.llap.LlapArrowBatchRecordReader
    reader.close();
  }
  LlapBaseInputFormat.close(handleId);

  return rowCount;
}
From source file:org.apache.hive.jdbc.TestJdbcWithMiniLlap.java
License:Apache License
private int processQuery(String query, int numSplits, RowProcessor rowProcessor) throws Exception {
  String url = miniHS2.getJdbcURL();
  String user = System.getProperty("user.name");
  String pwd = user;
  LlapRowInputFormat inputFormat = new LlapRowInputFormat();

  // Get splits
  JobConf job = new JobConf(conf);
  job.set(LlapBaseInputFormat.URL_KEY, url);
  job.set(LlapBaseInputFormat.USER_KEY, user);
  job.set(LlapBaseInputFormat.PWD_KEY, pwd);
  job.set(LlapBaseInputFormat.QUERY_KEY, query);

  InputSplit[] splits = inputFormat.getSplits(job, numSplits);
  assertTrue(splits.length > 0);

  // Fetch rows from splits
  boolean first = true;
  int rowCount = 0;
  for (InputSplit split : splits) {
    System.out.println("Processing split " + split.getLocations());

    int numColumns = 2;
    RecordReader<NullWritable, Row> reader = inputFormat.getRecordReader(split, job, null);
    Row row = reader.createValue();
    while (reader.next(NullWritable.get(), row)) {
      rowProcessor.process(row);
      ++rowCount;
    }
  }

  return rowCount;
}
From source file:org.apache.hive.storage.jdbc.JdbcRecordReader.java
License:Apache License
@Override
public boolean next(LongWritable key, MapWritable value) throws IOException {
  try {
    LOGGER.debug("JdbcRecordReader.next called");
    if (dbAccessor == null) {
      dbAccessor = DatabaseAccessorFactory.getAccessor(conf);
      iterator = dbAccessor.getRecordIterator(conf, split.getLimit(), split.getOffset());
    }

    if (iterator.hasNext()) {
      LOGGER.debug("JdbcRecordReader has more records to read.");
      key.set(pos);
      pos++;
      Map<String, Object> record = iterator.next();
      if ((record != null) && (!record.isEmpty())) {
        for (Entry<String, Object> entry : record.entrySet()) {
          value.put(new Text(entry.getKey()),
              entry.getValue() == null ? NullWritable.get() : new ObjectWritable(entry.getValue()));
        }
        return true;
      } else {
        LOGGER.debug("JdbcRecordReader got null record.");
        return false;
      }
    } else {
      LOGGER.debug("JdbcRecordReader has no more records to read.");
      return false;
    }
  } catch (Exception e) {
    LOGGER.error("An error occurred while reading the next record from DB.", e);
    return false;
  }
}
From source file:org.apache.jena.grande.pig.RdfStorage.java
License:Apache License
@Override
public void putNext(Tuple tuple) throws IOException {
  log.debug("putNext({})", tuple);
  try {
    Node g = NodeEncoder.asNode((String) tuple.get(0));
    Node s = NodeEncoder.asNode((String) tuple.get(1));
    Node p = NodeEncoder.asNode((String) tuple.get(2));
    Node o = NodeEncoder.asNode((String) tuple.get(3));
    Quad quad = new Quad(g, s, p, o);
    QuadWritable quadWritable = new QuadWritable(quad);
    writer.write(NullWritable.get(), quadWritable);
  } catch (InterruptedException e) {
    throw new IOException(e);
  }
}