Example usage for org.apache.hadoop.mapred RecordReader getProgress

Introduction

On this page you can find example usages of org.apache.hadoop.mapred RecordReader getProgress.

Prototype

float getProgress() throws IOException;

Document

How much of the input has the RecordReader consumed, i.e. how much of it has been processed so far. The returned value is a float between 0.0 (nothing consumed) and 1.0 (fully consumed).
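
Most of the wrappers in the usage examples below implement getProgress by delegating to another reader: typically an old-API (org.apache.hadoop.mapred) RecordReader wrapping a new-API (org.apache.hadoop.mapreduce) RecordReader, translating the new API's InterruptedException into the IOException that the old signature allows. The following is a minimal sketch of that pattern; the class name and the LongWritable/Text key and value types are illustrative and not taken from any of the projects listed below. In the real examples the delegate is usually obtained from a new-API InputFormat's createRecordReader and initialized before being wrapped.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.RecordReader;

public class DelegatingRecordReader implements RecordReader<LongWritable, Text> {

    // New-API reader that actually reads the split.
    private final org.apache.hadoop.mapreduce.RecordReader<LongWritable, Text> delegate;

    public DelegatingRecordReader(
            org.apache.hadoop.mapreduce.RecordReader<LongWritable, Text> delegate) {
        this.delegate = delegate;
    }

    @Override
    public float getProgress() throws IOException {
        try {
            // Fraction of the input consumed so far, between 0.0 and 1.0.
            return delegate.getProgress();
        } catch (InterruptedException e) {
            // The old-API signature only declares IOException.
            throw new IOException(e);
        }
    }

    @Override
    public boolean next(LongWritable key, Text value) throws IOException {
        try {
            if (!delegate.nextKeyValue()) {
                return false;
            }
            key.set(delegate.getCurrentKey().get());
            value.set(delegate.getCurrentValue());
            return true;
        } catch (InterruptedException e) {
            throw new IOException(e);
        }
    }

    @Override
    public LongWritable createKey() {
        return new LongWritable();
    }

    @Override
    public Text createValue() {
        return new Text();
    }

    @Override
    public long getPos() throws IOException {
        return 0; // Byte position is not tracked in this sketch.
    }

    @Override
    public void close() throws IOException {
        delegate.close();
    }
}

A caller can poll getProgress() between calls to next() to report how far through the split the reader has advanced.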

Usage

From source file:com.ask.hive.hbase.HiveHBaseTextTableInputFormat.java

License:Apache License

public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (Exception se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }
            scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case where only the
    // row key column is selected in Hive, the HBase Scan will be empty, i.e. no column family or
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            if (!addAll) {
                break;
            }
        }
    }

    // Set the start and end time for the scan.
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);

    setScan(scan);

    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {

        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(
            tableSplit, tac);

    return new RecordReader<Text, Text>() {

        //@Override
        public void close() throws IOException {
            recordReader.close();
        }

        // @Override
        public Text createKey() {
            return new Text();
        }

        // @Override
        public Text createValue() {
            return new Text();
        }

        // @Override
        public long getPos() throws IOException {
            return 0;
        }

        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;

            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return progress;
        }

        // @Override
        public boolean next(Text rowKey, Text value) throws IOException {

            boolean next = false;

            try {
                next = recordReader.nextKeyValue();

                // Build the value string: prefix each cell value with the first character of its column qualifier.
                if (next) {
                    rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));
                    StringBuilder val = new StringBuilder();
                    String prev = "";
                    for (KeyValue kv : recordReader.getCurrentValue().raw()) {
                        String current = new String(kv.getQualifier());
                        char[] col = new String(current).toCharArray();
                        if (val.length() > 0) {
                            if (prev.equals(current))
                                val.append(",");
                            else
                                val.append("\t");
                        }
                        prev = current;
                        val.append(col[0]).append("_");
                        val.append(Bytes.toString(kv.getValue()));
                    }
                    value.set(val.toString());
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return next;
        }
    };
}

From source file:com.ask.hive.hbase.HiveHBaseTimeTableInputFormat.java

License:Apache License

public RecordReader<ImmutableBytesWritable, Result> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = HBaseSerDe.parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (SerDeException se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case where only the
    // row key column is selected in Hive, the HBase Scan will be empty, i.e. no column family or
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            if (!addAll) {
                break;
            }
        }
    }

    // Set the start and end time for the scan.
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);

    setScan(scan);

    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {

        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(
            tableSplit, tac);

    return new RecordReader<ImmutableBytesWritable, Result>() {

        //@Override
        public void close() throws IOException {
            recordReader.close();
        }

        // @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }

        // @Override
        public Result createValue() {
            return new Result();
        }

        // @Override
        public long getPos() throws IOException {
            return 0;
        }

        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;

            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return progress;
        }

        // @Override
        public boolean next(ImmutableBytesWritable rowKey, Result value) throws IOException {

            boolean next = false;

            try {
                next = recordReader.nextKeyValue();

                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    Writables.copyWritable(recordReader.getCurrentValue(), value);
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return next;
        }
    };
}

From source file:com.facebook.hive.orc.TestInputOutputFormat.java

License:Apache License

@Test
public void testInOutFormat() throws Exception {
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
            properties, Reporter.NULL);
    ReaderWriterProfiler.setProfilerOptions(conf);
    writer.write(serde.serialize(new MyRow(1, 2), inspector));
    writer.write(serde.serialize(new MyRow(2, 2), inspector));
    writer.write(serde.serialize(new MyRow(3, 2), inspector));
    writer.close(true);
    serde = new OrcSerde();
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    serde.initialize(conf, properties);
    assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
    inspector = (StructObjectInspector) serde.getObjectInspector();
    assertEquals("struct<x:int,y:int>", inspector.getTypeName());
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // test the validateInput method
    ArrayList<FileStatus> fileList = new ArrayList<FileStatus>(3);
    assertEquals(false, ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    fileList.add(fs.getFileStatus(testFilePath));
    assertEquals(true, ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    fileList.add(fs.getFileStatus(workDir));
    assertEquals(false, ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));

    // read the whole file
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Writable value = (Writable) reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    IntObjectInspector intInspector = (IntObjectInspector) fields.get(0).getFieldObjectInspector();
    assertEquals(0.0, reader.getProgress(), 0.00001);
    assertEquals(0, reader.getPos());
    while (reader.next(key, value)) {
        assertEquals(++rowNum,
                intInspector.get(inspector.getStructFieldData(serde.deserialize(value), fields.get(0))));
        assertEquals(2,
                intInspector.get(inspector.getStructFieldData(serde.deserialize(value), fields.get(1))));
    }
    assertEquals(3, rowNum);
    assertEquals(1.0, reader.getProgress(), 0.00001);
    reader.close();

    // read just the first column
    conf.set("hive.io.file.readcolumn.ids", "0");
    reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    key = reader.createKey();
    value = (Writable) reader.createValue();
    rowNum = 0;
    fields = inspector.getAllStructFieldRefs();
    while (reader.next(key, value)) {
        assertEquals(++rowNum, intInspector.get(inspector.getStructFieldData(value, fields.get(0))));
        assertEquals(null, inspector.getStructFieldData(value, fields.get(1)));
    }
    assertEquals(3, rowNum);
    reader.close();

    // test the mapping of empty string to all columns
    conf.set("hive.io.file.readcolumn.ids", "");
    reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    key = reader.createKey();
    value = (Writable) reader.createValue();
    rowNum = 0;
    fields = inspector.getAllStructFieldRefs();
    while (reader.next(key, value)) {
        assertEquals(++rowNum, intInspector.get(inspector.getStructFieldData(value, fields.get(0))));
        assertEquals(2,
                intInspector.get(inspector.getStructFieldData(serde.deserialize(value), fields.get(1))));
    }
    assertEquals(3, rowNum);
    reader.close();
}

From source file:com.facebook.hive.orc.TestInputOutputFormat.java

License:Apache License

@Test
public void testEmptyFile() throws Exception {
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
            properties, Reporter.NULL);
    writer.close(true);
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    SerDe serde = new OrcSerde();
    serde.initialize(conf, properties);
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // read the whole file
    conf.set("hive.io.file.readcolumn.ids", "0,1");
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    assertEquals(0.0, reader.getProgress(), 0.00001);
    assertEquals(0, reader.getPos());
    assertEquals(false, reader.next(key, value));
    reader.close();
    assertEquals(null, serde.getSerDeStats());
}

From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableInputFormat.java

License:Apache License

@Override
public RecordReader<ImmutableBytesWritable, ResultWritable> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getTableSplit();

    Job job = new Job(jobConf);
    TaskAttemptContext tac = ShimLoader.getHadoopShims().newTaskAttemptContext(job.getConfiguration(),
            reporter);

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader;
    if (hbaseSplit.isTxIndexScan()) {
        LOG.info("getRecordReader: TxHiveIndexScan -> " + tableSplit);
        recordReader = TxHiveTableInputFormatUtil.createRecordReader(tableSplit, tac, jobConf);
    } else {
        LOG.info("getRecordReader: no TxHiveIndexScan -> " + tableSplit);
        setHTable(HiveHBaseInputFormatUtil.getTable(jobConf));
        setScan(HiveHBaseInputFormatUtil.getScan(jobConf));
        recordReader = createRecordReader(tableSplit, tac);
    }
    try {
        recordReader.initialize(tableSplit, tac);
    } catch (InterruptedException e) {
        throw new IOException("Failed to initialize RecordReader", e);
    }

    return new RecordReader<ImmutableBytesWritable, ResultWritable>() {

        @Override
        public void close() throws IOException {
            recordReader.close();
            closeTable();
        }

        @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }

        @Override
        public ResultWritable createValue() {
            return new ResultWritable(new Result());
        }

        @Override
        public long getPos() throws IOException {
            return 0;
        }

        @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;
            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return progress;
        }

        @Override
        public boolean next(ImmutableBytesWritable rowKey, ResultWritable value) throws IOException {
            boolean next = false;
            try {
                next = recordReader.nextKeyValue();
                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    value.setResult(recordReader.getCurrentValue());
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return next;
        }
    };
}

From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableSnapshotInputFormat.java

License:Apache License

@Override
public RecordReader<ImmutableBytesWritable, ResultWritable> getRecordReader(InputSplit split, JobConf job,
        Reporter reporter) throws IOException {
    setColumns(job);
    final RecordReader<ImmutableBytesWritable, Result> rr = delegate
            .getRecordReader(((HBaseSplit) split).getSnapshotSplit(), job, reporter);

    return new RecordReader<ImmutableBytesWritable, ResultWritable>() {
        @Override
        public boolean next(ImmutableBytesWritable key, ResultWritable value) throws IOException {
            return rr.next(key, value.getResult());
        }

        @Override
        public ImmutableBytesWritable createKey() {
            return rr.createKey();
        }

        @Override
        public ResultWritable createValue() {
            return new ResultWritable(rr.createValue());
        }

        @Override
        public long getPos() throws IOException {
            return rr.getPos();
        }

        @Override
        public void close() throws IOException {
            rr.close();
        }

        @Override
        public float getProgress() throws IOException {
            return rr.getProgress();
        }
    };
}

From source file:com.ibm.jaql.io.hadoop.CompositeInputAdapter.java

License:Apache License

@SuppressWarnings("unchecked")
public RecordReader<JsonHolder, JsonHolder> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
        throws IOException {
    CompositeSplit cSplit = (CompositeSplit) split;

    // 1. get the InputAdapter's array index (i) from the split
    final int idx = cSplit.getAdapterIdx();
    InputSplit baseSplit = cSplit.getSplit();

    try {
        // 2. get the ith adapter's args record
        JsonValue value = this.args.get(idx);
        // JRecord baseArgs = (JRecord) item.getNonNull();
        // record the current index to the job conf
        // ASSUMES: in map/reduce, the format's record reader is called *before*
        // the map class is configured
        writeCurrentIndex(job, idx); // FIXME: no longer needed

        // 3. insantiate and initialize the adapter
        HadoopInputAdapter adapter = (HadoopInputAdapter) AdapterStore.getStore().input
                .getAdapter(/** baseArgs, */
                        value);

        // 4. create a new JobConf j'
        JobConf jTmp = new JobConf(job);

        // 5. call adapter's setupConf(j')
        // ConfiguratorUtil.writeToConf(adapter, jTmp, item/**baseArgs*/);
        adapter.setParallel(jTmp);

        // 6. configure the adapter from j'
        adapter.configure(jTmp);

        // 7. call adapter's getRecordReader with j'
        final RecordReader<JsonHolder, JsonHolder> reader = (RecordReader<JsonHolder, JsonHolder>) adapter
                .getRecordReader(baseSplit, jTmp, reporter);

        if (!addIndex) {
            return reader;
        }

        return new RecordReader<JsonHolder, JsonHolder>() {

            @Override
            public void close() throws IOException {
                reader.close();
            }

            @Override
            public JsonHolder createKey() {
                return reader.createKey();
            }

            @Override
            public JsonHolder createValue() {
                return reader.createValue();
            }

            @Override
            public long getPos() throws IOException {
                return reader.getPos();
            }

            @Override
            public float getProgress() throws IOException {
                return reader.getProgress();
            }

            @Override
            public boolean next(JsonHolder key, JsonHolder value) throws IOException {
                BufferedJsonArray pair = (BufferedJsonArray) value.value;
                if (pair != null) {
                    value.value = pair.get(1);
                } else {
                    pair = new BufferedJsonArray(2);
                    pair.set(0, JsonLong.make(idx));
                }

                if (reader.next(key, value)) {
                    pair.set(1, value.value);
                    value.value = pair;
                    return true;
                }

                return false;
            }
        };

    } catch (Exception e) {
        return null;
    }
}

From source file:com.ibm.jaql.io.hadoop.DefaultHadoopInputAdapter.java

License:Apache License

@SuppressWarnings("unchecked")
public RecordReader<JsonHolder, JsonHolder> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
        throws IOException {
    if (split instanceof DHIASplit) {
        // not using order-preserving wrapper
        split = ((DHIASplit) split).split;
    }

    if (converter == null)
        return ((InputFormat<JsonHolder, JsonHolder>) iFormat).getRecordReader(split, job, reporter);
    final RecordReader<K, V> baseReader = ((InputFormat<K, V>) iFormat).getRecordReader(split, job, reporter);
    final K baseKey = baseReader.createKey();
    final V baseValue = baseReader.createValue();

    return new RecordReader<JsonHolder, JsonHolder>() {

        public void close() throws IOException {
            baseReader.close();
        }

        public JsonHolder createKey() {
            return keyHolder();
        }

        public JsonHolder createValue() {
            JsonHolder holder = valueHolder();
            holder.value = converter.createTarget();
            return holder;
        }

        public long getPos() throws IOException {
            return baseReader.getPos();
        }

        public float getProgress() throws IOException {
            return baseReader.getProgress();
        }

        public boolean next(JsonHolder key, JsonHolder value) throws IOException {
            boolean hasMore = baseReader.next(baseKey, baseValue);
            if (!hasMore)
                return false;
            value.value = converter.convert(baseKey, baseValue, value.value);
            return true;
        }
    };
}

From source file:org.datavec.hadoop.records.reader.TestBasicHDFS_Integration.java

License:Apache License

/**
 * Things we'd need:
 *       1. JobConf
 *      2. some way to get input splits
 * 
 * @throws IOException
 */
@Test
public void testParametersInputSplitSetup() throws IOException {

    //      InputSplit genericSplit = null;

    //      TaskAttemptContext context = null;

    // ---- this all needs to be done in
    JobConf job = new JobConf(defaultConf);

    // app.input.path

    String split_filename = "src/test/resources/records/reader/SVMLightRecordReaderInput/record_reader_input_test.txt";

    Path splitPath = new Path(split_filename);

    InputSplit[] splits = generateDebugSplits(splitPath, job);

    System.out.println("split count: " + splits.length);

    //RecordReader<LongWritable, Text> rr = new LineRecordReader(job, (FileSplit) splits[0]);

    TextInputFormat format = new TextInputFormat();
    format.configure(job);

    //Reporter reporter = new DummyReporter();

    RecordReader<LongWritable, Text> reader = null;
    LongWritable key = new LongWritable();
    Text value = new Text();

    final Reporter voidReporter = Reporter.NULL;

    reader = format.getRecordReader(splits[0], job, voidReporter);

    //while (rr.)

    while (reader.getProgress() < 1.0) {

        boolean hasMore = reader.next(key, value);

        System.out.println("line: " + value.toString());

    }

    reader.close();

}

From source file:org.terrier.structures.indexing.singlepass.hadoop.BitPostingIndexInputFormat.java

License:Mozilla Public License

/** Test method, runs splits for inverted/lexicon with the command line specified index */
public static void main(String[] args) throws Exception {
    Index.setIndexLoadingProfileAsRetrieval(false);
    IndexOnDisk index = Index.createIndex(args[1], args[2]);
    if (args[0].equals("--splits")) {
        JobConf job = HadoopPlugin.getJobFactory(BitPostingIndexInputFormat.class.getSimpleName()).newJob();
        HadoopUtility.toHConfiguration(index, job);
        setStructures(job, "inverted", "lexicon");
        index.close();
        new BitPostingIndexInputFormat().getSplits(job, 100);
    } else {
        JobConf job = HadoopPlugin.getJobFactory(BitPostingIndexInputFormat.class.getSimpleName()).newJob();
        setStructures(job, "linksin", "linksin-lookup");
        HadoopUtility.toHConfiguration(index, job);
        index.close();
        InputSplit s = new BitPostingIndexInputSplit(new Path(args[3]), Long.parseLong(args[4]),
                Long.parseLong(args[5]), new String[0], Integer.parseInt(args[6]), Integer.parseInt(args[7]));
        RecordReader<IntWritable, IntObjectWrapper<IterablePosting>> rr = new BitPostingIndexInputFormat()
                .getRecordReader(s, job, new Reporter() {
                    public InputSplit getInputSplit() throws UnsupportedOperationException {
                        return null;
                    }

                    @SuppressWarnings({ "rawtypes" })
                    public void incrCounter(Enum arg0, long arg1) {
                    }

                    public void incrCounter(String arg0, String arg1, long arg2) {
                    }

                    @SuppressWarnings({ "rawtypes" })
                    public org.apache.hadoop.mapred.Counters.Counter getCounter(Enum arg0) {
                        return null;
                    }

                    public org.apache.hadoop.mapred.Counters.Counter getCounter(String arg0, String arg1) {
                        return null;
                    }

                    public void setStatus(String arg0) {
                    }

                    public void progress() {
                    }
                });
        IntWritable key = rr.createKey();
        IntObjectWrapper<IterablePosting> value = rr.createValue();
        long pointers = 0;
        int lastId = 0;
        int nonZeroEntryCount = 0;
        float maxProgress = 0;
        while (rr.next(key, value)) {
            IterablePosting ip = value.getObject();
            lastId = key.get();
            while (ip.next() != IterablePosting.EOL) {
                pointers++;
            }
            nonZeroEntryCount++;
            if (rr.getProgress() > maxProgress)
                maxProgress = rr.getProgress();
        }
        rr.close();
        System.out.println("maxProgress=" + maxProgress + " Lastid=" + lastId + " nonZeroEntryCount="
                + nonZeroEntryCount + " postings=" + pointers);
    }
}