Usage examples for org.apache.hadoop.mapred.RecordReader.getProgress()
float getProgress() throws IOException;
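Most of the examples below follow the same pattern: an old-API (org.apache.hadoop.mapred) RecordReader wraps a new-API (org.apache.hadoop.mapreduce) reader and implements getProgress() by delegating to it, translating the new API's InterruptedException into the IOException that the old interface declares. Here is a minimal sketch of that pattern; the class name and the Text/Text key-value types are illustrative, not taken from any of the source files below.

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.RecordReader;

// Hypothetical wrapper: adapts a new-API reader to the old mapred interface.
public class DelegatingTextRecordReader implements RecordReader<Text, Text> {

    private final org.apache.hadoop.mapreduce.RecordReader<Text, Text> delegate;

    public DelegatingTextRecordReader(org.apache.hadoop.mapreduce.RecordReader<Text, Text> delegate) {
        this.delegate = delegate;
    }

    public boolean next(Text key, Text value) throws IOException {
        try {
            if (!delegate.nextKeyValue()) {
                return false;
            }
            key.set(delegate.getCurrentKey());
            value.set(delegate.getCurrentValue());
            return true;
        } catch (InterruptedException e) {
            throw new IOException(e);
        }
    }

    public Text createKey() {
        return new Text();
    }

    public Text createValue() {
        return new Text();
    }

    public long getPos() throws IOException {
        return 0; // Byte position is not meaningful for the wrapped reader.
    }

    public float getProgress() throws IOException {
        try {
            // Fraction of the split consumed so far, in [0.0f, 1.0f].
            return delegate.getProgress();
        } catch (InterruptedException e) {
            throw new IOException(e);
        }
    }

    public void close() throws IOException {
        delegate.close();
    }
}

The returned value is the fraction of the split consumed so far, between 0.0f and 1.0f; the test cases further down poll it to drive or verify their read loops.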
From source file:com.ask.hive.hbase.HiveHBaseTextTableInputFormat.java
License:Apache License
public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {
    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);

    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (Exception se) {
        throw new IOException(se);
    }

    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);
    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }
            scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty, i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }
            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }
            if (!addAll) {
                break;
            }
        }
    }

    // Set the start and end time for the scan.
    setTime(jobConf, scan);

    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);
    setScan(scan);

    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader =
            createRecordReader(tableSplit, tac);

    return new RecordReader<Text, Text>() {

        public void close() throws IOException {
            recordReader.close();
        }

        public Text createKey() {
            return new Text();
        }

        public Text createValue() {
            return new Text();
        }

        public long getPos() throws IOException {
            return 0;
        }

        public float getProgress() throws IOException {
            // Delegate to the wrapped mapreduce RecordReader, converting its
            // InterruptedException into the IOException this interface declares.
            float progress = 0.0F;
            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return progress;
        }

        public boolean next(Text rowKey, Text value) throws IOException {
            boolean next = false;
            try {
                next = recordReader.nextKeyValue();
                if (next) {
                    rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));
                    // Build the value from the first character of each column
                    // qualifier plus the cell value, delimited by commas when the
                    // qualifier repeats and by tabs when it changes.
                    StringBuilder val = new StringBuilder();
                    String prev = "";
                    for (KeyValue kv : recordReader.getCurrentValue().raw()) {
                        String current = new String(kv.getQualifier());
                        char[] col = current.toCharArray();
                        if (val.length() > 0) {
                            if (prev.equals(current)) {
                                val.append(",");
                            } else {
                                val.append("\t");
                            }
                        }
                        prev = current;
                        val.append(col[0]).append("_");
                        val.append(Bytes.toString(kv.getValue()));
                    }
                    value.set(val.toString());
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return next;
        }
    };
}
From source file:com.ask.hive.hbase.HiveHBaseTimeTableInputFormat.java
License:Apache License
public RecordReader<ImmutableBytesWritable, Result> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {
    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);

    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = HBaseSerDe.parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies,
                hbaseColumnFamiliesBytes, hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (SerDeException se) {
        throw new IOException(se);
    }

    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);
    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }
            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }
            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty, i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }
            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }
            if (!addAll) {
                break;
            }
        }
    }

    // Set the start and end time for the scan.
    setTime(jobConf, scan);

    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);
    setScan(scan);

    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader =
            createRecordReader(tableSplit, tac);

    return new RecordReader<ImmutableBytesWritable, Result>() {

        public void close() throws IOException {
            recordReader.close();
        }

        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }

        public Result createValue() {
            return new Result();
        }

        public long getPos() throws IOException {
            return 0;
        }

        public float getProgress() throws IOException {
            // Delegate to the wrapped mapreduce RecordReader, converting its
            // InterruptedException into the IOException this interface declares.
            float progress = 0.0F;
            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return progress;
        }

        public boolean next(ImmutableBytesWritable rowKey, Result value) throws IOException {
            boolean next = false;
            try {
                next = recordReader.nextKeyValue();
                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    Writables.copyWritable(recordReader.getCurrentValue(), value);
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return next;
        }
    };
}
From source file:com.facebook.hive.orc.TestInputOutputFormat.java
License:Apache License
@Test
public void testInOutFormat() throws Exception {
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
            properties, Reporter.NULL);
    ReaderWriterProfiler.setProfilerOptions(conf);
    writer.write(serde.serialize(new MyRow(1, 2), inspector));
    writer.write(serde.serialize(new MyRow(2, 2), inspector));
    writer.write(serde.serialize(new MyRow(3, 2), inspector));
    writer.close(true);

    serde = new OrcSerde();
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    serde.initialize(conf, properties);
    assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
    inspector = (StructObjectInspector) serde.getObjectInspector();
    assertEquals("struct<x:int,y:int>", inspector.getTypeName());

    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // Test the validateInput method.
    ArrayList<FileStatus> fileList = new ArrayList<FileStatus>(3);
    assertEquals(false, ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    fileList.add(fs.getFileStatus(testFilePath));
    assertEquals(true, ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    fileList.add(fs.getFileStatus(workDir));
    assertEquals(false, ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));

    // Read the whole file: progress is 0.0 before the first record and 1.0
    // once every row has been consumed.
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Writable value = (Writable) reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    IntObjectInspector intInspector = (IntObjectInspector) fields.get(0).getFieldObjectInspector();
    assertEquals(0.0, reader.getProgress(), 0.00001);
    assertEquals(0, reader.getPos());
    while (reader.next(key, value)) {
        assertEquals(++rowNum,
                intInspector.get(inspector.getStructFieldData(serde.deserialize(value), fields.get(0))));
        assertEquals(2,
                intInspector.get(inspector.getStructFieldData(serde.deserialize(value), fields.get(1))));
    }
    assertEquals(3, rowNum);
    assertEquals(1.0, reader.getProgress(), 0.00001);
    reader.close();

    // Read just the first column.
    conf.set("hive.io.file.readcolumn.ids", "0");
    reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    key = reader.createKey();
    value = (Writable) reader.createValue();
    rowNum = 0;
    fields = inspector.getAllStructFieldRefs();
    while (reader.next(key, value)) {
        assertEquals(++rowNum, intInspector.get(inspector.getStructFieldData(value, fields.get(0))));
        assertEquals(null, inspector.getStructFieldData(value, fields.get(1)));
    }
    assertEquals(3, rowNum);
    reader.close();

    // Test the mapping of the empty string to all columns.
    conf.set("hive.io.file.readcolumn.ids", "");
    reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    key = reader.createKey();
    value = (Writable) reader.createValue();
    rowNum = 0;
    fields = inspector.getAllStructFieldRefs();
    while (reader.next(key, value)) {
        assertEquals(++rowNum, intInspector.get(inspector.getStructFieldData(value, fields.get(0))));
        assertEquals(2,
                intInspector.get(inspector.getStructFieldData(serde.deserialize(value), fields.get(1))));
    }
    assertEquals(3, rowNum);
    reader.close();
}
From source file:com.facebook.hive.orc.TestInputOutputFormat.java
License:Apache License
@Test
public void testEmptyFile() throws Exception {
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
            properties, Reporter.NULL);
    writer.close(true);

    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    SerDe serde = new OrcSerde();
    serde.initialize(conf, properties);

    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // read the whole file
    conf.set("hive.io.file.readcolumn.ids", "0,1");
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    assertEquals(0.0, reader.getProgress(), 0.00001);
    assertEquals(0, reader.getPos());
    assertEquals(false, reader.next(key, value));
    reader.close();
    assertEquals(null, serde.getSerDeStats());
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableInputFormat.java
License:Apache License
@Override
public RecordReader<ImmutableBytesWritable, ResultWritable> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {
    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getTableSplit();

    Job job = new Job(jobConf);
    TaskAttemptContext tac = ShimLoader.getHadoopShims().newTaskAttemptContext(job.getConfiguration(),
            reporter);

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader;
    if (hbaseSplit.isTxIndexScan()) {
        LOG.info("getRecordReader: TxHiveIndexScan -> " + tableSplit);
        recordReader = TxHiveTableInputFormatUtil.createRecordReader(tableSplit, tac, jobConf);
    } else {
        LOG.info("getRecordReader: no TxHiveIndexScan -> " + tableSplit);
        setHTable(HiveHBaseInputFormatUtil.getTable(jobConf));
        setScan(HiveHBaseInputFormatUtil.getScan(jobConf));
        recordReader = createRecordReader(tableSplit, tac);
    }
    try {
        recordReader.initialize(tableSplit, tac);
    } catch (InterruptedException e) {
        throw new IOException("Failed to initialize RecordReader", e);
    }

    return new RecordReader<ImmutableBytesWritable, ResultWritable>() {

        @Override
        public void close() throws IOException {
            recordReader.close();
            closeTable();
        }

        @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }

        @Override
        public ResultWritable createValue() {
            return new ResultWritable(new Result());
        }

        @Override
        public long getPos() throws IOException {
            return 0;
        }

        @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;
            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return progress;
        }

        @Override
        public boolean next(ImmutableBytesWritable rowKey, ResultWritable value) throws IOException {
            boolean next = false;
            try {
                next = recordReader.nextKeyValue();
                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    value.setResult(recordReader.getCurrentValue());
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return next;
        }
    };
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableSnapshotInputFormat.java
License:Apache License
@Override
public RecordReader<ImmutableBytesWritable, ResultWritable> getRecordReader(InputSplit split, JobConf job,
        Reporter reporter) throws IOException {
    setColumns(job);
    final RecordReader<ImmutableBytesWritable, Result> rr = delegate
            .getRecordReader(((HBaseSplit) split).getSnapshotSplit(), job, reporter);

    return new RecordReader<ImmutableBytesWritable, ResultWritable>() {

        @Override
        public boolean next(ImmutableBytesWritable key, ResultWritable value) throws IOException {
            return rr.next(key, value.getResult());
        }

        @Override
        public ImmutableBytesWritable createKey() {
            return rr.createKey();
        }

        @Override
        public ResultWritable createValue() {
            return new ResultWritable(rr.createValue());
        }

        @Override
        public long getPos() throws IOException {
            return rr.getPos();
        }

        @Override
        public void close() throws IOException {
            rr.close();
        }

        @Override
        public float getProgress() throws IOException {
            return rr.getProgress();
        }
    };
}
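Note that, unlike the wrappers above, both the delegate and the wrapper here use the old mapred API, so getProgress() passes straight through with no InterruptedException-to-IOException translation.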
From source file:com.ibm.jaql.io.hadoop.CompositeInputAdapter.java
License:Apache License
@SuppressWarnings("unchecked") public RecordReader<JsonHolder, JsonHolder> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { CompositeSplit cSplit = (CompositeSplit) split; // 1. get the InputAdapter's array index (i) from the split final int idx = cSplit.getAdapterIdx(); InputSplit baseSplit = cSplit.getSplit(); try {//from ww w. java 2 s.c o m // 2. get the ith adapter's args record JsonValue value = this.args.get(idx); // JRecord baseArgs = (JRecord) item.getNonNull(); // record the current index to the job conf // ASSUMES: in map/reduce, the format's record reader is called *before* // the map class is configured writeCurrentIndex(job, idx); // FIXME: no longer needed // 3. insantiate and initialize the adapter HadoopInputAdapter adapter = (HadoopInputAdapter) AdapterStore.getStore().input .getAdapter(/** baseArgs, */ value); // 4. create a new JobConf j' JobConf jTmp = new JobConf(job); // 5. call adapter's setupConf(j') // ConfiguratorUtil.writeToConf(adapter, jTmp, item/**baseArgs*/); adapter.setParallel(jTmp); // 6. configure the adapter from j' adapter.configure(jTmp); // 7. call adapter's getRecordReader with j' final RecordReader<JsonHolder, JsonHolder> reader = (RecordReader<JsonHolder, JsonHolder>) adapter .getRecordReader(baseSplit, jTmp, reporter); if (!addIndex) { return reader; } return new RecordReader<JsonHolder, JsonHolder>() { @Override public void close() throws IOException { reader.close(); } @Override public JsonHolder createKey() { return reader.createKey(); } @Override public JsonHolder createValue() { return reader.createValue(); } @Override public long getPos() throws IOException { return reader.getPos(); } @Override public float getProgress() throws IOException { return reader.getProgress(); } @Override public boolean next(JsonHolder key, JsonHolder value) throws IOException { BufferedJsonArray pair = (BufferedJsonArray) value.value; if (pair != null) { value.value = pair.get(1); } else { pair = new BufferedJsonArray(2); pair.set(0, JsonLong.make(idx)); } if (reader.next(key, value)) { pair.set(1, value.value); value.value = pair; return true; } return false; } }; } catch (Exception e) { return null; } }
From source file:com.ibm.jaql.io.hadoop.DefaultHadoopInputAdapter.java
License:Apache License
@SuppressWarnings("unchecked") public RecordReader<JsonHolder, JsonHolder> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { if (split instanceof DHIASplit) { // not using order-preserving wrapper split = ((DHIASplit) split).split; }// w ww . j ava2s .c o m if (converter == null) return ((InputFormat<JsonHolder, JsonHolder>) iFormat).getRecordReader(split, job, reporter); final RecordReader<K, V> baseReader = ((InputFormat<K, V>) iFormat).getRecordReader(split, job, reporter); final K baseKey = baseReader.createKey(); final V baseValue = baseReader.createValue(); return new RecordReader<JsonHolder, JsonHolder>() { public void close() throws IOException { baseReader.close(); } public JsonHolder createKey() { return keyHolder(); } public JsonHolder createValue() { JsonHolder holder = valueHolder(); holder.value = converter.createTarget(); return holder; } public long getPos() throws IOException { return baseReader.getPos(); } public float getProgress() throws IOException { return baseReader.getProgress(); } public boolean next(JsonHolder key, JsonHolder value) throws IOException { boolean hasMore = baseReader.next(baseKey, baseValue); if (!hasMore) return false; value.value = converter.convert(baseKey, baseValue, value.value); return true; } }; }
From source file:org.datavec.hadoop.records.reader.TestBasicHDFS_Integration.java
License:Apache License
/**
 * Things we'd need:
 * 1. JobConf
 * 2. some way to get input splits
 *
 * @throws IOException
 */
@Test
public void testParametersInputSplitSetup() throws IOException {
    JobConf job = new JobConf(defaultConf);

    // app.input.path
    String split_filename = "src/test/resources/records/reader/SVMLightRecordReaderInput/record_reader_input_test.txt";
    Path splitPath = new Path(split_filename);

    InputSplit[] splits = generateDebugSplits(splitPath, job);
    System.out.println("split count: " + splits.length);

    TextInputFormat format = new TextInputFormat();
    format.configure(job);

    RecordReader<LongWritable, Text> reader = null;
    LongWritable key = new LongWritable();
    Text value = new Text();
    final Reporter voidReporter = Reporter.NULL;
    reader = format.getRecordReader(splits[0], job, voidReporter);

    // Read until the reader reports the whole split consumed, but stop as
    // soon as next() signals end-of-input so a reader that never reaches
    // progress 1.0 cannot spin forever.
    while (reader.getProgress() < 1.0) {
        if (!reader.next(key, value)) {
            break;
        }
        System.out.println("line: " + value.toString());
    }
    reader.close();
}
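Here getProgress() drives the read loop itself: the test keeps calling next() until the reader reports that the whole split has been consumed. The guard on next()'s return value is what keeps the loop safe for readers whose reported progress never quite reaches 1.0.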
From source file:org.terrier.structures.indexing.singlepass.hadoop.BitPostingIndexInputFormat.java
License:Mozilla Public License
/** Test method, runs splits for inverted/lexicon with the command line specified index */
public static void main(String[] args) throws Exception {
    Index.setIndexLoadingProfileAsRetrieval(false);
    IndexOnDisk index = Index.createIndex(args[1], args[2]);
    if (args[0].equals("--splits")) {
        JobConf job = HadoopPlugin.getJobFactory(BitPostingIndexInputFormat.class.getSimpleName()).newJob();
        HadoopUtility.toHConfiguration(index, job);
        setStructures(job, "inverted", "lexicon");
        index.close();
        new BitPostingIndexInputFormat().getSplits(job, 100);
    } else {
        JobConf job = HadoopPlugin.getJobFactory(BitPostingIndexInputFormat.class.getSimpleName()).newJob();
        setStructures(job, "linksin", "linksin-lookup");
        HadoopUtility.toHConfiguration(index, job);
        index.close();
        InputSplit s = new BitPostingIndexInputSplit(new Path(args[3]), Long.parseLong(args[4]),
                Long.parseLong(args[5]), new String[0], Integer.parseInt(args[6]), Integer.parseInt(args[7]));
        RecordReader<IntWritable, IntObjectWrapper<IterablePosting>> rr = new BitPostingIndexInputFormat()
                .getRecordReader(s, job, new Reporter() {
                    public InputSplit getInputSplit() throws UnsupportedOperationException {
                        return null;
                    }

                    @SuppressWarnings({ "rawtypes" })
                    public void incrCounter(Enum arg0, long arg1) {
                    }

                    public void incrCounter(String arg0, String arg1, long arg2) {
                    }

                    @SuppressWarnings({ "rawtypes" })
                    public org.apache.hadoop.mapred.Counters.Counter getCounter(Enum arg0) {
                        return null;
                    }

                    public org.apache.hadoop.mapred.Counters.Counter getCounter(String arg0, String arg1) {
                        return null;
                    }

                    public void setStatus(String arg0) {
                    }

                    public void progress() {
                    }
                });
        IntWritable key = rr.createKey();
        IntObjectWrapper<IterablePosting> value = rr.createValue();
        long pointers = 0;
        int lastId = 0;
        int nonZeroEntryCount = 0;
        float maxProgress = 0;
        while (rr.next(key, value)) {
            IterablePosting ip = value.getObject();
            lastId = key.get();
            while (ip.next() != IterablePosting.EOL) {
                pointers++;
            }
            nonZeroEntryCount++;
            if (rr.getProgress() > maxProgress)
                maxProgress = rr.getProgress();
        }
        rr.close();
        System.out.println("maxProgress=" + maxProgress + " Lastid=" + lastId + " nonZeroEntryCount="
                + nonZeroEntryCount + " postings=" + pointers);
    }
}