Example usage for org.apache.hadoop.mapreduce RecordReader nextKeyValue

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce RecordReader nextKeyValue.

Prototype

public abstract boolean nextKeyValue() throws IOException, InterruptedException;

Document

Read the next key, value pair.
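
Whatever the concrete InputFormat, the calling pattern is the same: create a RecordReader for a split, initialize it, then loop on nextKeyValue() until it returns false, reading each pair with getCurrentKey() and getCurrentValue(), and close the reader when done. The minimal sketch below illustrates that loop; the TextInputFormat, the /tmp/input.txt path, and the NextKeyValueSketch driver class are illustrative assumptions, not taken from the examples on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class NextKeyValueSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Hypothetical input path; point it at an existing text file before running.
        FileInputFormat.addInputPath(job, new Path("/tmp/input.txt"));

        InputFormat<LongWritable, Text> inputFormat = new TextInputFormat();
        TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());

        for (InputSplit split : inputFormat.getSplits(job)) {
            RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
            // The reader must be initialized with its split before the first call to nextKeyValue().
            reader.initialize(split, context);
            try {
                // nextKeyValue() advances to the next pair and returns false once the split is exhausted.
                while (reader.nextKeyValue()) {
                    LongWritable key = reader.getCurrentKey();
                    Text value = reader.getCurrentValue();
                    System.out.println(key + "\t" + value);
                }
            } finally {
                reader.close();
            }
        }
    }
}

In a real MapReduce job the framework drives this loop for each mapper; the examples below call nextKeyValue() directly from tests and operators.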

Usage

From source file:org.apache.rya.accumulo.mr.GraphXEdgeInputFormatTest.java

License:Apache License

@SuppressWarnings("rawtypes")
@Test
public void testInputFormat() throws Exception {
    RyaStatement input = RyaStatement.builder().setSubject(new RyaURI("http://www.google.com"))
            .setPredicate(new RyaURI("http://some_other_uri")).setObject(new RyaURI("http://www.yahoo.com"))
            .setColumnVisibility(new byte[0]).setValue(new byte[0]).build();

    apiImpl.add(input);

    Job jobConf = Job.getInstance();

    GraphXEdgeInputFormat.setMockInstance(jobConf, instance.getInstanceName());
    GraphXEdgeInputFormat.setConnectorInfo(jobConf, username, password);
    GraphXEdgeInputFormat.setTableLayout(jobConf, TABLE_LAYOUT.SPO);
    GraphXEdgeInputFormat.setInputTableName(jobConf, table);
    GraphXEdgeInputFormat.setInputTableName(jobConf, table);

    GraphXEdgeInputFormat.setScanIsolation(jobConf, false);
    GraphXEdgeInputFormat.setLocalIterators(jobConf, false);
    GraphXEdgeInputFormat.setOfflineTableScan(jobConf, false);

    GraphXEdgeInputFormat inputFormat = new GraphXEdgeInputFormat();

    JobContext context = new JobContextImpl(jobConf.getConfiguration(), jobConf.getJobID());

    List<InputSplit> splits = inputFormat.getSplits(context);

    Assert.assertEquals(1, splits.size());

    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(context.getConfiguration(),
            new TaskAttemptID(new TaskID(), 1));

    RecordReader reader = inputFormat.createRecordReader(splits.get(0), taskAttemptContext);

    RecordReader ryaStatementRecordReader = (RecordReader) reader;
    ryaStatementRecordReader.initialize(splits.get(0), taskAttemptContext);

    List<Edge> results = new ArrayList<Edge>();
    while (ryaStatementRecordReader.nextKeyValue()) {
        Edge writable = (Edge) ryaStatementRecordReader.getCurrentValue();
        long srcId = writable.srcId();
        long destId = writable.dstId();
        RyaTypeWritable rtw = null;
        Object text = ryaStatementRecordReader.getCurrentKey();
        Edge<RyaTypeWritable> edge = new Edge<RyaTypeWritable>(srcId, destId, rtw);
        results.add(edge);

        System.out.println(text);
    }

    System.out.println(results.size());
    System.out.println(results);
    Assert.assertTrue(results.size() == 2);
}

From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.RecordReaderWriterTest.java

License:Apache License

private static void validateFileSplits(final List<FileSplit> fileSplits, final Configuration configuration,
        final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass,
        final Optional<Class<? extends OutputFormat<NullWritable, VertexWritable>>> outFormatClass)
        throws Exception {

    final InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass, configuration);
    final TaskAttemptContext job = new TaskAttemptContextImpl(configuration,
            new TaskAttemptID(UUID.randomUUID().toString(), 0, TaskType.MAP, 0, 0));

    int vertexCount = 0;
    int outEdgeCount = 0;
    int inEdgeCount = 0;

    final OutputFormat<NullWritable, VertexWritable> outputFormat = outFormatClass.isPresent()
            ? ReflectionUtils.newInstance(outFormatClass.get(), configuration)
            : null;
    final RecordWriter<NullWritable, VertexWritable> writer = null == outputFormat ? null
            : outputFormat.getRecordWriter(job);

    boolean foundKeyValue = false;
    for (final FileSplit split : fileSplits) {
        logger.info("\treading file split {} ({}...{} bytes)", split.getPath().getName(), split.getStart(),
                split.getStart() + split.getLength());
        final RecordReader reader = inputFormat.createRecordReader(split, job);

        float lastProgress = -1f;
        while (reader.nextKeyValue()) {
            //System.out.println("" + reader.getProgress() + "> " + reader.getCurrentKey() + ": " + reader.getCurrentValue());
            final float progress = reader.getProgress();
            assertTrue(progress >= lastProgress);
            assertEquals(NullWritable.class, reader.getCurrentKey().getClass());
            final VertexWritable vertexWritable = (VertexWritable) reader.getCurrentValue();
            if (null != writer)
                writer.write(NullWritable.get(), vertexWritable);
            vertexCount++;
            outEdgeCount = outEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.OUT));
            inEdgeCount = inEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.IN));
            //
            final Vertex vertex = vertexWritable.get();
            assertEquals(Integer.class, vertex.id().getClass());
            if (vertex.value("name").equals("SUGAR MAGNOLIA")) {
                foundKeyValue = true;
                assertEquals(92, IteratorUtils.count(vertex.edges(Direction.OUT)));
                assertEquals(77, IteratorUtils.count(vertex.edges(Direction.IN)));
            }
            lastProgress = progress;
        }
    }

    assertEquals(8049, outEdgeCount);
    assertEquals(8049, inEdgeCount);
    assertEquals(outEdgeCount, inEdgeCount);
    assertEquals(808, vertexCount);
    assertTrue(foundKeyValue);

    if (null != writer) {
        writer.close(new TaskAttemptContextImpl(configuration, job.getTaskAttemptID()));
        for (int i = 1; i < 10; i++) {
            final File outputDirectory = new File(
                    new URL(configuration.get("mapreduce.output.fileoutputformat.outputdir")).toURI());
            final List<FileSplit> splits = generateFileSplits(
                    new File(outputDirectory.getAbsoluteFile() + "/_temporary/0/_temporary/"
                            + job.getTaskAttemptID().getTaskID().toString().replace("task", "attempt") + "_0"
                            + "/part-m-00000"),
                    i);
            validateFileSplits(splits, configuration, inputFormatClass, Optional.empty());
        }
    }
}

From source file:org.apache.vxquery.metadata.VXQueryCollectionOperatorDescriptor.java

License:Apache License

@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions)
        throws HyracksDataException {
    final FrameTupleAccessor fta = new FrameTupleAccessor(ctx.getFrameSize(),
            recordDescProvider.getInputRecordDescriptor(getActivityId(), 0));
    final int fieldOutputCount = recordDescProvider.getOutputRecordDescriptor(getActivityId(), 0)
            .getFieldCount();
    final ByteBuffer frame = ctx.allocateFrame();
    final FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize(), fieldOutputCount);
    final short partitionId = (short) ctx.getTaskAttemptId().getTaskId().getPartition();
    final ITreeNodeIdProvider nodeIdProvider = new TreeNodeIdProvider(partitionId, dataSourceId,
            totalDataSources);
    final String nodeId = ctx.getJobletContext().getApplicationContext().getNodeId();
    final DynamicContext dCtx = (DynamicContext) ctx.getJobletContext().getGlobalJobData();

    final String collectionName = collectionPartitions[partition % collectionPartitions.length];
    final XMLParser parser = new XMLParser(false, nodeIdProvider, nodeId, frame, appender, childSeq,
            dCtx.getStaticContext());

    return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
        @Override
        public void open() throws HyracksDataException {
            appender.reset(frame, true);
            writer.open();
            hdfs = new HDFSFunctions();
        }

        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            fta.reset(buffer);
            String collectionModifiedName = collectionName.replace("${nodeId}", nodeId);
            if (!collectionModifiedName.contains("hdfs:/")) {
                File collectionDirectory = new File(collectionModifiedName);
                //check if directory is in the local file system
                if (collectionDirectory.exists()) {
                    // Go through each tuple.
                    if (collectionDirectory.isDirectory()) {
                        for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) {
                            Iterator<File> it = FileUtils.iterateFiles(collectionDirectory,
                                    new VXQueryIOFileFilter(), TrueFileFilter.INSTANCE);
                            while (it.hasNext()) {
                                File xmlDocument = it.next();
                                if (LOGGER.isLoggable(Level.FINE)) {
                                    LOGGER.fine(
                                            "Starting to read XML document: " + xmlDocument.getAbsolutePath());
                                }
                                parser.parseElements(xmlDocument, writer, fta, tupleIndex);
                            }
                        }
                    } else {
                        throw new HyracksDataException("Invalid directory parameter (" + nodeId + ":"
                                + collectionDirectory.getAbsolutePath() + ") passed to collection.");
                    }
                }
            } else {
                // Else check in HDFS file system
                // Get instance of the HDFS filesystem
                FileSystem fs = hdfs.getFileSystem();
                if (fs != null) {
                    collectionModifiedName = collectionModifiedName.replaceAll("hdfs:/", "");
                    Path directory = new Path(collectionModifiedName);
                    Path xmlDocument;
                    if (tag != null) {
                        hdfs.setJob(directory.getName(), tag);
                        tag = "<" + tag + ">";
                        Job job = hdfs.getJob();
                        InputFormat inputFormat = hdfs.getinputFormat();
                        try {
                            hdfs.scheduleSplits();
                            ArrayList<Integer> schedule = hdfs
                                    .getScheduleForNode(InetAddress.getLocalHost().getHostName());
                            List<InputSplit> splits = hdfs.getSplits();
                            List<FileSplit> fileSplits = new ArrayList<FileSplit>();
                            for (int i : schedule) {
                                fileSplits.add((FileSplit) splits.get(i));
                            }
                            FileSplitsFactory splitsFactory = new FileSplitsFactory(fileSplits);
                            List<FileSplit> inputSplits = splitsFactory.getSplits();
                            ContextFactory ctxFactory = new ContextFactory();
                            int size = inputSplits.size();
                            InputStream stream;
                            String value;
                            RecordReader reader;
                            TaskAttemptContext context;
                            for (int i = 0; i < size; i++) {
                                //read split
                                context = ctxFactory.createContext(job.getConfiguration(), i);
                                try {
                                    reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                                    reader.initialize(inputSplits.get(i), context);
                                    while (reader.nextKeyValue()) {
                                        value = reader.getCurrentValue().toString();
                                        //Split value if it contains more than one item with the tag
                                        if (StringUtils.countMatches(value, tag) > 1) {
                                            String items[] = value.split(tag);
                                            for (String item : items) {
                                                if (item.length() > 0) {
                                                    item = START_TAG + tag + item;
                                                    stream = new ByteArrayInputStream(
                                                            item.getBytes(StandardCharsets.UTF_8));
                                                    parser.parseHDFSElements(stream, writer, fta, i);
                                                }
                                            }
                                        } else {
                                            value = START_TAG + value;
                                            //create an input stream to the file currently reading and send it to parser
                                            stream = new ByteArrayInputStream(
                                                    value.getBytes(StandardCharsets.UTF_8));
                                            parser.parseHDFSElements(stream, writer, fta, i);
                                        }
                                    }

                                } catch (InterruptedException e) {
                                    if (LOGGER.isLoggable(Level.SEVERE)) {
                                        LOGGER.severe(e.getMessage());
                                    }
                                }
                            }

                        } catch (IOException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (ParserConfigurationException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (SAXException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        }
                    } else {
                        try {
                            //check if the path exists and is a directory
                            if (fs.exists(directory) && fs.isDirectory(directory)) {
                                for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) {
                                    //read every file in the directory
                                    RemoteIterator<LocatedFileStatus> it = fs.listFiles(directory, true);
                                    while (it.hasNext()) {
                                        xmlDocument = it.next().getPath();
                                        if (fs.isFile(xmlDocument)) {
                                            if (LOGGER.isLoggable(Level.FINE)) {
                                                LOGGER.fine("Starting to read XML document: "
                                                        + xmlDocument.getName());
                                            }
                                            //create an input stream to the file currently reading and send it to parser
                                            InputStream in = fs.open(xmlDocument).getWrappedStream();
                                            parser.parseHDFSElements(in, writer, fta, tupleIndex);
                                        }
                                    }
                                }
                            } else {
                                throw new HyracksDataException("Invalid HDFS directory parameter (" + nodeId
                                        + ":" + directory + ") passed to collection.");
                            }
                        } catch (FileNotFoundException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        } catch (IOException e) {
                            if (LOGGER.isLoggable(Level.SEVERE)) {
                                LOGGER.severe(e.getMessage());
                            }
                        }
                    }
                    try {
                        fs.close();
                    } catch (IOException e) {
                        if (LOGGER.isLoggable(Level.SEVERE)) {
                            LOGGER.severe(e.getMessage());
                        }
                    }
                }
            }
        }

        @Override
        public void fail() throws HyracksDataException {
            writer.fail();
        }

        @Override
        public void close() throws HyracksDataException {
            // Check if needed?
            fta.reset(frame);
            if (fta.getTupleCount() > 0) {
                FrameUtils.flushFrame(frame, writer);
            }
            writer.close();
        }
    };
}

From source file:org.commoncrawl.hadoop.io.mapreduce.ArcFileInputFormatTests.java

License:Apache License

static void validateSplit(FileSystem fs, InputSplit split, List<Pair<Path, List<TestRecord>>> splits,
        RecordReader<Text, BytesWritable> reader) throws IOException, InterruptedException {

    int splitDataIndex = getIndexOfSplit(splits, split);

    Assert.assertTrue(splitDataIndex != -1);

    List<TestRecord> records = splits.get(splitDataIndex).e1;

    int itemIndex = 0;
    // iterate and validate stuff ... 
    while (reader.nextKeyValue()) {
        Text key = reader.getCurrentKey();
        BytesWritable value = reader.getCurrentValue();

        TestRecord testRecord = records.get(itemIndex++);
        // get test key bytes as utf-8 bytes ... 
        byte[] testKeyBytes = testRecord.url.getBytes(Charset.forName("UTF-8"));
        // compare against raw key bytes to validate the key is the same (Text's utf-8 mapping code replaces invalid
        // characters with '?', which would cause our test case, which uses invalid characters to form the key, to break).
        Assert.assertTrue(ArcFileReaderTests.compareTo(testKeyBytes, 0, testKeyBytes.length, key.getBytes(), 0,
                key.getLength()) == 0);
        // returned bytes represent the header (encoded in utf-8), terminated by \r\n\r\n. The content follows this terminator;
        // we search for this specific byte pattern to locate start of content, then compare it against source ... 
        int indexofHeaderTerminator = ByteArrayUtils.indexOf(value.getBytes(), 0, value.getLength(),
                "\r\n\r\n".getBytes());
        indexofHeaderTerminator += 4;
        Assert.assertTrue(ArcFileReaderTests.compareTo(testRecord.data, 0, testRecord.data.length,
                value.getBytes(), indexofHeaderTerminator, testRecord.data.length) == 0);
    }
    reader.close();

    Assert.assertEquals(itemIndex, ArcFileReaderTests.BASIC_TEST_RECORD_COUNT);

    splits.remove(splitDataIndex);

}

From source file:org.commoncrawl.hadoop.io.mapreduce.ArcFileInputFormatTests.java

License:Apache License

static void validateArcFileItemSplit(FileSystem fs, InputSplit split, List<Pair<Path, List<TestRecord>>> splits,
        RecordReader<Text, ArcFileItem> reader) throws IOException, InterruptedException {

    int splitDataIndex = getIndexOfSplit(splits, split);

    Assert.assertTrue(splitDataIndex != -1);

    List<TestRecord> records = splits.get(splitDataIndex).e1;

    int itemIndex = 0;
    // iterate and validate stuff ...
    while (reader.nextKeyValue()) {

        Text key = reader.getCurrentKey();
        ArcFileItem value = reader.getCurrentValue();

        TestRecord testRecord = records.get(itemIndex++);

        // get test key bytes as utf-8 bytes ... 
        byte[] testKeyBytes = testRecord.url.getBytes(Charset.forName("UTF-8"));
        // compare against raw key bytes to validate the key is the same (Text's utf-8 mapping code replaces invalid
        // characters with '?', which would cause our test case, which uses invalid characters to form the key, to break).
        Assert.assertTrue(ArcFileReaderTests.compareTo(testKeyBytes, 0, testKeyBytes.length, key.getBytes(), 0,
                key.getLength()) == 0);
        // returned bytes represent the header (encoded in utf-8), terminated by \r\n\r\n. The content follows this terminator;
        // we search for this specific byte pattern to locate start of content, then compare it against source ... 
        Assert.assertTrue(ArcFileReaderTests.compareTo(testRecord.data, 0, testRecord.data.length,
                value.getContent().getReadOnlyBytes(), value.getContent().getOffset(),
                value.getContent().getCount()) == 0);
        NIOHttpHeaders headers = ArcFileItemUtils.buildHeaderFromArcFileItemHeaders(value.getHeaderItems());
        // validate metadata 
        Assert.assertEquals("text/html", headers.findValue(Constants.ARCFileHeader_ARC_MimeType));
        Assert.assertEquals(value.getArcFilePos(), testRecord.streamPos);
        Assert.assertEquals(value.getArcFileSize(), testRecord.rawSize);
        Assert.assertEquals("test-value", headers.findValue("test"));
        Assert.assertEquals(value.getArcFileName(), ((FileSplit) split).getPath().getName());

    }
    reader.close();

    Assert.assertEquals(itemIndex, ArcFileReaderTests.BASIC_TEST_RECORD_COUNT);

    splits.remove(splitDataIndex);

}

From source file:org.kududb.mapreduce.TestKuduTableInputFormat.java

License:Apache License

@Test
public void test() throws Exception {
    createTable(TABLE_NAME, getBasicSchema(), new CreateTableOptions());

    KuduTable table = openTable(TABLE_NAME);
    Schema schema = getBasicSchema();
    Insert insert = table.newInsert();
    PartialRow row = insert.getRow();
    row.addInt(0, 1);
    row.addInt(1, 2);
    row.addInt(2, 3);
    row.addString(3, "a string");
    row.addBoolean(4, true);
    AsyncKuduSession session = client.newSession();
    session.apply(insert).join(DEFAULT_SLEEP);
    session.close().join(DEFAULT_SLEEP);

    // Test getting all the columns back
    RecordReader<NullWritable, RowResult> reader = createRecordReader("*", null);
    assertTrue(reader.nextKeyValue());
    assertEquals(5, reader.getCurrentValue().getColumnProjection().getColumnCount());
    assertFalse(reader.nextKeyValue());

    // Test getting two columns back
    reader = createRecordReader(
            schema.getColumnByIndex(3).getName() + "," + schema.getColumnByIndex(2).getName(), null);
    assertTrue(reader.nextKeyValue());
    assertEquals(2, reader.getCurrentValue().getColumnProjection().getColumnCount());
    assertEquals("a string", reader.getCurrentValue().getString(0));
    assertEquals(3, reader.getCurrentValue().getInt(1));
    try {
        reader.getCurrentValue().getString(2);
        fail("Should only be getting 2 columns back");
    } catch (IndexOutOfBoundsException e) {
        // expected
    }

    // Test getting one column back
    reader = createRecordReader(schema.getColumnByIndex(1).getName(), null);
    assertTrue(reader.nextKeyValue());
    assertEquals(1, reader.getCurrentValue().getColumnProjection().getColumnCount());
    assertEquals(2, reader.getCurrentValue().getInt(0));
    try {
        reader.getCurrentValue().getString(1);
        fail("Should only be getting 1 column back");
    } catch (IndexOutOfBoundsException e) {
        // expected
    }

    // Test getting empty rows back
    reader = createRecordReader("", null);
    assertTrue(reader.nextKeyValue());
    assertEquals(0, reader.getCurrentValue().getColumnProjection().getColumnCount());
    assertFalse(reader.nextKeyValue());

    // Test getting an unknown table, will not work
    try {
        createRecordReader("unknown", null);
        fail("Should not be able to scan a column that doesn't exist");
    } catch (IllegalArgumentException e) {
        // expected
    }

    // Test using a predicate that filters the row out.
    ColumnRangePredicate pred1 = new ColumnRangePredicate(schema.getColumnByIndex(1));
    pred1.setLowerBound(3);
    reader = createRecordReader("*", Lists.newArrayList(pred1));
    assertFalse(reader.nextKeyValue());
}

From source file:org.mrgeo.format.CsvInputFormatTest.java

License:Apache License

@Test
@Category(UnitTest.class)
public void testNullIgnore() throws Exception {
    FileSystem fs = new RawLocalFileSystem();
    try {
        int lineCount = 0;

        // Write columns file which defines the columns title and type
        String cstr = "<?xml version='1.0' encoding='UTF-8'?>\n<AllColumns firstLineHeader='false'>\n";
        cstr += "  <Column name='name' type='Nominal'/>\n";
        cstr += "  <Column name='x' type='Numeric'/>\n";
        cstr += "  <Column name='y' type='Numeric'/>\n";
        cstr += "</AllColumns>\n";
        FileOutputStream fos = new FileOutputStream(output + "/nullXY.csv.columns");
        PrintStream ps = new PrintStream(fos);
        ps.print(cstr);
        ps.close();

        // Write csv test data
        fos = new FileOutputStream(output + "/nullXY.csv");
        ps = new PrintStream(fos);
        // populated rows
        for (int ii = 0; ii < 10; ii++) {
            ps.print("ASDF,1.0,1.0\n");
            lineCount++;
        }
        // empty rows
        ps.print("ASDF,,1.0\n");
        ps.print("ASDF,1.0,\n");
        ps.print("ASDF,,\n");
        lineCount += 3;
        // populated rows
        for (int ii = 0; ii < 5; ii++) {
            ps.print("ASDF,1.0,1.0\n");
            lineCount++;
        }
        ps.close();

        System.out.println(output + "/nullXY.csv");

        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(output, "nullXY.csv");
        testFile = fs.makeQualified(testFile);
        InputSplit split;
        long l;
        long start;

        TextInputFormat format = new TextInputFormat();
        split = new FileSplit(testFile, 0, lineCount * 1000, null);
        RecordReader<LongWritable, Text> reader2 = format.createRecordReader(split,
                HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));

        reader2.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));
        l = 0;
        start = System.currentTimeMillis();
        while (reader2.nextKeyValue()) {
            reader2.getCurrentValue().toString();
            l++;
        }
        Assert.assertEquals(lineCount, l);
        System.out.printf("text line reader with null x,y ignore: %d\n", System.currentTimeMillis() - start);

    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        fs.close();
    }
}

From source file:org.mrgeo.format.OsmContentHandlerTest.java

License:Apache License

@Ignore
@Test
public void OfftestBenchmark() throws Exception {
    // @TODO this class and its unit tests are a work in progress.
    FileSystem fs = new RawLocalFileSystem();
    try {
        String input = TestUtils.composeInputDir(OsmContentHandlerTest.class);

        Configuration c = new Configuration();
        c.set("xml.content.handler", OsmContentHandler.class.getCanonicalName());
        c.set("xml.pattern", "node");
        c.set("xml.root.tag", "osm");

        fs.setConf(c);
        Path testFile = new Path(input, "sample.osm");
        testFile = fs.makeQualified(testFile);

        c.set("xml.pattern", "place");

        FileSplit split = new FileSplit(testFile, 0, 64 * 1048576, null);
        RecordReader<LongWritable, Geometry> reader = new SaxInputFormat<LongWritable, Geometry>()
                .createRecordReader(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));

        int l = 0;
        long start = new Date().getTime();
        while (reader.nextKeyValue()) {
            l++;
        }
        long elapsed = new Date().getTime() - start;
        log.debug("ms per record: {} record count: {}", (double) elapsed / (double) l, l);
        Assert.assertEquals(1, l);
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        fs.close();
    }
}

From source file:org.mrgeo.format.OsmContentHandlerTest.java

License:Apache License

@Ignore
@Test
public void OfftestBenchmark2() throws Exception {
    // @TODO this class and its unit tests are a work in progress.
    try {

        Configuration c = new Configuration();
        c.set("xml.content.handler", OsmContentHandler.class.getCanonicalName());
        c.set("xml.pattern", "node");
        c.set("xml.root.tag", "osm");

        FileSystem fs = HadoopFileUtils.getFileSystem();
        fs.setConf(c);
        Path testFile = new Path("/user/jason.surratt/", "georgia.osm");
        testFile = fs.makeQualified(testFile);

        c.set("xml.pattern", "place");

        FileSplit split = new FileSplit(testFile, 0, fs.getFileStatus(testFile).getLen(), null);
        RecordReader<LongWritable, Geometry> reader = new SaxInputFormat<LongWritable, Geometry>()
                .createRecordReader(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));

        FileOutputStream fos = new FileOutputStream("georgia-points.txt");
        BufferedWriter wr = new BufferedWriter(new OutputStreamWriter(fos));

        Formatter formatter = new Formatter(wr, Locale.US);

        int l = 0;
        long start = new Date().getTime();
        while (reader.nextKeyValue() && l < 10000) {
            l++;
            Geometry f = reader.getCurrentValue();
            if (f instanceof Point) {
                Point p = (Point) f;
                formatter.format("%.7f %.7f\n", p.getX(), p.getY());
            }
        }

        formatter.close();

        long elapsed = new Date().getTime() - start;
        log.debug("ms per record: {} record count: {}", (double) elapsed / (double) l, l);
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    }
}

From source file:org.mrgeo.format.PgQueryInputFormatTest.java

License:Apache License

@Test
@Category(UnitTest.class)
public void testComplexPolygonRead() throws Exception {

    RecordReader<LongWritable, Geometry> r = openReader(new Path(input, "sql_test.sql"));

    if (r != null) {
        String[] base = {
                "value:10.0 GEOMETRY:POLYGON ((48.513333329928 14.795000002082, 48.513333329928 14.794166668749, 48.519999996592 14.794166668749, 48.519999996592 14.795000002082, 48.522499996591 14.795000002082, 48.522499996591 14.794166668749, 48.524166663257 14.794166668749, 48.524166663257 14.795833335415, 48.52499999659 14.795833335415, 48.52499999659 14.796666668748, 48.524166663257 14.796666668748, 48.524166663257 14.799166668747, 48.523333329924 14.799166668747, 48.523333329924 14.80000000208, 48.522499996591 14.80000000208, 48.522499996591 14.803333335412, 48.520833329925 14.803333335412, 48.520833329925 14.805000002078, 48.518333329926 14.805000002078, 48.518333329926 14.805833335411, 48.517499996593 14.805833335411, 48.517499996593 14.806666668744, 48.515833329927 14.806666668744, 48.515833329927 14.807500002077, 48.514999996594 14.807500002077, 48.514999996594 14.810000002076, 48.514166663261 14.810000002076, 48.514166663261 14.810833335409, 48.513333329928 14.810833335409, 48.513333329928 14.811666668742, 48.503333329932 14.811666668742, 48.503333329932 14.810833335409, 48.495833329935 14.810833335409, 48.495833329935 14.80833333541, 48.496666663268 14.80833333541, 48.496666663268 14.807500002077, 48.498333329934 14.807500002077, 48.498333329934 14.806666668744, 48.500833329933 14.806666668744, 48.500833329933 14.805833335411, 48.502499996599 14.805833335411, 48.502499996599 14.805000002078, 48.504166663265 14.805000002078, 48.504166663265 14.804166668745, 48.504999996598 14.804166668745, 48.504999996598 14.801666668746, 48.505833329931 14.801666668746, 48.505833329931 14.799166668747, 48.506666663264 14.799166668747, 48.506666663264 14.798333335414, 48.509166663263 14.798333335414, 48.509166663263 14.797500002081, 48.509999996596 14.797500002081, 48.509999996596 14.798333335414, 48.511666663262 14.798333335414, 48.511666663262 14.796666668748, 48.512499996595 14.796666668748, 48.512499996595 14.795000002082, 48.513333329928 14.795000002082)) ",
                "value:100.0 GEOMETRY:POLYGON ((48.586666663232 14.7500000021, 48.586666663232 14.750833335433, 48.589999996564 14.750833335433, 48.589999996564 14.754166668765, 48.589166663231 14.754166668765, 48.589166663231 14.757500002097, 48.588333329898 14.757500002097, 48.588333329898 14.75833333543, 48.587499996565 14.75833333543, 48.587499996565 14.759166668763, 48.585833329899 14.759166668763, 48.585833329899 14.75833333543, 48.582499996567 14.75833333543, 48.582499996567 14.757500002097, 48.581666663234 14.757500002097, 48.581666663234 14.755000002098, 48.582499996567 14.755000002098, 48.582499996567 14.754166668765, 48.5833333299 14.754166668765, 48.5833333299 14.752500002099, 48.584166663233 14.752500002099, 48.584166663233 14.751666668766, 48.584999996566 14.751666668766, 48.584999996566 14.750833335433, 48.585833329899 14.750833335433, 48.585833329899 14.7500000021, 48.586666663232 14.7500000021)) " };

        int index = 0;
        while (r.nextKeyValue()) {
            Geometry f = r.getCurrentValue();
            String row = "";
            for (Map.Entry attr : f.getAllAttributes().entrySet()) {
                row += attr.getKey() + ":" + attr.getValue() + " ";
            }
            Assert.assertEquals(base[index++], row);
        }
    }
}