org.apache.beam.sdk.nexmark.queries.sql.SqlQuery3.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.beam.sdk.nexmark.queries.sql.SqlQuery3.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.beam.sdk.nexmark.queries.sql;

import org.apache.beam.sdk.extensions.sql.SqlTransform;
import org.apache.beam.sdk.nexmark.NexmarkConfiguration;
import org.apache.beam.sdk.nexmark.model.Auction;
import org.apache.beam.sdk.nexmark.model.Event;
import org.apache.beam.sdk.nexmark.model.Event.Type;
import org.apache.beam.sdk.nexmark.model.NameCityStateId;
import org.apache.beam.sdk.nexmark.model.Person;
import org.apache.beam.sdk.nexmark.model.sql.SelectEvent;
import org.apache.beam.sdk.nexmark.queries.NexmarkQueryTransform;
import org.apache.beam.sdk.nexmark.queries.Query3;
import org.apache.beam.sdk.schemas.transforms.Convert;
import org.apache.beam.sdk.transforms.Filter;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.sdk.values.TupleTag;
import org.joda.time.Duration;

/**
 * Query 3, 'Local Item Suggestion'. Who is selling in OR, ID or CA in category 10, and for what
 * auction ids? In CQL syntax:
 *
 * <pre>
 * SELECT Istream(P.name, P.city, P.state, A.id)
 * FROM Auction A [ROWS UNBOUNDED], Person P [ROWS UNBOUNDED]
 * WHERE A.seller = P.id AND (P.state = `OR' OR P.state = `ID' OR P.state = `CA') AND A.category
 * = 10;
 * </pre>
 *
 * <p>This implementation runs as written, but results may not be what is expected from a correct
 * join, and behavior doesn't match the java version.
 *
 * <p>At the moment join is implemented as a CoGBK, so it joins the trigger outputs. It means that
 * in discarding mode it will join only new elements arrived since last trigger firing. And in
 * accumulating mode it will output the results which were already emitted in last trigger firing.
 *
 * <p>Additionally, it is currently not possible to match the elements across windows.
 *
 * <p>All of the above makes it not intuitive, inflexible, and produces results which may not be
 * what users are expecting.
 *
 * <p>Java version of the query ({@link Query3}) solves this by caching the auctions in the state
 * cell if there was no matching seller yet. And then flushes them when sellers become available.
 *
 * <p>Correct join semantics implementation is tracked in BEAM-3190, BEAM-3191
 */
public class SqlQuery3 extends NexmarkQueryTransform<NameCityStateId> {

    /** Name handed to the superclass and used as the prefix of the step names built in this class. */
    private static final String QUERY_NAME = SqlQuery3.class.getSimpleName();

    /**
     * SQL text of the query. The table names {@code Auction} and {@code Person} are the same
     * strings used as tuple tag ids in {@link #createStreamsTuple}.
     */
    private static final String QUERY_STRING =
            " SELECT "
            + "    P.name, P.city, P.state, A.id "
            + " FROM "
            + "    Auction A INNER JOIN Person P on A.seller = P.id "
            + " WHERE "
            + "    A.category = 10 "
            + "    AND (P.state = 'OR' OR P.state = 'ID' OR P.state = 'CA')";

    /** Benchmark configuration; only {@code windowSizeSec} is read by this class. */
    private final NexmarkConfiguration configuration;

    /**
     * Creates the query transform.
     *
     * @param configuration configuration supplying the window size (in seconds) used by the query
     */
    public SqlQuery3(NexmarkConfiguration configuration) {
        super(QUERY_NAME);
        this.configuration = configuration;
    }

    /**
     * Windows the events, splits them into an auction stream and a person stream, runs
     * {@link #QUERY_STRING} over both and converts the resulting rows to {@link NameCityStateId}.
     *
     * @param allEvents the mixed stream of Nexmark events
     * @return one {@link NameCityStateId} per row produced by the SQL query
     */
    @Override
    public PCollection<NameCityStateId> expand(PCollection<Event> allEvents) {
        PCollection<Event> windowed = fixedWindows(allEvents);

        PCollection<Row> auctions =
                filter(windowed, e -> e.newAuction != null, Auction.class, Type.AUCTION);
        PCollection<Row> people =
                filter(windowed, e -> e.newPerson != null, Person.class, Type.PERSON);

        PCollectionTuple inputStreams = createStreamsTuple(auctions, people);

        return inputStreams
                .apply(SqlTransform.query(QUERY_STRING))
                .apply(Convert.fromRows(NameCityStateId.class));
    }

    /**
     * Assigns the events to fixed windows whose length is {@code configuration.windowSizeSec}
     * seconds.
     */
    private PCollection<Event> fixedWindows(PCollection<Event> events) {
        Duration windowSize = Duration.standardSeconds(configuration.windowSizeSec);
        return events.apply(Window.into(FixedWindows.of(windowSize)));
    }

    /**
     * Bundles both row streams into a single tuple, tagged {@code "Auction"} and
     * {@code "Person"} respectively, so they can be passed to the SQL transform together.
     */
    private PCollectionTuple createStreamsTuple(PCollection<Row> auctions, PCollection<Row> people) {
        return PCollectionTuple
                .of(new TupleTag<>("Auction"), auctions)
                .and(new TupleTag<>("Person"), people);
    }

    /**
     * Keeps only the events accepted by {@code filter} and applies {@link SelectEvent} for
     * {@code eventType} to produce rows.
     *
     * @param allEvents events to select from
     * @param filter predicate deciding which events are kept
     * @param clazz model class; only its simple name is used, to build the step names
     * @param eventType event type passed to {@link SelectEvent}
     * @return the rows produced by {@link SelectEvent} for the retained events
     */
    private PCollection<Row> filter(
            PCollection<Event> allEvents,
            SerializableFunction<Event, Boolean> filter,
            Class<?> clazz,
            Event.Type eventType) {

        String modelName = clazz.getSimpleName();

        return allEvents
                .apply(QUERY_NAME + ".Filter." + modelName, Filter.by(filter))
                .apply(QUERY_NAME + ".ToRecords." + modelName, new SelectEvent(eventType));
    }
}