com.google.doubleclick.util.DoubleClickMetadata.java Source code

Java tutorial

Introduction

Here is the source code for com.google.doubleclick.util.DoubleClickMetadata.java

Source

/*
 * Copyright 2014 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.doubleclick.util;

import com.google.common.base.Function;
import com.google.common.base.MoreObjects;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Interner;
import com.google.common.collect.Interners;
import com.google.common.collect.Lists;
import com.google.doubleclick.util.GeoTarget.Type;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.annotation.Nullable;
import javax.inject.Inject;
import javax.inject.Singleton;

/**
 * Metadata for the DoubleClick Ad Exchange dictionaries. Helps code that needs to
 * inspect or manipulate several categories of numeric IDs embedded in DoubleClick's protobuf.
 * See <a href="https://developers.google.com/ad-exchange/rtb/downloads">
 * DoubleClick Ad Exchange Real-Time Bidding Protocol</a>; this class mostly encapsulates
 * the .txt and .csv files obtained there (loaded by HTTP at startup).
 * <p>
 * Note: validation is lenient: invalid codes are logged, but no failure is raised here
 * if the bidder is using old metadata. If a code is really invalid, AdX may reject the bid.
 * <p>
 * This class is threadsafe, as well as all nested helper classes. It's recommended to create
 * a single instance because its initialization can be slow (up to a few seconds).
 */
@Singleton
public class DoubleClickMetadata {
    private static final Logger logger = LoggerFactory.getLogger(DoubleClickMetadata.class);
    private static final String BASE_URL = "https://storage.googleapis.com";
    private static final String ADX_DICT = BASE_URL + "/adx-rtb-dictionaries/";

    /** Matches one dictionary record: a numeric ID, whitespace, then the description. */
    private static final Pattern ID_DESCRIPTION_RECORD = Pattern.compile("(\\d+)\\s+(.*)");

    private final ImmutableMap<Integer, String> vendors;
    private final ImmutableMap<Integer, String> gdnVendors;
    private final ImmutableMap<Integer, String> adSensitiveCategories;
    private final ImmutableMap<Integer, String> adProductCategories;
    private final ImmutableMap<Integer, String> adRestrictedCategories;
    private final ImmutableMap<Integer, String> agencies;
    private final ImmutableMap<Integer, String> allAdCategories;
    private final ImmutableMap<Integer, String> pubExcCreativeAttributes;
    private final ImmutableMap<Integer, String> buyDecCreativeAttributes;
    private final ImmutableMap<Integer, String> allCreativeAttributes;
    private final ImmutableMap<Integer, String> creativeStatusCodes;
    private final ImmutableMap<Integer, String> sellerNetworks;
    private final ImmutableMap<Integer, String> siteLists;
    private final ImmutableMap<Integer, String> contentLabels;
    private final ImmutableMap<Integer, String> publisherVerticals;
    private final ImmutableMap<Integer, GeoTarget> geoTargetsByCriteriaId;
    private final ImmutableMap<CityDMARegionKey, CityDMARegionValue> dmaRegions;
    private final ImmutableMap<GeoTarget.CanonicalKey, GeoTarget> geoTargetsByCanonicalKey;
    private final ImmutableMap<Object, CountryCodes> countryCodes;

    /**
     * Loads every dictionary through the given transport.
     *
     * @param transport opens each dictionary resource by URL
     * @throws ExceptionInInitializerError if any dictionary cannot be read
     */
    @Inject
    public DoubleClickMetadata(Transport transport) {
        // One interner is shared by all loaders so equal strings are stored only once.
        Interner<String> interner = Interners.<String>newStrongInterner();
        vendors = load(interner, transport, ADX_DICT + "vendors.txt");
        gdnVendors = load(interner, transport, ADX_DICT + "gdn-vendors.txt");

        adSensitiveCategories = load(interner, transport, ADX_DICT + "ad-sensitive-categories.txt");
        adProductCategories = load(interner, transport, ADX_DICT + "ad-product-categories.txt");
        adRestrictedCategories = load(interner, transport, ADX_DICT + "ad-restricted-categories.txt");
        // Merged through a HashMap so an ID present in several files does not fail the build;
        // the file merged last wins.
        HashMap<Integer, String> cats = new HashMap<>();
        cats.putAll(adSensitiveCategories);
        cats.putAll(adProductCategories);
        cats.putAll(adRestrictedCategories);
        allAdCategories = ImmutableMap.copyOf(cats);

        agencies = load(interner, transport, ADX_DICT + "agencies.txt");

        pubExcCreativeAttributes = load(interner, transport,
                ADX_DICT + "publisher-excludable-creative-attributes.txt");
        buyDecCreativeAttributes = load(interner, transport,
                ADX_DICT + "buyer-declarable-creative-attributes.txt");
        HashMap<Integer, String> attrs = new HashMap<>();
        attrs.putAll(pubExcCreativeAttributes);
        attrs.putAll(buyDecCreativeAttributes);
        allCreativeAttributes = ImmutableMap.copyOf(attrs);

        creativeStatusCodes = load(interner, transport, ADX_DICT + "creative-status-codes.txt");
        sellerNetworks = load(interner, transport, ADX_DICT + "seller-network-ids.txt");
        siteLists = load(interner, transport, ADX_DICT + "site-lists.txt");
        contentLabels = load(interner, transport, ADX_DICT + "content-labels.txt");
        publisherVerticals = load(interner, transport, ADX_DICT + "publisher-verticals.txt");

        geoTargetsByCriteriaId = loadGeoTargets(interner, transport, ADX_DICT + "geo-table.csv");
        // Secondary index; if two targets share a canonical key, the later one wins.
        HashMap<GeoTarget.CanonicalKey, GeoTarget> byKey = new HashMap<>();
        for (GeoTarget target : geoTargetsByCriteriaId.values()) {
            byKey.put(target.key(), target);
        }
        geoTargetsByCanonicalKey = ImmutableMap.copyOf(byKey);

        // A ResourceTransport reads the DMA file from the dictionary path; any other
        // transport fetches it from the short link instead.
        dmaRegions = loadCitiesDMARegions(interner, transport,
                transport instanceof ResourceTransport ? ADX_DICT + "cities-dma-regions.csv"
                        : "http://goo.gl/9ENFV7");
        // Country codes are always read via a ResourceTransport (see loadCountryCodes),
        // regardless of the transport passed to this constructor.
        countryCodes = loadCountryCodes(interner, ADX_DICT + "countries.txt");
    }

    /**
     * Dictionary used in the excluded_attribute field of BidRequest.
     * This field describes the types of creatives that are not allowed by the publisher.
     * For example, they might specify restrictions on whether cookie usage is allowed,
     * or whether media and/or text ads are allowed.
     */
    public ImmutableMap<Integer, String> publisherExcludableCreativeAttributes() {
        return pubExcCreativeAttributes;
    }

    /**
     * Dictionary used for the attribute field in BidResponse. This field describes buyer declarable
     * attributes on creatives which must not appear in excluded_attribute in BidRequest.
     */
    public ImmutableMap<Integer, String> buyerDeclarableCreativeAttributes() {
        return buyDecCreativeAttributes;
    }

    /**
     * @return Union of {@link #publisherExcludableCreativeAttributes()},
     * {@link #buyerDeclarableCreativeAttributes()}
     */
    public ImmutableMap<Integer, String> allCreativeAttributes() {
        return allCreativeAttributes;
    }

    /**
     * Dictionary file used in the creative_status_code field of BidRequest.BidResponseFeedback.
     * This field lists the different reasons that a creative returned
     * in a BidResponse may be rejected.
     */
    public ImmutableMap<Integer, String> creativeStatusCodes() {
        return creativeStatusCodes;
    }

    /**
     * Dictionary used in the excluded_sensitive_category field of BidRequest
     * and the category field of BidResponse. This field describes sensitive categories
     * that are not allowed by publisher.
     * For example, the publisher does not want to host ads related to Politics.
     */
    public ImmutableMap<Integer, String> sensitiveCategories() {
        return adSensitiveCategories;
    }

    /**
     * Dictionary used in the excluded_product_category field of BidRequest.
     * This field describes categories of products and services that are not allowed by the publisher.
     * For example, the publisher does not want to host ads related to Online Banking.
     */
    public ImmutableMap<Integer, String> productCategories() {
        return adProductCategories;
    }

    /**
     * Dictionary used in the allowed_restricted_category field of BidRequest and
     * the restricted_category field of BidResponse. This field describes restricted categories
     * that may be allowed by the publisher, and must be declared to use.
     * For example, ads containing Alcohol related content.
     */
    public ImmutableMap<Integer, String> restrictedCategories() {
        return adRestrictedCategories;
    }

    /**
     * @return Union of {@link #productCategories()}, {@link #sensitiveCategories()},
     * {@link #restrictedCategories()}
     */
    public ImmutableMap<Integer, String> allCategories() {
        return allAdCategories;
    }

    /**
     * Dictionary file used in the agency_id field of BidResponse.
     * This field is used to declare the agency that is associated with the ad being returned.
     */
    public ImmutableMap<Integer, String> agencies() {
        return agencies;
    }

    /**
     * Dictionary used in the allowed_vendor_type field of BidRequest. This field lists which
     * Rich Media vendors such as Eyeblaster and Pointroll are allowed for the creative being
     * served as specified by the publisher.
     */
    public ImmutableMap<Integer, String> vendors() {
        return vendors;
    }

    /**
     * Dictionary which lists all the allowed_vendor_type entries for any request
     * from a GDN publisher. These vendor types are all allowed on GDN but must be declared if used.
     * This is a subset of the entries in {@link #vendors()}.
     */
    public ImmutableMap<Integer, String> gdnVendors() {
        return gdnVendors;
    }

    /**
     * Dictionary file used in the seller_network_id field of BidRequest. This field specifies
     * the seller network to which the publisher belongs.
     */
    public ImmutableMap<Integer, String> sellerNetworks() {
        return sellerNetworks;
    }

    /**
     * Dictionary file used in the site_list_id field of BidRequest. This field specifies the
     * site lists to which the publisher belongs. Current options are Ad Planner 1000
     * (a list of 1000 most visited sites on the web), and Brand Select
     * (a list of quality publishers generated based on Google internal ranking).
     */
    public ImmutableMap<Integer, String> siteLists() {
        return siteLists;
    }

    /**
     * Dictionary file used in the detected_content_labels field of BidRequest.
     */
    public ImmutableMap<Integer, String> contentLabels() {
        return contentLabels;
    }

    /**
     * Dictionary file used in the detected_vertical field of BidRequest.
     * This field specifies the verticals (similar to keywords) of the page on which
     * the ad will be shown. Google generates this field by crawling the page and
     * determining which verticals are used.
     */
    public ImmutableMap<Integer, String> publisherVerticals() {
        return publisherVerticals;
    }

    /**
     * Dictionary file used to map cities to DMA Region codes.
     */
    public ImmutableMap<CityDMARegionKey, CityDMARegionValue> dmaRegions() {
        return dmaRegions;
    }

    /**
     * {@link GeoTarget}s indexed by criteria ID.
     */
    public ImmutableMap<Integer, GeoTarget> geoTargets() {
        return geoTargetsByCriteriaId;
    }

    /**
     * Looks up a {@link GeoTarget} by its criteria ID.
     *
     * @return the matching target, or {@code null} if the ID is not in the geo table
     */
    public GeoTarget geoTargetFor(int criteriaId) {
        return geoTargetsByCriteriaId.get(criteriaId);
    }

    /**
     * Looks up a {@link GeoTarget} by its type and canonical name.
     *
     * @return the matching target, or {@code null} if no target has that canonical key
     */
    public GeoTarget geoTargetFor(Type type, String canonicalName) {
        return geoTargetsByCanonicalKey.get(new GeoTarget.CanonicalKey(type, canonicalName));
    }

    /**
     * Maps ISO 3166-1 codes. Each {@link CountryCodes} is registered under three keys:
     * its numeric, alpha-2 and alpha-3 codes.
     */
    public ImmutableMap<Object, CountryCodes> countryCodes() {
        return countryCodes;
    }

    /**
     * Formats a code to the corresponding description from its domain.
     *
     * @param metadata dictionary to look the code up in
     * @param code numeric ID to describe
     * @return {@code "<code>: <description>"}, with {@code <invalid>} as the description
     *     when the dictionary has no entry for the code
     */
    public static String toString(Map<Integer, String> metadata, int code) {
        String description = metadata.get(code);
        return code + ": " + (description == null ? "<invalid>" : description);
    }

    /** Summarizes this instance as the entry count of each loaded dictionary. */
    @Override
    public String toString() {
        return MoreObjects.toStringHelper(this).omitNullValues().add("agencies#", agencies.size())
                .add("buyDecCreativeAttributes#", buyDecCreativeAttributes.size())
                .add("contentLabels#", contentLabels.size()).add("countryCodes#", countryCodes.size())
                .add("creativeStatusCodes#", creativeStatusCodes.size()).add("gdnVendorTypes#", gdnVendors.size())
                .add("productCategories#", adProductCategories.size())
                .add("pubExcCreativeAttributes#", pubExcCreativeAttributes.size())
                .add("publisherVerticals#", publisherVerticals.size())
                .add("restrictedCategories#", adRestrictedCategories.size())
                .add("sellerNetworks#", sellerNetworks.size())
                .add("sensitiveCategories#", adSensitiveCategories.size()).add("siteLists#", siteLists.size())
                .add("geoTargets#", geoTargetsByCriteriaId.size()).add("dmaRegions#", dmaRegions.size())
                .add("vendors#", vendors.size()).toString();
    }

    /**
     * Loads a simple "ID description" dictionary, one record per line.
     * Lines that do not match the record pattern are skipped; IDs that do not fit in an
     * {@code int} are logged at trace level and skipped.
     *
     * @param interner deduplicates description strings
     * @param transport opens the resource
     * @param resourceName URL of the dictionary
     * @return map from numeric ID to description
     * @throws ExceptionInInitializerError if the resource cannot be opened or read
     */
    private static ImmutableMap<Integer, String> load(Interner<String> interner, Transport transport,
            String resourceName) {
        // Decode explicitly as UTF-8 so the result does not depend on the platform charset.
        try (InputStream isMetadata = transport.open(resourceName);
                BufferedReader rd = new BufferedReader(
                        new InputStreamReader(isMetadata, StandardCharsets.UTF_8))) {
            ImmutableMap.Builder<Integer, String> builder = ImmutableMap.builder();
            String record;

            while ((record = rd.readLine()) != null) {
                Matcher matcher = ID_DESCRIPTION_RECORD.matcher(record);

                if (matcher.matches()) {
                    try {
                        builder.put(Integer.parseInt(matcher.group(1)), interner.intern(matcher.group(2)));
                    } catch (NumberFormatException e) {
                        logger.trace("Bad record, ignoring: {} - [{}]", e.toString(), record);
                    }
                }
            }

            return builder.build();
        } catch (IOException e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Loads the cities-to-DMA-regions CSV.
     * Each record is keyed by (field 1 as int, field 3) and maps to a value built from
     * (field 4 as int, field 0, field 2). A key seen again replaces the earlier entry.
     *
     * @throws ExceptionInInitializerError if the resource cannot be opened or read
     */
    private static ImmutableMap<CityDMARegionKey, CityDMARegionValue> loadCitiesDMARegions(
            final Interner<String> interner, Transport transport, String resourceName) {
        final Map<CityDMARegionKey, CityDMARegionValue> map = new LinkedHashMap<>();
        try (InputStream is = transport.open(resourceName)) {
            CSVParser.csvParser().parse(is, ".*,(\\d+),.*,.*,(\\d+)", new Function<List<String>, Boolean>() {
                @Override
                public Boolean apply(List<String> fields) {
                    CityDMARegionKey key = new CityDMARegionKey(
                            Integer.parseInt(fields.get(1)), interner.intern(fields.get(3)));
                    CityDMARegionValue value = new CityDMARegionValue(
                            Integer.parseInt(fields.get(4)), interner.intern(fields.get(0)),
                            interner.intern(fields.get(2)));
                    map.put(key, value);
                    return true;
                }
            });
        } catch (IOException e) {
            throw new ExceptionInInitializerError(e);
        }
        return ImmutableMap.copyOf(map);
    }

    /**
     * Loads the geo table CSV and links every target to its parents.
     * <p>
     * Only records with exactly 7 fields are used; records that fail to parse are logged at
     * trace level and skipped. After loading, each target gets a canonical parent (resolved
     * by canonical name, only when that name is unambiguous) and an ID parent (the first of
     * its listed parent IDs that exists in the table).
     *
     * @return targets indexed by criteria ID, in file order
     * @throws ExceptionInInitializerError if the resource cannot be opened or read
     */
    private static ImmutableMap<Integer, GeoTarget> loadGeoTargets(final Interner<String> interner,
            Transport transport, String resourceName) {
        final Map<Integer, GeoTarget> targetsById = new LinkedHashMap<>();
        final Map<Integer, List<Integer>> parentIdsById = new LinkedHashMap<>();
        final Map<String, GeoTarget> targetsByCanon = new LinkedHashMap<>();
        // Canonical names seen more than once; these must never resolve to a parent.
        final Set<String> duplicateCanon = new LinkedHashSet<>();

        try (InputStream is = transport.open(resourceName)) {
            CSVParser.csvParser().parse(is, "(\\d+),(.*)", new Function<List<String>, Boolean>() {
                @Override
                public Boolean apply(List<String> fields) {
                    try {
                        if (fields.size() == 7) {
                            GeoTarget target = new GeoTarget(Integer.valueOf(fields.get(0)),
                                    Type.valueOf(toEnumName(fields.get(6))), interner.intern(fields.get(2)),
                                    interner.intern(fields.get(1)), interner.intern(fields.get(5)), null, null);
                            // Field 3 is itself a comma-separated list of parent criteria IDs.
                            // NOTE: Lists.transform returns a lazy view, so a malformed ID is
                            // only converted (and would only fail) when the list is iterated
                            // during parent resolution below, not inside this try block.
                            List<Integer> idParent = Lists.transform(CSVParser.csvParser().parse(fields.get(3)),
                                    new Function<String, Integer>() {
                                        @Override
                                        public Integer apply(@Nullable String id) {
                                            return Integer.valueOf(id);
                                        }
                                    });

                            targetsById.put(target.criteriaId(), target);
                            parentIdsById.put(target.criteriaId(), idParent);

                            String canonicalName = target.canonicalName();
                            // Once a name is known to be ambiguous it stays excluded; without
                            // this check a third occurrence would be registered again.
                            if (!duplicateCanon.contains(canonicalName)) {
                                if (targetsByCanon.containsKey(canonicalName)) {
                                    duplicateCanon.add(canonicalName);
                                    targetsByCanon.remove(canonicalName);
                                } else {
                                    targetsByCanon.put(canonicalName, target);
                                }
                            }
                        }
                    } catch (ParseException | IllegalArgumentException e) {
                        logger.trace("Bad record [{}]: {}", fields, e.toString());
                    }
                    return true;
                }
            });

            // Pass 1: canonical parents, resolved by unambiguous canonical name.
            for (GeoTarget target : targetsById.values()) {
                GeoTarget canonParent = targetsByCanon.get(target.findCanonParentName());
                if (canonParent != null) {
                    target.setCanonParent(canonParent);
                }
            }

            // Pass 2: ID parents; the first listed parent ID present in the table wins.
            for (GeoTarget target : targetsById.values()) {
                List<Integer> parentIds = parentIdsById.get(target.criteriaId());
                for (Integer parentId : parentIds) {
                    GeoTarget idParent = targetsById.get(parentId);
                    if (idParent != null) {
                        target.setIdParent(idParent);
                        break;
                    }
                }
            }

            return ImmutableMap.copyOf(targetsById);
        } catch (IOException e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Loads the country-codes TSV. The file is always opened through a new
     * {@link ResourceTransport}, i.e. as a local resource.
     * <p>
     * Each record builds a {@link CountryCodes} from field 0 (as int), field 2 and field 3,
     * and registers it under its numeric, alpha-2 and alpha-3 codes. Records with fewer than
     * 4 fields, or that fail to parse, are logged at trace level and skipped.
     *
     * @throws ExceptionInInitializerError if the resource cannot be opened or read
     */
    private ImmutableMap<Object, CountryCodes> loadCountryCodes(final Interner<String> interner,
            String resourceName) {
        final ImmutableMap.Builder<Object, CountryCodes> map = ImmutableMap.builder();

        try (InputStream is = new ResourceTransport().open(resourceName)) {
            CSVParser.tsvParser().parse(is, "(\\d+)\\s+(.*)", new Function<List<String>, Boolean>() {
                @Override
                @Nullable
                public Boolean apply(@Nullable List<String> fields) {
                    // Fields 0, 2 and 3 are read below; a shorter record would otherwise
                    // throw IndexOutOfBoundsException and abort the whole load.
                    if (fields.size() < 4) {
                        logger.trace("Bad record, too few fields: {}", fields);
                        return true;
                    }
                    try {
                        CountryCodes codes = new CountryCodes(Integer.parseInt(fields.get(0)),
                                interner.intern(fields.get(2)), interner.intern(fields.get(3)));
                        map.put(codes.numeric(), codes);
                        map.put(codes.alpha2(), codes);
                        map.put(codes.alpha3(), codes);
                    } catch (IllegalArgumentException e) {
                        logger.trace("Bad record: {}: {}", fields, e.toString());
                    }
                    return true;
                }
            });
        } catch (IOException e) {
            throw new ExceptionInInitializerError(e);
        }
        return map.build();
    }

    /**
     * Converts a type name from the CSV into enum-constant form: spaces become
     * underscores and letters are upper-cased.
     */
    private static String toEnumName(String csvName) {
        // Locale.ROOT keeps the mapping stable; a default Turkish locale would turn
        // 'i' into a dotted capital and break Type.valueOf.
        return csvName.replace(' ', '_').toUpperCase(Locale.ROOT);
    }

    /**
     * Object that can load the content of an URL as a stream.
     */
    public static interface Transport {
        /**
         * Opens the content at the given URL.
         *
         * @throws IOException if the content cannot be opened
         */
        InputStream open(String url) throws IOException;
    }

    /**
     * Implementation of {@link Transport} using the java.net APIs.
     */
    public static class URLConnectionTransport implements Transport {
        @Override
        public InputStream open(String url) throws IOException {
            return new URL(url).openStream();
        }
    }

    /**
     * Implementation of {@link Transport} that loads a local resource.
     * A URL starting with "/" is used as the resource name directly; for any other URL
     * only its path component is used.
     */
    public static class ResourceTransport implements Transport {
        @Override
        public InputStream open(String url) throws IOException {
            String resourceName = url.startsWith("/") ? url : new URL(url).getPath();
            InputStream is = ResourceTransport.class.getResourceAsStream(resourceName);
            if (is == null) {
                throw new IOException("Cannot open local resource: " + resourceName);
            }
            return is;
        }
    }
}