org.icgc.dcc.release.job.export.function.CreateRow.java Source code

Java tutorial

Introduction

Here is the source code for org.icgc.dcc.release.job.export.function.CreateRow.java

Source

/*
 * Copyright (c) 2016 The Ontario Institute for Cancer Research. All rights reserved.                             
 *                                                                                                               
 * This program and the accompanying materials are made available under the terms of the GNU Public License v3.0.
 * You should have received a copy of the GNU General Public License along with                                  
 * this program. If not, see <http://www.gnu.org/licenses/>.                                                     
 *                                                                                                               
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY                           
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES                          
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT                           
 * SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,                                
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED                          
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;                               
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER                              
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN                         
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.icgc.dcc.release.job.export.function;

import static com.google.common.base.Preconditions.checkState;
import static org.icgc.dcc.release.job.export.util.DataTypeUtils.checkDataType;

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.val;
import lombok.extern.slf4j.Slf4j;

import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.types.ArrayType;
import org.apache.spark.sql.types.StructType;
import org.icgc.dcc.common.json.Jackson;
import org.icgc.dcc.release.job.export.model.ExportType;
import org.icgc.dcc.release.job.export.stats.StatsCalculator;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

@Slf4j
@RequiredArgsConstructor
public final class CreateRow implements Function<ObjectNode, Row> {

    /**
     * Configuration.
     */
    @NonNull
    private final ExportType exportType;
    @NonNull
    private final StructType exportTypeSchema;
    @NonNull
    private final StatsCalculator statsCalculator;

    @Override
    public Row call(ObjectNode json) {
        statsCalculator.calculate(json);
        val map = convert(json);
        Row row = null;
        try {
            row = createRow(map, exportType, exportTypeSchema);
        } catch (IllegalArgumentException e) {
            log.error("Failed to convert row: {}", json);
            throw e;
        }

        return row;
    }

    @SneakyThrows
    private Map<String, Object> convert(ObjectNode json) {
        val jsonString = json.toString();
        Map<String, Object> map = Maps.<String, Object>newHashMap();
        map = Jackson.DEFAULT.readValue(jsonString, new TypeReference<Map<String, Object>>() {
        });

        return map;
    }

    private Row createRow(Map<String, Object> map, ExportType exportType, StructType exportTypeSchema) {
        val rowValues = Lists.newArrayList();
        val schemaFields = Lists.newArrayList(exportTypeSchema.fields());

        for (val field : schemaFields) {
            val fieldName = field.name();
            val value = map.get(fieldName);
            checkDataType(value, field.dataType(), fieldName);

            if (value == null) {
                rowValues.add(null);
            } else if (isSimpleType(value)) {
                rowValues.add(value);
            } else {
                val childExportType = ExportType.getChildType(exportType, fieldName);
                val arrayType = (ArrayType) field.dataType();
                rowValues.add(convertArray(value, childExportType, (StructType) arrayType.elementType()));
            }
        }

        return create(rowValues);
    }

    private static Row create(List<? extends Object> rowValues) {
        return RowFactory.create(rowValues.toArray(new Object[rowValues.size()]));
    }

    private List<Row> convertArray(Object value, ExportType exportType, StructType exportTypeSchema) {
        checkState(value instanceof List);
        @SuppressWarnings("unchecked")
        List<Map<String, Object>> values = (List<Map<String, Object>>) value;

        List<Row> result = values.stream().map(map -> createRow(map, exportType, exportTypeSchema))
                .collect(Collectors.toList());

        return result;
    }

    private static boolean isSimpleType(Object value) {
        return !(value instanceof List);
    }

}