Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.aliyun.odps.mapred.unittest; import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.io.FileUtils; import org.apache.commons.lang.StringUtils; import com.aliyun.odps.Column; import com.aliyun.odps.OdpsType; import com.aliyun.odps.conf.Configuration; import com.aliyun.odps.data.Record; import com.aliyun.odps.data.TableInfo; import com.aliyun.odps.mapred.Mapper; import com.aliyun.odps.mapred.Reducer; import com.aliyun.odps.local.common.TableMeta; import com.aliyun.odps.local.common.utils.SchemaUtils; /** * MapReduce Unit Test . * * <p> * {@link MapUTContext} {@link ReduceUTContext} ? * */ public abstract class UTContext { private int taskId = 0; protected RuntimeContext runtimeContext; private Map<String, String> outputSchemas = new HashMap<String, String>(); private Set<String> resources = new HashSet<String>(); private Map<String, byte[]> fileResources = new HashMap<String, byte[]>(); private Map<String, File> archiveResources = new HashMap<String, File>(); private Map<String, List<Record>> tableResources = new HashMap<String, List<Record>>(); private Map<String, TableMeta> tableMetas = new HashMap<String, TableMeta>(); private boolean isCleanUtDir = false; /** * TaskId? 0. * * @return TaskId */ public int getTaskId() { return taskId; } /** * TaskId? 0. * * <p> * ?? TaskAttemptID#getTaskId() ? * * @param taskId */ public void setTaskId(int taskId) { this.taskId = taskId; } /** * ??. * * <p> * {@link Mapper} {@link Reducer} ??? * * @param resourceName * ??? * @param file * ? * @throws IOException */ public void setFileResource(String resourceName, File file) throws IOException { checkResource(resourceName, file); setFileResource(resourceName, FileUtils.readFileToByteArray(file)); } /** * ?. * * <p> * {@link Mapper} {@link Reducer} ??? * * @param resourceName * ??? * @param content * * @throws IOException */ public void setFileResource(String resourceName, String content) throws IOException { checkResource(resourceName); setFileResource(resourceName, (content == null ? null : content.getBytes())); } /** * ?. * * <p> * {@link Mapper} {@link Reducer} ??? * * @param resourceName * ??? * @param content * * @throws IOException */ public void setFileResource(String resourceName, byte[] content) throws IOException { checkResource(resourceName); if (content == null) { throw new IOException("content is null for resource: " + resourceName); } fileResources.put(resourceName, content); resources.add(resourceName); } /** * ?. * * <p> * {@link Mapper} {@link Reducer} ?? * * @param resourceName * ??? * @param path * * @throws IOException */ public void setArchiveResource(String resourceName, File path) throws IOException { checkResource(resourceName, path); archiveResources.put(resourceName, path); resources.add(resourceName); } /** * ??. * * <p> * {@link Mapper} {@link Reducer} ?? * * <p> * schema ?[<proj>.<table_name>,]<col_name>:<col_type * >(,<col_name>:<col_type>)*<br /> * * <ul> * <li>proj.tablename,a:string,b:bigint,c:double * <li>a:string,b:bigint,c:double * </ul> * * @param resourceName * ??? * @param info * ? * @param schema * schema * @param records * ? * @throws IOException */ public void setTableResource(String resourceName, TableInfo info, String schema, List<Record> records) throws IOException { checkResource(resourceName); if (info == null) { throw new IOException("table info is null for resource: " + resourceName); } if (schema == null) { throw new IOException("schema is null for resource: " + resourceName); } if (records == null) { throw new IOException("record list is null for resource: " + resourceName); } Column[] parts = new Column[info.getPartSpec().size()]; int i = 0; for (Map.Entry<String, String> part : info.getPartSpec().entrySet()) { Column col = new Column(part.getKey(), OdpsType.STRING); parts[i++] = col; } TableMeta meta = new TableMeta(info.getProjectName(), info.getTableName(), com.aliyun.odps.mapred.utils.SchemaUtils.fromString(schema), parts); tableResources.put(resourceName, records); tableMetas.put(resourceName, meta); resources.add(resourceName); } /** * ?. * * <p> * ???"__schema__" schema ??csv? * * @param resourceName * ??? * @param dir * * @throws IOException */ public void setTableResource(String resourceName, File dir) throws IOException { checkResource(resourceName, dir); TableMeta meta = SchemaUtils.readSchema(dir); String schemaStr = SchemaUtils.toString(meta.getCols()); TableInfo info = new TableInfo(); info.setProjectName(meta.getProjName()); info.setTableName(meta.getTableName()); List<Record> records = MRUnitTest.readRecords(dir); setTableResource(resourceName, info, schemaStr, records); } /** * schema. * * <p> * schema ?<col_name>:<col_type >(,<col_name>:<col_type>)*<br /> * a:string,b:bigint,c:double * * <p> * * {@link TableOutputFormat#addOutput(TableInfo, com.aliyun.odps.conf.Configuration)} * ? schema * * @param schema * schema * @throws IOException */ public void setOutputSchema(String schema, Configuration conf) throws IOException { setOutputSchema("__default__", schema, conf); } /** * schema. * * <p> * schema ?<col_name>:<col_type >(,<col_name>:<col_type>)*<br /> * a:string,b:bigint,c:double * * <p> * * {@link TableOutputFormat#addOutput(TableInfo, String, com.aliyun.odps.conf.Configuration)} * ? schema * * @param label * * @param schema * schema * @throws IOException */ public void setOutputSchema(String label, String schema, Configuration conf) throws IOException { try { Column[] columns = com.aliyun.odps.mapred.utils.SchemaUtils.fromString(schema.trim()); conf.set("odps.mapred.output.schema." + label, schema); } catch (Exception ex) { throw new IOException("bad schema format: " + schema); } outputSchemas.put(label, schema); } private void checkResource(String resourceName) throws IOException { if (StringUtils.isEmpty(resourceName)) { throw new IOException("invalid resource name: " + resourceName); } if (resources.contains(resourceName)) { throw new IOException("duplicate resource: " + resourceName); } } private void checkResource(String resourceName, File file) throws IOException { checkResource(resourceName); if (file == null) { throw new IOException("file or directory is null for resource: " + resourceName); } if (!file.exists()) { throw new IOException("file or directory not found for resource: " + resourceName + ", file: " + file); } } RuntimeContext getRuntimeContext() { return runtimeContext; } void setRuntimeContext(RuntimeContext runtimeContext) { this.runtimeContext = runtimeContext; } Map<String, String> getOutputSchemas() { return outputSchemas; } Map<String, byte[]> getFileResources() { return fileResources; } Map<String, File> getArchiveResources() { return archiveResources; } Map<String, List<Record>> getTableResources() { return tableResources; } Map<String, TableMeta> getTableMetas() { return tableMetas; } void clearResources() { resources.clear(); fileResources.clear(); archiveResources.clear(); tableMetas.clear(); } public boolean isCleanUtDir() { return isCleanUtDir; } public void setCleanUtDir(boolean isClean) { isCleanUtDir = isClean; } }