Java tutorial
/* * Copyright (c) 2011, Cloudera, Inc. All Rights Reserved. * * Cloudera, Inc. licenses this file to you under the Apache License, * Version 2.0 (the "License"). You may not use this file except in * compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. See the License for * the specific language governing permissions and limitations under the * License. */ package com.cloudera.recordbreaker.learnstructure.test; import java.io.File; import java.io.IOException; import java.io.BufferedReader; import java.io.FileReader; import java.util.Iterator; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.conf.Configuration; import org.apache.avro.file.DataFileReader; import org.apache.avro.generic.GenericDatumReader; import org.junit.rules.TemporaryFolder; import com.cloudera.recordbreaker.learnstructure.LearnStructure; /** * TestInference tests the LearnStructure component's structure-inference code. * * @author "Michael Cafarella" <mjc@cloudera.com> * @version 1.0 * @since 1.0 */ public abstract class InferenceTest { private static double MIN_PARSE_RATIO = 0.85; static File sampleDir = new File(System.getProperty("test.samples.dir", "src/samples"), "textdata"); /** * runSingletonTest() executes LearnStructure test for a single given input text file. * * @param inputData a <code>File</code> value * @return a <code>boolean</code> value; did the test succeed? */ boolean runSingletonTest(File workingDir, File inputData) { File tmpSingletonDir = new File(workingDir, "testinference-" + inputData.getName()); try { FileSystem localFS = FileSystem.getLocal(new Configuration()); tmpSingletonDir.mkdir(); Path schemaFile = new Path(tmpSingletonDir.getCanonicalPath(), LearnStructure.SCHEMA_FILENAME); Path parseTreeFile = new Path(tmpSingletonDir.getCanonicalPath(), LearnStructure.PARSER_FILENAME); Path jsonDataFile = new Path(tmpSingletonDir.getCanonicalPath(), LearnStructure.JSONDATA_FILENAME); Path avroFile = new Path(tmpSingletonDir.getCanonicalPath(), LearnStructure.DATA_FILENAME); LearnStructure ls = new LearnStructure(); // Check to see how many records exist in the original input int lineCount = 0; BufferedReader in2 = new BufferedReader(new FileReader(inputData)); try { while (in2.readLine() != null) { lineCount++; } } finally { in2.close(); } // Infer structure ls.inferRecordFormat(localFS, new Path(inputData.getCanonicalPath()), localFS, schemaFile, parseTreeFile, jsonDataFile, avroFile, false, lineCount); // Test the inferred structure // First, load in the avro file and see how many records there are. int avroCount = 0; DataFileReader in = new DataFileReader(new File(avroFile.toString()), new GenericDatumReader()); try { Iterator it = in.iterator(); while (it.hasNext()) { avroCount++; it.next(); } } finally { in.close(); } // Was the synthesized parser able to figure out the file? double parseRatio = avroCount / (1.0 * lineCount); return (parseRatio > MIN_PARSE_RATIO); } catch (IOException e) { try { System.err.println("File: " + inputData.getCanonicalPath()); } catch (IOException ex) { ex.printStackTrace(); } e.printStackTrace(); return false; } finally { // remove temp files tmpSingletonDir.delete(); } } }