Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.oodt.cas.pushpull.filerestrictions.parsers; //OODT imports import org.apache.oodt.cas.metadata.Metadata; import org.apache.oodt.cas.pushpull.filerestrictions.Parser; import org.apache.oodt.cas.pushpull.filerestrictions.VirtualFile; import org.apache.oodt.cas.pushpull.filerestrictions.VirtualFileStructure; import org.apache.oodt.cas.pushpull.exceptions.ParserException; //Google imports import com.google.common.base.Splitter; import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.common.collect.Sets; //JDK imports import java.io.FileInputStream; import java.util.List; import java.util.Scanner; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * A generic email parser which generates file paths to be downloaded by using a defined java * Pattern. The pattern should specify pattern groups for file paths in the matching pattern. * These groups will then be extracted and added to the file structure. * * @author bfoster@apache.org (Brian Foster) */ public class GenericEmailParser implements Parser { private static final Logger log = Logger.getLogger(GenericEmailParser.class.getCanonicalName()); public static final String FILE_PATTERNS_PROPERTY_NAME = "org.apache.oodt.cas.pushpull.generic.email.parser.file.pattern"; public static final String CHECK_FOR_PATTERN_PROPERTY_NAME = "org.apache.oodt.cas.pushpull.generic.email.parser.check.for.pattern"; public static final String PATH_TO_ROOT_PROPERTY_NAME = "org.apache.oodt.cas.pushpull.generic.email.parser.path.to.root"; public static final String METADATA_KEYS = "org.apache.oodt.cas.pushpull.generic.email.parser.metadata.keys"; public static final String METADATA_KEY_PREFIX = "org.apache.oodt.cas.pushpull.generic.email.parser.metadata."; private final String filePattern; private final String checkForPattern; private final String pathToRoot; public GenericEmailParser() { filePattern = loadFilePattern(); checkForPattern = loadCheckForPattern(); pathToRoot = loadPathToRoot(); } public GenericEmailParser(String filePattern, String checkForPattern, String pathToRoot) { this.filePattern = filePattern; this.checkForPattern = checkForPattern; this.pathToRoot = Strings.nullToEmpty(pathToRoot); } @Override public VirtualFileStructure parse(FileInputStream emailFile, Metadata metadata) throws ParserException { log.info("GenericEmailParser is parsing email: " + emailFile); VirtualFile root = VirtualFile.createRootDir(); String emailText = readEmail(emailFile); if (!isValidEmail(emailText)) { throw new ParserException("Failed to find check for pattern in email: " + checkForPattern); } List<String> filePaths = generateFilePaths(emailText); readMetadata(emailText, metadata); for (String filePath : filePaths) { new VirtualFile(root, pathToRoot + filePath, false); } return new VirtualFileStructure("/", root); } private String readEmail(FileInputStream emailFile) { StringBuilder emailText = new StringBuilder(""); Scanner scanner = new Scanner(emailFile); while (scanner.hasNextLine()) { emailText.append(scanner.nextLine()).append("\n"); } scanner.close(); return emailText.toString(); } private List<String> generateFilePaths(String emailText) { List<String> filePaths = Lists.newArrayList(); Pattern pattern = Pattern.compile(filePattern); Matcher m = pattern.matcher(emailText); if (m.find()) { // Ignore index 0, as that is the matching string for pattern. for (int i = 1; i <= m.groupCount(); i++) { filePaths.add(m.group(i)); } } return filePaths; } private void readMetadata(String emailText, Metadata metadata) { Set<String> metadataKeys = loadMetadataKeys(); for (String metadataKey : metadataKeys) { String metadataPattern = loadMetadataKey(metadataKey); if (metadataPattern == null) { log.log(Level.SEVERE, "Failed to load metadata pattern for key: " + metadataKey); } else { Pattern pattern = Pattern.compile(metadataPattern); Matcher m = pattern.matcher(emailText); if (m.find()) { // Ignore index 0, as that is the matching string for pattern. String metadatValue = m.group(1); metadata.replaceMetadata(metadataKey, metadatValue); } } } } private boolean isValidEmail(String emailText) { Pattern pattern = Pattern.compile(checkForPattern); Matcher m = pattern.matcher(emailText.replaceAll("\n", " ")); return m.find(); } private String loadFilePattern() { return System.getProperty(FILE_PATTERNS_PROPERTY_NAME); } private String loadCheckForPattern() { return System.getProperty(CHECK_FOR_PATTERN_PROPERTY_NAME); } private String loadPathToRoot() { return Strings.nullToEmpty(System.getProperty(PATH_TO_ROOT_PROPERTY_NAME)); } private Set<String> loadMetadataKeys() { return Sets.newHashSet( Splitter.on(",").omitEmptyStrings().split(Strings.nullToEmpty(System.getProperty(METADATA_KEYS)))); } private String loadMetadataKey(String key) { return System.getProperty(METADATA_KEY_PREFIX + key); } }