org.apache.blur.analysis.HdfsFieldManager.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.blur.analysis.HdfsFieldManager.java

Source

package org.apache.blur.analysis;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.apache.blur.analysis.type.MultiValuedNotAllowedException;
import org.apache.blur.log.Log;
import org.apache.blur.log.LogFactory;
import org.apache.blur.trace.Trace;
import org.apache.blur.trace.Tracer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.lucene.analysis.Analyzer;

/**
 * A {@link BaseFieldManager} that persists field type definitions on a Hadoop
 * {@link FileSystem}.
 *
 * Each field is stored as a {@link Properties} file named
 * <code>fieldName + ".type"</code> directly under the storage path. The keys
 * wrapped in underscores (for example <code>_fieldType_</code>) hold the fixed
 * attributes of the definition; every other key in the file is treated as a
 * type specific property of the field.
 */
public class HdfsFieldManager extends BaseFieldManager {

    private static final List<String> EMPTY_LIST = Arrays.asList(new String[] {});

    private static final Log LOG = LogFactory.getLog(HdfsFieldManager.class);

    // Reserved keys of the stored properties file. They are written by tryToStore
    // and stripped again by toMap when the file is read back.
    private static final String FIELD_TYPE = "_fieldType_";
    private static final String FIELD_LESS_INDEXING = "_fieldLessIndexing_";
    private static final String SORTENABLED = "_sortEnabled_";
    private static final String MULTI_VALUE_FIELD = "_multiValueField_";
    private static final String FAMILY = "_family_";
    private static final String COLUMN_NAME = "_columnName_";
    private static final String SUB_COLUMN_NAME = "_subColumnName_";
    private static final String TYPE_FILE_EXT = ".type";

    // Static, so it is shared by every HdfsFieldManager instance in this JVM. Only
    // the write lock is ever taken, so it behaves as a plain (reentrant) mutex.
    private static final Lock _lock = new ReentrantReadWriteLock().writeLock();

    private final Configuration _configuration;
    private final Path _storagePath;
    private final FileSystem _fileSystem;

    /**
     * Creates a strict field manager with no default type, field less indexing
     * setting or properties for missing fields.
     *
     * @param fieldLessField
     *            passed through to {@link BaseFieldManager}.
     * @param defaultAnalyzerForQuerying
     *            passed through to {@link BaseFieldManager}.
     * @param storagePath
     *            the directory that holds the <code>.type</code> files.
     * @param configuration
     *            the Hadoop configuration used to resolve the file system of
     *            <code>storagePath</code>.
     * @throws IOException
     *             if the file system cannot be obtained.
     */
    public HdfsFieldManager(String fieldLessField, Analyzer defaultAnalyzerForQuerying, Path storagePath,
            Configuration configuration) throws IOException {
        this(fieldLessField, defaultAnalyzerForQuerying, storagePath, configuration, true, null, false, null);
    }

    /**
     * Creates a field manager backed by the given storage path.
     *
     * All arguments except <code>storagePath</code> are passed through to
     * {@link BaseFieldManager}; the file system is resolved from
     * <code>storagePath</code> and <code>configuration</code>.
     *
     * @param storagePath
     *            the directory that holds the <code>.type</code> files.
     * @param configuration
     *            the Hadoop configuration used to resolve the file system.
     * @throws IOException
     *             if the file system cannot be obtained.
     */
    public HdfsFieldManager(String fieldLessField, Analyzer defaultAnalyzerForQuerying, Path storagePath,
            Configuration configuration, boolean strict, String defaultMissingFieldType,
            boolean defaultMissingFieldLessIndexing, Map<String, String> defaultMissingFieldProps)
            throws IOException {
        super(fieldLessField, defaultAnalyzerForQuerying, strict, defaultMissingFieldType,
                defaultMissingFieldLessIndexing, defaultMissingFieldProps, configuration);
        _storagePath = storagePath;
        _configuration = configuration;
        _fileSystem = _storagePath.getFileSystem(_configuration);
    }

    /**
     * Lists the names of all fields that have a <code>.type</code> file in the
     * storage path.
     *
     * @return the field names (file names with the extension removed); an empty
     *         list if the storage path does not exist or the listing is null.
     * @throws IOException
     *             if the file system cannot be queried.
     */
    @Override
    protected List<String> getFieldNamesToLoad() throws IOException {
        Tracer trace = Trace.trace("filesystem - getFieldNamesToLoad", Trace.param("storagePath", _storagePath));
        try {
            if (!_fileSystem.exists(_storagePath)) {
                return EMPTY_LIST;
            }
            FileStatus[] listStatus = _fileSystem.listStatus(_storagePath, new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    return path.getName().endsWith(TYPE_FILE_EXT);
                }
            });
            if (listStatus == null) {
                return EMPTY_LIST;
            }
            List<String> fieldNames = new ArrayList<String>();
            for (FileStatus fileStatus : listStatus) {
                if (!fileStatus.isDir()) {
                    String fileName = fileStatus.getPath().getName();
                    // The filter guarantees the name ends with the extension.
                    fieldNames.add(fileName.substring(0, fileName.lastIndexOf(TYPE_FILE_EXT)));
                }
            }
            return fieldNames;
        } finally {
            trace.done();
        }
    }

    /**
     * Attempts to persist the given definition as the type file of
     * <code>fieldName</code>.
     *
     * The properties are first written to a uniquely named temporary file in the
     * same directory and then renamed onto the final path, so a type file only
     * becomes visible under its final name once it has been written completely.
     *
     * @param fieldTypeDefinition
     *            the definition to store.
     * @param fieldName
     *            the name of the field; determines the file name.
     * @return <code>true</code> if this call created the type file,
     *         <code>false</code> if the file already existed or the rename onto
     *         the final path failed.
     * @throws IOException
     *             if the file system reports an error.
     */
    @Override
    protected boolean tryToStore(FieldTypeDefinition fieldTypeDefinition, String fieldName) throws IOException {
        Tracer trace = Trace.trace("filesystem - tryToStore fieldName", Trace.param("fieldName", fieldName),
                Trace.param("storagePath", _storagePath));
        try {
            // Might want to make this a ZK lock
            _lock.lock();
            try {
                String fieldType = fieldTypeDefinition.getFieldType();
                boolean fieldLessIndexed = fieldTypeDefinition.isFieldLessIndexed();
                boolean sortEnable = fieldTypeDefinition.isSortEnable();
                boolean multiValueField = fieldTypeDefinition.isMultiValueField();
                LOG.info(
                        "Attempting to store new field [{0}] with fieldLessIndexing [{1}] with type [{2}] and properties [{3}]",
                        fieldName, fieldLessIndexed, fieldType, fieldTypeDefinition.getProperties());
                Properties properties = new Properties();
                setProperty(properties, FAMILY, fieldTypeDefinition.getFamily());
                setProperty(properties, COLUMN_NAME, fieldTypeDefinition.getColumnName());
                setProperty(properties, SUB_COLUMN_NAME, fieldTypeDefinition.getSubColumnName());
                setProperty(properties, FIELD_LESS_INDEXING, Boolean.toString(fieldLessIndexed));
                setProperty(properties, SORTENABLED, Boolean.toString(sortEnable));
                setProperty(properties, MULTI_VALUE_FIELD, Boolean.toString(multiValueField));

                setProperty(properties, FIELD_TYPE, fieldType);
                Map<String, String> props = fieldTypeDefinition.getProperties();
                if (props != null) {
                    for (Entry<String, String> e : props.entrySet()) {
                        // Use the null-safe helper: Properties rejects null values
                        // with a NullPointerException.
                        setProperty(properties, e.getKey(), e.getValue());
                    }
                }
                Path path = getFieldPath(fieldName);
                if (_fileSystem.exists(path)) {
                    LOG.info("Field [{0}] already exists.", fieldName);
                    return false;
                }
                Path tmpPath = new Path(path.getParent(), UUID.randomUUID().toString() + ".tmp");
                writePropertiesFile(tmpPath, properties);
                if (_fileSystem.rename(tmpPath, path)) {
                    // @TODO make this configurable
                    _fileSystem.setReplication(path, (short) 10);
                    return true;
                } else {
                    _fileSystem.delete(tmpPath, false);
                    LOG.info("Field [{0}] already exists.", fieldName);
                    return false;
                }
            } finally {
                _lock.unlock();
            }
        } finally {
            trace.done();
        }
    }

    /**
     * Writes the properties to a new file at the given path, failing if the file
     * already exists. The stream is closed even when writing fails.
     */
    private void writePropertiesFile(Path path, Properties properties) throws IOException {
        FSDataOutputStream outputStream = _fileSystem.create(path, false);
        try {
            properties.store(outputStream, getComments());
        } finally {
            // Properties.store leaves the stream open, so close it here.
            outputStream.close();
        }
    }

    /**
     * Reads a properties file from the given path. The stream is closed even when
     * reading fails.
     */
    private Properties readPropertiesFile(Path path) throws IOException {
        Properties properties = new Properties();
        FSDataInputStream inputStream = _fileSystem.open(path);
        try {
            properties.load(inputStream);
        } finally {
            // Properties.load leaves the stream open, so close it here.
            inputStream.close();
        }
        return properties;
    }

    /**
     * Sets the property unless the value is <code>null</code>, in which case the
     * key is left unset.
     */
    private void setProperty(Properties properties, String key, String value) {
        if (value == null) {
            return;
        }
        properties.setProperty(key, value);
    }

    /** Returns the path of the type file for the given field name. */
    private Path getFieldPath(String fieldName) {
        return new Path(_storagePath, fieldName + TYPE_FILE_EXT);
    }

    /** Returns the header comment written at the top of every type file. */
    private String getComments() {
        return "This file is generated from Apache Blur to store meta data about field types.  DO NOT MODIFY!";
    }

    /**
     * Loads the type file of <code>fieldName</code>, if there is one, and
     * registers the resulting definition. Does nothing when the file does not
     * exist.
     *
     * @param fieldName
     *            the name of the field to load.
     * @throws IOException
     *             if the file cannot be read.
     */
    @Override
    protected void tryToLoad(String fieldName) throws IOException {
        _lock.lock();
        try {
            Path path = getFieldPath(fieldName);
            if (!_fileSystem.exists(path)) {
                return;
            }
            Properties properties = readPropertiesFile(path);
            boolean fieldLessIndexing = Boolean.parseBoolean(properties.getProperty(FIELD_LESS_INDEXING));
            boolean sortenabled = Boolean.parseBoolean(properties.getProperty(SORTENABLED));
            String mvfProp = properties.getProperty(MULTI_VALUE_FIELD);
            // A missing or blank multi-value entry defaults to multi-valued.
            boolean multiValueField;
            if (mvfProp == null || mvfProp.trim().isEmpty()) {
                multiValueField = true;
            } else {
                multiValueField = Boolean.parseBoolean(mvfProp);
            }
            String fieldType = properties.getProperty(FIELD_TYPE);
            Map<String, String> props = toMap(properties);

            if (mvfProp == null) {
                if (multiValueField && sortenabled) {
                    // @TODO hack because we used to not have multivalue in the schema
                    LOG.warn("Changing field [{0}] to be NOT multiValueField.", fieldName);
                    multiValueField = false;
                }
            }
            FieldTypeDefinition fieldTypeDefinition;
            try {
                fieldTypeDefinition = newFieldTypeDefinition(fieldName, fieldLessIndexing, fieldType, sortenabled,
                        multiValueField, props);
            } catch (MultiValuedNotAllowedException e) {
                if (mvfProp == null) {
                    // The file never stated a multi-value setting, so retry as
                    // single-valued instead of failing on the default.
                    multiValueField = false;
                    fieldTypeDefinition = newFieldTypeDefinition(fieldName, fieldLessIndexing, fieldType,
                            sortenabled, multiValueField, props);
                } else {
                    throw e;
                }
            }
            fieldTypeDefinition.setFamily(properties.getProperty(FAMILY));
            fieldTypeDefinition.setColumnName(properties.getProperty(COLUMN_NAME));
            fieldTypeDefinition.setSubColumnName(properties.getProperty(SUB_COLUMN_NAME));
            fieldTypeDefinition.setFieldLessIndexed(fieldLessIndexing);
            fieldTypeDefinition.setFieldType(fieldType);
            fieldTypeDefinition.setSortEnable(sortenabled);
            fieldTypeDefinition.setMultiValueField(multiValueField);
            fieldTypeDefinition.setProperties(props);
            registerFieldTypeDefinition(fieldName, fieldTypeDefinition);
        } finally {
            _lock.unlock();
        }
    }

    /**
     * Copies the properties into a map and removes the reserved keys, leaving
     * only the type specific properties of the field.
     */
    private Map<String, String> toMap(Properties props) {
        Map<String, String> result = new HashMap<String, String>();
        for (Entry<Object, Object> e : props.entrySet()) {
            result.put(e.getKey().toString(), e.getValue().toString());
        }
        result.remove(FAMILY);
        result.remove(COLUMN_NAME);
        result.remove(SUB_COLUMN_NAME);
        result.remove(FIELD_TYPE);
        result.remove(FIELD_LESS_INDEXING);
        result.remove(SORTENABLED);
        result.remove(MULTI_VALUE_FIELD);
        return result;
    }

}