Java tutorial: ElasticSearchDataSetProvider, a dashbuilder DataSetProvider implemented as a REST client for an ElasticSearch server.
/**
 * Copyright (C) 2014 JBoss Inc
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.dashbuilder.dataprovider.backend.elasticsearch;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.dashbuilder.dataprovider.DataSetProvider;
import org.dashbuilder.dataprovider.DataSetProviderType;
import org.dashbuilder.dataprovider.backend.StaticDataSetProvider;
import org.dashbuilder.dataprovider.backend.elasticsearch.rest.client.model.*;
import org.dashbuilder.dataset.*;
import org.dashbuilder.dataset.def.DataSetDef;
import org.dashbuilder.dataset.def.DataSetDefRegistry;
import org.dashbuilder.dataset.def.ElasticSearchDataSetDef;
import org.dashbuilder.dataset.events.DataSetStaleEvent;
import org.dashbuilder.dataset.filter.ColumnFilter;
import org.dashbuilder.dataset.filter.DataSetFilter;
import org.dashbuilder.dataset.group.DataSetGroup;
import org.dashbuilder.dataset.group.GroupFunction;
import org.dashbuilder.dataset.impl.ElasticSearchDataSetMetadata;
import org.dashbuilder.dataset.impl.MemSizeEstimator;
import org.dashbuilder.dataset.sort.ColumnSort;
import org.dashbuilder.dataset.sort.DataSetSort;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;

import javax.enterprise.event.Observes;
import javax.inject.Inject;
import javax.inject.Named;
import java.util.*;

/**
 * <p>Data provider for an ElasticSearch server.</p>
 * <p>It's basically implemented as a REST client for querying an ElasticSearch server instance.</p>
 *
 * <p>If a given type field is not explicitly mapped as a concrete dashbuilder datatype using the <code>columns</code> parameter, the implicit data type bindings are:</p>
 * <table>
 *     <tr>
 *         <th>ElasticSearch type</th>
 *         <th>Dashbuilder type</th>
 *     </tr>
 *     <tr>
 *         <td>string</td>
 *         <td>TEXT ( if <code>index</code> value is <code>analyzed</code> ) or LABEL ( if <code>index</code> value is <code>not_analyzed</code> )</td>
 *     </tr>
 *     <tr>
 *         <td>float</td>
 *         <td>NUMBER</td>
 *     </tr>
 *     <tr>
 *         <td>double</td>
 *         <td>NUMBER</td>
 *     </tr>
 *     <tr>
 *         <td>byte</td>
 *         <td>NUMBER</td>
 *     </tr>
 *     <tr>
 *         <td>short</td>
 *         <td>NUMBER</td>
 *     </tr>
 *     <tr>
 *         <td>integer</td>
 *         <td>NUMBER</td>
 *     </tr>
 *     <tr>
 *         <td>long</td>
 *         <td>NUMBER</td>
 *     </tr>
 *     <tr>
 *         <td>token_count</td>
 *         <td>LABEL</td>
 *     </tr>
 *     <tr>
 *         <td>date</td>
 *         <td>DATE</td>
 *     </tr>
 *     <tr>
 *         <td>boolean</td>
 *         <td>LABEL</td>
 *     </tr>
 *     <tr>
 *         <td>binary</td>
 *         <td>LABEL</td>
 *     </tr>
 * </table>
 *
 * @since 0.3.0
 */
@Named("elasticsearch")
public class ElasticSearchDataSetProvider implements DataSetProvider {

    public static final DateTimeFormatter EL_DEFAULT_DATETIME_FORMATTER = ISODateTimeFormat.dateOptionalTimeParser();

    public static final int RESPONSE_CODE_OK = 200;

    @Inject
    protected StaticDataSetProvider staticDataSetProvider;

    @Inject
    protected DataSetDefRegistry dataSetDefRegistry;

    @Inject
    protected ElasticSearchClientFactory clientFactory;

    @Inject
    protected ElasticSearchQueryBuilderFactory queryBuilderFactory;

    protected final Map<String, DataSetMetadata>
            _metadataMap = new HashMap<String, DataSetMetadata>();

    public ElasticSearchDataSetProvider() {
    }

    public DataSetProviderType getType() {
        return DataSetProviderType.ELASTICSEARCH;
    }

    public DataSet lookupDataSet(DataSetDef def, DataSetLookup lookup) throws Exception {
        ElasticSearchDataSetDef elDef = (ElasticSearchDataSetDef) def;

        // Look first into the static data set provider cache.
        if (elDef.isCacheEnabled()) {
            DataSet dataSet = staticDataSetProvider.lookupDataSet(def.getUUID(), null);
            if (dataSet != null) {
                // Lookup from cache.
                return staticDataSetProvider.lookupDataSet(def.getUUID(), lookup);
            } else {
                // Always fetch from the EL server if the existing rows exceed the cache max. rows.
                long rows = getRowCount(elDef);
                if (rows > elDef.getCacheMaxRows()) {
                    return _lookupDataSet(elDef, lookup);
                }
                // Fetch from the EL server and register into the static cache. Further requests will lookup from cache.
                dataSet = _lookupDataSet(elDef, null);
                dataSet.setUUID(def.getUUID());
                dataSet.setDefinition(def);
                staticDataSetProvider.registerDataSet(dataSet);
                return staticDataSetProvider.lookupDataSet(def.getUUID(), lookup);
            }
        }

        // If cache is disabled then always fetch from the EL server.
        return _lookupDataSet(elDef, lookup);
    }

    protected DataSet _lookupDataSet(ElasticSearchDataSetDef elDef, DataSetLookup lookup) throws Exception {
        ElasticSearchDataSetMetadata metadata = (ElasticSearchDataSetMetadata) getDataSetMetadata(elDef);
        // Guard against a null lookup (the cache path above calls this method with lookup == null).
        int numRows = lookup != null ? lookup.getNumberOfRows() : 0;
        int rowOffset = lookup != null ? lookup.getRowOffset() : 0;
        String[] index = elDef.getIndex();
        String[] type = elDef.getType();
        boolean trim = (lookup != null && numRows > 0);

        SearchRequest request = new SearchRequest(metadata);

        int numberOfColumns = metadata.getNumberOfColumns();
        List<String> columnIds = new ArrayList<String>(numberOfColumns);
        for (int x = 0; x < numberOfColumns; x++) {
            columnIds.add(metadata.getColumnId(x));
        }
        if (!columnIds.isEmpty()) {
            request.setFields(columnIds.toArray(new String[columnIds.size()]));
        }

        // Pagination.
        if (trim) {
            request.setStart(rowOffset);
            request.setSize(numRows);
        }

        if (lookup != null && !lookup.getOperationList().isEmpty()) {
            List<DataSetGroup> groupOps = lookup.getOperationList(DataSetGroup.class);
            List<DataSetFilter> filters = lookup.getOperationList(DataSetFilter.class);
            List<DataSetSort> sortOps = lookup.getOperationList(DataSetSort.class);

            // Check that operation source fields exist as dataset columns.
            checkOperations(metadata, groupOps, filters, sortOps);

            // Group operations.
            request.setAggregations(groupOps);

            // Filter operations.
            if (filters != null && !filters.isEmpty()) {
                // The query is built from the given filters and/or from interval selections. Build it.
                Query query = queryBuilderFactory.newQueryBuilder()
                        .metadata(metadata)
                        .groupInterval(groupOps)
                        .filter(filters)
                        .build();
                request.setQuery(query);
            }

            // Sort operations.
            request.setSorting(sortOps);
        }

        // Default sorting.
        // If no sort operation is defined for this lookup, sort by the default field specified in the dataset definition, if any.
        List<DataSetSort> sortOps = request.getSorting();
        if ((sortOps == null || sortOps.isEmpty()) && elDef.getColumnSort() != null) {
            if (sortOps == null) sortOps = new ArrayList<DataSetSort>();
            DataSetSort defaultSort = new DataSetSort();
            defaultSort.addSortColumn(elDef.getColumnSort());
            sortOps.add(defaultSort);
            request.setSorting(sortOps);
        }

        // Perform the query & generate the resulting dataset.
        DataSet dataSet = DataSetFactory.newEmptyDataSet();
        SearchResponse searchResponse = clientFactory.newClient(elDef).search(elDef, metadata, request);

        // Add the dataset columns.
        addDataSetColumns(dataSet, searchResponse);

        // There are no results. Return an empty dataset.
        if (searchResponse instanceof EmptySearchResponse) return dataSet;

        // There exist values. Fill the dataset.
        fillDataSetValues(elDef, dataSet, searchResponse.getHits());

        if (trim) {
            dataSet.setRowCountNonTrimmed((int) searchResponse.getTotalHits());
        }
        return dataSet;
    }

    private void checkOperations(DataSetMetadata metadata, List<DataSetGroup> groupOps, List<DataSetFilter> filterOps, List<DataSetSort> sortOps) {
        if (metadata == null) return;

        // Check group operations.
        if (groupOps != null && !groupOps.isEmpty()) {
            for (DataSetGroup groupOp : groupOps) {
                if (groupOp.getColumnGroup() != null && !existColumn(metadata, groupOp.getColumnGroup().getSourceId()))
                    throw new IllegalArgumentException("Grouping by a non existing column [" + groupOp.getColumnGroup().getSourceId() + "] in dataset ");
                List<GroupFunction> groupFunctions = groupOp.getGroupFunctions();
                if (groupFunctions != null && !groupFunctions.isEmpty()) {
                    for (GroupFunction groupFunction : groupFunctions) {
                        if (groupFunction.getSourceId() != null && !existColumn(metadata, groupFunction.getSourceId()))
                            throw new IllegalArgumentException("Grouping function by a non existing column [" + groupFunction.getSourceId() + "] in dataset ");
                    }
                }
            }
        }

        // Check filter operations.
        if (filterOps != null && !filterOps.isEmpty()) {
            for (DataSetFilter filterOp : filterOps) {
                List<ColumnFilter> filters = filterOp.getColumnFilterList();
                if (filters != null && !filters.isEmpty()) {
                    for (ColumnFilter filter : filters) {
                        if (!existColumn(metadata, filter.getColumnId()))
                            throw new IllegalArgumentException("Filtering by a non existing column [" + filter.getColumnId() + "] in dataset ");
                    }
                }
            }
        }

        // Check sort operations.
        if (sortOps != null && !sortOps.isEmpty()) {
            for (DataSetSort sortOp : sortOps) {
                List<ColumnSort> sorts = sortOp.getColumnSortList();
                if (sorts != null && !sorts.isEmpty()) {
                    for (ColumnSort sort : sorts) {
                        if (!existColumn(metadata, sort.getColumnId()))
                            throw new IllegalArgumentException("Sorting by a non existing column [" + sort.getColumnId() + "] in dataset ");
                    }
                }
            }
        }
    }

    private boolean existColumn(DataSetMetadata metadata, String columnId) {
        if (metadata == null || columnId == null || columnId.trim().length() == 0) return false;

        int numCols = metadata.getNumberOfColumns();
        for (int x = 0; x < numCols; x++) {
            String metaColumnId = metadata.getColumnId(x);
            if (columnId.equals(metaColumnId)) return true;
        }
        return false;
    }

    /**
     * Fills the dataset values.
     *
     * @param elDef The ElasticSearch dataset definition.
     * @param dataSet The dataset instance to fill. Note that dataset columns must be added before calling this method.
     * @param hits The search result hits.
     *
     * @throws Exception
     */
    protected void fillDataSetValues(ElasticSearchDataSetDef elDef, DataSet dataSet, SearchHitResponse[] hits) throws Exception {
        List<DataColumn> dataSetColumns = dataSet.getColumns();
        int position = 0;
        for (SearchHitResponse hit : hits) {
            int columnNumber = 0;
            for (DataColumn column : dataSetColumns) {
                String columnId = column.getId();
                Object value = hit.getFieldValue(columnId);
                dataSet.setValueAt(position, columnNumber, value);
                columnNumber++;
            }
            position++;
        }
    }

    /**
     * Creates the columns for the dataset.
     *
     * @param dataSet The dataset instance.
     * @param searchResponse The resulting columns for the performed query.
     *
     * @throws Exception
     */
    protected void addDataSetColumns(DataSet dataSet, SearchResponse searchResponse) throws Exception {
        List<DataColumn> columns = searchResponse.getColumns();
        if (columns != null && !columns.isEmpty()) {
            for (DataColumn column : columns) {
                dataSet.addColumn(column);
            }
        }
    }

    public boolean isDataSetOutdated(DataSetDef def) {
        try {
            // If cache is disabled then there is no way for a data set to get outdated...
            ElasticSearchDataSetDef elDef = (ElasticSearchDataSetDef) def;
            if (!elDef.isCacheEnabled()) return false;

            // ... for non cached data sets either.
            DataSet dataSet = staticDataSetProvider.lookupDataSet(def, null);
            if (dataSet == null) return false;

            // Compare the cached rows vs. the ElasticSearch server rows.
            long rows = getRowCount(elDef);
            return rows != dataSet.getRowCount();
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    public DataSetMetadata getDataSetMetadata(DataSetDef def) throws Exception {
        // Type casting.
        ElasticSearchDataSetDef elasticSearchDataSetDef = (ElasticSearchDataSetDef) def;

        // Check if the metadata already exists in cache.
        ElasticSearchDataSetMetadata result = (ElasticSearchDataSetMetadata) _metadataMap.get(elasticSearchDataSetDef.getUUID());
        if (result != null) return result;

        // Data Set parameters.
        String[] index = elasticSearchDataSetDef.getIndex();
        String[] type = elasticSearchDataSetDef.getType();

        // Get the row count.
        long rowCount = getRowCount(elasticSearchDataSetDef);

        // Obtain the index mappings.
        MappingsResponse mappingsResponse = clientFactory.newClient(elasticSearchDataSetDef).getMappings(index);
        if (mappingsResponse == null || mappingsResponse.getStatus() != RESPONSE_CODE_OK)
            throw new IllegalArgumentException("Cannot retrieve index mappings for index: [" + index[0] + "]. See previous errors.");

        // Obtain the columns (ids and types).
        List<String> columnIds = new LinkedList<String>();
        List<ColumnType> columnTypes = new LinkedList<ColumnType>();

        // Check if custom columns have been configured in the dataset definition, otherwise query the index mappings and retrieve the column information from them.
        Map<String, Object[]> columns = parseColumns(mappingsResponse.getIndexMappings(), elasticSearchDataSetDef);
        if (columns == null || columns.isEmpty())
            throw new RuntimeException("There are no columns for index [" + index[0] + "] and type [" + ArrayUtils.toString(type) + "].");

        boolean isAllColumns = elasticSearchDataSetDef.isAllColumnsEnabled();
        List<DataColumn> dataSetColumns = elasticSearchDataSetDef.getDataSet().getColumns();
        if (isAllColumns) {
            // Use the columns given by the EL index mapping.
            for (Map.Entry<String, Object[]> entry : columns.entrySet()) {
                String columnId = entry.getKey();
                ColumnType columnType = (ColumnType) entry.getValue()[0];

                // Check if there is any column definition override.
                DataColumn definitionColumn = getColumn(dataSetColumns, columnId);
                if (definitionColumn != null) {
                    ColumnType definitionColumnType = definitionColumn.getColumnType();
                    if (columnType.equals(ColumnType.TEXT) && definitionColumnType.equals(ColumnType.LABEL))
                        throw new IllegalArgumentException("The column [" + columnId + "] is defined in the dataset definition as LABEL, but the column in the index [" + index[0] + "] and type [" + ArrayUtils.toString(type) + "] is using an ANALYZED index, so it cannot be used as a label.");
                    columnType = definitionColumnType;
                }

                columnIds.add(columnId);
                columnTypes.add(columnType);
            }
        } else {
            // Use the given columns from the dataset definition.
            if (dataSetColumns != null && !dataSetColumns.isEmpty()) {
                for (DataColumn column : dataSetColumns) {
                    String columnId = column.getId();
                    ColumnType columnType = column.getColumnType();

                    // Check that the user defined column exists in the index/type.
                    Object[] indexColumn = columns.get(columnId);
                    if (indexColumn == null)
                        throw new IllegalArgumentException("The column [" + columnId + "] defined in the dataset definition does not exist for the index [" + index[0] + "] and type [" + ArrayUtils.toString(type) + "].");
                    ColumnType indexColumnType = (ColumnType) indexColumn[0];
                    String format = (String) indexColumn[1];

                    // Check that fields analyzed in the EL index definition are analyzed in the dataset definition too.
                    if (indexColumnType.equals(ColumnType.TEXT) && columnType.equals(ColumnType.LABEL))
                        throw new IllegalArgumentException("The column [" + columnId + "] is defined in the dataset definition as LABEL, but the column in the index [" + index[0] + "] and type [" + ArrayUtils.toString(type) + "] is using an ANALYZED index, so it cannot be used as a label.");

                    columnIds.add(columnId);
                    columnTypes.add(columnType);
                }
            }
        }

        int _rowCount = (int) rowCount;
        int estimatedSize = estimateSize(columnTypes, _rowCount);

        // Build the metadata instance.
        result = new ElasticSearchDataSetMetadata(def, def.getUUID(), _rowCount, columnIds.size(), columnIds, columnTypes, estimatedSize);

        // Set the index field patterns from the EL server.
        for (Map.Entry<String, Object[]> entry : columns.entrySet()) {
            String pattern = (String) entry.getValue()[1];
            if (pattern != null && pattern.trim().length() > 0) result.setFieldPattern(entry.getKey(), pattern);
        }

        // Put into cache.
        _metadataMap.put(def.getUUID(), result);

        return result;
    }

    private int estimateSize(List<ColumnType> columnTypes, int rowCount) {
        int estimatedSize = 0;
        if (columnTypes != null && !columnTypes.isEmpty()) {
            for (ColumnType type : columnTypes) {
                if (ColumnType.DATE.equals(type)) {
                    estimatedSize += MemSizeEstimator.sizeOf(Date.class) * rowCount;
                } else if (ColumnType.NUMBER.equals(type)) {
                    estimatedSize += MemSizeEstimator.sizeOf(Double.class) * rowCount;
                } else {
                    // For strings use an approximated value, as EL does not provide a size attribute for fields.
                    estimatedSize += 30 * rowCount;
                }
            }
        }
        return estimatedSize;
    }

    private DataColumn getColumn(List<DataColumn> dataSetColumns, String columnId) {
        if (dataSetColumns != null && !dataSetColumns.isEmpty()) {
            for (DataColumn column : dataSetColumns) {
                String id = column.getId();
                if (id.equals(columnId)) return column;
            }
        }
        return null;
    }

    /**
     * Parses a given index's field definitions from the EL index mappings response.
     *
     * @param indexMappings The mappings response from the EL server.
     * @param def The dataset definition.
     * @return The columns for the dataset as a Map, where the key is the column identifier and the value is an Object[] that contains the column type and the column pattern for that field.
     */
    protected Map<String, Object[]> parseColumns(IndexMappingResponse[] indexMappings, ElasticSearchDataSetDef def) {
        Map<String, Object[]> result = null;
        for (IndexMappingResponse indexMapping : indexMappings) {
            result = new LinkedHashMap<String, Object[]>();
            String indexName = indexMapping.getIndexName();
            TypeMappingResponse[] typeMappings = indexMapping.getTypeMappings();
            if (typeMappings == null || typeMappings.length == 0)
                throw new IllegalArgumentException("There are no types for index: [" + indexName + "].");
            for (TypeMappingResponse typeMapping : typeMappings) {
                String typeName = typeMapping.getTypeName();
                FieldMappingResponse[] properties = typeMapping.getFields();
                if (properties == null || properties.length == 0)
                    throw new IllegalArgumentException("There are no fields for index: [" + indexName + "] and type [" + typeName + "].");
                for (FieldMappingResponse fieldMapping : properties) {
                    String fieldName = fieldMapping.getName();
                    String format = fieldMapping.getFormat();
                    String columnId = getColumnId(indexName, typeName, fieldName);
                    ColumnType columnType = getDataType(fieldMapping);

                    // Only use supported column types.
                    if (columnType != null) {
                        boolean columnExists = result.containsKey(columnId);
                        if (columnExists) {
                            // Check the column type for an existing column.
                            ColumnType existingColumnType = (ColumnType) result.get(columnId)[0];
                            if (existingColumnType != null && !existingColumnType.equals(columnType))
                                throw new IllegalArgumentException("Column [" + columnId + "] is already present in the data set with type [" + existingColumnType + "] and you are trying to add it again as type [" + columnType.toString() + "].");

                            // Check the column format for an existing column.
                            if (!StringUtils.isBlank(format)) {
                                String existingPattern = def.getPattern(columnId);
                                if (existingPattern != null && !existingPattern.equals(format))
                                    throw new IllegalArgumentException("Column [" + columnId + "] is already present in the data set with pattern [" + existingPattern + "] and you are trying to add it again with pattern [" + format + "].");
                            }
                        } else {
                            result.put(columnId, new Object[] { columnType, format });
                        }
                    }
                }
            }
        }
        return result;
    }

    protected String getColumnId(String index, String type, String field) throws IllegalArgumentException {
        if (index == null || index.trim().length() == 0)
            throw new IllegalArgumentException("Cannot create the column identifier. Index name is not set.");
        if (type == null || type.trim().length() == 0)
            throw new IllegalArgumentException("Cannot create the column identifier. Document type name is not set.");
        if (field == null || field.trim().length() == 0)
            throw new IllegalArgumentException("Cannot create the column identifier. Field name is not set.");
        return field;
    }

    /**
     * <p>Returns the dashbuilder data type for a given ElasticSearch field type.</p>
     *
     * @param fieldMapping The ElasticSearch field mapping.
     * @return The dashbuilder data type.
     * @throws IllegalArgumentException If the ElasticSearch core data type is not supported.
     */
    protected ColumnType getDataType(FieldMappingResponse fieldMapping) throws IllegalArgumentException {
        FieldMappingResponse.FieldType fieldType = fieldMapping.getDataType();
        if (fieldType == null) return null;
        switch (fieldType) {
            case STRING:
                if (fieldMapping.getIndexType() != null && fieldMapping.getIndexType().equals(FieldMappingResponse.IndexType.NOT_ANALYZED))
                    return ColumnType.LABEL;
                // Analyzed indexes are considered TEXT.
                return ColumnType.TEXT;
            case FLOAT:
                return ColumnType.NUMBER;
            case DOUBLE:
                return ColumnType.NUMBER;
            case BYTE:
                return ColumnType.NUMBER;
            case SHORT:
                return ColumnType.NUMBER;
            case INTEGER:
                return ColumnType.NUMBER;
            case LONG:
                return ColumnType.NUMBER;
            case TOKEN_COUNT:
                return ColumnType.LABEL;
            case DATE:
                return ColumnType.DATE;
            case BOOLEAN:
                return ColumnType.LABEL;
            case BINARY:
                return ColumnType.LABEL;
        }

        throw new IllegalArgumentException("The ElasticSearch core data type [" + fieldType.toString() + "] is not supported.");
    }

    protected long getRowCount(ElasticSearchDataSetDef elasticSearchDataSetDef) throws Exception {
        String[] index = elasticSearchDataSetDef.getIndex();
        String[] type = elasticSearchDataSetDef.getType();

        CountResponse response = clientFactory.newClient(elasticSearchDataSetDef).count(index, type);

        if (response != null) return response.getCount();
        return 0;
    }

    // Listen to changes on the data set definition registry.
    protected void onDataSetStaleEvent(@Observes DataSetStaleEvent event) {
        DataSetDef def = event.getDataSetDef();
        if (DataSetProviderType.ELASTICSEARCH.equals(def.getProvider())) {
            String uuid = def.getUUID();
            _metadataMap.remove(uuid);
            staticDataSetProvider.removeDataSet(uuid);
        }
    }
}
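For orientation, the snippet below is a minimal sketch of how application code can drive this provider. It is illustrative only: it assumes a CDI container that injects the provider and the registry, a data set definition already registered under the hypothetical UUID "expenses", and the standard dashbuilder lookup builder (DataSetFactory.newDataSetLookupBuilder()). It is not part of the class above.

// Minimal usage sketch (assumptions: CDI injection is available, a data set definition
// with the illustrative UUID "expenses" is already registered in the DataSetDefRegistry,
// and the dashbuilder dataset API is on the classpath).
package org.dashbuilder.dataprovider.backend.elasticsearch;

import javax.inject.Inject;

import org.dashbuilder.dataset.DataSet;
import org.dashbuilder.dataset.DataSetFactory;
import org.dashbuilder.dataset.DataSetLookup;
import org.dashbuilder.dataset.def.DataSetDef;
import org.dashbuilder.dataset.def.DataSetDefRegistry;

public class ElasticSearchProviderUsageExample {

    @Inject
    ElasticSearchDataSetProvider elasticSearchProvider;

    @Inject
    DataSetDefRegistry dataSetDefRegistry;

    public DataSet loadFirstPage() throws Exception {
        // Resolve the (hypothetical) registered ElasticSearch data set definition by its UUID.
        DataSetDef def = dataSetDefRegistry.getDataSetDef("expenses");

        // Ask for the first 10 rows; the provider maps this to SearchRequest start/size,
        // so pagination happens on the ElasticSearch side.
        DataSetLookup lookup = DataSetFactory.newDataSetLookupBuilder()
                .dataset("expenses")
                .rowOffset(0)
                .rowNumber(10)
                .buildLookup();

        // With caching enabled in the definition, repeated calls are served from the
        // StaticDataSetProvider cache; otherwise each call queries the ElasticSearch server.
        return elasticSearchProvider.lookupDataSet(def, lookup);
    }
}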