org.wso2.carbon.registry.indexing.indexer.XMLIndexer.java Source code

Java tutorial

Introduction

Here is the source code for org.wso2.carbon.registry.indexing.indexer.XMLIndexer.java

Source

/*
 *  Copyright (c) 2015, WSO2 Inc. (http://www.wso2.org) All Rights Reserved.
 *
 *  WSO2 Inc. licenses this file to you under the Apache License,
 *  Version 2.0 (the "License"); you may not use this file except
 *  in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.wso2.carbon.registry.indexing.indexer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.common.SolrException;
import org.wso2.carbon.registry.core.exceptions.RegistryException;
import org.wso2.carbon.registry.core.utils.RegistryUtils;
import org.wso2.carbon.registry.indexing.AsyncIndexer.File2Index;
import org.wso2.carbon.registry.indexing.IndexingConstants;
import org.wso2.carbon.registry.indexing.solr.IndexDocument;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import java.io.ByteArrayInputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class XMLIndexer implements Indexer {

    public static final Log log = LogFactory.getLog(XMLIndexer.class);

    public IndexDocument getIndexedDocument(File2Index fileData) throws SolrException, RegistryException {
        // we register both the content as it is and only text content
        String xmlAsStr = RegistryUtils.decodeBytes(fileData.data);

        final StringBuffer contentOnly = new StringBuffer();
        ByteArrayInputStream inData = new ByteArrayInputStream(fileData.data);

        // this will handle text content
        DefaultHandler handler = new DefaultHandler() {
            public void characters(char ch[], int start, int length) throws SAXException {
                contentOnly.append(new String(ch, start, length)).append(" ");
            }
        };
        //         SAXParserFactory factory = SAXParserFactory.newInstance();
        //         SAXParser saxParser = factory.newSAXParser();
        //         saxParser.parse(inData, handler);

        IndexDocument indexDocument = new IndexDocument(fileData.path, xmlAsStr, contentOnly.toString());
        Map<String, List<String>> attributes = new HashMap<String, List<String>>();
        if (fileData.mediaType != null) {
            attributes.put(IndexingConstants.FIELD_MEDIA_TYPE, Arrays.asList(fileData.mediaType));
        }
        if (fileData.lcState != null) {
            attributes.put(IndexingConstants.FIELD_LC_STATE, Arrays.asList(fileData.lcState));
        }
        if (fileData.lcName != null) {
            attributes.put(IndexingConstants.FIELD_LC_NAME, Arrays.asList(fileData.lcName));
        }
        indexDocument.setFields(attributes);
        return indexDocument;

    }

}