Example usage for org.apache.lucene.facet.taxonomy.directory DirectoryTaxonomyWriter commit

List of usage examples for org.apache.lucene.facet.taxonomy.directory DirectoryTaxonomyWriter commit

Introduction

On this page you can find example usages of the commit() method of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.

Prototype

@Override
    public synchronized long commit() throws IOException 

Source Link

Usage

From source file:uk.ac.ebi.arrayexpress.utils.saxon.search.AbstractIndexEnvironment.java

License:Apache License

/**
 * Rebuilds the Lucene index and its companion facet taxonomy from the XML database.
 *
 * <p>The nodes selected by {@code indexDocumentPath} are counted first and then fetched page by page.
 * Each page query returns node ids only; every node is then loaded on its own with
 * {@code db:open-id}, parsed with {@code config.buildDocument}, converted into a Lucene document by
 * {@code processEntryIndex} and added to the index. When all pages are done the taxonomy is committed
 * and closed, and the index is committed together with a small metadata map.
 *
 * @param indexLocationDirectory parent directory of the index; the index is opened in its
 *        {@code indexId} child and the taxonomy in the {@code indexId} child of
 *        {@code indexLocationDirectory + "Facets"}
 * @param dbHost host used to build the XML database connection string
 * @param dbPort port used to build the XML database connection string
 * @param dbPassword password handed to {@code XmlDbConnectionPool.getDBInfo}
 * @param dbName database name; used in the connection string and in the {@code db:open-id} queries
 * @throws Exception on any failure; the failure is logged, both writers are closed (close errors are
 *         logged, never rethrown) and the original exception is propagated
 */
public void indexFromXmlDB(String indexLocationDirectory, String dbHost, int dbPort, String dbPassword,
        String dbName) throws Exception {
    int countNodes = 0;
    String connectionString = "";
    IndexWriter w = null;
    DirectoryTaxonomyWriter taxoWriter = null;
    Map<String, XPathExpression> fieldXpe = new HashMap<String, XPathExpression>();
    try {

        Directory indexTempDirectory = FSDirectory.open(new File(indexLocationDirectory, indexId));
        logger.debug("Index directory->" + indexLocationDirectory);
        w = createIndex(indexTempDirectory, indexAnalyzer);

        Directory taxDir = FSDirectory.open(new File(indexLocationDirectory + "Facets", indexId));

        taxoWriter = new DirectoryTaxonomyWriter(taxDir);
        CategoryDocumentBuilder docBuilder = new CategoryDocumentBuilder(taxoWriter);

        HierarchicalConfiguration connsConf = (HierarchicalConfiguration) Application.getInstance()
                .getPreferences().getConfSubset("bs.xmldatabase");

        if (null != connsConf) {
            // TODO: rpe use the component XmlDatabasePooling
            // Host, port and database name come from the method parameters; only the
            // base of the URI is read from the configuration.
            connectionString = connsConf.getString("base") + "://" + dbHost + ":" + dbPort + "/" + dbName;
        } else {
            // NOTE(review): processing continues with an empty connection string, so the
            // lookup below is expected to fail - confirm whether failing fast here is preferable.
            logger.error("bs.xmldatabase Configuration is missing!!");
        }

        // The database driver is deliberately not registered here: it is already registered by
        // the XmlDbConnectionPool component and registering it twice raised
        // java.nio.channels.OverlappingFileLockException.
        logger.debug("connectionString->" + connectionString);
        // NOTE(review): the collection is never closed in this method (the close call was
        // already disabled in the original code) - confirm that the pool owns its lifecycle.
        Collection coll = DatabaseManager.getCollection(connectionString);
        XPathQueryService service = (XPathQueryService) coll.getService("XPathQueryService", "1.0");

        // collect all the fields data
        Configuration config = ((SaxonEngine) Application.getAppComponent("SaxonEngine")).trFactory
                .getConfiguration();

        XPath xp = new XPathEvaluator(config);

        for (FieldInfo field : fields.values()) {
            fieldXpe.put(field.name, xp.compile(field.path));
            logger.debug("Field Path->[{}]", field.path);
        }

        // The XML database has memory problems with queries that return huge result sets, so
        // the results are counted first and then walked with our own paging mechanism.
        long numberResults = 0;
        ResourceIterator countIter = service.query("count(" + indexDocumentPath + ")").getIterator();
        if (countIter.hasMoreResources()) {
            // parsed as long so that counts above Integer.MAX_VALUE do not fail
            numberResults = Long.parseLong((String) countIter.nextResource().getContent());
        }
        logger.debug("Number of results->" + numberResults);

        // Page size is a trade-off: sample groups must stay small to avoid memory errors, but
        // samples need large pages because the paging queries are really slow
        // (1 million for samples, 50000 for sample groups).
        long pageSizeDefault = 50000;
        if (numberResults > 1000000) {
            pageSizeDefault = 1000000;
        }

        long pageNumber = 1;
        int count = 0;
        // loops while pageNumber <= ceil(numberResults / pageSizeDefault)
        while ((pageNumber * pageSizeDefault) <= (numberResults + pageSizeDefault - 1)) {
            // first hit of the page (1-based) and number of hits in it; the last page may be shorter
            long pageInit = (pageNumber - 1) * pageSizeDefault + 1;
            long pageSize = (pageNumber * pageSizeDefault < numberResults) ? pageSizeDefault
                    : (numberResults - pageInit + 1);

            service = (XPathQueryService) coll.getService("XPathQueryService", "1.0");

            // xquery paging using subsequence function
            long time = System.nanoTime();

            // Everything is fetched by node id because the same sample can appear in
            // different sample groups.
            // TODO: change this (just works with baseX)
            ResourceSet set = service.query("for $x in(subsequence(" + indexDocumentPath + "," + pageInit
                    + "," + pageSize + ")) return db:node-id($x)");

            double ms = (System.nanoTime() - time) / 1000000d;
            logger.info("Query XMLDB took ->[{}]", ms);

            ResourceIterator iter = set.getIterator();
            while (iter.hasMoreResources()) {
                count++;
                logger.debug("its beeing processed the number ->" + count);
                String idNode = (String) iter.nextResource().getContent();
                // load the node itself
                ResourceSet setid = service.query("db:open-id('" + dbName + "'," + idNode + ")");
                ResourceIterator iterid = setid.getIterator();
                // never populated in this method, so no category paths are attached to the documents
                List<CategoryPath> sampleCategories = null;
                while (iterid.hasMoreResources()) {
                    String xml = (String) iterid.nextResource().getContent();
                    DocumentInfo source = config.buildDocument(new StreamSource(new StringReader(xml)));

                    Document d = new Document();

                    XPath xp2 = new XPathEvaluator(source.getConfiguration());

                    // The node was loaded standalone, so only the last step of the index path
                    // (from its final '/') is evaluated against it.
                    int position = indexDocumentPath.lastIndexOf("/");
                    String pathRoot = (position != -1) ? indexDocumentPath.substring(position)
                            : indexDocumentPath;
                    XPathExpression xpe2 = xp2.compile(pathRoot);
                    List<?> documentNodes = (List<?>) xpe2.evaluate(source, XPathConstants.NODESET);

                    // NOTE(review): when several nodes match, only the document built from the
                    // last one is indexed below (behaviour kept from the original code).
                    for (Object node : documentNodes) {

                        try {
                            d = processEntryIndex(node, config, service, fieldXpe);
                        } catch (Exception x) {
                            String xmlError = PrintUtils.printNodeInfo((NodeInfo) node, config);
                            logger.error("XML that was being processed when the error occurred DB->[{}]",
                                    xmlError);

                            // The writers are closed exactly once by the outer catch block, so the
                            // next run is still able to delete the newSetup directory.
                            throw new Exception("Xml that is being processed:" + xmlError, x);
                        }
                    }

                    countNodes++;

                    // facet tests
                    docBuilder.setCategoryPaths(sampleCategories);
                    docBuilder.build(d);

                    addIndexDocument(w, d);

                }
            }

            logger.debug("until now it were processed->[{}]", pageNumber * pageSizeDefault);
            pageNumber++;
        }

        setCountDocuments(countNodes);
        // add metadata to the lucene index
        Map<String, String> map = new HashMap<String, String>();
        map.put("numberDocs", Integer.toString(countNodes));
        // NOTE(review): System.nanoTime() has an arbitrary origin and is not a wall-clock
        // date - confirm whether System.currentTimeMillis() was intended.
        map.put("date", Long.toString(System.nanoTime()));
        // getDBInfo is called with explicit connection data instead of the component's own
        // metadata because this run may target a different database.
        String dbInfo = ((XmlDbConnectionPool) Application.getInstance().getComponent("XmlDbConnectionPool"))
                .getDBInfo(dbHost, dbPort, dbPassword, dbName);

        map.put("DBInfo", dbInfo);
        // facet: the taxonomy is committed and closed before the index itself is committed
        taxoWriter.commit();
        taxoWriter.close();
        // already closed: prevents a second close if commitIndex fails
        taxoWriter = null;
        commitIndex(w, map);

    } catch (Exception x) {
        logger.error("Caught an exception:", x);
        // Each writer is closed independently and null-checked, so that a writer that was never
        // created or a failing close can neither hide the original exception nor leak the other.
        if (taxoWriter != null) {
            try {
                taxoWriter.close();
            } catch (Exception closeError) {
                logger.error("Unable to close the taxonomy writer after a failure:", closeError);
            }
        }
        if (w != null) {
            try {
                w.close();
            } catch (Exception closeError) {
                logger.error("Unable to close the index writer after a failure:", closeError);
            }
        }
        throw x;
    }
}

From source file:uk.ac.ebi.arrayexpress.utils.saxon.search.AbstractIndexEnvironment.java

License:Apache License

public void indexFromXmlDB_FACETS(String indexLocationDirectory, String dbHost, int dbPort, String dbPassword,
        String dbName) throws Exception {
    int countNodes = 0;
    String driverXml = "";
    String connectionString = "";
    Collection coll;//from  w  w  w. java 2  s . co m
    IndexWriter w = null;
    Map<String, XPathExpression> fieldXpe = new HashMap<String, XPathExpression>();
    try {

        Directory indexTempDirectory = FSDirectory.open(new File(indexLocationDirectory, indexId));
        w = createIndex(indexTempDirectory, indexAnalyzer);

        Directory taxDir = FSDirectory.open(new File(indexLocationDirectory + "Facets", indexId));

        DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxDir);
        CategoryDocumentBuilder docBuilder = new CategoryDocumentBuilder(taxoWriter);

        HierarchicalConfiguration connsConf = (HierarchicalConfiguration) Application.getInstance()
                .getPreferences().getConfSubset("bs.xmldatabase");

        if (null != connsConf) {
            // TODO: rpe use the component XmlDatabasePooling
            driverXml = connsConf.getString("driver");
            // I will use the connectionString that was passed by parameter
            // (in several parameters)
            connectionString = connsConf.getString("base") + "://" + dbHost + ":" + dbPort + "/" + dbName;
        } else {
            logger.error("bs.xmldatabase Configuration is missing!!");
        }

        // I cannot register this database again (this is already registered
        // on XmlDbConnectionPool Component -
        // java.nio.channels.OverlappingFileLockException
        // c = Class.forName(driverXml);
        // db = (Database) c.newInstance();
        // DatabaseManager.registerDatabase(db);
        logger.debug("connectionString->" + connectionString);
        coll = DatabaseManager.getCollection(connectionString);
        XPathQueryService service = (XPathQueryService) coll.getService("XPathQueryService", "1.0");

        DocumentInfo source = null;
        // Loop through all result items

        // collect all the fields data
        Configuration config = ((SaxonEngine) Application.getAppComponent("SaxonEngine")).trFactory
                .getConfiguration();

        XPath xp = new XPathEvaluator(config);
        // XPathExpression xpe = xp.compile(this.env.indexDocumentPath);

        for (FieldInfo field : fields.values()) {
            fieldXpe.put(field.name, xp.compile(field.path));
            logger.debug("Field Path->[{}]", field.path);
        }

        // the xmldatabase is not very correct and have memory problem for
        // queires with huge results, so its necessary to implement our own
        // iteration mechanism
        //
        // // I will collect all the results
        // ResourceSet set = service.query(this.env.indexDocumentPath);
        // //TODO rpe
        // //ResourceSet set = service.query("//Sample");
        // logger.debug("Number of results->" + set.getSize());
        // long numberResults = set.getSize();
        long numberResults = 0;
        ResourceSet set = service.query("count(" + indexDocumentPath + ")");
        if (set.getIterator().hasMoreResources()) {
            numberResults = Integer.parseInt((String) set.getIterator().nextResource().getContent());
        }
        logger.debug("Number of results->" + numberResults);
        long pageSizeDefault = 50000;
        // the samplegroup cannot be big otherwise I will obtain a memory
        // error ... but the sample must b at least one million because the
        // paging queries are really slow - we need to balance it
        // (for samples 1million, for samplegroup 50000)
        if (numberResults > 1000000) {
            pageSizeDefault = 1000000;
        }

        long pageNumber = 1;
        int count = 0;
        Map<String, AttsInfo[]> cacheAtt = new HashMap<String, AttsInfo[]>();
        Map<String, XPathExpression> cacheXpathAtt = new HashMap<String, XPathExpression>();
        Map<String, XPathExpression> cacheXpathAttValue = new HashMap<String, XPathExpression>();
        while ((pageNumber * pageSizeDefault) <= (numberResults + pageSizeDefault - 1)) {
            // while ((pageNumber<=1)) {
            // calculate the last hit
            long pageInit = (pageNumber - 1) * pageSizeDefault + 1;
            long pageSize = (pageNumber * pageSizeDefault < numberResults) ? pageSizeDefault
                    : (numberResults - pageInit + 1);

            service = (XPathQueryService) coll.getService("XPathQueryService", "1.0");

            // xquery paging using subsequence function
            long time = System.nanoTime();

            // /set =
            // service.query("for $x in(/Biosamples/SampleGroup/Sample/@id) return string($x)");
            set = service.query("for $x in(subsequence(" + indexDocumentPath + "/@id," + pageInit + ","
                    + pageSize + ")) return string($x)");
            // logger.debug("Number of results of page->" + set.getSize());
            double ms = (System.nanoTime() - time) / 1000000d;
            logger.info("Query XMLDB took ->[{}]", ms);

            ResourceIterator iter = set.getIterator();
            XPath xp2;
            XPathExpression xpe2;
            List documentNodes;
            StringReader reader;
            // cache of distinct attributes fora each sample group

            while (iter.hasMoreResources()) {
                count++;
                logger.debug("its beeing processed the number ->" + count);
                String idSample = (String) iter.nextResource().getContent();
                logger.debug("idSample->" + idSample);
                // I need to get the sample
                ResourceSet setid = service.query(indexDocumentPath + "[@id='" + idSample + "']");

                // System.out.println("/Biosamples/SampleGroup/Sample[@id='"
                // + idSample + "']");
                ResourceIterator iterid = setid.getIterator();
                List<CategoryPath> sampleCategories = null;
                while (iterid.hasMoreResources()) {
                    // System.out.println("");
                    // /xml=(String) iterid.nextResource().getContent();

                    // /xml=(String) iter.nextResource().getContent();
                    // logger.debug("xml->"+xml);
                    // /reader = new StringReader(xml);
                    StringBuilder xml = new StringBuilder();
                    xml.append((String) iterid.nextResource().getContent());

                    // logger.debug(xml.toString());
                    reader = new StringReader(xml.toString());
                    source = config.buildDocument(new StreamSource(reader));

                    // logger.debug("XML DB->[{}]",
                    // PrintUtils.printNodeInfo((NodeInfo) source, config));
                    Document d = new Document();

                    xp2 = new XPathEvaluator(source.getConfiguration());

                    int position = indexDocumentPath.lastIndexOf("/");
                    ;
                    String pathRoot = "";
                    if (position != -1) {
                        pathRoot = indexDocumentPath.substring(position);
                    } else {
                        pathRoot = indexDocumentPath;
                    }
                    // logger.debug("PathRoot->[{}]",pathRoot);
                    xpe2 = xp2.compile(pathRoot);
                    // TODO rpe
                    // xpe2 = xp2.compile("/Sample");
                    documentNodes = (List) xpe2.evaluate(source, XPathConstants.NODESET);

                    for (Object node : documentNodes) {
                        // logger.debug("XML DB->[{}]",PrintUtils.printNodeInfo((NodeInfo)node,config));
                        for (FieldInfo field : fields.values()) {
                            try {

                                // Configuration
                                // config=doc.getConfiguration();
                                // I Just have to calculate the Xpath
                                if (!field.process) {

                                    List values = (List) fieldXpe.get(field.name).evaluate(node,
                                            XPathConstants.NODESET);
                                    // logger.debug("Field->[{}] values-> [{}]",
                                    // field.name,
                                    // values.toString());
                                    for (Object v : values) {

                                        if ("integer".equals(field.type)) {
                                            addIntIndexField(d, field.name, v, field.shouldStore,
                                                    field.shouldSort);

                                            // Just to test I will put here
                                            // one facet for the samples
                                            if (field.name.equalsIgnoreCase("samples")) {
                                                System.out.println("Value-->" + v.toString());
                                                sampleCategories = new ArrayList<CategoryPath>();
                                                sampleCategories.add(new CategoryPath("samples", v.toString()));
                                            }

                                        } else if ("date".equals(field.type)) {
                                            // todo: addDateIndexField(d,
                                            // field.name,
                                            // v);
                                            logger.error(
                                                    "Date fields are not supported yet, field [{}] will not be created",
                                                    field.name);
                                        } else if ("boolean".equals(field.type)) {
                                            addBooleanIndexField(d, field.name, v, field.shouldSort);
                                        } else {
                                            addIndexField(d, field.name, v, field.shouldAnalyze,
                                                    field.shouldStore, field.shouldSort);
                                        }
                                    }

                                } else {
                                    if (field.name.equalsIgnoreCase("attributes")) {
                                        // implement here the biosamples
                                        // database sample attributes logic
                                        // TODO: rpe
                                        // logger.debug("There is A special treatment for this field->"
                                        // + field.name);

                                        List values = (List) fieldXpe.get(field.name).evaluate(node,
                                                XPathConstants.NODESET);

                                        // XPathExpression
                                        // classAtt=xp.compile("@class");
                                        // XPathExpression
                                        // typeAtt=xp.compile("@dataType");
                                        // XPathExpression
                                        // valueAtt=xp.compile("value");
                                        String groupId = (String) fieldXpe.get("samplegroup").evaluate(node,
                                                XPathConstants.STRING);
                                        String id = (String) fieldXpe.get("accession").evaluate(node,
                                                XPathConstants.STRING);

                                        // logger.debug(groupId+"$$$" + id);

                                        // logger.debug("Field->[{}] values-> [{}]",
                                        // field.name,
                                        // values.toString());

                                        AttsInfo[] attsInfo = null;
                                        if (cacheAtt.containsKey(groupId)) {
                                            attsInfo = cacheAtt.get(groupId);
                                        } else {
                                            logger.debug("No exists cache for samplegroup->" + groupId);
                                            // ResourceSet setAtt =
                                            // service.query("distinct-values(/Biosamples/SampleGroup[@id='"
                                            // + groupId +
                                            // "']/Sample/attribute[@dataType!='INTEGER']/replace(@class,' ', '-'))");
                                            // /ResourceSet setAtt =
                                            // service.query("distinct-values(/Biosamples/SampleGroup[@id='"
                                            // + groupId +
                                            // "']/Sample/attribute/replace(@class,' ', '-'))");
                                            // /ResourceSet setAtt =
                                            // service.query("distinct-values(/Biosamples/SampleGroup[@id='"
                                            // + groupId +
                                            // "']/Sample/attribute/@class)");
                                            ResourceSet setAtt = service
                                                    .query("data(/Biosamples/SampleGroup[@id='" + groupId
                                                            + "']/SampleAttributes/attribute/@class)");
                                            // logger.debug("->"
                                            // +
                                            // "/Biosamples/SampleGroup[@id='"
                                            // + groupId +
                                            // "']/SampleAttributes/attribute/@class");

                                            ResourceIterator resAtt = setAtt.getIterator();
                                            int i = 0;
                                            attsInfo = new AttsInfo[(int) setAtt.getSize()];
                                            while (resAtt.hasMoreResources()) {
                                                String classValue = (String) resAtt.nextResource().getContent();
                                                // logger.debug("->"
                                                // + classValue);
                                                // need to use this because
                                                // of the use of quotes in
                                                // the name of the classes
                                                String classValueWitoutQuotes = classValue.replaceAll("\"",
                                                        "\"\"");
                                                // logger.debug("Class value->"
                                                // + classValue);
                                                XPathExpression xpathAtt = null;
                                                XPathExpression xpathAttValue = null;
                                                if (cacheXpathAtt.containsKey(classValue)) {
                                                    xpathAtt = cacheXpathAtt.get(classValue);
                                                    xpathAttValue = cacheXpathAttValue.get(classValue);
                                                } else {

                                                    xpathAtt = xp.compile("./attribute[@class=\""
                                                            + classValueWitoutQuotes + "\"]/@dataType");

                                                    xpathAttValue = xp.compile(
                                                            "attribute[@class=\"" + classValueWitoutQuotes
                                                                    + "\"]/value/text()[last()]");

                                                    // logger.debug("attribute[@class=\""
                                                    // +
                                                    // classValueWitoutQuotes
                                                    // +
                                                    // "\"]//value/text()");
                                                    // //xpathAttValue=xp.compile("./attribute[@class=\""
                                                    // +
                                                    // classValueWitoutQuotes
                                                    // +
                                                    // "\"]/value[1]/text()");
                                                    // logger.debug("./attribute[@class=\""
                                                    // +
                                                    // classValueWitoutQuotes
                                                    // +
                                                    // "\"]/value[1]/text()");
                                                    cacheXpathAtt.put(classValue, xpathAtt);
                                                    cacheXpathAttValue.put(classValue, xpathAttValue);
                                                }
                                                // this doesnt work when the
                                                // first sample of sample
                                                // group doens have all the
                                                // attributes
                                                // im using \" becuse there
                                                // are some attributes thas
                                                // has ' on the name!!!
                                                // /ResourceSet setAttType =
                                                // service.query("string((/Biosamples/SampleGroup[@id='"
                                                // + groupId
                                                // +"']/Sample/attribute[@class=replace(\""
                                                // + classValueWitoutQuotes
                                                // +
                                                // "\",'-',' ')]/@dataType)[1])");
                                                // /ResourceSet setAttType =
                                                // service.query("string(/Biosamples/SampleGroup[@id='"
                                                // + groupId
                                                // +"']/Sample/attribute[@class=\""
                                                // + classValueWitoutQuotes
                                                // + "\"]/@dataType)");
                                                ResourceSet setAttType = service
                                                        .query("data(/Biosamples/SampleGroup[@id='" + groupId
                                                                + "']/SampleAttributes/attribute[@class=\""
                                                                + classValueWitoutQuotes + "\"]/@dataType)");
                                                String dataValue = (String) setAttType.getIterator()
                                                        .nextResource().getContent();
                                                // logger.debug("Data Type of "
                                                // + classValue + " ->" +
                                                // dataValue);
                                                // String
                                                // dataValue=(String)xpathAtt.evaluate(node,
                                                // XPathConstants.STRING);
                                                AttsInfo attsI = new AttsInfo(classValue, dataValue);
                                                // logger.debug("Atttribute->class"
                                                // + attsI.name + "->type->"
                                                // + attsI.type + "->i" +
                                                // i);
                                                attsInfo[i] = attsI;
                                                // logger.debug("distinct att->"
                                                // + value);
                                                // cacheAtt.put(groupId,
                                                // value);
                                                i++;
                                            }
                                            cacheAtt.put(groupId, attsInfo);
                                            // distinctAtt=cacheAtt.get(groupId);
                                            // logger.debug("Already exists->"
                                            // + distinctAtt);
                                        }
                                        int len = attsInfo.length;
                                        for (int i = 0; i < len; i++) {
                                            // logger.debug("$$$$$$->" +
                                            // attsInfo[i].name + "$$$$" +
                                            // attsInfo[i].type);
                                            if (!attsInfo[i].type.equalsIgnoreCase("integer")
                                                    && !attsInfo[i].type.equalsIgnoreCase("real")) {

                                                XPathExpression valPath = cacheXpathAttValue
                                                        .get(attsInfo[i].name);
                                                String val = (String) valPath.evaluate(node,
                                                        XPathConstants.STRING);
                                                // logger.debug("$$$$$$->" +
                                                // "STRING->" + val + "");
                                                addIndexField(d, (i + 1) + "", val, true, false, true);
                                            } else {
                                                XPathExpression valPath = cacheXpathAttValue
                                                        .get(attsInfo[i].name);
                                                String valS = (String) valPath.evaluate(node,
                                                        XPathConstants.STRING);
                                                valS = valS.trim();
                                                // logger.debug("Integer->"
                                                // + valS);
                                                int val = 0;
                                                if (valS == null || valS.equalsIgnoreCase("")
                                                        || valS.equalsIgnoreCase("NaN")) {
                                                    valS = "0";
                                                }
                                                // sort numbers as strings
                                                // logger.debug("class->" +
                                                // attsInfo[i].name
                                                // +"value->##"+ valS +
                                                // "##");
                                                BigDecimal num = new BigDecimal(valS);
                                                num = num.multiply(new BigDecimal(100));
                                                int taux = num.toBigInteger().intValue();
                                                valS = String.format("%07d", taux);
                                                // logger.debug("Integer->"
                                                // + valS + "position->"
                                                // +(i+1)+"integer");
                                                addIndexField(d, (i + 1) + "", valS, true, false, true);
                                                // addIntIndexField(d,
                                                // (i+1)+"integer", new
                                                // BigInteger(valS),false,
                                                // true);
                                                //
                                            }
                                        }

                                    } else {
                                        // logger.debug("There is NO special treatment for this field->"
                                        // + field.name);
                                    }
                                }
                            } catch (XPathExpressionException x) {
                                String xmlError = PrintUtils.printNodeInfo((NodeInfo) node, config);
                                logger.error("XML DB->[{}]", xmlError);
                                logger.error("Caught an exception while indexing expression [" + field.path
                                        + "] for document ["
                                        + ((NodeInfo) source).getStringValue().substring(0, 20) + "...]", x);
                                throw new Exception("Xml:" + xmlError, x);
                            }
                        }
                    }

                    documentNodes = null;
                    source = null;
                    reader = null;
                    xml = null;
                    countNodes++;
                    // logger.debug("count->[{}]", countNodes);

                    // facet tests

                    docBuilder.setCategoryPaths(sampleCategories);
                    docBuilder.build(d);

                    addIndexDocument(w, d);

                }
            }
            logger.debug("until now it were processed->[{}]", pageNumber * pageSizeDefault);
            pageNumber++;
            if (coll != null) {
                try {
                    // coll.close();
                } catch (Exception e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
            set = null;

        }

        setCountDocuments(countNodes);
        // add metadata to the lucene index
        Map<String, String> map = new HashMap<String, String>();
        map.put("numberDocs", Integer.toString(countNodes));
        map.put("date", Long.toString(System.nanoTime()));
        // logger.debug(Application.getInstance().getComponent("XmlDbConnectionPool").getMetaDataInformation());
        // I cannot call directly
        // getComponent("XmlDbConnectionPool").getMetaDataInformation(),
        // because I can be working in a did
        String dbInfo = ((XmlDbConnectionPool) Application.getInstance().getComponent("XmlDbConnectionPool"))
                .getDBInfo(dbHost, dbPort, dbPassword, dbName);

        map.put("DBInfo", dbInfo);
        // facet
        taxoWriter.commit();
        taxoWriter.close();
        commitIndex(w, map);

    } catch (Exception x) {
        logger.error("Caught an exception:", x);
        w.close();
        throw x;
    }
}

From source file:uk.ac.ebi.fg.biostudies.utils.saxon.search.AbstractIndexEnvironment.java

License:Apache License

/**
 * Rebuilds the Lucene index and its facet taxonomy from the documents stored in the XML database.
 *
 * <p>The index is written to {@code new File(indexLocationDirectory, indexId)} and the taxonomy to
 * {@code new File(indexLocationDirectory + "Facets", indexId)}. The nodes selected by
 * {@code indexDocumentPath} are fetched page by page (as node ids) rather than with a single query,
 * because the XML database has memory problems with very large result sets.
 *
 * <p>If anything fails, both writers are closed (when they were opened) and the original exception
 * is rethrown. Closing them matters because otherwise the next run may be unable to delete the
 * directory this run was writing to.
 *
 * @param indexLocationDirectory base directory under which the index and the taxonomy are created
 * @param dbHost host of the XML database, used to build the connection string
 * @param dbPort port of the XML database, used to build the connection string
 * @param dbPassword password; only forwarded to the connection pool when reading the DB info
 * @param dbName name of the XML database, used in the connection string and in the node lookups
 * @throws Exception if the database cannot be queried, an entry cannot be processed, or the index
 *         cannot be written
 */
public void indexFromXmlDB(String indexLocationDirectory, String dbHost, int dbPort, String dbPassword,
        String dbName) throws Exception {
    int countNodes = 0;
    // Read from the configuration but currently unused: the driver registration below is disabled.
    String driverXml = "";
    String connectionString = "";
    Collection coll;
    IndexWriter w = null;
    DirectoryTaxonomyWriter taxoWriter = null;
    Map<String, XPathExpression> fieldXpe = new HashMap<String, XPathExpression>();
    try {

        Directory indexTempDirectory = FSDirectory.open(new File(indexLocationDirectory, indexId));
        logger.debug("Index directory->" + indexLocationDirectory);
        w = createIndex(indexTempDirectory, indexAnalyzer);

        Directory taxDir = FSDirectory.open(new File(indexLocationDirectory + "Facets", indexId));

        taxoWriter = new DirectoryTaxonomyWriter(taxDir);
        CategoryDocumentBuilder docBuilder = new CategoryDocumentBuilder(taxoWriter);

        HierarchicalConfiguration connsConf = (HierarchicalConfiguration) Application.getInstance()
                .getPreferences().getConfSubset("bs.xmldatabase");

        if (null != connsConf) {
            // TODO: rpe use the component XmlDatabasePooling
            driverXml = connsConf.getString("driver");
            // The connection string is assembled from the values passed in as parameters.
            connectionString = connsConf.getString("base") + "://" + dbHost + ":" + dbPort + "/" + dbName;
        } else {
            logger.error("bs.xmldatabase Configuration is missing!!");
        }

        // The database driver must not be registered again here: it is already registered by the
        // XmlDbConnectionPool component, and a second registration fails with
        // java.nio.channels.OverlappingFileLockException.
        logger.debug("connectionString->" + connectionString);
        coll = DatabaseManager.getCollection(connectionString);
        XPathQueryService service = (XPathQueryService) coll.getService("XPathQueryService", "1.0");

        Configuration config = ((SaxonEngine) Application.getAppComponent("SaxonEngine")).trFactory
                .getConfiguration();

        // Compile every field's XPath once, up front; the compiled expressions are handed to
        // processEntryIndex for each node.
        XPath xp = new XPathEvaluator(config);
        for (FieldInfo field : fields.values()) {
            fieldXpe.put(field.name, xp.compile(field.path));
            logger.debug("Field Path->[{}]", field.path);
        }

        // The XML database has memory problems with queries that return huge results, so the
        // total is counted first and the nodes are then fetched with our own paging.
        long numberResults = 0;
        ResourceSet set = service.query("count(" + indexDocumentPath + ")");
        ResourceIterator countIter = set.getIterator();
        if (countIter.hasMoreResources()) {
            // numberResults is a long, so parse it as one instead of capping it at int range.
            numberResults = Long.parseLong((String) countIter.nextResource().getContent());
        }
        logger.debug("Number of results->" + numberResults);

        // Page size is a trade-off: large pages of sample groups cause memory errors, but paging
        // queries are slow, so very large result sets (samples) use pages of one million.
        long pageSizeDefault = 50000;
        if (numberResults > 1000000) {
            pageSizeDefault = 1000000;
        }

        long pageNumber = 1;
        int count = 0;
        while ((pageNumber * pageSizeDefault) <= (numberResults + pageSizeDefault - 1)) {
            // First (1-based) position of this page and the number of items in it; the last page
            // only holds the remainder.
            long pageInit = (pageNumber - 1) * pageSizeDefault + 1;
            long pageSize = (pageNumber * pageSizeDefault < numberResults) ? pageSizeDefault
                    : (numberResults - pageInit + 1);

            service = (XPathQueryService) coll.getService("XPathQueryService", "1.0");

            // XQuery paging with subsequence(). Node ids are used instead of @id because the same
            // sample can appear in different sample groups.
            // TODO: change this (db:node-id / db:open-id only work with BaseX)
            long time = System.nanoTime();
            set = service.query("for $x in(subsequence(" + indexDocumentPath + "," + pageInit + "," + pageSize
                    + ")) return db:node-id($x)");
            double ms = (System.nanoTime() - time) / 1000000d;
            logger.info("Query XMLDB took ->[{}]", ms);

            ResourceIterator iter = set.getIterator();
            while (iter.hasMoreResources()) {
                count++;
                logger.debug("its beeing processed the number ->" + count);
                String idNode = (String) iter.nextResource().getContent();

                // Fetch the node itself by its id.
                ResourceSet setid = service.query("db:open-id('" + dbName + "'," + idNode + ")");
                ResourceIterator iterid = setid.getIterator();
                // NOTE(review): nothing in this method ever fills the category list, so the
                // document builder is always handed null - confirm that this is intended.
                List<CategoryPath> sampleCategories = null;
                while (iterid.hasMoreResources()) {
                    String xml = (String) iterid.nextResource().getContent();
                    DocumentInfo source = config.buildDocument(new StreamSource(new StringReader(xml)));

                    XPath xp2 = new XPathEvaluator(source.getConfiguration());

                    // The fetched XML is parsed as its own document, so it is queried with only
                    // the last step of indexDocumentPath (leading '/' included).
                    int position = indexDocumentPath.lastIndexOf("/");
                    String pathRoot = (position != -1) ? indexDocumentPath.substring(position)
                            : indexDocumentPath;
                    XPathExpression xpe2 = xp2.compile(pathRoot);
                    List<?> documentNodes = (List<?>) xpe2.evaluate(source, XPathConstants.NODESET);

                    // Remains an empty document when no node matches pathRoot; otherwise it is
                    // replaced by the document built from the last matching node.
                    Document d = new Document();
                    for (Object node : documentNodes) {
                        try {
                            d = processEntryIndex(node, config, service, fieldXpe);
                        } catch (Exception x) {
                            String xmlError = PrintUtils.printNodeInfo((NodeInfo) node, config);
                            logger.error("XML that was being processed when the error occurred DB->[{}]",
                                    xmlError);
                            // Both writers are closed by the outer catch block, so that the next
                            // run is not stopped by being unable to delete the newSetup directory.
                            throw new Exception("Xml that is being processed:" + xmlError, x);
                        }
                    }

                    countNodes++;

                    // Add the facet information to the document, then index it.
                    docBuilder.setCategoryPaths(sampleCategories);
                    docBuilder.build(d);

                    addIndexDocument(w, d);
                }
            }

            logger.debug("until now it were processed->[{}]", pageNumber * pageSizeDefault);
            pageNumber++;
            // The collection is deliberately left open here (closing it was disabled upstream).
            // Drop the reference to this page's result set before the next query / the commit.
            set = null;
        }

        setCountDocuments(countNodes);
        // Metadata stored together with the Lucene index.
        Map<String, String> map = new HashMap<String, String>();
        map.put("numberDocs", Integer.toString(countNodes));
        // NOTE(review): System.nanoTime() has an arbitrary origin and is not a wall-clock date;
        // kept as-is because readers of this value are not visible here - confirm.
        map.put("date", Long.toString(System.nanoTime()));
        // getDBInfo is called with explicit connection details instead of asking the pool for
        // its own metadata, because this run may target a different database.
        String dbInfo = ((XmlDbConnectionPool) Application.getInstance().getComponent("XmlDbConnectionPool"))
                .getDBInfo(dbHost, dbPort, dbPassword, dbName);

        map.put("DBInfo", dbInfo);
        // The taxonomy is committed and closed before the index itself is committed.
        taxoWriter.commit();
        taxoWriter.close();
        commitIndex(w, map);

    } catch (Exception x) {
        logger.error("Caught an exception:", x);
        // Either writer can still be null when opening its directory failed, and closing one of
        // them can fail too. Close each independently so that neither a null reference nor a
        // failed close leaks the other writer or replaces the original exception.
        if (taxoWriter != null) {
            try {
                taxoWriter.close();
            } catch (Exception closeFailure) {
                logger.error("Could not close the taxonomy writer:", closeFailure);
            }
        }
        if (w != null) {
            try {
                w.close();
            } catch (Exception closeFailure) {
                logger.error("Could not close the index writer:", closeFailure);
            }
        }
        throw x;
    }
}

From source file:uk.ac.ebi.fg.biostudies.utils.saxon.search.AbstractIndexEnvironment.java

License:Apache License

public void indexFromXmlDB_FACETS(String indexLocationDirectory, String dbHost, int dbPort, String dbPassword,
        String dbName) throws Exception {
    int countNodes = 0;
    String driverXml = "";
    String connectionString = "";
    Collection coll;//from   ww w  .  jav  a  2  s  .co m
    IndexWriter w = null;
    Map<String, XPathExpression> fieldXpe = new HashMap<String, XPathExpression>();
    try {

        Directory indexTempDirectory = FSDirectory.open(new File(indexLocationDirectory, indexId));
        w = createIndex(indexTempDirectory, indexAnalyzer);

        Directory taxDir = FSDirectory.open(new File(indexLocationDirectory + "Facets", indexId));

        DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxDir);
        CategoryDocumentBuilder docBuilder = new CategoryDocumentBuilder(taxoWriter);

        HierarchicalConfiguration connsConf = (HierarchicalConfiguration) Application.getInstance()
                .getPreferences().getConfSubset("bs.xmldatabase");

        if (null != connsConf) {
            // TODO: rpe use the component XmlDatabasePooling
            driverXml = connsConf.getString("driver");
            // I will use the connectionString that was passed by parameter
            // (in several parameters)
            connectionString = connsConf.getString("base") + "://" + dbHost + ":" + dbPort + "/" + dbName;
        } else {
            logger.error("bs.xmldatabase Configuration is missing!!");
        }

        // I cannot register this database again (this is already registered
        // on XmlDbConnectionPool Component -
        // java.nio.channels.OverlappingFileLockException
        // c = Class.forName(driverXml);
        // db = (Database) c.newInstance();
        // DatabaseManager.registerDatabase(db);
        logger.debug("connectionString->" + connectionString);
        coll = DatabaseManager.getCollection(connectionString);
        XPathQueryService service = (XPathQueryService) coll.getService("XPathQueryService", "1.0");

        DocumentInfo source = null;
        // Loop through all result items

        // collect all the fields data
        Configuration config = ((SaxonEngine) Application.getAppComponent("SaxonEngine")).trFactory
                .getConfiguration();

        XPath xp = new XPathEvaluator(config);
        // XPathExpression xpe = xp.compile(this.env.indexDocumentPath);

        for (FieldInfo field : fields.values()) {
            fieldXpe.put(field.name, xp.compile(field.path));
            logger.debug("Field Path->[{}]", field.path);
        }

        // the xmldatabase is not very correct and have memory problem for
        // queires with huge results, so its necessary to implement our own
        // iteration mechanism
        //
        // // I will collect all the results
        // ResourceSet set = service.query(this.env.indexDocumentPath);
        // //TODO rpe
        // //ResourceSet set = service.query("//Sample");
        // logger.debug("Number of results->" + set.getSize());
        // long numberResults = set.getSize();
        long numberResults = 0;
        ResourceSet set = service.query("count(" + indexDocumentPath + ")");
        if (set.getIterator().hasMoreResources()) {
            numberResults = Integer.parseInt((String) set.getIterator().nextResource().getContent());
        }
        logger.debug("Number of results->" + numberResults);
        long pageSizeDefault = 50000;
        // the samplegroup cannot be big otherwise I will obtain a memory
        // error ... but the sample must b at least one million because the
        // paging queries are really slow - we need to balance it
        // (for samples 1million, for samplegroup 50000)
        if (numberResults > 1000000) {
            pageSizeDefault = 1000000;
        }

        long pageNumber = 1;
        int count = 0;
        Map<String, AttsInfo[]> cacheAtt = new HashMap<String, AttsInfo[]>();
        Map<String, XPathExpression> cacheXpathAtt = new HashMap<String, XPathExpression>();
        Map<String, XPathExpression> cacheXpathAttValue = new HashMap<String, XPathExpression>();
        while ((pageNumber * pageSizeDefault) <= (numberResults + pageSizeDefault - 1)) {
            // while ((pageNumber<=1)) {
            // calculate the last hit
            long pageInit = (pageNumber - 1) * pageSizeDefault + 1;
            long pageSize = (pageNumber * pageSizeDefault < numberResults) ? pageSizeDefault
                    : (numberResults - pageInit + 1);

            service = (XPathQueryService) coll.getService("XPathQueryService", "1.0");

            // xquery paging using subsequence function
            long time = System.nanoTime();

            // /set =
            // service.query("for $x in(/Biosamples/SampleGroup/Sample/@id) return string($x)");
            set = service.query("for $x in(subsequence(" + indexDocumentPath + "/@id," + pageInit + ","
                    + pageSize + ")) return string($x)");
            // logger.debug("Number of results of page->" + set.getSize());
            double ms = (System.nanoTime() - time) / 1000000d;
            logger.info("Query XMLDB took ->[{}]", ms);

            ResourceIterator iter = set.getIterator();
            XPath xp2;
            XPathExpression xpe2;
            List documentNodes;
            StringReader reader;
            // cache of distinct attributes fora each sample group

            while (iter.hasMoreResources()) {
                count++;
                logger.debug("its beeing processed the number ->" + count);
                String idSample = (String) iter.nextResource().getContent();
                logger.debug("idSample->" + idSample);
                // I need to get the sample
                ResourceSet setid = service.query(indexDocumentPath + "[@id='" + idSample + "']");

                // System.out.println("/Biosamples/SampleGroup/Sample[@id='"
                // + idSample + "']");
                ResourceIterator iterid = setid.getIterator();
                List<CategoryPath> sampleCategories = null;
                while (iterid.hasMoreResources()) {
                    // System.out.println("");
                    // /xml=(String) iterid.nextResource().getContent();

                    // /xml=(String) iter.nextResource().getContent();
                    // logger.debug("xml->"+xml);
                    // /reader = new StringReader(xml);
                    StringBuilder xml = new StringBuilder();
                    xml.append((String) iterid.nextResource().getContent());

                    // logger.debug(xml.toString());
                    reader = new StringReader(xml.toString());
                    source = config.buildDocument(new StreamSource(reader));

                    // logger.debug("XML DB->[{}]",
                    // PrintUtils.printNodeInfo((NodeInfo) source, config));
                    Document d = new Document();

                    xp2 = new XPathEvaluator(source.getConfiguration());

                    int position = indexDocumentPath.lastIndexOf("/");
                    ;
                    String pathRoot = "";
                    if (position != -1) {
                        pathRoot = indexDocumentPath.substring(position);
                    } else {
                        pathRoot = indexDocumentPath;
                    }
                    // logger.debug("PathRoot->[{}]",pathRoot);
                    xpe2 = xp2.compile(pathRoot);
                    // TODO rpe
                    // xpe2 = xp2.compile("/Sample");
                    documentNodes = (List) xpe2.evaluate(source, XPathConstants.NODESET);

                    for (Object node : documentNodes) {
                        // logger.debug("XML DB->[{}]",PrintUtils.printNodeInfo((NodeInfo)node,config));
                        for (FieldInfo field : fields.values()) {
                            try {

                                // Configuration
                                // config=doc.getConfiguration();
                                // I Just have to calculate the Xpath
                                if (!field.process) {

                                    List values = (List) fieldXpe.get(field.name).evaluate(node,
                                            XPathConstants.NODESET);
                                    // logger.debug("Field->[{}] values-> [{}]",
                                    // field.name,
                                    // values.toString());
                                    for (Object v : values) {

                                        if ("integer".equals(field.type)) {
                                            addIntIndexField(d, field.name, v, field.shouldStore,
                                                    field.shouldSort);

                                            // Just to test I will put here
                                            // one facet for the samples
                                            if (field.name.equalsIgnoreCase("samples")) {
                                                System.out.println("Value-->"
                                                        + v.toString());
                                                sampleCategories = new ArrayList<CategoryPath>();
                                                sampleCategories.add(new CategoryPath("samples", v.toString()));
                                            }

                                        } else if ("date".equals(field.type)) {
                                            // todo: addDateIndexField(d,
                                            // field.name,
                                            // v);
                                            logger.error(
                                                    "Date fields are not supported yet, field [{}] will not be created",
                                                    field.name);
                                        } else if ("boolean".equals(field.type)) {
                                            addBooleanIndexField(d, field.name, v, field.shouldSort);
                                        } else {
                                            addIndexField(d, field.name, v, field.shouldAnalyze,
                                                    field.shouldStore, field.shouldSort);
                                        }
                                    }

                                } else {
                                    if (field.name.equalsIgnoreCase("attributes")) {
                                        // implement here the biosamples
                                        // database sample attributes logic
                                        // TODO: rpe
                                        // logger.debug("There is A special treatment for this field->"
                                        // + field.name);

                                        List values = (List) fieldXpe.get(field.name).evaluate(node,
                                                XPathConstants.NODESET);

                                        // XPathExpression
                                        // classAtt=xp.compile("@class");
                                        // XPathExpression
                                        // typeAtt=xp.compile("@dataType");
                                        // XPathExpression
                                        // valueAtt=xp.compile("value");
                                        String groupId = (String) fieldXpe.get("samplegroup").evaluate(node,
                                                XPathConstants.STRING);
                                        String id = (String) fieldXpe.get("accession").evaluate(node,
                                                XPathConstants.STRING);

                                        // logger.debug(groupId+"$$$" + id);

                                        // logger.debug("Field->[{}] values-> [{}]",
                                        // field.name,
                                        // values.toString());

                                        AttsInfo[] attsInfo = null;
                                        if (cacheAtt.containsKey(groupId)) {
                                            attsInfo = cacheAtt.get(groupId);
                                        } else {
                                            logger.debug("No exists cache for samplegroup->" + groupId);
                                            // ResourceSet setAtt =
                                            // service.query("distinct-values(/Biosamples/SampleGroup[@id='"
                                            // + groupId +
                                            // "']/Sample/attribute[@dataType!='INTEGER']/replace(@class,' ', '-'))");
                                            // /ResourceSet setAtt =
                                            // service.query("distinct-values(/Biosamples/SampleGroup[@id='"
                                            // + groupId +
                                            // "']/Sample/attribute/replace(@class,' ', '-'))");
                                            // /ResourceSet setAtt =
                                            // service.query("distinct-values(/Biosamples/SampleGroup[@id='"
                                            // + groupId +
                                            // "']/Sample/attribute/@class)");
                                            ResourceSet setAtt = service
                                                    .query("data(/Biosamples/SampleGroup[@id='" + groupId
                                                            + "']/SampleAttributes/attribute/@class)");
                                            // logger.debug("->"
                                            // +
                                            // "/Biosamples/SampleGroup[@id='"
                                            // + groupId +
                                            // "']/SampleAttributes/attribute/@class");

                                            ResourceIterator resAtt = setAtt.getIterator();
                                            int i = 0;
                                            attsInfo = new AttsInfo[(int) setAtt.getSize()];
                                            while (resAtt.hasMoreResources()) {
                                                String classValue = (String) resAtt.nextResource().getContent();
                                                // logger.debug("->"
                                                // + classValue);
                                                // class names may contain
                                                // double quotes, so every "
                                                // is doubled before the name
                                                // is embedded in the
                                                // double-quoted string
                                                // literals built below
                                                String classValueWitoutQuotes = classValue.replaceAll("\"",
                                                        "\"\"");
                                                // logger.debug("Class value->"
                                                // + classValue);
                                                XPathExpression xpathAtt = null;
                                                XPathExpression xpathAttValue = null;
                                                if (cacheXpathAtt.containsKey(classValue)) {
                                                    xpathAtt = cacheXpathAtt.get(classValue);
                                                    xpathAttValue = cacheXpathAttValue.get(classValue);
                                                } else {

                                                    xpathAtt = xp.compile("./attribute[@class=\""
                                                            + classValueWitoutQuotes + "\"]/@dataType");

                                                    xpathAttValue = xp.compile(
                                                            "attribute[@class=\"" + classValueWitoutQuotes
                                                                    + "\"]/value/text()[last()]");

                                                    // logger.debug("attribute[@class=\""
                                                    // +
                                                    // classValueWitoutQuotes
                                                    // +
                                                    // "\"]//value/text()");
                                                    // //xpathAttValue=xp.compile("./attribute[@class=\""
                                                    // +
                                                    // classValueWitoutQuotes
                                                    // +
                                                    // "\"]/value[1]/text()");
                                                    // logger.debug("./attribute[@class=\""
                                                    // +
                                                    // classValueWitoutQuotes
                                                    // +
                                                    // "\"]/value[1]/text()");
                                                    cacheXpathAtt.put(classValue, xpathAtt);
                                                    cacheXpathAttValue.put(classValue, xpathAttValue);
                                                }
                                                // this doesn't work when the
                                                // first sample of the sample
                                                // group doesn't have all the
                                                // attributes
                                                // I'm using \" because there
                                                // are some attributes that
                                                // have ' in the name!!!
                                                // /ResourceSet setAttType =
                                                // service.query("string((/Biosamples/SampleGroup[@id='"
                                                // + groupId
                                                // +"']/Sample/attribute[@class=replace(\""
                                                // + classValueWitoutQuotes
                                                // +
                                                // "\",'-',' ')]/@dataType)[1])");
                                                // /ResourceSet setAttType =
                                                // service.query("string(/Biosamples/SampleGroup[@id='"
                                                // + groupId
                                                // +"']/Sample/attribute[@class=\""
                                                // + classValueWitoutQuotes
                                                // + "\"]/@dataType)");
                                                ResourceSet setAttType = service
                                                        .query("data(/Biosamples/SampleGroup[@id='" + groupId
                                                                + "']/SampleAttributes/attribute[@class=\""
                                                                + classValueWitoutQuotes + "\"]/@dataType)");
                                                String dataValue = (String) setAttType.getIterator()
                                                        .nextResource().getContent();
                                                // logger.debug("Data Type of "
                                                // + classValue + " ->" +
                                                // dataValue);
                                                // String
                                                // dataValue=(String)xpathAtt.evaluate(node,
                                                // XPathConstants.STRING);
                                                AttsInfo attsI = new AttsInfo(classValue, dataValue);
                                                // logger.debug("Atttribute->class"
                                                // + attsI.name + "->type->"
                                                // + attsI.type + "->i" +
                                                // i);
                                                attsInfo[i] = attsI;
                                                // logger.debug("distinct att->"
                                                // + value);
                                                // cacheAtt.put(groupId,
                                                // value);
                                                i++;
                                            }
                                            cacheAtt.put(groupId, attsInfo);
                                            // distinctAtt=cacheAtt.get(groupId);
                                            // logger.debug("Already exists->"
                                            // + distinctAtt);
                                        }
                                        int len = attsInfo.length;
                                        for (int i = 0; i < len; i++) {
                                            // logger.debug("$$$$$$->" +
                                            // attsInfo[i].name + "$$$$" +
                                            // attsInfo[i].type);
                                            if (!attsInfo[i].type.equalsIgnoreCase("integer")
                                                    && !attsInfo[i].type.equalsIgnoreCase("real")) {

                                                XPathExpression valPath = cacheXpathAttValue
                                                        .get(attsInfo[i].name);
                                                String val = (String) valPath.evaluate(node,
                                                        XPathConstants.STRING);
                                                // logger.debug("$$$$$$->" +
                                                // "STRING->" + val + "");
                                                addIndexField(d, (i + 1) + "", val, true, false, true);
                                            } else {
                                                XPathExpression valPath = cacheXpathAttValue
                                                        .get(attsInfo[i].name);
                                                String valS = (String) valPath.evaluate(node,
                                                        XPathConstants.STRING);
                                                valS = valS.trim();
                                                // logger.debug("Integer->"
                                                // + valS);
                                                int val = 0;
                                                if (valS == null || valS.equalsIgnoreCase("")
                                                        || valS.equalsIgnoreCase("NaN")) {
                                                    valS = "0";
                                                }
                                                // sort numbers as strings:
                                                // the value is multiplied by
                                                // 100, truncated to an int and
                                                // zero-padded to 7 digits
                                                // logger.debug("class->" +
                                                // attsInfo[i].name
                                                // +"value->##"+ valS +
                                                // "##");
                                                BigDecimal num = new BigDecimal(valS);
                                                num = num.multiply(new BigDecimal(100));
                                                int taux = num.toBigInteger().intValue();
                                                valS = String.format("%07d", taux);
                                                // logger.debug("Integer->"
                                                // + valS + "position->"
                                                // +(i+1)+"integer");
                                                addIndexField(d, (i + 1) + "", valS, true, false, true);
                                                // addIntIndexField(d,
                                                // (i+1)+"integer", new
                                                // BigInteger(valS),false,
                                                // true);
                                                //
                                            }
                                        }

                                    } else {
                                        // logger.debug("There is NO special treatment for this field->"
                                        // + field.name);
                                    }
                                }
                            } catch (XPathExpressionException x) {
                                String xmlError = PrintUtils.printNodeInfo((NodeInfo) node, config);
                                logger.error("XML DB->[{}]", xmlError);
                                logger.error("Caught an exception while indexing expression [" + field.path
                                        + "] for document ["
                                        + ((NodeInfo) source).getStringValue().substring(0, 20) + "...]", x);
                                throw new Exception("Xml:" + xmlError, x);
                            }
                        }
                    }

                    documentNodes = null;
                    source = null;
                    reader = null;
                    xml = null;
                    countNodes++;
                    // logger.debug("count->[{}]", countNodes);

                    // facet tests

                    docBuilder.setCategoryPaths(sampleCategories);
                    docBuilder.build(d);

                    addIndexDocument(w, d);

                }
            }
            logger.debug("until now it were processed->[{}]", pageNumber * pageSizeDefault);
            pageNumber++;
            if (coll != null) {
                try {
                    // coll.close();
                } catch (Exception e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
            set = null;

        }

        setCountDocuments(countNodes);
        // add metadata to the lucene index
        Map<String, String> map = new HashMap<String, String>();
        map.put("numberDocs", Integer.toString(countNodes));
        map.put("date", Long.toString(System.nanoTime()));
        // logger.debug(Application.getInstance().getComponent("XmlDbConnectionPool").getMetaDataInformation());
        // I cannot call directly
        // getComponent("XmlDbConnectionPool").getMetaDataInformation(),
        // because I can be working in a did
        String dbInfo = ((XmlDbConnectionPool) Application.getInstance().getComponent("XmlDbConnectionPool"))
                .getDBInfo(dbHost, dbPort, dbPassword, dbName);

        map.put("DBInfo", dbInfo);
        // facet
        taxoWriter.commit();
        taxoWriter.close();
        commitIndex(w, map);

    } catch (Exception x) {
        logger.error("Caught an exception:", x);
        w.close();
        throw x;
    }
}