List of usage examples for org.jdom2.output.Format.getPrettyFormat()
public static Format getPrettyFormat()
From source file:eu.himeros.digitaledition.AlignedQuotationParser.java
License:Open Source License
public static void align(String inFileName, String outFileName) throws Exception { AlignedQuotationParser aqp = new AlignedQuotationParser(); Element rootOut = aqp.parse(inFileName); //e.g. xxx002_001_ft-xi_frag.xml XMLOutputter xop = new XMLOutputter( Format.getPrettyFormat().setEncoding("UTF-8").setLineSeparator("\n").setIndent(" ")); String output = xop.outputString(rootOut); System.out.println(output);/*w w w . ja va2 s .c o m*/ }
From source file:eu.himeros.hocr.FlatXml.java
License:Open Source License
/**
 * Rewrites an hOCR file as a flat XHTML document and saves it to {@code outFile}.
 *
 * <p>The input is parsed and its {@code head} and {@code body} elements are moved into the
 * XHTML namespace. A {@code <title>ocr</title>} is appended to the head. The body content is
 * replaced by a single {@code <div class="ocr_page">} that holds a clone of every line element
 * found below the {@code ocr_carea} elements; the original area and paragraph boundaries
 * survive only as XML comments.
 *
 * <p>A missing {@code head} or {@code body} is skipped instead of raising a
 * {@code NullPointerException}. The output is written as UTF-8, which is the encoding the
 * pretty format declares by default, and the writer is always closed.
 *
 * @param inFile  hOCR file to read
 * @param outFile file that receives the flattened document
 * @throws Exception if the input cannot be parsed or the output cannot be written
 */
private void init(File inFile, File outFile) throws Exception {
    SAXBuilder builder = new SAXBuilder();
    Document doc = builder.build(inFile);
    Element root = doc.getRootElement();
    Namespace oldns = root.getNamespace();
    Element newRoot = new Element("html", "http://www.w3.org/1999/xhtml");
    Namespace xmlns = newRoot.getNamespace();

    Element head = root.getChild("head", oldns);
    if (head != null) {
        head.setNamespace(xmlns);
        for (Element child : head.getChildren()) {
            child.setNamespace(xmlns);
        }
        Element title = new Element("title", xmlns);
        title.addContent("ocr");
        head.addContent(title);
    }

    Element body = root.getChild("body", oldns);
    if (body != null) {
        body.setNamespace(xmlns);

        Element page = new Element("div", xmlns);
        page.setAttribute("class", "ocr_page");
        // Page id: "i" + file name without its first character, ".html" swapped for ".png".
        page.setAttribute("id", "i" + inFile.getName().substring(1).replace(".html", ".png"));

        XPathExpression<Element> xpath = XPathFactory.instance().compile("//*[@class='ocr_carea']",
                Filters.element(), null, Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml"));
        List<Element> careaElL = xpath.evaluate(body);
        for (Element careaEl : careaElL) {
            // Area and paragraph wrappers are flattened away; keep them as comments only.
            page.addContent(new Comment("<div class=\"" + careaEl.getAttributeValue("class") + "\" title=\""
                    + careaEl.getAttributeValue("title") + "\">"));
            for (Element pEl : careaEl.getChildren()) {
                page.addContent(new Comment("<p>"));
                for (Element lineEl : pEl.getChildren()) {
                    lineEl.removeAttribute("id");
                    lineEl.setNamespace(xmlns);
                    for (Element child : lineEl.getChildren()) {
                        child.removeAttribute("id");
                        child.removeAttribute("lang");
                        child.removeAttribute("lang", xmlns);
                        child.setNamespace(xmlns);
                    }
                    // Clone because the line is still attached to its paragraph.
                    page.addContent(lineEl.clone());
                }
                page.addContent(new Comment("</p>"));
            }
            page.addContent(new Comment("</div>"));
        }

        body.removeContent();
        body.addContent(page);
    }

    newRoot.addContent(root.removeContent());
    doc.detachRootElement();
    doc.setRootElement(newRoot);

    XMLOutputter xmlOutputter = new XMLOutputter(Format.getPrettyFormat());
    // Explicit UTF-8 so the bytes on disk match the encoding in the XML declaration;
    // try-with-resources closes the file even when output fails.
    try (BufferedWriter writer = java.nio.file.Files.newBufferedWriter(outFile.toPath(),
            java.nio.charset.StandardCharsets.UTF_8)) {
        xmlOutputter.output(doc, writer);
    }
}
From source file:eu.himeros.hocr.HocrInfoAggregator.java
License:Open Source License
public void output(String outFileName) { try (BufferedWriter bw = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(outFileName), "UTF-8"))) { xop = new XMLOutputter(Format.getPrettyFormat().setLineSeparator("\n")); makeCompliantHocr();// w w w . j a v a2 s .c o m xop.output(doc, bw); } catch (Exception ex) { ex.printStackTrace(System.err); } }
From source file:eu.himeros.ocr.ngt.NearGroundTruthParser.java
License:Open Source License
public static void align(String inFileName, String outFileName) throws Exception { NearGroundTruthParser ngtp = new NearGroundTruthParser(); Element rootOut = ngtp.parse(inFileName); //e.g. xxx002_001_ft-xi_frag.xml XMLOutputter xop = new XMLOutputter( Format.getPrettyFormat().setEncoding("UTF-8").setLineSeparator("\n").setIndent(" ")); String output = xop.outputString(rootOut); System.out.println(output);/*from w w w . ja va2s . c om*/ }
From source file:eu.optimis.monitoring.amazoncollector.XMLHelper.java
License:Apache License
public static String createDocument(List<Measurement> measurements) { Element root = new Element(ROOT); Document doc = new Document(root); for (Measurement m : measurements) { root.addContent(createMonitoringResource(m)); }// w ww.ja v a2 s. c o m XMLOutputter outputter = new XMLOutputter(); outputter.setFormat(Format.getPrettyFormat()); try { outputter.output(doc, new FileOutputStream("./aws.xml")); } catch (FileNotFoundException ex) { Logger.getLogger(XMLHelper.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(XMLHelper.class.getName()).log(Level.SEVERE, null, ex); } String xmlResult = outputter.outputString(doc); String result = xmlResult.replace("encoding=\"UTF-8\"", ""); //System.out.println (result); return result; }
From source file:eus.ixa.ixa.pipe.convert.AbsaSemEval.java
License:Apache License
public static String nafToAbsa2015(String inputNAF) throws IOException { Path kafPath = Paths.get(inputNAF); KAFDocument kaf = KAFDocument.createFromFile(kafPath.toFile()); Set<String> reviewIds = getReviewIdsFromXpathAttribute(kaf); // root element in ABSA 2015 and 2016 format Element reviewsElem = new Element("Reviews"); Document doc = new Document(reviewsElem); // creating Reviews children of Review for (String reviewId : reviewIds) { Element reviewElem = new Element("Review"); reviewElem.setAttribute("rid", reviewId); Element sentencesElem = new Element("sentences"); // getting the sentences in the review List<List<WF>> sentencesByReview = getSentencesByReview(kaf, reviewId); for (List<WF> sent : sentencesByReview) { String sentId = sent.get(0).getXpath(); Integer sentNumber = sent.get(0).getSent(); // getting text element from word forms in NAF String textString = NAFUtils.getSentenceStringFromWFs(sent); Element sentenceElem = new Element("sentence"); sentenceElem.setAttribute("id", sentId); Element textElem = new Element("text"); textElem.setText(textString); sentenceElem.addContent(textElem); // creating opinions element for sentence List<Opinion> opinionsBySentence = getOpinionsBySentence(kaf, sentNumber); Element opinionsElem = new Element("Opinions"); if (!opinionsBySentence.isEmpty()) { // getting opinion info from NAF Opinion layer for (Opinion opinion : opinionsBySentence) { Element opinionElem = new Element("Opinion"); // String polarity = opinion.getOpinionExpression().getPolarity(); String category = opinion.getOpinionExpression().getSentimentProductFeature(); String targetString = opinion.getStr(); int fromOffset = opinion.getOpinionTarget().getTerms().get(0).getWFs().get(0).getOffset(); List<WF> targetWFs = opinion.getOpinionTarget().getTerms() .get(opinion.getOpinionTarget().getTerms().size() - 1).getWFs(); int toOffset = targetWFs.get(targetWFs.size() - 1).getOffset() + targetWFs.get(targetWFs.size() - 1).getLength(); 
opinionElem.setAttribute("target", targetString); opinionElem.setAttribute("category", category); // TODO we still do not have polarity here opinionElem.setAttribute("polarity", "na"); opinionElem.setAttribute("from", Integer.toString(fromOffset)); opinionElem.setAttribute("to", Integer.toString(toOffset)); opinionsElem.addContent(opinionElem); }/*from w w w .jav a 2s. c o m*/ } sentenceElem.addContent(opinionsElem); sentencesElem.addContent(sentenceElem); } reviewElem.addContent(sentencesElem); reviewsElem.addContent(reviewElem); } // end of review XMLOutputter xmlOutput = new XMLOutputter(); Format format = Format.getPrettyFormat(); xmlOutput.setFormat(format); return xmlOutput.outputString(doc); }
From source file:eus.ixa.ixa.pipe.convert.AbsaSemEval.java
License:Apache License
public static String nafToAbsa2014(String kafDocument) { KAFDocument kaf = null;/*from ww w . ja va 2 s .c o m*/ try { Path kafPath = Paths.get(kafDocument); kaf = KAFDocument.createFromFile(kafPath.toFile()); } catch (IOException e) { e.printStackTrace(); } Element sentencesElem = new Element("sentences"); Document doc = new Document(sentencesElem); for (List<WF> sent : kaf.getSentences()) { String sentId = sent.get(0).getXpath(); Integer sentNumber = sent.get(0).getSent(); // getting text element from WFs in NAF String textString = NAFUtils.getSentenceStringFromWFs(sent); Element sentenceElem = new Element("sentence"); sentenceElem.setAttribute("id", sentId); Element textElem = new Element("text"); textElem.setText(textString); sentenceElem.addContent(textElem); // creating opinions element for sentence List<Opinion> opinionsBySentence = getOpinionsBySentence(kaf, sentNumber); if (!opinionsBySentence.isEmpty()) { Element aspectTerms = new Element("aspectTerms"); // getting opinion info from NAF Opinion layer for (Opinion opinion : opinionsBySentence) { String polarity = ""; String targetString = opinion.getStr(); int fromOffset = opinion.getOpinionTarget().getTerms().get(0).getWFs().get(0).getOffset(); List<WF> targetWFs = opinion.getOpinionTarget().getTerms() .get(opinion.getOpinionTarget().getTerms().size() - 1).getWFs(); int toOffset = targetWFs.get(targetWFs.size() - 1).getOffset() + targetWFs.get(targetWFs.size() - 1).getLength(); Element aspectTerm = new Element("aspectTerm"); aspectTerm.setAttribute("term", targetString); aspectTerm.setAttribute("polarity", polarity); aspectTerm.setAttribute("from", Integer.toString(fromOffset)); aspectTerm.setAttribute("to", Integer.toString(toOffset)); aspectTerms.addContent(aspectTerm); } sentenceElem.addContent(aspectTerms); } sentencesElem.addContent(sentenceElem); } XMLOutputter xmlOutput = new XMLOutputter(); Format format = Format.getPrettyFormat(); xmlOutput.setFormat(format); return xmlOutput.outputString(doc); }
From source file:eus.ixa.ixa.pipe.ml.document.features.DocumentFeatureDescriptor.java
License:Apache License
/** * Generate the XML feature descriptor from the docTrainer.properties file. * // w ww . ja v a 2 s . c om * @param params * the properties file * @return the XML feature descriptor * @throws IOException * if input output fails */ public static String createDocumentFeatureDescriptor(final TrainingParameters params) throws IOException { // <generators> final Element generators = new Element("generators"); final Document doc = new Document(generators); // <custom bagofwords /. if (Flags.isBagOfWordsFeature(params)) { final String tokenFeatureRange = Flags.getBagOfWordsFeaturesRange(params); final Element tokenFeature = new Element("custom"); tokenFeature.setAttribute("class", BagOfWordsFeatureGenerator.class.getName()); tokenFeature.setAttribute("range", tokenFeatureRange); generators.addContent(tokenFeature); System.err.println("-> BOW features added!"); } if (Flags.isTokenClassFeature(params)) { final String tokenClassFeatureRange = Flags.getTokenClassFeaturesRange(params); final Element tokenClassFeature = new Element("custom"); tokenClassFeature.setAttribute("class", DocTokenClassFeatureGenerator.class.getName()); tokenClassFeature.setAttribute("range", tokenClassFeatureRange); generators.addContent(tokenClassFeature); System.err.println("-> Token Class Features added!"); } if (Flags.isOutcomePriorFeature(params)) { final Element outcomePriorFeature = new Element("custom"); outcomePriorFeature.setAttribute("class", DocOutcomePriorFeatureGenerator.class.getName()); generators.addContent(outcomePriorFeature); System.err.println("-> Outcome Prior Features added!"); } if (Flags.isSentenceFeature(params)) { final String beginSentence = Flags.getSentenceFeaturesBegin(params); final String endSentence = Flags.getSentenceFeaturesEnd(params); final Element sentenceFeature = new Element("custom"); sentenceFeature.setAttribute("class", DocSentenceFeatureGenerator.class.getName()); sentenceFeature.setAttribute("begin", beginSentence); sentenceFeature.setAttribute("end", 
endSentence); generators.addContent(sentenceFeature); System.err.println("-> Sentence Features added!"); } if (Flags.isPrefixFeature(params)) { final String beginPrefix = Flags.getPrefixFeaturesBegin(params); final String endPrefix = Flags.getPrefixFeaturesEnd(params); final Element prefixFeature = new Element("custom"); prefixFeature.setAttribute("class", DocPrefixFeatureGenerator.class.getName()); prefixFeature.setAttribute("begin", beginPrefix); prefixFeature.setAttribute("end", endPrefix); generators.addContent(prefixFeature); System.err.println("-> Prefix Features added!"); } if (Flags.isSuffixFeature(params)) { final String beginSuffix = Flags.getSuffixFeaturesBegin(params); final String endSuffix = Flags.getSuffixFeaturesEnd(params); final Element suffixFeature = new Element("custom"); suffixFeature.setAttribute("class", DocSuffixFeatureGenerator.class.getName()); suffixFeature.setAttribute("begin", beginSuffix); suffixFeature.setAttribute("end", endSuffix); generators.addContent(suffixFeature); System.err.println("-> Suffix Features added!"); } if (Flags.isNgramFeature(params)) { final String charngramRange = Flags.getNgramFeaturesRange(params); final String[] rangeArray = Flags.processNgramRange(charngramRange); final Element charngramFeature = new Element("custom"); charngramFeature.setAttribute("class", NGramFeatureGenerator.class.getName()); charngramFeature.setAttribute("minLength", rangeArray[0]); charngramFeature.setAttribute("maxLength", rangeArray[1]); generators.addContent(charngramFeature); System.err.println("-> Ngram Features added!"); } if (Flags.isCharNgramClassFeature(params)) { final String charngramRange = Flags.getCharNgramFeaturesRange(params); final String[] rangeArray = Flags.processNgramRange(charngramRange); final Element charngramFeature = new Element("custom"); charngramFeature.setAttribute("class", DocCharacterNgramFeatureGenerator.class.getName()); charngramFeature.setAttribute("minLength", rangeArray[0]); 
charngramFeature.setAttribute("maxLength", rangeArray[1]); generators.addContent(charngramFeature); System.err.println("-> CharNgram Class Features added!"); } // Polarity Dictionary Features if (Flags.isDictionaryPolarityFeatures(params)) { final String dictPath = Flags.getDictionaryPolarityFeatures(params); final List<File> fileList = StringUtils.getFilesInDir(new File(dictPath)); for (final File dictFile : fileList) { final Element dictFeatures = new Element("custom"); dictFeatures.setAttribute("class", DocPolarityDictionaryFeatureGenerator.class.getName()); dictFeatures.setAttribute("dict", IOUtils.normalizeLexiconName(dictFile.getName())); generators.addContent(dictFeatures); } System.err.println("-> Dictionary Features added!"); } // Frequent Word Features if (Flags.isFrequentWordFeatures(params)) { final String dictPath = Flags.getFrequentWordFeatures(params); final List<File> fileList = StringUtils.getFilesInDir(new File(dictPath)); for (final File dictFile : fileList) { final Element dictFeatures = new Element("custom"); dictFeatures.setAttribute("class", FrequentWordFeatureGenerator.class.getName()); dictFeatures.setAttribute("dict", IOUtils.normalizeLexiconName(dictFile.getName())); generators.addContent(dictFeatures); } System.err.println("-> Frequent Word Features added!"); } //Opinion Target Extraction Features if (Flags.isTargetFeatures(params)) { final String targetModelPath = Flags.getTargetFeatures(params); final String targetModelRange = Flags.getTargetFeaturesRange(params); final Element targetClassFeatureElement = new Element("custom"); targetClassFeatureElement.setAttribute("class", DocTargetFeatureGenerator.class.getName()); targetClassFeatureElement.setAttribute("model", IOUtils.normalizeLexiconName(new File(targetModelPath).getName())); targetClassFeatureElement.setAttribute("range", targetModelRange); generators.addContent(targetClassFeatureElement); System.err.println("-> Target Model Features added!"); } // Dictionary Features if 
(Flags.isDictionaryFeatures(params)) { final String dictPath = Flags.getDictionaryFeatures(params); final String seqCodec = Flags.getSequenceCodec(params); final List<File> fileList = StringUtils.getFilesInDir(new File(dictPath)); for (final File dictFile : fileList) { final Element dictFeatures = new Element("custom"); dictFeatures.setAttribute("class", DocDictionaryFeatureGenerator.class.getName()); dictFeatures.setAttribute("dict", IOUtils.normalizeLexiconName(dictFile.getName())); dictFeatures.setAttribute("seqCodec", seqCodec); generators.addContent(dictFeatures); } System.err.println("-> Dictionary Features added!"); } // Brown clustering features if (Flags.isBrownFeatures(params)) { final String brownClusterPath = Flags.getBrownFeatures(params); final List<File> brownClusterFiles = Flags.getClusterLexiconFiles(brownClusterPath); for (final File brownClusterFile : brownClusterFiles) { // brown bigram class features final Element brownBigramFeatures = new Element("custom"); brownBigramFeatures.setAttribute("class", DocBrownBigramFeatureGenerator.class.getName()); brownBigramFeatures.setAttribute("dict", IOUtils.normalizeLexiconName(brownClusterFile.getName())); //generators.addContent(brownBigramFeatures); // brown token feature final Element brownTokenFeature = new Element("custom"); brownTokenFeature.setAttribute("class", DocBrownTokenFeatureGenerator.class.getName()); brownTokenFeature.setAttribute("dict", IOUtils.normalizeLexiconName(brownClusterFile.getName())); generators.addContent(brownTokenFeature); // brown token class feature final Element brownTokenClassFeature = new Element("custom"); brownTokenClassFeature.setAttribute("class", DocBrownTokenClassFeatureGenerator.class.getName()); brownTokenClassFeature.setAttribute("dict", IOUtils.normalizeLexiconName(brownClusterFile.getName())); //generators.addContent(brownTokenClassFeature); } System.err.println("-> Brown Cluster Features added!"); } // Clark clustering features if 
(Flags.isClarkFeatures(params)) { final String clarkClusterPath = Flags.getClarkFeatures(params); final List<File> clarkClusterFiles = Flags.getClusterLexiconFiles(clarkClusterPath); for (final File clarkCluster : clarkClusterFiles) { final Element clarkFeatures = new Element("custom"); clarkFeatures.setAttribute("class", DocClarkFeatureGenerator.class.getName()); clarkFeatures.setAttribute("dict", IOUtils.normalizeLexiconName(clarkCluster.getName())); generators.addContent(clarkFeatures); } System.err.println("-> Clark Cluster Features added!"); } // word2vec clustering features if (Flags.isWord2VecClusterFeatures(params)) { final String word2vecClusterPath = Flags.getWord2VecClusterFeatures(params); final List<File> word2vecClusterFiles = Flags.getClusterLexiconFiles(word2vecClusterPath); for (final File word2vecFile : word2vecClusterFiles) { final Element word2vecClusterFeatures = new Element("custom"); word2vecClusterFeatures.setAttribute("class", DocWord2VecClusterFeatureGenerator.class.getName()); word2vecClusterFeatures.setAttribute("dict", IOUtils.normalizeLexiconName(word2vecFile.getName())); generators.addContent(word2vecClusterFeatures); } System.err.println("-> Word2Vec Clusters Features added!"); } // Morphological features if (Flags.isPOSTagModelFeatures(params)) { final String posModelPath = Flags.getPOSTagModelFeatures(params); final String posModelRange = Flags.getPOSTagModelFeaturesRange(params); final Element posTagClassFeatureElement = new Element("custom"); posTagClassFeatureElement.setAttribute("class", DocPOSTagModelFeatureGenerator.class.getName()); posTagClassFeatureElement.setAttribute("model", IOUtils.normalizeLexiconName(new File(posModelPath).getName())); posTagClassFeatureElement.setAttribute("range", posModelRange); generators.addContent(posTagClassFeatureElement); System.err.println("-> POSTagModel Features added!"); } if (Flags.isLemmaModelFeatures(params)) { final String lemmaModelPath = Flags.getLemmaModelFeatures(params); final 
Element lemmaClassFeatureElement = new Element("custom"); lemmaClassFeatureElement.setAttribute("class", DocLemmaModelFeatureGenerator.class.getName()); lemmaClassFeatureElement.setAttribute("model", IOUtils.normalizeLexiconName(new File(lemmaModelPath).getName())); generators.addContent(lemmaClassFeatureElement); System.err.println("-> LemmaModel Features added!"); } if (Flags.isLemmaDictionaryFeatures(params)) { final String lemmaDictPath = Flags.getLemmaDictionaryFeatures(params); final String[] lemmaDictResources = Flags.getLemmaDictionaryResources(lemmaDictPath); final Element lemmaClassFeatureElement = new Element("custom"); lemmaClassFeatureElement.setAttribute("class", DocLemmaDictionaryFeatureGenerator.class.getName()); lemmaClassFeatureElement.setAttribute("model", IOUtils.normalizeLexiconName(new File(lemmaDictResources[0]).getName())); lemmaClassFeatureElement.setAttribute("dict", IOUtils.normalizeLexiconName(new File(lemmaDictResources[1]).getName())); generators.addContent(lemmaClassFeatureElement); System.err.println("-> LemmaDictionary Features added!"); } final XMLOutputter xmlOutput = new XMLOutputter(); final Format format = Format.getPrettyFormat(); xmlOutput.setFormat(format); return xmlOutput.outputString(doc); }
From source file:eus.ixa.ixa.pipe.ml.features.XMLFeatureDescriptor.java
License:Apache License
/** * Generate the XML feature descriptor from the TrainingParameters prop file. * @param params the properties file//w ww.j a va 2 s . c o m * @return the XML feature descriptor * @throws IOException if input output fails */ public static String createXMLFeatureDescriptor(TrainingParameters params) throws IOException { Element aggGenerators = new Element("generators"); Document doc = new Document(aggGenerators); //<generators> // <cache> // <generators> Element cached = new Element("cache"); Element generators = new Element("generators"); //<window prevLength="2" nextLength="2"> // <token /> //</window> if (Flags.isTokenFeature(params)) { setWindow(params); String tokenFeatureRange = Flags.getTokenFeaturesRange(params); Element tokenFeature = new Element("custom"); tokenFeature.setAttribute("class", TokenFeatureGenerator.class.getName()); tokenFeature.setAttribute("range", tokenFeatureRange); Element tokenWindow = new Element("window"); tokenWindow.setAttribute("prevLength", Integer.toString(leftWindow)); tokenWindow.setAttribute("nextLength", Integer.toString(rightWindow)); tokenWindow.addContent(tokenFeature); generators.addContent(tokenWindow); System.err.println("-> Token features added!: Window range " + leftWindow + ":" + rightWindow); } if (Flags.isTokenClassFeature(params)) { setWindow(params); String tokenClassFeatureRange = Flags.getTokenClassFeaturesRange(params); Element tokenClassFeature = new Element("custom"); tokenClassFeature.setAttribute("class", TokenClassFeatureGenerator.class.getName()); tokenClassFeature.setAttribute("range", tokenClassFeatureRange); Element tokenClassWindow = new Element("window"); tokenClassWindow.setAttribute("prevLength", Integer.toString(leftWindow)); tokenClassWindow.setAttribute("nextLength", Integer.toString(rightWindow)); tokenClassWindow.addContent(tokenClassFeature); generators.addContent(tokenClassWindow); System.err.println("-> Token Class Features added!: Window range " + leftWindow + ":" + rightWindow); } if 
(Flags.isWordShapeSuperSenseFeature(params)) { setWindow(params); Element wordShapeSuperSenseFeature = new Element("custom"); wordShapeSuperSenseFeature.setAttribute("class", WordShapeSuperSenseFeatureGenerator.class.getName()); Element wordShapeWindow = new Element("window"); wordShapeWindow.setAttribute("prevLength", Integer.toString(leftWindow)); wordShapeWindow.setAttribute("nextLength", Integer.toString(rightWindow)); wordShapeWindow.addContent(wordShapeSuperSenseFeature); generators.addContent(wordShapeWindow); System.err.println( "-> Word Shape SuperSense Features added!: Window range " + leftWindow + ":" + rightWindow); } if (Flags.isOutcomePriorFeature(params)) { Element outcomePriorFeature = new Element("custom"); outcomePriorFeature.setAttribute("class", OutcomePriorFeatureGenerator.class.getName()); generators.addContent(outcomePriorFeature); System.err.println("-> Outcome Prior Features added!"); } if (Flags.isPreviousMapFeature(params)) { Element previousMapFeature = new Element("custom"); previousMapFeature.setAttribute("class", PreviousMapFeatureGenerator.class.getName()); generators.addContent(previousMapFeature); System.err.println("-> Previous Map Features added!"); } if (Flags.isSentenceFeature(params)) { String beginSentence = Flags.getSentenceFeaturesBegin(params); String endSentence = Flags.getSentenceFeaturesEnd(params); Element sentenceFeature = new Element("custom"); sentenceFeature.setAttribute("class", SentenceFeatureGenerator.class.getName()); sentenceFeature.setAttribute("begin", beginSentence); sentenceFeature.setAttribute("end", endSentence); generators.addContent(sentenceFeature); System.err.println("-> Sentence Features added!"); } if (Flags.isPrefixFeature(params)) { String beginPrefix = Flags.getPrefixFeaturesBegin(params); String endPrefix = Flags.getPrefixFeaturesEnd(params); Element prefixFeature = new Element("custom"); prefixFeature.setAttribute("class", PrefixFeatureGenerator.class.getName()); 
prefixFeature.setAttribute("begin", beginPrefix); prefixFeature.setAttribute("end", endPrefix); generators.addContent(prefixFeature); System.err.println("-> Prefix Features added!"); } if (Flags.isSuffixFeature(params)) { String beginSuffix = Flags.getSuffixFeaturesBegin(params); String endSuffix = Flags.getSuffixFeaturesEnd(params); Element suffixFeature = new Element("custom"); suffixFeature.setAttribute("class", SuffixFeatureGenerator.class.getName()); suffixFeature.setAttribute("begin", beginSuffix); suffixFeature.setAttribute("end", endSuffix); generators.addContent(suffixFeature); System.err.println("-> Suffix Features added!"); } if (Flags.isBigramClassFeature(params)) { Element bigramFeature = new Element("custom"); bigramFeature.setAttribute("class", BigramClassFeatureGenerator.class.getName()); generators.addContent(bigramFeature); System.err.println("-> Bigram Class Features added!"); } if (Flags.isTrigramClassFeature(params)) { Element trigramFeature = new Element("custom"); trigramFeature.setAttribute("class", TrigramClassFeatureGenerator.class.getName()); generators.addContent(trigramFeature); System.err.println("-> Trigram Class Features added!"); } if (Flags.isFourgramClassFeature(params)) { Element fourgramFeature = new Element("custom"); fourgramFeature.setAttribute("class", FourgramClassFeatureGenerator.class.getName()); generators.addContent(fourgramFeature); System.err.println("-> Fourgram Class Features added!"); } if (Flags.isFivegramClassFeature(params)) { Element fivegramFeature = new Element("custom"); fivegramFeature.setAttribute("class", FivegramClassFeatureGenerator.class.getName()); generators.addContent(fivegramFeature); System.err.println("-> Fivegram Class Features added!"); } if (Flags.isCharNgramClassFeature(params)) { String charngramRange = Flags.getCharNgramFeaturesRange(params); String[] rangeArray = Flags.processNgramRange(charngramRange); Element charngramFeature = new Element("custom"); 
charngramFeature.setAttribute("class", CharacterNgramFeatureGenerator.class.getName()); charngramFeature.setAttribute("minLength", rangeArray[0]); charngramFeature.setAttribute("maxLength", rangeArray[1]); generators.addContent(charngramFeature); System.err.println("-> CharNgram Class Features added!"); } //Dictionary Features if (Flags.isDictionaryFeatures(params)) { setWindow(params); String dictPath = Flags.getDictionaryFeatures(params); String seqCodec = Flags.getSequenceCodec(params); List<File> fileList = StringUtils.getFilesInDir(new File(dictPath)); for (File dictFile : fileList) { Element dictFeatures = new Element("custom"); dictFeatures.setAttribute("class", DictionaryFeatureGenerator.class.getName()); dictFeatures.setAttribute("dict", IOUtils.normalizeLexiconName(dictFile.getName())); dictFeatures.setAttribute("seqCodec", seqCodec); Element dictWindow = new Element("window"); dictWindow.setAttribute("prevLength", Integer.toString(leftWindow)); dictWindow.setAttribute("nextLength", Integer.toString(rightWindow)); dictWindow.addContent(dictFeatures); generators.addContent(dictWindow); } System.err.println("-> Dictionary Features added!"); } //Brown clustering features if (Flags.isBrownFeatures(params)) { setWindow(params); //previous 2 maps features Element prev2MapFeature = new Element("custom"); prev2MapFeature.setAttribute("class", Prev2MapFeatureGenerator.class.getName()); //generators.addContent(prev2MapFeature); //previous map and token feature (in window) Element prevMapTokenFeature = new Element("custom"); prevMapTokenFeature.setAttribute("class", PreviousMapTokenFeatureGenerator.class.getName()); Element prevMapTokenWindow = new Element("window"); prevMapTokenWindow.setAttribute("prevLength", Integer.toString(leftWindow)); prevMapTokenWindow.setAttribute("nextLength", Integer.toString(rightWindow)); prevMapTokenWindow.addContent(prevMapTokenFeature); //generators.addContent(prevMapTokenWindow); //brown clustering features String brownClusterPath 
= Flags.getBrownFeatures(params); List<File> brownClusterFiles = Flags.getClusterLexiconFiles(brownClusterPath); for (File brownClusterFile : brownClusterFiles) { //brown bigram class features Element brownBigramFeatures = new Element("custom"); brownBigramFeatures.setAttribute("class", BrownBigramFeatureGenerator.class.getName()); brownBigramFeatures.setAttribute("dict", IOUtils.normalizeLexiconName(brownClusterFile.getName())); generators.addContent(brownBigramFeatures); //brown token feature Element brownTokenFeature = new Element("custom"); brownTokenFeature.setAttribute("class", BrownTokenFeatureGenerator.class.getName()); brownTokenFeature.setAttribute("dict", IOUtils.normalizeLexiconName(brownClusterFile.getName())); Element brownTokenWindow = new Element("window"); brownTokenWindow.setAttribute("prevLength", Integer.toString(leftWindow)); brownTokenWindow.setAttribute("nextLength", Integer.toString(rightWindow)); brownTokenWindow.addContent(brownTokenFeature); generators.addContent(brownTokenWindow); //brown token class feature Element brownTokenClassFeature = new Element("custom"); brownTokenClassFeature.setAttribute("class", BrownTokenClassFeatureGenerator.class.getName()); brownTokenClassFeature.setAttribute("dict", IOUtils.normalizeLexiconName(brownClusterFile.getName())); Element brownTokenClassWindow = new Element("window"); brownTokenClassWindow.setAttribute("prevLength", Integer.toString(leftWindow)); brownTokenClassWindow.setAttribute("nextLength", Integer.toString(rightWindow)); brownTokenClassWindow.addContent(brownTokenClassFeature); generators.addContent(brownTokenClassWindow); } System.err.println("-> Brown Cluster Features added!"); } //Clark clustering features if (Flags.isClarkFeatures(params)) { setWindow(params); String clarkClusterPath = Flags.getClarkFeatures(params); List<File> clarkClusterFiles = Flags.getClusterLexiconFiles(clarkClusterPath); for (File clarkCluster : clarkClusterFiles) { Element clarkFeatures = new Element("custom"); 
clarkFeatures.setAttribute("class", ClarkFeatureGenerator.class.getName()); clarkFeatures.setAttribute("dict", IOUtils.normalizeLexiconName(clarkCluster.getName())); Element clarkWindow = new Element("window"); clarkWindow.setAttribute("prevLength", Integer.toString(leftWindow)); clarkWindow.setAttribute("nextLength", Integer.toString(rightWindow)); clarkWindow.addContent(clarkFeatures); generators.addContent(clarkWindow); } System.err.println("-> Clark Cluster Features added!"); } //word2vec clustering features if (Flags.isWord2VecClusterFeatures(params)) { setWindow(params); String word2vecClusterPath = Flags.getWord2VecClusterFeatures(params); List<File> word2vecClusterFiles = Flags.getClusterLexiconFiles(word2vecClusterPath); for (File word2vecFile : word2vecClusterFiles) { Element word2vecClusterFeatures = new Element("custom"); word2vecClusterFeatures.setAttribute("class", Word2VecClusterFeatureGenerator.class.getName()); word2vecClusterFeatures.setAttribute("dict", IOUtils.normalizeLexiconName(word2vecFile.getName())); Element word2vecClusterWindow = new Element("window"); word2vecClusterWindow.setAttribute("prevLength", Integer.toString(leftWindow)); word2vecClusterWindow.setAttribute("nextLength", Integer.toString(rightWindow)); word2vecClusterWindow.addContent(word2vecClusterFeatures); generators.addContent(word2vecClusterWindow); } System.err.println("-> Word2Vec Clusters Features added!"); } //Morphological features if (Flags.isPOSTagModelFeatures(params)) { setWindow(params); String posModelPath = Flags.getPOSTagModelFeatures(params); String posModelRange = Flags.getPOSTagModelFeaturesRange(params); Element posTagClassFeatureElement = new Element("custom"); posTagClassFeatureElement.setAttribute("class", POSTagModelFeatureGenerator.class.getName()); posTagClassFeatureElement.setAttribute("model", IOUtils.normalizeLexiconName(new File(posModelPath).getName())); posTagClassFeatureElement.setAttribute("range", posModelRange); Element 
posTagClassFeatureWindow = new Element("window"); posTagClassFeatureWindow.setAttribute("prevLength", Integer.toString(leftWindow)); posTagClassFeatureWindow.setAttribute("nextLength", Integer.toString(rightWindow)); posTagClassFeatureWindow.addContent(posTagClassFeatureElement); generators.addContent(posTagClassFeatureWindow); System.err.println("-> POSTagModel Features added!"); } if (Flags.isPOSDictionaryFeatures(params)) { setWindow(params); String posDictPath = Flags.getPOSDictionaryFeatures(params); Element posDictFeatures = new Element("custom"); posDictFeatures.setAttribute("class", POSDictionaryFeatureGenerator.class.getName()); posDictFeatures.setAttribute("dict", IOUtils.normalizeLexiconName(new File(posDictPath).getName())); Element posDictWindow = new Element("window"); posDictWindow.setAttribute("prevLength", Integer.toString(leftWindow)); posDictWindow.setAttribute("nextLength", Integer.toString(rightWindow)); posDictWindow.addContent(posDictFeatures); generators.addContent(posDictWindow); System.err.println("-> POSDictionary Features added!"); } if (Flags.isLemmaModelFeatures(params)) { setWindow(params); String lemmaModelPath = Flags.getLemmaModelFeatures(params); Element lemmaClassFeatureElement = new Element("custom"); lemmaClassFeatureElement.setAttribute("class", LemmaModelFeatureGenerator.class.getName()); lemmaClassFeatureElement.setAttribute("model", IOUtils.normalizeLexiconName(new File(lemmaModelPath).getName())); Element lemmaClassFeatureWindow = new Element("window"); lemmaClassFeatureWindow.setAttribute("prevLength", Integer.toString(leftWindow)); lemmaClassFeatureWindow.setAttribute("nextLength", Integer.toString(rightWindow)); lemmaClassFeatureWindow.addContent(lemmaClassFeatureElement); generators.addContent(lemmaClassFeatureWindow); System.err.println("-> LemmaModel Features added!"); } if (Flags.isLemmaDictionaryFeatures(params)) { setWindow(params); String lemmaDictPath = Flags.getLemmaDictionaryFeatures(params); String[] 
lemmaDictResources = Flags.getLemmaDictionaryResources(lemmaDictPath); Element lemmaClassFeatureElement = new Element("custom"); lemmaClassFeatureElement.setAttribute("class", LemmaDictionaryFeatureGenerator.class.getName()); lemmaClassFeatureElement.setAttribute("model", IOUtils.normalizeLexiconName(new File(lemmaDictResources[0]).getName())); lemmaClassFeatureElement.setAttribute("dict", IOUtils.normalizeLexiconName(new File(lemmaDictResources[1]).getName())); Element lemmaClassFeatureWindow = new Element("window"); lemmaClassFeatureWindow.setAttribute("prevLength", Integer.toString(leftWindow)); lemmaClassFeatureWindow.setAttribute("nextLength", Integer.toString(rightWindow)); lemmaClassFeatureWindow.addContent(lemmaClassFeatureElement); generators.addContent(lemmaClassFeatureWindow); System.err.println("-> LemmaDictionary Features added!"); } if (Flags.isMFSFeatures(params)) { setWindow(params); String mfsPath = Flags.getMFSFeatures(params); String[] mfsResources = Flags.getMFSResources(mfsPath); String mfsRange = Flags.getMFSFeaturesRange(params); String seqCodec = Flags.getSequenceCodec(params); Element mfsClassFeatureElement = new Element("custom"); mfsClassFeatureElement.setAttribute("class", MFSFeatureGenerator.class.getName()); mfsClassFeatureElement.setAttribute("model", IOUtils.normalizeLexiconName(new File(mfsResources[0]).getName())); mfsClassFeatureElement.setAttribute("dict", IOUtils.normalizeLexiconName(new File(mfsResources[1]).getName())); mfsClassFeatureElement.setAttribute("mfs", IOUtils.normalizeLexiconName(new File(mfsResources[2]).getName())); mfsClassFeatureElement.setAttribute("range", mfsRange); mfsClassFeatureElement.setAttribute("seqCodec", seqCodec); Element mfsClassFeatureWindow = new Element("window"); mfsClassFeatureWindow.setAttribute("prevLength", Integer.toString(leftWindow)); mfsClassFeatureWindow.setAttribute("nextLength", Integer.toString(rightWindow)); mfsClassFeatureWindow.addContent(mfsClassFeatureElement); 
generators.addContent(mfsClassFeatureWindow); System.err.println("-> MFS Features added"); } if (Flags.isSuperSenseFeatures(params)) { String mfsPath = Flags.getSuperSenseFeatures(params); String[] mfsResources = Flags.getSuperSenseResources(mfsPath); String mfsRange = Flags.getSuperSenseFeaturesRange(params); String seqCodec = Flags.getSequenceCodec(params); Element mfsClassFeatureElement = new Element("custom"); mfsClassFeatureElement.setAttribute("class", SuperSenseFeatureGenerator.class.getName()); mfsClassFeatureElement.setAttribute("model", IOUtils.normalizeLexiconName(new File(mfsResources[0]).getName())); mfsClassFeatureElement.setAttribute("dict", IOUtils.normalizeLexiconName(new File(mfsResources[1]).getName())); mfsClassFeatureElement.setAttribute("mfs", IOUtils.normalizeLexiconName(new File(mfsResources[2]).getName())); mfsClassFeatureElement.setAttribute("range", mfsRange); mfsClassFeatureElement.setAttribute("seqCodec", seqCodec); generators.addContent(mfsClassFeatureElement); System.err.println("-> SuperSense Features added!"); } if (Flags.isPOSBaselineFeatures(params)) { String beginPrefix = Flags.getPrefixBegin(params); String endPrefix = Flags.getPrefixEnd(params); String beginSuffix = Flags.getSuffixBegin(params); String endSuffix = Flags.getSuffixEnd(params); Element posFeatureElement = new Element("custom"); posFeatureElement.setAttribute("class", POSBaselineContextGenerator.class.getName()); posFeatureElement.setAttribute("prefBegin", beginPrefix); posFeatureElement.setAttribute("prefEnd", endPrefix); posFeatureElement.setAttribute("sufBegin", beginSuffix); posFeatureElement.setAttribute("sufEnd", endSuffix); generators.addContent(posFeatureElement); System.err.println("-> POS Baseline Context Generator added!"); } if (Flags.isLemmaBaselineFeatures(params)) { String beginPrefix = Flags.getPrefixBegin(params); String endPrefix = Flags.getPrefixEnd(params); String beginSuffix = Flags.getSuffixBegin(params); String endSuffix = 
Flags.getSuffixEnd(params); String posModel = Flags.getLemmaBaselineFeatures(params); String lemmaRange = Flags.getLemmaBaselineFeaturesRange(params); Element lemmaFeatureElement = new Element("custom"); lemmaFeatureElement.setAttribute("class", LemmaBaselineContextGenerator.class.getName()); lemmaFeatureElement.setAttribute("prefBegin", beginPrefix); lemmaFeatureElement.setAttribute("prefEnd", endPrefix); lemmaFeatureElement.setAttribute("sufBegin", beginSuffix); lemmaFeatureElement.setAttribute("sufEnd", endSuffix); lemmaFeatureElement.setAttribute("model", IOUtils.normalizeLexiconName(new File(posModel).getName())); lemmaFeatureElement.setAttribute("range", lemmaRange); generators.addContent(lemmaFeatureElement); System.err.println("-> Lemma Baseline Context Generator added!"); } if (Flags.isChunkBaselineFeatures(params)) { String posModel = Flags.getChunkBaselineFeatures(params); Element chunkFeatureElement = new Element("custom"); chunkFeatureElement.setAttribute("class", ChunkBaselineContextGenerator.class.getName()); chunkFeatureElement.setAttribute("model", IOUtils.normalizeLexiconName(new File(posModel).getName())); generators.addContent(chunkFeatureElement); System.err.println("-> Chunk Baseline Context Generator added!"); } if (Flags.isPredicateContextFeatures(params)) { String predicateContextFile = Flags.getPredicateContextFeatures(params); Element predicateContextFeatureElement = new Element("custom"); predicateContextFeatureElement.setAttribute("class", PredicateContextFeatureGenerator.class.getName()); predicateContextFeatureElement.setAttribute("dict", IOUtils.normalizeLexiconName(new File(predicateContextFile).getName())); generators.addContent(predicateContextFeatureElement); System.err.println("-> Predicate Context Generator added!"); } aggGenerators.addContent(cached); cached.addContent(generators); XMLOutputter xmlOutput = new XMLOutputter(); Format format = Format.getPrettyFormat(); xmlOutput.setFormat(format); return 
xmlOutput.outputString(doc); }
From source file:eus.ixa.ixa.pipe.nerc.features.XMLFeatureDescriptor.java
License:Apache License
/** * Generate the XML feature descriptor from the TrainingParameters prop file. * @param params the properties file//from w ww.j a v a 2 s . c om * @return the XML feature descriptor * @throws IOException if input output fails */ public static String createXMLFeatureDescriptor(TrainingParameters params) throws IOException { Element aggGenerators = new Element("generators"); Document doc = new Document(aggGenerators); //<generators> // <cache> // <generators> Element cached = new Element("cache"); Element generators = new Element("generators"); //<window prevLength="2" nextLength="2"> // <token /> //</window> if (Flags.isTokenFeature(params)) { setWindow(params); Element tokenFeature = new Element("custom"); tokenFeature.setAttribute("class", TokenFeatureGenerator.class.getName()); Element tokenWindow = new Element("window"); tokenWindow.setAttribute("prevLength", Integer.toString(leftWindow)); tokenWindow.setAttribute("nextLength", Integer.toString(rightWindow)); tokenWindow.addContent(tokenFeature); generators.addContent(tokenWindow); System.err.println("-> Token features added!: Window range " + leftWindow + ":" + rightWindow); } if (Flags.isTokenClassFeature(params)) { setWindow(params); Element tokenClassFeature = new Element("custom"); tokenClassFeature.setAttribute("class", TokenClassFeatureGenerator.class.getName()); Element tokenClassWindow = new Element("window"); tokenClassWindow.setAttribute("prevLength", Integer.toString(leftWindow)); tokenClassWindow.setAttribute("nextLength", Integer.toString(rightWindow)); tokenClassWindow.addContent(tokenClassFeature); generators.addContent(tokenClassWindow); System.err.println("-> Token Class Features added!: Window range " + leftWindow + ":" + rightWindow); } if (Flags.isWordShapeSuperSenseFeature(params)) { setWindow(params); Element wordShapeSuperSenseFeature = new Element("custom"); wordShapeSuperSenseFeature.setAttribute("class", WordShapeSuperSenseFeatureGenerator.class.getName()); Element wordShapeWindow = 
new Element("window"); wordShapeWindow.setAttribute("prevLength", Integer.toString(leftWindow)); wordShapeWindow.setAttribute("nextLength", Integer.toString(rightWindow)); wordShapeWindow.addContent(wordShapeSuperSenseFeature); generators.addContent(wordShapeWindow); System.err.println( "-> Word Shape SuperSense Features added!: Window range " + leftWindow + ":" + rightWindow); } if (Flags.isOutcomePriorFeature(params)) { Element outcomePriorFeature = new Element("custom"); outcomePriorFeature.setAttribute("class", OutcomePriorFeatureGenerator.class.getName()); generators.addContent(outcomePriorFeature); System.err.println("-> Outcome Prior Features added!"); } if (Flags.isPreviousMapFeature(params)) { Element previousMapFeature = new Element("custom"); previousMapFeature.setAttribute("class", PreviousMapFeatureGenerator.class.getName()); generators.addContent(previousMapFeature); System.err.println("-> Previous Map Features added!"); } if (Flags.isSentenceFeature(params)) { Element sentenceFeature = new Element("custom"); sentenceFeature.setAttribute("class", SentenceFeatureGenerator.class.getName()); sentenceFeature.setAttribute("begin", "true"); sentenceFeature.setAttribute("end", "false"); generators.addContent(sentenceFeature); System.err.println("-> Sentence Features added!"); } if (Flags.isPrefixFeature(params)) { Element prefixFeature = new Element("custom"); prefixFeature.setAttribute("class", Prefix34FeatureGenerator.class.getName()); generators.addContent(prefixFeature); System.err.println("-> Prefix Features added!"); } if (Flags.isSuffixFeature(params)) { Element suffixFeature = new Element("custom"); suffixFeature.setAttribute("class", SuffixFeatureGenerator.class.getName()); generators.addContent(suffixFeature); System.err.println("-> Suffix Features added!"); } if (Flags.isBigramClassFeature(params)) { Element bigramFeature = new Element("custom"); bigramFeature.setAttribute("class", BigramClassFeatureGenerator.class.getName()); 
generators.addContent(bigramFeature); System.err.println("-> Bigram Class Features added!"); } if (Flags.isTrigramClassFeature(params)) { Element trigramFeature = new Element("custom"); trigramFeature.setAttribute("class", TrigramClassFeatureGenerator.class.getName()); generators.addContent(trigramFeature); System.err.println("-> Trigram Class Features added!"); } if (Flags.isFourgramClassFeature(params)) { Element fourgramFeature = new Element("custom"); fourgramFeature.setAttribute("class", FourgramClassFeatureGenerator.class.getName()); generators.addContent(fourgramFeature); System.err.println("-> Fourgram Class Features added!"); } if (Flags.isFivegramClassFeature(params)) { Element fivegramFeature = new Element("custom"); fivegramFeature.setAttribute("class", FivegramClassFeatureGenerator.class.getName()); generators.addContent(fivegramFeature); System.err.println("-> Fivegram Class Features added!"); } if (Flags.isCharNgramClassFeature(params)) { setNgramRange(params); Element charngramFeature = new Element("custom"); charngramFeature.setAttribute("class", CharacterNgramFeatureGenerator.class.getName()); charngramFeature.setAttribute("minLength", Integer.toString(minCharNgram)); charngramFeature.setAttribute("maxLength", Integer.toString(maxCharNgram)); generators.addContent(charngramFeature); System.err.println("-> CharNgram Class Features added!"); } //Dictionary Features if (Flags.isDictionaryFeatures(params)) { setWindow(params); String dictPath = Flags.getDictionaryFeatures(params); String seqCodec = Flags.getSequenceCodec(params); List<File> fileList = StringUtils.getFilesInDir(new File(dictPath)); for (File dictFile : fileList) { Element dictFeatures = new Element("custom"); dictFeatures.setAttribute("class", DictionaryFeatureGenerator.class.getName()); dictFeatures.setAttribute("dict", InputOutputUtils.normalizeLexiconName(dictFile.getName())); dictFeatures.setAttribute("seqCodec", seqCodec); Element dictWindow = new Element("window"); 
dictWindow.setAttribute("prevLength", Integer.toString(leftWindow)); dictWindow.setAttribute("nextLength", Integer.toString(rightWindow)); dictWindow.addContent(dictFeatures); generators.addContent(dictWindow); } System.err.println("-> Dictionary Features added!"); } //Brown clustering features if (Flags.isBrownFeatures(params)) { setWindow(params); //previous 2 maps features Element prev2MapFeature = new Element("custom"); prev2MapFeature.setAttribute("class", Prev2MapFeatureGenerator.class.getName()); generators.addContent(prev2MapFeature); //previous map and token feature (in window) Element prevMapTokenFeature = new Element("custom"); prevMapTokenFeature.setAttribute("class", PreviousMapTokenFeatureGenerator.class.getName()); Element prevMapTokenWindow = new Element("window"); prevMapTokenWindow.setAttribute("prevLength", Integer.toString(leftWindow)); prevMapTokenWindow.setAttribute("nextLength", Integer.toString(rightWindow)); prevMapTokenWindow.addContent(prevMapTokenFeature); generators.addContent(prevMapTokenWindow); //brown clustering features String brownClusterPath = Flags.getBrownFeatures(params); List<File> brownClusterFiles = Flags.getClusterLexiconFiles(brownClusterPath); for (File brownClusterFile : brownClusterFiles) { //brown bigram class features Element brownBigramFeatures = new Element("custom"); brownBigramFeatures.setAttribute("class", BrownBigramFeatureGenerator.class.getName()); brownBigramFeatures.setAttribute("dict", InputOutputUtils.normalizeLexiconName(brownClusterFile.getName())); generators.addContent(brownBigramFeatures); //brown token feature Element brownTokenFeature = new Element("custom"); brownTokenFeature.setAttribute("class", BrownTokenFeatureGenerator.class.getName()); brownTokenFeature.setAttribute("dict", InputOutputUtils.normalizeLexiconName(brownClusterFile.getName())); Element brownTokenWindow = new Element("window"); brownTokenWindow.setAttribute("prevLength", Integer.toString(leftWindow)); 
brownTokenWindow.setAttribute("nextLength", Integer.toString(rightWindow)); brownTokenWindow.addContent(brownTokenFeature); generators.addContent(brownTokenWindow); //brown token class feature Element brownTokenClassFeature = new Element("custom"); brownTokenClassFeature.setAttribute("class", BrownTokenClassFeatureGenerator.class.getName()); brownTokenClassFeature.setAttribute("dict", InputOutputUtils.normalizeLexiconName(brownClusterFile.getName())); Element brownTokenClassWindow = new Element("window"); brownTokenClassWindow.setAttribute("prevLength", Integer.toString(leftWindow)); brownTokenClassWindow.setAttribute("nextLength", Integer.toString(rightWindow)); brownTokenClassWindow.addContent(brownTokenClassFeature); generators.addContent(brownTokenClassWindow); } System.err.println("-> Brown Cluster Features added!"); } //Clark clustering features if (Flags.isClarkFeatures(params)) { setWindow(params); String clarkClusterPath = Flags.getClarkFeatures(params); List<File> clarkClusterFiles = Flags.getClusterLexiconFiles(clarkClusterPath); for (File clarkCluster : clarkClusterFiles) { Element clarkFeatures = new Element("custom"); clarkFeatures.setAttribute("class", ClarkFeatureGenerator.class.getName()); clarkFeatures.setAttribute("dict", InputOutputUtils.normalizeLexiconName(clarkCluster.getName())); Element clarkWindow = new Element("window"); clarkWindow.setAttribute("prevLength", Integer.toString(leftWindow)); clarkWindow.setAttribute("nextLength", Integer.toString(rightWindow)); clarkWindow.addContent(clarkFeatures); generators.addContent(clarkWindow); } System.err.println("-> Clark Cluster Features added!"); } //word2vec clustering features if (Flags.isWord2VecClusterFeatures(params)) { setWindow(params); String word2vecClusterPath = Flags.getWord2VecClusterFeatures(params); List<File> word2vecClusterFiles = Flags.getClusterLexiconFiles(word2vecClusterPath); for (File word2vecFile : word2vecClusterFiles) { Element word2vecClusterFeatures = new 
Element("custom"); word2vecClusterFeatures.setAttribute("class", Word2VecClusterFeatureGenerator.class.getName()); word2vecClusterFeatures.setAttribute("dict", InputOutputUtils.normalizeLexiconName(word2vecFile.getName())); Element word2vecClusterWindow = new Element("window"); word2vecClusterWindow.setAttribute("prevLength", Integer.toString(leftWindow)); word2vecClusterWindow.setAttribute("nextLength", Integer.toString(rightWindow)); word2vecClusterWindow.addContent(word2vecClusterFeatures); generators.addContent(word2vecClusterWindow); } System.err.println("-> Word2Vec Clusters Features added!"); } //Morphological features if (Flags.isMorphoFeatures(params)) { setWindow(params); String morphoPath = Flags.getMorphoFeatures(params); String[] morphoResources = Flags.getMorphoResources(morphoPath); String morphoRange = Flags.getMorphoFeaturesRange(params); Element morphoClassFeatureElement = new Element("custom"); morphoClassFeatureElement.setAttribute("class", MorphoFeatureGenerator.class.getName()); morphoClassFeatureElement.setAttribute("model", InputOutputUtils.normalizeLexiconName(new File(morphoResources[0]).getName())); morphoClassFeatureElement.setAttribute("dict", InputOutputUtils.normalizeLexiconName(new File(morphoResources[1]).getName())); morphoClassFeatureElement.setAttribute("range", morphoRange); Element morphoClassFeatureWindow = new Element("window"); morphoClassFeatureWindow.setAttribute("prevLength", Integer.toString(leftWindow)); morphoClassFeatureWindow.setAttribute("nextLength", Integer.toString(rightWindow)); morphoClassFeatureWindow.addContent(morphoClassFeatureElement); generators.addContent(morphoClassFeatureWindow); System.err.println("-> Morphological Features added!"); } if (Flags.isMFSFeatures(params)) { setWindow(params); String mfsPath = Flags.getMFSFeatures(params); String[] mfsResources = Flags.getMFSResources(mfsPath); String mfsRange = Flags.getMFSFeaturesRange(params); String seqCodec = Flags.getSequenceCodec(params); Element 
mfsClassFeatureElement = new Element("custom"); mfsClassFeatureElement.setAttribute("class", MFSFeatureGenerator.class.getName()); mfsClassFeatureElement.setAttribute("model", InputOutputUtils.normalizeLexiconName(new File(mfsResources[0]).getName())); mfsClassFeatureElement.setAttribute("dict", InputOutputUtils.normalizeLexiconName(new File(mfsResources[1]).getName())); mfsClassFeatureElement.setAttribute("mfs", InputOutputUtils.normalizeLexiconName(new File(mfsResources[2]).getName())); mfsClassFeatureElement.setAttribute("range", mfsRange); mfsClassFeatureElement.setAttribute("seqCodec", seqCodec); Element mfsClassFeatureWindow = new Element("window"); mfsClassFeatureWindow.setAttribute("prevLength", Integer.toString(leftWindow)); mfsClassFeatureWindow.setAttribute("nextLength", Integer.toString(rightWindow)); mfsClassFeatureWindow.addContent(mfsClassFeatureElement); generators.addContent(mfsClassFeatureWindow); System.err.println("-> MFS Features added"); } if (Flags.isSuperSenseFeatures(params)) { String mfsPath = Flags.getSuperSenseFeatures(params); String[] mfsResources = Flags.getSuperSenseResources(mfsPath); String mfsRange = Flags.getSuperSenseFeaturesRange(params); String seqCodec = Flags.getSequenceCodec(params); Element mfsClassFeatureElement = new Element("custom"); mfsClassFeatureElement.setAttribute("class", SuperSenseFeatureGenerator.class.getName()); mfsClassFeatureElement.setAttribute("model", InputOutputUtils.normalizeLexiconName(new File(mfsResources[0]).getName())); mfsClassFeatureElement.setAttribute("dict", InputOutputUtils.normalizeLexiconName(new File(mfsResources[1]).getName())); mfsClassFeatureElement.setAttribute("mfs", InputOutputUtils.normalizeLexiconName(new File(mfsResources[2]).getName())); mfsClassFeatureElement.setAttribute("range", mfsRange); mfsClassFeatureElement.setAttribute("seqCodec", seqCodec); generators.addContent(mfsClassFeatureElement); System.err.println("-> SuperSense Features added!"); } 
aggGenerators.addContent(cached); cached.addContent(generators); XMLOutputter xmlOutput = new XMLOutputter(); Format format = Format.getPrettyFormat(); xmlOutput.setFormat(format); return xmlOutput.outputString(doc); }