Example usage for org.apache.commons.configuration BaseConfiguration BaseConfiguration

List of usage examples for the org.apache.commons.configuration.BaseConfiguration constructor

Introduction

On this page you can find example usages of the org.apache.commons.configuration.BaseConfiguration constructor, BaseConfiguration().

Prototype

BaseConfiguration

Source Link

Usage

From source file:revaligner.service.FileAligner.java

/**
 * Builds the configuration that is handed to the segmenter for a source language.
 *
 * <p>The returned configuration always contains:
 * <ul>
 *   <li>{@code segmenter.default.paragraphsegmenter} - the {@code segmentParagraph} flag</li>
 *   <li>{@code segmenter.strategy.trados.rule1enabled} - the string {@code "true"}</li>
 *   <li>{@code segmenter.default.strategy} - {@code "trados"}</li>
 *   <li>{@code segmenter.default.simplifier} - {@code "fontformat"}</li>
 *   <li>{@code breakiterator.strategy.sentence.trados.esps} - the result of
 *       {@code createESMString} for the locale made from {@code srcLangcode}</li>
 * </ul>
 * An {@code abbreviations.<lang>} entry is added only when {@code getAbbreviations}
 * yields a non-empty value; {@code <lang>} is the part of {@code srcLangcode} before
 * the first {@code '-'}.
 *
 * @param segmentParagraph value stored under {@code segmenter.default.paragraphsegmenter}
 * @param srcLangcode      source language code, e.g. presumably {@code "en-US"} (format
 *                         not visible here; only the text before the first '-' is used
 *                         for the abbreviations key)
 * @return a freshly created {@link BaseConfiguration} populated as described above
 * @throws Exception kept from the original signature; presumably propagated from the
 *                   helper calls, which are defined outside this block
 */
private Configuration createConfigForSegmenter(boolean segmentParagraph, String srcLangcode) throws Exception {
    Configuration config = new BaseConfiguration();
    config.setProperty("segmenter.default.paragraphsegmenter", Boolean.valueOf(segmentParagraph));
    config.setProperty("segmenter.strategy.trados.rule1enabled", "true");

    config.setProperty("segmenter.default.strategy", "trados");
    config.setProperty("segmenter.default.simplifier", "fontformat");
    config.setProperty("breakiterator.strategy.sentence.trados.esps",
            createESMString(Locale.makeLocale(srcLangcode)));

    // The abbreviation list is optional: skip the property when the helper has
    // nothing to offer (null or empty) instead of failing with a NullPointerException.
    String abbv = getAbbreviations(srcLangcode);
    if (abbv != null && !abbv.isEmpty()) {
        // Abbreviations are keyed by the bare language, without any region suffix.
        String language = srcLangcode.split("-")[0];
        config.setProperty("abbreviations." + language, abbv);
    }
    return config;
}

From source file:revaligner.service.FileAligner.java

public ArrayList<String[]> populateSourceTxlf() throws Exception {
    System.out.println("populating source txlf with aligned segments....");

    ArrayList<String[]> reportStates = new ArrayList();

    ExtractionSupportImpl extractionSupportImpl = new ExtractionSupportImpl(
            Locale.makeLocale(this.sourcelanguage), Locale.makeLocale(this.targetlanguage));
    Configuration config = new BaseConfiguration();
    config.setProperty("extraction.tokens.extract", "all");
    extractionSupportImpl.setConfiguration(config);

    Locale locale = Locale.makeLocale(this.sourcelanguage);
    TradosWordCounter wcounter = new TradosWordCounter(locale, config);

    org.dom4j.Document document_src = XmlParser.parseXmlFile(this.sourcetxlf_nonSeg);
    org.dom4j.Element root_src = document_src.getRootElement();

    org.dom4j.Document document_src_ingt = XmlParser.parseXmlFile(this.sourcetxlf_nonSeg);
    org.dom4j.Element root_src_ingt = document_src_ingt.getRootElement();

    org.dom4j.Document document_src_seg = XmlParser.parseXmlFile(this.sourcetxlf_seg);
    org.dom4j.Element root_src_seg = document_src_seg.getRootElement();

    List<com.aspose.words.Node> list_source = root_src
            .selectNodes("//*[name() = 'group'][@restype = 'x-paragraph']");
    List<com.aspose.words.Node> list_source_ingt = root_src_ingt
            .selectNodes("//*[name() = 'group'][@restype = 'x-paragraph']");
    List<com.aspose.words.Node> list_source_seg = root_src_seg
            .selectNodes("//*[name() = 'group'][@restype = 'x-paragraph']");
    int count = 0;
    int totalWC = 0;

    org.dom4j.Document document = XmlParser.parseXmlFile(this.alignedfile);
    List<org.dom4j.Element> groups = document.getRootElement().element("aligned").elements("group");
    for (int i = 0; i < groups.size(); i++) {
        org.dom4j.Element group = (org.dom4j.Element) groups.get(i);
        List<org.dom4j.Element> units = group.elements("unit");
        if (((org.dom4j.Element) units.get(0)).element("src_para") != null) {
            boolean isParaAllSegmented = true;
            for (int j = 0; j < units.size(); j++) {
                if (((org.dom4j.Element) units.get(j)).attributeValue("alignsegs").equals("false")) {
                    isParaAllSegmented = false;
                    break;
                }/*from w w w . jav  a 2 s  .  c o m*/
            }
            String srcTextAccepted = group.elementText("text").replaceAll("(?s)<del>.*?</del>", "")
                    .replaceAll("<(/)*ins>", "");
            if (!extractionSupportImpl.isExtractable(srcTextAccepted)) {
                if (isParaAllSegmented) {
                    for (int j = 0; j < units.size(); j++) {
                        org.dom4j.Element unit = (org.dom4j.Element) units.get(j);
                        List<org.dom4j.Element> srcsegs = unit.element("src_para").element("segments")
                                .elements("src_seg");
                        List<org.dom4j.Element> trgsegs = unit.element("trg_para").element("segments")
                                .elements("trg_seg");
                        for (int x = 0; x < srcsegs.size(); x++) {
                            String[] s = new String[7];
                            s[0] = ((org.dom4j.Element) srcsegs.get(x)).getText();
                            if (x >= trgsegs.size()) {
                                s[1] = "";
                            } else {
                                org.dom4j.Element trgseg = (org.dom4j.Element) trgsegs.get(x);
                                String id = trgseg.attributeValue("id");
                                if (id.startsWith("n - ")) {
                                    s[1] = trgseg.getText();
                                } else {
                                    List tmp_contents = new ArrayList();
                                    if (id.contains(" - ")) {
                                        int start = Integer.parseInt(id.split(" - ")[0]);
                                        int end = Integer.parseInt(id.split(" - ")[1]);
                                        tmp_contents.addAll(
                                                (Collection) this.txlftrgsegmap.get(Integer.valueOf(start)));
                                        for (int su = start + 1; su <= end; su++) {
                                            boolean isprevendofpara = ((boolean[]) this.txlftrgsewsmap
                                                    .get(Integer.valueOf(su - 1)))[1];
                                            boolean iscurrentstartofpara = ((boolean[]) this.txlftrgsewsmap
                                                    .get(Integer.valueOf(su)))[0];
                                            if ((isprevendofpara) && (iscurrentstartofpara)) {
                                                List prevseg = (List) this.txlftrgsegmap
                                                        .get(Integer.valueOf(su - 1));
                                                int previdx = -1;
                                                for (int prev = 0; prev < prevseg.size(); prev++) {
                                                    org.dom4j.Node prevnode = (org.dom4j.Node) prevseg
                                                            .get(prev);
                                                    if (prevnode.getNodeType() == 1) {
                                                        org.dom4j.Element prevnode_e = (org.dom4j.Element) prevnode;
                                                        if ((prevnode_e.getName().equals("ws")) && (prevnode_e
                                                                .attributeValue("pos").equals("after"))) {
                                                            previdx = prevseg.size() - prev;
                                                        }
                                                    }
                                                }
                                                if (previdx != -1) {
                                                    tmp_contents.remove(tmp_contents.size() - previdx);
                                                }
                                                List currseg = (List) this.txlftrgsegmap
                                                        .get(Integer.valueOf(su));
                                                int curridx = -1;
                                                for (int curr = 0; curr < currseg.size(); curr++) {
                                                    org.dom4j.Node currnode = (org.dom4j.Node) currseg
                                                            .get(curr);
                                                    if (currnode.getNodeType() == 1) {
                                                        org.dom4j.Element currnode_e = (org.dom4j.Element) currnode;
                                                        if ((currnode_e.getName().equals("ws")) && (currnode_e
                                                                .attributeValue("pos").equals("before"))) {
                                                            curridx = curr;
                                                        }
                                                    }
                                                }
                                                if (curridx != -1) {
                                                    currseg.remove(curridx);
                                                }
                                                if (Locale.makeLocale(this.targetlanguage).isFarEast()) {
                                                    tmp_contents.addAll(currseg);
                                                } else {
                                                    tmp_contents.add(DocumentHelper.createText(" "));
                                                    tmp_contents.addAll(currseg);
                                                }
                                            } else {
                                                tmp_contents.addAll((Collection) this.txlftrgsegmap
                                                        .get(Integer.valueOf(su)));
                                            }
                                        }
                                    } else {
                                        tmp_contents.addAll((Collection) this.txlftrgsegmap
                                                .get(Integer.valueOf(Integer.parseInt(id))));
                                    }
                                    s[1] = trimText(assembleText(tmp_contents).replace("<br> ", "&#8629;<br>"),
                                            false)[0];
                                }
                            }
                            s[2] = "N/A";
                            s[3] = "N/A";
                            s[4] = ((org.dom4j.Element) srcsegs.get(x)).attributeValue("tctype");
                            s[5] = "0";
                            s[6] = "";
                            reportStates.add(s);
                        }
                    }
                } else {
                    String[] s = new String[7];
                    for (int j = 0; j < units.size(); j++) {
                        s[0] = ((org.dom4j.Element) units.get(j)).element("src_para").elementText("text");
                        if (((org.dom4j.Element) units.get(j)).element("trg_para") != null) {
                            s[1] = ((org.dom4j.Element) units.get(j)).element("trg_para").elementText("text");
                        } else {
                            s[1] = "";
                        }
                        s[2] = "N/A";
                        s[3] = "N/A";
                        s[4] = ((org.dom4j.Element) units.get(j)).element("src_para").attributeValue("tctype");
                        s[5] = "0";
                        s[6] = "";
                        reportStates.add(s);
                    }
                }
            } else {
                if (isParaAllSegmented) {
                    org.dom4j.Element txlf_group = (org.dom4j.Element) list_source.get(count);
                    org.dom4j.Element txlf_group_ingt = (org.dom4j.Element) list_source_ingt.get(count);
                    org.dom4j.Element txlf_group_seg = (org.dom4j.Element) list_source_seg.get(count);
                    txlf_group.setContent(txlf_group_seg.content());
                    List transunits = txlf_group.elements("trans-unit");

                    txlf_group_ingt.setContent(txlf_group_seg.content());
                    List transunits_ingt = txlf_group_ingt.elements("trans-unit");

                    ArrayList<String> mergedsegtext = new ArrayList();
                    ArrayList<List> merged_trg_contents = new ArrayList();
                    ArrayList<String> mergedsegtctypes = new ArrayList();

                    ArrayList<String> keys = new ArrayList();
                    ArrayList<String> key_left = new ArrayList();
                    ArrayList<String> key_right = new ArrayList();
                    ArrayList<String> org_keys = new ArrayList();
                    ArrayList<String> trg_keys = new ArrayList();
                    ArrayList<List> trg_contents = new ArrayList();
                    ArrayList<String> src_tctypes = new ArrayList();
                    ArrayList<String> src_review_stats = new ArrayList();
                    ArrayList<String> src_ignore_stats = new ArrayList();
                    ArrayList<Integer> edited_idx = new ArrayList();
                    for (int j = 0; j < units.size(); j++) {
                        org.dom4j.Element unit = (org.dom4j.Element) units.get(j);
                        org.dom4j.Element src_para = unit.element("src_para");
                        org.dom4j.Element trg_para = unit.element("trg_para");
                        List src_segs = src_para.element("segments").elements("src_seg");
                        for (int z = 0; z < src_segs.size(); z++) {
                            org.dom4j.Element src_seg = (org.dom4j.Element) src_segs.get(z);
                            src_tctypes.add(src_seg.attributeValue("tctype"));
                            src_review_stats.add(src_seg.attributeValue("needreview"));
                            src_ignore_stats.add(src_seg.attributeValue("ignored"));
                            keys.add(src_seg.getText().replaceAll("(?s)<del>.*?</del>", "")
                                    .replaceAll("<(/)*ins>", "").replace("<br>", "").trim());
                            org_keys.add(src_seg.getText());
                            if (trg_para != null) {
                                List trg_segs = trg_para.element("segments").elements("trg_seg");
                                if (((org.dom4j.Element) trg_segs.get(z)).attributeValue("edited")
                                        .equals("true")) {
                                    edited_idx.add(Integer.valueOf(trg_contents.size()));
                                }
                                if (trg_segs.size() > z) {
                                    trg_keys.add(((org.dom4j.Element) trg_segs.get(z)).getText());
                                    String id = ((org.dom4j.Element) trg_segs.get(z)).attributeValue("id");
                                    if (id.startsWith("n - ")) {
                                        trg_contents.add(new ArrayList());
                                    } else {
                                        List tmp_contents = new ArrayList();
                                        if (id.contains(" - ")) {
                                            int start = Integer.parseInt(id.split(" - ")[0]);
                                            int end = Integer.parseInt(id.split(" - ")[1]);
                                            tmp_contents.addAll((Collection) this.txlftrgsegmap
                                                    .get(Integer.valueOf(start)));
                                            for (int su = start + 1; su <= end; su++) {
                                                boolean isprevendofpara = ((boolean[]) this.txlftrgsewsmap
                                                        .get(Integer.valueOf(su - 1)))[1];
                                                boolean iscurrentstartofpara = ((boolean[]) this.txlftrgsewsmap
                                                        .get(Integer.valueOf(su)))[0];
                                                if ((isprevendofpara) && (iscurrentstartofpara)) {
                                                    List prevseg = (List) this.txlftrgsegmap
                                                            .get(Integer.valueOf(su - 1));
                                                    int previdx = -1;
                                                    for (int prev = 0; prev < prevseg.size(); prev++) {
                                                        org.dom4j.Node prevnode = (org.dom4j.Node) prevseg
                                                                .get(prev);
                                                        if (prevnode.getNodeType() == 1) {
                                                            org.dom4j.Element prevnode_e = (org.dom4j.Element) prevnode;
                                                            if ((prevnode_e.getName().equals("ws"))
                                                                    && (prevnode_e.attributeValue("pos")
                                                                            .equals("after"))) {
                                                                previdx = prevseg.size() - prev;
                                                            }
                                                        }
                                                    }
                                                    if (previdx != -1) {
                                                        tmp_contents.remove(tmp_contents.size() - previdx);
                                                    }
                                                    List currseg = (List) this.txlftrgsegmap
                                                            .get(Integer.valueOf(su));
                                                    int curridx = -1;
                                                    for (int curr = 0; curr < currseg.size(); curr++) {
                                                        org.dom4j.Node currnode = (org.dom4j.Node) currseg
                                                                .get(curr);
                                                        if (currnode.getNodeType() == 1) {
                                                            org.dom4j.Element currnode_e = (org.dom4j.Element) currnode;
                                                            if ((currnode_e.getName().equals("ws"))
                                                                    && (currnode_e.attributeValue("pos")
                                                                            .equals("before"))) {
                                                                curridx = curr;
                                                            }
                                                        }
                                                    }
                                                    if (curridx != -1) {
                                                        currseg.remove(curridx);
                                                    }
                                                    if (Locale.makeLocale(this.targetlanguage).isFarEast()) {
                                                        tmp_contents.addAll(currseg);
                                                    } else {
                                                        tmp_contents.add(DocumentHelper.createText(" "));
                                                        tmp_contents.addAll(currseg);
                                                    }
                                                } else {
                                                    tmp_contents.addAll((Collection) this.txlftrgsegmap
                                                            .get(Integer.valueOf(su)));
                                                }
                                            }
                                        } else {
                                            tmp_contents.addAll((Collection) this.txlftrgsegmap
                                                    .get(Integer.valueOf(Integer.parseInt(id))));
                                        }
                                        trg_contents.add(tmp_contents);
                                    }
                                } else {
                                    trg_keys.add("");
                                    trg_contents.add(new ArrayList());
                                }
                            } else {
                                trg_keys.add("");
                                trg_contents.add(new ArrayList());
                            }
                            if ((z == 0) && (z == src_segs.size() - 1)) {
                                key_left.add(src_para.attributeValue("lefttrim"));
                                key_right.add(src_para.attributeValue("righttrim"));
                            } else if (z == 0) {
                                key_left.add(src_para.attributeValue("lefttrim"));
                                key_right.add("true");
                            } else if (z == src_segs.size() - 1) {
                                key_left.add("true");
                                key_right.add(src_para.attributeValue("righttrim"));
                            } else {
                                key_left.add("true");
                                key_right.add("true");
                            }
                        }
                    }
                    SegmenterFactory factory = new SegmenterFactory();
                    Configuration segconfig = createConfigForSegmenter(false, this.sourcelanguage);
                    Segmenter segmenter = factory.getSegmenter("trados", Locale.makeLocale(this.sourcelanguage),
                            segconfig);
                    List<String> finsegs = segmenter
                            .segment(group.elementText("text").replaceAll("(?s)<del>.*?</del>", "")
                                    .replaceAll("<(/)*ins>", "").replace("<br>", "").replace("&lt;", "<")
                                    .replace("&gt;", ">").replace("&amp;", "&"));
                    ArrayList<ArrayList<Integer>> indices = new ArrayList();
                    int key_start_index = 0;
                    for (int k = 0; k < finsegs.size(); k++) {
                        String finsegtext = ((String) finsegs.get(k)).replace("&", "&amp;").replace("<", "&lt;")
                                .replace(">", "&gt;");

                        String combined_key = "";
                        ArrayList<Integer> indice = new ArrayList();
                        for (int x = key_start_index; x < keys.size(); x++) {
                            combined_key = combined_key + (String) keys.get(x);

                            indice.add(Integer.valueOf(x));
                            if (combined_key.replace("", " ").trim().replaceAll("(\\s)+", "")
                                    .equals(finsegtext.replace("", " ").trim().replaceAll("(\\s)+", ""))) {
                                indices.add(indice);
                                key_start_index = x + 1;
                                break;
                            }
                        }
                    }
                    ArrayList<Integer> merged_edited_idx = new ArrayList();
                    ArrayList<String[]> statss = new ArrayList();
                    for (int m = 0; m < indices.size(); m++) {
                        boolean iscontentsuseable = true;
                        ArrayList<Integer> temp_indice = (ArrayList) indices.get(m);
                        String temp_src = "";
                        String temp_org_src = "";
                        String temp_trg = "";
                        List temp_trg_content = new ArrayList();
                        int id = 1;
                        int rid = 1;
                        int bxrid = 1;
                        int bptrid = 1;
                        int bxid = 1;
                        int bptid = 1;
                        HashMap<String, String> map_rid = new HashMap();
                        String temp_tctype = (String) src_tctypes
                                .get(((Integer) temp_indice.get(0)).intValue());
                        String temp_review_stats = (String) src_review_stats
                                .get(((Integer) temp_indice.get(0)).intValue());
                        for (Iterator localIterator = temp_indice.iterator(); localIterator.hasNext();) {
                            int it = ((Integer) localIterator.next()).intValue();
                            temp_tctype = temp_tctype.equals(src_tctypes.get(it)) ? temp_tctype : "MIX";
                            temp_review_stats = ((String) src_review_stats.get(it)).equals("true") ? "true"
                                    : temp_review_stats.equals("true") ? "true" : "false";
                            String temp_ignore_stats = (String) src_ignore_stats.get(it);
                            if (edited_idx.contains(Integer.valueOf(it))) {
                                iscontentsuseable = false;
                            }
                            temp_src = temp_src + (String) keys.get(it);
                            temp_org_src = temp_org_src + (String) org_keys.get(it);
                            if (temp_ignore_stats.equals("true")) {
                                temp_trg = temp_trg + "[skipseg]";
                                temp_trg_content.add(DocumentHelper.createText("[skipseg]"));
                            } else {
                                temp_trg = temp_trg + (String) trg_keys.get(it);

                                List trg_content = (List) trg_contents.get(it);
                                for (int nc = 0; nc < trg_content.size(); nc++) {
                                    org.dom4j.Node raw = (org.dom4j.Node) trg_content.get(nc);
                                    if (raw.getNodeType() == 3) {
                                        temp_trg_content.add(raw);
                                    } else if (raw.getNodeType() == 1) {
                                        org.dom4j.Element rawe = (org.dom4j.Element) raw;
                                        if (rawe.getName().equals("source")) {
                                            for (int ncc = 0; ncc < rawe.content().size(); ncc++) {
                                                org.dom4j.Node node = (org.dom4j.Node) rawe.content().get(ncc);
                                                if (node.getNodeType() == 3) {
                                                    temp_trg_content.add(node);
                                                } else if (node.getNodeType() == 1) {
                                                    org.dom4j.Element e = (org.dom4j.Element) node;
                                                    if (!e.getName().equals("x")) {
                                                        if (!e.getName().equals("ph")) {
                                                            if (e.getName().equals("bx")) {
                                                                if ((e.attribute("fake") != null)
                                                                        && (e.attributeValue("fake")
                                                                                .equals("true"))
                                                                        && (temp_indice.indexOf(
                                                                                Integer.valueOf(it)) != 0)) {
                                                                    continue;
                                                                }
                                                                if ((e.attribute("fake") == null)
                                                                        || (!e.attributeValue("fake")
                                                                                .equals("true"))) {
                                                                }
                                                            } else if (e.getName().equals("ex")) {
                                                                if ((e.attribute("fake") != null)
                                                                        && (e.attributeValue("fake")
                                                                                .equals("true"))
                                                                        && (temp_indice.indexOf(Integer.valueOf(
                                                                                it)) != temp_indice.size()
                                                                                        - 1)) {
                                                                    continue;
                                                                }
                                                                if ((e.attribute("fake") == null)
                                                                        || (!e.attributeValue("fake")
                                                                                .equals("true"))) {
                                                                }
                                                            } else if (e.getName().equals("bpt")) {
                                                                if ((e.attribute("fake") != null)
                                                                        && (e.attributeValue("fake")
                                                                                .equals("true"))
                                                                        && (temp_indice.indexOf(
                                                                                Integer.valueOf(it)) != 0)) {
                                                                    continue;
                                                                }
                                                                if ((e.attribute("fake") == null)
                                                                        || (!e.attributeValue("fake")
                                                                                .equals("true"))) {
                                                                }
                                                            } else if (e.getName().equals("ept")) {
                                                                if ((e.attribute("fake") != null)
                                                                        && (e.attributeValue("fake")
                                                                                .equals("true"))
                                                                        && (temp_indice.indexOf(Integer.valueOf(
                                                                                it)) != temp_indice.size()
                                                                                        - 1)) {
                                                                    continue;
                                                                }
                                                                if ((e.attribute("fake") == null)
                                                                        || (!e.attributeValue("fake")
                                                                                .equals("true"))) {
                                                                }
                                                            }
                                                        }
                                                    }
                                                    if (e.attribute("fake") != null) {
                                                        e.remove(e.attribute("fake"));
                                                    }
                                                    temp_trg_content.add(e);
                                                }
                                            }
                                        } else if (rawe.getName().equals("ws")) {
                                            String pos = rawe.attributeValue("pos");
                                            if (pos.equals("before")) {
                                                for (int ncc = 0; ncc < rawe.content().size(); ncc++) {
                                                    org.dom4j.Node node = (org.dom4j.Node) rawe.content()
                                                            .get(ncc);
                                                    if (node.getNodeType() == 3) {
                                                        temp_trg_content.add(0, node);
                                                    } else if (node.getNodeType() == 1) {
                                                        org.dom4j.Element e = (org.dom4j.Element) node;
                                                        if ((!e.getName().equals("x"))
                                                                && (e.getName().equals("it"))) {
                                                            if (e.attributeValue("pos").equals("open")) {
                                                                if ((e.attribute("fake") != null)
                                                                        && (e.attributeValue("fake")
                                                                                .equals("true"))
                                                                        && (temp_indice.indexOf(
                                                                                Integer.valueOf(it)) != 0)) {
                                                                    continue;
                                                                }
                                                                if (e.getText().equals("")) {
                                                                    e.setName("bx");
                                                                } else {
                                                                    e.setName("bpt");
                                                                }
                                                                if ((e.attribute("fake") == null)
                                                                        || (!e.attributeValue("fake")
                                                                                .equals("true"))) {
                                                                }
                                                            } else if (e.attributeValue("pos")
                                                                    .equals("close")) {
                                                                if ((e.attribute("fake") != null)
                                                                        && (e.attributeValue("fake")
                                                                                .equals("true"))
                                                                        && (temp_indice.indexOf(Integer.valueOf(
                                                                                it)) != temp_indice.size()
                                                                                        - 1)) {
                                                                    continue;
                                                                }
                                                                if (e.getText().equals("")) {
                                                                    e.setName("ex");
                                                                } else {
                                                                    e.setName("ept");
                                                                }
                                                                e.remove(e.attribute("ctype"));
                                                                if ((e.attribute("fake") == null)
                                                                        || (!e.attributeValue("fake")
                                                                                .equals("true"))) {
                                                                }
                                                            }
                                                            e.remove(e.attribute("pos"));
                                                        } else {
                                                            if (e.attribute("fake") != null) {
                                                                e.remove(e.attribute("fake"));
                                                            }
                                                            temp_trg_content.add(0, e);
                                                        }
                                                    }
                                                }
                                            } else if (pos.equals("after")) {
                                                for (int ncc = 0; ncc < rawe.content().size(); ncc++) {
                                                    org.dom4j.Node node = (org.dom4j.Node) rawe.content()
                                                            .get(ncc);
                                                    if (node.getNodeType() == 3) {
                                                        temp_trg_content.add(node);
                                                    } else if (node.getNodeType() == 1) {
                                                        org.dom4j.Element e = (org.dom4j.Element) node;
                                                        if ((!e.getName().equals("x"))
                                                                && (e.getName().equals("it"))) {
                                                            if (e.attributeValue("pos").equals("open")) {
                                                                if ((e.attribute("fake") != null)
                                                                        && (e.attributeValue("fake")
                                                                                .equals("true"))
                                                                        && (temp_indice.indexOf(
                                                                                Integer.valueOf(it)) != 0)) {
                                                                    continue;
                                                                }
                                                                if (e.getText().equals("")) {
                                                                    e.setName("bx");
                                                                } else {
                                                                    e.setName("bpt");
                                                                }
                                                                if ((e.attribute("fake") == null)
                                                                        || (!e.attributeValue("fake")
                                                                                .equals("true"))) {
                                                                }
                                                            } else if (e.attributeValue("pos")
                                                                    .equals("close")) {
                                                                if ((e.attribute("fake") != null)
                                                                        && (e.attributeValue("fake")
                                                                                .equals("true"))
                                                                        && (temp_indice.indexOf(Integer.valueOf(
                                                                                it)) != temp_indice.size()
                                                                                        - 1)) {
                                                                    continue;
                                                                }
                                                                if (e.getText().equals("")) {
                                                                    e.setName("ex");
                                                                } else {
                                                                    e.setName("ept");
                                                                }
                                                                e.remove(e.attribute("ctype"));
                                                                if ((e.attribute("fake") == null)
                                                                        || (!e.attributeValue("fake")
                                                                                .equals("true"))) {
                                                                }
                                                            }
                                                            e.remove(e.attribute("pos"));
                                                        } else {
                                                            if (e.attribute("fake") != null) {
                                                                e.remove(e.attribute("fake"));
                                                            }
                                                            temp_trg_content.add(e);
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                        String[] stats = TrackChangeHelper.getTxlfTrgStatsFromTCType(temp_tctype, temp_trg);
                        if ((stats[0].equals("1")) && (temp_review_stats.equals("true"))) {
                            stats[2] = "fuzzy-match";
                        }
                        String[] s = new String[7];
                        s[0] = temp_org_src.replace("<br> ", "&#8629;<br>");
                        if (iscontentsuseable) {
                            s[1] = trimText(assembleText(temp_trg_content).replace("<br> ", "&#8629;<br>"),
                                    false)[0];
                        } else {
                            s[1] = temp_trg.replace("<br> ", "&#8629;<br>");
                        }
                        if (s[1].contains("[skipseg]")) {
                            if (s[1].replace("[skipseg]", "").trim().equals("")) {
                                s[1] = "";
                                temp_trg_content = new ArrayList();
                                temp_trg_content.add(DocumentHelper.createText(""));
                                temp_trg = "";
                                stats[0] = "1";
                                stats[1] = "translated";
                                stats[2] = "exact-match";
                            } else {
                                s[1] = s[1].replace("[skipseg]", "");
                                temp_trg_content = replacetextinDomObj(temp_trg_content);
                                temp_trg = temp_trg.replace("[skipseg]", "");
                            }
                        }
                        s[2] = stats[0];
                        s[3] = stats[2];
                        s[4] = temp_tctype;
                        wcounter = new TradosWordCounter(locale, config);
                        wcounter.countText(((org.dom4j.Element) transunits.get(m)).element("source").getText());
                        s[5] = Integer.toString(wcounter.getWordCount());
                        s[6] = "";
                        totalWC += wcounter.getWordCount();
                        reportStates.add(s);
                        if (extractionSupportImpl.isExtractable(temp_src)) {
                            mergedsegtext.add(temp_trg);
                            if (!iscontentsuseable) {
                                merged_edited_idx.add(Integer.valueOf(merged_trg_contents.size()));
                            }
                            merged_trg_contents.add(temp_trg_content);
                            mergedsegtctypes.add(temp_tctype);
                            statss.add(stats);
                        }
                    }
                    for (int t = 0; t < transunits.size(); t++) {
                        org.dom4j.Element trans_unit = (org.dom4j.Element) transunits.get(t);
                        org.dom4j.Element trans_unit_ignt = (org.dom4j.Element) transunits_ingt.get(t);
                        trans_unit.addAttribute("gs4tr:editStatus", "leveraged");
                        org.dom4j.Element source = trans_unit.element("source");

                        org.dom4j.Element target = trans_unit.addElement("target");
                        trans_unit.elements().add(source.indexOf(source.getParent()) + 2, target.clone());
                        trans_unit.remove(target);
                        target = trans_unit.element("target");

                        org.dom4j.Element target_ignt = trans_unit_ignt.addElement("target");
                        trans_unit_ignt.elements().add(source.indexOf(source.getParent()) + 2,
                                target_ignt.clone());
                        trans_unit_ignt.remove(target_ignt);
                        target_ignt = trans_unit_ignt.element("target");
                        if (merged_edited_idx.contains(Integer.valueOf(t))) {
                            target.setText(((String) mergedsegtext.get(t)).replace("&lt;", "<")
                                    .replace("&gt;", ">").replace("&amp;", "&").trim());
                            target_ignt.setText(((String) mergedsegtext.get(t)).replace("&lt;", "<")
                                    .replace("&gt;", ">").replace("&amp;", "&").trim());
                        } else {
                            target.setContent(trimContents((List) merged_trg_contents.get(t)));
                            target_ignt.setContent(trimContents((List) merged_trg_contents.get(t)));
                            if (!((String[]) statss.get(t))[0].equals("75")) {
                                org.dom4j.Element source_ingt = trans_unit_ignt.element("source");
                                source_ingt.setContent(trimContents((List) merged_trg_contents.get(t)));
                            }
                        }
                        String[] stats = (String[]) statss.get(t);
                        if (stats[0].equals("1")) {
                            trans_unit.addAttribute("gs4tr:locked", "true");
                        }
                        target.addAttribute("gs4tr:score", stats[0]);
                        target.addAttribute("state", stats[1]);
                        target.addAttribute("state-qualifier", stats[2]);
                        if (stats[0].equals("0")) {
                            trans_unit.remove(target);
                        }
                    }
                } else {
                    String trgtext = "";
                    if (((org.dom4j.Element) units.get(0)).element("trg_para") != null) {
                        trgtext = ((org.dom4j.Element) units.get(0)).element("trg_para").elementText("text");
                    }
                    String temp_tctype = ((org.dom4j.Element) units.get(0)).element("src_para")
                            .attributeValue("tctype");
                    for (int j = 1; j < units.size(); j++) {
                        org.dom4j.Element prev_unit = (org.dom4j.Element) units.get(j - 1);
                        org.dom4j.Element unit = (org.dom4j.Element) units.get(j);
                        String src_tctype = unit.element("src_para").attributeValue("tctype");
                        temp_tctype = temp_tctype.equals(src_tctype) ? temp_tctype : "MIX";
                        if (unit.element("trg_para") != null) {
                            String Rtrim = prev_unit.element("src_para").attributeValue("righttrim");
                            String Ltrim = unit.element("src_para").attributeValue("lefttrim");
                            if ((Rtrim.equals("true")) || (Ltrim.equals("true"))) {
                                trgtext = trgtext + " " + unit.element("trg_para").elementText("text");
                            } else {
                                trgtext = trgtext + unit.element("trg_para").elementText("text");
                            }
                        }
                    }
                    org.dom4j.Element txlf_group = (org.dom4j.Element) list_source.get(count);
                    org.dom4j.Element trans_unit = txlf_group.element("trans-unit");
                    trans_unit.addAttribute("gs4tr:editStatus", "leveraged");
                    org.dom4j.Element source = trans_unit.element("source");

                    org.dom4j.Element target = trans_unit.addElement("target");
                    trans_unit.elements().add(source.indexOf(source.getParent()) + 2, target.clone());
                    trans_unit.remove(target);
                    target = trans_unit.element("target");

                    int lb_cnt = 0;
                    String surfix = trgtext;
                    while (surfix.indexOf("<br> ") != -1) {
                        lb_cnt++;
                        int pos = surfix.indexOf("<br> ");
                        String prefix = surfix.substring(0, pos);
                        target.addText(prefix.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&"));
                        org.dom4j.Element x = target.addElement("x");
                        x.addAttribute("ctype", "lb");
                        x.addAttribute("id", Integer.toString(lb_cnt));
                        x.addAttribute("equiv-text", " ");
                        surfix = surfix.substring(pos + 5, surfix.length());
                    }
                    target.addText(surfix.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&"));
                    String[] stats = TrackChangeHelper.getTxlfTrgStatsFromTCType(temp_tctype, trgtext);
                    target.addAttribute("gs4tr:score", stats[0]);
                    target.addAttribute("state", stats[1]);
                    target.addAttribute("state-qualifier", stats[2]);

                    String[] s = new String[7];
                    s[0] = group.elementText("text").replace("<br> ", "&#8629;<br>");
                    s[1] = trgtext.replace("<br> ", "&#8629;<br>");
                    s[2] = stats[0];
                    s[3] = stats[2];
                    s[4] = temp_tctype;
                    wcounter = new TradosWordCounter(locale, config);
                    wcounter.countText(source.getText());
                    s[5] = Integer.toString(wcounter.getWordCount());
                    s[6] = "";
                    totalWC += wcounter.getWordCount();
                    reportStates.add(s);
                }
                count++;
            }
        }
    }
    root_src.element("file").addAttribute("gs4tr:wordcount", Integer.toString(totalWC));
    fixTxlfTrgTags(document_src);

    this.populatedsourcetxlf = (this.sourcefile + ".txlf");
    if (new File(this.populatedsourcetxlf).exists()) {
        new File(this.populatedsourcetxlf).delete();
    }
    OutputStreamWriter writer = new OutputStreamWriter(
            new BufferedOutputStream(new FileOutputStream(this.populatedsourcetxlf)), "UTF8");
    document_src.write(writer);
    writer.close();

    removeBlankLinesAndNameSpace(this.populatedsourcetxlf);

    root_src_ingt.element("file").addAttribute("gs4tr:wordcount", Integer.toString(totalWC));
    fixTxlfTrgTags(document_src_ingt);

    String ingtfile = this.sourcefile + ".ingt.txlf";
    if (new File(ingtfile).exists()) {
        new File(ingtfile).delete();
    }
    OutputStreamWriter writer_ingt = new OutputStreamWriter(
            new BufferedOutputStream(new FileOutputStream(ingtfile)), "UTF8");
    document_src_ingt.write(writer_ingt);
    writer_ingt.close();

    removeBlankLinesAndNameSpace(ingtfile);

    return reportStates;
}

From source file:revaligner.service.FileAligner.java

/**
 * Builds the aligned target Word document from the translated TXLF and the source document.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>verify that the backup source document still has replace styles available;</li>
 *   <li>copy the translated TXLF to a {@code .tmp} working copy and run
 *       {@code integrateNotesIntoSegments} on that copy;</li>
 *   <li>align the skeleton document ({@code this.sourcefile}) with the working TXLF and write the
 *       result to {@code this.aligneddoc}, i.e.
 *       {@code <translated txlf dir>/<source base name>_<target language><source extension>};</li>
 *   <li>re-open the aligned document, rewrite underline / strike-through formatting on its runs,
 *       remove ignored segments and paragraphs, insert the collected notes as comments and save
 *       the document in place.</li>
 * </ol>
 *
 * <p>The XLIFF document is disposed and the temporary TXLF copy is deleted even when a step fails.
 *
 * @throws Exception if the replace styles are not available, or if any copy, alignment or
 *                   document-processing step fails
 */
public void createTargetFile() throws Exception {
    System.out.println("creating target file....");
    if (!isReplaceStyleAvailable(new com.aspose.words.Document(this.backupsourcefile))) {
        throw new Exception("cannot create target file, replace styles not available");
    }
    // NOTE(review): the merger is configured but never used afterwards; it is kept only in case
    // its construction has side effects -- confirm and remove if it has none.
    Configuration config = new BaseConfiguration();
    MergeDOC merger = new MergeDOC();
    merger.setConfiguration(config);
    merger.setOverwriteOriginalFile(false);

    String tmp_translatedtxlf = this.translatedtxlf + ".tmp";
    File tmpTxlfFile = new File(tmp_translatedtxlf);
    XliffDocument xliffDocument = null;
    try {
        FileUtils.copyFile(new File(this.translatedtxlf), tmpTxlfFile, true);

        List<String[]> notesinfo = integrateNotesIntoSegments(tmp_translatedtxlf);

        xliffDocument = new XliffDocument(tmpTxlfFile);
        String skeletonFile = this.sourcefile;

        // Close the skeleton stream as soon as the document has been created from it, so the
        // file handle is not leaked.
        // NOTE(review): assumes createDocumentInstance reads the stream eagerly -- confirm.
        com.aspose.words.Document document;
        FileInputStream skeletonStream = new FileInputStream(skeletonFile);
        try {
            document = new AsposeFactory().createDocumentInstance(skeletonStream);
        } finally {
            skeletonStream.close();
        }

        Configuration docConfig = xliffDocument.getConfiguration(new OfficeConfigurationConverterImpl());
        WordDocumentAligner aligner = new WordDocumentAligner(document, xliffDocument);
        aligner.addWordConfiguration(docConfig);
        aligner.align();

        // Derive base name and extension from the file name only, so that a dot in a directory
        // name or a missing extension cannot produce a bogus extension or an index error.
        String name = new File(this.sourcefile).getName();
        int dot = name.lastIndexOf('.');
        String baseName = (dot >= 0) ? name.substring(0, dot) : name;
        String ext = (dot >= 0) ? name.substring(dot) : "";
        this.aligneddoc = (new File(this.translatedtxlf).getParent() + File.separator
                + baseName + "_" + this.targetlanguage + ext);
        aligner.writeAlignedDocument(new File(this.aligneddoc));

        com.aspose.words.Document trg_doc = new com.aspose.words.Document(this.aligneddoc);
        // Node type 21 is cast to Run below; underline values 1 and 0 are presumably the
        // "single" and "none" underline constants -- TODO confirm against the Aspose.Words API.
        if (this.replacestyles[1] == doublestrikethrough) {
            // Second replace style is encoded as double strike-through: turn it into a plain
            // strike-through and turn the first replace style's underline into underline 1.
            for (int i = 0; i < trg_doc.getChildNodes(21, true).getCount(); i++) {
                Run run = (Run) trg_doc.getChildNodes(21, true).get(i);
                if ((run.getFont().getUnderline() == this.replacestyles[0])
                        && run.getFont().getDoubleStrikeThrough()) {
                    run.getFont().setUnderline(1);
                    run.getFont().setDoubleStrikeThrough(false);
                    run.getFont().setStrikeThrough(true);
                } else if (run.getFont().getUnderline() == this.replacestyles[0]) {
                    run.getFont().setUnderline(1);
                } else if (run.getFont().getDoubleStrikeThrough()) {
                    run.getFont().setDoubleStrikeThrough(false);
                    run.getFont().setStrikeThrough(true);
                }
            }
        } else {
            // All three replace styles are underline values: map each one back to the
            // underline / strike-through combination it stands for.
            for (int i = 0; i < trg_doc.getChildNodes(21, true).getCount(); i++) {
                Run run = (Run) trg_doc.getChildNodes(21, true).get(i);
                if (run.getFont().getUnderline() == this.replacestyles[2]) {
                    run.getFont().setUnderline(1);
                    run.getFont().setStrikeThrough(true);
                } else if (run.getFont().getUnderline() == this.replacestyles[0]) {
                    run.getFont().setUnderline(1);
                } else if (run.getFont().getUnderline() == this.replacestyles[1]) {
                    run.getFont().setUnderline(0);
                    run.getFont().setStrikeThrough(true);
                }
            }
        }
        removeIgnoredSegments(trg_doc, Locale.makeLocale(this.targetlanguage));
        removeIgnoredParagraphs(trg_doc);

        insertComments(trg_doc, notesinfo);

        trg_doc.save(this.aligneddoc);
    } finally {
        // Always release the XLIFF document and remove the temporary working copy, including
        // when alignment or post-processing failed part-way through.
        try {
            if (xliffDocument != null) {
                xliffDocument.dispose();
            }
        } finally {
            tmpTxlfFile.delete();
        }
    }
}