Example usage for java.text Normalizer normalize

List of usage examples for java.text Normalizer normalize

Introduction

On this page you can find example usages of java.text.Normalizer.normalize.

Prototype

public static String normalize(CharSequence src, Form form) 

Source Link

Document

Normalize a sequence of char values.

Usage

From source file:com.moviejukebox.plugin.KinopoiskPlugin.java

/**
 * Looks up the Kinopoisk id of a movie through the kinopoisk.ru advanced search form.
 *
 * <p>Diacritics are stripped from the movie name before it is URL-encoded into the query.
 * The response is then inspected either as a direct movie page (containing
 * {@code id_film = ...;}) or as a search result page (first {@code /level/1/film/<id>} link).
 *
 * @param movieName title to search for
 * @param year a single year or a {@code from-to} range; only used when {@code season} is -1
 *            and {@code StringTools.isValidString(year)} holds
 * @param season -1 for a movie; any other value restricts the search to serials
 * @return the numeric Kinopoisk id, or {@code Movie.UNKNOWN} when nothing usable was found or
 *         the request failed with an {@link IOException}
 */
public String getKinopoiskId(String movieName, String year, int season) {
    try {
        // Unaccenting letters: decompose, then drop the combining marks
        String query = Normalizer.normalize(movieName, Normalizer.Form.NFD);
        query = query.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");

        StringBuilder url = new StringBuilder(
                "http://kinopoisk.ru/index.php?level=7&from=forma&result=adv&m_act[from]=forma&m_act[what]=content");
        // URLEncoder already encodes spaces as '+', so no extra replacement is required
        url.append("&m_act[find]=").append(URLEncoder.encode(query, "UTF-8"));

        if (season != -1) {
            url.append("&m_act[content_find]=serial");
        } else if (StringTools.isValidString(year)) {
            if (year.indexOf('-') > -1) {
                // split() drops trailing empty parts, so "2000-" yields one element and "-" none
                String[] years = year.split("-");
                if (years.length > 0) {
                    url.append("&m_act[from_year]=").append(years[0]);
                }
                if (years.length > 1) {
                    url.append("&m_act[to_year]=").append(years[1]);
                }
            } else {
                url.append("&m_act[year]=").append(year);
            }
        }

        String xml = httpClient.request(url.toString(), CHARSET);

        // Checking for zero results
        if (!xml.contains("class=\"search_results\"")) {
            // Checking direct movie page
            if (xml.contains("class=\"moviename-big\"")) {
                return HTMLTools.extractTag(xml, "id_film = ", ";");
            }
            return Movie.UNKNOWN;
        }

        String kinopoiskId;

        // Checking if we got the movie page directly
        int beginIndex = xml.indexOf("id_film = ");
        if (beginIndex == -1) {
            // It's search results page, searching a link to the movie page
            beginIndex = xml.indexOf("class=\"search_results\"");
            if (beginIndex == -1) {
                return Movie.UNKNOWN;
            }

            beginIndex = xml.indexOf("/level/1/film/", beginIndex);
            if (beginIndex == -1) {
                return Movie.UNKNOWN;
            }

            // 14 == "/level/1/film/".length(); the id runs up to the next '/' or '"'
            StringTokenizer st = new StringTokenizer(xml.substring(beginIndex + 14), "/\"");
            kinopoiskId = st.hasMoreTokens() ? st.nextToken() : Movie.UNKNOWN;
        } else {
            // It's the movie page; 10 == "id_film = ".length(), the id runs up to the next ';'
            StringTokenizer st = new StringTokenizer(xml.substring(beginIndex + 10), ";");
            kinopoiskId = st.hasMoreTokens() ? st.nextToken() : Movie.UNKNOWN;
        }

        if (StringTools.isValidString(kinopoiskId) && StringUtils.isNumeric(kinopoiskId)) {
            return kinopoiskId;
        }
    } catch (IOException error) {
        // Pass the exception itself so the stack trace is not lost
        LOG.error("Failed retreiving Kinopoisk Id for movie : {}", movieName, error);
    }
    return Movie.UNKNOWN;
}

From source file:com.otaupdater.utils.Utils.java

/**
 * Reduces a name to a lower-case, ASCII-only identifier.
 *
 * <p>The name is NFD-decomposed and trimmed, every non-ASCII character is removed, runs of
 * spaces, underscores and dashes collapse into a single underscore, one leading and one
 * trailing underscore are dropped, and the result is lower-cased with {@link Locale#US}.
 *
 * @param name the raw name, may be {@code null}
 * @return the sanitized name; the empty string when {@code name} is {@code null}
 */
public static String sanitizeName(String name) {
    if (name == null) {
        return "";
    }

    final String decomposed = Normalizer.normalize(name, Normalizer.Form.NFD).trim();
    final String asciiOnly = decomposed.replaceAll("[^\\p{ASCII}]", "");
    final String underscored = asciiOnly.replaceAll("[ _-]+", "_");
    final String withoutEdges = underscored.replaceAll("(^_|_$)", "");
    return withoutEdges.toLowerCase(Locale.US);
}

From source file:org.jets3t.service.utils.FileComparer.java

/**
 * Normalize string into "Normalization Form Canonical Decomposition" (NFD).
 *
 * References:/*w  w  w . j  a  v  a2  s. c o  m*/
 * http://stackoverflow.com/questions/3610013
 * http://en.wikipedia.org/wiki/Unicode_equivalence
 *
 * @param str
 * @return string normalized into NFC form.
 */
protected String normalizeUnicode(String str) {
    Normalizer.Form form = Normalizer.Form.NFD;
    if (!Normalizer.isNormalized(str, form)) {
        return Normalizer.normalize(str, form);
    }
    return str;
}

From source file:com.viettel.util.StringUtils.java

/**
 * Removes accents from a string.
 *
 * <p>The string is NFD-decomposed and all combining diacritical marks are dropped. The
 * letters D-with-stroke (U+0110) and d-with-stroke (U+0111) have no canonical decomposition,
 * so they are mapped to plain {@code D} and {@code d} explicitly.
 *
 * @param s the text to strip; must not be {@code null}
 * @return the text without accents
 */
public static String unAccent(String s) {
    String temp = Normalizer.normalize(s, Normalizer.Form.NFD);
    Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    // Literal char replacement: the previous regex-based calls used mis-encoded patterns
    return pattern.matcher(temp).replaceAll("")
            .replace('\u0110', 'D')
            .replace('\u0111', 'd');
}

From source file:org.nuxeo.ecm.platform.filemanager.TestFileManagerService.java

/**
 * Checks that a document created from an NFC-normalized file name is also found when it is
 * looked up with the NFD-normalized form of the same name.
 */
@Test
public void testCreateExistingBlobWithNonNFCNormalizedFilename() throws Exception {
    // The name must contain accented letters, otherwise its NFC and NFD forms are identical
    // and the test proves nothing. Unicode escapes keep the literal safe from re-encoding.
    // NOTE(review): the previous literal had lost its non-ASCII characters; confirm these
    // replacement characters against the upstream test.
    String fileName = "\u00e9\u00e0 \u00fc\u00f1.rtf";

    // Create doc from NFC normalized filename
    String nfcNormalizedFileName = Normalizer.normalize(fileName, Normalizer.Form.NFC);
    Blob blob = Blobs.createBlob("Test content", "text/rtf", null, nfcNormalizedFileName);
    service.createDocumentFromBlob(coreSession, blob, workspace.getPathAsString(), true, nfcNormalizedFileName);
    assertNotNull(FileManagerUtils.getExistingDocByFileName(coreSession, workspace.getPathAsString(),
            nfcNormalizedFileName));

    // Check existing doc with non NFC (NFD) normalized filename
    String nfdNormalizedFileName = Normalizer.normalize(fileName, Normalizer.Form.NFD);
    assertNotNull(FileManagerUtils.getExistingDocByFileName(coreSession, workspace.getPathAsString(),
            nfdNormalizedFileName));
}

From source file:br.com.pprv.web.control.beans.tecnica.TecnicaBean.java

/**
 * metodo utilizado para fazer o upload dos arquivos.
 *
 * @param uploadedFile/*from   w  w w  .  j  av  a  2 s  . c  o  m*/
 * @return
 */
public boolean doUpload(UploadedFile uploadedFile) {
    boolean result = false;
    SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd_HHmmss");

    String fileExtension = uploadedFile.getFileName();

    String nmDescTemplate = Normalizer.normalize(tbequipamentoSelected.getNmequipamenta(), Normalizer.Form.NFD);
    nmDescTemplate = nmDescTemplate.replaceAll("[^\\p{ASCII}]", "").replace("\"", "");
    String fileName = nmDescTemplate.toUpperCase() + "_" + sdf.format(new Date())
            + fileExtension.substring(fileExtension.lastIndexOf('.'), fileExtension.length());

    File file = new File(CAMINHO, fileName);

    final TbarquivosEquipamentoPK tbarquivosEquipamentoPK = new TbarquivosEquipamentoPK();
    tbarquivosEquipamentoPK.setIdequipamento(tbequipamentoSelected.getIdequipamento());
    tbarquivosEquipamentoPK.setTmdataupload(new Date());

    final TbarquivosEquipamento tbarquivosEquipamento = new TbarquivosEquipamento();
    tbarquivosEquipamento.setIdusuario(Shareds.getUser());
    tbarquivosEquipamento.setNmarquivo(fileName);
    tbarquivosEquipamento.setTbarquivosEquipamentoPK(tbarquivosEquipamentoPK);
    tbarquivosEquipamento.setTbequipamento(tbequipamentoSelected);

    try {
        try (FileOutputStream fileOutput = new FileOutputStream(file)) {
            fileOutput.write(IOUtils.toByteArray(uploadedFile.getInputstream()));
            fileOutput.flush();
            fileOutput.close();
        }
        result = true;
        if (arquivosEquipamentoLogic.createTbarquivosEquipamento(tbarquivosEquipamento)) {
            tbequipamentoSelected.getTbarquivosEquipamentoList().add(tbarquivosEquipamento);
        }
    } catch (FileNotFoundException ex) {
        AbstractFacesContextUtils.addMessageError("Falha ao encontrar o arquivo.");
        ex.printStackTrace(System.err);
    } catch (IOException ex) {
        AbstractFacesContextUtils.addMessageError("Falha ao abrir o arquivo.");
        ex.printStackTrace(System.err);
    }
    return result;
}

From source file:org.structr.core.function.Functions.java

/**
 * Turns an arbitrary value into a lower-case, dash-separated ASCII string.
 *
 * <p>The value's string form is NFD-decomposed; a fixed set of punctuation characters is
 * removed, another set (including dashes and underscores) is turned into word separators.
 * The text is then lower-cased, whitespace is normalized, non-ASCII characters are dropped,
 * remaining punctuation becomes a dash and every space becomes a dash.
 *
 * @param input the value to clean, may be {@code null}
 * @return the cleaned string; the empty string when {@code input} is {@code null}
 */
public static String cleanString(final Object input) {

    if (input == null) {

        return "";
    }

    // NOTE(review): the en dash (U+2013) below restores a pattern whose character had been
    // lost, leaving an empty regex that inserted a dash between every character. Confirm
    // the intended character against the upstream source.
    String normalized = Normalizer.normalize(input.toString(), Normalizer.Form.NFD).replaceAll("\\<", "")
            .replaceAll("\\>", "").replaceAll("\\.", "").replaceAll("\\'", "-").replaceAll("\\?", "")
            .replaceAll("\\(", "").replaceAll("\\)", "").replaceAll("\\{", "").replaceAll("\\}", "")
            .replaceAll("\\[", "").replaceAll("\\]", "").replaceAll("\\+", "-").replaceAll("/", "-")
            .replaceAll("\u2013", "-").replaceAll("\\\\", "-").replaceAll("\\|", "-").replaceAll("'", "-")
            .replaceAll("!", "").replaceAll(",", "").replaceAll("-", " ").replaceAll("_", " ")
            .replaceAll("`", "-");

    String result = normalized.replaceAll("-", " ");
    // Locale.ROOT keeps the lower-casing independent of the default locale, so that e.g. 'I'
    // never becomes a non-ASCII dotless i that the ASCII filter below would then drop
    result = StringUtils.normalizeSpace(result.toLowerCase(java.util.Locale.ROOT));
    result = result.replaceAll("[^\\p{ASCII}]", "").replaceAll("\\p{P}", "-").replaceAll("\\-(\\s+\\-)+", "-");
    result = result.replaceAll(" ", "-");

    return result;
}

From source file:org.geosdi.geoplatform.gui.server.service.impl.GPCatalogFinderService.java

/**
 * Replaces the accents of a string with apostrophes.
 *
 * <p>The string is NFD-decomposed and each run of combining diacritical marks is replaced
 * by a single {@code '} character.
 *
 * @param str the text to convert
 * @return the text with every run of combining marks replaced by an apostrophe
 */
public String deAccent(String str) {
    final String decomposed = Normalizer.normalize(str, Normalizer.Form.NFD);
    return decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "'");
}

From source file:com.cloudbees.hudson.plugins.folder.ChildNameGeneratorTest.java

/**
 * Asserts that the folder holds a correctly stored child for the given ideal name.
 *
 * <p>Verifies that the item exists under the encoded name, that its root directory carries
 * the mangled name, that no directory exists for the alternative Unicode normalization of
 * the name, and that the child name file contains the encoded name.
 *
 * @param instance the folder under test
 * @param idealName the ideal (unencoded) child name
 * @throws IOException if the child name file cannot be read
 */
private void checkChild(ComputedFolderImpl instance, String idealName) throws IOException {
    final String encodedName = encode(idealName);
    final FreeStyleProject item = instance.getItem(encodedName);
    assertThat("We have an item for name " + idealName, item, notNullValue());
    assertThat("The root directory of the item for name " + idealName + " is mangled",
            item.getRootDir().getName(), is(mangle(idealName)));

    // Pick whichever normalization form differs from the given name: NFD first, NFC otherwise
    final String decomposed = Normalizer.normalize(idealName, Normalizer.Form.NFD);
    final String altEncoding = idealName.equals(decomposed)
            ? Normalizer.normalize(idealName, Normalizer.Form.NFC)
            : decomposed;
    if (!idealName.equals(altEncoding)) {
        final File altRootDir = instance.getRootDirFor(altEncoding);
        assertThat("Alternative normalized form: " + altRootDir + " does not exist", altRootDir.isDirectory(),
                is(false));
    }

    final File nameFile = new File(item.getRootDir(), ChildNameGenerator.CHILD_NAME_FILE);
    assertThat("We have the " + ChildNameGenerator.CHILD_NAME_FILE + " for the item for name " + idealName,
            nameFile.isFile(), is(true));
    final String storedName = FileUtils.readFileToString(nameFile);
    assertThat("The " + ChildNameGenerator.CHILD_NAME_FILE + " for the item for name " + idealName
            + " contains the encoded name", storedName, is(encodedName));
}

From source file:com.github.bfour.fpliteraturecollector.service.DefaultLiteratureService.java

/**
 * Determines whether a pair of two Literatures maybe is a duplicate pair.
 * Employs comparative measures that might lead to false positives.
 *
 * <p>A pair counts as a probable duplicate when it is a certain duplicate, when the
 * NFD-normalized, lower-cased titles differ by at most one edit per 14 characters of
 * {@code litA}'s title, or when both ISBNs are present and equal. The title comparison is
 * skipped when either title is {@code null}.
 *
 * @param litA first literature entry
 * @param litB second literature entry
 * @return {@code true} if the two entries are probably duplicates
 */
private boolean isProbableDuplicate(Literature litA, Literature litB) {

    if (isCertainDuplicate(litA, litB)) {
        return true;
    }

    // Guard the titles like the ISBNs below: a missing title must not prevent the ISBN check
    final String titleA = litA.getTitle();
    final String titleB = litB.getTitle();
    if (titleA != null && titleB != null) {
        final int distance = StringUtils.getLevenshteinDistance(
                Normalizer.normalize(titleA, Normalizer.Form.NFD).toLowerCase(),
                Normalizer.normalize(titleB, Normalizer.Form.NFD).toLowerCase());

        // 1 character different for every 14 characters
        if (distance <= (titleA.length() / 14)) {
            return true;
        }
    }

    if (litA.getISBN() != null && litB.getISBN() != null && litA.getISBN().equals(litB.getISBN())) {
        return true;
    }

    return false;

}