JavaScript function to convert UTF8 string between fullwidth and halfwidth forms

Question

EDIT: Thanks to GOTO 0, I now know exactly what my question is called.

I need a JavaScript function to convert from UTF-8 fullwidth form to halfwidth form for ASCII.

Javascript strings already handle unicode so what problem are you actually trying to solve? Do you really need to process binary data yourself in javascript? — jfriend00
– jfriend00, Commented Dec 10, 2013 at 5:11
That's not "decoding" so much as "mapping." Why do you need to do this at all? This smells like an XY problem. — Matt Ball
– Matt Ball, Commented Dec 10, 2013 at 5:11
Whatever you call it, whether "decoding" or "mapping", you can tell that ！ａｂｃ　ＡＢＣ！ is different from !abc ABC!, and this conversion function IS what I'm asking for. — xpt
– xpt, Commented Dec 10, 2013 at 5:34
There is no straightforward conversion function for this, and building one can be complex. There is not always an inherent connection between different Unicode characters that look almost the same, like ａ and a. That's why it's really worth looking at why you need this in the first place; there may be an easier way. — Pekka
– Pekka, Commented Dec 10, 2013 at 5:46
... but if you really just want to replace that specific range of characters, developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/… should do (use the actual characters in an UTF-8 encoded HTML file and you should be fine) — Pekka
– Pekka, Commented Dec 10, 2013 at 5:48

GOTO 0 · Accepted Answer · 2013-12-10 11:04:10Z

23

Apparently, you want to convert halfwidth and fullwidth form characters to their equivalent basic Latin forms. If this is correct, you can do a replacement using a regular expression. Something like this should work:

// Sample input: fullwidth punctuation and letters around an ideographic space.
var x = "！ａｂｃ　ＡＢＣ！";

// Every character in U+FF01..U+FF5E sits exactly 0xFEE0 above its ASCII
// counterpart, so subtracting that offset yields the basic Latin form.
var y = x.replace(/[\uff01-\uff5e]/g, function (ch) {
    var asciiCode = ch.charCodeAt(0) - 0xfee0;
    return String.fromCharCode(asciiCode);
});

Where x is your input string and y is the output.

edited Dec 10, 2013 at 11:04

answered Dec 10, 2013 at 7:14

GOTO 0

48.8k25 gold badges140 silver badges165 bronze badges

Sign up to request clarification or add additional context in comments.

3 Comments

xpt Over a year ago

thank you. Would this regexp replace be faster than a for loop?

GOTO 0 Over a year ago

@xpt It surely is on my browser. I made a test jsfiddle here: jsfiddle.net/K9b56. You can also change the input string to fit your requirements and see how both methods perform.

xpt Over a year ago

Oh, thanks a lot for the neat solution and proof (>10x faster, amazing). I believe everyone would choose this solution. But for myself, for educational purpose, I'll choose Rezigned's as the answer. I wish I can choose more than one as the answer. Thanks again. I believe other people would benefit from your answer.

7vujy0f0hy · Accepted Answer · 2017-12-29 15:08:40Z

Year 2018 answer

Many years later – and it’s still impossible to find on the Internet a function that does this. So I wrote mine. (Nearly learned Japanese and Korean to get to this point.)

Simple version

Latin range only.

// Builds a replacer that moves a character's UTF-16 code by a fixed offset.
var shiftCharCode = function (offset) {
    return function (ch) {
        var shifted = ch.charCodeAt(0) + offset;
        return String.fromCharCode(shifted);
    };
};

// Printable ASCII ("!".."~") -> fullwidth forms, 0xFEE0 code points higher.
var toFullWidth = function (str) {
    return str.replace(/[!-~]/g, shiftCharCode(0xFEE0));
};

// Fullwidth forms -> printable ASCII, 0xFEE0 code points lower.
var toHalfWidth = function (str) {
    return str.replace(/[！-～]/g, shiftCharCode(-0xFEE0));
};

Complete version

Let me know if I missed any character.

/**
 * Installs two global string converters:
 *
 *   toFullWidth(str) - replaces every halfwidth character that has a
 *                      fullwidth/wide counterpart with that counterpart.
 *   toHalfWidth(str) - the inverse mapping.
 *
 * Both take a string and return a new string; characters without a
 * counterpart are passed through unchanged. Precomposed voiced katakana
 * (e.g. U+30AC) are not decomposed and therefore stay as they are.
 */
(function () {
    // Resolve the global object without assuming a browser, so the functions
    // are also installed in workers and in Node.
    const root = typeof globalThis !== "undefined" ? globalThis
        : typeof window !== "undefined" ? window
        : typeof self !== "undefined" ? self
        : global;

    // Two kinds of entries:
    //  - range sets  {halfRE, fullRE, delta}: full code = half code + delta
    //  - table sets  {half, full}: half[i] pairs with full[i]
    const charsets = {
        latin: {halfRE: /[!-~]/g, fullRE: /[\uFF01-\uFF5E]/g, delta: 0xFEE0},
        // Halfwidth Hangul pairs with Hangul Compatibility Jamo (U+3131..U+3164).
        // The halfwidth block has gaps between the vowel groups, so each
        // contiguous run needs its own offset.
        hangulConsonants: {halfRE: /[\uFFA1-\uFFBE]/g, fullRE: /[\u3131-\u314E]/g, delta: -0xCE70},
        hangulVowels1: {halfRE: /[\uFFC2-\uFFC7]/g, fullRE: /[\u314F-\u3154]/g, delta: -0xCE73},
        hangulVowels2: {halfRE: /[\uFFCA-\uFFCF]/g, fullRE: /[\u3155-\u315A]/g, delta: -0xCE75},
        hangulVowels3: {halfRE: /[\uFFD2-\uFFD7]/g, fullRE: /[\u315B-\u3160]/g, delta: -0xCE77},
        hangulVowels4: {halfRE: /[\uFFDA-\uFFDC]/g, fullRE: /[\u3161-\u3163]/g, delta: -0xCE79},
        hangulFiller: {half: "\uFFA0", full: "\u3164"},
        kana: {
            half: "｡｢｣､･ｦｧｨｩｪｫｬｭｮｯｰｱｲｳｴｵｶｷｸｹｺｻｼｽｾｿﾀﾁﾂﾃﾄﾅﾆﾇﾈﾉﾊﾋﾌﾍﾎﾏﾐﾑﾒﾓﾔﾕﾖﾗﾘﾙﾚﾛﾜﾝﾞﾟ",
            full: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシ" +
                "スセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゛゜"},
        extras: {
            // Cent, pound, not, macron, broken bar, yen, won, space <-> U+FFE0..U+FFE6, U+3000.
            // U+FFE8..U+FFEE are themselves HALFWIDTH forms of
            // box-drawing light vertical, the four arrows, black square and
            // white circle, so they belong on the half side.
            half: "\u00A2\u00A3\u00AC\u00AF\u00A6\u00A5\u20A9\u0020" +
                "\uFFE8\uFFE9\uFFEA\uFFEB\uFFEC\uFFED\uFFEE",
            full: "\uFFE0\uFFE1\uFFE2\uFFE3\uFFE4\uFFE5\uFFE6\u3000" +
                "\u2502\u2190\u2191\u2192\u2193\u25A0\u25CB"}
    };

    // Character class matching any character of `chars`; class metacharacters
    // are escaped so the tables may safely contain them.
    const classRE = chars =>
        new RegExp("[" + chars.replace(/[\\\]^-]/g, "\\$&") + "]", "g");

    // Replacer that maps from[i] to to[i] through a prebuilt Map.
    const lookup = (from, to) => {
        const targets = [...to];
        const table = new Map([...from].map((ch, i) => [ch, targets[i]]));
        return ch => table.get(ch);
    };

    // Replacer that shifts a character's code by a fixed offset.
    const shift = delta => ch => String.fromCharCode(ch.charCodeAt(0) + delta);

    // Build regexes and replacers once instead of on every call/character.
    const compile = set => set.halfRE ? {
        halfRE: set.halfRE,
        fullRE: set.fullRE,
        toFull: shift(set.delta),
        toHalf: shift(-set.delta)
    } : {
        halfRE: classRE(set.half),
        fullRE: classRE(set.full),
        toFull: lookup(set.half, set.full),
        toHalf: lookup(set.full, set.half)
    };
    const converters = Object.keys(charsets).map(name => compile(charsets[name]));

    // No set's output overlaps another set's input, so the sets can be
    // applied one after another without double conversion.
    root.toFullWidth = str0 =>
        converters.reduce((str, cv) => str.replace(cv.halfRE, cv.toFull), str0);
    root.toHalfWidth = str0 =>
        converters.reduce((str, cv) => str.replace(cv.fullRE, cv.toHalf), str0);
})();

/* Example starts here: */
// Ask for sample strings. prompt() returns null when the dialog is dismissed,
// so fall back to an empty string instead of crashing on .split().
var set = (prompt("Enter a couple of comma-separated strings (half or full-width):", 
    ["aouäöü123", "'\"?:", "¢£¥₩↑→", "ｺﾝﾆﾁﾊ", "ﾡﾢￂￃ"].join()) || "").split(",");
// Row 1: raw input, row 2: converted to full-width, row 3: converted back.
var fullWidth = set.map(str => toFullWidth(str));
var steps = [set, fullWidth, fullWidth.map(str => toHalfWidth(str))];
// The strings are user-typed, so escape HTML metacharacters before they are
// inserted as markup.
var escapeHTML = str => str.replace(/[&<>"']/g, c => "&#" + c.charCodeAt(0) + ";");
var tdHTML = str => `<td>${escapeHTML(str)}</td>`;
var stepsHTML = steps.map(step => step.map(tdHTML).join(""));
// Append each step's cells to the <tr> at the same index.
var rows = document.getElementsByTagName("tr");
[...rows].forEach((row,i) => row.insertAdjacentHTML("beforeEnd", stepsHTML[i]));

/* Every cell gets a thin light-grey border and a little padding. */
th, td {border: 1px solid lightgrey; padding: 0.2em;}
/* Header cells are left-aligned. */
th {text-align: left;}
/* Adjacent cell borders are merged into single lines. */
table {border-collapse: collapse;}

<!-- Three-row table; each row initially holds only its row-header cell. -->
<table>
    <tr><th scope="row">Input:</th></tr>
    <tr><th scope="row">Full-width:</th></tr>
    <tr><th scope="row">Half-width:</th></tr>
</table>

Meanwhile I found out that the Hangul maps in my code are faulty due to some gaps in Unicode (broken 1:1 mapping). Hang on until I fix them (not today) – or fix them on your own. Or just delete the hangul1 and hangul2 entries entirely as a temporary measure.

Rezigned · Accepted Answer · 2013-12-11 04:09:40Z

8

Try this

/**
 * Converts fullwidth ASCII variants to plain ASCII.
 *
 * Characters U+FF01..U+FF5E are mapped to U+0021..U+007E and the ideographic
 * space U+3000 is mapped to an ordinary space. Everything else, including
 * halfwidth katakana/Hangul and the fullwidth currency signs that share the
 * U+FF00 block, is copied through unchanged.
 *
 * @param {string} chars - text to convert
 * @returns {string} the converted text
 */
function toASCII(chars) {
    var ascii = '';
    for (var i = 0, l = chars.length; i < l; i++) {
        var c = chars.charCodeAt(i);

        // Only U+FF01..U+FF5E have an ASCII counterpart (exactly 0xFEE0
        // lower). Masking the whole U+FF00 block would turn other characters
        // into control codes.
        if (c >= 0xFF01 && c <= 0xFF5E) {
            c -= 0xFEE0;
        } else if (c === 0x3000) {
            c = 0x20;
        }

        ascii += String.fromCharCode(c);
    }

    return ascii;
}

// example
toASCII("ＡＢＣ"); // returns 'ABC' 0x41

edited Dec 11, 2013 at 4:09

answered Dec 10, 2013 at 6:08

Rezigned

4,9501 gold badge22 silver badges19 bronze badges

1 Comment

xpt Over a year ago

Thank you, Rezigned. Can you add a loop to deal with strings, please? I'd assume looping with your function would still be faster than the regexp replace.

peterhpchen · Accepted Answer · 2019-10-23 04:10:13Z

3

GOTO 0's answer is very useful, but I also need to convert the space from fullwidth to halfwidth.

So below is my code:

// Two independent passes: the ideographic space becomes a plain space, and
// each fullwidth ASCII variant drops 0xfee0 code points to its basic form.
const halfwidthValue = value
      .replace(/\u3000/g, '\u0020')
      .replace(/[\uff01-\uff5e]/g, (fullwidthChar) => {
        const asciiCode = fullwidthChar.charCodeAt(0) - 0xfee0;
        return String.fromCharCode(asciiCode);
      });

answered Oct 23, 2019 at 4:10

peterhpchen

1987 bronze badges

Comments

Lav Shinde · Accepted Answer · 2019-02-13 05:59:43Z

The given solutions do not work for all the cases of Full-Width to Half-Width conversion of Kana (eg. デジタル　is not converted properly). I have made a function for converting Zenkaku to Hankaku Katakana, Hope it helps.

function convertToHalfWidth(string) {
  let characters = getCharacters(string);
  let halfWidthString = ''
  characters.forEach(character => {
    halfWidthString += mapToHankaku(character);
  });
  return halfWidthString;
}

function getCharacters(string) {
   return string.split("");
}

function mapToHankaku(character) {
  let zenHanMap = getZenkakuToHankakuMap();
  if (typeof zenHanMap[character] === 'undefined') {
    return character;
  } else {
    return zenHanMap[character];
  }
}

function getZenkakuToHankakuMap() {
  let zenHanMap = {
    'ァ': 'ｧ',
    'ア': 'ｱ',
    'ィ': 'ｨ',
    'イ': 'ｲ',
    'ゥ': 'ｩ',
    'ウ': 'ｳ',
    'ェ': 'ｪ',
    'エ': 'ｴ',
    'ォ': 'ｫ',
    'オ': 'ｵ',
    'カ': 'ｶ',
    'ガ': 'ｶﾞ',
    'キ': 'ｷ',
    'ギ': 'ｷﾞ',
    'ク': 'ｸ',
    'グ': 'ｸﾞ',
    'ケ': 'ｹ',
    'ゲ': 'ｹﾞ',
    'コ': 'ｺ',
    'ゴ': 'ｺﾞ',
    'サ': 'ｻ',
    'ザ': 'ｻﾞ',
    'シ': 'ｼ',
    'ジ': 'ｼﾞ',
    'ス': 'ｽ',
    'ズ': 'ｽﾞ',
    'セ': 'ｾ',
    'ゼ': 'ｾﾞ',
    'ソ': 'ｿ',
    'ゾ': 'ｿﾞ',
    'タ': 'ﾀ',
    'ダ': 'ﾀﾞ',
    'チ': 'ﾁ',
    'ヂ': 'ﾁﾞ',
    'ッ': 'ｯ',
    'ツ': 'ﾂ',
    'ヅ': 'ﾂﾞ',
    'テ': 'ﾃ',
    'デ': 'ﾃﾞ',
    'ト': 'ﾄ',
    'ド': 'ﾄﾞ',
    'ナ': 'ﾅ',
    'ニ': 'ﾆ',
    'ヌ': 'ﾇ',
    'ネ': 'ﾈ',
    'ノ': 'ﾉ',
    'ハ': 'ﾊ',
    'バ': 'ﾊﾞ',
    'パ': 'ﾊﾟ',
    'ヒ': 'ﾋ',
    'ビ': 'ﾋﾞ',
    'ピ': 'ﾋﾟ',
    'フ': 'ﾌ',
    'ブ': 'ﾌﾞ',
    'プ': 'ﾌﾟ',
    'ヘ': 'ﾍ',
    'ベ': 'ﾍﾞ',
    'ペ': 'ﾍﾟ',
    'ホ': 'ﾎ',
    'ボ': 'ﾎﾞ',
    'ポ': 'ﾎﾟ',
    'マ': 'ﾏ',
    'ミ': 'ﾐ',
    'ム': 'ﾑ',
    'メ': 'ﾒ',
    'モ': 'ﾓ',
    'ャ': 'ｬ',
    'ヤ': 'ﾔ',
    'ュ': 'ｭ',
    'ユ': 'ﾕ',
    'ョ': 'ｮ',
    'ヨ': 'ﾖ',
    'ラ': 'ﾗ',
    'リ': 'ﾘ',
    'ル': 'ﾙ',
    'レ': 'ﾚ',
    'ロ': 'ﾛ',
    'ヮ': '',
    'ワ': 'ﾜ',
    // 'ヰ': 'ﾞ  ﾟ',
    // 'ヱ': '',
    'ヲ': 'ｦ',
    'ン': 'ﾝ',
    'ヴ': 'ｳﾞ',
    // 'ヵ': '',
    // 'ヶ': '',
    // 'ヷ': '',
    // 'ヸ': '',
    // 'ヹ': '',
    // 'ヺ': '',
    '・': '･',
    'ー': 'ｰ',
    // 'ヽ': '',
    // 'ヾ': '',
    // 'ヿ': '',
  };
  return zenHanMap;
}

Use as follows convertToHalfWidth('デジタル');

You can pass the result of this function to the function mentioned by GOTO 0 and get the complete Half width result for the Japanese Language

Reference: https://en.wikipedia.org/wiki/Katakana#Unicode

Christophe Vidal · Accepted Answer · 2025-07-08 13:06:20Z

0

It might be worth asking why you want to transform full-width to half-width, because there is a built-in function to transform half-width to full-width, which is used to normalize data (in databases, among other places). It's simply normalize('NFKC'):

// Sample text written in half-width katakana.
const input = 'ｶﾀｶﾅ';

// NFKC applies compatibility normalization, which replaces the half-width
// kana with their ordinary full-width counterparts.
const normalized = input.normalize('NFKC');

console.log(normalized);

answered Jul 8 at 13:06

Christophe Vidal

1,9421 gold badge19 silver badges13 bronze badges

Collectives™ on Stack Overflow

JavaScript function to convert UTF8 string between fullwidth and halfwidth forms

6 Answers 6

3 Comments

Year 2018 answer

Simple version

Complete version

1 Comment

1 Comment

Comments

Comments

Comments

Your Answer

Linked

Hot Network Questions

Collectives™ on Stack Overflow

6 Answers 6

3 Comments

Year 2018 answer

Simple version

Complete version

1 Comment

1 Comment

Comments

Comments

Comments

Your Answer

Sign up or log in

Post as a guest

Linked

Related