EDIT: Thanks to GOTO 0, I now know exactly what my question is called.
I need a JavaScript function to convert from UTF-8 fullwidth form to halfwidth form for ASCII.
EDIT: Thanks to GOTO 0, I now know exactly what my question is called.
I need a JavaScript function to convert from UTF-8 fullwidth form to halfwidth form for ASCII.
Apparently, you want to convert halfwidth and fullwidth form characters to their equivalent Basic Latin forms. If this is correct, you can do a replacement using a regular expression. Something like this should work:
// Input made of FULLWIDTH characters ("!abc ABC!" in fullwidth forms with a
// normal space). Written with \u escapes so the literal cannot be silently
// normalized back to plain ASCII, which would turn this demo into a no-op.
var x = "\uFF01\uFF41\uFF42\uFF43 \uFF21\uFF22\uFF23\uFF01";
// Every character in U+FF01..U+FF5E sits exactly 0xFEE0 above its ASCII
// counterpart (U+0021..U+007E), so subtracting that offset yields Basic Latin.
var y = x.replace(
  /[\uff01-\uff5e]/g,
  function (ch) { return String.fromCharCode(ch.charCodeAt(0) - 0xfee0); }
);
Where x is your input string and y is the output.
Many years later – and it’s still impossible to find on the Internet a function that does this. So I wrote mine. (Nearly learned Japanese and Korean to get to this point.)
Latin range only.
/**
 * Builds a `String.prototype.replace` callback that shifts the first UTF-16
 * code unit of each match by `delta`.
 * @param {number} delta - Signed offset added to the code unit.
 * @returns {function(string): string} Replacement callback.
 */
var shiftCharCode = delta => ch => String.fromCharCode(ch.charCodeAt(0) + delta);

/**
 * Converts printable ASCII (U+0021..U+007E) to its fullwidth form
 * (U+FF01..U+FF5E). All other characters are left untouched.
 * @param {string} str
 * @returns {string}
 */
var toFullWidth = str => str.replace(/[!-~]/g, shiftCharCode(0xFEE0));

/**
 * Converts fullwidth ASCII variants (U+FF01..U+FF5E) back to printable ASCII.
 * The pattern must match the FULLWIDTH range: matching plain ASCII here and
 * subtracting 0xFEE0 would wrap around and corrupt ordinary text.
 * @param {string} str
 * @returns {string}
 */
var toHalfWidth = str => str.replace(/[\uFF01-\uFF5E]/g, shiftCharCode(-0xFEE0));
Let me know if I missed any character.
/**
 * Installs two global helpers:
 *   toFullWidth(str) - halfwidth/narrow characters -> fullwidth/wide forms
 *   toHalfWidth(str) - fullwidth/wide characters   -> halfwidth/narrow forms
 *
 * All code points are written as \u escapes or generated from numeric ranges,
 * so the tables survive any Unicode normalization of this source text.
 *
 * Known limitation: voiced kana are not composed or decomposed. Halfwidth
 * KA + voiced mark becomes fullwidth KA followed by U+309B rather than the
 * single letter GA, and fullwidth GA is left unchanged by toHalfWidth.
 */
(function () {
  // `window` in browsers; `globalThis` also covers workers and Node.
  const root = typeof globalThis !== "undefined" ? globalThis : window;

  /** Returns every UTF-16 code unit from `first` to `last` (inclusive) as one string. */
  const range = (first, last) => {
    let chars = "";
    for (let code = first; code <= last; code++) {
      chars += String.fromCharCode(code);
    }
    return chars;
  };

  // In every set, half[i] and full[i] are width variants of the same character.
  const charsets = {
    // Printable ASCII <-> fullwidth ASCII variants.
    latin: {half: range(0x21, 0x7E), full: range(0xFF01, 0xFF5E)},
    // Halfwidth Hangul consonants <-> Hangul compatibility jamo.
    hangul1: {half: range(0xFFA1, 0xFFBE), full: range(0x3131, 0x314E)},
    // Halfwidth Hangul vowels live in four short runs separated by unassigned
    // code points; the compatibility jamo vowels are contiguous.
    hangul2: {
      half: range(0xFFC2, 0xFFC7) + range(0xFFCA, 0xFFCF) +
            range(0xFFD2, 0xFFD7) + range(0xFFDA, 0xFFDC),
      full: range(0x314F, 0x3163)
    },
    // Halfwidth CJK punctuation and katakana (U+FF61..U+FF9F, in code-point
    // order) <-> their wide counterparts, listed in the same order.
    kana: {
      half: range(0xFF61, 0xFF9F),
      full: "\u3002\u300C\u300D\u3001\u30FB" +
            "ヲァィゥェォャュョッーアイウエオカキクケコサシ" +
            "スセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン" +
            "\u309B\u309C"
    },
    extras: {
      // cent, pound, not, macron, broken bar, yen, won, space: the NARROW
      // form is the ordinary character and the wide form is U+FFE0..U+FFE6
      // (U+3000 for space).
      // light vertical, four arrows, black square, white circle: the WIDE
      // form is the ordinary character and the narrow form is U+FFE8..U+FFEE.
      half: "\u00A2\u00A3\u00AC\u00AF\u00A6\u00A5\u20A9\u0020" +
            "\uFFE8\uFFE9\uFFEA\uFFEB\uFFEC\uFFED\uFFEE",
      full: "\uFFE0\uFFE1\uFFE2\uFFE3\uFFE4\uFFE5\uFFE6\u3000" +
            "\u2502\u2190\u2191\u2192\u2193\u25A0\u25CB"
    }
  };

  // Build both lookup tables once instead of rebuilding regexes and arrays
  // on every call / every replaced character.
  const halfToFull = new Map();
  const fullToHalf = new Map();
  Object.keys(charsets).forEach(name => {
    const half = charsets[name].half;
    const full = charsets[name].full;
    for (let i = 0; i < half.length; i++) {
      halfToFull.set(half[i], full[i]);
      fullToHalf.set(full[i], half[i]);
    }
  });

  // Single pass over the string: each UTF-16 code unit is looked up once, so
  // the output of one mapping can never be re-mapped by another set.
  const translate = (str, table) =>
    str.replace(/[\s\S]/g, ch => (table.has(ch) ? table.get(ch) : ch));

  root.toFullWidth = str0 => translate(str0, halfToFull);
  root.toHalfWidth = str0 => translate(str0, fullToHalf);
})();
/* Example starts here: */
// Demo: asks for comma-separated strings, then appends one table cell per
// string to each of the three rows (input, full-width, half-width).

// The strings come from the user, so HTML-significant characters are turned
// into numeric character references before being inserted as markup.
const escapeHTML = str => str.replace(/[&<>"']/g, ch => "&#" + ch.charCodeAt(0) + ";");

// prompt() returns null when the dialog is cancelled; treat that as empty input
// instead of throwing on null.split().
const answer = prompt("Enter a couple of comma-separated strings (half or full-width):",
  ["aouäöü123", "'\"?:", "¢£¥₩↑→", "コンニチハ", "ᄀ까ᅢ"].join());
var set = (answer === null ? "" : answer).split(",");

// Convert once and reuse the result for the round trip. The arrow wrappers
// keep map()'s extra (index, array) arguments away from the converters.
const fullWidthSet = set.map(str => toFullWidth(str));
var steps = [set, fullWidthSet, fullWidthSet.map(str => toHalfWidth(str))];

var tdHTML = str => `<td>${escapeHTML(str)}</td>`;
var stepsHTML = steps.map(step => step.map(tdHTML).join(""));
var rows = document.getElementsByTagName("tr");
[...rows].forEach((row, i) => {
  // Only the first steps.length rows have data; skip any other <tr> on the
  // page rather than inserting the text "undefined" into it.
  if (stepsHTML[i] !== undefined) {
    row.insertAdjacentHTML("beforeEnd", stepsHTML[i]);
  }
});
/* Every cell: thin light-grey outline with a little inner spacing. */
th,
td {
  border: 1px solid lightgrey;
  padding: 0.2em;
}

/* Row labels are left-aligned instead of the default centred header text. */
th {
  text-align: left;
}

/* Merge adjacent cell borders into single lines. */
table {
  border-collapse: collapse;
}
<!-- Result table for the demo: three labelled rows. The example script selects
     every <tr> and appends one <td> per input string to each of them. -->
<table>
<tr><th scope="row">Input:</th></tr>
<tr><th scope="row">Full-width:</th></tr>
<tr><th scope="row">Half-width:</th></tr>
</table>
[…] hangul1 and hangul2 entries entirely as a temporary measure.

Try this:
/**
 * Converts fullwidth ASCII variants to plain ASCII.
 *
 * Only U+FF01..U+FF5E (the fullwidth forms of U+0021..U+007E) and the
 * ideographic space U+3000 are converted. Everything else, including the
 * rest of the U+FF00..U+FFEF block (halfwidth katakana, halfwidth Hangul,
 * fullwidth currency signs), is copied through unchanged; applying the ASCII
 * offset to those would produce control characters or NUL.
 *
 * @param {string|string[]} chars - Text to convert (a string, or an array of
 *   one-character strings).
 * @returns {string} The converted text.
 */
function toASCII(chars) {
  let ascii = '';
  for (let i = 0, l = chars.length; i < l; i++) {
    let c = chars[i].charCodeAt(0);
    if (c >= 0xFF01 && c <= 0xFF5E) {
      // Fullwidth forms sit exactly 0xFEE0 above their ASCII counterparts.
      c -= 0xFEE0;
    } else if (c === 0x3000) {
      // The ideographic space is the wide form of the ASCII space.
      c = 0x20;
    }
    ascii += String.fromCharCode(c);
  }
  return ascii;
}
// example (input is fullwidth "ABC", written with escapes so it stays fullwidth)
toASCII("\uFF21\uFF22\uFF23"); // returns 'ABC' (U+FF21 -> 0x41)
The answer of GOTO 0 is very useful, but I also need to convert the space from fullwidth to halfwidth.
So below is my code:
// Halfwidth copy of `value`: the ideographic space U+3000 becomes an ASCII
// space, and each fullwidth ASCII variant (U+FF01..U+FF5E) is shifted down by
// 0xFEE0 to its Basic Latin counterpart. The two replacements touch disjoint
// characters, so their order does not matter.
const halfwidthValue = value
  .replace(/\u3000/g, '\u0020')
  .replace(/[\uff01-\uff5e]/g, function (match) {
    const asciiCode = match.charCodeAt(0) - 0xfee0;
    return String.fromCharCode(asciiCode);
  });
The given solutions do not work for all cases of full-width to half-width conversion of kana (e.g. デジタル is not converted properly). I have written a function for converting Zenkaku (full-width) katakana to Hankaku (half-width) katakana; I hope it helps.
/**
 * Converts a string by passing each element returned by getCharacters()
 * through mapToHankaku() and concatenating the results in order.
 * @param {string} string - Text to convert.
 * @returns {string} The concatenated per-character results.
 */
function convertToHalfWidth(string) {
  return getCharacters(string).reduce(
    (converted, character) => converted + mapToHankaku(character),
    ''
  );
}
/**
 * Splits a string into an array of its individual UTF-16 code units.
 * @param {string} string - Text to split.
 * @returns {string[]} One array element per code unit; empty for "".
 */
function getCharacters(string) {
  const separator = "";
  const characters = string.split(separator);
  return characters;
}
/**
 * Looks a character up in the table returned by getZenkakuToHankakuMap().
 * @param {string} character - Key to look up.
 * @returns {string} The table's value for the key, or `character` itself
 *   when the lookup yields undefined.
 */
function mapToHankaku(character) {
  const hankaku = getZenkakuToHankakuMap()[character];
  return typeof hankaku === 'undefined' ? character : hankaku;
}
/**
 * Returns a fresh lookup table from full-width (Zenkaku) katakana to their
 * half-width (Hankaku) equivalents.
 *
 * Values are written as \u escapes so they stay half-width even if this
 * source text is Unicode-normalized. Voiced and semi-voiced letters map to
 * two code units: the base half-width letter followed by the half-width
 * voiced mark U+FF9E or semi-voiced mark U+FF9F.
 *
 * Characters with no half-width form (ヮ ヰ ヱ ヵ ヶ ヷ ヸ ヹ ヺ ヽ ヾ ヿ)
 * are deliberately absent so that callers leave them unchanged; mapping one
 * of them to an empty string would silently delete it from the text.
 *
 * @returns {Object<string, string>} Full-width character -> half-width string.
 */
function getZenkakuToHankakuMap() {
  const zenHanMap = {
    'ァ': '\uFF67',
    'ア': '\uFF71',
    'ィ': '\uFF68',
    'イ': '\uFF72',
    'ゥ': '\uFF69',
    'ウ': '\uFF73',
    'ェ': '\uFF6A',
    'エ': '\uFF74',
    'ォ': '\uFF6B',
    'オ': '\uFF75',
    'カ': '\uFF76',
    'ガ': '\uFF76\uFF9E',
    'キ': '\uFF77',
    'ギ': '\uFF77\uFF9E',
    'ク': '\uFF78',
    'グ': '\uFF78\uFF9E',
    'ケ': '\uFF79',
    'ゲ': '\uFF79\uFF9E',
    'コ': '\uFF7A',
    'ゴ': '\uFF7A\uFF9E',
    'サ': '\uFF7B',
    'ザ': '\uFF7B\uFF9E',
    'シ': '\uFF7C',
    'ジ': '\uFF7C\uFF9E',
    'ス': '\uFF7D',
    'ズ': '\uFF7D\uFF9E',
    'セ': '\uFF7E',
    'ゼ': '\uFF7E\uFF9E',
    'ソ': '\uFF7F',
    'ゾ': '\uFF7F\uFF9E',
    'タ': '\uFF80',
    'ダ': '\uFF80\uFF9E',
    'チ': '\uFF81',
    'ヂ': '\uFF81\uFF9E',
    'ッ': '\uFF6F',
    'ツ': '\uFF82',
    'ヅ': '\uFF82\uFF9E',
    'テ': '\uFF83',
    'デ': '\uFF83\uFF9E',
    'ト': '\uFF84',
    'ド': '\uFF84\uFF9E',
    'ナ': '\uFF85',
    'ニ': '\uFF86',
    'ヌ': '\uFF87',
    'ネ': '\uFF88',
    'ノ': '\uFF89',
    'ハ': '\uFF8A',
    'バ': '\uFF8A\uFF9E',
    'パ': '\uFF8A\uFF9F',
    'ヒ': '\uFF8B',
    'ビ': '\uFF8B\uFF9E',
    'ピ': '\uFF8B\uFF9F',
    'フ': '\uFF8C',
    'ブ': '\uFF8C\uFF9E',
    'プ': '\uFF8C\uFF9F',
    'ヘ': '\uFF8D',
    'ベ': '\uFF8D\uFF9E',
    'ペ': '\uFF8D\uFF9F',
    'ホ': '\uFF8E',
    'ボ': '\uFF8E\uFF9E',
    'ポ': '\uFF8E\uFF9F',
    'マ': '\uFF8F',
    'ミ': '\uFF90',
    'ム': '\uFF91',
    'メ': '\uFF92',
    'モ': '\uFF93',
    'ャ': '\uFF6C',
    'ヤ': '\uFF94',
    'ュ': '\uFF6D',
    'ユ': '\uFF95',
    'ョ': '\uFF6E',
    'ヨ': '\uFF96',
    'ラ': '\uFF97',
    'リ': '\uFF98',
    'ル': '\uFF99',
    'レ': '\uFF9A',
    'ロ': '\uFF9B',
    'ワ': '\uFF9C',
    'ヲ': '\uFF66',
    'ン': '\uFF9D',
    'ヴ': '\uFF73\uFF9E',
    '・': '\uFF65',
    'ー': '\uFF70',
  };
  return zenHanMap;
}
Use it as follows: convertToHalfWidth('デジタル');
You can pass the result of this function to the function mentioned by GOTO 0 and get the complete Half width result for the Japanese Language
It might be worth asking why you want to transform full-width to half-width, because there is a built-in function that transforms half-width kana to full-width and is used to normalize data, among others in databases: it's simply string.normalize('NFKC').
// Half-width katakana "KA TA KA NA", written with \u escapes so the literal
// stays half-width even if this text is itself normalized.
const input = '\uFF76\uFF80\uFF76\uFF85'; // Half-width kana
// NFKC replaces compatibility variants, such as half-width katakana, with
// their standard forms.
const normalized = input.normalize('NFKC');
console.log(normalized); // full-width カタカナ
！ａｂｃ ＡＢＣ！ is different from !abc ABC!, and this conversion function IS what I'm asking for.

ａ and a […]

That's why it's really worth looking at why you need this in the first place; there may be an easier way.