Let's talk about that... It's actually a lot more difficult than you would think.
bower install unibabel
// Encode the string as bytes with Unibabel (window.Unibabel must already be loaded).
var buffer = window.Unibabel.strToUtf8Arr("I♥☢𝄢");
console.log(buffer);
// Base64 form of the same bytes.
var base64 = window.Unibabel.arrToBase64(buffer);
console.log(base64); // SeKZpeKYou2gtO20og==
// digest() returns a Promise; nothing consumes the result in this short teaser.
window.crypto.subtle.digest("SHA-1", buffer);
// Promise resolves to a buffer; as hex: da548f7a00f799317d9ba6c03a6ee9d14065223d
// Turn an ASCII-only string into a byte buffer, one byte per UTF-16 code unit.
var text = "Hello World!";
var textLen = text.length;
var buffer = new Uint8Array(textLen);
var index = 0;

// charCodeAt() yields a number. Assigning the character itself
// (buffer[index] = text[index]) would not throw, it would just store the wrong value.
// text.codePointAt(index) is the code-point-aware alternative where supported.
while (index < textLen) {
  buffer[index] = text.charCodeAt(index);
  index += 1;
}
//
// Create and print SHA-1 Hash
//
/**
 * Success handler for a digest Promise: logs the raw buffer, then its hex form.
 *
 * @param {ArrayBuffer} sha1buf - the digest bytes passed on to bufferToHex()
 */
function printHex(sha1buf) {
  console.log(sha1buf);
  // bufferToHex() is defined in the appendix of this article
  console.log(bufferToHex(sha1buf));
}
/**
 * Failure handler for a digest Promise: writes the error to the console.
 *
 * @param {*} err - the rejection reason
 */
function printErr(err) {
  console.error(err);
}
// Hash the bytes; printHex receives the digest on success, printErr the rejection reason.
window.crypto.subtle.digest("SHA-1", buffer).then(printHex, printErr);
Now let's consider that we're in 2015, not 1986:
1-byte | ~ | a | 0 | ! |
2-byte | Ā¶ | Ā¢ | Īµ | Ʊ |
3-byte | ā„ | ā¢ | ā | ā± |
6-byte | š©¶ | š¶ | š¦ | š¢ |
Our users use weird symbols (💩) and, surprise surprise, they don't all speak ASCII English.
Also, check out these full-color symbols.
// String .length counts UTF-16 code units, which is neither "symbols" nor bytes.
var radSnoBass = "I ♥ ☢ 𝄢!"; // .length === 9 (UTF-16 code units; 8 code points)
Unibabel.strToUtf8Arr(radSnoBass).length; // 17 bytes
"I".length; // 1 char
Unibabel.strToUtf8Arr("I").length; // 1 byte
"♥".length; // 1 char
Unibabel.strToUtf8Arr("♥").length; // 3 bytes
"☢".length; // 1 char
Unibabel.strToUtf8Arr("☢").length; // 3 bytes
"𝄢".length; // 2 chars WHAT!?!? (a surrogate pair: two UTF-16 code units)
Unibabel.strToUtf8Arr("𝄢").length; // 6 bytes
var happyBuf = Unibabel.strToUtf8Arr(radSnoBass);
So what's wrong with this? Well... a lot.
Let's take a look
var text = "I♥☢𝄢"; // aka "I Love Radioactive Bass"
var textLen = text.length;
var buffer = new Uint8Array(textLen);
var index;
// Deliberately naive: one byte per UTF-16 code unit, to show what goes wrong.
for (index = 0; index < textLen; index += 1) {
  console.log('char[' + index + ']', text[index]
    , text.charCodeAt(index), text.codePointAt(index));
  // A Uint8Array element keeps only the low 8 bits of the code unit.
  buffer[index] = text.charCodeAt(index);
  console.log('buffer[' + index + ']', buffer[index]);
}
window.crypto.subtle.digest("SHA-1", buffer).then(printHex, printErr);
// BAD!: da548f7a00f799317d9ba6c03a6ee9d14065223d
// NOTE(review): this is the same digest the article quotes for the Unibabel-encoded
// bytes, yet the truncated bytes hashed here are different - re-run and confirm the value.
// compare with `echo "I♥☢𝄢" | shasum`
// NOTE(review): `echo` appends a trailing newline, so shasum hashes one extra byte -
// confirm whether the "Good" value below was meant to include it.
// Good: d5bb644c3a9f517bec9c36400cbc449be271f65f
After writing this demo I found out about another new API
// Built-in alternative: TextEncoder / TextDecoder convert between strings and UTF-8 bytes.
var encoder = new TextEncoder("utf-8");
var decoder = new TextDecoder("utf-8");

// String -> bytes
var buf = encoder.encode("!Ā¶ā¢āš©¶š¢");
console.log(buf);

// Bytes -> string
var msg = decoder.decode(buf);
console.log(msg);
I took MDN's sample code and published it as unibabel on bower
How does it work?
Magic.
'nuf said
Check out the article (above) if you're really interested. It's a bunch of double and triple bit-shifting and other similar nonsense that I don't care to think about, but for which I am very grateful.
bower install unibabel
// Same hash as before, but with the bytes produced by Unibabel instead of charCodeAt().
var buffer = window.Unibabel.strToUtf8Arr("I♥☢𝄢");
console.log(buffer);
// Unibabel also supports base64 conversion, if you need it
window.Unibabel.arrToBase64(buffer); // SeKZpeKYou2gtO20og==
window.crypto.subtle.digest("SHA-1", buffer).then(printHex, printErr);
// YAY! da548f7a00f799317d9ba6c03a6ee9d14065223d
What do you get when you mix an Elephant with a Rhino?
Let's try!
aes-256-cbc
/**
 * Convert binary data to a lowercase hex string, two digits per byte.
 *
 * @param {ArrayBuffer|ArrayBufferView} buf - an ArrayBuffer (e.g. the result of
 *   crypto.subtle.digest), or a typed array / DataView over one. For a view, only
 *   the bytes the view covers are converted.
 * @returns {string} hex string of length 2 * byteLength ('' for empty input)
 * @throws {TypeError} if buf is neither an ArrayBuffer nor a view over one
 */
function bufferToHex(buf) {
  // A DataView reads the bytes in place without copying them. For views we must
  // carry over byteOffset/byteLength so a subarray does not expose its whole backing buffer.
  var dv = ArrayBuffer.isView(buf)
    ? new DataView(buf.buffer, buf.byteOffset, buf.byteLength)
    : new DataView(buf);
  var i;
  var len;
  var hex = '';
  var c;

  for (i = 0, len = dv.byteLength; i < len; i += 1) {
    c = dv.getUint8(i).toString(16);
    // toString(16) drops the leading zero for values below 0x10
    if (c.length < 2) {
      c = '0' + c;
    }
    hex += c;
  }

  return hex;
}
Helpful Hints: