Count bytes in textarea using javascript

I need to count how many bytes the content of a textarea occupies when it is UTF-8 encoded, using JavaScript. Any idea how I would do this?

thanks!

Answers:

Answer

edit: as didier-l has pointed out, this function does not count surrogate characters correctly.

broofa's answer should count surrogates properly, see https://stackoverflow.com/a/12206089/274483.

I have tested the two proposed versions here as well as a naive implementation:

 /**
  * Naive UTF-8 byte counter: walks the string one UTF-16 code unit at a time and
  * adds 1, 2 or 3 bytes depending on the unit's value.
  *
  * Limitation: surrogate halves (0xD800-0xDFFF) fall into the final 3-byte branch,
  * so a character outside the BMP (stored as two code units) is counted as 6 bytes
  * instead of 4.
  *
  * @param {string} string - Text to measure.
  * @returns {number} Byte count accumulated per code unit.
  */
 getUTF8Length: function(string) {
    var utf8length = 0;
    for (var n = 0; n < string.length; n++) {
        var c = string.charCodeAt(n);
        if (c < 128) {
            // U+0000..U+007F: one byte
            utf8length++;
        }
        else if((c > 127) && (c < 2048)) {
            // U+0080..U+07FF: two bytes
            utf8length = utf8length+2;
        }
        else {
            // Every other code unit, including each half of a surrogate pair: three bytes
            utf8length = utf8length+3;
        }
    }
    return utf8length;
 }

The result: my version is slightly faster in Firefox and significantly faster in Chrome (~30x) than the other versions posted here.

Answer
// encodeURIComponent emits one %XX escape per UTF-8 byte of every character it escapes;
// collapsing each escape to a single placeholder makes the string length equal the byte count.
// Note: encodeURIComponent throws a URIError if `text` contains an unpaired surrogate.
encodeURIComponent(text).replace(/%[A-F\d]{2}/g, 'U').length
Answer

Combining various answers, the following method should be fast and accurate, and avoids issues with invalid surrogate pairs that can cause errors in encodeURIComponent():

/**
 * Returns the number of bytes `s` occupies when encoded as UTF-8.
 *
 * The string is scanned one UTF-16 code unit at a time. A high surrogate that is
 * immediately followed by a low surrogate is one supplementary character (4 bytes).
 * Any unpaired surrogate is counted as 3 bytes, which is the size of the U+FFFD
 * replacement character that UTF-8 encoders substitute for it; the following code
 * unit is then measured on its own instead of being skipped.
 *
 * @param {string} s - Text to measure.
 * @returns {number} UTF-8 byte length of `s`.
 */
function getUTF8Length(s) {
  var len = 0;
  for (var i = 0; i < s.length; i++) {
    var code = s.charCodeAt(i);
    if (code <= 0x7f) {
      len += 1;
    } else if (code <= 0x7ff) {
      len += 2;
    } else if (code >= 0xd800 && code <= 0xdbff) {
      // High surrogate: only a real pair is 4 bytes. charCodeAt() yields NaN past
      // the end of the string, which fails both comparisons below.
      var next = s.charCodeAt(i + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        len += 4;
        i++; // the low half has been accounted for
      } else {
        len += 3;
      }
    } else {
      // Remaining BMP code units up to and including U+FFFF, plus unpaired low surrogates.
      len += 3;
    }
  }
  return len;
}
Answer

If you have non-bmp characters in your string, it's a little more complicated...

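Because JavaScript strings are UTF-16 encoded and a "character" is a 2-byte (16-bit) code unit, characters outside the BMP (those that need 4 bytes in UTF-8) are stored as two code units, a surrogate pair, so length and charCodeAt() do not treat them as a single character: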

    <script type="text/javascript">
        var nonBmpString = "foo€";
        console.log( nonBmpString.length );
        // NOTE(review): this logs 4, not 5. "€" (U+20AC) lies inside the BMP and is a single
        // UTF-16 code unit; only characters above U+FFFF (e.g. U+1D11E) count as 2 in length.
    </script>

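The character "€" (U+20AC) takes 3 bytes in UTF-8, but it lies inside the BMP, so JavaScript stores it as a single 16-bit code unit and "foo€".length is 4. A character outside the BMP, such as U+1D11E (musical G clef), takes 4 bytes in UTF-8, and JavaScript does interpret it as 2 characters, because in JS a character is a 16-bit block.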

So to correctly get the bytesize of a mixed string, we have to code our own function fixedCharCodeAt();

    /**
     * Surrogate-aware replacement for String.prototype.charCodeAt.
     *
     * @param {string} str - String to read from.
     * @param {number} [idx] - Index of the UTF-16 code unit to inspect; falsy values mean 0.
     * @returns {number|boolean} The full code point when `idx` is on a BMP character or on
     *     the high half of a surrogate pair; `false` when `idx` is on a low surrogate, so
     *     loops can skip the second half of a pair; NaN (from charCodeAt) when `idx` is
     *     outside the string.
     * @throws {Error} When a high surrogate is not followed by a low surrogate.
     */
    function fixedCharCodeAt(str, idx) {
        idx = idx || 0;
        var code = str.charCodeAt(idx);
        if (0xD800 <= code && code <= 0xDBFF) { // High surrogate (could change last hex to 0xDB7F to treat high private surrogates as single characters)
            var low = str.charCodeAt(idx + 1);
            // The second half must be a real low surrogate. NaN (end of string) fails both
            // comparisons, so this single check also covers a truncated pair.
            if (!(0xDC00 <= low && low <= 0xDFFF)) {
                throw new Error('Kein gültiges Schriftzeichen oder Speicherfehler!');
            }
            return ((code - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
        }
        if (0xDC00 <= code && code <= 0xDFFF) { // Low surrogate
            // Return false so loops can skip this index: a paired low surrogate was already
            // consumed together with the high surrogate at the previous index.
            return false;
        }
        return code;
    }

Now we can count the bytes...

    /**
     * Counts the bytes `str` occupies when encoded as UTF-8, using fixedCharCodeAt()
     * to read whole code points.
     *
     * UTF-8 never needs more than 4 bytes for a code point that a UTF-16 string can
     * hold, so there are no 5- or 6-byte cases.
     *
     * @param {string} str - Text to measure.
     * @returns {number} UTF-8 byte length.
     */
    function countUtf8(str) {
        var result = 0;
        for (var n = 0; n < str.length; n++) {
            var charCode = fixedCharCodeAt(str, n);
            if (charCode === false) {
                // Only an unpaired low surrogate gets here, because the low half of a
                // real pair is stepped over below. Count it as U+FFFD (3 bytes), the
                // replacement a UTF-8 encoder emits for it.
                result += 3;
            } else if (charCode < 128) {
                result += 1;
            } else if (charCode < 2048) {
                result += 2;
            } else if (charCode < 65536) {
                result += 3;
            } else {
                result += 4;
                n++; // step over the low half of the surrogate pair just counted
            }
        }
        return result;
    }

By the way... You should not use the encodeURI-method, because, it's a native browser function ;)

More stuff:


Cheers

frankneff.ch / @frank_neff
Answer

Add Byte length counting function to the string

/**
 * Adds String.prototype.Blength(): a double-byte style length in which every UTF-16
 * code unit up to U+00FF counts as 1 and every code unit above U+00FF counts as 2.
 * This is not the UTF-8 byte length (for example "\u00e9" counts as 1 here).
 *
 * @returns {number} this.length plus the number of code units above U+00FF.
 */
String.prototype.Blength = function() {
    // No `i` flag: with case-insensitive matching the negated class also excludes
    // characters whose case partner lies in \x00-\xff (U+0178, U+039C, U+03BC), so
    // they would be counted as 1 instead of 2.
    var arr = this.match(/[^\x00-\xff]/g);
    return arr === null ? this.length : this.length + arr.length;
};

then you can use .Blength() to get the size

Answer

How about simple:

// encodeURIComponent produces one %XX escape per UTF-8 byte of each escaped character;
// unescape turns every %XX back into a single character, so the intermediate string has
// exactly one character per byte. Note: unescape is a legacy (Annex B) function.
unescape(encodeURIComponent(utf8text)).length

The trick is that encodeURIComponent seems to work on characters while unescape works on bytes.

Answer

I have been asking myself the same thing. This is the best answer I have stumbled upon:

http://www.inter-locale.com/demos/countBytes.html

Here is the code snippet:

<script type="text/javascript">
 /**
  * Reads the value of the element with id "someText", URI-encodes it and alerts the
  * encoded text together with its size: every %XX escape counts as one byte and
  * every unescaped character counts as one byte.
  */
 function checkLength() {
    var rawText = document.getElementById("someText").value;
    var escapedStr = encodeURI(rawText);
    var count;
    if (escapedStr.indexOf("%") === -1) {
        // Nothing was escaped, so every character is a single byte.
        count = escapedStr.length;
    } else {
        // Each "%" starts one three-character escape that stands for one byte.
        var escapes = escapedStr.split("%").length - 1;
        if (escapes == 0) {
            escapes++;  //perverse case; can't happen with real UTF-8
        }
        var plainChars = escapedStr.length - (escapes * 3);
        count = escapes + plainChars;
    }
    alert(escapedStr + ": size is " + count);
 }

The link also contains a live example to play with. "encodeURI(STRING)" is the building block here, but also look at encodeURIComponent(STRING) (as already pointed out in the previous answer) to see which one fits your needs.

Regards

Answer
// Every %XX escape or single unescaped character acts as a separator, i.e. one separator
// per UTF-8 byte; split() returns (separators + 1) pieces, hence the "- 1".
encodeURI(text).split(/%..|./).length - 1
Answer

Try the following:

/**
 * Returns the number of bytes `c` occupies when encoded as UTF-8.
 *
 * A high surrogate followed by a low surrogate is one supplementary character
 * (4 bytes); an unpaired surrogate is counted as 3 bytes, the size of the U+FFFD
 * replacement character that UTF-8 encoders emit for it.
 *
 * @param {string} c - Text to measure.
 * @returns {number} UTF-8 byte length of `c`.
 */
function b(c) {
     var n = 0;
     // `i` and `p` are declared locally so the function neither leaks globals nor
     // throws a ReferenceError in strict mode / ES modules.
     for (var i = 0; i < c.length; i++) {
           var p = c.charCodeAt(i);
           if (p < 128) {
                 n++;
           } else if (p < 2048) {
                 n += 2;
           } else if (p >= 0xD800 && p <= 0xDBFF) {
                 // charCodeAt() yields NaN past the end, which fails both comparisons.
                 var q = c.charCodeAt(i + 1);
                 if (q >= 0xDC00 && q <= 0xDFFF) {
                       n += 4;
                       i++; // low half already counted
                 } else {
                       n += 3;
                 }
           } else {
                 n += 3;
           }
     }
     return n;
}
Answer

Just set the meta charset to UTF-8 and it's OK!

<meta charset="UTF-8">
<meta http-equiv="content-type" content="text/html;charset=utf-8">

and js:

if($mytext.length > 10){
 // NOTE(review): .length counts UTF-16 code units, not UTF-8 bytes, whatever the page charset is
}

Tags

Recent Questions

Top Questions

Home Tags Terms of Service Privacy Policy DMCA Contact Us

©2020 All rights reserved.