Counting words that contain UTF-8 characters in JavaScript

Counting words is easy for languages that separate words with spaces, such as Hindi or English. It is a problem for languages that do not put spaces between words, such as Chinese or Japanese.

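So I have written a JavaScript function, count_word, that counts the words in text containing UTF-8 characters. Each CJK ideograph counts as one word, every three other characters in the U+3000–U+4DFF range (for example kana) count as one word, every four Thai characters count as one word, and everything else is split on whitespace and punctuation.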

<script type="text/javascript">
/**
 * Estimate the number of words in a piece of text that may mix
 * space-separated scripts with scripts that do not use spaces.
 *
 * Counting rules:
 *  - every character in U+4E00..U+9FFF (CJK ideographs) is one word;
 *  - characters in U+3000..U+4DFF (CJK punctuation, kana, etc.) are
 *    counted at three characters per word, rounded up;
 *  - characters in U+0E00..U+0E7F (Thai) are counted at four characters
 *    per word, rounded up;
 *  - whatever remains is split on whitespace, "\", "/" and a fixed set of
 *    punctuation marks, and every non-empty piece is one word.
 *
 * @param {string} string - Text to analyse; null/undefined count as empty.
 * @returns {number} Estimated word count.
 */
function count_word(string)
{
    // Nothing to count; also avoids calling string methods on null/undefined.
    if (string == null) {
        return 0;
    }
    var text = String(string);

    // Count the characters of the space-less scripts directly instead of
    // substituting marker tokens, so literal text such as "{PNK}" typed by
    // the user is treated as an ordinary word.
    var pnkCount = (text.match(/[\u3000-\u4DFF]/g) || []).length;
    var cjkCount = (text.match(/[\u4E00-\u9FFF]/g) || []).length;
    var thiCount = (text.match(/[\u0E00-\u0E7F]/g) || []).length;

    // Those characters have been accounted for; they now only act as
    // separators between the remaining space-delimited words.
    var rest = text.replace(/[\u3000-\u4DFF\u4E00-\u9FFF\u0E00-\u0E7F]/g, ' ');

    // Punctuation that separates words. \u201D and \u2019 are the right
    // double and right single curly quotes, written as escapes so the
    // pattern does not depend on the file's encoding.
    rest = rest.replace(/[()*|+\u201D\u2019_;:,.?]/g, ' ');

    var pieces = rest.split(/[\s\\\/]+/);
    var count = 0;
    for (var i = 0; i < pieces.length; i++) {
        if (pieces[i].length > 0) {
            count++;
        }
    }

    return count + cjkCount + Math.ceil(pnkCount / 3) + Math.ceil(thiCount / 4);
}
</script>

Calling the JavaScript function count_word

<script type="text/javascript">
	// Once the DOM is ready, recount the words whenever a textarea's content
	// changes and show the result in the element(s) with class "count".
	$(document).ready(function(){
		// .on() replaces .bind(), which is deprecated as of jQuery 3.0.
		$('textarea').on("change keyup input", function() {
			var wordCount = count_word($(this).val());
			$(".count").text(wordCount);
		});
	});
</script>

HTML Code
The page needs only a simple HTML snippet: a textarea for the text and a span that shows the running word count.

<div class="container">
  <!-- The label's "for" must match the textarea's id so that the two are associated. -->
  <label for="name">Count Word</label>
  <textarea id="name" class="form-control" rows="3"></textarea>
  <p>Total word Count: <span class="count">0</span></p>
</div>
Leave a reply
Captcha Click on image to update the captcha .