Class Index | File Index

Classes


Namespace Pot.UTF8


Defined in: <pot.js>.

Namespace Summary
Constructor Attributes Constructor Name and Description
 
UTF-8 and UTF-16 utilities.
Method Summary
Method Attributes Method Name and Description
<static>  
Pot.UTF8.byteOf(string)
Gets the byte size of string as UTF-8.
<static>  
Pot.UTF8.convertEncodingToUnicode(data, (from))
Convert encoding to Unicode string.
<static>  
Pot.UTF8.decode(string)
Convert to UTF-16 string from UTF-8 string.
<static>  
Pot.UTF8.encode(string)
Convert to UTF-8 string from UTF-16 string.
Namespace Detail
Pot.UTF8
UTF-8 and UTF-16 utilities. Mutual conversion between UTF-8 and UTF-16. RFC 2044, RFC 2279: UTF-8, a transformation format of ISO 10646
See:
http://www.ietf.org/rfc/rfc2279.txt Note that using "encodeURIComponent" or "decodeURIComponent" to convert a string that includes an unpaired surrogate or the characters U+FFFE or U+FFFF will raise a URIError. U+FFFF and U+FFFE are also converted to an unexpected result on SpiderMonkey. These methods implement conversion functions between UTF-8 and UTF-16 that are compatible with calling "unescape(encodeURIComponent(string))" and "decodeURIComponent(escape(string))".
Example:
  decodeURIComponent(encodeURIComponent('\uFFFF')) === '\uFFFF';
Results:
  false (SpiderMonkey)

Example:
  decodeURIComponent(encodeURIComponent('\uD811')) === '\uD811';
Results:
  URIError
Method Detail
<static> {Function} Pot.UTF8.byteOf(string)
Gets the byte size of string as UTF-8.
  var string = 'abc123あいうえお';
  var length = string.length;
  var byteSize = Pot.UTF8.byteOf(string);
  debug(string + ' : length = ' + length + ', byteSize = ' + byteSize);
  // @results
  //   length   = 11
  //   byteSize = 21
Parameters:
{String} string
The target string.
Returns:
{Number} The UTF-8 byte size of string.

<static> {Function} Pot.UTF8.convertEncodingToUnicode(data, (from))
Convert encoding to Unicode string. This function requires the BlobBuilder and FileReader APIs. If the environment does not support these HTML5 APIs, an error will be raised through the returned Deferred.
  // 'こんにちは。ほげほげ'
  var unicode = [
    12371, 12435, 12395, 12385, 12399, 12290,
    12411, 12370, 12411, 12370
  ];
  // Shift_JIS: 'こんにちは。ほげほげ'
  var sjis = [
    130, 177, 130, 241, 130, 201,
    130, 191, 130, 205, 129, 66,
    130, 217, 130, 176, 130, 217,
    130, 176
  ];
  // EUC-JP: 'こんにちは。ほげほげ'
  var eucjp = [
    164, 179, 164, 243, 164, 203,
    164, 193, 164, 207, 161, 163,
    164, 219, 164, 178, 164, 219,
    164, 178
  ];
  // UTF-8: 'こんにちは。ほげほげ'
  var utf8 = [
    227, 129, 147, 227, 130, 147,
    227, 129, 171, 227, 129, 161,
    227, 129, 175, 227, 128, 130,
    227, 129, 187, 227, 129, 146,
    227, 129, 187, 227, 129, 146
  ];
  Pot.convertEncodingToUnicode(sjis, 'Shift_JIS').then(function(res) {
    Pot.debug('SJIS to Unicode:');
    Pot.debug(res); // 'こんにちは。ほげほげ'
  }).then(function() {
    return Pot.convertEncodingToUnicode(eucjp, 'EUC-JP').
                                                    then(function(res) {
      Pot.debug('EUC-JP to Unicode:');
      Pot.debug(res); // 'こんにちは。ほげほげ'
    });
  }).then(function() {
    return Pot.convertEncodingToUnicode(utf8, 'UTF-8').
                                                   then(function(res) {
      Pot.debug('UTF-8 to Unicode:');
      Pot.debug(res); // 'こんにちは。ほげほげ'
    });
  });
Parameters:
{TypedArray|Array|Blob} data
The target data.
{(String)} (from)
(optional) The character encoding to convert from (e.g. 'Shift_JIS', 'EUC-JP', 'UTF-8').
Returns:
{Pot.Deferred} A new instance of Pot.Deferred whose result is the converted Unicode string.

<static> {Function} Pot.UTF8.decode(string)
Convert to UTF-16 string from UTF-8 string.
  var string = 'hogeほげ';
  var encoded = Pot.utf8Encode(string);
  var decoded = Pot.utf8Decode(encoded);
  var toCharCode = function(s) {
    return Pot.map(s.split(''), function(c) {
      return c.charCodeAt(0);
    });
  };
  Pot.debug(toCharCode(encoded));
  // [104, 111, 103, 101, 227, 129, 187, 227, 129, 146]
  Pot.debug(decoded); // 'hogeほげ'
  Pot.debug(decoded === string); // true
Parameters:
{String} string
UTF-8 string.
Returns:
{String} UTF-16 string.

<static> {Function} Pot.UTF8.encode(string)
Convert to UTF-8 string from UTF-16 string.
  var string = 'hogeほげ';
  var encoded = Pot.utf8Encode(string);
  var decoded = Pot.utf8Decode(encoded);
  var toCharCode = function(s) {
    return Pot.map(s.split(''), function(c) {
      return c.charCodeAt(0);
    });
  };
  Pot.debug(toCharCode(encoded));
  // [104, 111, 103, 101, 227, 129, 187, 227, 129, 146]
  Pot.debug(decoded); // 'hogeほげ'
  Pot.debug(decoded === string); // true
Parameters:
{String} string
UTF-16 string.
Returns:
{String} UTF-8 string.

Documentation generated by JsDoc Toolkit 2.4.0 on Fri Sep 21 2012 19:32:22 GMT+0900 (JST)