improve encoding utf8->ascii for non-printable chars
[plcapi.git] / tests / 0CharsetTest.php
1 <?php
2 /**
3  * @author JoakimLofgren
4  */
5
6 include_once __DIR__ . '/PolyfillTestCase.php';
7
8 use PhpXmlRpc\Helper\Charset;
9
10 /**
11  * Test conversion between encodings
12  *
13  * For Windows if you want to test the output use Consolas font
14  * and run the following in cmd:
15  *     chcp 28591 (latin1)
16  *     chcp 65001 (utf8)
17  *
18  * @todo add tests for conversion: utf8 -> ascii (incl. chars 0-31)
19  * @todo add tests for conversion: latin1 -> utf8
20  * @todo add tests for conversion: latin1 -> ascii
21  */
class CharsetTest extends PhpXmlRpc_PolyfillTestCase
{
    // Consolas font should render these properly
    protected $runes = "ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ";
    protected $greek = "Τὴ γλῶσσα μοῦ ἔδωσαν ἑλληνικὴ";
    protected $russian = "Река неслася; бедный чёлн";
    protected $chinese = "我能吞下玻璃而不伤身体。";

    /** @var string ISO-8859-1 encoded fixture, built in set_up() */
    protected $latinString;

    /**
     * Builds the ISO-8859-1 fixture string: LF, CR, TAB, then bytes 32-126 and 160-255.
     *
     * @todo move to usage of a dataProvider and create the latinString there
     */
    protected function set_up()
    {
        // construct a latin string with all chars (except control ones)
        $this->latinString = "\n\r\t";
        for ($i = 32; $i < 127; $i++) {
            $this->latinString .= chr($i);
        }
        for ($i = 160; $i < 256; $i++) {
            $this->latinString .= chr($i);
        }
    }

    /**
     * Converts an ISO-8859-1 string to UTF-8 without relying on utf8_encode()
     * (deprecated since PHP 8.2) or on optional extensions such as mbstring/iconv.
     *
     * Every ISO-8859-1 byte maps to the Unicode code point with the same value, so
     * bytes below 0x80 are copied unchanged and bytes 0x80-0xFF become the
     * corresponding two-byte UTF-8 sequence.
     *
     * @param string $data ISO-8859-1 encoded bytes
     * @return string the same text, UTF-8 encoded
     */
    protected function latin1ToUtf8($data)
    {
        $utf8 = '';
        $length = strlen($data);
        for ($i = 0; $i < $length; $i++) {
            $code = ord($data[$i]);
            if ($code < 0x80) {
                $utf8 .= $data[$i];
            } else {
                // 110xxxxx 10xxxxxx: high 2 bits go in the lead byte, low 6 bits in the continuation byte
                $utf8 .= chr(0xC0 | ($code >> 6)) . chr(0x80 | ($code & 0x3F));
            }
        }

        return $utf8;
    }

    /**
     * Runs Charset::encodeEntities() on $data, from UTF-8 to ISO-8859-1.
     *
     * @param string $data UTF-8 encoded input
     * @return string
     */
    protected function utf8ToLatin1($data)
    {
        return Charset::instance()->encodeEntities(
            $data,
            'UTF-8',
            'ISO-8859-1'
        );
    }

    /**
     * Runs Charset::encodeEntities() on $data, from UTF-8 to US-ASCII.
     *
     * @param string $data UTF-8 encoded input
     * @return string
     */
    protected function utf8ToAscii($data)
    {
        return Charset::instance()->encodeEntities(
            $data,
            'UTF-8',
            'US-ASCII'
        );
    }

    /**
     * Every character of the latin1 fixture must survive the UTF-8 -> ISO-8859-1 round trip,
     * with only the five XML special characters replaced by their entities.
     */
    public function testUtf8ToLatin1All()
    {
        /*$this->assertEquals(
            'ISO-8859-1',
            mb_detect_encoding($this->latinString, 'ISO-8859-1, UTF-8, WINDOWS-1251, ASCII', true),
            'Setup latinString is not ISO-8859-1 encoded...'
        );*/
        $string = $this->latin1ToUtf8($this->latinString);
        $encoded = $this->utf8ToLatin1($string);
        $this->assertEquals(str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $this->latinString), $encoded);
    }

    /**
     * Characters available in ISO-8859-1 are converted to single bytes, while the euro sign
     * (not part of ISO-8859-1) is expected as a numeric character reference.
     */
    public function testUtf8ToLatin1EuroSymbol()
    {
        $string = 'a.b.c.å.ä.ö.€.';
        $encoded = $this->utf8ToLatin1($string);
        // expected value spelled out as ISO-8859-1 bytes: å = 0xE5, ä = 0xE4, ö = 0xF6
        $this->assertEquals("a.b.c.\xE5.\xE4.\xF6.&#8364;.", $encoded);
    }

    /**
     * Runic text has no ISO-8859-1 representation: every character is expected as a numeric entity.
     */
    public function testUtf8ToLatin1Runes()
    {
        $string = $this->runes;
        $encoded = $this->utf8ToLatin1($string);
        $this->assertEquals('&#5792;&#5831;&#5819;&#5867;&#5842;&#5862;&#5798;&#5867;&#5792;&#5809;&#5801;&#5792;&#5794;&#5809;&#5867;&#5792;&#5825;&#5809;&#5802;&#5867;&#5815;&#5846;&#5819;&#5817;&#5862;&#5850;&#5811;&#5794;&#5847;', $encoded);
    }

    /**
     * Greek letters are expected as numeric entities, while plain spaces are left untouched.
     */
    public function testUtf8ToLatin1Greek()
    {
        $string = $this->greek;
        $encoded = $this->utf8ToLatin1($string);
        $this->assertEquals('&#932;&#8052; &#947;&#955;&#8182;&#963;&#963;&#945; &#956;&#959;&#8166; &#7956;&#948;&#969;&#963;&#945;&#957; &#7953;&#955;&#955;&#951;&#957;&#953;&#954;&#8052;', $encoded);
    }

    /**
     * Cyrillic letters are expected as numeric entities, while ASCII punctuation and spaces are left untouched.
     */
    public function testUtf8ToLatin1Russian()
    {
        $string = $this->russian;
        $encoded = $this->utf8ToLatin1($string);
        $this->assertEquals('&#1056;&#1077;&#1082;&#1072; &#1085;&#1077;&#1089;&#1083;&#1072;&#1089;&#1103;; &#1073;&#1077;&#1076;&#1085;&#1099;&#1081; &#1095;&#1105;&#1083;&#1085;', $encoded);
    }

    /**
     * CJK characters (including the ideographic full stop) are expected as numeric entities.
     */
    public function testUtf8ToLatin1Chinese()
    {
        $string = $this->chinese;
        $encoded = $this->utf8ToLatin1($string);
        $this->assertEquals('&#25105;&#33021;&#21534;&#19979;&#29627;&#29827;&#32780;&#19981;&#20260;&#36523;&#20307;&#12290;', $encoded);
    }
}