|
4 | 4 |
|
5 | 5 | use WpOrg\Requests\Exception;
|
6 | 6 | use WpOrg\Requests\IdnaEncoder;
|
| 7 | +use WpOrg\Requests\Tests\Fixtures\StringableObject; |
7 | 8 | use WpOrg\Requests\Tests\TestCase;
|
8 | 9 |
|
| 10 | +/** |
| 11 | + * @covers \WpOrg\Requests\IdnaEncoder |
| 12 | + */ |
9 | 13 | final class IdnaEncoderTest extends TestCase {
|
10 |
| - public static function specExamples() { |
| 14 | + |
| 15 | + /** |
| 16 | + * Tests encoding a hostname using Punycode. |
| 17 | + * |
| 18 | + * @dataProvider dataEncoding |
| 19 | + * |
| 20 | + * @param string $data Data to encode. |
| 21 | + * @param string $expected Expected function output. |
| 22 | + * |
| 23 | + * @return void |
| 24 | + */ |
| 25 | + public function testEncoding($data, $expected) { |
| 26 | + $result = IdnaEncoder::encode($data); |
| 27 | + $this->assertSame($expected, $result); |
| 28 | + } |
| 29 | + |
| 30 | + /** |
| 31 | + * Data Provider. |
| 32 | + * |
| 33 | + * @return array |
| 34 | + */ |
| 35 | + public function dataEncoding() { |
11 | 36 | return array(
|
12 |
| - array( |
13 |
| - "\xe4\xbb\x96\xe4\xbb\xac\xe4\xb8\xba\xe4\xbb\x80\xe4\xb9\x88\xe4\xb8\x8d\xe8\xaf\xb4\xe4\xb8\xad\xe6\x96\x87", |
14 |
| - 'xn--ihqwcrb4cv8a8dqg056pqjye', |
| 37 | + 'empty string' => array( |
| 38 | + 'data' => '', |
| 39 | + 'expected' => '', |
| 40 | + ), |
| 41 | + 'ascii character' => array( |
| 42 | + 'data' => 'a', |
| 43 | + 'expected' => 'a', |
| 44 | + ), |
| 45 | + 'two-byte character' => array( |
| 46 | + 'data' => "\xc2\xb6", // Pilcrow character |
| 47 | + 'expected' => 'xn--tba', |
| 48 | + ), |
| 49 | + 'three-byte character' => array( |
| 50 | + 'data' => "\xe2\x82\xac", // Euro symbol |
| 51 | + 'expected' => 'xn--lzg', |
| 52 | + ), |
| 53 | + 'four-byte character' => array( |
| 54 | + 'data' => "\xf0\xa4\xad\xa2", // CJK ideograph U+24B62 (Extension B)
| 55 | + 'expected' => 'xn--ww6j', |
| 56 | + ), |
| 57 | + |
| 58 | + 'stringable object' => array( |
| 59 | + 'data' => new StringableObject("\xc2\xb6"), |
| 60 | + 'expected' => 'xn--tba', |
| 61 | + ), |
| 62 | + |
| 63 | + /* |
| 64 | + * Examples taken from RFC: https://datatracker.ietf.org/doc/html/rfc3492#section-7 |
| 65 | + * |
| 66 | + * Test data was obtained by converting the examples to hex using https://r12a.github.io/uniview/ |
| 67 | + * - Paste the unicode sequence. |
| 68 | + * - Use the "Remove all spaces" option. |
| 69 | + * - Use the "Send to Unicode Converter Tool" option. |
| 70 | + * - In the tool, copy the UTF-8 byte sequence, lowercase it and prefix each byte with `\x`. |
| 71 | + */ |
| 72 | + 'example from specs: RFC3492, section 7.1-A: Arabic' => array( |
| 73 | + 'data' => "\xd9\x84\xd9\x8a\xd9\x87\xd9\x85\xd8\xa7\xd8\xa8\xd8\xaa\xd9\x83\xd9\x84\xd9\x85\xd9\x88\xd8\xb4\xd8\xb9\xd8\xb1\xd8\xa8\xd9\x8a\xd8\x9f", |
| 74 | + 'expected' => 'xn--egbpdaj6bu4bxfgehfvwxn', |
| 75 | + ), |
| 76 | + 'example from specs: RFC3492, section 7.1-B: Simplified Chinese' => array( |
| 77 | + 'data' => "\xe4\xbb\x96\xe4\xbb\xac\xe4\xb8\xba\xe4\xbb\x80\xe4\xb9\x88\xe4\xb8\x8d\xe8\xaf\xb4\xe4\xb8\xad\xe6\x96\x87", |
| 78 | + 'expected' => 'xn--ihqwcrb4cv8a8dqg056pqjye', |
| 79 | + ), |
| 80 | + 'example from specs: RFC3492, section 7.1-C: Traditional Chinese' => array( |
| 81 | + 'data' => "\xe4\xbb\x96\xe5\x80\x91\xe7\x88\xb2\xe4\xbb\x80\xe9\xba\xbd\xe4\xb8\x8d\xe8\xaa\xaa\xe4\xb8\xad\xe6\x96\x87", |
| 82 | + 'expected' => 'xn--ihqwctvzc91f659drss3x8bo0yb', |
15 | 83 | ),
|
16 |
| - array( |
17 |
| - "\x33\xe5\xb9\xb4\x42\xe7\xb5\x84\xe9\x87\x91\xe5\x85\xab\xe5\x85\x88\xe7\x94\x9f", |
18 |
| - 'xn--3B-ww4c5e180e575a65lsy2b', |
| 84 | + 'example from specs: RFC3492, section 7.1-D: Czech' => array( |
| 85 | + 'data' => "\x50\x72\x6f\xc4\x8d\x70\x72\x6f\x73\x74\xc4\x9b\x6e\x65\x6d\x6c\x75\x76\xc3\xad\xc4\x8d\x65\x73\x6b\x79", |
| 86 | + 'expected' => 'xn--Proprostnemluvesky-uyb24dma41a', |
| 87 | + ), |
| 88 | + 'example from specs: RFC3492, section 7.1-E: Hebrew' => array( |
| 89 | + 'data' => "\xd7\x9c\xd7\x9e\xd7\x94\xd7\x94\xd7\x9d\xd7\xa4\xd7\xa9\xd7\x95\xd7\x98\xd7\x9c\xd7\x90\xd7\x9e\xd7\x93\xd7\x91\xd7\xa8\xd7\x99\xd7\x9d\xd7\xa2\xd7\x91\xd7\xa8\xd7\x99\xd7\xaa", |
| 90 | + 'expected' => 'xn--4dbcagdahymbxekheh6e0a7fei0b', |
| 91 | + ), |
| 92 | + 'example from specs: RFC3492, section 7.1-F: Hindi (Devanagari)' => array( |
| 93 | + 'data' => "\xe0\xa4\xaf\xe0\xa4\xb9\xe0\xa4\xb2\xe0\xa5\x8b\xe0\xa4\x97\xe0\xa4\xb9\xe0\xa4\xbf\xe0\xa4\xa8\xe0\xa5\x8d\xe0\xa4\xa6\xe0\xa5\x80\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa4\xaf\xe0\xa5\x8b\xe0\xa4\x82\xe0\xa4\xa8\xe0\xa4\xb9\xe0\xa5\x80\xe0\xa4\x82\xe0\xa4\xac\xe0\xa5\x8b\xe0\xa4\xb2\xe0\xa4\xb8\xe0\xa4\x95\xe0\xa4\xa4\xe0\xa5\x87\xe0\xa4\xb9\xe0\xa5\x88\xe0\xa4\x82", |
| 94 | + 'expected' => 'xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd', |
| 95 | + ), |
| 96 | + 'example from specs: RFC3492, section 7.1-G: Japanese (Kanji and hiragana)' => array( |
| 97 | + 'data' => "\xe3\x81\xaa\xe3\x81\x9c\xe3\x81\xbf\xe3\x82\x93\xe3\x81\xaa\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x82\x92\xe8\xa9\xb1\xe3\x81\x97\xe3\x81\xa6\xe3\x81\x8f\xe3\x82\x8c\xe3\x81\xaa\xe3\x81\x84\xe3\x81\xae\xe3\x81\x8b", |
| 98 | + 'expected' => 'xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa', |
| 99 | + ), |
| 100 | + /* Does not validate - output too long. |
| 101 | + 'example from specs: RFC3492, section 7.1-H: Korean (Hangul)' => array( |
| 102 | + 'data' => "\xec\x84\xb8\xea\xb3\x84\xec\x9d\x98\xeb\xaa\xa8\xeb\x93\xa0\xec\x82\xac\xeb\x9e\x8c\xeb\x93\xa4\xec\x9d\xb4\xed\x95\x9c\xea\xb5\xad\xec\x96\xb4\xeb\xa5\xbc\xec\x9d\xb4\xed\x95\xb4\xed\x95\x9c\xeb\x8b\xa4\xeb\xa9\xb4\xec\x96\xbc\xeb\xa7\x88\xeb\x82\x98\xec\xa2\x8b\xec\x9d\x84\xea\xb9\x8c", |
| 103 | + 'expected' => 'xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c', |
| 104 | + ), |
| 105 | + */ |
| 106 | + 'example from specs: RFC3492, section 7.1-I: Russian (Cyrillic)' => array( |
| 107 | + 'data' => "\xd0\xbf\xd0\xbe\xd1\x87\xd0\xb5\xd0\xbc\xd1\x83\xd0\xb6\xd0\xb5\xd0\xbe\xd0\xbd\xd0\xb8\xd0\xbd\xd0\xb5\xd0\xb3\xd0\xbe\xd0\xb2\xd0\xbe\xd1\x80\xd1\x8f\xd1\x82\xd0\xbf\xd0\xbe\xd1\x80\xd1\x83\xd1\x81\xd1\x81\xd0\xba\xd0\xb8", |
| 108 | + // According to the RFC, the `d` in `dot` should be uppercase. Needs double-checking: either a typo in the RFC or a bug. |
| 109 | + 'expected' => 'xn--b1abfaaepdrnnbgefbadotcwatmq2g4l', |
| 110 | + ), |
| 111 | + 'example from specs: RFC3492, section 7.1-J: Spanish' => array( |
| 112 | + 'data' => "\x50\x6f\x72\x71\x75\xc3\xa9\x6e\x6f\x70\x75\x65\x64\x65\x6e\x73\x69\x6d\x70\x6c\x65\x6d\x65\x6e\x74\x65\x68\x61\x62\x6c\x61\x72\x65\x6e\x45\x73\x70\x61\xc3\xb1\x6f\x6c", |
| 113 | + 'expected' => 'xn--PorqunopuedensimplementehablarenEspaol-fmd56a', |
| 114 | + ), |
| 115 | + 'example from specs: RFC3492, section 7.1-K: Vietnamese' => array( |
| 116 | + 'data' => "\x54\xe1\xba\xa1\x69\x73\x61\x6f\x68\xe1\xbb\x8d\x6b\x68\xc3\xb4\x6e\x67\x74\x68\xe1\xbb\x83\x63\x68\xe1\xbb\x89\x6e\xc3\xb3\x69\x74\x69\xe1\xba\xbf\x6e\x67\x56\x69\xe1\xbb\x87\x74", |
| 117 | + 'expected' => 'xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g', |
| 118 | + ), |
| 119 | + 'example from specs: RFC3492, section 7.1-L: Japanese artist' => array( |
| 120 | + 'data' => "\x33\xe5\xb9\xb4\x42\xe7\xb5\x84\xe9\x87\x91\xe5\x85\xab\xe5\x85\x88\xe7\x94\x9f", |
| 121 | + 'expected' => 'xn--3B-ww4c5e180e575a65lsy2b', |
| 122 | + ), |
| 123 | + 'example from specs: RFC3492, section 7.1-M: Japanese artist' => array( |
| 124 | + 'data' => "\xe5\xae\x89\xe5\xae\xa4\xe5\xa5\x88\xe7\xbe\x8e\xe6\x81\xb5\x2d\x77\x69\x74\x68\x2d\x53\x55\x50\x45\x52\x2d\x4d\x4f\x4e\x4b\x45\x59\x53", |
| 125 | + 'expected' => 'xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n', |
| 126 | + ), |
| 127 | + 'example from specs: RFC3492, section 7.1-N: Japanese artist' => array( |
| 128 | + 'data' => "\x48\x65\x6c\x6c\x6f\x2d\x41\x6e\x6f\x74\x68\x65\x72\x2d\x57\x61\x79\x2d\xe3\x81\x9d\xe3\x82\x8c\xe3\x81\x9e\xe3\x82\x8c\xe3\x81\xae\xe5\xa0\xb4\xe6\x89\x80", |
| 129 | + 'expected' => 'xn--Hello-Another-Way--fc4qua05auwb3674vfr0b', |
| 130 | + ), |
| 131 | + 'example from specs: RFC3492, section 7.1-O: Japanese artist' => array( |
| 132 | + 'data' => "\xe3\x81\xb2\xe3\x81\xa8\xe3\x81\xa4\xe5\xb1\x8b\xe6\xa0\xb9\xe3\x81\xae\xe4\xb8\x8b\x32", |
| 133 | + 'expected' => 'xn--2-u9tlzr9756bt3uc0v', |
| 134 | + ), |
| 135 | + 'example from specs: RFC3492, section 7.1-P: Japanese artist' => array( |
| 136 | + 'data' => "\x4d\x61\x6a\x69\xe3\x81\xa7\x4b\x6f\x69\xe3\x81\x99\xe3\x82\x8b\x35\xe7\xa7\x92\xe5\x89\x8d", |
| 137 | + 'expected' => 'xn--MajiKoi5-783gue6qz075azm5e', |
| 138 | + ), |
| 139 | + 'example from specs: RFC3492, section 7.1-Q: Japanese artist' => array( |
| 140 | + 'data' => "\xe3\x83\x91\xe3\x83\x95\xe3\x82\xa3\xe3\x83\xbc\x64\x65\xe3\x83\xab\xe3\x83\xb3\xe3\x83\x90", |
| 141 | + 'expected' => 'xn--de-jg4avhby1noc0d', |
| 142 | + ), |
| 143 | + 'example from specs: RFC3492, section 7.1-R: Japanese artist' => array( |
| 144 | + 'data' => "\xe3\x81\x9d\xe3\x81\xae\xe3\x82\xb9\xe3\x83\x94\xe3\x83\xbc\xe3\x83\x89\xe3\x81\xa7", |
| 145 | + 'expected' => 'xn--d9juau41awczczp', |
| 146 | + ), |
| 147 | + 'example from specs: RFC3492, section 7.1-S: ASCII string which breaks the rules' => array( |
| 148 | + 'data' => "\x2d\x3e\x20\x24\x31\x2e\x30\x30\x20\x3c\x2d", |
| 149 | + 'expected' => '-> $1.00 <-', |
19 | 150 | ),
|
20 | 151 | );
|
21 | 152 | }
|
22 | 153 |
|
23 | 154 | /**
|
24 |
| - * @dataProvider specExamples |
| 155 | + * Tests receiving an exception when trying to encode a hostname containing invalid unicode. |
| 156 | + * |
| 157 | + * @dataProvider dataInvalidUnicode |
| 158 | + * |
| 159 | + * @param string $data Data to encode. |
| 160 | + * |
| 161 | + * @return void |
25 | 162 | */
|
26 |
| - public function testEncoding($data, $expected) { |
27 |
| - $result = IdnaEncoder::encode($data); |
28 |
| - $this->assertSame($expected, $result); |
| 163 | + public function testInvalidUnicode($data) { |
| 164 | + $this->expectException(Exception::class); |
| 165 | + $this->expectExceptionMessage('Invalid Unicode codepoint'); |
| 166 | + |
| 167 | + IdnaEncoder::encode($data); |
| 168 | + } |
| 169 | + |
| 170 | + /** |
| 171 | + * Data Provider. |
| 172 | + * |
| 173 | + * @return array |
| 174 | + */ |
| 175 | + public function dataInvalidUnicode() { |
| 176 | + return array( |
| 177 | + 'Five-byte character' => array("\xfb\xb6\xb6\xb6\xb6"), |
| 178 | + 'Six-byte character' => array("\xfd\xb6\xb6\xb6\xb6\xb6"), |
| 179 | + 'Invalid ASCII character with multibyte' => array("\0\xc2\xb6"), |
| 180 | + 'Unfinished multibyte' => array("\xc2"), |
| 181 | + 'Partial multibyte' => array("\xc2\xc2\xb6"), |
| 182 | + ); |
29 | 183 | }
|
30 | 184 |
|
31 | 185 | public function testASCIITooLong() {
|
32 | 186 | $this->expectException(Exception::class);
|
33 | 187 | $this->expectExceptionMessage('Provided string is too long');
|
| 188 | + |
34 | 189 | $data = str_repeat('abcd', 20);
|
35 | 190 | IdnaEncoder::encode($data);
|
36 | 191 | }
|
37 | 192 |
|
38 | 193 | public function testEncodedTooLong() {
|
39 | 194 | $this->expectException(Exception::class);
|
40 | 195 | $this->expectExceptionMessage('Encoded string is too long');
|
| 196 | + |
41 | 197 | $data = str_repeat("\xe4\xbb\x96", 60);
|
42 | 198 | IdnaEncoder::encode($data);
|
43 | 199 | }
|
44 | 200 |
|
45 | 201 | public function testAlreadyPrefixed() {
|
46 | 202 | $this->expectException(Exception::class);
|
47 | 203 | $this->expectExceptionMessage('Provided string begins with ACE prefix');
|
48 |
| - IdnaEncoder::encode("xn--\xe4\xbb\x96"); |
49 |
| - } |
50 |
| - |
51 |
| - public function testASCIICharacter() { |
52 |
| - $result = IdnaEncoder::encode('a'); |
53 |
| - $this->assertSame('a', $result); |
54 |
| - } |
55 |
| - |
56 |
| - public function testTwoByteCharacter() { |
57 |
| - $result = IdnaEncoder::encode("\xc2\xb6"); // Pilcrow character |
58 |
| - $this->assertSame('xn--tba', $result); |
59 |
| - } |
60 |
| - |
61 |
| - public function testThreeByteCharacter() { |
62 |
| - $result = IdnaEncoder::encode("\xe2\x82\xac"); // Euro symbol |
63 |
| - $this->assertSame('xn--lzg', $result); |
64 |
| - } |
65 |
| - |
66 |
| - public function testFourByteCharacter() { |
67 |
| - $result = IdnaEncoder::encode("\xf0\xa4\xad\xa2"); // Chinese symbol? |
68 |
| - $this->assertSame('xn--ww6j', $result); |
69 |
| - } |
70 | 204 |
|
71 |
| - public function testFiveByteCharacter() { |
72 |
| - $this->expectException(Exception::class); |
73 |
| - $this->expectExceptionMessage('Invalid Unicode codepoint'); |
74 |
| - IdnaEncoder::encode("\xfb\xb6\xb6\xb6\xb6"); |
75 |
| - } |
76 |
| - |
77 |
| - public function testSixByteCharacter() { |
78 |
| - $this->expectException(Exception::class); |
79 |
| - $this->expectExceptionMessage('Invalid Unicode codepoint'); |
80 |
| - IdnaEncoder::encode("\xfd\xb6\xb6\xb6\xb6\xb6"); |
81 |
| - } |
82 |
| - |
83 |
| - public function testInvalidASCIICharacterWithMultibyte() { |
84 |
| - $this->expectException(Exception::class); |
85 |
| - $this->expectExceptionMessage('Invalid Unicode codepoint'); |
86 |
| - IdnaEncoder::encode("\0\xc2\xb6"); |
87 |
| - } |
88 |
| - |
89 |
| - public function testUnfinishedMultibyte() { |
90 |
| - $this->expectException(Exception::class); |
91 |
| - $this->expectExceptionMessage('Invalid Unicode codepoint'); |
92 |
| - IdnaEncoder::encode("\xc2"); |
93 |
| - } |
94 |
| - |
95 |
| - public function testPartialMultibyte() { |
96 |
| - $this->expectException(Exception::class); |
97 |
| - $this->expectExceptionMessage('Invalid Unicode codepoint'); |
98 |
| - IdnaEncoder::encode("\xc2\xc2\xb6"); |
| 205 | + IdnaEncoder::encode("xn--\xe4\xbb\x96"); |
99 | 206 | }
|
100 | 207 | }
|
0 commit comments