From 68eb288629afacabd27bd38f364ec5e2d52d0a1b Mon Sep 17 00:00:00 2001 From: Jan Jakes Date: Thu, 9 Jan 2025 17:52:20 +0100 Subject: [PATCH] wip --- tests/WP_SQLite_Driver_Tests.php | 124 ++++++++++++------ .../sqlite-ast/class-wp-sqlite-driver.php | 52 ++++++++ ...s-wp-sqlite-pdo-user-defined-functions.php | 72 ++++++++++ 3 files changed, 206 insertions(+), 42 deletions(-) diff --git a/tests/WP_SQLite_Driver_Tests.php b/tests/WP_SQLite_Driver_Tests.php index 2e658cb5..7a6cf01c 100644 --- a/tests/WP_SQLite_Driver_Tests.php +++ b/tests/WP_SQLite_Driver_Tests.php @@ -3033,13 +3033,13 @@ public function testTranslatesUtf8SELECT() { $this->assertQuery( 'DELETE FROM _options' ); } - public function testTranslateLikeBinaryAndGlob() { + public function testTranslateLikeBinary() { // Create a temporary table for testing $this->assertQuery( - "CREATE TABLE _tmp_table ( + "CREATE TABLE _tmp_table ( ID INTEGER PRIMARY KEY AUTO_INCREMENT NOT NULL, - name varchar(20) NOT NULL default '' - );" + name varchar(20) + )" ); // Insert data into the table @@ -3052,70 +3052,110 @@ public function testTranslateLikeBinaryAndGlob() { $this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('special%chars');" ); $this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('special_chars');" ); $this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('special\\chars');" ); + $this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('aste*risk');" ); + $this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('question?mark');" ); - // Test case-sensitive LIKE BINARY + // Test exact string $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'first'" ); $this->assertCount( 1, $result ); $this->assertEquals( 'first', $result[0]->name ); - // Test case-sensitive LIKE BINARY with wildcard % + // Test exact string with no matches + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'third'" ); + $this->assertCount( 0, $result ); + + // Test mixed case + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'First'" ); + $this->assertCount( 0, $result ); + + // Test % wildcard $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'f%'" ); $this->assertCount( 1, $result ); $this->assertEquals( 'first', $result[0]->name ); - // Test case-sensitive LIKE BINARY with wildcard _ + // Test % wildcard with no matches + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'x%'" ); + $this->assertCount( 0, $result ); + + // Test "%" character (not a wildcard) + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\%chars'" ); + $this->assertCount( 1, $result ); + $this->assertEquals( 'special%chars', $result[0]->name ); + + // Test _ wildcard $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'f_rst'" ); $this->assertCount( 1, $result ); $this->assertEquals( 'first', $result[0]->name ); - // Test case-insensitive LIKE - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE 'FIRST'" ); - $this->assertCount( 2, $result ); // Should match both 'first' and 'FIRST' + // Test _ wildcard with no matches + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'x_yz'" ); + $this->assertCount( 0, $result ); - // Test mixed case with LIKE BINARY - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'First'" ); - $this->assertCount( 0, $result ); + // Test "_" character (not a wildcard) + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\_chars'" ); + $this->assertCount( 1, $result ); + $this->assertEquals( 'special_chars', $result[0]->name ); - // Test no matches with LIKE BINARY - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'third'" ); - $this->assertCount( 0, $result ); + // Test escaping of "*" + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'aste*risk'" ); + $this->assertCount( 1, $result ); + $this->assertEquals( 'aste*risk', $result[0]->name ); - // Test GLOB equivalent for case-sensitive matching with wildcard - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'f*'" ); - $this->assertCount( 1, $result ); - $this->assertEquals( 'first', $result[0]->name ); + // Test escaping of "*" with no matches + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'f*'" ); + $this->assertCount( 0, $result ); - // Test GLOB with single character wildcard - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'f?rst'" ); - $this->assertCount( 1, $result ); - $this->assertEquals( 'first', $result[0]->name ); - - // Test GLOB with no matches - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'S*'" ); - $this->assertCount( 0, $result ); + // Test escaping of "?" + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'question?mark'" ); + $this->assertCount( 1, $result ); + $this->assertEquals( 'question?mark', $result[0]->name ); - // Test GLOB case sensitivity with LIKE and GLOB - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'first';" ); - $this->assertCount( 1, $result ); // Should only match 'first' + // Test escaping of "?" with no matches + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'f?rst'" ); + $this->assertCount( 0, $result ); - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'FIRST';" ); - $this->assertCount( 1, $result ); // Should only match 'FIRST' + // Test escaping of character class + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY '[f]irst'" ); + $this->assertCount( 0, $result ); - // Test NULL comparison with LIKE BINARY - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'first';" ); - $this->assertCount( 1, $result ); - $this->assertEquals( 'first', $result[0]->name ); - - $result = $this->assertQuery( 'SELECT * FROM _tmp_table WHERE name LIKE BINARY NULL;' ); - $this->assertCount( 0, $result ); // NULL comparison should return no results + // Test NULL + $result = $this->assertQuery( 'SELECT * FROM _tmp_table WHERE name LIKE BINARY NULL' ); + $this->assertCount( 0, $result ); // Test pattern with special characters using LIKE BINARY - $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY '%special%';" ); + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY '%special%'" ); $this->assertCount( 4, $result ); $this->assertEquals( '%special%', $result[0]->name ); $this->assertEquals( 'special%chars', $result[1]->name ); $this->assertEquals( 'special_chars', $result[2]->name ); - $this->assertEquals( 'specialchars', $result[3]->name ); + $this->assertEquals( 'special\chars', $result[3]->name ); + + // Test escaping - "\t" is a tab character + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'firs\\t'" ); + $this->assertCount( 0, $result ); + + // Test escaping - "\\t" is "t" (input resolves to "\t", which LIKE resolves to "t") + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'firs\\\\t'" ); + $this->assertCount( 1, $result ); + $this->assertEquals( 'first', $result[0]->name ); + + // Test escaping - "\%" is a "%" literal + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\%chars'" ); + $this->assertCount( 1, $result ); + $this->assertEquals( 'special%chars', $result[0]->name ); + + // Test escaping - "\\%" is also a "%" literal + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\\\%chars'" ); + $this->assertCount( 1, $result ); + $this->assertEquals( 'special%chars', $result[0]->name ); + + // Test escaping - "\\\%" is "\" and a wildcard + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\\\\\%chars'" ); + $this->assertCount( 0, $result ); + + // Test LIKE without BINARY + $result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE 'FIRST'" ); + $this->assertCount( 2, $result ); // Should match both 'first' and 'FIRST' } public function testOnConflictReplace() { diff --git a/wp-includes/sqlite-ast/class-wp-sqlite-driver.php b/wp-includes/sqlite-ast/class-wp-sqlite-driver.php index 30752a07..b2ad7d55 100644 --- a/wp-includes/sqlite-ast/class-wp-sqlite-driver.php +++ b/wp-includes/sqlite-ast/class-wp-sqlite-driver.php @@ -1313,6 +1313,14 @@ private function translate( $ast ) { throw $this->not_supported_exception( sprintf( 'data type: %s', $child->value ) ); + case 'predicateOperations': + $token = $ast->get_child_token(); + if ( WP_MySQL_Lexer::LIKE_SYMBOL === $token->id ) { + return $this->translate_like( $ast ); + } else if ( WP_MySQL_Lexer::REGEXP_SYMBOL === $token->id ) { + return $this->translate_regexp_functions( $ast ); + } + return $this->translate_sequence( $ast->get_children() ); case 'systemVariable': // @TODO: Emulate some system variables, or use reasonable defaults. // See: https://dev.mysql.com/doc/refman/8.4/en/server-system-variable-reference.html @@ -1336,6 +1344,43 @@ private function translate( $ast ) { } } + private function translate_regexp_functions( WP_Parser_Node $node ): string { + $tokens = $node->get_descendant_tokens(); + $is_binary = isset($tokens[1]) && WP_MySQL_Lexer::BINARY_SYMBOL === $tokens[1]->id; + + /* + * If the query says REGEXP BINARY, the comparison is byte-by-byte + * and letter casing matters – lowercase and uppercase letters are + * represented using different byte codes. + * + * The REGEXP function can't be easily made to accept two + * parameters, so we'll have to use a hack to get around this. + * + * If the first character of the pattern is a null byte, we'll + * remove it and make the comparison case-sensitive. This should + * be reasonably safe since PHP does not allow null bytes in + * regular expressions anyway. + */ + if ( true === $is_binary ) { + return 'REGEXP CHAR(0) || ' . $this->translate( $node->get_child_node() ); + } + return 'REGEXP ' . $this->translate( $node->get_child_node() ); + } + + private function translate_like(WP_Parser_Node $node ): string { + $tokens = $node->get_descendant_tokens(); + $is_binary = isset($tokens[1]) && WP_MySQL_Lexer::BINARY_SYMBOL === $tokens[1]->id; + + if ( true === $is_binary ) { + $children = $node->get_children(); + return sprintf( + "GLOB _helper_like_to_glob_pattern(%s)", + $this->translate( $children[1] ) + ); + } + return $this->translate_sequence( $node->get_children() ); + } + private function translate_token( WP_MySQL_Token $token ) { switch ( $token->id ) { case WP_MySQL_Lexer::EOF: @@ -1346,6 +1391,13 @@ private function translate_token( WP_MySQL_Token $token ) { return '"' . trim( $token->value, '`"' ) . '"'; case WP_MySQL_Lexer::AUTO_INCREMENT_SYMBOL: return 'AUTOINCREMENT'; + case WP_MySQL_Lexer::BINARY_SYMBOL: + /* + * There is no "BINARY expr" equivalent in SQLite. We can look for + * the BINARY keyword in particular cases (with REGEXP, LIKE, etc.) + * and then remove it from the translated output here. + */ + return null; default: return $token->value; } diff --git a/wp-includes/sqlite/class-wp-sqlite-pdo-user-defined-functions.php b/wp-includes/sqlite/class-wp-sqlite-pdo-user-defined-functions.php index 6f0d83df..bcb971a1 100644 --- a/wp-includes/sqlite/class-wp-sqlite-pdo-user-defined-functions.php +++ b/wp-includes/sqlite/class-wp-sqlite-pdo-user-defined-functions.php @@ -85,6 +85,8 @@ public function __construct( $pdo ) { 'utc_time' => 'utc_time', 'utc_timestamp' => 'utc_timestamp', 'version' => 'version', + + '_helper_like_to_glob_pattern' => '_helper_like_to_glob_pattern', ); /** @@ -759,4 +761,74 @@ public function utc_timestamp() { public function version() { return '5.5'; } + + /** + * A helper to covert LIKE pattern to a GLOB pattern for "LIKE BINARY" support. + * + * To convert LIKE pattern to a GLOB pattern, we need to apply the following + * steps, while exactly maintaining the listed order: + * + * 1. Escape "]" as "[]]" to avoid interpreting "[...]" as a character class. + * 2. Escape "*" as "[*]" (must be after 1 to avoid being escaped). + * 3. Escape "?" as "[?]" (must be after 1 to avoid being escaped). + * 4. Unescape "\\" to "\" (must be before . + * 5. Replace "%" with "*" (when not escaped by "\"). + * 6. Replace "_" with "?" (when not escaped by "\"). + * 7. Unescape "\%" to "%". + * 8. Unescape "\_" to "_". + * + * @TODO: Unescaping backshlashes for a MySQL LIKE is actually more complex + * due to a bug: https://bugs.mysql.com/bug.php?id=84118 + * We can consider implementing compatibility with the bug, but it + * affects all LIKE patterns, not just LIKE BINARY. + * + * @param string $pattern + * @return string + */ + public function _helper_like_to_glob_pattern( $pattern ) { + if ( null === $pattern ) { + return null; + } + + $pattern = str_replace( ']', '[]]', $pattern ); + $pattern = str_replace( '*', '[*]', $pattern ); + $pattern = str_replace( '?', '[?]', $pattern ); + + $glob_pattern = ''; + for ( $i = 0; $i < strlen( $pattern ); $i += 1 ) { + $byte1 = $pattern[ $i ]; + if ( '\\' === $byte1 ) { + // Add the escape character. + $glob_pattern .= $byte1; + + // Special case: "\\%" and "\\_" are equivalent to "\%" and "\_". + // In such case, we need to skip the extra backslash. + $byte2 = $pattern[ $i + 1 ] ?? null; + $byte3 = $pattern[ $i + 2 ] ?? null; + if ( '\\' === $byte2 && ( '%' === $byte3 || '_' === $byte3 ) ) { + $glob_pattern .= $byte3; + $i += 2; + continue; + } + + // We're in an escape sequence. Add the next character as it is. + $glob_pattern .= $byte2; + $i += 1; + } elseif ( '%' === $byte1 ) { + $glob_pattern .= '*'; + } elseif ( '_' === $byte1 ) { + $glob_pattern .= '?'; + } else { + $glob_pattern .= $byte1; + } + } + + // 1. Unescape C-style escape sequences. + $glob_pattern = stripcslashes($glob_pattern); + + // 2. Unescape LIKE escape sequences. + $glob_pattern = preg_replace('/\\\\(.)/', '$1', $glob_pattern); + + return $glob_pattern; + } }