diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..c48b1a1 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,62 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Changed + +- Merge pull request #4 from bnkc/dependabot/cargo/pyo3-0.22.4 +- Bump pyo3 from 0.22.2 to 0.22.4 +- Changing toolchain +- Completed domain deliverability +- Restructuring project +- Update README.md + +## [1.0.0] - 2024-08-08 + +### Added + +- Add files via upload + +### Changed + +- Updating toml +- Renaming to emval +- Update README.md +- Update README.md +- Update benchmark.py +- Delete benchmark_results.svg +- Delete test.py +- Update README.md +- Delete perf.png +- Began new benchmarking +- Flattened out ValidatedEmail, working on valid tests +- Update README.md +- Update README.md +- Update README.md +- Create LICENSE +- Update README.md +- Update README.md +- Starting readme +- Close to completeing invalid syntax testing +- Deliverable flag and more invalid email tests +- Better PyError handling +- Python bindings +- Preparing for python binding migration +- Refactored tests, added original and normalized email to struct +- Validate special domains +- Setup case testing, passing test suite +- Still working on quoted local part tests +- Quoted local part tests pass +- Domain literals completed +- Working on fixing domain testing and validation +- Benchmarking +- Initial commit for new project structure + +[unreleased]: https://github.com/bnkc/emval/compare/v1.0.0..HEAD + + diff --git a/Cargo.lock b/Cargo.lock index 15bf6f7..d5bd934 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,21 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-version = 3 +version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "aho-corasick" @@ -11,11 +26,43 @@ dependencies = [ "memchr", ] +[[package]] +name = "async-trait" +version = "0.1.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "autocfg" -version = "1.3.0" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets 0.52.6", +] + +[[package]] +name = "bitflags" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "byteorder" @@ -23,12 +70,24 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "bytes" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "data-encoding" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" + [[package]] name = "displaydoc" version = "0.2.5" @@ -44,26 +103,48 @@ dependencies = [ name = "emval" version = "0.1.3" dependencies = [ - "idna", + "idna 1.0.3", "lazy_static", "pyo3", "regex", "rstest", + "trust-dns-resolver", "unicode-properties", "unicode_names2", ] +[[package]] +name = "enum-as-inner" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "equivalent" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + [[package]] name = "futures" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" dependencies = [ "futures-channel", "futures-core", @@ -76,9 +157,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", "futures-sink", @@ -86,15 +167,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] name = "futures-executor" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" dependencies = [ "futures-core", "futures-task", @@ -103,15 +184,15 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" [[package]] name = "futures-macro" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", @@ -120,15 +201,15 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] name = "futures-task" -version = "0.3.30" +version = "0.3.31" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] name = "futures-timer" @@ -138,9 +219,9 @@ checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" [[package]] name = "futures-util" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ "futures-channel", "futures-core", @@ -174,6 +255,12 @@ dependencies = [ "wasi", ] +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + [[package]] name = "glob" version = "0.3.1" @@ -182,9 +269,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "hashbrown" -version = "0.14.5" +version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" [[package]] name = "heck" @@ -192,6 +279,23 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "hostname" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867" +dependencies = [ + 
"libc", + "match_cfg", + "winapi", +] + [[package]] name = "icu_collections" version = "1.5.0" @@ -312,21 +416,40 @@ dependencies = [ [[package]] name = "idna" -version = "1.0.2" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd69211b9b519e98303c015e21a007e293db403b6c85b9b124e133d25e242cdd" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" dependencies = [ - "icu_normalizer", - "icu_properties", + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", "smallvec", "utf8_iter", ] +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + [[package]] name = "indexmap" -version = "2.2.6" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", "hashbrown", @@ -338,6 +461,24 @@ version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +[[package]] +name = "ipconfig" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b58db92f96b720de98181bbbe63c831e87005ab460c1bf306eb2622b4707997f" +dependencies = [ + "socket2", + "widestring", + "windows-sys 0.48.0", + "winreg", +] + +[[package]] +name = "ipnet" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" + [[package]] name = "lazy_static" version = "1.5.0" @@ -346,15 +487,31 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.166" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2ccc108bbc0b1331bd061864e7cd823c0cab660bbe6970e66e2c0614decde36" + +[[package]] +name = "linked-hash-map" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" [[package]] name = "litemap" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] [[package]] name = "log" @@ -362,6 +519,21 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "lru-cache" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e24f1ad8321ca0e8a1e0ac13f23cb668e6f5466c2c57319f6a5cf1cc8e3b1c" +dependencies = [ + "linked-hash-map", +] + +[[package]] +name = "match_cfg" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" + [[package]] name = "memchr" version = "2.7.4" @@ -377,11 +549,70 @@ dependencies = [ "autocfg", 
] +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +dependencies = [ + "hermit-abi", + "libc", + "wasi", + "windows-sys 0.52.0", +] + +[[package]] +name = "object" +version = "0.36.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +dependencies = [ + "memchr", +] + [[package]] name = "once_cell" -version = "1.19.0" +version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "phf" @@ -423,9 +654,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.14" +version = "0.2.15" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" [[package]] name = "pin-utils" @@ -435,42 +666,42 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "portable-atomic" -version = "1.6.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" +checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" [[package]] name = "ppv-lite86" -version = "0.2.18" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee4364d9f3b902ef14fab8a1ddffb783a1cb6b4bba3bfc1fa3922732c7de97f" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" dependencies = [ "zerocopy", ] [[package]] name = "proc-macro-crate" -version = "3.1.0" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" +checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" dependencies = [ "toml_edit", ] [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] [[package]] name = "pyo3" -version = "0.22.4" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00e89ce2565d6044ca31a3eb79a334c3a79a841120a98f64eea9f579564cb691" +checksum = "f54b3d09cbdd1f8c20650b28e7b09e338881482f4aa908a5f61a00c98fba2690" dependencies = [ "cfg-if", "indoc", @@ -486,9 +717,9 @@ dependencies = [ 
[[package]] name = "pyo3-build-config" -version = "0.22.4" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8afbaf3abd7325e08f35ffb8deb5892046fcb2608b703db6a583a5ba4cea01e" +checksum = "3015cf985888fe66cfb63ce0e321c603706cd541b7aec7ddd35c281390af45d8" dependencies = [ "once_cell", "target-lexicon", @@ -496,9 +727,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.4" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec15a5ba277339d04763f4c23d85987a5b08cbb494860be141e6a10a8eb88022" +checksum = "6fca7cd8fd809b5ac4eefb89c1f98f7a7651d3739dfb341ca6980090f554c270" dependencies = [ "libc", "pyo3-build-config", @@ -506,9 +737,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.4" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e0f01b5364bcfbb686a52fc4181d412b708a68ed20c330db9fc8d2c2bf5a43" +checksum = "34e657fa5379a79151b6ff5328d9216a84f55dc93b17b08e7c3609a969b73aa0" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -518,9 +749,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.22.4" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a09b550200e1e5ed9176976d0060cbc2ea82dc8515da07885e7b8153a85caacb" +checksum = "295548d5ffd95fd1981d2d3cf4458831b21d60af046b729b6fd143b0ba7aee2f" dependencies = [ "heck", "proc-macro2", @@ -529,11 +760,17 @@ dependencies = [ "syn", ] +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = 
"b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -568,11 +805,20 @@ dependencies = [ "getrandom", ] +[[package]] +name = "redox_syscall" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +dependencies = [ + "bitflags", +] + [[package]] name = "regex" -version = "1.10.5" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -582,9 +828,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -593,9 +839,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "relative-path" @@ -603,6 +849,16 @@ version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" +[[package]] +name = "resolv-conf" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52e44394d2086d010551b14b53b1f24e31647570cd1deb0379e2c21b329aba00" +dependencies = [ + "hostname", + "quick-error", +] + [[package]] name = "rstest" version = "0.21.0" @@ -633,15 +889,27 @@ dependencies = [ 
"unicode-ident", ] +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + [[package]] name = "rustc_version" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ "semver", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "semver" version = "1.0.23" @@ -650,18 +918,18 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" -version = "1.0.204" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.204" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", @@ -689,6 +957,16 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +[[package]] +name = "socket2" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + 
[[package]] name = "stable_deref_trait" version = "1.2.0" @@ -697,9 +975,9 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "syn" -version = "2.0.71" +version = "2.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462" +checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e" dependencies = [ "proc-macro2", "quote", @@ -719,9 +997,29 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.15" +version = "0.12.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4873307b7c257eddcb50c9bedf158eb669578359fb28428bef438fec8e6ba7c2" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "tinystr" @@ -733,46 +1031,168 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinyvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.41.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "pin-project-lite", + "socket2", + "windows-sys 0.52.0", +] + [[package]] name = "toml_datetime" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" [[package]] name = "toml_edit" -version = "0.21.1" +version = "0.22.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" dependencies = [ "indexmap", "toml_datetime", "winnow", ] +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" +dependencies = [ + "once_cell", +] + +[[package]] +name = "trust-dns-proto" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3119112651c157f4488931a01e586aa459736e9d6046d3bd9105ffb69352d374" +dependencies = [ + "async-trait", + "cfg-if", + "data-encoding", + "enum-as-inner", + "futures-channel", + "futures-io", + "futures-util", + "idna 0.4.0", + 
"ipnet", + "once_cell", + "rand", + "smallvec", + "thiserror", + "tinyvec", + "tokio", + "tracing", + "url", +] + +[[package]] +name = "trust-dns-resolver" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a3e6c3aff1718b3c73e395d1f35202ba2ffa847c6a62eea0db8fb4cfe30be6" +dependencies = [ + "cfg-if", + "futures-util", + "ipconfig", + "lru-cache", + "once_cell", + "parking_lot", + "rand", + "resolv-conf", + "smallvec", + "thiserror", + "tokio", + "tracing", + "trust-dns-proto", +] + +[[package]] +name = "unicode-bidi" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" + [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" + +[[package]] +name = "unicode-normalization" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +dependencies = [ + "tinyvec", +] [[package]] name = "unicode-properties" -version = "0.1.1" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4259d9d4425d9f0661581b804cb85fe66a4c631cadd8f490d1c13a35d5d9291" +checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" [[package]] name = "unicode-width" -version = "0.1.13" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "unicode_names2" -version = "1.2.2" +version = "1.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "addeebf294df7922a1164f729fb27ebbbcea99cc32b3bf08afab62757f707677" +checksum = "d1673eca9782c84de5f81b82e4109dcfb3611c8ba0d52930ec4a9478f547b2dd" dependencies = [ "phf", "unicode_names2_generator", @@ -780,9 +1200,9 @@ dependencies = [ [[package]] name = "unicode_names2_generator" -version = "1.2.2" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f444b8bba042fe3c1251ffaca35c603f2dc2ccc08d595c65a8c4f76f3e8426c0" +checksum = "b91e5b84611016120197efd7dc93ef76774f4e084cd73c9fb3ea4a86c570c56e" dependencies = [ "getopts", "log", @@ -796,6 +1216,17 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" +[[package]] +name = "url" +version = "2.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +dependencies = [ + "form_urlencoded", + "idna 1.0.3", + "percent-encoding", +] + [[package]] name = "utf16_iter" version = "1.0.5" @@ -814,15 +1245,192 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "widestring" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7219d36b6eac893fa81e84ebe06485e7dcbb616177469b142df14f1f4deb1311" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = 
"0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + [[package]] name = "winnow" -version = "0.5.40" +version = "0.6.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" dependencies = [ "memchr", ] +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "write16" version = "1.0.0" @@ -837,9 +1445,9 @@ checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" [[package]] name = "yoke" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" dependencies = [ "serde", "stable_deref_trait", @@ -849,9 +1457,9 @@ 
dependencies = [ [[package]] name = "yoke-derive" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", @@ -861,9 +1469,9 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.6.6" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854e949ac82d619ee9a14c66a1b674ac730422372ccb759ce0c39cabcf2bf8e6" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ "byteorder", "zerocopy-derive", @@ -871,9 +1479,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.6.6" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "125139de3f6b9d625c39e2efdd73d41bdac468ccd556556440e322be0e1bbd91" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", @@ -882,18 +1490,18 @@ dependencies = [ [[package]] name = "zerofrom" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 5b10bb1..7109675 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ name = "_emval" crate-type = ["cdylib"] [dependencies] -pyo3 = 
"0.22.4" +pyo3 = "0.23.0" # emval dependencies idna = "1.0.2" @@ -19,3 +19,4 @@ regex = "1.10.5" unicode-properties = "0.1.1" rstest = "0.21.0" unicode_names2 = "1.2.2" +trust-dns-resolver = "0.23.2" diff --git a/cliff.toml b/cliff.toml new file mode 100644 index 0000000..2308193 --- /dev/null +++ b/cliff.toml @@ -0,0 +1,71 @@ +# git-cliff ~ configuration file +# https://git-cliff.org/docs/configuration + +[changelog] +# template for the changelog header +header = """ +# Changelog\n +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\n +""" +# template for the changelog body +# https://keats.github.io/tera/docs/#introduction +body = """ +{% if version -%} + ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }} +{% else -%} + ## [Unreleased] +{% endif -%} +{% for group, commits in commits | group_by(attribute="group") %} + ### {{ group | upper_first }} + {% for commit in commits %} + - {{ commit.message | split(pat="\n") | first | upper_first | trim }}\ + {% endfor %} +{% endfor %}\n +""" +# template for the changelog footer +footer = """ +{% for release in releases -%} + {% if release.version -%} + {% if release.previous.version -%} + [{{ release.version | trim_start_matches(pat="v") }}]: \ + https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}\ + /compare/{{ release.previous.version }}..{{ release.version }} + {% endif -%} + {% else -%} + [unreleased]: https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}\ + /compare/{{ release.previous.version }}..HEAD + {% endif -%} +{% endfor %} + +""" +# remove the leading and trailing whitespace from the templates +trim = true + +[git] +# parse the commits based on https://www.conventionalcommits.org +conventional_commits = true +# filter out the commits that are not 
conventional +filter_unconventional = false +# regex for parsing and grouping commits +commit_parsers = [ + { message = "^[a|A]dd", group = "Added" }, + { message = "^[s|S]upport", group = "Added" }, + { message = "^[r|R]emove", group = "Removed" }, + { message = "^.*: add", group = "Added" }, + { message = "^.*: support", group = "Added" }, + { message = "^.*: remove", group = "Removed" }, + { message = "^.*: delete", group = "Removed" }, + { message = "^test", group = "Fixed" }, + { message = "^fix", group = "Fixed" }, + { message = "^.*: fix", group = "Fixed" }, + { message = "^.*", group = "Changed" }, +] +# filter out the commits that are not matched by commit parsers +filter_commits = false +# sort the tags topologically +topo_order = false +# sort the commits inside sections by oldest/newest order +sort_commits = "newest" diff --git a/emval/model.py b/emval/model.py index 868a74a..5abf41e 100644 --- a/emval/model.py +++ b/emval/model.py @@ -1,4 +1,4 @@ -from typing import Optional, Dict, Any +from typing import Any class ValidatedEmail: @@ -11,11 +11,15 @@ class ValidatedEmail: local_part (str): The local part of the email address (the part before the '@' sign) after it has been Unicode normalized. domain_name (str): The domain part of the email address (the part after the '@' sign) after Unicode normalization. domain_address (Optional[str]): If the domain part is a domain literal, it will be an IPv4Address or IPv6Address object. + is_deliverable (bool): Whether the email address is deliverable. Methods: __repr__() -> str: Returns a string representation of the ValidatedEmail instance, displaying all its attributes. + __eq__(other: Any) -> bool: + Compares two ValidatedEmail instances for equality. + as_dict() -> Dict[str, Any]: Returns a dictionary representation of the ValidatedEmail instance. If the domain_address is present, it is converted to a string. 
""" @@ -26,21 +30,23 @@ def __init__( normalized: str, local_part: str, domain_name: str, - domain_address: Optional[str] = None, + domain_address: str | None = None, + is_deliverable: bool = True, ): self.original = original self.normalized = normalized self.local_part = local_part self.domain_name = domain_name self.domain_address = domain_address + self.is_deliverable = is_deliverable def __repr__(self) -> str: return ( f"ValidatedEmail(original={self.original}, normalized={self.normalized}, " - f"local_part={self.local_part}, domain_name={self.domain_name}, domain_address={self.domain_address})" + f"local_part={self.local_part}, domain_name={self.domain_name}, domain_address={self.domain_address}, is_deliverable={self.is_deliverable})" ) - def __eq__(self, other) -> bool: + def __eq__(self, other: object) -> bool: if isinstance(other, ValidatedEmail): return ( self.original == other.original @@ -48,10 +54,11 @@ def __eq__(self, other) -> bool: and self.local_part == other.local_part and self.domain_name == other.domain_name and self.domain_address == other.domain_address + and self.is_deliverable == other.is_deliverable ) return False - def as_dict(self) -> Dict[str, Any]: + def as_dict(self) -> dict[str, Any]: d = self.__dict__ if d.get("domain_address"): d["domain_address"] = repr(d["domain_address"]) diff --git a/emval/validator.py b/emval/validator.py index abcf4ee..d2cd7c0 100644 --- a/emval/validator.py +++ b/emval/validator.py @@ -1,7 +1,9 @@ from typing import Union -from .model import ValidatedEmail + from emval import _emval +from .model import ValidatedEmail + class EmailValidator: """ @@ -47,11 +49,6 @@ def validate_email(self, email: Union[str, bytes]) -> ValidatedEmail: Returns: A ValidatedEmail instance if the email is valid. - - Raises: - SyntaxError: If the email syntax is invalid. - DomainLiteralError: If domain literals are not allowed. - LengthError: If the email length exceeds the maximum allowed length. 
""" validated_email = self._emval.validate_email(email) return ValidatedEmail( @@ -60,6 +57,7 @@ def validate_email(self, email: Union[str, bytes]) -> ValidatedEmail: local_part=validated_email.local_part, domain_name=validated_email.domain_name, domain_address=validated_email.domain_address, + is_deliverable=validated_email.is_deliverable, ) diff --git a/pyproject.toml b/pyproject.toml index bb279e4..ddc5d6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ authors = [ ] description = "emval is a blazingly fast email validator" readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.10" classifiers = [ "License :: OSI Approved :: MIT License", "Intended Audience :: Developers", diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..5d56faf --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "nightly" diff --git a/src/consts.rs b/src/consts.rs new file mode 100644 index 0000000..94537de --- /dev/null +++ b/src/consts.rs @@ -0,0 +1,55 @@ +use std::usize; + +use regex::bytes::Regex; + +lazy_static! 
{ + // See https://www.rfc-editor.org/rfc/rfc5322.html#section-3.2.3 + pub static ref ATEXT: &'static str = r"a-zA-Z0-9_!#\$%&'\*\+\-/=\?\^`\{\|\}~"; + pub static ref ATEXT_RE: Regex = Regex::new(&format!(r"[.{}]", *ATEXT)).unwrap(); + pub static ref DOT_ATOM_TEXT: Regex = Regex::new(&format!(r"^[{}]+(?:\.[{}]+)*$", *ATEXT, *ATEXT)).unwrap(); + + // RFC 6531 3.3 extends allowed characters in internationalized addresses + static ref ATEXT_INTL: String = format!("{}{}", *ATEXT, r"\u{0080}-\u{10FFFF}"); + pub static ref ATEXT_INTL_DOT_RE: Regex = Regex::new(&format!(r"[.{}]", *ATEXT_INTL)).unwrap(); + pub static ref DOT_ATOM_TEXT_INTL: Regex = + Regex::new(&format!(r"^[{}]+(?:\.[{}]+)*$", *ATEXT_INTL, *ATEXT_INTL)).unwrap(); + + // The domain part of the email address, after IDNA (ASCII) encoding, + // must also satisfy the requirements of RFC 952/RFC 1123 2.1 + pub static ref ATEXT_HOSTNAME_INTL: Regex = Regex::new(r"^[a-zA-Z0-9\-\.\u{0080}-\u{10FFFF}]+$").unwrap(); + pub static ref HOSTNAME_LABEL: &'static str = r"(?:(?:[a-zA-Z0-9][a-zA-Z0-9\-]*)?[a-zA-Z0-9])"; + pub static ref DOT_ATOM_TEXT_HOSTNAME: Regex = + Regex::new(&format!(r"^{}(?:\.{})*$", *HOSTNAME_LABEL, *HOSTNAME_LABEL)).unwrap(); + pub static ref DOMAIN_NAME_REGEX: Regex = Regex::new(r"[A-Za-z]\z").unwrap(); + + // Domain literal (RFC 5322 3.4.1) + pub static ref DOMAIN_LITERAL_CHARS: Regex = Regex::new(r"[\u0021-\u00FA\u005E-\u007E]").unwrap(); + + // See https://www.rfc-editor.org/rfc/rfc5321.html#section-4.1.2 + pub static ref QTEXT_INTL: Regex = Regex::new(r"[\u0020-\u007E\u0080-\u{10FFFF}]").unwrap(); + pub static ref DNS_LABEL_REGEX: Regex = Regex::new(r"(?i)^.{2}--").unwrap(); +} + +pub const MAX_ADDRESS_LENGTH: usize = 254; +pub const MAX_DOMAIN_LENGTH: usize = 253; +pub const MAX_LOCAL_PART_LENGTH: usize = 64; +pub const MAX_DNS_LABEL_LENGTH: usize = 63; +pub const SPECIAL_USE_DOMAIN_NAMES: &[&str] = + &["arpa", "invalid", "local", "localhost", "onion", "test"]; +pub const 
CASE_INSENSITIVE_MAILBOX_NAMES: &[&str] = &[ + "info", + "marketing", + "sales", + "support", + "abuse", + "noc", + "security", + "postmaster", + "hostmaster", + "usenet", + "news", + "webmaster", + "www", + "uucp", + "ftp", +]; diff --git a/src/errors.rs b/src/errors.rs new file mode 100644 index 0000000..ab27bbc --- /dev/null +++ b/src/errors.rs @@ -0,0 +1,17 @@ +use pyo3::exceptions::{PySyntaxError, PyValueError}; +use pyo3::prelude::*; + +#[derive(Debug)] +pub enum ValidationError { + SyntaxError(String), + ValueError(String), +} + +impl From<ValidationError> for PyErr { + fn from(err: ValidationError) -> Self { + match err { + ValidationError::SyntaxError(msg) => PySyntaxError::new_err(msg), + ValidationError::ValueError(msg) => PyValueError::new_err(msg), + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 2e79146..3295a90 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,983 +1,17 @@ +#![feature(ip)] #[macro_use] extern crate lazy_static; +mod consts; +mod errors; +mod models; +mod validators; -use std::collections::HashSet; -use std::net::IpAddr; -use std::str::FromStr; -use std::usize; - -use idna::uts46::Uts46; -use idna::uts46::{AsciiDenyList, DnsLength, Hyphens}; -use pyo3::exceptions::{PySyntaxError, PyValueError}; use pyo3::prelude::*; -use regex::bytes::Regex; -use unicode_properties::{GeneralCategoryGroup, UnicodeGeneralCategory}; - -lazy_static!
{ - // See https://www.rfc-editor.org/rfc/rfc5322.html#section-3.2.3 - static ref ATEXT: &'static str = r"a-zA-Z0-9_!#\$%&'\*\+\-/=\?\^`\{\|\}~"; - static ref ATEXT_RE: Regex = Regex::new(&format!(r"[.{}]", *ATEXT)).unwrap(); - static ref DOT_ATOM_TEXT: Regex = Regex::new(&format!(r"^[{}]+(?:\.[{}]+)*$", *ATEXT, *ATEXT)).unwrap(); - - // RFC 6531 3.3 extends allowed characters in internationalized addresses - static ref ATEXT_INTL: String = format!("{}{}", *ATEXT, r"\u{0080}-\u{10FFFF}"); - static ref ATEXT_INTL_DOT_RE: Regex = Regex::new(&format!(r"[.{}]", *ATEXT_INTL)).unwrap(); - static ref DOT_ATOM_TEXT_INTL: Regex = Regex::new(&format!(r"^[{}]+(?:\.[{}]+)*$", *ATEXT_INTL, *ATEXT_INTL)).unwrap(); - - // The domain part of the email address, after IDNA (ASCII) encoding, - // must also satisfy the requirements of RFC 952/RFC 1123 2.1 - static ref ATEXT_HOSTNAME_INTL: Regex = Regex::new(r"^[a-zA-Z0-9\-\.\u{0080}-\u{10FFFF}]+$").unwrap(); - static ref HOSTNAME_LABEL: &'static str = r"(?:(?:[a-zA-Z0-9][a-zA-Z0-9\-]*)?[a-zA-Z0-9])"; - static ref DOT_ATOM_TEXT_HOSTNAME: Regex = Regex::new(&format!(r"^{}(?:\.{})*$", *HOSTNAME_LABEL, *HOSTNAME_LABEL)).unwrap(); - static ref DOMAIN_NAME_REGEX: Regex = Regex::new(r"[A-Za-z]\z").unwrap(); - - // Domain literal (RFC 5322 3.4.1) - static ref DOMAIN_LITERAL_CHARS: Regex = Regex::new(r"[\u0021-\u00FA\u005E-\u007E]").unwrap(); - - // See https://www.rfc-editor.org/rfc/rfc5321.html#section-4.1.2 - static ref QTEXT_INTL: Regex = Regex::new(r"[\u0020-\u007E\u0080-\u{10FFFF}]").unwrap(); - static ref DNS_LABEL_REGEX: Regex = Regex::new(r"(?i)^.{2}--").unwrap(); -} - -const MAX_ADDRESS_LENGTH: usize = 254; -const MAX_DOMAIN_LENGTH: usize = 253; -const MAX_LOCAL_PART_LENGTH: usize = 64; -const MAX_DNS_LABEL_LENGTH: usize = 63; -const SPECIAL_USE_DOMAIN_NAMES: &[&str] = - &["arpa", "invalid", "local", "localhost", "onion", "test"]; -const CASE_INSENSITIVE_MAILBOX_NAMES: &[&str] = &[ - "info", - "marketing", - "sales", - "support", - 
"abuse", - "noc", - "security", - "postmaster", - "hostmaster", - "usenet", - "news", - "webmaster", - "www", - "uucp", - "ftp", -]; - -#[pyclass] -struct ValidatedEmail { - #[pyo3(get)] - original: String, - #[pyo3(get)] - normalized: String, - #[pyo3(get)] - local_part: String, - #[pyo3(get)] - domain_address: Option, - #[pyo3(get)] - domain_name: String, -} - -#[derive(Default)] -#[pyclass] -struct EmailValidator { - allow_smtputf8: bool, - allow_empty_local: bool, - allow_quoted_local: bool, - allow_domain_literal: bool, - deliverable_address: bool, -} - -#[pymethods] -impl EmailValidator { - #[new] - #[pyo3(signature = ( - allow_smtputf8 = true, - allow_empty_local = false, - allow_quoted_local = false, - allow_domain_literal = false, - deliverable_address = true, - - ))] - fn new( - allow_smtputf8: bool, - allow_empty_local: bool, - allow_quoted_local: bool, - allow_domain_literal: bool, - deliverable_address: bool, - ) -> Self { - EmailValidator { - allow_smtputf8, - allow_empty_local, - allow_quoted_local, - allow_domain_literal, - deliverable_address, - } - } - - fn validate_email(&self, email: &str) -> PyResult { - // Split the email into local part and domain - let (unvalidated_local_part, unvalidated_domain) = _split_email(&email)?; - - // Validate length of the local part and the domain - _validate_email_length(&unvalidated_local_part, &unvalidated_domain)?; - - // Validate local part and convert to lowercase if necessary - let mut validated_local = self._validate_local_part(&unvalidated_local_part)?; - if CASE_INSENSITIVE_MAILBOX_NAMES.contains(&validated_local.to_lowercase().as_str()) { - validated_local = validated_local.to_lowercase(); - } - - // Validate the domain name and optional address - let (domain_name, domain_address) = self._validate_domain(&unvalidated_domain)?; - - // Construct the normalized email - let normalized = format!("{}@{}", validated_local, domain_name); - - Ok(ValidatedEmail { - original: email.to_string(), - local_part: 
validated_local, - domain_name, - domain_address, - normalized, - }) - } - - fn _validate_local_part(&self, local: &str) -> PyResult { - // Guard clause for empty local part - if local.is_empty() { - return if self.allow_empty_local { - Ok(local.to_string()) - } else { - Err(PySyntaxError::new_err( - "Invalid Local Part: The part before the '@' sign cannot be empty.", - )) - }; - } - - // Remove surrounding quotes, unescaping any escaped characters within quotes - let unquoted_local = _unquote_local_part(local, self.allow_quoted_local)?; - - // Local part length validation - if unquoted_local.len() > MAX_LOCAL_PART_LENGTH { - return Err(PyValueError::new_err( - "Invalid Local Part: The part before the '@' sign exceeds the maximum length (64 chars).", - )); - } - - // Check for valid dot-atom text - if DOT_ATOM_TEXT.is_match(unquoted_local.as_bytes()) { - return Ok(unquoted_local); - } - - // Check for valid internationalized dot-atom text - if DOT_ATOM_TEXT_INTL.is_match(unquoted_local.as_bytes()) { - if !self.allow_smtputf8 { - return Err(PySyntaxError::new_err( - "Invalid Local Part: Internationalized characters before the '@' sign are not supported.", - )); - } - _validate_chars(&unquoted_local, false)?; - - // Check for valid UTF-8 encoding - if String::from_utf8(unquoted_local.as_bytes().to_vec()).is_err() { - return Err(PySyntaxError::new_err( - "Invalid Local Part: Contains non-UTF-8 characters.", - )); - } - - return Ok(unquoted_local.to_string()); - } - - // Check for quoted local part and validate - if local.starts_with('"') && local.ends_with('"') { - let invalid_chars: HashSet<_> = local - .chars() - .filter(|&c| !QTEXT_INTL.is_match(c.to_string().as_bytes())) - .collect(); - - if !invalid_chars.is_empty() { - return Err(PySyntaxError::new_err( - "Invalid Local Part: contains invalid characters within quoted local part before the '@' sign.", - )); - } - - let invalid_non_ascii_chars: HashSet<_> = local - .chars() - .filter(|&c| !(32..=126).contains(&(c 
as u32))) - .collect(); - - if !invalid_non_ascii_chars.is_empty() && !self.allow_smtputf8 { - return Err(PySyntaxError::new_err( - "Invalid Local Part: Internationalized characters before the '@' sign are not supported." - )); - } - - _validate_chars(&unquoted_local, true)?; - - // Check for valid UTF-8 encoding - if String::from_utf8(local.as_bytes().to_vec()).is_err() { - return Err(PySyntaxError::new_err( - "Invalid Local Part: Contains non-UTF-8 characters.", - )); - } - - return Ok(local.to_string()); - } - - // Check for other invalid characters - let invalid_chars: HashSet<_> = unquoted_local - .chars() - .filter(|&c| !ATEXT_INTL_DOT_RE.is_match(c.to_string().as_bytes())) - .collect(); - - if !invalid_chars.is_empty() { - return Err(PySyntaxError::new_err( - "Invalid Local Part: contains invalid characters before the '@' sign.", - )); - } - - // Validates the local part of an email address based on RFC 952, RFC 1123, and RFC 5322. - // Each label must have at least one character and cannot start or end with dashes or periods. - // Consecutive periods and adjacent period-hyphen combinations are also invalid. 
- _validate_email_label( - local, - "Invalid Local Part: Cannot start with a {}.", - "Invalid Local Part: A {} cannot immediately precede the '@' sign.", - true, - )?; - - Err(PySyntaxError::new_err( - "Invalid Local Part: contains invalid characters before the '@' sign.", - )) - } - - fn _validate_domain(&self, domain: &str) -> PyResult<(String, Option)> { - // Guard clause if domain is being executed independently - if domain.is_empty() { - return Err(PySyntaxError::new_err( - "Invalid Domain: The part after the '@' sign cannot be empty.", - )); - } - - // Address Literals - if domain.starts_with('[') && domain.ends_with(']') { - if !self.allow_domain_literal { - return Err(PyValueError::new_err( - "Invalid Domain: A bracketed IP address after the '@' sign is not permitted.", - )); - } - - let domain_literal = &domain[1..domain.len() - 1]; - - // Handle IPv6 addresses - if domain_literal.starts_with("IPv6:") { - let ipv6_literal = &domain_literal[5..]; - let addr = IpAddr::from_str(ipv6_literal).map_err(|_| { - PySyntaxError::new_err( - "Invalid Domain: The IPv6 address in brackets following the '@' symbol is not valid.", - ) - })?; - if let IpAddr::V6(addr) = addr { - return Ok((format!("[IPv6:{}]", addr), Some(IpAddr::V6(addr)))); - } - } - - // Try to parse the domain literal as an IP address (either IPv4 or IPv6) - let addr = IpAddr::from_str(domain_literal).map_err(|_| { - PySyntaxError::new_err("Invalid Domain: The address in brackets following the '@' sign is not a valid IP address.") - })?; - - let name = match addr { - IpAddr::V4(_) => format!("[{}]", addr), - IpAddr::V6(_) => format!("[IPv6:{}]", addr), - }; - - return Ok((name, Some(addr))); - } - - // Check for invalid characters in the domain part - if !ATEXT_HOSTNAME_INTL.is_match(domain.as_bytes()) { - return Err(PySyntaxError::new_err( - "Invalid Domain: Contains invalid characters after '@' sign.", - )); - } - - // Check for unsafe characters - _validate_chars(domain, false)?; - - // Normalize 
the domain using UTS-46 - let normalized_domain = Uts46::new() - .to_ascii( - domain.as_bytes(), - AsciiDenyList::URL, - Hyphens::Allow, - DnsLength::Verify, - ) - .map_err(|_| { - PySyntaxError::new_err( - "Invalid Domain: Contains invalid characters after '@' sign post Unicode normalization.", - ) - })?; - - // Check for invalid chars after normalization - if !ATEXT_HOSTNAME_INTL.is_match(normalized_domain.as_bytes()) { - return Err(PySyntaxError::new_err( - "Invalid Domain: Contains invalid characters after Unicode normalization.", - )); - } - - // Validates the domain part of an email address based on RFC 952, RFC 1123, and RFC 5322. - // Each label must have at least one character and cannot start or end with dashes or periods. - // Consecutive periods and adjacent period-hyphen combinations are also invalid. - _validate_email_label( - &normalized_domain, - "Invalid Domain: A {} cannot immediately follow the '@' symbol.", - "Invalid Domain: A {} cannot appear at the end of the domain.", - true, - )?; - - // Check the total length of the domain - if normalized_domain.len() > MAX_DOMAIN_LENGTH { - return Err(PyValueError::new_err( - "Invalid Domain: Exceeds the maximum length (253 chars).", - )); - } - - // Check for invalid domain labels - for label in normalized_domain.split('.') { - if label.len() > MAX_DNS_LABEL_LENGTH { - return Err(PyValueError::new_err( - "Invalid Label: Exceeds the maximum length (63 chars).", - )); - } - - if label.is_empty() { - return Err(PySyntaxError::new_err( - "Invalid Label: The Label cannot be empty.", - )); - } - - // Check for two letters followed by two dashes - if DNS_LABEL_REGEX.is_match(label.as_bytes()) - && !label.to_lowercase().starts_with("xn--") - { - return Err(PySyntaxError::new_err( - "Invalid Domain: Two letters followed by two dashes ('--') are not allowed immediately after the '@' sign or a period.", - )); - } - } - - if self.deliverable_address { - // Deliverable addresses must contain atleast one period. 
- if !normalized_domain.contains(".") { - return Err(PySyntaxError::new_err( - "Invalid Domain: Must contain a period ('.') to be considered valid.", - )); - } - - // TLDs must end with a letter. - if !DOMAIN_NAME_REGEX.is_match(normalized_domain.as_bytes()) { - return Err(PySyntaxError::new_err( - "Invalid domain: The part after the '@' sign does not belong to a valid top-level domain (TLD).", - )); - } - } - - // Check for reserved and "special use" domains - for &special_domain in SPECIAL_USE_DOMAIN_NAMES { - if normalized_domain == special_domain - || normalized_domain.ends_with(&format!(".{}", special_domain)) - { - return Err(PySyntaxError::new_err( - "Invalid Domain: The part after the '@' sign is a reserved or special-use domain that cannot be used.", - )); - } - } - Ok((normalized_domain.to_string(), None)) - } -} - -fn _unquote_local_part(local: &str, allow_quoted: bool) -> Result { - if local.starts_with('"') && local.ends_with('"') { - // Check that the quoted local part is allowed, otherwise raise exception - if !allow_quoted { - return Err(PySyntaxError::new_err( - "Invalid Local Part: Quoting the local part before the '@' sign is not permitted in this context.", - )); - } - - let mut unquoted = String::new(); - let mut chars = local[1..local.len() - 1].chars(); - let mut escaped = false; - - while let Some(c) = chars.next() { - if escaped { - unquoted.push(c); - escaped = false; - } else if c == '\\' { - escaped = true; - } else { - unquoted.push(c); - } - } - - if escaped { - return Err(PySyntaxError::new_err( - "Invalid Local Part: Trailing escape character in the quoted local part before the '@' sign.", - )); - } - - Ok(unquoted) - } else { - Ok(local.to_string()) - } -} - -fn _split_email(email: &str) -> Result<(String, String), PyErr> { - let at_pos = email - .rfind('@') - .ok_or_else(|| PySyntaxError::new_err("Invalid Email Address: Missing an '@' sign."))?; - - let local_part = &email[..at_pos]; - let domain_part = &email[at_pos + 1..]; - - 
Ok((local_part.to_string(), domain_part.to_string())) -} - -fn _validate_email_length(local_part: &str, domain: &str) -> Result<(), PyErr> { - if local_part.len() + domain.len() + 1 > MAX_ADDRESS_LENGTH { - return Err(PyValueError::new_err( - "Invalid Email Address: The email exceeds the maximum length (254 chars).", - )); - } - Ok(()) -} - -fn _validate_email_label( - label: &str, - beg_descr: &str, - end_descr: &str, - is_hostname: bool, -) -> Result<(), PyErr> { - let errors = [ - (label.ends_with('.'), end_descr.replace("{}", "period")), - (label.starts_with('.'), beg_descr.replace("{}", "period")), - ( - label.contains(".."), - "Invalid Email Address: Two periods ('.') cannot be adjacent in the email address.".to_string(), - ), - ( - is_hostname && label.ends_with('-'), - end_descr.replace("{}", "hyphen ('-')"), - ), - ( - is_hostname && label.starts_with('-'), - beg_descr.replace("{}", "hyphen ('-')"), - ), - ( - is_hostname && (label.contains("-.") || label.contains(".-")), - "Invalid Email Address: A period ('.') and a hyphen ('-') cannot be adjacent in the email address.".to_string(), - ), - ]; - - for (condition, error) in errors.iter() { - if *condition { - return Err(PySyntaxError::new_err(error.clone())); - } - } - - Ok(()) -} - -fn _display_char(c: char) -> String { - // Return safely displayable characters in quotes. - if c == '\\' { - return format!("\"{}\"", c); - } - if c.is_alphanumeric() || c.is_ascii_punctuation() || c.is_ascii_whitespace() { - return format!("{:?}", c); - } - - // Construct a hex string in case the unicode name doesn't exist. - let hex = if c as u32 <= 0xFFFF { - format!("U+{:04X}", c as u32) - } else { - format!("U+{:08X}", c as u32) - }; - - // Return the character name or, if it has no name, the hex string. 
- if let Some(name) = unicode_names2::name(c) { - name.to_string() - } else { - hex - } -} - -fn _validate_chars(chars: &str, allow_space: bool) -> Result<(), PyErr> { - let mut bad_chars = HashSet::new(); - - for (i, c) in chars.chars().enumerate() { - let group = c.general_category_group(); - match group { - GeneralCategoryGroup::Letter - | GeneralCategoryGroup::Number - | GeneralCategoryGroup::Punctuation - | GeneralCategoryGroup::Symbol => { - continue; - } - GeneralCategoryGroup::Separator => { - // Spaces outside of the ASCII range. - if !allow_space { - bad_chars.insert(c); - } - } - GeneralCategoryGroup::Mark => { - // Combining characters in first position or after the @-sign. - if i == 0 { - bad_chars.insert(c); - } - } - GeneralCategoryGroup::Other => { - bad_chars.insert(c); - } - } - } - - if !bad_chars.is_empty() { - let mut sorted_bad_chars: Vec = bad_chars.iter().cloned().collect(); - sorted_bad_chars.sort_unstable(); - - let bad_chars_str = sorted_bad_chars - .iter() - .map(|c| _display_char(*c)) - .collect::>() - .join(", "); - - return Err(PySyntaxError::new_err(format!( - "Invalid Email Address: contains invalid characters: {}.", - bad_chars_str - ))); - } - - Ok(()) -} #[pymodule] fn _emval(_py: Python, m: &Bound) -> PyResult<()> { - m.add_class::()?; - m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } - -#[cfg(test)] -mod tests { - use super::*; - use rstest::rstest; - use std::u8; - - // Helper functions - fn ipv4(octets: [u8; 4]) -> Option { - Some(IpAddr::V4(std::net::Ipv4Addr::new( - octets[0], octets[1], octets[2], octets[3], - ))) - } - - fn ipv6(addr: &str) -> Option { - Some(IpAddr::V6(std::net::Ipv6Addr::from_str(addr).unwrap())) - } - - #[rstest] - #[case("example@domain.com", Some("example@domain.com"))] - #[case( - "user.name+tag+sorting@example.com", - Some("user.name+tag+sorting@example.com") - )] - #[case("x@example.com", Some("x@example.com"))] - #[case( - "example-indeed@strange-example.com", - 
Some("example-indeed@strange-example.com") - )] - fn test_validate_email_valid(#[case] email: &str, #[case] expected: Option<&str>) { - let emval = EmailValidator::default(); - let result = emval.validate_email(email); - - match expected { - Some(expected_normalized) => { - assert!(result.is_ok()); - let validated_email = result.unwrap(); - assert_eq!(validated_email.normalized, expected_normalized); - } - None => { - assert!(result.is_err()); - } - } - } - - #[rstest] - #[case("plainaddress", None)] - #[case("@missing-local.org", None)] - #[case("missing-domain@.com", None)] - #[case("missing-at-sign.com", None)] - #[case("missing-tld@domain.", None)] - #[case("invalid-char@domain.c*m", None)] - #[case("too..many..dots@domain.com", None)] - fn test_validate_email_invalid(#[case] email: &str, #[case] expected: Option<&str>) { - let emval = EmailValidator::default(); - let result = emval.validate_email(email); - - match expected { - Some(expected_normalized) => { - assert!(result.is_ok()); - let validated_email = result.unwrap(); - assert_eq!(validated_email.normalized, expected_normalized); - } - None => { - assert!(result.is_err()); - } - } - } - - #[rstest] - #[case("POSTMASTER@example.com", Some("postmaster@example.com"))] - #[case("NOT-POSTMASTER@example.com", Some("NOT-POSTMASTER@example.com"))] - fn test_validate_email_case_insensitive(#[case] email: &str, #[case] expected: Option<&str>) { - let emval = EmailValidator::default(); - let result = emval.validate_email(email); - - match expected { - Some(expected_normalized) => { - assert!(result.is_ok()); - let validated_email = result.unwrap(); - assert_eq!(validated_email.normalized, expected_normalized); - } - None => { - assert!(result.is_err()); - } - } - } - - #[rstest] - #[case("domain.com")] - #[case("a.com")] - #[case("sub.domain.com")] // Subdomain - #[case("example.co.uk")] // Country code TLD - #[case("xn--d1acufc.xn--p1ai")] // Internationalized domain name (IDN) - #[case("123.com")] // Numeric 
domain - #[case("example.museum")] // Long TLD - #[case("example.travel")] // Another long TLD - #[case("e.com")] // Minimum length domain - #[case("a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.com")] // Long subdomain - fn test_validate_domain_valid(#[case] domain: &str) { - let emval = EmailValidator::default(); - let result = emval._validate_domain(domain); - - assert!(result.is_ok()); - } - - #[rstest] - #[case("invali*d.com")] - #[case(&"a".repeat(64))] - #[case("a.com-")] - #[case("a-.com")] - #[case(&(String::from("a") + &".com".repeat(126)))] - #[case("example..com")] // Double dot - #[case("example-.com")] // Trailing hyphen - #[case("-example.com")] // Leading hyphen - #[case("example..com")] // Consecutive dots - #[case("example-.com")] // TLD with trailing hyphen - #[case(".example.com")] // Leading dot - #[case("example.com.")] // Trailing dot - #[case("example..com")] - #[case("example.com-")] // Trailing hyphen in second-level domain - #[case("example..com")] // Multiple consecutive dots in second-level domain - #[case("xn--d1acufc.xn--p1ai-")] // Internationalized domain name (IDN) with trailing hyphen - #[case("ex_ample.com")] // Underscore in domain - fn test_validate_domain_invalid(#[case] domain: &str) { - let emval = EmailValidator::default(); - let result = emval._validate_domain(domain); - - assert!(result.is_err()); - } - - #[rstest] - #[case("me@anything.arpa", false)] - #[case("me@link.local", false)] - #[case("me@valid.invalid", false)] - #[case("me@host.localhost", false)] - #[case("me@onion.onion.onion", false)] - #[case("me@test.test.test", false)] - fn test_special_use_domains(#[case] domain: &str, #[case] expected: bool) { - let emval = EmailValidator::default(); - let result = emval._validate_domain(domain); - - if expected { - assert!(result.is_ok()); - } else { - assert!(result.is_err()); - } - } - - #[rstest] - #[case("me@[127.0.0.1]", "[127.0.0.1]", ipv4([127, 0, 0, 1]))] - #[case("me@[192.168.0.1]", "[192.168.0.1]", 
ipv4([192, 168, 0, 1]))] - #[case("me@[IPv6:::1]", "[IPv6:::1]", ipv6("::1"))] - #[case( - "me@[IPv6:0000:0000:0000:0000:0000:0000:0000:0001]", - "[IPv6:::1]", - ipv6("::1") - )] - #[case("me@[IPv6:2001:db8::1]", "[IPv6:2001:db8::1]", ipv6("2001:db8::1"))] - #[case( - "me@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]", - "[IPv6:2001:db8:85a3::8a2e:370:7334]", - ipv6("2001:db8:85a3::8a2e:370:7334") - )] - #[case( - "me@[IPv6:2001:db8:1234:5678:9abc:def0:1234:5678]", - "[IPv6:2001:db8:1234:5678:9abc:def0:1234:5678]", - ipv6("2001:db8:1234:5678:9abc:def0:1234:5678") - )] - fn test_validate_domain_literal_valid( - #[case] email: &str, - #[case] expected_domain: &str, - #[case] expected_ip: Option, - ) { - let emval = EmailValidator { - allow_domain_literal: true, - ..EmailValidator::default() - }; - - let result = emval.validate_email(email); - assert!(result.is_ok()); - let validated_email = result.unwrap(); - assert_eq!(validated_email.domain_name, expected_domain); - assert_eq!(validated_email.domain_address, expected_ip); - } - - #[rstest] - #[case("me@[300.300.300.300]")] - #[case("me@[IPv6:2001:db8:::1:]")] - #[case("me@[IPv6:2001:db8::85a3::8a2e:370:7334]")] - #[case("me@[127.0.0.256]")] - #[case("me@[IPv6:2001:db8:1234:5678:9abc:def0:1234:56789]")] - fn test_validate_domain_literal_invalid(#[case] email: &str) { - let emval = EmailValidator { - allow_domain_literal: true, - ..EmailValidator::default() - }; - - let result = emval.validate_email(email); - assert!(result.is_err()); - } - - #[rstest] - #[case("example", Some("example"), false, true)] - #[case("user.name", Some("user.name"), false, true)] - #[case("user-name", Some("user-name"), false, true)] - #[case("user+name", Some("user+name"), false, true)] - #[case("user_name", Some("user_name"), false, true)] - #[case("user123", Some("user123"), false, true)] - #[case("1233457890", Some("1233457890"), false, true)] - #[case("user&example.com", Some("user&example.com"), false, true)] - fn 
test_validate_local_part_valid( - #[case] input: &str, - #[case] expected: Option<&str>, - #[case] allow_quoted_local: bool, - #[case] allow_smtputf8: bool, - ) { - let emval = EmailValidator { - allow_quoted_local, - allow_smtputf8, - ..EmailValidator::default() - }; - - let result = emval._validate_local_part(input); - - if let Some(expected_local) = expected { - assert!(result.is_ok()); - assert_eq!(result.unwrap(), expected_local); - } else { - assert!(result.is_err()); - } - } - - #[rstest] - #[case("", None, false, true)] - #[case(&"a".repeat(MAX_LOCAL_PART_LENGTH + 1), None, false, true)] - #[case(".user", None, false, true)] - #[case("user.", None, false, true)] - #[case("user..name", None, false, true)] - #[case("user name", None, false, true)] - #[case("user@name", None, false, true)] - #[case("user(name", None, false, true)] - #[case("user)name", None, false, true)] - fn test_validate_local_part_invalid( - #[case] input: &str, - #[case] expected: Option<&str>, - #[case] allow_quoted_local: bool, - #[case] allow_smtputf8: bool, - ) { - let emval = EmailValidator { - allow_quoted_local, - allow_smtputf8, - ..EmailValidator::default() - }; - - let result = emval._validate_local_part(input); - - if let Some(expected_local) = expected { - assert!(result.is_ok()); - assert_eq!(result.unwrap(), expected_local); - } else { - assert!(result.is_err()); - } - } - - #[rstest] - #[case("\"user@name\"", None, false, true)] - #[case("\"user\nname\"", None, true, false)] - #[case("\"user\rname\"", None, true, false)] - #[case("\"user.name\"", Some("user.name"), true, false)] - #[case("\"user+name\"", Some("user+name"), true, false)] - #[case("\"user_name\"", Some("user_name"), true, false)] - #[case( - "\"unnecessarily.quoted.local.part\"", - Some("unnecessarily.quoted.local.part"), - true, - true - )] - #[case( - "\"quoted.with..unicode.λ\"", - Some("\"quoted.with..unicode.λ\""), - true, - true - )] - #[case( - "\"unnecessarily.quoted.with.unicode.λ\"", - 
Some("unnecessarily.quoted.with.unicode.λ"), - true, - true - )] - #[case("\"quoted..local.part\"", Some("\"quoted..local.part\""), true, true)] - #[case("\"quoted.with.at@\"", Some("\"quoted.with.at@\""), true, true)] - #[case("\"quoted with space\"", Some("\"quoted with space\""), true, true)] - #[case( - "\"quoted.with.dquote\\\"\"", - Some("\"quoted.with.dquote\\\"\""), - true, - false - )] - #[case( - "\"quoted.with.extraneous.\\escape\"", - Some("quoted.with.extraneous.escape"), - true, - false - )] - fn test_validate_local_part_quoted( - #[case] input: &str, - #[case] expected: Option<&str>, - #[case] allow_quoted_local: bool, - #[case] allow_smtputf8: bool, - ) { - let emval = EmailValidator { - allow_quoted_local, - allow_smtputf8, - ..EmailValidator::default() - }; - - let result = emval._validate_local_part(input); - - if let Some(expected_local) = expected { - assert!(result.is_ok()); - assert_eq!(result.unwrap(), expected_local); - } else { - assert!(result.is_err()); - } - } - - #[rstest] - #[case("username", false)] - #[case("user-name", false)] - #[case("user.name", false)] - #[case("", false)] - #[case("\u{00E9}", false)] // Unicode character é - #[case("user\u{00E9}name", false)] // Unicode character é in the middle - #[case("user\u{00E9}", false)] // Unicode character é at the end - #[case("\u{03B1}\u{03B2}\u{03B3}", false)] // Greek characters - #[case("user\u{03B1}\u{03B2}\u{03B3}name", false)] // Greek characters in the middle - #[case("\u{4E00}\u{4E8C}\u{4E09}", false)] // Chinese characters - #[case("user\u{4E00}\u{4E8C}\u{4E09}name", false)] // Chinese characters in the middle - #[case("user_name", false)] - #[case("user+name", false)] - #[case("user=name", false)] - #[case("user&name", false)] - fn test_validate_chars_valid(#[case] input: &str, #[case] allow_space: bool) { - let result = _validate_chars(input, allow_space); - - assert!(result.is_ok()); - } - - #[rstest] - #[case("user\x01name", false)] - #[case("user\u{2028}name", false)] 
// Unicode line separator - #[case("user\u{2029}name", false)] // Unicode paragraph separator - #[case("user\u{E000}name", false)] // Unicode private use character - #[case("\u{0301}username", false)] // Combining character - #[case("user\u{007F}name", false)] // Unicode delete character - #[case("user\nname", false)] - #[case("user\tname", false)] - #[case("\u{FEFF}", false)] // Unicode byte order mark - #[case("user\u{FEFF}name", false)] // Unicode byte order mark in the middle - fn test_validate_chars_invalid(#[case] input: &str, #[case] allow_space: bool) { - let result = _validate_chars(input, allow_space); - - assert!(result.is_err()); - } - - #[rstest] - #[case("user name", true, true)] - #[case("user name", true, true)] - #[case("user name", false, false)] - #[case("user name", false, false)] - fn test_validate_chars_with_and_without_spaces( - #[case] input: &str, - #[case] allow_space: bool, - #[case] expected: bool, - ) { - let result = _validate_chars(input, allow_space); - - if expected { - assert!(result.is_ok()); - } else { - assert!(result.is_err()); - } - } - - #[rstest] - #[case("example@domain.com")] - #[case("user.name+tag+sorting@example.com")] - #[case("x@example.com")] - #[case("example-indeed@strange-example.com")] - #[case("a@b.c")] // Minimum length valid email - #[case("valid_email@sub.domain.com")] // Subdomain - #[case("valid-email@domain.co.jp")] // Country code TLD - #[case("invalid-email@domain..com")] // Double dot in domain - #[case("@missing-local.org")] // Can be missing Local - #[case("missing-domain@")] // Can be missing Domain - fn test_split_email_valid(#[case] input: &str) { - let result = _split_email(input); - - assert!(result.is_ok()); - } - - #[rstest] - #[case("plainaddress")] - #[case("missing-at-sign.com")] - #[case("")] - fn test_split_email_invalid(#[case] input: &str) { - let result = _split_email(input); - - assert!(result.is_err()); - } -} diff --git a/src/models.rs b/src/models.rs new file mode 100644 index 
0000000..87434c1 --- /dev/null +++ b/src/models.rs @@ -0,0 +1,28 @@ +use pyo3::prelude::*; +use std::net::IpAddr; + +#[pyclass] +pub struct ValidatedEmail { + #[pyo3(get)] + pub original: String, + #[pyo3(get)] + pub normalized: String, + #[pyo3(get)] + pub local_part: String, + #[pyo3(get)] + pub domain_address: Option, + #[pyo3(get)] + pub domain_name: String, + #[pyo3(get)] + pub is_deliverable: bool, +} + +#[derive(Default)] +#[pyclass] +pub struct EmailValidator { + pub allow_smtputf8: bool, + pub allow_empty_local: bool, + pub allow_quoted_local: bool, + pub allow_domain_literal: bool, + pub deliverable_address: bool, +} diff --git a/src/validators/domain.rs b/src/validators/domain.rs new file mode 100644 index 0000000..6714fea --- /dev/null +++ b/src/validators/domain.rs @@ -0,0 +1,381 @@ +use crate::errors::ValidationError; +use crate::models::EmailValidator; +use idna::uts46::Uts46; +use idna::uts46::{AsciiDenyList, DnsLength, Hyphens}; +use std::net::IpAddr; +use std::str::FromStr; +use trust_dns_resolver::config::*; +use trust_dns_resolver::Resolver; + +pub fn validate_domain( + validator: &EmailValidator, + domain: &str, +) -> Result<(String, Option), ValidationError> { + // Guard clause if domain is being executed independently + if domain.is_empty() { + return Err(ValidationError::SyntaxError( + "Invalid Domain: The part after the '@' sign cannot be empty.".to_string(), + )); + } + + // Address Literals + if domain.starts_with('[') && domain.ends_with(']') { + if !validator.allow_domain_literal { + return Err(ValidationError::ValueError( + "Invalid Domain: A bracketed IP address after the '@' sign is not permitted." 
+ .to_string(), + )); + } + + let domain_literal = &domain[1..domain.len() - 1]; + + // Handle IPv6 addresses + if domain_literal.starts_with("IPv6:") { + let ipv6_literal = &domain_literal[5..]; + let addr = IpAddr::from_str(ipv6_literal).map_err(|_| { + ValidationError::SyntaxError( + "Invalid Domain: The IPv6 address in brackets following the '@' symbol is not valid." + .to_string(), + ) + })?; + if let IpAddr::V6(addr) = addr { + return Ok((format!("[IPv6:{}]", addr), Some(IpAddr::V6(addr)))); + } + } + + // Try to parse the domain literal as an IP address (either IPv4 or IPv6) + let addr = IpAddr::from_str(domain_literal).map_err(|_| { + ValidationError::SyntaxError( + "Invalid Domain: The address in brackets following the '@' sign is not a valid IP address." + .to_string(), + ) + })?; + + let name = match addr { + IpAddr::V4(_) => format!("[{}]", addr), + IpAddr::V6(_) => format!("[IPv6:{}]", addr), + }; + + return Ok((name, Some(addr))); + } + + // Check for invalid characters in the domain part + if !crate::consts::ATEXT_HOSTNAME_INTL.is_match(domain.as_bytes()) { + return Err(ValidationError::SyntaxError( + "Invalid Domain: Contains invalid characters after '@' sign.".to_string(), + )); + } + + // Check for unsafe characters + crate::validators::validate_chars(domain, false)?; + + // Normalize the domain using UTS-46 + let normalized_domain = Uts46::new() + .to_ascii( + domain.as_bytes(), + AsciiDenyList::URL, + Hyphens::Allow, + DnsLength::Verify, + ) + .map_err(|_| { + ValidationError::SyntaxError( + "Invalid Domain: Contains invalid characters after '@' sign post Unicode normalization." 
+ .to_string(), + ) + })?; + + // Check for invalid chars after normalization + if !crate::consts::ATEXT_HOSTNAME_INTL.is_match(normalized_domain.as_bytes()) { + return Err(ValidationError::SyntaxError( + "Invalid Domain: Contains invalid characters after Unicode normalization.".to_string(), + )); + } + + // Validates the domain part of an email address based on RFC 952, RFC 1123, and RFC 5322. + // Each label must have at least one character and cannot start or end with dashes or periods. + // Consecutive periods and adjacent period-hyphen combinations are also invalid. + crate::validators::validate_email_label( + &normalized_domain, + "Invalid Domain: A {} cannot immediately follow the '@' symbol.", + "Invalid Domain: A {} cannot appear at the end of the domain.", + true, + )?; + + // Check the total length of the domain + if normalized_domain.len() > crate::consts::MAX_DOMAIN_LENGTH { + return Err(ValidationError::ValueError( + "Invalid Domain: Exceeds the maximum length (253 chars).".to_string(), + )); + } + + // Check for invalid domain labels + for label in normalized_domain.split('.') { + if label.len() > crate::consts::MAX_DNS_LABEL_LENGTH { + return Err(ValidationError::ValueError( + "Invalid Label: Exceeds the maximum length (63 chars).".to_string(), + )); + } + + if label.is_empty() { + return Err(ValidationError::SyntaxError( + "Invalid Label: The Label cannot be empty.".to_string(), + )); + } + + // Check for two letters followed by two dashes + if crate::consts::DNS_LABEL_REGEX.is_match(label.as_bytes()) + && !label.to_lowercase().starts_with("xn--") + { + return Err(ValidationError::SyntaxError( + "Invalid Domain: Two letters followed by two dashes ('--') are not allowed immediately after the '@' sign or a period.".to_string(), + )); + } + } + + if validator.deliverable_address { + // Deliverable addresses must contain atleast one period. 
+ if !normalized_domain.contains(".") { + return Err(ValidationError::SyntaxError( + "Invalid Domain: Must contain a period ('.') to be considered valid.".to_string(), + )); + } + + // TLDs must end with a letter. + if !crate::consts::DOMAIN_NAME_REGEX.is_match(normalized_domain.as_bytes()) { + return Err(ValidationError::SyntaxError( + "Invalid domain: The part after the '@' sign does not belong to a valid top-level domain (TLD).".to_string(), + )); + } + } + + // Check for reserved and "special use" domains + for &special_domain in crate::consts::SPECIAL_USE_DOMAIN_NAMES { + if normalized_domain == special_domain + || normalized_domain.ends_with(&format!(".{}", special_domain)) + { + return Err(ValidationError::SyntaxError( + "Invalid Domain: The part after the '@' sign is a reserved or special-use domain that cannot be used.".to_string(), + )); + } + } + Ok((normalized_domain.to_string(), None)) +} + +pub fn validate_deliverability(domain: &str) -> Result<(), ValidationError> { + let resolver = Resolver::new(ResolverConfig::default(), ResolverOpts::default()) + .map_err(|e| ValidationError::SyntaxError(e.to_string()))?; + + // Check MX records + if let Ok(mx_records) = resolver.mx_lookup(domain) { + for mx in mx_records.iter() { + let exchange = mx.exchange().to_string(); + if exchange == "." 
{ + return Err(ValidationError::SyntaxError( + "Invalid Domain: The domain does not accept email due to a null MX record, indicating it is not configured to receive emails.".to_string(), + )); + } + } + if mx_records + .iter() + .any(|mx| !mx.exchange().to_string().is_empty()) + { + return Ok(()); + } + } + + // Fallback to A/AAAA records + if let Ok(a_records) = resolver.ipv4_lookup(domain) { + if a_records.iter().any(|ip| ip.is_global()) { + return Ok(()); + } + } + if let Ok(aaaa_records) = resolver.ipv6_lookup(domain) { + if aaaa_records.iter().any(|ip| ip.is_global()) { + return Ok(()); + } + } + + // Check SPF records (TXT) + if let Ok(txt_records) = resolver.txt_lookup(domain) { + for record in txt_records.iter() { + let txt = record.to_string(); + if txt.starts_with("v=spf1 ") && txt.contains("-all") { + return Err(ValidationError::SyntaxError( + "Invalid Domain: The domain does not send email due to an SPF record that rejects all emails.".to_string(), + )); + } + } + } + + Err(ValidationError::SyntaxError( + "Invalid Domain: No MX, A, or AAAA records found for domain.".to_string(), + )) +} + +#[cfg(test)] +mod tests { + use super::*; + use rstest::rstest; + + #[rstest] + #[case("domain.com")] + #[case("a.com")] + #[case("sub.domain.com")] // Subdomain + #[case("example.co.uk")] // Country code TLD + #[case("xn--d1acufc.xn--p1ai")] // Internationalized domain name (IDN) + #[case("123.com")] // Numeric domain + #[case("example.museum")] // Long TLD + #[case("example.travel")] // Another long TLD + #[case("e.com")] // Minimum length domain + #[case("a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.com")] // Long subdomain + fn test_validate_domain_valid(#[case] domain: &str) { + let emval = EmailValidator::default(); + let result = validate_domain(&emval, domain); + + assert!(result.is_ok()); + } + + #[rstest] + #[case("invali*d.com")] + #[case(&"a".repeat(64))] + #[case("a.com-")] + #[case("a-.com")] + #[case(&(String::from("a") + &".com".repeat(126)))] + 
#[case("example..com")] // Double dot + #[case("example-.com")] // Trailing hyphen + #[case("example-.com")] // Leading hyphen + #[case("example..com")] // Consecutive dots + #[case("example-.com")] // TLD with trailing hyphen + #[case(".example.com")] // Leading dot + #[case("example.com.")] // Trailing dot + #[case("example..com")] + #[case("example.com-")] // Trailing hyphen in second-level domain + #[case("example..com")] // Multiple consecutive dots in second-level domain + #[case("xn--d1acufc.xn--p1ai-")] // Internationalized domain name (IDN) with trailing hyphen + #[case("ex_ample.com")] // Underscore in domain + fn test_validate_domain_invalid(#[case] domain: &str) { + let emval = EmailValidator::default(); + let result = validate_domain(&emval, domain); + + assert!(result.is_err()); + } + + #[rstest] + #[case("me@anything.arpa", false)] + #[case("me@link.local", false)] + #[case("me@valid.invalid", false)] + #[case("me@host.localhost", false)] + #[case("me@onion.onion.onion", false)] + #[case("me@test.test.test", false)] + fn test_special_use_domains(#[case] domain: &str, #[case] expected: bool) { + let emval = EmailValidator::default(); + let result = validate_domain(&emval, domain); + + if expected { + assert!(result.is_ok()); + } else { + assert!(result.is_err()); + } + } + + #[rstest] + #[case("null.example.com")] + #[case("nonexistentdomain.example")] + #[case("-invaliddomain.com")] + #[case("invalid_domain.com")] + #[case("例え.テスト")] + #[case("example..com")] + fn test_validate_deliverability_invalid(#[case] domain: &str) { + assert!(validate_deliverability(domain).is_err()); + } + + #[rstest] + #[case("google.com")] + #[case("gmail.com")] + #[case("yahoo.com")] + #[case("hotmail.com")] + #[case("outlook.com")] + #[case("aol.com")] + fn test_validate_deliverability_valid(#[case] domain: &str) { + assert!(validate_deliverability(domain).is_ok()); + } + + #[rstest] + #[case("blackhole.isi.edu")] // Known to have a null MX record + fn 
test_validate_deliverability_null_mx(#[case] domain: &str) { + let result = validate_deliverability(domain); + assert!(result.is_err()); + } + + #[rstest] + #[case("www.cloudflare.com")] + #[case("osu.edu")] // OSU's domain + fn test_validate_deliverability_valid_a_no_mx(#[case] domain: &str) { + assert!(validate_deliverability(domain).is_ok()); + } + + #[rstest] + #[case("nonexistentdomain.example")] + #[case("invalid-domain-test-12345.com")] + fn test_validate_deliverability_no_records(#[case] domain: &str) { + let result = validate_deliverability(domain); + assert!(result.is_err()); + } + + #[rstest] + #[case("thisdomaindoesnotexist.tld")] + fn test_validate_deliverability_nxdomain(#[case] domain: &str) { + let result = validate_deliverability(domain); + assert!(result.is_err()); + } + #[rstest] + #[case("example.com")] + #[case("example.org")] + fn test_validate_deliverability_spf_reject_all(#[case] domain: &str) { + let result = validate_deliverability(domain); + assert!(result.is_err()); + } + + #[rstest] + #[case("localhost")] // Resolves to 127.0.0.1 + #[case("example.internal")] // Assuming it resolves to a private IP + fn test_validate_deliverability_private_ip(#[case] domain: &str) { + let result = validate_deliverability(domain); + assert!(result.is_err()); + } + + #[rstest] + #[case("-invaliddomain.com")] + #[case("invalid_domain.com")] + #[case("example..com")] + fn test_validate_deliverability_invalid_syntax(#[case] domain: &str) { + let result = validate_deliverability(domain); + assert!(result.is_err()); + } + + #[rstest] + #[case("例え.テスト")] // Japanese IDN for "example.test" + #[case("مثال.إختبار")] // Arabic IDN for "example.test" + fn test_validate_deliverability_idn(#[case] domain: &str) { + let result = validate_deliverability(domain); + // Depending on the domain, it may pass or fail + // We're checking that the function handles IDNs without panicking + assert!(result.is_err() || result.is_ok()); + } + + #[rstest] + 
#[case("no-ns.example.com")] // Assuming this domain has no nameservers + fn test_validate_deliverability_no_nameservers(#[case] domain: &str) { + let result = validate_deliverability(domain); + // Depending on implementation, might return an error or a specific message + assert!(result.is_err()); + } + + #[rstest] + #[case("timeout.example.com")] + fn test_validate_deliverability_timeout(#[case] domain: &str) { + let result = validate_deliverability(domain); + // Should handle timeout gracefully + assert!(result.is_err()); + } +} diff --git a/src/validators/email.rs b/src/validators/email.rs new file mode 100644 index 0000000..3cd6058 --- /dev/null +++ b/src/validators/email.rs @@ -0,0 +1,191 @@ +use crate::models::EmailValidator; +use crate::models::ValidatedEmail; + +use pyo3::prelude::*; + +#[pymethods] +impl EmailValidator { + #[new] + #[pyo3(signature = ( + allow_smtputf8 = true, + allow_empty_local = false, + allow_quoted_local = false, + allow_domain_literal = false, + deliverable_address = true, + + ))] + fn new( + allow_smtputf8: bool, + allow_empty_local: bool, + allow_quoted_local: bool, + allow_domain_literal: bool, + deliverable_address: bool, + ) -> Self { + EmailValidator { + allow_smtputf8, + allow_empty_local, + allow_quoted_local, + allow_domain_literal, + deliverable_address, + } + } + + fn validate_email(&self, email: &str) -> PyResult { + let (unvalidated_local_part, unvalidated_domain) = crate::validators::split_email(&email)?; + + crate::validators::validate_email_length(&unvalidated_local_part, &unvalidated_domain)?; + + let mut valid_local_part = + crate::validators::validate_local_part(self, &unvalidated_local_part)?; + + if crate::consts::CASE_INSENSITIVE_MAILBOX_NAMES + .contains(&valid_local_part.to_lowercase().as_str()) + { + valid_local_part = valid_local_part.to_lowercase(); + } + + let (domain_name, domain_address) = + crate::validators::validate_domain(self, &unvalidated_domain)?; + + if self.deliverable_address { + 
crate::validators::validate_deliverability(&domain_name)?; + } + + let normalized = format!("{}@{}", valid_local_part, domain_name); + + Ok(ValidatedEmail { + original: email.to_string(), + local_part: valid_local_part, + domain_name, + domain_address, + normalized, + is_deliverable: true, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rstest::rstest; + use std::net::IpAddr; + use std::str::FromStr; + + // Helper functions + fn ipv4(octets: [u8; 4]) -> Option { + Some(IpAddr::V4(std::net::Ipv4Addr::new( + octets[0], octets[1], octets[2], octets[3], + ))) + } + + fn ipv6(addr: &str) -> Option { + Some(IpAddr::V6(std::net::Ipv6Addr::from_str(addr).unwrap())) + } + + #[rstest] + #[case("example@domain.com", Some("example@domain.com"))] + #[case( + "user.name+tag+sorting@example.com", + Some("user.name+tag+sorting@example.com") + )] + #[case("x@example.com", Some("x@example.com"))] + #[case( + "example-indeed@strange-example.com", + Some("example-indeed@strange-example.com") + )] + fn test_validate_email_valid(#[case] email: &str, #[case] expected: Option<&str>) { + let emval = EmailValidator::default(); + let result = emval.validate_email(email); + + match expected { + Some(expected_normalized) => { + assert!(result.is_ok()); + let validated_email = result.unwrap(); + assert_eq!(validated_email.normalized, expected_normalized); + } + None => { + assert!(result.is_err()); + } + } + } + + #[rstest] + #[case("plainaddress", None)] + #[case("@missing-local.org", None)] + #[case("missing-domain@.com", None)] + #[case("missing-at-sign.com", None)] + #[case("missing-tld@domain.", None)] + #[case("invalid-char@domain.c*m", None)] + #[case("too..many..dots@domain.com", None)] + fn test_validate_email_invalid(#[case] email: &str, #[case] expected: Option<&str>) { + let emval = EmailValidator::default(); + let result = emval.validate_email(email); + + match expected { + Some(expected_normalized) => { + assert!(result.is_ok()); + let validated_email = 
result.unwrap(); + assert_eq!(validated_email.normalized, expected_normalized); + } + None => { + assert!(result.is_err()); + } + } + } + + #[rstest] + #[case("POSTMASTER@example.com", Some("postmaster@example.com"))] + #[case("NOT-POSTMASTER@example.com", Some("NOT-POSTMASTER@example.com"))] + fn test_validate_email_case_insensitive(#[case] email: &str, #[case] expected: Option<&str>) { + let emval = EmailValidator::default(); + let result = emval.validate_email(email); + + match expected { + Some(expected_normalized) => { + assert!(result.is_ok()); + let validated_email = result.unwrap(); + assert_eq!(validated_email.normalized, expected_normalized); + } + None => { + assert!(result.is_err()); + } + } + } + + #[rstest] + #[case("me@[127.0.0.1]", "[127.0.0.1]", ipv4([127, 0, 0, 1]))] + #[case("me@[192.168.0.1]", "[192.168.0.1]", ipv4([192, 168, 0, 1]))] + #[case("me@[IPv6:::1]", "[IPv6:::1]", ipv6("::1"))] + #[case( + "me@[IPv6:0000:0000:0000:0000:0000:0000:0000:0001]", + "[IPv6:::1]", + ipv6("::1") + )] + #[case("me@[IPv6:2001:db8::1]", "[IPv6:2001:db8::1]", ipv6("2001:db8::1"))] + #[case( + "me@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]", + "[IPv6:2001:db8:85a3::8a2e:370:7334]", + ipv6("2001:db8:85a3::8a2e:370:7334") + )] + #[case( + "me@[IPv6:2001:db8:1234:5678:9abc:def0:1234:5678]", + "[IPv6:2001:db8:1234:5678:9abc:def0:1234:5678]", + ipv6("2001:db8:1234:5678:9abc:def0:1234:5678") + )] + fn test_validate_domain_literal_valid( + #[case] email: &str, + #[case] expected_domain: &str, + #[case] expected_ip: Option, + ) { + let emval = EmailValidator { + allow_domain_literal: true, + ..EmailValidator::default() + }; + + let result = emval.validate_email(email); + assert!(result.is_ok()); + let validated_email = result.unwrap(); + assert_eq!(validated_email.domain_name, expected_domain); + assert_eq!(validated_email.domain_address, expected_ip); + } +} diff --git a/src/validators/local_part.rs b/src/validators/local_part.rs new file mode 100644 index 
0000000..d05caf9 --- /dev/null +++ b/src/validators/local_part.rs @@ -0,0 +1,281 @@ +use crate::errors::ValidationError; +use crate::models::EmailValidator; +use std::collections::HashSet; + +pub fn validate_local_part( + validator: &EmailValidator, + local: &str, +) -> Result { + if local.is_empty() { + return if validator.allow_empty_local { + Ok(local.to_string()) + } else { + Err(ValidationError::SyntaxError( + "Invalid Local Part: The part before the '@' sign cannot be empty.".to_string(), + )) + }; + } + + // Remove surrounding quotes, unescaping any escaped characters within quotes + let unquoted_local = unquote_local_part(local, validator.allow_quoted_local)?; + + // Local part length validation + if unquoted_local.len() > crate::consts::MAX_LOCAL_PART_LENGTH { + return Err(ValidationError::ValueError( + "Invalid Local Part: The part before the '@' sign exceeds the maximum length (64 chars).".to_string(), + )); + } + + // Check for valid dot-atom text + if crate::consts::DOT_ATOM_TEXT.is_match(unquoted_local.as_bytes()) { + return Ok(unquoted_local); + } + + // Check for valid internationalized dot-atom text + if crate::consts::DOT_ATOM_TEXT_INTL.is_match(unquoted_local.as_bytes()) { + if !validator.allow_smtputf8 { + return Err(ValidationError::SyntaxError( + "Invalid Local Part: Internationalized characters before the '@' sign are not supported.".to_string(), + )); + } + crate::validators::validate_chars(&unquoted_local, false)?; + + // Check for valid UTF-8 encoding + if String::from_utf8(unquoted_local.as_bytes().to_vec()).is_err() { + return Err(ValidationError::SyntaxError( + "Invalid Local Part: Contains non-UTF-8 characters.".to_string(), + )); + } + + return Ok(unquoted_local.to_string()); + } + + // Check for quoted local part and validate + if local.starts_with('"') && local.ends_with('"') { + let invalid_chars: HashSet<_> = local + .chars() + .filter(|&c| !crate::consts::QTEXT_INTL.is_match(c.to_string().as_bytes())) + .collect(); + + if 
!invalid_chars.is_empty() { + return Err(ValidationError::SyntaxError( + "Invalid Local Part: contains invalid characters within quoted local part before the '@' sign.".to_string(), + )); + } + + let invalid_non_ascii_chars: HashSet<_> = local + .chars() + .filter(|&c| !(32..=126).contains(&(c as u32))) + .collect(); + + if !invalid_non_ascii_chars.is_empty() && !validator.allow_smtputf8 { + return Err(ValidationError::SyntaxError( + "Invalid Local Part: Internationalized characters before the '@' sign are not supported.".to_string(), + )); + } + + crate::validators::validate_chars(&unquoted_local, true)?; + + // Check for valid UTF-8 encoding + if String::from_utf8(local.as_bytes().to_vec()).is_err() { + return Err(ValidationError::SyntaxError( + "Invalid Local Part: Contains non-UTF-8 characters.".to_string(), + )); + } + + return Ok(local.to_string()); + } + + // Check for other invalid characters + let invalid_chars: HashSet<_> = unquoted_local + .chars() + .filter(|&c| !crate::consts::ATEXT_INTL_DOT_RE.is_match(c.to_string().as_bytes())) + .collect(); + + if !invalid_chars.is_empty() { + return Err(ValidationError::SyntaxError( + "Invalid Local Part: contains invalid characters before the '@' sign.".to_string(), + )); + } + + // Validates the local part of an email address based on RFC 952, RFC 1123, and RFC 5322. + // Each label must have at least one character and cannot start or end with dashes or periods. + // Consecutive periods and adjacent period-hyphen combinations are also invalid. 
+ crate::validators::validate_email_label( + local, + "Invalid Local Part: Cannot start with a {}.", + "Invalid Local Part: A {} cannot immediately precede the '@' sign.", + true, + )?; + + Err(ValidationError::SyntaxError( + "Invalid Local Part: contains invalid characters before the '@' sign.".to_string(), + )) +} + +fn unquote_local_part(local: &str, allow_quoted: bool) -> Result { + if local.starts_with('"') && local.ends_with('"') { + // Check that the quoted local part is allowed, otherwise raise exception + if !allow_quoted { + return Err(ValidationError::SyntaxError( + "Invalid Local Part: Quoting the local part before the '@' sign is not permitted in this context.".to_string(), + )); + } + + let mut unquoted = String::new(); + let mut chars = local[1..local.len() - 1].chars(); + let mut escaped = false; + + while let Some(c) = chars.next() { + if escaped { + unquoted.push(c); + escaped = false; + } else if c == '\\' { + escaped = true; + } else { + unquoted.push(c); + } + } + + if escaped { + return Err(ValidationError::SyntaxError( + "Invalid Local Part: Trailing escape character in the quoted local part before the '@' sign.".to_string(), + )); + } + + Ok(unquoted) + } else { + Ok(local.to_string()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rstest::rstest; + + #[rstest] + #[case("example", Some("example"), false, true)] + #[case("user.name", Some("user.name"), false, true)] + #[case("user-name", Some("user-name"), false, true)] + #[case("user+name", Some("user+name"), false, true)] + #[case("user_name", Some("user_name"), false, true)] + #[case("user123", Some("user123"), false, true)] + #[case("1233457890", Some("1233457890"), false, true)] + #[case("user&example.com", Some("user&example.com"), false, true)] + fn test_validate_local_part_valid( + #[case] input: &str, + #[case] expected: Option<&str>, + #[case] allow_quoted_local: bool, + #[case] allow_smtputf8: bool, + ) { + let emval = EmailValidator { + allow_quoted_local, + 
allow_smtputf8, + ..EmailValidator::default() + }; + + let result = validate_local_part(&emval, input); + + if let Some(expected_local) = expected { + assert!(result.is_ok()); + assert_eq!(result.unwrap(), expected_local); + } else { + assert!(result.is_err()); + } + } + + #[rstest] + #[case("", None, false, true)] + #[case(&"a".repeat(crate::consts::MAX_LOCAL_PART_LENGTH + 1), None, false, true)] + #[case(".user", None, false, true)] + #[case("user.", None, false, true)] + #[case("user..name", None, false, true)] + #[case("user name", None, false, true)] + #[case("user@name", None, false, true)] + #[case("user(name", None, false, true)] + #[case("user)name", None, false, true)] + fn test_validate_local_part_invalid( + #[case] input: &str, + #[case] expected: Option<&str>, + #[case] allow_quoted_local: bool, + #[case] allow_smtputf8: bool, + ) { + let emval = EmailValidator { + allow_quoted_local, + allow_smtputf8, + ..EmailValidator::default() + }; + + let result = validate_local_part(&emval, input); + + if let Some(expected_local) = expected { + assert!(result.is_ok()); + assert_eq!(result.unwrap(), expected_local); + } else { + assert!(result.is_err()); + } + } + + #[rstest] + #[case("\"user@name\"", None, false, true)] + #[case("\"user\nname\"", None, true, false)] + #[case("\"user\rname\"", None, true, false)] + #[case("\"user.name\"", Some("user.name"), true, false)] + #[case("\"user+name\"", Some("user+name"), true, false)] + #[case("\"user_name\"", Some("user_name"), true, false)] + #[case( + "\"unnecessarily.quoted.local.part\"", + Some("unnecessarily.quoted.local.part"), + true, + true + )] + #[case( + "\"quoted.with..unicode.λ\"", + Some("\"quoted.with..unicode.λ\""), + true, + true + )] + #[case( + "\"unnecessarily.quoted.with.unicode.λ\"", + Some("unnecessarily.quoted.with.unicode.λ"), + true, + true + )] + #[case("\"quoted..local.part\"", Some("\"quoted..local.part\""), true, true)] + #[case("\"quoted.with.at@\"", Some("\"quoted.with.at@\""), true, 
true)] + #[case("\"quoted with space\"", Some("\"quoted with space\""), true, true)] + #[case( + "\"quoted.with.dquote\\\"\"", + Some("\"quoted.with.dquote\\\"\""), + true, + false + )] + #[case( + "\"quoted.with.extraneous.\\escape\"", + Some("quoted.with.extraneous.escape"), + true, + false + )] + fn test_validate_local_part_quoted( + #[case] input: &str, + #[case] expected: Option<&str>, + #[case] allow_quoted_local: bool, + #[case] allow_smtputf8: bool, + ) { + let emval = EmailValidator { + allow_quoted_local, + allow_smtputf8, + ..EmailValidator::default() + }; + + let result = validate_local_part(&emval, input); + + if let Some(expected_local) = expected { + assert!(result.is_ok()); + assert_eq!(result.unwrap(), expected_local); + } else { + assert!(result.is_err()); + } + } +} diff --git a/src/validators/mod.rs b/src/validators/mod.rs new file mode 100644 index 0000000..8f3606e --- /dev/null +++ b/src/validators/mod.rs @@ -0,0 +1,8 @@ +pub mod domain; +pub mod email; +pub mod local_part; +pub mod utils; + +pub use domain::{validate_deliverability, validate_domain}; +pub use local_part::validate_local_part; +pub use utils::*; diff --git a/src/validators/utils.rs b/src/validators/utils.rs new file mode 100644 index 0000000..994b125 --- /dev/null +++ b/src/validators/utils.rs @@ -0,0 +1,223 @@ +use crate::errors::ValidationError; +use std::collections::HashSet; +use unicode_properties::{GeneralCategoryGroup, UnicodeGeneralCategory}; + +pub fn validate_email_label( + label: &str, + beg_descr: &str, + end_descr: &str, + is_hostname: bool, +) -> Result<(), ValidationError> { + let errors = [ + (label.ends_with('.'), end_descr.replace("{}", "period")), + (label.starts_with('.'), beg_descr.replace("{}", "period")), + ( + label.contains(".."), + "Invalid Email Address: Two periods ('.') cannot be adjacent in the email address.".to_string(), + ), + ( + is_hostname && label.ends_with('-'), + end_descr.replace("{}", "hyphen ('-')"), + ), + ( + is_hostname && 
label.starts_with('-'), + beg_descr.replace("{}", "hyphen ('-')"), + ), + ( + is_hostname && (label.contains("-.") || label.contains(".-")), + "Invalid Email Address: A period ('.') and a hyphen ('-') cannot be adjacent in the email address.".to_string(), + ), + ]; + + for (condition, error) in errors.iter() { + if *condition { + return Err(ValidationError::SyntaxError(error.clone())); + } + } + + Ok(()) +} + +pub fn validate_email_length(local_part: &str, domain: &str) -> Result<(), ValidationError> { + if local_part.len() + domain.len() + 1 > crate::consts::MAX_ADDRESS_LENGTH { + return Err(ValidationError::ValueError( + "Invalid Email Address: The email exceeds the maximum length (254 chars).".to_string(), + )); + } + Ok(()) +} + +pub fn split_email(email: &str) -> Result<(String, String), ValidationError> { + let at_pos = email.rfind('@').ok_or_else(|| { + ValidationError::SyntaxError("Invalid Email Address: Missing an '@' sign.".to_string()) + })?; + + let local_part = &email[..at_pos]; + let domain_part = &email[at_pos + 1..]; + + Ok((local_part.to_string(), domain_part.to_string())) +} + +pub fn validate_chars(chars: &str, allow_space: bool) -> Result<(), ValidationError> { + let mut bad_chars = HashSet::new(); + + for (i, c) in chars.chars().enumerate() { + let group = c.general_category_group(); + match group { + GeneralCategoryGroup::Letter + | GeneralCategoryGroup::Number + | GeneralCategoryGroup::Punctuation + | GeneralCategoryGroup::Symbol => { + continue; + } + GeneralCategoryGroup::Separator => { + // Spaces outside of the ASCII range. + if !allow_space { + bad_chars.insert(c); + } + } + GeneralCategoryGroup::Mark => { + // Combining characters in first position or after the @-sign. 
+ if i == 0 { + bad_chars.insert(c); + } + } + GeneralCategoryGroup::Other => { + bad_chars.insert(c); + } + } + } + + if !bad_chars.is_empty() { + let mut sorted_bad_chars: Vec = bad_chars.iter().cloned().collect(); + sorted_bad_chars.sort_unstable(); + + let bad_chars_str = sorted_bad_chars + .iter() + .map(|c| display_char(*c)) + .collect::>() + .join(", "); + + return Err(ValidationError::SyntaxError(format!( + "Invalid Email Address: contains invalid characters: {}.", + bad_chars_str + ))); + } + + Ok(()) +} + +fn display_char(c: char) -> String { + // Return safely displayable characters in quotes. + if c == '\\' { + return format!("\"{}\"", c); + } + if c.is_alphanumeric() || c.is_ascii_punctuation() || c.is_ascii_whitespace() { + return format!("{:?}", c); + } + + // Construct a hex string in case the unicode name doesn't exist. + let hex = if c as u32 <= 0xFFFF { + format!("U+{:04X}", c as u32) + } else { + format!("U+{:08X}", c as u32) + }; + + // Return the character name or, if it has no name, the hex string. 
+ if let Some(name) = unicode_names2::name(c) { + name.to_string() + } else { + hex + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rstest::rstest; + + #[rstest] + #[case("username", false)] + #[case("user-name", false)] + #[case("user.name", false)] + #[case("", false)] + #[case("\u{00E9}", false)] // Unicode character é + #[case("user\u{00E9}name", false)] // Unicode character é in the middle + #[case("user\u{00E9}", false)] // Unicode character é at the end + #[case("\u{03B1}\u{03B2}\u{03B3}", false)] // Greek characters + #[case("user\u{03B1}\u{03B2}\u{03B3}name", false)] // Greek characters in the middle + #[case("\u{4E00}\u{4E8C}\u{4E09}", false)] // Chinese characters + #[case("user\u{4E00}\u{4E8C}\u{4E09}name", false)] // Chinese characters in the middle + #[case("user_name", false)] + #[case("user+name", false)] + #[case("user=name", false)] + #[case("user&name", false)] + fn test_validate_chars_valid(#[case] input: &str, #[case] allow_space: bool) { + let result = validate_chars(input, allow_space); + + assert!(result.is_ok()); + } + + #[rstest] + #[case("user\x01name", false)] + #[case("user\u{2028}name", false)] // Unicode line separator + #[case("user\u{2029}name", false)] // Unicode paragraph separator + #[case("user\u{E000}name", false)] // Unicode private use character + #[case("\u{0301}username", false)] // Combining character + #[case("user\u{007F}name", false)] // Unicode delete character + #[case("user\nname", false)] + #[case("user\tname", false)] + #[case("\u{FEFF}", false)] // Unicode byte order mark + #[case("user\u{FEFF}name", false)] // Unicode byte order mark in the middle + fn test_validate_chars_invalid(#[case] input: &str, #[case] allow_space: bool) { + let result = validate_chars(input, allow_space); + + assert!(result.is_err()); + } + + #[rstest] + #[case("user name", true, true)] + #[case("user name", true, true)] + #[case("user name", false, false)] + #[case("user name", false, false)] + fn 
test_validate_chars_with_and_without_spaces( + #[case] input: &str, + #[case] allow_space: bool, + #[case] expected: bool, + ) { + let result = validate_chars(input, allow_space); + + if expected { + assert!(result.is_ok()); + } else { + assert!(result.is_err()); + } + } + + #[rstest] + #[case("example@domain.com")] + #[case("user.name+tag+sorting@example.com")] + #[case("x@example.com")] + #[case("example-indeed@strange-example.com")] + #[case("a@b.c")] // Minimum length valid email + #[case("valid_email@sub.domain.com")] // Subdomain + #[case("valid-email@domain.co.jp")] // Country code TLD + #[case("invalid-email@domain..com")] // Double dot in domain + #[case("@missing-local.org")] // Can be missing Local + #[case("missing-domain@")] // Can be missing Domain + fn test_split_email_valid(#[case] input: &str) { + let result = split_email(input); + + assert!(result.is_ok()); + } + + #[rstest] + #[case("plainaddress")] + #[case("missing-at-sign.com")] + #[case("")] + fn test_split_email_invalid(#[case] input: &str) { + let result = split_email(input); + + assert!(result.is_err()); + } +} diff --git a/tests/test_validator.py b/tests/test_validator.py index 24f1271..282678b 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -1,6 +1,6 @@ import pytest -from emval import validate_email, ValidatedEmail +from emval import ValidatedEmail, validate_email # This is the python-email-validator (https://github.com/JoshData/python-email-validator/blob/main/tests/test_syntax.py) test suite. # It has been slightly modified and does not cover all edge cases, including display names and checking dns_resolver @@ -118,25 +118,35 @@ def test_email_valid_only_if_quoted_local_part( == "Invalid Local Part: Quoting the local part before the '@' sign is not permitted in this context." 
) - validated = validate_email(email_input, allow_quoted_local=True) + validated = validate_email( + email_input, + allow_quoted_local=True, + deliverable_address=False, + ) assert validated.local_part == normalized_local_part def test_domain_literal() -> None: # Check parsing IPv4 addresses. - validated = validate_email("me@[127.0.0.1]", allow_domain_literal=True) + validated = validate_email( + "me@[127.0.0.1]", allow_domain_literal=True, deliverable_address=False + ) assert validated.domain_name == "[127.0.0.1]" assert repr(validated.domain_address) == "IPv4Address('127.0.0.1')" # # Check parsing IPv6 addresses. - validated = validate_email("me@[IPv6:::1]", allow_domain_literal=True) + validated = validate_email( + "me@[IPv6:::1]", allow_domain_literal=True, deliverable_address=False + ) assert validated.domain_name == "[IPv6:::1]" assert repr(validated.domain_address) == "IPv6Address('::1')" # Check that IPv6 addresses are normalized. validated = validate_email( - "me@[IPv6:0000:0000:0000:0000:0000:0000:0000:0001]", allow_domain_literal=True + "me@[IPv6:0000:0000:0000:0000:0000:0000:0000:0001]", + allow_domain_literal=True, + deliverable_address=False, ) assert validated.domain_name == "[IPv6:::1]" assert repr(validated.domain_address) == "IPv6Address('::1')" @@ -150,18 +160,6 @@ def test_domain_literal() -> None: "my@localhost", "Invalid Domain: Must contain a period ('.') to be considered valid.", ), - # ( - # "my@.leadingdot.com", - # "An email address cannot have a period immediately after the @-sign.", - # ), - # ( - # "my@.leadingfwdot.com", - # "An email address cannot have a period immediately after the @-sign.", - # ), - # ("my@twodots..com", "An email address cannot have two periods in a row."), - # ("my@twofwdots...com", "An email address cannot have two periods in a row."), - # ("my@trailingdot.com.", "An email address cannot end with a period."), - # ("my@trailingfwdot.com.", "An email address cannot end with a period."), ( "me@-leadingdash", 
"Invalid Domain: A hyphen ('-') cannot immediately follow the '@' symbol.", @@ -263,10 +261,6 @@ def test_domain_literal() -> None: "my\n@example.com", "Invalid Local Part: contains invalid characters before the '@' sign.", ), - # ( - # "me.\u037e@example.com", - # "After Unicode normalization: The email address contains invalid characters before the @-sign: ';'.", - # ), ("test@\n", "Invalid Domain: Contains invalid characters after '@' sign."), ( 'bad"quotes"@example.com', @@ -336,18 +330,6 @@ def test_domain_literal() -> None: "my.\ufb2c.address@1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.11111111112222222222333333333344444.info", "Invalid Email Address: The email exceeds the maximum length (254 chars).", ), - # ( - # "my.\ufb2c.address@1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.11111111112222222222333333333344.info", - # "Invalid Email Address: The email exceeds the maximum length (254 chars).", - # ), - # ( - # "my.long.address@λ111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.11111111112222222222333333.info", - # "The email address is too long when the part after the @-sign is converted to IDNA ASCII (1 byte too many).", - # ), - # ( - # "my.λong.address@λ111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.11111111112222222222333333.info", - # "The email address is too long when the part after the @-sign is converted to IDNA ASCII (2 bytes too many).", - # ), ( 
"me@bad-tld-1", "Invalid Domain: Must contain a period ('.') to be considered valid.", @@ -388,43 +370,6 @@ def test_domain_literal() -> None: "me@[tag:invalid space]", "Invalid Domain: The address in brackets following the '@' sign is not a valid IP address.", ), - # ( - # "", - # "A display name and angle brackets around the email address are not permitted here.", - # ), - # ( - # " !", "There can't be anything after the email address."), - ( - "<\u0338me@example.com", - "Invalid Local Part: contains invalid characters before the '@' sign.", - ), - ( - "DisplayName ", - "Invalid Local Part: contains invalid characters before the '@' sign.", - ), - # ( - # "DisplayName ", - # "A display name and angle brackets around the email address are not permitted here.", - # ), - # ( - # "Display Name ", - # "A display name and angle brackets around the email address are not permitted here.", - # ), - # ( - # '"Display Name" ', - # "A display name and angle brackets around the email address are not permitted here.", - # ), - # ( - # "Display.Name ", - # "The display name contains invalid characters when not quoted: '.'.", - # ), - # ( - # '"Display.Name" ', - # "A display name and angle brackets around the email address are not permitted here.", - # ), ], ) def test_email_invalid_syntax(email_input: str, error_msg: str) -> None: @@ -506,12 +451,68 @@ def test_email_invalid_character_smtputf8_off( def test_email_empty_local() -> None: - validate_email("@example.com", allow_empty_local=True) - validate_email('""@example.com', allow_empty_local=True, allow_quoted_local=True) + validate_email("@example.com", allow_empty_local=True, deliverable_address=False) + validate_email( + '""@example.com', + allow_empty_local=True, + allow_quoted_local=True, + deliverable_address=False, + ) def test_case_insensitive_mailbox_name() -> None: - validate_email("POSTMASTER@example.com").normalized = "postmaster@example.com" - validate_email( - "NOT-POSTMASTER@example.com" - ).normalized = 
"NOT-POSTMASTER@example.com" + assert ( + validate_email("POSTMASTER@example.com", deliverable_address=False).normalized + == "postmaster@example.com" + ) + + assert ( + validate_email( + "NOT-POSTMASTER@example.com", deliverable_address=False + ).normalized + == "NOT-POSTMASTER@example.com" + ) + + +@pytest.mark.parametrize( + "domain,expected_response", + [ + ( + "test@gmail.com", + True, + ), + ( + "test@pages.github.com", + True, + ), + ], +) +def test_deliverability_found(domain: str, expected_response: bool) -> None: + response = validate_email(domain, deliverable_address=True) + assert response.is_deliverable == expected_response + + +@pytest.mark.parametrize( + ("domain", "error"), + [ + ( + "test@xkxufoekjvjfjeodlfmdfjcu.com", + "Invalid Domain: No MX, A, or AAAA records found for domain.", + ), + ( + "test@example.com", + "Invalid Domain: The domain does not accept email due to a null MX record, indicating it is not configured to receive emails.", + ), + ( + "test@g.mail.com", + "Invalid Domain: No MX, A, or AAAA records found for domain.", + ), + ( + "test@justtxt.joshdata.me", + "Invalid Domain: No MX, A, or AAAA records found for domain.", + ), + ], +) +def test_deliverability_fails(domain: str, error: str) -> None: + with pytest.raises(SyntaxError, match=error): + validate_email(domain, deliverable_address=True) diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..e8d104e --- /dev/null +++ b/uv.lock @@ -0,0 +1,7 @@ +version = 1 +requires-python = ">=3.8" + +[[package]] +name = "emval" +version = "0.1.3" +source = { editable = "." }