Skip to content

Commit e3ab972

Browse files
committed
Handle internationalized domain names (IDN)
1 parent 2f9fde9 commit e3ab972

File tree

7 files changed

+68
-24
lines changed

7 files changed

+68
-24
lines changed

CHANGELOG

+3
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,8 @@
11
#Nmap Changelog ($Id$); -*-text-*-
22

3+
o [GH#1023] Handle Internationalized Domain Names (IDN) like Яндекс.рф on
4+
platforms where getaddrinfo supports the AI_IDN flag. [Daniel Miller]
5+
36
o Avoid storing many small strings from IPv4 OS detection results in the global
47
string_pool. These were effectively leaked after a host is done being
58
scanned, since string_pool allocations are not freed until Nmap quits.

NmapOps.cc

+5
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,10 @@ NmapOps::~NmapOps() {
125125
free(datadir);
126126
datadir = NULL;
127127
}
128+
if (locale) {
129+
free(locale);
130+
locale = NULL;
131+
}
128132

129133
#ifndef NOLUA
130134
if (scriptversion || script)
@@ -305,6 +309,7 @@ void NmapOps::Initialize() {
305309
numhosts_up = 0;
306310
numhosts_scanning = 0;
307311
noninteractive = false;
312+
locale = NULL;
308313
current_scantype = STYPE_UNKNOWN;
309314
ipoptions = NULL;
310315
ipoptionslen = 0;

NmapOps.h

+1
Original file line number | Diff line number | Diff line change
@@ -356,6 +356,7 @@ class NmapOps {
356356
int numhosts_scanning;
357357
stype current_scantype;
358358
bool noninteractive;
359+
char *locale;
359360

360361
bool release_memory; /* suggest to release memory before quitting. used to find memory leaks. */
361362
private:

main.cc

+2
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,7 @@
6363
/* $Id$ */
6464

6565
#include <signal.h>
66+
#include <locale.h>
6667

6768
#include "nmap.h"
6869
#include "NmapOps.h"
@@ -116,6 +117,7 @@ int main(int argc, char *argv[]) {
116117
int ret;
117118
int i;
118119

120+
o.locale = strdup(setlocale(LC_CTYPE, NULL));
119121
set_program_name(argv[0]);
120122

121123
#ifdef __amigaos__

nselib/http.lua

+32 -22
Original file line number | Diff line number | Diff line change
@@ -145,6 +145,7 @@ local stringaux = require "stringaux"
145145
local table = require "table"
146146
local tableaux = require "tableaux"
147147
local url = require "url"
148+
local ascii_hostname = url.ascii_hostname
148149
local smbauth = require "smbauth"
149150
local unicode = require "unicode"
150151

@@ -187,8 +188,9 @@ local function get_host_field(host, port, scheme)
187188
if host_header then return host_header end
188189
-- If there's no host, we can't invent a name.
189190
if not host then return nil end
191+
local hostname = ascii_hostname(host)
190192
-- If there's no port, just return hostname.
191-
if not port then return stdnse.get_hostname(host) end
193+
if not port then return hostname end
192194
if type(port) == "string" then
193195
port = tonumber(port)
194196
assert(port, "Invalid port: not a number or table")
@@ -200,7 +202,7 @@ local function get_host_field(host, port, scheme)
200202
if scheme then
201203
-- Caller provided scheme. If it's default, return just the hostname.
202204
if number == get_default_port(scheme) then
203-
return stdnse.get_hostname(host)
205+
return hostname
204206
end
205207
else
206208
scheme = url.get_default_scheme(port)
@@ -210,12 +212,12 @@ local function get_host_field(host, port, scheme)
210212
if (ssl_port and scheme == 'https') or
211213
(not ssl_port and scheme == 'http') then
212214
-- If it's SSL and https, or if it's plaintext and http, return just the hostname.
213-
return stdnse.get_hostname(host)
215+
return hostname
214216
end
215217
end
216218
end
217219
-- No special cases matched, so include the port number in the host header
218-
return stdnse.get_hostname(host) .. ":" .. number
220+
return hostname .. ":" .. number
219221
end
220222

221223
-- Skip *( SP | HT ) starting at offset. See RFC 2616, section 2.2.
@@ -1076,7 +1078,7 @@ local function lookup_cache (method, host, port, path, options)
10761078

10771079
if type(port) == "table" then port = port.number end
10781080

1079-
local key = stdnse.get_hostname(host)..":"..port..":"..path;
1081+
local key = ascii_hostname(host)..":"..port..":"..path;
10801082
local mutex = nmap.mutex(tostring(lookup_cache)..key);
10811083

10821084
local state = {
@@ -1615,7 +1617,7 @@ local redirect_ok_rules = {
16151617
-- * ccTLDs are not treated as such. The rule will not stop a redirect
16161618
-- from foo.co.uk to bar.co.uk even though it logically should.
16171619
function (url, host, port)
1618-
local hostname = stdnse.get_hostname(host)
1620+
local hostname = ascii_hostname(host)
16191621
if hostname == host.ip then
16201622
return url.host == hostname
16211623
end
@@ -1700,7 +1702,7 @@ function parse_redirect(host, port, path, response)
17001702
local u = url.parse(response.header.location)
17011703
if ( not(u.host) ) then
17021704
-- we're dealing with a relative url
1703-
u.host = stdnse.get_hostname(host)
1705+
u.host = ascii_hostname(host)
17041706
end
17051707
-- do port fixup
17061708
u.port = u.port or get_default_port(u.scheme) or port.number
@@ -1811,7 +1813,7 @@ function get_url( u, options )
18111813
path = path .. "?" .. parsed.query
18121814
end
18131815

1814-
return get( parsed.host, port, path, options )
1816+
return get( parsed.ascii_host or parsed.host, port, path, options )
18151817
end
18161818

18171819
---Fetches a resource with a HEAD request.
@@ -2857,7 +2859,7 @@ end
28572859
--@param contenttype [optional] The content-type value for the path, if it's known.
28582860
function save_path(host, port, path, status, links_to, linked_from, contenttype)
28592861
-- Make sure we have a proper hostname and port
2860-
host = stdnse.get_hostname(host)
2862+
host = ascii_hostname(host)
28612863
if(type(port) == 'table') then
28622864
port = port['number']
28632865
end
@@ -2888,42 +2890,50 @@ function save_path(host, port, path, status, links_to, linked_from, contenttype)
28882890
end
28892891
end
28902892

2893+
if parsed.host then
2894+
host = parsed.ascii_host or parsed.host
2895+
end
2896+
2897+
if parsed.port then
2898+
port = parsed.port
2899+
end
2900+
28912901
-- Add to the 'all_pages' key
2892-
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'all_pages'}, parsed['path'])
2902+
stdnse.registry_add_array({host, 'www', port, 'all_pages'}, parsed['path'])
28932903

28942904
-- Add the URL with querystring to all_pages_full_query
2895-
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'all_pages_full_query'}, parsed['path_query'])
2905+
stdnse.registry_add_array({host, 'www', port, 'all_pages_full_query'}, parsed['path_query'])
28962906

28972907
-- Add the URL to a key matching the response code
28982908
if(status) then
2899-
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'status_codes', status}, parsed['path'])
2909+
stdnse.registry_add_array({host, 'www', port, 'status_codes', status}, parsed['path'])
29002910
end
29012911

29022912
-- If it's a directory, add it to the directories list; otherwise, add it to the files list
29032913
if(parsed['is_folder']) then
2904-
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'directories'}, parsed['path'])
2914+
stdnse.registry_add_array({host, 'www', port, 'directories'}, parsed['path'])
29052915
else
2906-
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'files'}, parsed['path'])
2916+
stdnse.registry_add_array({host, 'www', port, 'files'}, parsed['path'])
29072917
end
29082918

29092919

29102920
-- If we have an extension, add it to the extensions key
29112921
if(parsed['extension']) then
2912-
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'extensions', parsed['extension']}, parsed['path'])
2922+
stdnse.registry_add_array({host, 'www', port, 'extensions', parsed['extension']}, parsed['path'])
29132923
end
29142924

29152925
-- Add an entry for the page and its arguments
29162926
if(parsed['querystring']) then
29172927
-- Add all scripts with a querystring to the 'cgi' and 'cgi_full_query' keys
2918-
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'cgi'}, parsed['path'])
2919-
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'cgi_full_query'}, parsed['path_query'])
2928+
stdnse.registry_add_array({host, 'www', port, 'cgi'}, parsed['path'])
2929+
stdnse.registry_add_array({host, 'www', port, 'cgi_full_query'}, parsed['path_query'])
29202930

29212931
-- Add the query string alone to the registry (probably not necessary)
2922-
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'cgi_querystring', parsed['path'] }, parsed['raw_querystring'])
2932+
stdnse.registry_add_array({host, 'www', port, 'cgi_querystring', parsed['path'] }, parsed['raw_querystring'])
29232933

29242934
-- Add the individual arguments for the page, along with their values
29252935
for key, value in pairs(parsed['querystring']) do
2926-
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'cgi_args', parsed['path']}, parsed['querystring'])
2936+
stdnse.registry_add_array({host, 'www', port, 'cgi_args', parsed['path']}, parsed['querystring'])
29272937
end
29282938
end
29292939

@@ -2934,7 +2944,7 @@ function save_path(host, port, path, status, links_to, linked_from, contenttype)
29342944
end
29352945

29362946
for _, v in ipairs(links_to) do
2937-
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'links_to', parsed['path_query']}, v)
2947+
stdnse.registry_add_array({host, 'www', port, 'links_to', parsed['path_query']}, v)
29382948
end
29392949
end
29402950

@@ -2945,13 +2955,13 @@ function save_path(host, port, path, status, links_to, linked_from, contenttype)
29452955
end
29462956

29472957
for _, v in ipairs(linked_from) do
2948-
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'links_to', v}, parsed['path_query'])
2958+
stdnse.registry_add_array({host, 'www', port, 'links_to', v}, parsed['path_query'])
29492959
end
29502960
end
29512961

29522962
-- Save it as a content-type, if we have one
29532963
if(contenttype) then
2954-
stdnse.registry_add_array({parsed['host'] or host, 'www', parsed['port'] or port, 'content-type', contenttype}, parsed['path_query'])
2964+
stdnse.registry_add_array({host, 'www', port, 'content-type', contenttype}, parsed['path_query'])
29552965
end
29562966
end
29572967

nselib/url.lua

+16 -2
Original file line number | Diff line number | Diff line change
@@ -138,6 +138,21 @@ local function normalize_escape (s)
138138
return escape(unescape(s))
139139
end
140140

141+
function ascii_hostname(host)
142+
local hostname = stdnse.get_hostname(host)
143+
if hostname:match("[\x80-\xff]") then
144+
-- TODO: Allow other Unicode encodings
145+
local decoded = unicode.decode(hostname, unicode.utf8_dec)
146+
if decoded then
147+
local ascii_host = idna.toASCII(decoded)
148+
if ascii_host then
149+
hostname = ascii_host
150+
end
151+
end
152+
end
153+
return hostname
154+
end
155+
141156
---
142157
-- Parses a URL and returns a table with all its parts according to RFC 3986.
143158
--
@@ -219,8 +234,7 @@ function parse(url, default)
219234
function(p) parsed.port = tonumber(p); return "" end)
220235
if authority ~= "" then parsed.host = authority end
221236
if parsed.host then
222-
-- TODO: Allow other Unicode encodings
223-
parsed.ascii_host = idna.toASCII(unicode.decode(parsed.host, unicode.utf8_dec))
237+
parsed.ascii_host = ascii_hostname(parsed.host)
224238
end
225239
local userinfo = parsed.userinfo
226240
if not userinfo then return parsed end

tcpip.cc

+9
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,7 @@
6565

6666
#include "nmap.h"
6767

68+
#include <locale.h>
6869
#include "nbase.h"
6970
#include <dnet.h>
7071
#include "tcpip.h"
@@ -419,7 +420,15 @@ struct addrinfo *resolve_all(const char *hostname, int pf) {
419420
hints.ai_family = pf;
420421
/* Otherwise we get multiple identical addresses with different socktypes. */
421422
hints.ai_socktype = SOCK_DGRAM;
423+
#ifdef AI_IDN
424+
/* Try resolving internationalized domain names */
425+
hints.ai_flags = AI_IDN;
426+
setlocale(LC_CTYPE, "");
427+
#endif
422428
rc = getaddrinfo(hostname, NULL, &hints, &result);
429+
#ifdef AI_IDN
430+
setlocale(LC_CTYPE, o.locale);
431+
#endif
423432
if (rc != 0){
424433
if (o.debugging > 1)
425434
error("Error resolving %s: %s", hostname, gai_strerror(rc));

0 commit comments

Comments
 (0)