6
6
import os
7
7
import codecs
8
8
9
# Comment out the XML elements you do not want to be imported into the
# database. Please note: if you comment out a main element such as "k_ele",
# "r_ele" or "sense", none of its child elements will be imported either.
#
# "ent_seq" has to be listed here as well: the parse loop over an <entry>
# skips every child tag that is not in this list, so without it the
# entry.ent_seq column would never be filled in and would stay at its
# default value.
xml_elements = [
    "ent_seq",
    "k_ele",
    "keb",
    "ke_inf",
    "ke_pri",
    "r_ele",
    "reb",
    "re_nokanji",
    "re_restr",
    "re_inf",
    "re_pri",
    "sense",
    "stagk",
    "stagr",
    "pos",
    "xref",
    "ant",
    "field",
    "misc",
    "s_inf",
    "dial",
    "gloss",
]
35
+
36
+
9
37
def parse_cmdline ():
10
38
parser = ArgumentParser ()
11
39
parser .add_argument ("--jmdictfile" , help = "path to the .xml JMdict file" , default = "JMdict_e" )
@@ -27,49 +55,79 @@ def create_database(name):
27
55
28
56
c .execute ("CREATE TABLE entry (ent_seq INTEGER DEFAULT 0)" )
29
57
30
- c .execute ("CREATE TABLE k_ele (entry_id INTEGER, keb TEXT DEFAULT '')" )
31
- c .execute ("CREATE TABLE k_ele_ke_inf (k_ele_id INTEGER, ke_inf TEXT)" )
32
- c .execute ("CREATE TABLE k_ele_ke_pri (k_ele_id INTEGER, ke_pri TEXT)" )
33
-
34
- c .execute ("CREATE TABLE r_ele (entry_id INTEGER, reb TEXT DEFAULT '', re_nokanji TEXT DEFAULT '')" )
35
- c .execute ("CREATE TABLE r_ele_re_restr (r_ele_id INTEGER, re_restr TEXT)" )
36
- c .execute ("CREATE TABLE r_ele_re_inf (r_ele_id INTEGER, re_inf TEXT)" )
37
- c .execute ("CREATE TABLE r_ele_re_pri (r_ele_id INTEGER, re_pri TEXT)" )
38
-
39
- c .execute ("CREATE TABLE sense (entry_id INTEGER)" )
40
- c .execute ("CREATE TABLE sense_stagk (sense_id INTEGER, stagk TEXT)" )
41
- c .execute ("CREATE TABLE sense_stagr (sense_id INTEGER, stagr TEXT)" )
42
- c .execute ("CREATE TABLE sense_pos (sense_id INTEGER, pos TEXT)" )
43
- c .execute ("CREATE TABLE sense_xref (sense_id INTEGER, xref TEXT)" )
44
- c .execute ("CREATE TABLE sense_ant (sense_id INTEGER, ant TEXT)" )
45
- c .execute ("CREATE TABLE sense_field (sense_id INTEGER, field TEXT)" )
46
- c .execute ("CREATE TABLE sense_misc (sense_id INTEGER, misc TEXT)" )
47
- c .execute ("CREATE TABLE sense_s_inf (sense_id INTEGER, s_inf TEXT)" )
48
- c .execute ("CREATE TABLE sense_dial (sense_id INTEGER, dial TEXT)" )
49
- c .execute ("CREATE TABLE sense_gloss (sense_id INTEGER, gloss TEXT)" )
50
-
51
- c .execute ("CREATE INDEX id_k_ele_index ON k_ele (entry_id)" )
52
- c .execute ("CREATE INDEX reb_k_ele_index ON k_ele (keb)" )
53
- c .execute ("CREATE INDEX id_k_ele_ke_inf_index ON k_ele_ke_inf (k_ele_id)" )
54
- c .execute ("CREATE INDEX id_k_ele_ke_pri_index ON k_ele_ke_pri (k_ele_id)" )
55
-
56
- c .execute ("CREATE INDEX id_r_ele_index ON r_ele (entry_id)" )
57
- c .execute ("CREATE INDEX reb_r_ele_index ON r_ele (reb)" )
58
- c .execute ("CREATE INDEX id_r_ele_re_restr_index ON r_ele_re_restr (r_ele_id)" )
59
- c .execute ("CREATE INDEX id_r_ele_re_inf_index ON r_ele_re_inf (r_ele_id)" )
60
- c .execute ("CREATE INDEX id_r_ele_re_pri_index ON r_ele_re_pri (r_ele_id)" )
61
-
62
- c .execute ("CREATE INDEX id_sense_index ON sense (entry_id)" )
63
- c .execute ("CREATE INDEX id_sense_stagk_index ON sense_stagk (sense_id)" )
64
- c .execute ("CREATE INDEX id_sense_stagr_index ON sense_stagr (sense_id)" )
65
- c .execute ("CREATE INDEX id_sense_pos_index ON sense_pos (sense_id)" )
66
- c .execute ("CREATE INDEX id_sense_xref_index ON sense_xref (sense_id)" )
67
- c .execute ("CREATE INDEX id_sense_ant_index ON sense_ant (sense_id)" )
68
- c .execute ("CREATE INDEX id_sense_field_index ON sense_field (sense_id)" )
69
- c .execute ("CREATE INDEX id_sense_misc_index ON sense_misc (sense_id)" )
70
- c .execute ("CREATE INDEX id_sense_s_inf_index ON sense_s_inf (sense_id)" )
71
- c .execute ("CREATE INDEX id_sense_dial_index ON sense_dial (sense_id)" )
72
- c .execute ("CREATE INDEX id_sense_gloss_index ON sense_gloss (sense_id)" )
58
+ if "k_ele" in xml_elements :
59
+ c .execute ("CREATE TABLE k_ele (entry_id INTEGER, keb TEXT DEFAULT '')" )
60
+ c .execute ("CREATE INDEX id_k_ele_index ON k_ele (entry_id)" )
61
+ c .execute ("CREATE INDEX reb_k_ele_index ON k_ele (keb)" )
62
+
63
+ if "ke_inf" in xml_elements :
64
+ c .execute ("CREATE TABLE k_ele_ke_inf (k_ele_id INTEGER, ke_inf TEXT)" )
65
+ c .execute ("CREATE INDEX id_k_ele_ke_inf_index ON k_ele_ke_inf (k_ele_id)" )
66
+
67
+ if "ke_pri" in xml_elements :
68
+ c .execute ("CREATE TABLE k_ele_ke_pri (k_ele_id INTEGER, ke_pri TEXT)" )
69
+ c .execute ("CREATE INDEX id_k_ele_ke_pri_index ON k_ele_ke_pri (k_ele_id)" )
70
+
71
+ if "r_ele" in xml_elements :
72
+ c .execute ("CREATE TABLE r_ele (entry_id INTEGER, reb TEXT DEFAULT '', re_nokanji TEXT DEFAULT '')" )
73
+ c .execute ("CREATE INDEX id_r_ele_index ON r_ele (entry_id)" )
74
+ c .execute ("CREATE INDEX reb_r_ele_index ON r_ele (reb)" )
75
+
76
+ if "re_restr" in xml_elements :
77
+ c .execute ("CREATE TABLE r_ele_re_restr (r_ele_id INTEGER, re_restr TEXT)" )
78
+ c .execute ("CREATE INDEX id_r_ele_re_restr_index ON r_ele_re_restr (r_ele_id)" )
79
+
80
+ if "re_inf" in xml_elements :
81
+ c .execute ("CREATE TABLE r_ele_re_inf (r_ele_id INTEGER, re_inf TEXT)" )
82
+ c .execute ("CREATE INDEX id_r_ele_re_inf_index ON r_ele_re_inf (r_ele_id)" )
83
+
84
+ if "re_pri" in xml_elements :
85
+ c .execute ("CREATE TABLE r_ele_re_pri (r_ele_id INTEGER, re_pri TEXT)" )
86
+ c .execute ("CREATE INDEX id_r_ele_re_pri_index ON r_ele_re_pri (r_ele_id)" )
87
+
88
+ if "sense" in xml_elements :
89
+ c .execute ("CREATE TABLE sense (entry_id INTEGER)" )
90
+ c .execute ("CREATE INDEX id_sense_index ON sense (entry_id)" )
91
+
92
+ if "stagk" in xml_elements :
93
+ c .execute ("CREATE TABLE sense_stagk (sense_id INTEGER, stagk TEXT)" )
94
+ c .execute ("CREATE INDEX id_sense_stagk_index ON sense_stagk (sense_id)" )
95
+
96
+ if "stagr" in xml_elements :
97
+ c .execute ("CREATE TABLE sense_stagr (sense_id INTEGER, stagr TEXT)" )
98
+ c .execute ("CREATE INDEX id_sense_stagr_index ON sense_stagr (sense_id)" )
99
+
100
+ if "pos" in xml_elements :
101
+ c .execute ("CREATE TABLE sense_pos (sense_id INTEGER, pos TEXT)" )
102
+ c .execute ("CREATE INDEX id_sense_pos_index ON sense_pos (sense_id)" )
103
+
104
+ if "xref" in xml_elements :
105
+ c .execute ("CREATE TABLE sense_xref (sense_id INTEGER, xref TEXT)" )
106
+ c .execute ("CREATE INDEX id_sense_xref_index ON sense_xref (sense_id)" )
107
+
108
+ if "ant" in xml_elements :
109
+ c .execute ("CREATE TABLE sense_ant (sense_id INTEGER, ant TEXT)" )
110
+ c .execute ("CREATE INDEX id_sense_ant_index ON sense_ant (sense_id)" )
111
+
112
+ if "field" in xml_elements :
113
+ c .execute ("CREATE TABLE sense_field (sense_id INTEGER, field TEXT)" )
114
+ c .execute ("CREATE INDEX id_sense_field_index ON sense_field (sense_id)" )
115
+
116
+ if "misc" in xml_elements :
117
+ c .execute ("CREATE TABLE sense_misc (sense_id INTEGER, misc TEXT)" )
118
+ c .execute ("CREATE INDEX id_sense_misc_index ON sense_misc (sense_id)" )
119
+
120
+ if "s_inf" in xml_elements :
121
+ c .execute ("CREATE TABLE sense_s_inf (sense_id INTEGER, s_inf TEXT)" )
122
+ c .execute ("CREATE INDEX id_sense_s_inf_index ON sense_s_inf (sense_id)" )
123
+
124
+ if "dial" in xml_elements :
125
+ c .execute ("CREATE TABLE sense_dial (sense_id INTEGER, dial TEXT)" )
126
+ c .execute ("CREATE INDEX id_sense_dial_index ON sense_dial (sense_id)" )
127
+
128
+ if "gloss" in xml_elements :
129
+ c .execute ("CREATE TABLE sense_gloss (sense_id INTEGER, gloss TEXT)" )
130
+ c .execute ("CREATE INDEX id_sense_gloss_index ON sense_gloss (sense_id)" )
73
131
74
132
return database
75
133
@@ -80,6 +138,8 @@ def parse_k_ele(k_ele, entry_id, dtd, database):
80
138
k_ele_id = c .lastrowid
81
139
82
140
for item in k_ele :
141
+ if item .tag not in xml_elements :
142
+ continue
83
143
if item .tag == "keb" :
84
144
c .execute ("UPDATE k_ele SET keb = ? WHERE rowid = ?" , (item .text , k_ele_id ))
85
145
elif item .tag == "ke_inf" :
@@ -94,6 +154,8 @@ def parse_r_ele(r_ele, entry_id, dtd, database):
94
154
r_ele_id = c .lastrowid
95
155
96
156
for item in r_ele :
157
+ if item .tag not in xml_elements :
158
+ continue
97
159
if item .tag == "reb" :
98
160
c .execute ("UPDATE r_ele SET reb = ? WHERE rowid = ?" , (item .text , r_ele_id ))
99
161
elif item .tag == "re_nokanji" :
@@ -113,6 +175,8 @@ def parse_sense(sense, entry_id, dtd, database):
113
175
sense_id = c .lastrowid
114
176
115
177
for item in sense :
178
+ if item .tag not in xml_elements :
179
+ continue
116
180
if item .tag == "stagk" :
117
181
c .execute ("INSERT INTO sense_stagk (sense_id, stagk) VALUES (?, ?)" , (sense_id , item .text ))
118
182
elif item .tag == "stagr" :
@@ -141,6 +205,8 @@ def parse_entry(entry, dtd, database):
141
205
entry_id = c .lastrowid
142
206
143
207
for item in entry :
208
+ if item .tag not in xml_elements :
209
+ continue
144
210
if item .tag == "ent_seq" :
145
211
c .execute ("UPDATE entry SET ent_seq = ? WHERE rowid = ?" , (item .text , entry_id ))
146
212
elif item .tag == "k_ele" :
0 commit comments