Skip to content

Commit 293f06b

Browse files
committed
Added list for select xml elements to be imported into database
1 parent f2a94f5 commit 293f06b

File tree

1 file changed

+109
-43
lines changed

1 file changed

+109
-43
lines changed

JMdictToSQLite/JMdictToSQLite.py

+109-43
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,34 @@
66
import os
77
import codecs
88

9+
# Comment out the xml elements you don't want imported into the database
10+
# (please note: if you comment out a main element such as "k_ele", "r_ele"
11+
# or "sense", none of its child elements will be imported either)
12+
xml_elements = [
13+
"k_ele",
14+
"keb",
15+
"ke_inf",
16+
"ke_pri",
17+
"r_ele",
18+
"reb",
19+
"re_nokanji",
20+
"re_restr",
21+
"re_inf",
22+
"re_pri",
23+
"sense",
24+
"stagk",
25+
"stagr",
26+
"pos",
27+
"xref",
28+
"ant",
29+
"field",
30+
"misc",
31+
"s_inf",
32+
"dial",
33+
"gloss"
34+
]
35+
36+
937
def parse_cmdline():
1038
parser = ArgumentParser()
1139
parser.add_argument("--jmdictfile", help="path to the .xml JMdict file", default="JMdict_e")
@@ -27,49 +55,79 @@ def create_database(name):
2755

2856
c.execute("CREATE TABLE entry (ent_seq INTEGER DEFAULT 0)")
2957

30-
c.execute("CREATE TABLE k_ele (entry_id INTEGER, keb TEXT DEFAULT '')")
31-
c.execute("CREATE TABLE k_ele_ke_inf (k_ele_id INTEGER, ke_inf TEXT)")
32-
c.execute("CREATE TABLE k_ele_ke_pri (k_ele_id INTEGER, ke_pri TEXT)")
33-
34-
c.execute("CREATE TABLE r_ele (entry_id INTEGER, reb TEXT DEFAULT '', re_nokanji TEXT DEFAULT '')")
35-
c.execute("CREATE TABLE r_ele_re_restr (r_ele_id INTEGER, re_restr TEXT)")
36-
c.execute("CREATE TABLE r_ele_re_inf (r_ele_id INTEGER, re_inf TEXT)")
37-
c.execute("CREATE TABLE r_ele_re_pri (r_ele_id INTEGER, re_pri TEXT)")
38-
39-
c.execute("CREATE TABLE sense (entry_id INTEGER)")
40-
c.execute("CREATE TABLE sense_stagk (sense_id INTEGER, stagk TEXT)")
41-
c.execute("CREATE TABLE sense_stagr (sense_id INTEGER, stagr TEXT)")
42-
c.execute("CREATE TABLE sense_pos (sense_id INTEGER, pos TEXT)")
43-
c.execute("CREATE TABLE sense_xref (sense_id INTEGER, xref TEXT)")
44-
c.execute("CREATE TABLE sense_ant (sense_id INTEGER, ant TEXT)")
45-
c.execute("CREATE TABLE sense_field (sense_id INTEGER, field TEXT)")
46-
c.execute("CREATE TABLE sense_misc (sense_id INTEGER, misc TEXT)")
47-
c.execute("CREATE TABLE sense_s_inf (sense_id INTEGER, s_inf TEXT)")
48-
c.execute("CREATE TABLE sense_dial (sense_id INTEGER, dial TEXT)")
49-
c.execute("CREATE TABLE sense_gloss (sense_id INTEGER, gloss TEXT)")
50-
51-
c.execute("CREATE INDEX id_k_ele_index ON k_ele (entry_id)")
52-
c.execute("CREATE INDEX reb_k_ele_index ON k_ele (keb)")
53-
c.execute("CREATE INDEX id_k_ele_ke_inf_index ON k_ele_ke_inf (k_ele_id)")
54-
c.execute("CREATE INDEX id_k_ele_ke_pri_index ON k_ele_ke_pri (k_ele_id)")
55-
56-
c.execute("CREATE INDEX id_r_ele_index ON r_ele (entry_id)")
57-
c.execute("CREATE INDEX reb_r_ele_index ON r_ele (reb)")
58-
c.execute("CREATE INDEX id_r_ele_re_restr_index ON r_ele_re_restr (r_ele_id)")
59-
c.execute("CREATE INDEX id_r_ele_re_inf_index ON r_ele_re_inf (r_ele_id)")
60-
c.execute("CREATE INDEX id_r_ele_re_pri_index ON r_ele_re_pri (r_ele_id)")
61-
62-
c.execute("CREATE INDEX id_sense_index ON sense (entry_id)")
63-
c.execute("CREATE INDEX id_sense_stagk_index ON sense_stagk (sense_id)")
64-
c.execute("CREATE INDEX id_sense_stagr_index ON sense_stagr (sense_id)")
65-
c.execute("CREATE INDEX id_sense_pos_index ON sense_pos (sense_id)")
66-
c.execute("CREATE INDEX id_sense_xref_index ON sense_xref (sense_id)")
67-
c.execute("CREATE INDEX id_sense_ant_index ON sense_ant (sense_id)")
68-
c.execute("CREATE INDEX id_sense_field_index ON sense_field (sense_id)")
69-
c.execute("CREATE INDEX id_sense_misc_index ON sense_misc (sense_id)")
70-
c.execute("CREATE INDEX id_sense_s_inf_index ON sense_s_inf (sense_id)")
71-
c.execute("CREATE INDEX id_sense_dial_index ON sense_dial (sense_id)")
72-
c.execute("CREATE INDEX id_sense_gloss_index ON sense_gloss (sense_id)")
58+
if "k_ele" in xml_elements:
59+
c.execute("CREATE TABLE k_ele (entry_id INTEGER, keb TEXT DEFAULT '')")
60+
c.execute("CREATE INDEX id_k_ele_index ON k_ele (entry_id)")
61+
c.execute("CREATE INDEX reb_k_ele_index ON k_ele (keb)")
62+
63+
if "ke_inf" in xml_elements:
64+
c.execute("CREATE TABLE k_ele_ke_inf (k_ele_id INTEGER, ke_inf TEXT)")
65+
c.execute("CREATE INDEX id_k_ele_ke_inf_index ON k_ele_ke_inf (k_ele_id)")
66+
67+
if "ke_pri" in xml_elements:
68+
c.execute("CREATE TABLE k_ele_ke_pri (k_ele_id INTEGER, ke_pri TEXT)")
69+
c.execute("CREATE INDEX id_k_ele_ke_pri_index ON k_ele_ke_pri (k_ele_id)")
70+
71+
if "r_ele" in xml_elements:
72+
c.execute("CREATE TABLE r_ele (entry_id INTEGER, reb TEXT DEFAULT '', re_nokanji TEXT DEFAULT '')")
73+
c.execute("CREATE INDEX id_r_ele_index ON r_ele (entry_id)")
74+
c.execute("CREATE INDEX reb_r_ele_index ON r_ele (reb)")
75+
76+
if "re_restr" in xml_elements:
77+
c.execute("CREATE TABLE r_ele_re_restr (r_ele_id INTEGER, re_restr TEXT)")
78+
c.execute("CREATE INDEX id_r_ele_re_restr_index ON r_ele_re_restr (r_ele_id)")
79+
80+
if "re_inf" in xml_elements:
81+
c.execute("CREATE TABLE r_ele_re_inf (r_ele_id INTEGER, re_inf TEXT)")
82+
c.execute("CREATE INDEX id_r_ele_re_inf_index ON r_ele_re_inf (r_ele_id)")
83+
84+
if "re_pri" in xml_elements:
85+
c.execute("CREATE TABLE r_ele_re_pri (r_ele_id INTEGER, re_pri TEXT)")
86+
c.execute("CREATE INDEX id_r_ele_re_pri_index ON r_ele_re_pri (r_ele_id)")
87+
88+
if "sense" in xml_elements:
89+
c.execute("CREATE TABLE sense (entry_id INTEGER)")
90+
c.execute("CREATE INDEX id_sense_index ON sense (entry_id)")
91+
92+
if "stagk" in xml_elements:
93+
c.execute("CREATE TABLE sense_stagk (sense_id INTEGER, stagk TEXT)")
94+
c.execute("CREATE INDEX id_sense_stagk_index ON sense_stagk (sense_id)")
95+
96+
if "stagr" in xml_elements:
97+
c.execute("CREATE TABLE sense_stagr (sense_id INTEGER, stagr TEXT)")
98+
c.execute("CREATE INDEX id_sense_stagr_index ON sense_stagr (sense_id)")
99+
100+
if "pos" in xml_elements:
101+
c.execute("CREATE TABLE sense_pos (sense_id INTEGER, pos TEXT)")
102+
c.execute("CREATE INDEX id_sense_pos_index ON sense_pos (sense_id)")
103+
104+
if "xref" in xml_elements:
105+
c.execute("CREATE TABLE sense_xref (sense_id INTEGER, xref TEXT)")
106+
c.execute("CREATE INDEX id_sense_xref_index ON sense_xref (sense_id)")
107+
108+
if "ant" in xml_elements:
109+
c.execute("CREATE TABLE sense_ant (sense_id INTEGER, ant TEXT)")
110+
c.execute("CREATE INDEX id_sense_ant_index ON sense_ant (sense_id)")
111+
112+
if "field" in xml_elements:
113+
c.execute("CREATE TABLE sense_field (sense_id INTEGER, field TEXT)")
114+
c.execute("CREATE INDEX id_sense_field_index ON sense_field (sense_id)")
115+
116+
if "misc" in xml_elements:
117+
c.execute("CREATE TABLE sense_misc (sense_id INTEGER, misc TEXT)")
118+
c.execute("CREATE INDEX id_sense_misc_index ON sense_misc (sense_id)")
119+
120+
if "s_inf" in xml_elements:
121+
c.execute("CREATE TABLE sense_s_inf (sense_id INTEGER, s_inf TEXT)")
122+
c.execute("CREATE INDEX id_sense_s_inf_index ON sense_s_inf (sense_id)")
123+
124+
if "dial" in xml_elements:
125+
c.execute("CREATE TABLE sense_dial (sense_id INTEGER, dial TEXT)")
126+
c.execute("CREATE INDEX id_sense_dial_index ON sense_dial (sense_id)")
127+
128+
if "gloss" in xml_elements:
129+
c.execute("CREATE TABLE sense_gloss (sense_id INTEGER, gloss TEXT)")
130+
c.execute("CREATE INDEX id_sense_gloss_index ON sense_gloss (sense_id)")
73131

74132
return database
75133

@@ -80,6 +138,8 @@ def parse_k_ele(k_ele, entry_id, dtd, database):
80138
k_ele_id = c.lastrowid
81139

82140
for item in k_ele:
141+
if item.tag not in xml_elements:
142+
continue
83143
if item.tag == "keb":
84144
c.execute("UPDATE k_ele SET keb = ? WHERE rowid = ?", (item.text, k_ele_id))
85145
elif item.tag == "ke_inf":
@@ -94,6 +154,8 @@ def parse_r_ele(r_ele, entry_id, dtd, database):
94154
r_ele_id = c.lastrowid
95155

96156
for item in r_ele:
157+
if item.tag not in xml_elements:
158+
continue
97159
if item.tag == "reb":
98160
c.execute("UPDATE r_ele SET reb = ? WHERE rowid = ?", (item.text, r_ele_id))
99161
elif item.tag == "re_nokanji":
@@ -113,6 +175,8 @@ def parse_sense(sense, entry_id, dtd, database):
113175
sense_id = c.lastrowid
114176

115177
for item in sense:
178+
if item.tag not in xml_elements:
179+
continue
116180
if item.tag == "stagk":
117181
c.execute("INSERT INTO sense_stagk (sense_id, stagk) VALUES (?, ?)", (sense_id, item.text))
118182
elif item.tag == "stagr":
@@ -141,6 +205,8 @@ def parse_entry(entry, dtd, database):
141205
entry_id = c.lastrowid
142206

143207
for item in entry:
208+
if item.tag not in xml_elements:
209+
continue
144210
if item.tag == "ent_seq":
145211
c.execute("UPDATE entry SET ent_seq = ? WHERE rowid = ?", (item.text, entry_id))
146212
elif item.tag == "k_ele":

0 commit comments

Comments
 (0)