Skip to content

Commit 2650603

Browse files
committed
Added KradfileToSQLite script
0 parents  commit 2650603

File tree

5 files changed

+848
-0
lines changed

5 files changed

+848
-0
lines changed

KradfileToSQLite/KradfileToSQLite.py

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
2+
from argparse import ArgumentParser, FileType, Action
3+
import sqlite3
4+
import sys
5+
import codecs
6+
import os
7+
8+
def parse_cmdline(argv=None):
    """Parse the command-line options of the converter.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            exactly what the original zero-argument call did. Passing an
            explicit list lets the parser be driven programmatically.

    Returns:
        An ``argparse.Namespace`` with the attributes ``kradfile``,
        ``kradfile2`` and ``sqlitefile``.

    Raises:
        SystemExit: raised by argparse when ``--sqlitefile`` is missing or an
            unknown option is given.
    """
    parser = ArgumentParser(
        description="Convert kradfile/kradfile2 into a SQLite database")
    parser.add_argument("--kradfile", help="path to the kradfile", default="kradfile")
    parser.add_argument("--kradfile2", help="path to the kradfile2", default="kradfile2")
    parser.add_argument("--sqlitefile", help="path to the sqlite database to create", required=True)
    return parser.parse_args(argv)
14+
15+
16+
def create_database(name):
    """Create a fresh SQLite database containing the empty kanji/radical schema.

    The file name gets a ``.db`` suffix appended unless it already ends with
    one. Any existing file of that name is deleted first, so the returned
    database always starts empty.

    Args:
        name: Path of the database file, with or without the ``.db`` suffix.

    Returns:
        An open ``sqlite3.Connection`` holding the tables ``radicals``,
        ``kanji`` and ``kanji_radical``. The caller is responsible for
        closing it.

    Raises:
        OSError: if an existing file cannot be removed.
        sqlite3.Error: if the database cannot be opened or the schema cannot
            be created.
    """
    sqlitefile = name if name.endswith(".db") else name + ".db"

    # Delete a previous database directly instead of checking for existence
    # first; this avoids the check-then-remove race.
    try:
        os.remove(sqlitefile)
    except FileNotFoundError:
        pass

    database = sqlite3.connect(sqlitefile)
    try:
        c = database.cursor()
        c.execute("CREATE TABLE radicals (data TEXT NOT NULL)")
        c.execute("CREATE TABLE kanji (data TEXT NOT NULL)")
        c.execute("CREATE TABLE kanji_radical (kanji_id INTEGER, radical_id INTEGER)")
    except Exception:
        # Do not leak the connection when the schema cannot be created.
        database.close()
        raise

    return database
33+
34+
35+
def get_radical_id(radical, database):
    """Return the rowid of ``radical`` in the ``radicals`` table.

    If the radical is not stored yet, it is inserted and the rowid of the new
    row is returned. The insert is not committed here.

    Args:
        radical: Radical text to look up.
        database: Open SQLite connection that contains the ``radicals`` table.

    Returns:
        The integer rowid of the existing or newly inserted radical.
    """
    cursor = database.cursor()
    match = cursor.execute(
        "SELECT rowid FROM radicals WHERE data = ?", (radical,)
    ).fetchone()

    # Known radical: reuse its id.
    if match is not None:
        return match[0]

    # Unknown radical: store it and hand back the id of the new row.
    cursor.execute("INSERT INTO radicals (data) VALUES (?)", (radical,))
    return cursor.lastrowid
49+
50+
51+
def import_data_file(database, kradfile):
    """Import the records of one kradfile into the database.

    Each record line has the form ``<kanji> : <radical> <radical> ...``.
    For every record one row is inserted into ``kanji`` and one row per
    radical into ``kanji_radical``; radicals are looked up or created through
    ``get_radical_id``. A ``#`` is printed after every 100 imported records
    as a progress indicator, and the transaction is committed once at the end.

    Blank lines, comment lines starting with ``#`` and lines that do not
    contain exactly one ``:`` separator are skipped.

    Args:
        database: Open SQLite connection with the schema already created.
        kradfile: Iterable of decoded text lines (for example an open file).
    """
    c = database.cursor()
    counter = 0

    for line in kradfile:
        line = line.strip()

        # Test for emptiness before anything else: indexing an empty line
        # raised IndexError in the previous implementation.
        if not line or line.startswith("#"):
            continue

        try:
            kanji, radicals = line.split(":")
        except ValueError:
            # Zero or several ':' separators: not a record line.
            continue

        kanji = kanji.strip()
        if not kanji:
            # Nothing in front of the separator, so there is no kanji to store.
            continue

        c.execute("INSERT INTO kanji (data) VALUES (?)", [kanji])
        kanji_id = c.lastrowid

        for radical in radicals.split():
            radical_id = get_radical_id(radical.strip(), database)
            c.execute("INSERT INTO kanji_radical (kanji_id, radical_id) VALUES (?, ?)", (kanji_id, radical_id))

        counter += 1
        if not counter % 100:
            print("#", end="", flush=True)

    database.commit()
78+
79+
80+
def main():
    """Convert the two kradfiles named on the command line into a SQLite database.

    Reads both input files as EUC-JP text, creates the database through
    ``create_database`` and imports the files one after the other. The input
    files and the database connection are closed even when the conversion
    fails part-way.
    """
    args = parse_cmdline()

    # Example query to get the list of radicals of a specific kanji:
    #   select data from radicals, kanji_radical where radicals.rowid = kanji_radical.radical_id and kanji_radical.kanji_id = (select rowid from kanji where data = '鰯')
    # Example query to get the list of kanji having a specific radical:
    #   select data from kanji, kanji_radical where kanji.rowid = kanji_radical.kanji_id and kanji_radical.radical_id = (select rowid from radicals where data = '田')

    # Open both inputs before creating the database: create_database removes
    # an existing database file, so a missing input must fail first.
    with codecs.open(args.kradfile, 'r', 'euc-jp') as kradfile, \
            codecs.open(args.kradfile2, 'r', 'euc-jp') as kradfile2:
        database = create_database(args.sqlitefile.strip())
        try:
            print("Start conversion: ", end="", flush=True)

            import_data_file(database, kradfile)
            import_data_file(database, kradfile2)

            print("\nConversion finished")
        finally:
            database.close()
102+
103+
104+
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
105+
main()
+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="4.0">
2+
<PropertyGroup>
3+
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
4+
<SchemaVersion>2.0</SchemaVersion>
5+
<ProjectGuid>3164fef1-678a-46dc-81d1-b4a44144f4ad</ProjectGuid>
6+
<ProjectHome>.</ProjectHome>
7+
<StartupFile>KradfileToSQLite.py</StartupFile>
8+
<SearchPath>
9+
</SearchPath>
10+
<WorkingDirectory>.</WorkingDirectory>
11+
<OutputPath>.</OutputPath>
12+
<Name>KradfileToSQLite</Name>
13+
<RootNamespace>KradfileToSQLite</RootNamespace>
14+
<LaunchProvider>Standard Python launcher</LaunchProvider>
15+
<CommandLineArguments>--sqlitefile=kradfile</CommandLineArguments>
16+
<EnableNativeCodeDebugging>False</EnableNativeCodeDebugging>
17+
</PropertyGroup>
18+
<PropertyGroup Condition=" '$(Configuration)' == 'Debug' ">
19+
<DebugSymbols>true</DebugSymbols>
20+
<EnableUnmanagedDebugging>false</EnableUnmanagedDebugging>
21+
</PropertyGroup>
22+
<PropertyGroup Condition=" '$(Configuration)' == 'Release' ">
23+
<DebugSymbols>true</DebugSymbols>
24+
<EnableUnmanagedDebugging>false</EnableUnmanagedDebugging>
25+
</PropertyGroup>
26+
<ItemGroup>
27+
<Compile Include="KradfileToSQLite.py" />
28+
</ItemGroup>
29+
<Import Project="$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\Python Tools\Microsoft.PythonTools.targets" />
30+
<!-- Uncomment the CoreCompile target to enable the Build command in
31+
Visual Studio and specify your pre- and post-build commands in
32+
the BeforeBuild and AfterBuild targets below. -->
33+
<!--<Target Name="CoreCompile" />-->
34+
<Target Name="BeforeBuild">
35+
</Target>
36+
<Target Name="AfterBuild">
37+
</Target>
38+
</Project>

KradfileToSQLite/KradfileToSQLite.sln

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
2+
Microsoft Visual Studio Solution File, Format Version 12.00
3+
# Visual Studio 15
4+
VisualStudioVersion = 15.0.27428.2037
5+
MinimumVisualStudioVersion = 10.0.40219.1
6+
Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "KradfileToSQLite", "KradfileToSQLite.pyproj", "{3164FEF1-678A-46DC-81D1-B4A44144F4AD}"
7+
EndProject
8+
Global
9+
GlobalSection(SolutionConfigurationPlatforms) = preSolution
10+
Debug|Any CPU = Debug|Any CPU
11+
Release|Any CPU = Release|Any CPU
12+
EndGlobalSection
13+
GlobalSection(ProjectConfigurationPlatforms) = postSolution
14+
{3164FEF1-678A-46DC-81D1-B4A44144F4AD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
15+
{3164FEF1-678A-46DC-81D1-B4A44144F4AD}.Release|Any CPU.ActiveCfg = Release|Any CPU
16+
EndGlobalSection
17+
GlobalSection(SolutionProperties) = preSolution
18+
HideSolutionNode = FALSE
19+
EndGlobalSection
20+
GlobalSection(ExtensibilityGlobals) = postSolution
21+
SolutionGuid = {2E885054-2E6B-46E6-B175-1C05CF394570}
22+
EndGlobalSection
23+
EndGlobal

0 commit comments

Comments
 (0)