Skip to content

Commit 30e45f7

Browse files
committed
CLIL themes: generate mapping files
these will be used to associate a new book to a shelf. [ci skip]
1 parent 408cb63 commit 30e45f7

File tree

5 files changed

+3961
-0
lines changed

5 files changed

+3961
-0
lines changed

.gitattributes

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
src/datasources/*.csv linguist-generated

src/datasources/clil-themes.lisp

+121
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
(defpackage bookshops.datasources.themes-clil
2+
(:use :cl)
3+
(:documentation "Mapping of CLIL themes, the book classification. We use it to guess the best shelf for a new book. Generate better mapping files."))
4+
5+
#|
6+
7+
parse-themes-file
8+
9+
Parses *themes-file*, data from Dilicom.
10+
11+
Print the hierarchy of themes on standard output:
12+
13+
level 1: 3000 -> SCOLAIRE
14+
level 2: 3001 -> SCOLAIRE -> Pré-scolaire et primaire
15+
level 3: 3002 -> SCOLAIRE -> Pré-scolaire et primaire -> Pré-scolaire et maternelle
16+
level 3: 3003 -> SCOLAIRE -> Pré-scolaire et primaire -> Élémentaire
17+
level 2: 3004 -> SCOLAIRE -> Manuels scolaires Secondaire Général
18+
level 3: 3005 -> SCOLAIRE -> Manuels scolaires Secondaire Général -> Collège
19+
level 3: 3006 -> SCOLAIRE -> Manuels scolaires Secondaire Général -> Lycée général
20+
21+
22+
Generate two files:
23+
24+
- theme-names.csv
25+
contains the mapping code -> name
26+
27+
- theme-hierarchies.csv
28+
contains the mapping code -> level 1 code -> … -> level 4 code
29+
(so the first code repeats itself once)
30+
31+
3000;3000;;;
32+
3001;3000;3001;;
33+
3002;3000;3001;3002;
34+
35+
|#
36+
37+
(in-package :bookshops.datasources.themes-clil)
38+
39+
(defparameter *themes-file* "themes-clil.csv"
40+
"File: from Dilicom's 319-TABLE DE CLASSIFICATION DES THEMES CLIL.xlsx, transformed to CSV, edited to remove the first comma of each line, that shouldn't be there.")
41+
42+
(defun parse-themes-file (&key (file *themes-file*) (start 0) end (write-to-files t))
43+
"For each theme number, associate the names of its level 1, 2, 3 and 4.
44+
45+
Example:
46+
47+
3744 -> 3722 Jeunesse / 3744 Fiction Jeunesse / <blank>
48+
49+
3746 -> 3722 Jeunesse / 3744 Fiction Jeunesse / 3746 Histoire"
50+
(let* ((lines (uiop:read-file-lines file))
51+
current-l1
52+
current-l1-name
53+
current-l2
54+
current-l2-name
55+
current-l3
56+
current-l3-name
57+
current-l4
58+
current-l4-name)
59+
60+
(with-open-file (theme-names "theme-names.csv"
61+
:direction :output
62+
:if-exists :supersede)
63+
;; Hierarchies: theme -> parents
64+
;; theme number ; level 1 ; level 2 ; level 2 ; level 4
65+
(with-open-file (theme-hierarchies "theme-hierarchies.csv"
66+
:direction :output
67+
:if-exists :supersede)
68+
(loop for line in (subseq lines
69+
(or start 0)
70+
(or end (length lines)))
71+
for columns = (str:split ";" line)
72+
;; Only one theme number is given on each line.
73+
for level-1 = (elt columns 0)
74+
for level-2 = (elt columns 1)
75+
for level-3 = (elt columns 2)
76+
for level-4 = (elt columns 3)
77+
for name = (elt columns 4)
78+
when (not (str:blankp level-1))
79+
do (progn (setf current-l1 level-1)
80+
(setf current-l1-name name)
81+
(format t "~%level 1:~t~t~t ~a -> ~a~&" level-1 name)
82+
;; Save to files.
83+
(format theme-names "~&~a;~a" level-1 name)
84+
;; Try a yaml representation
85+
(when write-to-files
86+
(format theme-names "~&-~a;~a" level-1 name)
87+
(format theme-hierarchies "~&~a;~a;;;" level-1 level-1)))
88+
89+
when (not (str:blankp level-2))
90+
do (progn (setf current-l2 level-2)
91+
(setf current-l2-name name)
92+
(format t "~&~tlevel 2:~t~t ~a -> ~a -> ~a~&" level-2 current-l1-name name)
93+
(when write-to-files
94+
(format theme-names "~&~a;~a" level-2 name)
95+
(format theme-hierarchies "~&~a;~a;~a;;" level-2 current-l1 level-2)))
96+
97+
when (not (str:blankp level-3))
98+
do (progn (setf current-l3 level-3)
99+
(setf current-l3-name name)
100+
(format t "~&~t~tlevel 3:~t ~a -> ~a -> ~a -> ~a~&"
101+
level-3
102+
current-l1-name
103+
current-l2-name
104+
name)
105+
(when write-to-files
106+
(format theme-names "~&~a;~a" level-3 name)
107+
(format theme-hierarchies "~&~a;~a;~a;~a;" level-3 current-l1 current-l2 level-3)))
108+
109+
when (not (str:blankp level-4))
110+
do (progn (setf current-l4 level-4)
111+
(setf current-l4-name name)
112+
(format t "~&~t~t~tlevel 4: ~a -> ~a -> ~a -> ~a -> ~a~&"
113+
level-4
114+
current-l1-name
115+
current-l2-name
116+
current-l3-name
117+
name)
118+
(when write-to-files
119+
(format theme-names "~&~a;~a" level-4 name)
120+
(format theme-hierarchies "~&~a;~a;~a;~a;~a"
121+
level-4 current-l1 current-l2 current-l3 level-4))))))))

0 commit comments

Comments
 (0)