|
| 1 | +(defpackage bookshops.datasources.themes-clil |
| 2 | + (:use :cl) |
| 3 | + (:documentation "Mapping of CLIL themes, the book classification. We use it to guess the best shelf for a new book. Generate better mapping files.")) |
| 4 | + |
| 5 | +#| |
| 6 | +
|
| 7 | +parse-themes-file |
| 8 | +
|
| 9 | +Parses *themes-file*, data from Dilicom. |
| 10 | +
|
| 11 | +Print the hierarchy of themes on standard output: |
| 12 | +
|
| 13 | +level 1: 3000 -> SCOLAIRE |
| 14 | + level 2: 3001 -> SCOLAIRE -> Pré-scolaire et primaire |
| 15 | + level 3: 3002 -> SCOLAIRE -> Pré-scolaire et primaire -> Pré-scolaire et maternelle |
| 16 | + level 3: 3003 -> SCOLAIRE -> Pré-scolaire et primaire -> Élémentaire |
| 17 | + level 2: 3004 -> SCOLAIRE -> Manuels scolaires Secondaire Général |
| 18 | + level 3: 3005 -> SCOLAIRE -> Manuels scolaires Secondaire Général -> Collège |
| 19 | + level 3: 3006 -> SCOLAIRE -> Manuels scolaires Secondaire Général -> Lycée général |
| 20 | + … |
| 21 | +
|
| 22 | +Generate two files: |
| 23 | +
|
| 24 | +- theme-names.csv |
| 25 | + contains the mapping code -> name |
| 26 | +
|
| 27 | +- theme-hierarchies.csv |
| 28 | + contains the mapping code -> level 1 code -> … -> level 4 code |
| 29 | + (so the first code repeats itself once) |
| 30 | +
|
| 31 | +3000;3000;;; |
| 32 | +3001;3000;3001;; |
| 33 | +3002;3000;3001;3002; |
| 34 | +
|
| 35 | +|# |
| 36 | + |
| 37 | +(in-package :bookshops.datasources.themes-clil) |
| 38 | + |
| 39 | +(defparameter *themes-file* "themes-clil.csv" |
| 40 | + "File: from Dilicom's 319-TABLE DE CLASSIFICATION DES THEMES CLIL.xlsx, transformed to CSV, edited to remove the first comma of each line, that shouldn't be there.") |
| 41 | + |
| 42 | +(defun parse-themes-file (&key (file *themes-file*) (start 0) end (write-to-files t)) |
| 43 | + "For each theme number, associate the names of its level 1, 2, 3 and 4. |
| 44 | +
|
| 45 | + Example: |
| 46 | +
|
| 47 | + 3744 -> 3722 Jeunesse / 3744 Fiction Jeunesse / <blank> |
| 48 | +
|
| 49 | + 3746 -> 3722 Jeunesse / 3744 Fiction Jeunesse / 3746 Histoire" |
| 50 | + (let* ((lines (uiop:read-file-lines file)) |
| 51 | + current-l1 |
| 52 | + current-l1-name |
| 53 | + current-l2 |
| 54 | + current-l2-name |
| 55 | + current-l3 |
| 56 | + current-l3-name |
| 57 | + current-l4 |
| 58 | + current-l4-name) |
| 59 | + |
| 60 | + (with-open-file (theme-names "theme-names.csv" |
| 61 | + :direction :output |
| 62 | + :if-exists :supersede) |
| 63 | + ;; Hierarchies: theme -> parents |
| 64 | + ;; theme number ; level 1 ; level 2 ; level 2 ; level 4 |
| 65 | + (with-open-file (theme-hierarchies "theme-hierarchies.csv" |
| 66 | + :direction :output |
| 67 | + :if-exists :supersede) |
| 68 | + (loop for line in (subseq lines |
| 69 | + (or start 0) |
| 70 | + (or end (length lines))) |
| 71 | + for columns = (str:split ";" line) |
| 72 | + ;; Only one theme number is given on each line. |
| 73 | + for level-1 = (elt columns 0) |
| 74 | + for level-2 = (elt columns 1) |
| 75 | + for level-3 = (elt columns 2) |
| 76 | + for level-4 = (elt columns 3) |
| 77 | + for name = (elt columns 4) |
| 78 | + when (not (str:blankp level-1)) |
| 79 | + do (progn (setf current-l1 level-1) |
| 80 | + (setf current-l1-name name) |
| 81 | + (format t "~%level 1:~t~t~t ~a -> ~a~&" level-1 name) |
| 82 | + ;; Save to files. |
| 83 | + (format theme-names "~&~a;~a" level-1 name) |
| 84 | + ;; Try a yaml representation |
| 85 | + (when write-to-files |
| 86 | + (format theme-names "~&-~a;~a" level-1 name) |
| 87 | + (format theme-hierarchies "~&~a;~a;;;" level-1 level-1))) |
| 88 | + |
| 89 | + when (not (str:blankp level-2)) |
| 90 | + do (progn (setf current-l2 level-2) |
| 91 | + (setf current-l2-name name) |
| 92 | + (format t "~&~tlevel 2:~t~t ~a -> ~a -> ~a~&" level-2 current-l1-name name) |
| 93 | + (when write-to-files |
| 94 | + (format theme-names "~&~a;~a" level-2 name) |
| 95 | + (format theme-hierarchies "~&~a;~a;~a;;" level-2 current-l1 level-2))) |
| 96 | + |
| 97 | + when (not (str:blankp level-3)) |
| 98 | + do (progn (setf current-l3 level-3) |
| 99 | + (setf current-l3-name name) |
| 100 | + (format t "~&~t~tlevel 3:~t ~a -> ~a -> ~a -> ~a~&" |
| 101 | + level-3 |
| 102 | + current-l1-name |
| 103 | + current-l2-name |
| 104 | + name) |
| 105 | + (when write-to-files |
| 106 | + (format theme-names "~&~a;~a" level-3 name) |
| 107 | + (format theme-hierarchies "~&~a;~a;~a;~a;" level-3 current-l1 current-l2 level-3))) |
| 108 | + |
| 109 | + when (not (str:blankp level-4)) |
| 110 | + do (progn (setf current-l4 level-4) |
| 111 | + (setf current-l4-name name) |
| 112 | + (format t "~&~t~t~tlevel 4: ~a -> ~a -> ~a -> ~a -> ~a~&" |
| 113 | + level-4 |
| 114 | + current-l1-name |
| 115 | + current-l2-name |
| 116 | + current-l3-name |
| 117 | + name) |
| 118 | + (when write-to-files |
| 119 | + (format theme-names "~&~a;~a" level-4 name) |
| 120 | + (format theme-hierarchies "~&~a;~a;~a;~a;~a" |
| 121 | + level-4 current-l1 current-l2 current-l3 level-4)))))))) |
0 commit comments