Skip to content

Commit b8f0638

Browse files
authored
Merge pull request #825 from security-force-monitor/hcg/mm-import
Remove redundant import steps for easier debugging
2 parents c8a00b4 + c3bc6f9 commit b8f0638

14 files changed

+2395
-1789
lines changed

Makefile

+3-1
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@ PG_HOST=localhost
66
PG_USER=datamade
77
PG_PASSWORD=
88

9+
SOURCE_DATA_PATH=sfm_pc/management/commands/country_data/countries
10+
DATA_ARCHIVE_PATH=data/wwic_download/countries
911

1012
.PHONY : import_directory import_db flush_db recreate_db
1113

@@ -43,4 +45,4 @@ recreate_db : import_directory flush_db import_docket_import data_archive
4345
clean :
4446
rm auth_models.json *errors.csv
4547

46-
include docket.mk
48+
include docket.mk download.mk

README.md

+2-2
Original file line number | Diff line number | Diff line change
@@ -367,7 +367,7 @@ appropriate directory, and fire the recipe to build a fresh database:
367367
```
368368
tmux new -s fresh-import
369369
sudo su - datamade
370-
workon sfm
370+
source ~/.virtualenvs/sfm/bin/activate
371371
cd ~/sfm-importer
372372
make recreate_db
373373
```
@@ -376,7 +376,7 @@ Finally, switch the `sfm` and `importer` databases:
376376

377377
```
378378
# Renames the databases in a transaction -- the app doesn't need to stop
379-
psql postgres < sfm_pc/management/commands/flush/rename.sql
379+
psql -U postgres < sfm_pc/management/commands/flush/rename.sql
380380
```
381381

382382
Presto! A fresh import, with no server downtime.

docker-compose.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,9 @@ services:
2424
- .:/app
2525
environment:
2626
- IMPORT_DIRECTORY=${IMPORT_DIRECTORY}
27-
- PG_HOST=${PG_HOST}
28-
- PG_USER=${PG_USER}
29-
- PG_PASSWORD=${PG_PASSWORD}
27+
- PG_HOST=postgres
28+
- PG_USER=sfm
29+
- PG_PASSWORD=postgres
3030
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
3131
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
3232
command: python manage.py runserver 0.0.0.0:8000

docket.mk

+11-55
Original file line number | Diff line number | Diff line change
@@ -1,67 +1,23 @@
1-
# Path variable for the source data
2-
SOURCE_DATA_PATH=sfm_pc/management/commands/country_data/countries
1+
clean_import :
2+
rm -rf $(SOURCE_DATA_PATH)/*
33

4-
# Variables for the archive data
5-
DATA_ARCHIVE_PATH=data/wwic_download/countries
6-
COUNTRY_NAMES=$(shell perl -pe "s/,/ /g" import_docket.csv | cut -d' ' -f5)
7-
ENTITIES=units.csv persons.csv incidents.csv locations.csv locations.geojson sources.csv
4+
.PHONY: $(SOURCE_DATA_PATH) source_import clean_import
85

9-
.PHONY: $(SOURCE_DATA_PATH) data/wwic_download/countries data_archive wwic_download.zip directories data/wwic_download/metadata/sfm_research_handbook.pdf
10-
11-
12-
# Create the data archive and upload it to S3
13-
data_archive : wwic_download.zip
14-
aws s3 cp $< s3://$(shell cat configs/s3_config.json | jq -r '.data_archive_bucket')/
15-
rm $<
16-
17-
wwic_download.zip : filtered_data data/wwic_download/metadata/sfm_research_handbook.pdf
18-
cd data/wwic_download && zip -r ../../$@ .
19-
20-
filtered_data: directories $(SOURCE_DATA_PATH) $(foreach country,$(COUNTRY_NAMES),$(patsubst %,$(country)_%,$(ENTITIES)))
21-
echo "filtered csvs for entities"
22-
23-
directories :
24-
mkdir -p $(foreach country,$(COUNTRY_NAMES),$(DATA_ARCHIVE_PATH)/$(country))
25-
26-
define filter_entity_data
27-
$(shell csvgrep --columns $(1):status:admin --match 3 $< | \
28-
python data/processors/blank_columns.py --entity $(1) > $(DATA_ARCHIVE_PATH)/$*/$@)
29-
endef
30-
31-
%_units.csv : $(SOURCE_DATA_PATH)/%/units.csv
32-
$(call filter_entity_data,unit)
33-
34-
%_persons.csv : $(SOURCE_DATA_PATH)/%/persons.csv
35-
$(call filter_entity_data,person)
36-
37-
%_incidents.csv : $(SOURCE_DATA_PATH)/%/incidents.csv
38-
$(call filter_entity_data,incident)
39-
40-
%_sources.csv : $(SOURCE_DATA_PATH)/%/sources.csv
41-
cp $< $(DATA_ARCHIVE_PATH)/$*/$@
42-
43-
%_locations.csv : $(SOURCE_DATA_PATH)/%/locations.csv
44-
cp $< $(DATA_ARCHIVE_PATH)/$*/$@
45-
46-
%_locations.geojson : $(SOURCE_DATA_PATH)/%/locations.geojson
47-
cp $< $(DATA_ARCHIVE_PATH)/$*/$@
48-
49-
data/wwic_download/metadata/sfm_research_handbook.pdf :
50-
curl -o $@ https://help.securityforcemonitor.org/_/downloads/en/latest/pdf/
51-
52-
53-
# Download the source data and load it into the database
54-
%_import : %.csv $(SOURCE_DATA_PATH)
6+
%_import : %.csv $(SOURCE_DATA_PATH) source_import
557
perl -pe "s/,/ /g" $< | \
568
xargs -L1 bash -c ' \
57-
echo "Loading data for country code $$3" && (\
9+
echo "Loading data for country code $$3 from $(SOURCE_DATA_PATH)/$$4" && (\
5810
python -u manage.py import_country_data \
5911
--country_code $$3 \
60-
--country_path $(word 2, $^)/$$4 \
61-
--sources_path $(word 2, $^)/sources.csv || \
12+
--country_path $(SOURCE_DATA_PATH)/$$4 || \
6213
exit 255 \
6314
)'
6415

16+
source_import : $(SOURCE_DATA_PATH)
17+
echo "Loading source data" && \
18+
python -u manage.py import_source_data \
19+
--sources_path $(SOURCE_DATA_PATH)/sources.csv
20+
6521
$(SOURCE_DATA_PATH) : import_docket.csv
6622
perl -pe "s/,/ /g" $< | \
6723
xargs -L1 bash -c ' \

download.mk

+49
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,49 @@
1+
# Variables for the archive data
2+
COUNTRY_NAMES=$(shell perl -pe "s/,/ /g" import_docket.csv | cut -d' ' -f5)
3+
ENTITIES=units.csv persons.csv incidents.csv locations.csv locations.geojson sources.csv
4+
5+
clean_archive :
6+
rm -rf $(DATA_ARCHIVE_PATH)/*
7+
8+
.PHONY : $(DATA_ARCHIVE_PATH) data_archive wwic_download.zip directories \
9+
data/wwic_download/metadata/sfm_research_handbook.pdf clean_archive
10+
11+
# Create the data archive and upload it to S3
12+
data_archive : wwic_download.zip
13+
aws s3 cp $< s3://$(shell cat configs/s3_config.json | jq -r '.data_archive_bucket')/
14+
rm $<
15+
16+
wwic_download.zip : filtered_data data/wwic_download/metadata/sfm_research_handbook.pdf
17+
cd data/wwic_download && zip -r ../../$@ .
18+
19+
filtered_data: directories $(SOURCE_DATA_PATH) $(foreach country,$(COUNTRY_NAMES),$(patsubst %,$(country)_%,$(ENTITIES)))
20+
echo "filtered csvs for entities"
21+
22+
directories :
23+
mkdir -p $(foreach country,$(COUNTRY_NAMES),$(DATA_ARCHIVE_PATH)/$(country))
24+
25+
define filter_entity_data
26+
$(shell csvgrep --columns $(1):status:admin --match 3 $< | \
27+
python data/processors/blank_columns.py --entity $(1) > $(DATA_ARCHIVE_PATH)/$*/$@)
28+
endef
29+
30+
%_units.csv : $(SOURCE_DATA_PATH)/%/units.csv
31+
$(call filter_entity_data,unit)
32+
33+
%_persons.csv : $(SOURCE_DATA_PATH)/%/persons.csv
34+
$(call filter_entity_data,person)
35+
36+
%_incidents.csv : $(SOURCE_DATA_PATH)/%/incidents.csv
37+
$(call filter_entity_data,incident)
38+
39+
%_sources.csv : $(SOURCE_DATA_PATH)/%/sources.csv
40+
cp $< $(DATA_ARCHIVE_PATH)/$*/$@
41+
42+
%_locations.csv : $(SOURCE_DATA_PATH)/%/locations.csv
43+
cp $< $(DATA_ARCHIVE_PATH)/$*/$@
44+
45+
%_locations.geojson : $(SOURCE_DATA_PATH)/%/locations.geojson
46+
cp $< $(DATA_ARCHIVE_PATH)/$*/$@
47+
48+
data/wwic_download/metadata/sfm_research_handbook.pdf :
49+
curl -o $@ https://help.securityforcemonitor.org/_/downloads/en/latest/pdf/

fixtures/import_docket.csv

+1-25
Original file line number | Diff line number | Diff line change
@@ -1,26 +1,2 @@
11
source_document_id,location_document_id,entitity_document_id,sfm:iso,sfm:country_name
2-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1ztOfGaQT3WDrq-BOjT0x5VErzgrWQ0Ku,1Ck11zLFVP6iJZFAR0_Xsq0UaeEJrmFl7ysbFX9mGu7c,ae,united-arab-emirates
3-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1HpIjYaH_iMCRQD1jP159VGz-2NL4nB5p,1EqAi59wjE1v-bYX3cC1qdl6zkThpWJ8YcvSPUC-RGHc,bd,bangladesh
4-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1j8KgLnpjlnLy6bN4ozkwnBpkHUI6i3si,1wBmSuTkoEhosDzfHtyvZqd9SKez-sWoPoJ9oPonWsSo,bf,burkina-faso
5-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1mjTLMZ1la3zyiVQxLZ56sW497Sp8Lh5m,1c0O2XlwSpTAtB0AdhkkdgevWbsBUxvsmsETUwPPVIlk,bh,bahrain
6-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1qZoQciglG1DOeEa3hh5iUvF7q4_bKOQl,1cZVy2PUAzeq2xOoLRLwL9z9mqbry32zv_XY7sjEih2c,eg,egypt
7-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1eZHw6k7xM7Z_ZNMnF0Wc5cjJuVyuOop3,1CKkNsXwRdwXDiOldwT-6baw9DayXA2Vsn4ttpwP9SuM,jo,jordan
8-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1vnWgeTkq5TUyF7555F4renMJnl4WiFNy,1Y6-9-9kai-YyK1pXvcv_W6fqUn9lORltUhuFc2YUu1I,kw,kuwait
9-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,13XnZOF7U0uwL3EP_QpdTVd1FUh3A3cwi,1aGbMvFHzGn9ZlKKcFhiQ2c9egsoGDH11QBgyqmhS-IM,lr,liberia
10-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1fnTq_ONVxzVBbCyQy_-s6ngmadA6st70,16962grIJlisFbh2Zp9kBAhv6jVnZz6bHgb6RGBUHd3o,ma,morocco
11-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1syUOihIFKzk6QsLXd7XNUZIwzZZfAqEH,1UcgoJ_ytS-WSWl2_5OuV9h92wSCBWRFBoDtr4Ztqt14,ml,mali
12-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1dU24WM8MAnqApFcBoYCiKPzPevebal6_,1vwb7ENaOeVRJIc5iCDBbF8K0Oql4SscENmLEdUT77Hg,mm,myanmar
13-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,17Zqej6mrCT3BBBKcKj7949qHyRCa-9SJ,1cUtCEUuZRMqcxlRqFyoEM9eAdiDdWy2DUocroYivCx4,mr,mauritania
14-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1nVWV5_1kGDwyWJ3PPqExKfchs3sAlEuh,168KuHwUr9565zWaQVZ5au3qtGOb-qyJx_WOwNzqt_Eo,mx,mexico
15-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1qlHquI9EDz2lteBcjz_MheNLspg3mp_q,1_Pj5BryFXUPQPmMigII8G2HBUrpsnkK5V-Zu_9LCdGw,ne,niger
16-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1boFMPurqyxbfYBvfisRiROmzr8TuPI8j,1f3W3qJATCzVjZGw239Wy3D25THs8ThnvoC24aUFaGZQ,ng,nigeria
17-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1nMXXtFwJ3TqeynpKSW11uYAzihSMV8So,1Uc5eZswLB6mrwQLhd_OYQm7v7ThH99N0eb7RbTtD5iY,np,nepal
18-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,19o8a1zwxUEYFxvZkqs2AwCyIm0oe_CPF,1h1a0S5aVv9Z3wucgKsYXmg5Z_CWzsKfjJSfJFcXxPSY,ph,philippines
19-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1x4LjK_UWdxZm5EyNCupj7ikv7E-WMrkd,1UGOxjmJdJ9Dzj8cX3mZkgXAzT_ap_EMD2OqLjzDeGeE,qa,qatar
20-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1wSzKP9bsXB9w4U8frj4Y5kHrfV3C6Vi8,1QAgVpj0bf_A0HGFzHgwxBbZqgIFurfH4h7u1MnfKzJc,rw,rwanda
21-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1FLls5iHQD71Omy4VEzXYQ4HacMubzg8v,1a9XRXK5rG4_n0Afw7tIDkIbAmdydqKcU8J8zx5pLnVU,sa,saudi-arabia
22-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1tNS4yJYlh265zDy9rQnjAZSqSmBZxrRh,11dEjFSe56YdmJfVeKhRZpQKSgRb6mfM1DWKoNFxYg9Y,sd,sudan
23-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1l3NE8P8Xi-1qGwqZcdVdvV3Hn1h4Bwjv,1YxRrB39ItO_kEPTrMQ9FJlvMEp1Fjby0vchHiwW3C_I,sl,sierra-leone
24-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1yPQVFwbQ4edUfBhgAbL2o9DAOljZigTF,15cnbBqIlp4LzEXrs2z2L4_RTnY5e1GMrGV150JV615Q,td,chad
25-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1cyrCvMKVRHJtpQtcbTpoboJc9iNZ-oHy,1WlN4Hbv3JKE76hnNYkr80HU9oNJwjjOnj9nt7mm9ddw,ug,uganda
26-
1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1Ii31JX8y2InKt-FnHK-6kaqVK41XBOzY,1r62axKA5xgvJAiSiHrKgHZSATwSkKB-K15fdmLbn3zo,ye,yemen
2+
1d2FIMxqeL7Oa1hQrnbuFuzNr2lVLwfD7hknTbX8E-Dw,12O-PyMp4CN7O8ZdnZpCNm8Rs3lzLfMvo,1uVz_9edm0ejSGOHCRV2BWZoPPKOX64XpBORjm47hopU,mm,myanmar

location/models.py

+62-39
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,21 @@
66

77

88
class LocationManager(models.Manager):
9-
109
def from_humane_id(self, humane_id):
1110
if isinstance(humane_id, str):
12-
return self.get(**{'sfm__location:humane_id:admin': humane_id})
11+
return self.get(
12+
**{
13+
"sfm__location:humane_id:admin": humane_id,
14+
"geometry__isnull": False,
15+
}
16+
)
1317

14-
return self.filter(**{'sfm__location:humane_id:admin__in': humane_id})
18+
return self.filter(
19+
**{
20+
"sfm__location:humane_id:admin__in": humane_id,
21+
"geometry__isnull": False,
22+
}
23+
)
1524

1625

1726
class Location(models.Model):
@@ -21,16 +30,20 @@ class Location(models.Model):
2130
feature_type = models.TextField(blank=True, null=True)
2231
tags = models.JSONField(blank=True, null=True)
2332
sfm = models.JSONField(blank=True, null=True)
24-
adminlevel1 = models.ForeignKey('self',
25-
related_name='area_locations',
26-
on_delete=models.CASCADE,
27-
null=True,
28-
blank=True)
29-
adminlevel2 = models.ForeignKey('self',
30-
related_name='place_locations',
31-
on_delete=models.CASCADE,
32-
null=True,
33-
blank=True)
33+
adminlevel1 = models.ForeignKey(
34+
"self",
35+
related_name="area_locations",
36+
on_delete=models.CASCADE,
37+
null=True,
38+
blank=True,
39+
)
40+
adminlevel2 = models.ForeignKey(
41+
"self",
42+
related_name="place_locations",
43+
on_delete=models.CASCADE,
44+
null=True,
45+
blank=True,
46+
)
3447
adminlevel = models.CharField(max_length=50, null=True, blank=True)
3548
geometry = GeometryField(blank=True, null=True)
3649

@@ -60,42 +73,52 @@ def related_entities(self):
6073
for associationarea in self.associationarea_set.all():
6174
association = associationarea.object_ref
6275
organization = association.organization.get_value().value
63-
related_entities.append({
64-
'name': organization.name.get_value().value,
65-
'entity_type': _('Organization'),
66-
'start_date': association.startdate.get_value(),
67-
'end_date': association.enddate.get_value(),
68-
'open_ended': association.open_ended.get_value(),
69-
'url': reverse('view-organization', kwargs={'slug': organization.uuid}),
70-
})
76+
related_entities.append(
77+
{
78+
"name": organization.name.get_value().value,
79+
"entity_type": _("Organization"),
80+
"start_date": association.startdate.get_value(),
81+
"end_date": association.enddate.get_value(),
82+
"open_ended": association.open_ended.get_value(),
83+
"url": reverse(
84+
"view-organization", kwargs={"slug": organization.uuid}
85+
),
86+
}
87+
)
7188

7289
for emplacementsite in self.emplacementsite_set.all():
7390
emplacement = emplacementsite.object_ref
7491
organization = emplacement.organization.get_value().value
75-
related_entities.append({
76-
'name': organization.name.get_value().value,
77-
'entity_type': _('Organization'),
78-
'start_date': emplacement.startdate.get_value(),
79-
'end_date': emplacement.enddate.get_value(),
80-
'open_ended': emplacement.open_ended.get_value(),
81-
'url': reverse('view-organization', kwargs={'slug': organization.uuid}),
82-
})
92+
related_entities.append(
93+
{
94+
"name": organization.name.get_value().value,
95+
"entity_type": _("Organization"),
96+
"start_date": emplacement.startdate.get_value(),
97+
"end_date": emplacement.enddate.get_value(),
98+
"open_ended": emplacement.open_ended.get_value(),
99+
"url": reverse(
100+
"view-organization", kwargs={"slug": organization.uuid}
101+
),
102+
}
103+
)
83104

84105
for violationlocation in self.violationlocation_set.all():
85106
violation = violationlocation.object_ref
86-
related_entities.append({
87-
'name': truncatewords(violation.description.get_value(), 10),
88-
'entity_type': _('Violation'),
89-
'start_date': violation.startdate.get_value(),
90-
'end_date': violation.enddate.get_value(),
91-
'open_ended': '',
92-
'url': reverse('view-violation', kwargs={'slug': violation.uuid}),
93-
})
107+
related_entities.append(
108+
{
109+
"name": truncatewords(violation.description.get_value(), 10),
110+
"entity_type": _("Violation"),
111+
"start_date": violation.startdate.get_value(),
112+
"end_date": violation.enddate.get_value(),
113+
"open_ended": "",
114+
"url": reverse("view-violation", kwargs={"slug": violation.uuid}),
115+
}
116+
)
94117

95118
return related_entities
96119

97120
@property
98121
def osm_feature_type(self):
99-
if self.feature_type == 'boundary':
100-
return 'relation'
122+
if self.feature_type == "boundary":
123+
return "relation"
101124
return self.feature_type

requirements.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ django-countries-plus==1.3.2
88
django-bootstrap-pagination==1.6.4
99
django-date-extensions==3.1.1
1010
django-leaflet==0.28.2
11-
psycopg2==2.8.6
11+
psycopg2-binary==2.8.6
1212
django-rosetta==0.9.8
1313
django-queryset-csv==1.1.0
1414
boto3==1.24.21

0 commit comments

Comments
 (0)