From f3d059d5353fb914d4a5b66adf3ea8ba42bc06cb Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 8 Feb 2024 11:08:25 +0000 Subject: [PATCH 001/156] create dataset --- ckanext/falkor/plugin.py | 9 ++------- ckanext/falkor/tasks2.py | 42 ++++++++++++++++++++-------------------- test.ini | 2 +- 3 files changed, 24 insertions(+), 29 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index c817733..911bea0 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -100,18 +100,13 @@ def notify(self, entity, operation=None): return if isinstance(entity, model.Package): - #Dataset create if operation == DomainObjectOperation.new: topic = 'dataset/create' resource = table_dictize(entity, context) - - #tasks.notify_hooks_dataset_create(resource) + log.debug("IS THIS BEING CALLED ------------------------------------------------ Dataset", resource) - #jobs.enqueue( - # tasks.notify_hooks_dataset_create, - # [resource, webhook, website] - #) + tasks2.datasetCreation(resource) #Dataset update #Most likely not required as falkor doesnt allow updating datasets diff --git a/ckanext/falkor/tasks2.py b/ckanext/falkor/tasks2.py index 1e3cfe3..f4a79f8 100644 --- a/ckanext/falkor/tasks2.py +++ b/ckanext/falkor/tasks2.py @@ -17,37 +17,37 @@ # Constants -TenantID = 3 - -Bearer = "~~~~~~~~~~~~~~~~~~~~~~~~~~" - -baseurl = "https://test.falkor.byzgen.com/api/core/v0/" +TENANT_ID = 2001 +BEARER = '' +CORE_BASE_URL = "http://192.168.66.1:8080/api/core/v0/" +ADMIN_BASE_URL = "http://192.168.66.1:8585/api/admin/v0/" # Base header constant baseHeaders = { 'Content-Type': 'application/json', "accept": "application/json", - "Authorization": "Bearer " + Bearer + "Authorization": "Bearer " + BEARER } # Send a post request to falkor def falkorPost(url, payload, headers): - response = requests.post(url, headers = headers,json = payload,timeout=2) + response = requests.post(url, headers = headers,json = payload,timeout=120) + log.debug(response) return response 
# Send a post request to falkor def falkorPut(url, payload, headers): - response = requests.put(url, headers = headers,json = payload,timeout=2) + response = requests.put(url, headers = headers,json = payload,timeout=120) return response # Send a get request to falkor def falkorGet(url, headers): - response = requests.get(url, headers = headers,timeout=2) + response = requests.get(url, headers = headers,timeout=120) return response def documentCreation(resource): # Format data for falkor - url = baseurl + TenantID +"/dataset/" + resource['package_id'] + "/create" + url = CORE_BASE_URL + str(TENANT_ID) +"/dataset/" + resource['package_id'] + "/create" payload = { 'documentId': resource['id'], 'data': "name = " + resource['name'] @@ -64,7 +64,7 @@ def documentCreation(resource): # JSON document updates def documentUpdate(resource): # Format data for falkor - url = baseurl + str(TenantID) +"/dataset/"+ resource['package_id'] + "/" + resource['id'] +"/body" + url = CORE_BASE_URL + str(TENANT_ID) +"/dataset/"+ resource['package_id'] + "/" + resource['id'] +"/body" payload = { 'data': "name = " + resource['name'] } @@ -76,21 +76,21 @@ def documentUpdate(resource): ) def documentRead(context, resource): - - if context["user_obj"] == None: - url = baseurl + str(TenantID) + resource['package_id'] + "/" + resource['id'] + "/body?userId=" + "guest" + if "user_obj" not in context: + url = CORE_BASE_URL + str(TENANT_ID) + resource['package_id'] + "/" + resource['id'] + "/body?userId=" + "guest" else: - url = baseurl + str(TenantID) + resource['package_id'] + "/" + resource['id'] + "/body?userId=" + context["user_obj"].id - + url = CORE_BASE_URL + str(TENANT_ID) + resource['package_id'] + "/" + resource['id'] + "/body?userId=" + context["user_obj"].id + log.debug(url) #run async request - #jobs.enqueue( - # falkorGett, - # [url, baseHeaders] - #) + jobs.enqueue( + falkor_get, + [url, baseHeaders] + ) def datasetCreation(resource): # Format data for falkor - url = baseurl + 
TenantID +"/dataset" + url = ADMIN_BASE_URL + str(TENANT_ID) +"/dataset" + log.debug(f'DATASET {resource["id"]}') payload = { 'datasetId': str(resource['id']), "encryptionType": "none", diff --git a/test.ini b/test.ini index 300bbda..fc3a65e 100644 --- a/test.ini +++ b/test.ini @@ -4,7 +4,7 @@ smtp_server = localhost error_email_from = ckan@localhost [app:main] -use = config:../ckan/test-core.ini +use = config:../../src/ckan/test-core.ini # Insert any custom config settings to be used when running your extension's # tests here. These will override the one defined in CKAN core's test-core.ini From e958ab02f249e673ce1c126035ce50a7e92c63ce Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 8 Feb 2024 11:17:07 +0000 Subject: [PATCH 002/156] document creation --- ckanext/falkor/plugin.py | 6 +----- ckanext/falkor/tasks2.py | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 911bea0..e4efa10 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -63,12 +63,8 @@ def notify(self, entity, operation=None): elif operation == DomainObjectOperation.new: topic = 'resource/create' resource = table_dictize(entity, context) - #tasks.notify_hooks_resource_create_cheaty(resource, website) - #jobs.enqueue( - # tasks.notify_hooks_resource_create_cheaty, - # [resource, webhook, website] - #) + tasks2.documentCreation(resource) #resource/document update if operation == DomainObjectOperation.changed: diff --git a/ckanext/falkor/tasks2.py b/ckanext/falkor/tasks2.py index f4a79f8..79d852a 100644 --- a/ckanext/falkor/tasks2.py +++ b/ckanext/falkor/tasks2.py @@ -50,7 +50,7 @@ def documentCreation(resource): url = CORE_BASE_URL + str(TENANT_ID) +"/dataset/" + resource['package_id'] + "/create" payload = { 'documentId': resource['id'], - 'data': "name = " + resource['name'] + 'data': json.dumps(resource) } #run async request @@ -66,7 +66,7 @@ def documentUpdate(resource): # Format data for falkor url 
= CORE_BASE_URL + str(TENANT_ID) +"/dataset/"+ resource['package_id'] + "/" + resource['id'] +"/body" payload = { - 'data': "name = " + resource['name'] + 'data': resource } #run async request From 24efc856a39ec878be345bb671dc5157927c245f Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 8 Feb 2024 14:34:31 +0000 Subject: [PATCH 003/156] remaining crud operations for documents --- ckanext/falkor/plugin.py | 27 ++++----------------------- ckanext/falkor/tasks2.py | 27 ++++++++++++++++++++++----- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index e4efa10..a6544c9 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -59,39 +59,22 @@ def notify(self, entity, operation=None): #to make this into a webhook. return - #resource/document create elif operation == DomainObjectOperation.new: topic = 'resource/create' resource = table_dictize(entity, context) - - tasks2.documentCreation(resource) + tasks2.documentCreate(resource) #resource/document update if operation == DomainObjectOperation.changed: topic = 'resource/update' - resource = table_dictize(entity, context) - - #tasks.notify_hooks_resource_update(resource, webhook) - - #jobs.enqueue( - # tasks.notify_hooks_resource_update, - # [resource, webhook, website] - #) + tasks2.documentUpdate(resource) #resource/document delete elif operation == DomainObjectOperation.deleted: topic = 'resource/delete' - resource = table_dictize(entity, context) - - #tasks.notify_hooks_resource_delete(resource, webhook, website) - - #jobs.enqueue( - # tasks.notify_hooks_resource_update, - # [resource, webhook, website] - #) - + tasks2.documentDelete(resource) else: return @@ -100,9 +83,7 @@ def notify(self, entity, operation=None): if operation == DomainObjectOperation.new: topic = 'dataset/create' resource = table_dictize(entity, context) - log.debug("IS THIS BEING CALLED ------------------------------------------------ Dataset", resource) - - 
tasks2.datasetCreation(resource) + tasks2.datasetCreate(resource) #Dataset update #Most likely not required as falkor doesnt allow updating datasets diff --git a/ckanext/falkor/tasks2.py b/ckanext/falkor/tasks2.py index 79d852a..2ef56dd 100644 --- a/ckanext/falkor/tasks2.py +++ b/ckanext/falkor/tasks2.py @@ -32,20 +32,27 @@ # Send a post request to falkor def falkorPost(url, payload, headers): response = requests.post(url, headers = headers,json = payload,timeout=120) - log.debug(response) + log.debug(response.json()) return response # Send a post request to falkor def falkorPut(url, payload, headers): response = requests.put(url, headers = headers,json = payload,timeout=120) + log.debug(response.json()) return response # Send a get request to falkor def falkorGet(url, headers): response = requests.get(url, headers = headers,timeout=120) + log.debug(response.json()) return response -def documentCreation(resource): +def falkorDelete(url, headers): + response = requests.delete(url, headers, timeout=120) + log.debug(response.json()) + return response + +def documentCreate(resource): # Format data for falkor url = CORE_BASE_URL + str(TENANT_ID) +"/dataset/" + resource['package_id'] + "/create" payload = { @@ -53,6 +60,7 @@ def documentCreation(resource): 'data': json.dumps(resource) } + log.debug(f'Creating document with id {str(resource["id"])}') #run async request jobs.enqueue( falkorPost, @@ -68,7 +76,7 @@ def documentUpdate(resource): payload = { 'data': resource } - + log.debug(f'Updating document with id {str(resource["id"])}') #run async request jobs.enqueue( falkorPut, @@ -87,10 +95,18 @@ def documentRead(context, resource): [url, baseHeaders] ) -def datasetCreation(resource): +def documentDelete(resource): + url = CORE_BASE_URL + str(TENANT_ID) +"/dataset/"+ resource['package_id'] + "/" + resource['id'] + #run async request + log.debug(f'Deleting document with id {str(resource["id"])}') + jobs.enqueue( + falkorDelete, + [url, baseHeaders] + ) + +def 
datasetCreate(resource): # Format data for falkor url = ADMIN_BASE_URL + str(TENANT_ID) +"/dataset" - log.debug(f'DATASET {resource["id"]}') payload = { 'datasetId': str(resource['id']), "encryptionType": "none", @@ -102,6 +118,7 @@ def datasetCreation(resource): } #run async request + log.debug(f'Create dataset with id {str(resource["id"])}') jobs.enqueue( falkorPost, [url, payload, baseHeaders] From 2bb92e5e648303cfcd6336dfbb9e90a2e2776228 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 8 Feb 2024 14:58:46 +0000 Subject: [PATCH 004/156] Fix documentRead --- ckanext/falkor/plugin.py | 6 +++--- ckanext/falkor/tasks2.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index a6544c9..769f1fd 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -36,11 +36,11 @@ def before_show(self, resource_dict): context = { 'model': model, 'session': model.Session, - 'user': tk.c.user, - 'user_obj': tk.c.userobj + 'user': toolkit.g.user, + 'user_obj': toolkit.g.userobj } tasks2.documentRead(context,resource_dict) - except: + except(e): a = 1 diff --git a/ckanext/falkor/tasks2.py b/ckanext/falkor/tasks2.py index 2ef56dd..10cfb76 100644 --- a/ckanext/falkor/tasks2.py +++ b/ckanext/falkor/tasks2.py @@ -85,13 +85,13 @@ def documentUpdate(resource): def documentRead(context, resource): if "user_obj" not in context: - url = CORE_BASE_URL + str(TENANT_ID) + resource['package_id'] + "/" + resource['id'] + "/body?userId=" + "guest" + url = CORE_BASE_URL + str(TENANT_ID) +"/dataset/"+ resource['package_id'] + "/" + resource['id'] + "/body?userId=" + "guest" else: - url = CORE_BASE_URL + str(TENANT_ID) + resource['package_id'] + "/" + resource['id'] + "/body?userId=" + context["user_obj"].id + url = CORE_BASE_URL + str(TENANT_ID) +"/dataset/"+ resource['package_id'] + "/" + resource['id'] + "/body?userId=" + context["user_obj"].id log.debug(url) #run async request jobs.enqueue( - falkor_get, + 
falkorGet, [url, baseHeaders] ) From 5b39661fcc92626cad21e5e4ce36aedbb1dc16fe Mon Sep 17 00:00:00 2001 From: wajones98 Date: Fri, 9 Feb 2024 13:44:33 +0000 Subject: [PATCH 005/156] Authentication --- ckanext/falkor/auth.py | 95 +++++++++++++++++++++++++ ckanext/falkor/plugin.py | 77 ++++++++------------- ckanext/falkor/tasks2.py | 146 ++++++++++++++++++++++++--------------- 3 files changed, 214 insertions(+), 104 deletions(-) create mode 100644 ckanext/falkor/auth.py diff --git a/ckanext/falkor/auth.py b/ckanext/falkor/auth.py new file mode 100644 index 0000000..d52d693 --- /dev/null +++ b/ckanext/falkor/auth.py @@ -0,0 +1,95 @@ +import logging +import requests +import time +import json +from typing import Union + +import logging + +log = logging.getLogger(__name__) + + +class Credentials: + client_id: str + client_secret: str + username: str + password: str + + def __init__( + self, client_id: str, client_secret: str, username: str, password: str + ): + self.client_id = client_id + self.client_secret = client_secret + self.username = username + self.password = password + + +class Token: + token: str + expires_in: int + + def __init__(self, token: str, expires_in: int): + self.token = token + self.expires_in = expires_in + + +class Auth: + __access_token: Union[Token, None] + __refresh_token: Union[Token, None] + __credentials: Credentials + __timestamp: float + __endpoint: str + + def __init__(self, credentials: Credentials, endpoint: str): + self.__credentials = credentials + self.__access_token = None + self.__refresh_token = None + self.__timestamp = 0 + self.__endpoint = endpoint + + @property + def access_token(self) -> str: + if self.__access_token is None or self.__refresh_token is None: + log.debug("No tokens, logging in...") + self.__login() + if self.__is_token_expired(self.__refresh_token): + log.debug("Refresh token expired, reauthenticating...") + self.__login() + elif self.__is_token_expired(self.__access_token): + log.debug("Access token expired, 
refreshing...") + self.__refresh() + return self.__access_token.token + + def __is_token_expired(self, token: Token) -> bool: + expires_at = self.__timestamp + token.expires_in + current_time = time.time() + return current_time >= expires_at + + def __login(self) -> None: + request = { + "client_id": self.__credentials.client_id, + "client_secret": self.__credentials.client_secret, + "username": self.__credentials.username, + "password": self.__credentials.password, + "grant_type": "password", + } + response = requests.post(self.__endpoint, request) + body = response.json() + self.__set_token(body) + + def __refresh(self) -> None: + request = { + "grant_type": "refresh_token", + "client_id": self.__credentials.client_id, + "client_secret": self.__credentials.client_secret, + "refresh_token": self.__refresh_token.token, + } + + response = requests.post(self.__endpoint, request) + body = response.json() + self.__set_token(body) + + def __set_token(self, body) -> None: + self.__access_token = Token(body["access_token"], body["expires_in"]) + self.__refresh_token = Token(body["refresh_token"], body["refresh_expires_in"]) + self.__timestamp = time.time() diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 769f1fd..070c013 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -23,77 +23,56 @@ class FalkorPlugin(plugins.SingletonPlugin): # IConfigurer def update_config(self, config_): - toolkit.add_template_directory(config_, 'templates') - toolkit.add_public_directory(config_, 'public') - toolkit.add_resource('fanstatic', - 'falkor') + toolkit.add_template_directory(config_, "templates") + toolkit.add_public_directory(config_, "public") + toolkit.add_resource("fanstatic", "falkor") - - #IResourceController + # IResourceController def before_show(self, resource_dict): - - try: - context = { - 'model': model, - 'session': model.Session, - 'user': toolkit.g.user, - 'user_obj': toolkit.g.userobj - } - 
tasks2.documentRead(context,resource_dict) - except(e): - a = 1 - - - - - - #IDomainObjectNotification & #IResourceURLChange + context = { + "model": model, + "session": model.Session, + "user": toolkit.g.user, + "user_obj": toolkit.g.userobj, + } + tasks2.documentRead(context, resource_dict) + + # IDomainObjectNotification & #IResourceURLChange def notify(self, entity, operation=None): - context = {'model': model, 'ignore_auth': True, 'defer_commit': True} - - website = "http://ec2-18-134-94-243.eu-west-2.compute.amazonaws.com:5000/" + context = {"model": model, "ignore_auth": True, "defer_commit": True} if isinstance(entity, model.Resource): if not operation: - #This happens on IResourceURLChange, but I'm not sure whether - #to make this into a webhook. + # This happens on IResourceURLChange, but I'm not sure whether + # to make this into a webhook. return elif operation == DomainObjectOperation.new: - topic = 'resource/create' + topic = "resource/create" resource = table_dictize(entity, context) - tasks2.documentCreate(resource) + tasks2.documentCreate(resource) - #resource/document update + # resource/document update if operation == DomainObjectOperation.changed: - topic = 'resource/update' + topic = "resource/update" resource = table_dictize(entity, context) tasks2.documentUpdate(resource) - - #resource/document delete + + # resource/document delete elif operation == DomainObjectOperation.deleted: - topic = 'resource/delete' + topic = "resource/delete" resource = table_dictize(entity, context) - tasks2.documentDelete(resource) + tasks2.documentDelete(resource) + else: return if isinstance(entity, model.Package): - #Dataset create + # Dataset create if operation == DomainObjectOperation.new: - topic = 'dataset/create' + topic = "dataset/create" resource = table_dictize(entity, context) - tasks2.datasetCreate(resource) - - #Dataset update - #Most likely not required as falkor doesnt allow updating datasets - elif operation == DomainObjectOperation.changed: - 
topic = 'dataset/update' - - #Dataset delete - #Most likely not required as falkor doesnt allow deleting datasets - elif operation == DomainObjectOperation.deleted: - topic = 'dataset/delete' + tasks2.datasetCreate(resource) else: return diff --git a/ckanext/falkor/tasks2.py b/ckanext/falkor/tasks2.py index 10cfb76..47e3414 100644 --- a/ckanext/falkor/tasks2.py +++ b/ckanext/falkor/tasks2.py @@ -1,4 +1,4 @@ -#updated verson of tasks +# updated verson of tasks from urllib import response @@ -8,118 +8,154 @@ import ckan.plugins.toolkit as toolkit import ckan.lib.jobs as jobs import ckanext.falkor +from typing import Union +from ckanext.falkor import auth import os import sys import logging + log = logging.getLogger(__name__) # Constants - -TENANT_ID = 2001 -BEARER = '' +TENANT_ID = 2001 CORE_BASE_URL = "http://192.168.66.1:8080/api/core/v0/" ADMIN_BASE_URL = "http://192.168.66.1:8585/api/admin/v0/" -# Base header constant -baseHeaders = { - 'Content-Type': 'application/json', - "accept": "application/json", - "Authorization": "Bearer " + BEARER - } +credentials = auth.Credentials("falkor", "", "testuser", "") +endpoint = ( + "http://192.168.66.1:38080/realms/byzgen-falkor/protocol/openid-connect/token" +) +auth = auth.Auth(credentials, endpoint) + + +def base_headers() -> dict[str, str, str]: + return { + "Content-Type": "application/json", + "accept": "application/json", + "Authorization": "Bearer " + auth.access_token, + } + # Send a post request to falkor def falkorPost(url, payload, headers): - response = requests.post(url, headers = headers,json = payload,timeout=120) + response = requests.post(url, headers=headers, json=payload, timeout=120) log.debug(response.json()) return response + # Send a post request to falkor def falkorPut(url, payload, headers): - response = requests.put(url, headers = headers,json = payload,timeout=120) + response = requests.put(url, headers=headers, json=payload, timeout=120) log.debug(response.json()) return response + # Send a get 
request to falkor def falkorGet(url, headers): - response = requests.get(url, headers = headers,timeout=120) + response = requests.get(url, headers=headers, timeout=120) log.debug(response.json()) return response + def falkorDelete(url, headers): - response = requests.delete(url, headers, timeout=120) + response = requests.delete(url, headers=headers, timeout=120) log.debug(response.json()) return response + def documentCreate(resource): # Format data for falkor - url = CORE_BASE_URL + str(TENANT_ID) +"/dataset/" + resource['package_id'] + "/create" - payload = { - 'documentId': resource['id'], - 'data': json.dumps(resource) - } + url = ( + CORE_BASE_URL + + str(TENANT_ID) + + "/dataset/" + + resource["package_id"] + + "/create" + ) + payload = {"documentId": resource["id"], "data": json.dumps(resource)} log.debug(f'Creating document with id {str(resource["id"])}') - #run async request - jobs.enqueue( - falkorPost, - [url, payload, baseHeaders] - ) + # run async request + jobs.enqueue(falkorPost, [url, payload, base_headers()]) + # Cannot be used till falkor can deal with: -# document UUIDS +# document UUIDS # JSON document updates def documentUpdate(resource): # Format data for falkor - url = CORE_BASE_URL + str(TENANT_ID) +"/dataset/"+ resource['package_id'] + "/" + resource['id'] +"/body" - payload = { - 'data': resource - } - log.debug(f'Updating document with id {str(resource["id"])}') - #run async request - jobs.enqueue( - falkorPut, - [url, payload, baseHeaders] + url = ( + CORE_BASE_URL + + str(TENANT_ID) + + "/dataset/" + + resource["package_id"] + + "/" + + resource["id"] + + "/body" ) + payload = {"data": resource} + log.debug(f'Updating document with id {str(resource["id"])}') + # run async request + jobs.enqueue(falkorPut, [url, payload, base_headers()]) + def documentRead(context, resource): if "user_obj" not in context: - url = CORE_BASE_URL + str(TENANT_ID) +"/dataset/"+ resource['package_id'] + "/" + resource['id'] + "/body?userId=" + "guest" + url 
= ( + CORE_BASE_URL + + str(TENANT_ID) + + "/dataset/" + + resource["package_id"] + + "/" + + resource["id"] + + "/body?userId=" + + "guest" + ) else: - url = CORE_BASE_URL + str(TENANT_ID) +"/dataset/"+ resource['package_id'] + "/" + resource['id'] + "/body?userId=" + context["user_obj"].id + url = ( + CORE_BASE_URL + + str(TENANT_ID) + + "/dataset/" + + resource["package_id"] + + "/" + + resource["id"] + + "/body?userId=" + + context["user_obj"].id + ) log.debug(url) - #run async request - jobs.enqueue( - falkorGet, - [url, baseHeaders] - ) + # run async request + jobs.enqueue(falkorGet, [url, base_headers()]) + def documentDelete(resource): - url = CORE_BASE_URL + str(TENANT_ID) +"/dataset/"+ resource['package_id'] + "/" + resource['id'] - #run async request - log.debug(f'Deleting document with id {str(resource["id"])}') - jobs.enqueue( - falkorDelete, - [url, baseHeaders] + url = ( + CORE_BASE_URL + + str(TENANT_ID) + + "/dataset/" + + resource["package_id"] + + "/" + + resource["id"] ) + # run async request + log.debug(f'Deleting document with id {str(resource["id"])}') + jobs.enqueue(falkorDelete, [url, base_headers()]) + def datasetCreate(resource): # Format data for falkor - url = ADMIN_BASE_URL + str(TENANT_ID) +"/dataset" + url = ADMIN_BASE_URL + str(TENANT_ID) + "/dataset" payload = { - 'datasetId': str(resource['id']), + "datasetId": str(resource["id"]), "encryptionType": "none", "externalStorage": "false", "permissionEnabled": "false", "taggingEnabled": "false", "iotaEnabled": "false", - "tokensEnabled": "false" + "tokensEnabled": "false", } - #run async request + # run async request log.debug(f'Create dataset with id {str(resource["id"])}') - jobs.enqueue( - falkorPost, - [url, payload, baseHeaders] - ) \ No newline at end of file + jobs.enqueue(falkorPost, [url, payload, base_headers()]) From 3a316a865747d9a6c36269157f1c39f86c543017 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Fri, 9 Feb 2024 14:19:48 +0000 Subject: [PATCH 006/156] Add config --- 
ckanext/falkor/plugin.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 070c013..e6a38c2 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -20,6 +20,7 @@ class FalkorPlugin(plugins.SingletonPlugin): plugins.implements(plugins.IConfigurer) plugins.implements(plugins.IDomainObjectModification, inherit=True) plugins.implements(plugins.IResourceController, inherit=True) + plugins.implements(plugins.IConfigurable, inherit=True) # IConfigurer def update_config(self, config_): @@ -27,6 +28,23 @@ def update_config(self, config_): toolkit.add_public_directory(config_, "public") toolkit.add_resource("fanstatic", "falkor") + def configure(self, config): + self.config = config + config_keys = [ + "falkor.tenant_id", + "falkor.core_api_url", + "falkor.admin_api_url", + "falkor.auth.endpoint", + "falkor.auth.client_id", + "falkor.auth.client_secret", + "falkor.auth.username", + "falkor.auth.password", + ] + + for key in config_keys: + if not config.get(key): + raise Exception(f"{key} not present in configration") + # IResourceController def before_show(self, resource_dict): context = { From be2b82437699cc2b456fa4df4b05582e2968b6f9 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 13 Feb 2024 13:42:28 +0000 Subject: [PATCH 007/156] Load values from config --- ckanext/falkor/plugin.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index e6a38c2..655ff7d 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -16,7 +16,18 @@ log = logging.getLogger(__name__) +def get_config_value(config, key: str) -> str: + value = config.get(key) + if not value: + raise Exception(f"{key} not present in configration") + return value + + class FalkorPlugin(plugins.SingletonPlugin): + tenant_id: str + core_api_url: str + admin_api_url: str + 
plugins.implements(plugins.IConfigurer) plugins.implements(plugins.IDomainObjectModification, inherit=True) plugins.implements(plugins.IResourceController, inherit=True) @@ -30,20 +41,16 @@ def update_config(self, config_): def configure(self, config): self.config = config - config_keys = [ - "falkor.tenant_id", - "falkor.core_api_url", - "falkor.admin_api_url", - "falkor.auth.endpoint", - "falkor.auth.client_id", - "falkor.auth.client_secret", - "falkor.auth.username", - "falkor.auth.password", - ] - - for key in config_keys: - if not config.get(key): - raise Exception(f"{key} not present in configration") + # config_keys = [ + # "ckanext.falkor.auth.endpoint", + # "ckanext.falkor.auth.client_id", + # "ckanext.falkor.auth.client_secret", + # "ckanext.falkor.auth.username", + # "ckanext.falkor.auth.password", + # ] + self.tenant_id = get_config_value(config, "ckanext.falkor.tenant_id") + self.core_api_url = get_config_value(config, "ckanext.falkor.tenant_id") + self.admin_api_url = get_config_value(config, "ckanext.falkor.admin_api_url") # IResourceController def before_show(self, resource_dict): From 51f801a8c0364f53c1037ea5bd003d586d11a2b1 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 14 Feb 2024 13:14:11 +0000 Subject: [PATCH 008/156] Move document create to falkor_client --- ckanext/falkor/auth.py | 8 ++- ckanext/falkor/falkor_client.py | 93 +++++++++++++++++++++++++++++++++ ckanext/falkor/plugin.py | 31 ++++++----- ckanext/falkor/tasks2.py | 2 +- 4 files changed, 119 insertions(+), 15 deletions(-) create mode 100644 ckanext/falkor/falkor_client.py diff --git a/ckanext/falkor/auth.py b/ckanext/falkor/auth.py index d52d693..ddd6585 100644 --- a/ckanext/falkor/auth.py +++ b/ckanext/falkor/auth.py @@ -52,7 +52,7 @@ def access_token(self) -> str: if self.__access_token is None or self.__refresh_token is None: log.debug("No tokens, logging in...") self.__login() - if self.__is_token_expired(self.__refresh_token): + elif 
self.__is_token_expired(self.__refresh_token): log.debug("Refresh token expired, reauthenticating...") self.__login() elif self.__is_token_expired(self.__access_token): @@ -63,6 +63,12 @@ def access_token(self) -> str: def __is_token_expired(self, token: Token) -> bool: expires_at = self.__timestamp + token.expires_in current_time = time.time() + log.debug( + "TOKEN EXPIRE INFO: Expires at: " + + str(expires_at) + + " - Current Time: " + + str(current_time) + ) return current_time >= expires_at def __login(self) -> None: diff --git a/ckanext/falkor/falkor_client.py b/ckanext/falkor/falkor_client.py new file mode 100644 index 0000000..3a59954 --- /dev/null +++ b/ckanext/falkor/falkor_client.py @@ -0,0 +1,93 @@ +from ckanext.falkor import auth +import ckan.lib.jobs as jobs +from typing import TypedDict +import requests +import logging +import json + +log = logging.getLogger(__name__) + +HttpHeaders = TypedDict( + "HttpHeaders", {"Content-Type": str, "accept": str, "Authorization": str} +) + + +def base_headers(access_token: str) -> HttpHeaders: + return { + "Content-Type": "application/json", + "accept": "application/json", + "Authorization": "Bearer " + access_token, + } + + +def falkor_post(url, payload, headers): + response = requests.post(url, headers=headers, json=payload, timeout=120) + log.debug(response.json()) + return response + + +def falkor_put(url, payload, headers): + response = requests.put(url, headers=headers, json=payload, timeout=120) + log.debug(response.json()) + return response + + +def falkor_get(url, headers): + response = requests.get(url, headers=headers, timeout=120) + log.debug(response.json()) + return response + + +def falkor_delete(url, headers): + response = requests.delete(url, headers=headers, timeout=120) + log.debug(response.json()) + return response + + +class Falkor: + __auth: auth.Auth + __core_base_url: str + __admin_base_url: str + __tenant_id: str + + def __init__( + self, auth: auth.Auth, tenant_id: str, core_base_url: str, 
admin_base_url: str + ): + self.__auth = auth + self.__tenant_id = tenant_id + self.__core_base_url = core_base_url + self.__admin_base_url = admin_base_url + + def document_read(self, context, resource): + url = ( + self.__core_base_url + + self.__tenant_id + + "/dataset/" + + resource["package_id"] + + "/" + + resource["id"] + + "/body?userId=" + ) + + if "user_obj" not in context: + url = url + "guest" + else: + url = url + context["user_obj"].id + + jobs.enqueue(falkor_get, [url, base_headers(self.__auth.access_token)]) + + def document_create(self, resource: dict): + url = ( + self.__core_base_url + + self.__tenant_id + + "/dataset/" + + resource["package_id"] + + "/create" + ) + resource_id = str(resource["id"]) + payload = {"documentId": resource_id, "data": json.dumps(resource)} + + log.debug(f"Creating document with id {resource_id}") + jobs.enqueue( + falkor_post, [url, payload, base_headers(self.__auth.access_token)] + ) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 655ff7d..88cfc49 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -12,6 +12,7 @@ from ckan.model.domain_object import DomainObjectOperation from ckanext.falkor import tasks2 +from ckanext.falkor import falkor_client, auth log = logging.getLogger(__name__) @@ -24,9 +25,7 @@ def get_config_value(config, key: str) -> str: class FalkorPlugin(plugins.SingletonPlugin): - tenant_id: str - core_api_url: str - admin_api_url: str + falkor: falkor_client.Falkor plugins.implements(plugins.IConfigurer) plugins.implements(plugins.IDomainObjectModification, inherit=True) @@ -48,9 +47,17 @@ def configure(self, config): # "ckanext.falkor.auth.username", # "ckanext.falkor.auth.password", # ] - self.tenant_id = get_config_value(config, "ckanext.falkor.tenant_id") - self.core_api_url = get_config_value(config, "ckanext.falkor.tenant_id") - self.admin_api_url = get_config_value(config, "ckanext.falkor.admin_api_url") + tenant_id = get_config_value(config, 
"ckanext.falkor.tenant_id") + core_api_url = get_config_value(config, "ckanext.falkor.core_api_url") + admin_api_url = get_config_value(config, "ckanext.falkor.admin_api_url") + + credentials = auth.Credentials("falkor", "secret", "testuser", "password") + endpoint = "http://192.168.66.1:38080/realms/byzgen-falkor/protocol/openid-connect/token" + auth_client = auth.Auth(credentials, endpoint) + + self.falkor = falkor_client.Falkor( + auth_client, tenant_id, core_api_url, admin_api_url + ) # IResourceController def before_show(self, resource_dict): @@ -60,7 +67,8 @@ def before_show(self, resource_dict): "user": toolkit.g.user, "user_obj": toolkit.g.userobj, } - tasks2.documentRead(context, resource_dict) + + # self.falkor.document_read(context, resource_dict) # IDomainObjectNotification & #IResourceURLChange def notify(self, entity, operation=None): @@ -68,17 +76,16 @@ def notify(self, entity, operation=None): if isinstance(entity, model.Resource): if not operation: - # This happens on IResourceURLChange, but I'm not sure whether - # to make this into a webhook. 
return elif operation == DomainObjectOperation.new: topic = "resource/create" resource = table_dictize(entity, context) - tasks2.documentCreate(resource) + log.debug("CREATE EVENT") + self.falkor.document_create(resource) # resource/document update - if operation == DomainObjectOperation.changed: + elif operation == DomainObjectOperation.changed: topic = "resource/update" resource = table_dictize(entity, context) tasks2.documentUpdate(resource) @@ -88,7 +95,6 @@ def notify(self, entity, operation=None): topic = "resource/delete" resource = table_dictize(entity, context) tasks2.documentDelete(resource) - else: return @@ -98,6 +104,5 @@ def notify(self, entity, operation=None): topic = "dataset/create" resource = table_dictize(entity, context) tasks2.datasetCreate(resource) - else: return diff --git a/ckanext/falkor/tasks2.py b/ckanext/falkor/tasks2.py index 47e3414..aafcd7b 100644 --- a/ckanext/falkor/tasks2.py +++ b/ckanext/falkor/tasks2.py @@ -23,7 +23,7 @@ CORE_BASE_URL = "http://192.168.66.1:8080/api/core/v0/" ADMIN_BASE_URL = "http://192.168.66.1:8585/api/admin/v0/" -credentials = auth.Credentials("falkor", "", "testuser", "") +credentials = auth.Credentials("falkor", "secret", "testuser", "password") endpoint = ( "http://192.168.66.1:38080/realms/byzgen-falkor/protocol/openid-connect/token" ) From c9e983905d0999ede75520aaaf49c40869f836e5 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 14 Feb 2024 14:29:37 +0000 Subject: [PATCH 009/156] move delete and update to falkor_client --- ckanext/falkor/falkor_client.py | 56 +++++++++++++++++++++++++++------ ckanext/falkor/plugin.py | 15 +++------ 2 files changed, 51 insertions(+), 20 deletions(-) diff --git a/ckanext/falkor/falkor_client.py b/ckanext/falkor/falkor_client.py index 3a59954..05c549b 100644 --- a/ckanext/falkor/falkor_client.py +++ b/ckanext/falkor/falkor_client.py @@ -59,24 +59,28 @@ def __init__( self.__admin_base_url = admin_base_url def document_read(self, context, resource): + resource_id = 
str(resource["id"]) + package_id = str(resource["package_id"]) + + user_id = "guest" if "user_obj" not in context else context["user_obj"].id + url = ( self.__core_base_url + self.__tenant_id + "/dataset/" - + resource["package_id"] + + package_id + "/" - + resource["id"] - + "/body?userId=" + + resource_id + + f"/body?userId={user_id}" ) - if "user_obj" not in context: - url = url + "guest" - else: - url = url + context["user_obj"].id - + log.debug(f"Read by {user_id} for document with id {resource_id}") jobs.enqueue(falkor_get, [url, base_headers(self.__auth.access_token)]) def document_create(self, resource: dict): + resource_id = str(resource["id"]) + package_id = str(resource["package_id"]) + url = ( self.__core_base_url + self.__tenant_id @@ -84,10 +88,44 @@ def document_create(self, resource: dict): + resource["package_id"] + "/create" ) - resource_id = str(resource["id"]) payload = {"documentId": resource_id, "data": json.dumps(resource)} log.debug(f"Creating document with id {resource_id}") jobs.enqueue( falkor_post, [url, payload, base_headers(self.__auth.access_token)] ) + + def document_update(self, resource): + resource_id = str(resource["id"]) + package_id = str(resource["package_id"]) + + url = ( + self.__core_base_url + + self.__tenant_id + + "/dataset/" + + package_id + + "/" + + resource_id + + "/body" + ) + + log.debug(f"Updating document with id {resource_id}") + jobs.enqueue( + falkor_put, [url, resource, base_headers(self.__auth.access_token)] + ) + + def document_delete(self, resource): + resource_id = str(resource["id"]) + package_id = str(resource["package_id"]) + + url = ( + self.__core_base_url + + self.__tenant_id + + "/dataset/" + + package_id + + "/" + + resource_id + ) + + log.debug(f'Deleting document with id {str(resource["id"])}') + jobs.enqueue(falkorDelete, [url, base_headers(self.__auth.access_token)]) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 88cfc49..f60e864 100644 --- a/ckanext/falkor/plugin.py 
+++ b/ckanext/falkor/plugin.py @@ -75,31 +75,24 @@ def notify(self, entity, operation=None): context = {"model": model, "ignore_auth": True, "defer_commit": True} if isinstance(entity, model.Resource): - if not operation: - return - - elif operation == DomainObjectOperation.new: + if operation == DomainObjectOperation.new: topic = "resource/create" resource = table_dictize(entity, context) - log.debug("CREATE EVENT") self.falkor.document_create(resource) - # resource/document update elif operation == DomainObjectOperation.changed: topic = "resource/update" resource = table_dictize(entity, context) - tasks2.documentUpdate(resource) + self.falkor.document_update(resource) - # resource/document delete elif operation == DomainObjectOperation.deleted: topic = "resource/delete" resource = table_dictize(entity, context) - tasks2.documentDelete(resource) + self.falkor.document_delete(resource) else: return - if isinstance(entity, model.Package): - # Dataset create + elif isinstance(entity, model.Package): if operation == DomainObjectOperation.new: topic = "dataset/create" resource = table_dictize(entity, context) From b93865f448258a7b42232865497f159ff6ada273 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 14 Feb 2024 15:08:35 +0000 Subject: [PATCH 010/156] Move datasetCreate to falkor client --- ckanext/falkor/falkor_client.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/ckanext/falkor/falkor_client.py b/ckanext/falkor/falkor_client.py index 05c549b..3106f2c 100644 --- a/ckanext/falkor/falkor_client.py +++ b/ckanext/falkor/falkor_client.py @@ -58,6 +58,23 @@ def __init__( self.__core_base_url = core_base_url self.__admin_base_url = admin_base_url + def dataset_create(self, resource): + resource_id = str(resource["id"]) + url = self.__admin_base_url + self.__tenant_id + "/dataset" + payload = { + "datasetId": resource_id, + "encryptionType": "none", + "externalStorage": "false", + "permissionEnabled": "false", + 
"taggingEnabled": "false", + "iotaEnabled": "false", + "tokensEnabled": "false", + } + + # run async request + log.debug(f"Create dataset with id {resource_id}") + jobs.enqueue(falkorPost, [url, payload, base_headers()]) + def document_read(self, context, resource): resource_id = str(resource["id"]) package_id = str(resource["package_id"]) @@ -85,7 +102,7 @@ def document_create(self, resource: dict): self.__core_base_url + self.__tenant_id + "/dataset/" - + resource["package_id"] + + package_id + "/create" ) payload = {"documentId": resource_id, "data": json.dumps(resource)} @@ -127,5 +144,5 @@ def document_delete(self, resource): + resource_id ) - log.debug(f'Deleting document with id {str(resource["id"])}') + log.debug(f"Deleting document with id {resource_id}") jobs.enqueue(falkorDelete, [url, base_headers(self.__auth.access_token)]) From 943daba53cd095cd74e649020af43723b5f4c997 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 14 Feb 2024 15:48:30 +0000 Subject: [PATCH 011/156] remove task2 and fix create_dataset call --- ckanext/falkor/falkor_client.py | 4 +- ckanext/falkor/plugin.py | 3 +- ckanext/falkor/tasks2.py | 161 -------------------------------- 3 files changed, 4 insertions(+), 164 deletions(-) delete mode 100644 ckanext/falkor/tasks2.py diff --git a/ckanext/falkor/falkor_client.py b/ckanext/falkor/falkor_client.py index 3106f2c..4ef2cbf 100644 --- a/ckanext/falkor/falkor_client.py +++ b/ckanext/falkor/falkor_client.py @@ -73,7 +73,9 @@ def dataset_create(self, resource): # run async request log.debug(f"Create dataset with id {resource_id}") - jobs.enqueue(falkorPost, [url, payload, base_headers()]) + jobs.enqueue( + falkor_post, [url, payload, base_headers(self.__auth.access_token)] + ) def document_read(self, context, resource): resource_id = str(resource["id"]) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index f60e864..73592ff 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -11,7 +11,6 @@ from 
ckan.lib.dictization import table_dictize from ckan.model.domain_object import DomainObjectOperation -from ckanext.falkor import tasks2 from ckanext.falkor import falkor_client, auth log = logging.getLogger(__name__) @@ -96,6 +95,6 @@ def notify(self, entity, operation=None): if operation == DomainObjectOperation.new: topic = "dataset/create" resource = table_dictize(entity, context) - tasks2.datasetCreate(resource) + self.falkor.dataset_create(resource) else: return diff --git a/ckanext/falkor/tasks2.py b/ckanext/falkor/tasks2.py deleted file mode 100644 index aafcd7b..0000000 --- a/ckanext/falkor/tasks2.py +++ /dev/null @@ -1,161 +0,0 @@ -# updated verson of tasks - -from urllib import response - -import json -import requests -import ckan.model as models -import ckan.plugins.toolkit as toolkit -import ckan.lib.jobs as jobs -import ckanext.falkor -from typing import Union -from ckanext.falkor import auth - -import os -import sys - -import logging - -log = logging.getLogger(__name__) - -# Constants -TENANT_ID = 2001 -CORE_BASE_URL = "http://192.168.66.1:8080/api/core/v0/" -ADMIN_BASE_URL = "http://192.168.66.1:8585/api/admin/v0/" - -credentials = auth.Credentials("falkor", "secret", "testuser", "password") -endpoint = ( - "http://192.168.66.1:38080/realms/byzgen-falkor/protocol/openid-connect/token" -) -auth = auth.Auth(credentials, endpoint) - - -def base_headers() -> dict[str, str, str]: - return { - "Content-Type": "application/json", - "accept": "application/json", - "Authorization": "Bearer " + auth.access_token, - } - - -# Send a post request to falkor -def falkorPost(url, payload, headers): - response = requests.post(url, headers=headers, json=payload, timeout=120) - log.debug(response.json()) - return response - - -# Send a post request to falkor -def falkorPut(url, payload, headers): - response = requests.put(url, headers=headers, json=payload, timeout=120) - log.debug(response.json()) - return response - - -# Send a get request to falkor -def 
falkorGet(url, headers): - response = requests.get(url, headers=headers, timeout=120) - log.debug(response.json()) - return response - - -def falkorDelete(url, headers): - response = requests.delete(url, headers=headers, timeout=120) - log.debug(response.json()) - return response - - -def documentCreate(resource): - # Format data for falkor - url = ( - CORE_BASE_URL - + str(TENANT_ID) - + "/dataset/" - + resource["package_id"] - + "/create" - ) - payload = {"documentId": resource["id"], "data": json.dumps(resource)} - - log.debug(f'Creating document with id {str(resource["id"])}') - # run async request - jobs.enqueue(falkorPost, [url, payload, base_headers()]) - - -# Cannot be used till falkor can deal with: -# document UUIDS -# JSON document updates -def documentUpdate(resource): - # Format data for falkor - url = ( - CORE_BASE_URL - + str(TENANT_ID) - + "/dataset/" - + resource["package_id"] - + "/" - + resource["id"] - + "/body" - ) - payload = {"data": resource} - log.debug(f'Updating document with id {str(resource["id"])}') - # run async request - jobs.enqueue(falkorPut, [url, payload, base_headers()]) - - -def documentRead(context, resource): - if "user_obj" not in context: - url = ( - CORE_BASE_URL - + str(TENANT_ID) - + "/dataset/" - + resource["package_id"] - + "/" - + resource["id"] - + "/body?userId=" - + "guest" - ) - else: - url = ( - CORE_BASE_URL - + str(TENANT_ID) - + "/dataset/" - + resource["package_id"] - + "/" - + resource["id"] - + "/body?userId=" - + context["user_obj"].id - ) - log.debug(url) - # run async request - jobs.enqueue(falkorGet, [url, base_headers()]) - - -def documentDelete(resource): - url = ( - CORE_BASE_URL - + str(TENANT_ID) - + "/dataset/" - + resource["package_id"] - + "/" - + resource["id"] - ) - # run async request - log.debug(f'Deleting document with id {str(resource["id"])}') - jobs.enqueue(falkorDelete, [url, base_headers()]) - - -def datasetCreate(resource): - # Format data for falkor - url = ADMIN_BASE_URL + 
str(TENANT_ID) + "/dataset" - payload = { - "datasetId": str(resource["id"]), - "encryptionType": "none", - "externalStorage": "false", - "permissionEnabled": "false", - "taggingEnabled": "false", - "iotaEnabled": "false", - "tokensEnabled": "false", - } - - # run async request - log.debug(f'Create dataset with id {str(resource["id"])}') - jobs.enqueue(falkorPost, [url, payload, base_headers()]) From b9e1daba6a1fd3332b143ce4b0ed6b216767fe4d Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 14 Feb 2024 16:07:39 +0000 Subject: [PATCH 012/156] fix falkorDelete method name --- ckanext/falkor/falkor_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/falkor/falkor_client.py b/ckanext/falkor/falkor_client.py index 4ef2cbf..a159b9d 100644 --- a/ckanext/falkor/falkor_client.py +++ b/ckanext/falkor/falkor_client.py @@ -147,4 +147,4 @@ def document_delete(self, resource): ) log.debug(f"Deleting document with id {resource_id}") - jobs.enqueue(falkorDelete, [url, base_headers(self.__auth.access_token)]) + jobs.enqueue(falkor_delete, [url, base_headers(self.__auth.access_token)]) From 9e85a3a48a6c258582eb708a3dc601b373da3347 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Fri, 16 Feb 2024 12:33:19 +0000 Subject: [PATCH 013/156] Use config for remaining values --- ckanext/falkor/plugin.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 73592ff..616cf32 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -39,21 +39,23 @@ def update_config(self, config_): def configure(self, config): self.config = config - # config_keys = [ - # "ckanext.falkor.auth.endpoint", - # "ckanext.falkor.auth.client_id", - # "ckanext.falkor.auth.client_secret", - # "ckanext.falkor.auth.username", - # "ckanext.falkor.auth.password", - # ] + + endpoint = get_config_value(config, "ckanext.falkor.auth.endpoint") + client_id = get_config_value(config, 
"ckanext.falkor.auth.client_id") + client_secret = get_config_value(config, "ckanext.falkor.auth.client_secret") + username = get_config_value(config, "ckanext.falkor.auth.username") + password = get_config_value(config, "ckanext.falkor.auth.password") + + credentials = auth.Credentials(client_id, client_secret, username, password) + auth_client = auth.Auth( + credentials, + endpoint, + ) + tenant_id = get_config_value(config, "ckanext.falkor.tenant_id") core_api_url = get_config_value(config, "ckanext.falkor.core_api_url") admin_api_url = get_config_value(config, "ckanext.falkor.admin_api_url") - credentials = auth.Credentials("falkor", "secret", "testuser", "password") - endpoint = "http://192.168.66.1:38080/realms/byzgen-falkor/protocol/openid-connect/token" - auth_client = auth.Auth(credentials, endpoint) - self.falkor = falkor_client.Falkor( auth_client, tenant_id, core_api_url, admin_api_url ) From d2b04f9e68ddad894c9f1eac8ada6f28d50b590a Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 21 Feb 2024 15:09:43 +0000 Subject: [PATCH 014/156] Add user_ids to audit events --- ckanext/falkor/falkor_client.py | 13 +++++++------ ckanext/falkor/plugin.py | 9 +-------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/ckanext/falkor/falkor_client.py b/ckanext/falkor/falkor_client.py index a159b9d..217db67 100644 --- a/ckanext/falkor/falkor_client.py +++ b/ckanext/falkor/falkor_client.py @@ -1,5 +1,6 @@ from ckanext.falkor import auth import ckan.lib.jobs as jobs +import ckan.plugins.toolkit as toolkit from typing import TypedDict import requests import logging @@ -13,10 +14,13 @@ def base_headers(access_token: str) -> HttpHeaders: + user = toolkit.g.userobj + user_id = "guest" if not user else user.id return { "Content-Type": "application/json", "accept": "application/json", "Authorization": "Bearer " + access_token, + "x-user": user_id, } @@ -77,12 +81,9 @@ def dataset_create(self, resource): falkor_post, [url, payload, 
base_headers(self.__auth.access_token)] ) - def document_read(self, context, resource): + def document_read(self, resource): resource_id = str(resource["id"]) package_id = str(resource["package_id"]) - - user_id = "guest" if "user_obj" not in context else context["user_obj"].id - url = ( self.__core_base_url + self.__tenant_id @@ -90,10 +91,10 @@ def document_read(self, context, resource): + package_id + "/" + resource_id - + f"/body?userId={user_id}" + + f"/body" ) - log.debug(f"Read by {user_id} for document with id {resource_id}") + log.debug(f"Read for document with id {resource_id}") jobs.enqueue(falkor_get, [url, base_headers(self.__auth.access_token)]) def document_create(self, resource: dict): diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 616cf32..5ac8f00 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -62,14 +62,7 @@ def configure(self, config): # IResourceController def before_show(self, resource_dict): - context = { - "model": model, - "session": model.Session, - "user": toolkit.g.user, - "user_obj": toolkit.g.userobj, - } - - # self.falkor.document_read(context, resource_dict) + self.falkor.document_read(resource_dict) # IDomainObjectNotification & #IResourceURLChange def notify(self, entity, operation=None): From 224ac92e63490d5bf87758df3db39ba1b6b8bc94 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 9 Apr 2024 10:56:19 +0100 Subject: [PATCH 015/156] Add audit button to resource button action panel --- ckanext/falkor/templates/package/resource_read.html | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 ckanext/falkor/templates/package/resource_read.html diff --git a/ckanext/falkor/templates/package/resource_read.html b/ckanext/falkor/templates/package/resource_read.html new file mode 100644 index 0000000..8fd5855 --- /dev/null +++ b/ckanext/falkor/templates/package/resource_read.html @@ -0,0 +1,8 @@ +{% ckan_extends %} + +{% block resource_actions_inner %} + {{ super() }} +
  • + Audit +
  • +{% endblock %} \ No newline at end of file From 30baa1eb548d56dd7f4b28aa0362380aaf803b24 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 9 Apr 2024 10:56:27 +0100 Subject: [PATCH 016/156] Cleanup unused vars/imports --- ckanext/falkor/plugin.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 5ac8f00..603e397 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -5,9 +5,6 @@ import ckan.model as model -from ckan.plugins.toolkit import config - -import ckan.lib.jobs as jobs from ckan.lib.dictization import table_dictize from ckan.model.domain_object import DomainObjectOperation @@ -39,7 +36,6 @@ def update_config(self, config_): def configure(self, config): self.config = config - endpoint = get_config_value(config, "ckanext.falkor.auth.endpoint") client_id = get_config_value(config, "ckanext.falkor.auth.client_id") client_secret = get_config_value(config, "ckanext.falkor.auth.client_secret") @@ -70,17 +66,14 @@ def notify(self, entity, operation=None): if isinstance(entity, model.Resource): if operation == DomainObjectOperation.new: - topic = "resource/create" resource = table_dictize(entity, context) self.falkor.document_create(resource) elif operation == DomainObjectOperation.changed: - topic = "resource/update" resource = table_dictize(entity, context) self.falkor.document_update(resource) elif operation == DomainObjectOperation.deleted: - topic = "resource/delete" resource = table_dictize(entity, context) self.falkor.document_delete(resource) else: @@ -88,7 +81,6 @@ def notify(self, entity, operation=None): elif isinstance(entity, model.Package): if operation == DomainObjectOperation.new: - topic = "dataset/create" resource = table_dictize(entity, context) self.falkor.dataset_create(resource) else: From e17148eb0945de73245b45e042ee397da0796a01 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 9 Apr 2024 17:36:32 +0100 Subject: [PATCH 017/156] Render audit in table on 
the resource view page --- ckanext/falkor/falkor_client.py | 20 +++++++++- ckanext/falkor/plugin.py | 38 ++++++++++++++++--- ckanext/falkor/templates/falkor-audit.html | 1 + .../templates/package/resource_read.html | 36 +++++++++++++++--- 4 files changed, 83 insertions(+), 12 deletions(-) create mode 100644 ckanext/falkor/templates/falkor-audit.html diff --git a/ckanext/falkor/falkor_client.py b/ckanext/falkor/falkor_client.py index 217db67..0dc54fb 100644 --- a/ckanext/falkor/falkor_client.py +++ b/ckanext/falkor/falkor_client.py @@ -91,7 +91,7 @@ def document_read(self, resource): + package_id + "/" + resource_id - + f"/body" + + "/body" ) log.debug(f"Read for document with id {resource_id}") @@ -149,3 +149,21 @@ def document_delete(self, resource): log.debug(f"Deleting document with id {resource_id}") jobs.enqueue(falkor_delete, [url, base_headers(self.__auth.access_token)]) + + def document_audit_trail(self, resource): + resource_id = str(resource["id"]) + package_id = str(resource["package_id"]) + + url = ( + self.__core_base_url + + self.__tenant_id + + "/dataset/" + + package_id + + "/" + + resource_id + + "/body/audit" + ) + + return falkor_get(url, base_headers(self.__auth.access_token)).json() + + diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 603e397..e5e176a 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -4,12 +4,15 @@ import logging import ckan.model as model +from ckan.common import config as ckanconfig from ckan.lib.dictization import table_dictize from ckan.model.domain_object import DomainObjectOperation from ckanext.falkor import falkor_client, auth +# from flask import Blueprint, render_template + log = logging.getLogger(__name__) @@ -19,20 +22,25 @@ def get_config_value(config, key: str) -> str: raise Exception(f"{key} not present in configration") return value +# def render_audit(): +# u'''A simple view function''' +# return render_template(u"falkor-audit.html") + class 
FalkorPlugin(plugins.SingletonPlugin): - falkor: falkor_client.Falkor + falkor: falkor_client.Falkor = None plugins.implements(plugins.IConfigurer) + plugins.implements(plugins.IConfigurable, inherit=True) + # plugins.implements(plugins.IBlueprint) + plugins.implements(plugins.ITemplateHelpers) plugins.implements(plugins.IDomainObjectModification, inherit=True) plugins.implements(plugins.IResourceController, inherit=True) - plugins.implements(plugins.IConfigurable, inherit=True) # IConfigurer - def update_config(self, config_): - toolkit.add_template_directory(config_, "templates") - toolkit.add_public_directory(config_, "public") - toolkit.add_resource("fanstatic", "falkor") + def update_config(self, config): + toolkit.add_template_directory(config, "templates") + toolkit.add_public_directory(config, "public") def configure(self, config): self.config = config @@ -59,6 +67,7 @@ def configure(self, config): # IResourceController def before_show(self, resource_dict): self.falkor.document_read(resource_dict) + self.get_helpers() # IDomainObjectNotification & #IResourceURLChange def notify(self, entity, operation=None): @@ -85,3 +94,20 @@ def notify(self, entity, operation=None): self.falkor.dataset_create(resource) else: return + + def get_helpers(self): + if self.falkor is None: + self.configure(ckanconfig) + return { 'get_audit_trail': self.falkor.document_audit_trail } + + # def get_blueprint(self): + # u'''Return a Flask Blueprint object to be registered by the app.''' + # + # # Create Blueprint for plugin + # blueprint = Blueprint("test", __name__) + # blueprint.template_folder = u'templates' + # + # # Add plugin url rules to Blueprint object + # blueprint.add_url_rule(u'/hello_plugin', u'hello_plugin', render_audit) + # + # return blueprint diff --git a/ckanext/falkor/templates/falkor-audit.html b/ckanext/falkor/templates/falkor-audit.html new file mode 100644 index 0000000..34295ee --- /dev/null +++ b/ckanext/falkor/templates/falkor-audit.html @@ -0,0 +1 @@ +

    This is a test

    diff --git a/ckanext/falkor/templates/package/resource_read.html b/ckanext/falkor/templates/package/resource_read.html index 8fd5855..b28954f 100644 --- a/ckanext/falkor/templates/package/resource_read.html +++ b/ckanext/falkor/templates/package/resource_read.html @@ -1,8 +1,34 @@ {% ckan_extends %} -{% block resource_actions_inner %} +{% block resource_additional_information_inner %} {{ super() }} -
  • - Audit -
  • -{% endblock %} \ No newline at end of file +
    +

    {{ _('Audit') }}

    + + + + + + + + + + {% set audits = h.get_audit_trail(res) %} + {% for audit in audits[:10] %} + + + + + + {% endfor %} + {% for audit in audits[10:] %} + + + + + + {% endfor %} + +
    {{ _('Event') }}{{ _('Timestamp') }}{{ _('User ID') }}
    {{ audit.event }}{{ audit.timestamp }}{{ audit.userId }}
    {{ audit.event }}{{ audit.timestamp }}{{ audit.userId }}
    +
    +{% endblock %} From 569aaca2f1e90d64aa8c458efcc16e67f02561ff Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 11 Apr 2024 18:22:50 +0100 Subject: [PATCH 018/156] Revert audit template to external link --- ckanext/falkor/falkor_client.py | 19 +--------- ckanext/falkor/plugin.py | 35 +++++++------------ .../templates/package/resource_read.html | 35 +++---------------- 3 files changed, 18 insertions(+), 71 deletions(-) diff --git a/ckanext/falkor/falkor_client.py b/ckanext/falkor/falkor_client.py index 0dc54fb..d7e8b67 100644 --- a/ckanext/falkor/falkor_client.py +++ b/ckanext/falkor/falkor_client.py @@ -5,6 +5,7 @@ import requests import logging import json +import datetime log = logging.getLogger(__name__) @@ -149,21 +150,3 @@ def document_delete(self, resource): log.debug(f"Deleting document with id {resource_id}") jobs.enqueue(falkor_delete, [url, base_headers(self.__auth.access_token)]) - - def document_audit_trail(self, resource): - resource_id = str(resource["id"]) - package_id = str(resource["package_id"]) - - url = ( - self.__core_base_url - + self.__tenant_id - + "/dataset/" - + package_id - + "/" - + resource_id - + "/body/audit" - ) - - return falkor_get(url, base_headers(self.__auth.access_token)).json() - - diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index e5e176a..080320d 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -11,8 +11,6 @@ from ckanext.falkor import falkor_client, auth -# from flask import Blueprint, render_template - log = logging.getLogger(__name__) @@ -22,17 +20,12 @@ def get_config_value(config, key: str) -> str: raise Exception(f"{key} not present in configration") return value -# def render_audit(): -# u'''A simple view function''' -# return render_template(u"falkor-audit.html") - class FalkorPlugin(plugins.SingletonPlugin): - falkor: falkor_client.Falkor = None + falkor: falkor_client.Falkor plugins.implements(plugins.IConfigurer) plugins.implements(plugins.IConfigurable, 
inherit=True) - # plugins.implements(plugins.IBlueprint) plugins.implements(plugins.ITemplateHelpers) plugins.implements(plugins.IDomainObjectModification, inherit=True) plugins.implements(plugins.IResourceController, inherit=True) @@ -95,19 +88,15 @@ def notify(self, entity, operation=None): else: return + def construct_falkor_url(self, resource): + log.info(resource) + resource_id = resource["id"] + package_id = resource["package_id"] + + package_info = toolkit.get_action("package_show")(data_dict={"id": package_id}) + organisation_id = package_info["organization"]["id"] + + return f"http://192.168.66.1:8686/{organisation_id}/{package_id}/{resource_id}" + def get_helpers(self): - if self.falkor is None: - self.configure(ckanconfig) - return { 'get_audit_trail': self.falkor.document_audit_trail } - - # def get_blueprint(self): - # u'''Return a Flask Blueprint object to be registered by the app.''' - # - # # Create Blueprint for plugin - # blueprint = Blueprint("test", __name__) - # blueprint.template_folder = u'templates' - # - # # Add plugin url rules to Blueprint object - # blueprint.add_url_rule(u'/hello_plugin', u'hello_plugin', render_audit) - # - # return blueprint + return {"construct_falkor_url": self.construct_falkor_url} diff --git a/ckanext/falkor/templates/package/resource_read.html b/ckanext/falkor/templates/package/resource_read.html index b28954f..f46c4f9 100644 --- a/ckanext/falkor/templates/package/resource_read.html +++ b/ckanext/falkor/templates/package/resource_read.html @@ -1,34 +1,9 @@ {% ckan_extends %} -{% block resource_additional_information_inner %} +{% block resource_actions_inner %} {{ super() }} -
    -

    {{ _('Audit') }}

    - - - - - - - - - - {% set audits = h.get_audit_trail(res) %} - {% for audit in audits[:10] %} - - - - - - {% endfor %} - {% for audit in audits[10:] %} - - - - - - {% endfor %} - -
    {{ _('Event') }}{{ _('Timestamp') }}{{ _('User ID') }}
    {{ audit.event }}{{ audit.timestamp }}{{ audit.userId }}
    {{ audit.event }}{{ audit.timestamp }}{{ audit.userId }}
    -
    +
  • + {% set url = h.construct_falkor_url(res) %} + Audit +
  • {% endblock %} From a51d1d384d17d5645e363aa89375f8103dac5d97 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Mon, 15 Apr 2024 16:05:02 +0100 Subject: [PATCH 019/156] Adjust position of audit button --- ckanext/falkor/templates/package/resource_read.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/falkor/templates/package/resource_read.html b/ckanext/falkor/templates/package/resource_read.html index f46c4f9..af41a65 100644 --- a/ckanext/falkor/templates/package/resource_read.html +++ b/ckanext/falkor/templates/package/resource_read.html @@ -1,9 +1,9 @@ {% ckan_extends %} {% block resource_actions_inner %} - {{ super() }}
  • {% set url = h.construct_falkor_url(res) %} Audit
  • + {{ super() }} {% endblock %} From a6abfb3329122df32c4acba14186ecf9a6088b09 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Mon, 15 Apr 2024 16:05:45 +0100 Subject: [PATCH 020/156] Adjust audit url and add query params --- ckanext/falkor/plugin.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 080320d..fee3bbf 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -89,14 +89,26 @@ def notify(self, entity, operation=None): return def construct_falkor_url(self, resource): - log.info(resource) resource_id = resource["id"] + resource_name = resource["name"] + package_id = resource["package_id"] package_info = toolkit.get_action("package_show")(data_dict={"id": package_id}) - organisation_id = package_info["organization"]["id"] + package_name = resource["name"] + + organisation_info = package_info["organization"] + organisation_id = organisation_info["id"] + organisation_name = organisation_info["title"] + + log.debug(resource) + log.debug(package_info) + + # TODO: Add base url of audit app to config + url = f"http://192.168.66.1:8686/{organisation_name}/{package_name}/{resource_name}" + query = f"?org_id={organisation_id}&dataset_id={package_id}&doc_id={resource_id}" - return f"http://192.168.66.1:8686/{organisation_id}/{package_id}/{resource_id}" + return url + query def get_helpers(self): return {"construct_falkor_url": self.construct_falkor_url} From 2c349a083e0d23a6b8cde6bd475c9f4f569eaa1a Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 18 Apr 2024 09:54:34 +0100 Subject: [PATCH 021/156] read audit base url from config --- ckanext/falkor/plugin.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index fee3bbf..846ff66 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -52,6 +52,7 @@ def configure(self, config): tenant_id = get_config_value(config, 
"ckanext.falkor.tenant_id") core_api_url = get_config_value(config, "ckanext.falkor.core_api_url") admin_api_url = get_config_value(config, "ckanext.falkor.admin_api_url") + self.audit_base_url = get_config_value(config, "ckanext.falkor.audit_base_url") self.falkor = falkor_client.Falkor( auth_client, tenant_id, core_api_url, admin_api_url @@ -104,9 +105,10 @@ def construct_falkor_url(self, resource): log.debug(resource) log.debug(package_info) - # TODO: Add base url of audit app to config - url = f"http://192.168.66.1:8686/{organisation_name}/{package_name}/{resource_name}" - query = f"?org_id={organisation_id}&dataset_id={package_id}&doc_id={resource_id}" + url = f"{self.audit_base_url}{organisation_name}/{package_name}/{resource_name}" + query = ( + f"?org_id={organisation_id}&dataset_id={package_id}&doc_id={resource_id}" + ) return url + query From 225b78e07cecc47b2de7625d9a0388f3e34fc8e8 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 18 Apr 2024 10:55:59 +0100 Subject: [PATCH 022/156] Org id handling refactor --- ckanext/falkor/falkor_client.py | 20 +++++++++++++++++--- ckanext/falkor/plugin.py | 25 ++++++++++++++----------- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/ckanext/falkor/falkor_client.py b/ckanext/falkor/falkor_client.py index d7e8b67..3618c93 100644 --- a/ckanext/falkor/falkor_client.py +++ b/ckanext/falkor/falkor_client.py @@ -71,7 +71,8 @@ def dataset_create(self, resource): "encryptionType": "none", "externalStorage": "false", "permissionEnabled": "false", - "taggingEnabled": "false", + "taggingEnabled": "true", + "linkedContract": "none", "iotaEnabled": "false", "tokensEnabled": "false", } @@ -98,7 +99,12 @@ def document_read(self, resource): log.debug(f"Read for document with id {resource_id}") jobs.enqueue(falkor_get, [url, base_headers(self.__auth.access_token)]) - def document_create(self, resource: dict): + def document_create( + self, + resource: dict, + organisation_id: str, + package_id: str, + ): resource_id = 
str(resource["id"]) package_id = str(resource["package_id"]) @@ -109,7 +115,15 @@ def document_create(self, resource: dict): + package_id + "/create" ) - payload = {"documentId": resource_id, "data": json.dumps(resource)} + payload = { + "documentId": resource_id, + "data": json.dumps(resource), + "tags": { + "organisation_id": organisation_id, + "package_id": package_id, + "resource_id": resource_id, + }, + } log.debug(f"Creating document with id {resource_id}") jobs.enqueue( diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 846ff66..6e0e61a 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -70,7 +70,17 @@ def notify(self, entity, operation=None): if isinstance(entity, model.Resource): if operation == DomainObjectOperation.new: resource = table_dictize(entity, context) - self.falkor.document_create(resource) + + package_id = resource["package_id"] + + package_info = toolkit.get_action("package_show")( + data_dict={"id": package_id} + ) + + organisation_info = package_info["organization"] + organisation_id = organisation_info["id"] + + self.falkor.document_create(resource, organisation_id, package_id) elif operation == DomainObjectOperation.changed: resource = table_dictize(entity, context) @@ -96,20 +106,13 @@ def construct_falkor_url(self, resource): package_id = resource["package_id"] package_info = toolkit.get_action("package_show")(data_dict={"id": package_id}) - package_name = resource["name"] + package_name = package_info["name"] organisation_info = package_info["organization"] - organisation_id = organisation_info["id"] organisation_name = organisation_info["title"] - log.debug(resource) - log.debug(package_info) - - url = f"{self.audit_base_url}{organisation_name}/{package_name}/{resource_name}" - query = ( - f"?org_id={organisation_id}&dataset_id={package_id}&doc_id={resource_id}" - ) - + url = f"{self.audit_base_url}{package_id}/{resource_id}" + query = 
f"?dataset_name={package_name}&org_name={organisation_name}&doc_name={resource_name}" return url + query def get_helpers(self): From b39e33d133942824f57b1521ba657eb0f3a94b73 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 3 Oct 2024 11:53:15 +0100 Subject: [PATCH 023/156] Move access token to job Prevent the UI from blocking when obtaining an access token from Falkor by moving the request to the job --- ckanext/falkor/auth.py | 6 ++---- ckanext/falkor/falkor_client.py | 31 +++++++++++++++++-------------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/ckanext/falkor/auth.py b/ckanext/falkor/auth.py index ddd6585..c59dc10 100644 --- a/ckanext/falkor/auth.py +++ b/ckanext/falkor/auth.py @@ -1,11 +1,8 @@ import logging import requests import time -import json from typing import Union -import logging - log = logging.getLogger(__name__) @@ -97,5 +94,6 @@ def __refresh(self) -> None: def __set_token(self, body) -> None: self.__access_token = Token(body["access_token"], body["expires_in"]) - self.__refresh_token = Token(body["refresh_token"], body["refresh_expires_in"]) + self.__refresh_token = Token( + body["refresh_token"], body["refresh_expires_in"]) self.__timestamp = time.time() diff --git a/ckanext/falkor/falkor_client.py b/ckanext/falkor/falkor_client.py index 3618c93..fade327 100644 --- a/ckanext/falkor/falkor_client.py +++ b/ckanext/falkor/falkor_client.py @@ -5,7 +5,6 @@ import requests import logging import json -import datetime log = logging.getLogger(__name__) @@ -25,26 +24,26 @@ def base_headers(access_token: str) -> HttpHeaders: } -def falkor_post(url, payload, headers): - response = requests.post(url, headers=headers, json=payload, timeout=120) +def falkor_post(url: str, payload: dict, auth: auth.Auth, extra_headers=None): + response = requests.post(url, headers=base_headers(auth.access_token), json=payload, timeout=120) log.debug(response.json()) return response -def falkor_put(url, payload, headers): - response = requests.put(url, 
headers=headers, json=payload, timeout=120) +def falkor_put(url: str, payload: dict, auth: auth.Auth, extra_headers=None): + response = requests.put(url, headers=base_headers(auth.access_token), json=payload, timeout=120) log.debug(response.json()) return response -def falkor_get(url, headers): - response = requests.get(url, headers=headers, timeout=120) +def falkor_get(url: str, auth: auth.Auth, extra_headers=None): + response = requests.get(url, headers=base_headers(auth.access_token), timeout=120) log.debug(response.json()) return response -def falkor_delete(url, headers): - response = requests.delete(url, headers=headers, timeout=120) +def falkor_delete(url: str, auth: auth.Auth, extra_headers=None): + response = requests.delete(url, headers=base_headers(auth.access_token), timeout=120) log.debug(response.json()) return response @@ -56,7 +55,11 @@ class Falkor: __tenant_id: str def __init__( - self, auth: auth.Auth, tenant_id: str, core_base_url: str, admin_base_url: str + self, + auth: auth.Auth, + tenant_id: str, + core_base_url: str, + admin_base_url: str ): self.__auth = auth self.__tenant_id = tenant_id @@ -97,7 +100,7 @@ def document_read(self, resource): ) log.debug(f"Read for document with id {resource_id}") - jobs.enqueue(falkor_get, [url, base_headers(self.__auth.access_token)]) + jobs.enqueue(falkor_get, [url, self.__auth]) def document_create( self, @@ -127,7 +130,7 @@ def document_create( log.debug(f"Creating document with id {resource_id}") jobs.enqueue( - falkor_post, [url, payload, base_headers(self.__auth.access_token)] + falkor_post, [url, payload, self.__auth] ) def document_update(self, resource): @@ -146,7 +149,7 @@ def document_update(self, resource): log.debug(f"Updating document with id {resource_id}") jobs.enqueue( - falkor_put, [url, resource, base_headers(self.__auth.access_token)] + falkor_put, [url, resource, self.__auth] ) def document_delete(self, resource): @@ -163,4 +166,4 @@ def document_delete(self, resource): ) 
log.debug(f"Deleting document with id {resource_id}") - jobs.enqueue(falkor_delete, [url, base_headers(self.__auth.access_token)]) + jobs.enqueue(falkor_delete, [url, self.__auth]) From d19ff4b77dbf8ff0e3cdc4bda162d5e0c1aa7d29 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Fri, 4 Oct 2024 12:57:45 +0100 Subject: [PATCH 024/156] Remove unused import --- ckanext/falkor/plugin.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 6e0e61a..7a7fffb 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -4,7 +4,6 @@ import logging import ckan.model as model -from ckan.common import config as ckanconfig from ckan.lib.dictization import table_dictize from ckan.model.domain_object import DomainObjectOperation @@ -39,11 +38,13 @@ def configure(self, config): self.config = config endpoint = get_config_value(config, "ckanext.falkor.auth.endpoint") client_id = get_config_value(config, "ckanext.falkor.auth.client_id") - client_secret = get_config_value(config, "ckanext.falkor.auth.client_secret") + client_secret = get_config_value( + config, "ckanext.falkor.auth.client_secret") username = get_config_value(config, "ckanext.falkor.auth.username") password = get_config_value(config, "ckanext.falkor.auth.password") - credentials = auth.Credentials(client_id, client_secret, username, password) + credentials = auth.Credentials( + client_id, client_secret, username, password) auth_client = auth.Auth( credentials, endpoint, @@ -51,8 +52,10 @@ def configure(self, config): tenant_id = get_config_value(config, "ckanext.falkor.tenant_id") core_api_url = get_config_value(config, "ckanext.falkor.core_api_url") - admin_api_url = get_config_value(config, "ckanext.falkor.admin_api_url") - self.audit_base_url = get_config_value(config, "ckanext.falkor.audit_base_url") + admin_api_url = get_config_value( + config, "ckanext.falkor.admin_api_url") + self.audit_base_url = 
get_config_value( + config, "ckanext.falkor.audit_base_url") self.falkor = falkor_client.Falkor( auth_client, tenant_id, core_api_url, admin_api_url @@ -80,7 +83,8 @@ def notify(self, entity, operation=None): organisation_info = package_info["organization"] organisation_id = organisation_info["id"] - self.falkor.document_create(resource, organisation_id, package_id) + self.falkor.document_create( + resource, organisation_id, package_id) elif operation == DomainObjectOperation.changed: resource = table_dictize(entity, context) @@ -105,14 +109,16 @@ def construct_falkor_url(self, resource): package_id = resource["package_id"] - package_info = toolkit.get_action("package_show")(data_dict={"id": package_id}) + package_info = toolkit.get_action( + "package_show")(data_dict={"id": package_id}) package_name = package_info["name"] organisation_info = package_info["organization"] organisation_name = organisation_info["title"] url = f"{self.audit_base_url}{package_id}/{resource_id}" - query = f"?dataset_name={package_name}&org_name={organisation_name}&doc_name={resource_name}" + query = f"?dataset_name={package_name}&org_name={ + organisation_name}&doc_name={resource_name}" return url + query def get_helpers(self): From f299bdfa5ffe06e2890cb6fe958fae96198981fe Mon Sep 17 00:00:00 2001 From: wajones98 Date: Fri, 4 Oct 2024 13:02:38 +0100 Subject: [PATCH 025/156] Rename resource -> package --- ckanext/falkor/plugin.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 7a7fffb..0b9c4ae 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -69,7 +69,6 @@ def before_show(self, resource_dict): # IDomainObjectNotification & #IResourceURLChange def notify(self, entity, operation=None): context = {"model": model, "ignore_auth": True, "defer_commit": True} - if isinstance(entity, model.Resource): if operation == DomainObjectOperation.new: resource = table_dictize(entity, context) @@ -98,8 
+97,8 @@ def notify(self, entity, operation=None): elif isinstance(entity, model.Package): if operation == DomainObjectOperation.new: - resource = table_dictize(entity, context) - self.falkor.dataset_create(resource) + package = table_dictize(entity, context) + self.falkor.dataset_create(package) else: return From b171e26d5ee5c9fbbc8f4ba6c1ebda1741428401 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Fri, 4 Oct 2024 14:27:40 +0100 Subject: [PATCH 026/156] Log package activity stream --- ckanext/falkor/plugin.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 0b9c4ae..46107ca 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -96,6 +96,11 @@ def notify(self, entity, operation=None): return elif isinstance(entity, model.Package): + package = table_dictize(entity, context) + test = toolkit.get_action( + "package_activity_list")(data_dict={"id": package["id"], "limit": 100, "offset": 0}) + log.info(test) + if operation == DomainObjectOperation.new: package = table_dictize(entity, context) self.falkor.dataset_create(package) @@ -116,8 +121,11 @@ def construct_falkor_url(self, resource): organisation_name = organisation_info["title"] url = f"{self.audit_base_url}{package_id}/{resource_id}" - query = f"?dataset_name={package_name}&org_name={ - organisation_name}&doc_name={resource_name}" + query = ( + f"?dataset_name={package_name}" + f"&org_name={organisation_name}" + f"&doc_name={resource_name}" + ) return url + query def get_helpers(self): From b00b5864fb3d355b80656953c8cc5cf02c69a2b2 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Mon, 7 Oct 2024 17:06:00 +0100 Subject: [PATCH 027/156] bug fix: move toolkit userobj access outside of worker --- ckanext/falkor/falkor_client.py | 35 ++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/ckanext/falkor/falkor_client.py b/ckanext/falkor/falkor_client.py index 
fade327..b40307d 100644 --- a/ckanext/falkor/falkor_client.py +++ b/ckanext/falkor/falkor_client.py @@ -13,9 +13,7 @@ ) -def base_headers(access_token: str) -> HttpHeaders: - user = toolkit.g.userobj - user_id = "guest" if not user else user.id +def base_headers(access_token: str, user_id: str) -> HttpHeaders: return { "Content-Type": "application/json", "accept": "application/json", @@ -24,26 +22,31 @@ def base_headers(access_token: str) -> HttpHeaders: } -def falkor_post(url: str, payload: dict, auth: auth.Auth, extra_headers=None): - response = requests.post(url, headers=base_headers(auth.access_token), json=payload, timeout=120) +def get_user_id() -> str: + user = toolkit.g.userobj + return "guest" if not user else user.id + + +def falkor_post(url: str, payload: dict, auth: auth.Auth, user_id: str): + response = requests.post(url, headers=base_headers(auth.access_token, user_id), json=payload, timeout=120) log.debug(response.json()) return response -def falkor_put(url: str, payload: dict, auth: auth.Auth, extra_headers=None): - response = requests.put(url, headers=base_headers(auth.access_token), json=payload, timeout=120) +def falkor_put(url: str, payload: dict, auth: auth.Auth, user_id: str): + response = requests.put(url, headers=base_headers(auth.access_token, user_id), json=payload, timeout=120) log.debug(response.json()) return response -def falkor_get(url: str, auth: auth.Auth, extra_headers=None): - response = requests.get(url, headers=base_headers(auth.access_token), timeout=120) +def falkor_get(url: str, auth: auth.Auth, user_id: str): + response = requests.get(url, headers=base_headers(auth.access_token, user_id), timeout=120) log.debug(response.json()) return response -def falkor_delete(url: str, auth: auth.Auth, extra_headers=None): - response = requests.delete(url, headers=base_headers(auth.access_token), timeout=120) +def falkor_delete(url: str, auth: auth.Auth, user_id: str): + response = requests.delete(url, 
headers=base_headers(auth.access_token, user_id), timeout=120) log.debug(response.json()) return response @@ -83,7 +86,7 @@ def dataset_create(self, resource): # run async request log.debug(f"Create dataset with id {resource_id}") jobs.enqueue( - falkor_post, [url, payload, base_headers(self.__auth.access_token)] + falkor_post, [url, payload, self.__auth, get_user_id()] ) def document_read(self, resource): @@ -100,7 +103,7 @@ def document_read(self, resource): ) log.debug(f"Read for document with id {resource_id}") - jobs.enqueue(falkor_get, [url, self.__auth]) + jobs.enqueue(falkor_get, [url, self.__auth, get_user_id()]) def document_create( self, @@ -130,7 +133,7 @@ def document_create( log.debug(f"Creating document with id {resource_id}") jobs.enqueue( - falkor_post, [url, payload, self.__auth] + falkor_post, [url, payload, self.__auth, get_user_id()] ) def document_update(self, resource): @@ -149,7 +152,7 @@ def document_update(self, resource): log.debug(f"Updating document with id {resource_id}") jobs.enqueue( - falkor_put, [url, resource, self.__auth] + falkor_put, [url, resource, self.__auth, get_user_id()] ) def document_delete(self, resource): @@ -166,4 +169,4 @@ def document_delete(self, resource): ) log.debug(f"Deleting document with id {resource_id}") - jobs.enqueue(falkor_delete, [url, self.__auth]) + jobs.enqueue(falkor_delete, [url, self.__auth, get_user_id()]) From bde1b7681c3f260d299ff6951c8c80cae4ce8c80 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 8 Oct 2024 10:57:12 +0100 Subject: [PATCH 028/156] Remove test log --- ckanext/falkor/plugin.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 46107ca..cfecbea 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -97,9 +97,6 @@ def notify(self, entity, operation=None): elif isinstance(entity, model.Package): package = table_dictize(entity, context) - test = toolkit.get_action( - 
"package_activity_list")(data_dict={"id": package["id"], "limit": 100, "offset": 0}) - log.info(test) if operation == DomainObjectOperation.new: package = table_dictize(entity, context) From 1c1c344e75610c7d29acc46d3f7b5215e36c4f77 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 8 Oct 2024 10:57:16 +0100 Subject: [PATCH 029/156] formatting --- ckanext/falkor/falkor_client.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/ckanext/falkor/falkor_client.py b/ckanext/falkor/falkor_client.py index b40307d..62f1f9f 100644 --- a/ckanext/falkor/falkor_client.py +++ b/ckanext/falkor/falkor_client.py @@ -28,25 +28,29 @@ def get_user_id() -> str: def falkor_post(url: str, payload: dict, auth: auth.Auth, user_id: str): - response = requests.post(url, headers=base_headers(auth.access_token, user_id), json=payload, timeout=120) + response = requests.post(url, headers=base_headers( + auth.access_token, user_id), json=payload, timeout=120) log.debug(response.json()) return response def falkor_put(url: str, payload: dict, auth: auth.Auth, user_id: str): - response = requests.put(url, headers=base_headers(auth.access_token, user_id), json=payload, timeout=120) + response = requests.put(url, headers=base_headers( + auth.access_token, user_id), json=payload, timeout=120) log.debug(response.json()) return response def falkor_get(url: str, auth: auth.Auth, user_id: str): - response = requests.get(url, headers=base_headers(auth.access_token, user_id), timeout=120) + response = requests.get(url, headers=base_headers( + auth.access_token, user_id), timeout=120) log.debug(response.json()) return response def falkor_delete(url: str, auth: auth.Auth, user_id: str): - response = requests.delete(url, headers=base_headers(auth.access_token, user_id), timeout=120) + response = requests.delete(url, headers=base_headers( + auth.access_token, user_id), timeout=120) log.debug(response.json()) return response From 4e87a6b000799fc9f63d49ee118c75e502c63a54 Mon Sep 17 
00:00:00 2001 From: wajones98 Date: Tue, 8 Oct 2024 14:39:13 +0100 Subject: [PATCH 030/156] Create dataset_sync table and load with existing datasets --- ckanext/falkor/migration/falkor/README | 1 + ckanext/falkor/migration/falkor/alembic.ini | 74 +++++++++++++++++ ckanext/falkor/migration/falkor/env.py | 81 +++++++++++++++++++ .../falkor/migration/falkor/script.py.mako | 24 ++++++ .../falkor/versions/376615bb5319_init.py | 55 +++++++++++++ 5 files changed, 235 insertions(+) create mode 100644 ckanext/falkor/migration/falkor/README create mode 100644 ckanext/falkor/migration/falkor/alembic.ini create mode 100644 ckanext/falkor/migration/falkor/env.py create mode 100644 ckanext/falkor/migration/falkor/script.py.mako create mode 100644 ckanext/falkor/migration/falkor/versions/376615bb5319_init.py diff --git a/ckanext/falkor/migration/falkor/README b/ckanext/falkor/migration/falkor/README new file mode 100644 index 0000000..98e4f9c --- /dev/null +++ b/ckanext/falkor/migration/falkor/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/ckanext/falkor/migration/falkor/alembic.ini b/ckanext/falkor/migration/falkor/alembic.ini new file mode 100644 index 0000000..b305a01 --- /dev/null +++ b/ckanext/falkor/migration/falkor/alembic.ini @@ -0,0 +1,74 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = %(here)s + +# template used to generate migration files +# file_template = %%(rev)s_%%(slug)s + +# timezone to use when rendering the date +# within the migration file as well as the filename. 
+# string value is passed to dateutil.tz.gettz() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the +# "slug" field +#truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; this defaults +# to /srv/app/src_extensions/ckanext-falkor2/ckanext/falkor/migration/falkor/versions. When using multiple version +# directories, initial revisions must be specified with --version-path +# version_locations = %(here)s/bar %(here)s/bat /srv/app/src_extensions/ckanext-falkor2/ckanext/falkor/migration/falkor/versions + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = driver://user:pass@localhost/dbname + + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/ckanext/falkor/migration/falkor/env.py b/ckanext/falkor/migration/falkor/env.py new file mode 100644 index 0000000..0093682 --- /dev/null +++ b/ckanext/falkor/migration/falkor/env.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- + +from __future__ import with_statement +from alembic import context +from sqlalchemy import engine_from_config, pool +from logging.config import fileConfig + +import os + +# this is the Alembic Config object, 
which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = None + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + +name = os.path.basename(os.path.dirname(__file__)) + + +def run_migrations_offline(): + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + + url = config.get_main_option(u"sqlalchemy.url") + context.configure( + url=url, target_metadata=target_metadata, literal_binds=True, + version_table=u'{}_alembic_version'.format(name) + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online(): + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. 
+ + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section), + prefix=u'sqlalchemy.', + poolclass=pool.NullPool) + + with connectable.connect() as connection: + context.configure( + connection=connection, + target_metadata=target_metadata, + version_table=u'{}_alembic_version'.format(name) + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/ckanext/falkor/migration/falkor/script.py.mako b/ckanext/falkor/migration/falkor/script.py.mako new file mode 100644 index 0000000..2c01563 --- /dev/null +++ b/ckanext/falkor/migration/falkor/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade(): + ${upgrades if upgrades else "pass"} + + +def downgrade(): + ${downgrades if downgrades else "pass"} diff --git a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py new file mode 100644 index 0000000..38592e9 --- /dev/null +++ b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py @@ -0,0 +1,55 @@ +"""INIT + +Revision ID: 376615bb5319 +Revises: +Create Date: 2024-10-07 13:20:12.171995 + +""" +from alembic import op +from ckan.model import meta +from sqlalchemy import orm +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = '376615bb5319' +down_revision = None +branch_labels = None +depends_on = None + +package_table = sa.Table( + "package", + meta.MetaData(), + sa.Column("id", sa.types.UnicodeText, + primary_key=True), +) + +falkor_dataset_sync_table = sa.Table( + "falkor_dataset_sync", + meta.MetaData(), + sa.Column("id", sa.TEXT, sa.ForeignKey( + "package.id"), primary_key=True) +) + + +def upgrade(): + bind = op.get_bind() + session = orm.Session(bind=bind) + + op.create_table( + "falkor_dataset_sync", + sa.Column("id", sa.TEXT, sa.ForeignKey( + "package.id"), primary_key=True) + ) + + for package in session.query(package_table): + session.execute( + falkor_dataset_sync_table.insert().values(id=package[0])) + + session.commit() + + # model.package.package_table + + +def downgrade(): + op.drop_table("falkor_dataset_sync") From 6840355a29b3ab4e61e3e3a77a1f0e8c291f5061 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 8 Oct 2024 15:43:16 +0100 Subject: [PATCH 031/156] Load dataset ids as part of migration --- .../falkor/versions/376615bb5319_init.py | 46 +++++++++++-------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py index 38592e9..602270a 100644 --- a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py +++ b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py @@ -10,6 +10,9 @@ from sqlalchemy import orm import sqlalchemy as sa +import logging + +log = logging.getLogger(__name__) # revision identifiers, used by Alembic. 
revision = '376615bb5319' @@ -24,32 +27,35 @@ primary_key=True), ) -falkor_dataset_sync_table = sa.Table( - "falkor_dataset_sync", - meta.MetaData(), - sa.Column("id", sa.TEXT, sa.ForeignKey( - "package.id"), primary_key=True) -) - def upgrade(): bind = op.get_bind() - session = orm.Session(bind=bind) - op.create_table( - "falkor_dataset_sync", - sa.Column("id", sa.TEXT, sa.ForeignKey( - "package.id"), primary_key=True) - ) - - for package in session.query(package_table): - session.execute( - falkor_dataset_sync_table.insert().values(id=package[0])) - - session.commit() + session = orm.Session(bind=bind) + try: + falkor_dataset_sync_table = op.create_table( + "falkor_dataset_sync", + meta.MetaData(), + sa.Column("id", sa.TEXT, sa.ForeignKey( + "package.id"), primary_key=True, nullable=False), + sa.Column("status", sa.TEXT, default="NOT_SYNCED") + ) + + for package in session.query(package_table): + session.execute( + falkor_dataset_sync_table.insert().values(id=package[0])) + + session.commit() + except Exception as e: + log.error(e) + session.rollback() + finally: + session.close() # model.package.package_table def downgrade(): - op.drop_table("falkor_dataset_sync") + op.drop_table( + "falkor_dataset_sync" + ) From 3c8a63177c26e3cd497a3225024348172579f275 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 8 Oct 2024 16:30:09 +0100 Subject: [PATCH 032/156] Use resource model instead of dict --- ckanext/falkor/falkor_client.py | 62 ++++++++++++++------------------- ckanext/falkor/plugin.py | 17 ++++----- 2 files changed, 33 insertions(+), 46 deletions(-) diff --git a/ckanext/falkor/falkor_client.py b/ckanext/falkor/falkor_client.py index 62f1f9f..92530ae 100644 --- a/ckanext/falkor/falkor_client.py +++ b/ckanext/falkor/falkor_client.py @@ -1,10 +1,12 @@ -from ckanext.falkor import auth -import ckan.lib.jobs as jobs -import ckan.plugins.toolkit as toolkit -from typing import TypedDict import requests import logging import json +import ckan.lib.jobs as jobs +import 
ckan.plugins.toolkit as toolkit +import ckan.model as model + +from typing import TypedDict +from ckanext.falkor import auth log = logging.getLogger(__name__) @@ -73,11 +75,10 @@ def __init__( self.__core_base_url = core_base_url self.__admin_base_url = admin_base_url - def dataset_create(self, resource): - resource_id = str(resource["id"]) + def dataset_create(self, resource: model.Resource): url = self.__admin_base_url + self.__tenant_id + "/dataset" payload = { - "datasetId": resource_id, + "datasetId": resource.id, "encryptionType": "none", "externalStorage": "false", "permissionEnabled": "false", @@ -88,14 +89,12 @@ def dataset_create(self, resource): } # run async request - log.debug(f"Create dataset with id {resource_id}") + log.debug(f"Create dataset with id {resource.id}") jobs.enqueue( falkor_post, [url, payload, self.__auth, get_user_id()] ) - def document_read(self, resource): - resource_id = str(resource["id"]) - package_id = str(resource["package_id"]) + def document_read(self, package_id: str, resource_id: str): url = ( self.__core_base_url + self.__tenant_id @@ -111,66 +110,57 @@ def document_read(self, resource): def document_create( self, - resource: dict, + resource: model.Resource, organisation_id: str, - package_id: str, ): - resource_id = str(resource["id"]) - package_id = str(resource["package_id"]) url = ( self.__core_base_url + self.__tenant_id + "/dataset/" - + package_id + + resource.package_id + "/create" ) payload = { - "documentId": resource_id, - "data": json.dumps(resource), + "documentId": resource.id, + "data": json.dumps(resource.as_dict()), "tags": { "organisation_id": organisation_id, - "package_id": package_id, - "resource_id": resource_id, + "package_id": resource.package_id, + "resource_id": resource.id, }, } - log.debug(f"Creating document with id {resource_id}") + log.debug(f"Creating document with id {resource.id}") jobs.enqueue( falkor_post, [url, payload, self.__auth, get_user_id()] ) - def document_update(self, 
resource): - resource_id = str(resource["id"]) - package_id = str(resource["package_id"]) - + def document_update(self, resource: model.Resource): url = ( self.__core_base_url + self.__tenant_id + "/dataset/" - + package_id + + resource.package_id + "/" - + resource_id + + resource.id + "/body" ) - log.debug(f"Updating document with id {resource_id}") + log.debug(f"Updating document with id {resource.id}") jobs.enqueue( - falkor_put, [url, resource, self.__auth, get_user_id()] + falkor_put, [url, resource.as_dict(), self.__auth, get_user_id()] ) - def document_delete(self, resource): - resource_id = str(resource["id"]) - package_id = str(resource["package_id"]) - + def document_delete(self, resource: model.Resource): url = ( self.__core_base_url + self.__tenant_id + "/dataset/" - + package_id + + resource.package_id + "/" - + resource_id + + resource.id ) - log.debug(f"Deleting document with id {resource_id}") + log.debug(f"Deleting document with id {resource.id}") jobs.enqueue(falkor_delete, [url, self.__auth, get_user_id()]) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index cfecbea..4badf6f 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -63,34 +63,31 @@ def configure(self, config): # IResourceController def before_show(self, resource_dict): - self.falkor.document_read(resource_dict) + self.falkor.document_read( + package_id=resource_dict["id"], + resource_id=resource_dict["package_id"] + ) self.get_helpers() # IDomainObjectNotification & #IResourceURLChange def notify(self, entity, operation=None): context = {"model": model, "ignore_auth": True, "defer_commit": True} if isinstance(entity, model.Resource): + resource: model.Resource = entity if operation == DomainObjectOperation.new: - resource = table_dictize(entity, context) - - package_id = resource["package_id"] - package_info = toolkit.get_action("package_show")( - data_dict={"id": package_id} + data_dict={"id": resource.package_id} ) organisation_info = 
package_info["organization"] organisation_id = organisation_info["id"] - self.falkor.document_create( - resource, organisation_id, package_id) + self.falkor.document_create(resource, organisation_id) elif operation == DomainObjectOperation.changed: - resource = table_dictize(entity, context) self.falkor.document_update(resource) elif operation == DomainObjectOperation.deleted: - resource = table_dictize(entity, context) self.falkor.document_delete(resource) else: return From 96c8d7d4811f7f50dc28b93f2cc8b1422b15e56e Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 9 Oct 2024 15:17:31 +0100 Subject: [PATCH 033/156] Log event to table --- .../falkor/versions/376615bb5319_init.py | 63 +++++++-------- ckanext/falkor/plugin.py | 76 +++++++++++++++++-- 2 files changed, 102 insertions(+), 37 deletions(-) diff --git a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py index 602270a..5d72412 100644 --- a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py +++ b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py @@ -7,7 +7,6 @@ """ from alembic import op from ckan.model import meta -from sqlalchemy import orm import sqlalchemy as sa import logging @@ -20,42 +19,46 @@ branch_labels = None depends_on = None -package_table = sa.Table( - "package", - meta.MetaData(), - sa.Column("id", sa.types.UnicodeText, - primary_key=True), -) +# package_table = sa.Table( +# "package", +# meta.MetaData(), +# sa.Column("id", sa.types.UnicodeText, +# primary_key=True), +# ) def upgrade(): - bind = op.get_bind() - - session = orm.Session(bind=bind) - try: - falkor_dataset_sync_table = op.create_table( - "falkor_dataset_sync", - meta.MetaData(), - sa.Column("id", sa.TEXT, sa.ForeignKey( - "package.id"), primary_key=True, nullable=False), - sa.Column("status", sa.TEXT, default="NOT_SYNCED") - ) - - for package in session.query(package_table): - session.execute( - 
falkor_dataset_sync_table.insert().values(id=package[0])) - - session.commit() - except Exception as e: - log.error(e) - session.rollback() - finally: - session.close() + # bind = op.get_bind() + # + # session = orm.Session(bind=bind) + # try: + # falkor_dataset_sync_table = + op.create_table( + "falkor_event", + meta.MetaData(), + sa.Column("id", sa.TEXT, primary_key=True, nullable=False), + sa.Column("object_id", sa.TEXT, nullable=False), + sa.Column("object_type", sa.TEXT, nullable=False), + sa.Column("status", sa.TEXT, default="NOT_SYNCED"), + sa.Column("created_at", sa.DateTime(), nullable=False), + sa.Column("synced_at", sa.DateTime(), nullable=True, default=True) + ) + + # for package in session.query(package_table): + # session.execute( + # falkor_dataset_sync_table.insert().values(id=package[0])) + # + # session.commit() + # except Exception as e: + # log.error(e) + # session.rollback() + # finally: + # session.close() # model.package.package_table def downgrade(): op.drop_table( - "falkor_dataset_sync" + "falkor_event" ) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 4badf6f..516ccda 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -1,17 +1,52 @@ -import ckan.plugins as plugins -import ckan.plugins.toolkit as toolkit - import logging +import datetime +import uuid + +import sqlalchemy as sa +from sqlalchemy.ext.declarative import declarative_base + +import ckan.plugins as plugins +import ckan.plugins.toolkit as toolkit import ckan.model as model from ckan.lib.dictization import table_dictize from ckan.model.domain_object import DomainObjectOperation - from ckanext.falkor import falkor_client, auth log = logging.getLogger(__name__) +Base = declarative_base(metadata=model.meta.metadata) + + +class FalkorEvent(Base): + __tablename__ = "falkor_event" + + id = sa.Column(sa.TEXT, primary_key=True, nullable=False) + object_id = sa.Column(sa.TEXT, nullable=False) + object_type = sa.Column(sa.TEXT, nullable=False) + 
status = sa.Column(sa.TEXT, default="NOT_SYNCED") + created_at = sa.Column(sa.DateTime(), nullable=False) + synced_at = sa.Column(sa.DateTime(), nullable=False) + + +def new_falkor_event( + id: str, + object_id: str, + object_type: str, + status: str, + created_at: sa.DateTime, + synced_at: sa.DateTime +) -> FalkorEvent: + return FalkorEvent( + id=id, + object_id=object_id, + object_type=object_type, + status=status, + created_at=created_at, + synced_at=synced_at + ) + def get_config_value(config, key: str) -> str: value = config.get(key) @@ -22,6 +57,7 @@ def get_config_value(config, key: str) -> str: class FalkorPlugin(plugins.SingletonPlugin): falkor: falkor_client.Falkor + engine: sa.engine.Engine plugins.implements(plugins.IConfigurer) plugins.implements(plugins.IConfigurable, inherit=True) @@ -61,11 +97,13 @@ def configure(self, config): auth_client, tenant_id, core_api_url, admin_api_url ) + self.engine = model.meta.engine + # IResourceController def before_show(self, resource_dict): - self.falkor.document_read( - package_id=resource_dict["id"], - resource_id=resource_dict["package_id"] + self.handle_resource_read( + resource_id=resource_dict["id"], + package_id=resource_dict["package_id"] ) self.get_helpers() @@ -124,3 +162,27 @@ def construct_falkor_url(self, resource): def get_helpers(self): return {"construct_falkor_url": self.construct_falkor_url} + + def handle_resource_read(self, resource_id: str, package_id: str): + session = sa.orm.Session(bind=self.engine) + try: + event = new_falkor_event( + id=uuid.uuid4(), + object_id=resource_id, + object_type="resource", + status="pending", + created_at=datetime.datetime.now(), + synced_at=None, + ) + session.add(event) + session.commit() + + self.falkor.document_read( + package_id=resource_id, + resource_id=package_id + ) + log.info(session.query(FalkorEvent).all()) + except: + session.rollback() + finally: + session.close() From e3d9b2787011633c6a79a309b6c82f67dcf82473 Mon Sep 17 00:00:00 2001 From: 
wajones98 Date: Thu, 10 Oct 2024 13:01:04 +0100 Subject: [PATCH 034/156] Test custom_action --- ckanext/falkor/plugin.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 516ccda..caceab6 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -2,6 +2,8 @@ import datetime import uuid +from typing import Optional + import sqlalchemy as sa from sqlalchemy.ext.declarative import declarative_base @@ -64,6 +66,10 @@ class FalkorPlugin(plugins.SingletonPlugin): plugins.implements(plugins.ITemplateHelpers) plugins.implements(plugins.IDomainObjectModification, inherit=True) plugins.implements(plugins.IResourceController, inherit=True) + plugins.implements(plugins.IActions) + + def get_actions(self): + return {"hello_world": hello_world} # IConfigurer def update_config(self, config): @@ -186,3 +192,8 @@ def handle_resource_read(self, resource_id: str, package_id: str): session.rollback() finally: session.close() + + +@toolkit.side_effect_free +def hello_world(context, data_dict: Optional[dict] = None) -> str: + return {"message": f"Hello, {data_dict['name'] if 'name' in data_dict else 'World'}!"} From d4cce718f66b92a93804570f849a061564eb42c8 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 10 Oct 2024 14:18:00 +0100 Subject: [PATCH 035/156] New EventHandler class --- ckanext/falkor/plugin.py | 89 +++++++++++++++++++++++++++++++++++----- 1 file changed, 78 insertions(+), 11 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index caceab6..8372668 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -3,6 +3,7 @@ import uuid from typing import Optional +from enum import Enum import sqlalchemy as sa @@ -21,24 +22,43 @@ Base = declarative_base(metadata=model.meta.metadata) +class FalkorEventObjectType(Enum): + PACKAGE = 'package' + RESOURCE = 'resource' + + +class FalkorEventStatus(Enum): + PENDING = 'pending' + FAILED = 'failed' + SYNCED = 
'synced' + + class FalkorEvent(Base): __tablename__ = "falkor_event" - id = sa.Column(sa.TEXT, primary_key=True, nullable=False) - object_id = sa.Column(sa.TEXT, nullable=False) - object_type = sa.Column(sa.TEXT, nullable=False) - status = sa.Column(sa.TEXT, default="NOT_SYNCED") - created_at = sa.Column(sa.DateTime(), nullable=False) - synced_at = sa.Column(sa.DateTime(), nullable=False) + id = sa.Column( + sa.dialects.postgresql.UUID, + primary_key=True, + nullable=False, + default=uuid.uuid4 + ) + object_id = sa.Column(sa.dialects.postgresql.UUID, nullable=False) + object_type = sa.Column(sa.Enum(FalkorEventObjectType), nullable=False) + status = sa.Column( + sa.Enum(FalkorEventStatus), + default=FalkorEventStatus.PENDING + ) + created_at = sa.Column(sa.DateTime, nullable=False) + synced_at = sa.Column(sa.DateTime, nullable=False) def new_falkor_event( - id: str, - object_id: str, - object_type: str, - status: str, + id: uuid.UUID, + object_id: uuid.UUID, + object_type: FalkorEventObjectType, created_at: sa.DateTime, - synced_at: sa.DateTime + status: FalkorEventStatus = FalkorEventStatus.PENDING, + synced_at: Optional[sa.DateTime] = None ) -> FalkorEvent: return FalkorEvent( id=id, @@ -197,3 +217,50 @@ def handle_resource_read(self, resource_id: str, package_id: str): @toolkit.side_effect_free def hello_world(context, data_dict: Optional[dict] = None) -> str: return {"message": f"Hello, {data_dict['name'] if 'name' in data_dict else 'World'}!"} + + +class EventHandler: + falkor: falkor_client.Falkor + engine: sa.engine.Engine + + def __init__(self, falkor: falkor_client.Falkor, engine: sa.engine.Engine): + self.falkor = falkor + self.engine = engine + + def handle_package_create(self): + pass + + def handle_resource_create(self): + pass + + def handle_resource_read(self): + pass + + def handle_resource_update(self): + pass + + def handle_resource_delete(self): + pass + + def __insert_pending_event( + self, + event_id: uuid.UUID, + object_id: uuid.UUID, + 
object_type: FalkorEventObjectType, + created_at: datetime.datetime + ): + session = sa.orm.Session(bind=self.engine) + try: + event = new_falkor_event( + id=event_id, + object_id=object_id, + object_type=object_type, + created_at=created_at, + ) + session.add(event) + session.commit() + except Exception as e: + logging.critical(e, exc_info=True) + session.rollback() + finally: + session.close() From 88ba7ac97693bcb99f69d10d2f8a6b5ae679d2ff Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 15 Oct 2024 13:44:21 +0100 Subject: [PATCH 036/156] Refactor events to event_handler --- ckanext/falkor/event_handler.py | 92 +++++++ ckanext/falkor/falkor_client.py | 6 +- .../falkor/versions/376615bb5319_init.py | 75 +++--- ckanext/falkor/model.py | 68 +++++ ckanext/falkor/plugin.py | 237 +++++------------- 5 files changed, 274 insertions(+), 204 deletions(-) create mode 100644 ckanext/falkor/event_handler.py create mode 100644 ckanext/falkor/model.py diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py new file mode 100644 index 0000000..549d78a --- /dev/null +++ b/ckanext/falkor/event_handler.py @@ -0,0 +1,92 @@ + +import logging +import uuid +import sqlalchemy as sa + +from datetime import datetime +from ckanext.falkor.model import FalkorEventObjectType, new_falkor_event +from ckanext.falkor.falkor_client import Falkor +from ckan.model import meta + +log = logging.getLogger(__name__) + + +def generate_event_id() -> str: + return uuid.uuid4() + + +class EventHandler: + falkor: Falkor + engine: sa.engine.Engine + + def __init__(self, falkor: Falkor): + self.falkor = falkor + self.engine = meta.engine + + def handle_package_create(self, package: dict): + self.__insert_pending_event( + event_id=generate_event_id(), + object_id=package["id"], + object_type=FalkorEventObjectType.PACKAGE, + created_at=package["created_at"] + ) + + def handle_resource_create(self, resource: dict): + self.__insert_pending_event( + event_id=generate_event_id(), + 
object_id=resource["id"], + object_type=FalkorEventObjectType.RESOURCE, + created_at=resource["created_at"] + ) + + def handle_resource_read( + self, + resource_id: str, + package_id: str, + created_at: datetime = datetime.now() + ): + self.__insert_pending_event( + event_id=generate_event_id(), + object_id=resource_id, + object_type=FalkorEventObjectType.RESOURCE, + created_at=created_at + ) + + def handle_resource_update(self, resource: dict): + self.__insert_pending_event( + event_id=generate_event_id(), + object_id=resource["id"], + object_type=FalkorEventObjectType.RESOURCE, + created_at=resource["created_at"] + ) + + def handle_resource_delete(self, resource: dict): + self.__insert_pending_event( + event_id=generate_event_id(), + object_id=resource["id"], + object_type=FalkorEventObjectType.RESOURCE, + created_at=resource["created_at"] + ) + + def __insert_pending_event( + self, + event_id: uuid.UUID, + object_id: uuid.UUID, + object_type: FalkorEventObjectType, + created_at: datetime.datetime + ): + session = sa.orm.Session(bind=self.engine) + try: + event = new_falkor_event( + id=event_id, + object_id=object_id, + object_type=object_type, + created_at=created_at, + ) + session.add(event) + session.commit() + except Exception as e: + logging.critical(e, exc_info=True) + session.rollback() + finally: + session.close() diff --git a/ckanext/falkor/falkor_client.py b/ckanext/falkor/falkor_client.py index 92530ae..a323d0a 100644 --- a/ckanext/falkor/falkor_client.py +++ b/ckanext/falkor/falkor_client.py @@ -75,10 +75,10 @@ def __init__( self.__core_base_url = core_base_url self.__admin_base_url = admin_base_url - def dataset_create(self, resource: model.Resource): + def dataset_create(self, package_id: str): url = self.__admin_base_url + self.__tenant_id + "/dataset" payload = { - "datasetId": resource.id, + "datasetId": package_id, "encryptionType": "none", "externalStorage": "false", "permissionEnabled": "false", @@ -89,7 +89,7 @@ def dataset_create(self, 
resource: model.Resource): } # run async request - log.debug(f"Create dataset with id {resource.id}") + log.debug(f"Create dataset with id {package_id}") jobs.enqueue( falkor_post, [url, payload, self.__auth, get_user_id()] ) diff --git a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py index 5d72412..de65c12 100644 --- a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py +++ b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py @@ -7,9 +7,11 @@ """ from alembic import op from ckan.model import meta -import sqlalchemy as sa +from enum import Enum +import sqlalchemy as sa import logging +import uuid log = logging.getLogger(__name__) @@ -19,46 +21,57 @@ branch_labels = None depends_on = None -# package_table = sa.Table( -# "package", -# meta.MetaData(), -# sa.Column("id", sa.types.UnicodeText, -# primary_key=True), -# ) + +class FalkorEventObjectType(Enum): + PACKAGE = 'package' + RESOURCE = 'resource' + + +class FalkorEventStatus(Enum): + PENDING = 'pending' + FAILED = 'failed' + SYNCED = 'synced' + + +class FalkorEventType(Enum): + CREATE = "create" + READ = "read" + UPDATE = "update" + DELETE = "delete" def upgrade(): - # bind = op.get_bind() - # - # session = orm.Session(bind=bind) - # try: - # falkor_dataset_sync_table = op.create_table( "falkor_event", meta.MetaData(), - sa.Column("id", sa.TEXT, primary_key=True, nullable=False), - sa.Column("object_id", sa.TEXT, nullable=False), - sa.Column("object_type", sa.TEXT, nullable=False), - sa.Column("status", sa.TEXT, default="NOT_SYNCED"), - sa.Column("created_at", sa.DateTime(), nullable=False), - sa.Column("synced_at", sa.DateTime(), nullable=True, default=True) + sa.Column( + "id", + sa.dialects.postgresql.UUID, + primary_key=True, + nullable=False, + default=uuid.uuid4 + ), + sa.Column("object_id", sa.dialects.postgresql.UUID, nullable=False), + sa.Column( + "object_type", + sa.Enum(FalkorEventObjectType), + 
nullable=False + ), + sa.Column("event_type", sa.Enum(FalkorEventType), nullable=False), + sa.Column("user_id", sa.TEXT, nullable=False, default="guest"), + sa.Column( + "status", + sa.Enum(FalkorEventStatus), + default=FalkorEventStatus.PENDING + ), + sa.Column("created_at", sa.DateTime, nullable=False), + sa.Column("synced_at", sa.DateTime, nullable=True) ) - # for package in session.query(package_table): - # session.execute( - # falkor_dataset_sync_table.insert().values(id=package[0])) - # - # session.commit() - # except Exception as e: - # log.error(e) - # session.rollback() - # finally: - # session.close() - - # model.package.package_table - def downgrade(): op.drop_table( "falkor_event" ) + op.execute('DROP TYPE IF EXISTS falkoreventobjecttype;') + op.execute('DROP TYPE IF EXISTS falkoreventstatus;') diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py new file mode 100644 index 0000000..9ebc077 --- /dev/null +++ b/ckanext/falkor/model.py @@ -0,0 +1,68 @@ +import uuid +import sqlalchemy as sa +import ckan.model as model + +from enum import Enum +from typing import Union, Optional +from sqlalchemy.ext.declarative import declarative_base + +Base = declarative_base(metadata=model.meta.metadata) + + +class FalkorEventObjectType(Enum): + PACKAGE = 'package' + RESOURCE = 'resource' + + +class FalkorEventStatus(Enum): + PENDING = 'pending' + FAILED = 'failed' + SYNCED = 'synced' + + +class FalkorEventType(Enum): + CREATE = "create" + READ = "read" + UPDATE = "update" + DELETE = "delete" + + +class FalkorEvent(Base): + __tablename__ = "falkor_event" + + id = sa.Column( + sa.dialects.postgresql.UUID, + primary_key=True, + nullable=False, + default=uuid.uuid4 + ) + object_id = sa.Column(sa.dialects.postgresql.UUID, nullable=False) + object_type = sa.Column(sa.Enum(FalkorEventObjectType), nullable=False) + event_type = sa.Column(sa.Enum(FalkorEventType), nullable=False) + user_id = sa.Column(sa.TEXT, nullable=False, default="guest") + status = sa.Column( + 
sa.Enum(FalkorEventStatus), + default=FalkorEventStatus.PENDING + ) + created_at = sa.Column(sa.DateTime, nullable=False) + synced_at = sa.Column(sa.DateTime, nullable=True) + + +def new_falkor_event( + id: uuid.UUID, + object_id: uuid.UUID, + object_type: FalkorEventObjectType, + user_id: Union[uuid.UUID, str], + created_at: sa.DateTime, + status: FalkorEventStatus = FalkorEventStatus.PENDING, + synced_at: Optional[sa.DateTime] = None +) -> FalkorEvent: + return FalkorEvent( + id=id, + object_id=object_id, + object_type=object_type, + user_id=user_id, + status=status, + created_at=created_at, + synced_at=synced_at + ) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 8372668..e5accc0 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -1,73 +1,16 @@ import logging -import datetime -import uuid - -from typing import Optional -from enum import Enum import sqlalchemy as sa - -from sqlalchemy.ext.declarative import declarative_base - import ckan.plugins as plugins import ckan.plugins.toolkit as toolkit -import ckan.model as model +import ckan.model as ckan_model from ckan.lib.dictization import table_dictize +from ckanext.falkor import falkor_client, auth, event_handler from ckan.model.domain_object import DomainObjectOperation -from ckanext.falkor import falkor_client, auth - -log = logging.getLogger(__name__) - -Base = declarative_base(metadata=model.meta.metadata) - - -class FalkorEventObjectType(Enum): - PACKAGE = 'package' - RESOURCE = 'resource' - - -class FalkorEventStatus(Enum): - PENDING = 'pending' - FAILED = 'failed' - SYNCED = 'synced' - -class FalkorEvent(Base): - __tablename__ = "falkor_event" - id = sa.Column( - sa.dialects.postgresql.UUID, - primary_key=True, - nullable=False, - default=uuid.uuid4 - ) - object_id = sa.Column(sa.dialects.postgresql.UUID, nullable=False) - object_type = sa.Column(sa.Enum(FalkorEventObjectType), nullable=False) - status = sa.Column( - sa.Enum(FalkorEventStatus), - 
default=FalkorEventStatus.PENDING - ) - created_at = sa.Column(sa.DateTime, nullable=False) - synced_at = sa.Column(sa.DateTime, nullable=False) - - -def new_falkor_event( - id: uuid.UUID, - object_id: uuid.UUID, - object_type: FalkorEventObjectType, - created_at: sa.DateTime, - status: FalkorEventStatus = FalkorEventStatus.PENDING, - synced_at: Optional[sa.DateTime] = None -) -> FalkorEvent: - return FalkorEvent( - id=id, - object_id=object_id, - object_type=object_type, - status=status, - created_at=created_at, - synced_at=synced_at - ) +log = logging.getLogger(__name__) def get_config_value(config, key: str) -> str: @@ -77,9 +20,15 @@ def get_config_value(config, key: str) -> str: return value +def get_user_id() -> str: + user = toolkit.g.userobj + return "guest" if not user else user.id + + class FalkorPlugin(plugins.SingletonPlugin): falkor: falkor_client.Falkor engine: sa.engine.Engine + event_handler: event_handler.EventHandler plugins.implements(plugins.IConfigurer) plugins.implements(plugins.IConfigurable, inherit=True) @@ -88,9 +37,6 @@ class FalkorPlugin(plugins.SingletonPlugin): plugins.implements(plugins.IResourceController, inherit=True) plugins.implements(plugins.IActions) - def get_actions(self): - return {"hello_world": hello_world} - # IConfigurer def update_config(self, config): toolkit.add_template_directory(config, "templates") @@ -101,7 +47,9 @@ def configure(self, config): endpoint = get_config_value(config, "ckanext.falkor.auth.endpoint") client_id = get_config_value(config, "ckanext.falkor.auth.client_id") client_secret = get_config_value( - config, "ckanext.falkor.auth.client_secret") + config, + "ckanext.falkor.auth.client_secret" + ) username = get_config_value(config, "ckanext.falkor.auth.username") password = get_config_value(config, "ckanext.falkor.auth.password") @@ -123,47 +71,43 @@ def configure(self, config): auth_client, tenant_id, core_api_url, admin_api_url ) - self.engine = model.meta.engine + self.event_handler = 
EventHandler(self.falkor) # IResourceController def before_show(self, resource_dict): - self.handle_resource_read( + self.event_handler.handle_resource_read( resource_id=resource_dict["id"], package_id=resource_dict["package_id"] ) self.get_helpers() - # IDomainObjectNotification & #IResourceURLChange def notify(self, entity, operation=None): - context = {"model": model, "ignore_auth": True, "defer_commit": True} - if isinstance(entity, model.Resource): - resource: model.Resource = entity - if operation == DomainObjectOperation.new: - package_info = toolkit.get_action("package_show")( - data_dict={"id": resource.package_id} - ) - - organisation_info = package_info["organization"] - organisation_id = organisation_info["id"] - - self.falkor.document_create(resource, organisation_id) - - elif operation == DomainObjectOperation.changed: - self.falkor.document_update(resource) - - elif operation == DomainObjectOperation.deleted: - self.falkor.document_delete(resource) - else: - return - - elif isinstance(entity, model.Package): + context = { + "model": ckan_model, + "ignore_auth": True, + "defer_commit": True + } + if isinstance(entity, ckan_model.Package): package = table_dictize(entity, context) - - if operation == DomainObjectOperation.new: - package = table_dictize(entity, context) - self.falkor.dataset_create(package) - else: - return + self.event_handler.handle_package_create( + id=package["id"], + created_at=package["metadata_created"] + ) + elif isinstance(entity, ckan_model.Resource): + resource = table_dictize(entity, context) + self.handle_resource_modification_event(resource, operation) + + def handle_resource_modification_event( + self, + resource: dict, + operation: DomainObjectOperation + ): + if operation == DomainObjectOperation.new: + self.event_handler.handle_resource_create() + elif operation == DomainObjectOperation.changed: + self.event_handler.handle_resource_update() + elif operation == DomainObjectOperation.deleted: + 
self.event_handler.handle_resource_delete() def construct_falkor_url(self, resource): resource_id = resource["id"] @@ -189,78 +133,31 @@ def construct_falkor_url(self, resource): def get_helpers(self): return {"construct_falkor_url": self.construct_falkor_url} - def handle_resource_read(self, resource_id: str, package_id: str): - session = sa.orm.Session(bind=self.engine) - try: - event = new_falkor_event( - id=uuid.uuid4(), - object_id=resource_id, - object_type="resource", - status="pending", - created_at=datetime.datetime.now(), - synced_at=None, - ) - session.add(event) - session.commit() - - self.falkor.document_read( - package_id=resource_id, - resource_id=package_id - ) - log.info(session.query(FalkorEvent).all()) - except: - session.rollback() - finally: - session.close() - - -@toolkit.side_effect_free -def hello_world(context, data_dict: Optional[dict] = None) -> str: - return {"message": f"Hello, {data_dict['name'] if 'name' in data_dict else 'World'}!"} - - -class EventHandler: - falkor: falkor_client.Falkor - engine: sa.engine.Engine - - def __init__(self, falkor: falkor_client.Falkor, engine: sa.engine.Engine): - self.falkor = falkor - self.engine = engine - - def handle_package_create(self): - pass - - def handle_resource_create(self): - pass - - def handle_resource_read(self): - pass - - def handle_resource_update(self): - pass - - def handle_resource_delete(self): - pass - - def __insert_pending_event( - self, - event_id: uuid.UUID, - object_id: uuid.UUID, - object_type: FalkorEventObjectType, - created_at: datetime.datetime - ): - session = sa.orm.Session(bind=self.engine) - try: - event = new_falkor_event( - id=event_id, - object_id=object_id, - object_type=object_type, - created_at=created_at, - ) - session.add(event) - session.commit() - except Exception as e: - logging.critical(e, exc_info=True) - session.rollback() - finally: - session.close() + # def handle_resource_read(self, resource_id: str, package_id: str): + # session = 
sa.orm.Session(bind=self.engine) + # try: + # event = new_falkor_event( + # id=uuid.uuid4(), + # object_id=resource_id, + # object_type="resource", + # status="pending", + # created_at=datetime.datetime.now(), + # synced_at=None, + # ) + # session.add(event) + # session.commit() + # + # self.falkor.document_read( + # package_id=resource_id, + # resource_id=package_id + # ) + # log.info(session.query(FalkorEvent).all()) + # except: + # session.rollback() + # finally: + # session.close() + + +# @toolkit.side_effect_free +# def hello_world(context, data_dict: Optional[dict] = None) -> str: +# return {"message": f"Hello, {data_dict['name'] if 'name' in data_dict else 'World'}!"} From bf3a6c35089a464e3c2ad3144e85f18c397e9049 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 15 Oct 2024 13:47:07 +0100 Subject: [PATCH 037/156] Rename falkor_client -> client --- ckanext/falkor/{falkor_client.py => client.py} | 2 +- ckanext/falkor/event_handler.py | 6 +++--- ckanext/falkor/plugin.py | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) rename ckanext/falkor/{falkor_client.py => client.py} (99%) diff --git a/ckanext/falkor/falkor_client.py b/ckanext/falkor/client.py similarity index 99% rename from ckanext/falkor/falkor_client.py rename to ckanext/falkor/client.py index a323d0a..713d0a5 100644 --- a/ckanext/falkor/falkor_client.py +++ b/ckanext/falkor/client.py @@ -57,7 +57,7 @@ def falkor_delete(url: str, auth: auth.Auth, user_id: str): return response -class Falkor: +class Client: __auth: auth.Auth __core_base_url: str __admin_base_url: str diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 549d78a..0329133 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -5,7 +5,7 @@ from datetime import datetime from ckanext.falkor.model import FalkorEventObjectType, new_falkor_event -from ckanext.falkor.falkor_client import Falkor +from ckanext.falkor.client import Client from ckan.model import meta log = 
logging.getLogger(__name__) @@ -16,10 +16,10 @@ def generate_event_id() -> str: class EventHandler: - falkor: Falkor + falkor: Client engine: sa.engine.Engine - def __init__(self, falkor: Falkor): + def __init__(self, falkor: Client): self.falkor = falkor self.engine = meta.engine diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index e5accc0..bcab258 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -6,7 +6,7 @@ import ckan.model as ckan_model from ckan.lib.dictization import table_dictize -from ckanext.falkor import falkor_client, auth, event_handler +from ckanext.falkor import client, auth, event_handler from ckan.model.domain_object import DomainObjectOperation @@ -26,7 +26,7 @@ def get_user_id() -> str: class FalkorPlugin(plugins.SingletonPlugin): - falkor: falkor_client.Falkor + falkor: client.Client engine: sa.engine.Engine event_handler: event_handler.EventHandler @@ -67,11 +67,11 @@ def configure(self, config): self.audit_base_url = get_config_value( config, "ckanext.falkor.audit_base_url") - self.falkor = falkor_client.Falkor( + self.falkor = client.Client( auth_client, tenant_id, core_api_url, admin_api_url ) - self.event_handler = EventHandler(self.falkor) + self.event_handler = event_handler.EventHandler(self.falkor) # IResourceController def before_show(self, resource_dict): From c8a4a9e6320a7a98dbc00f8497f0234527b1f311 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 15 Oct 2024 13:50:08 +0100 Subject: [PATCH 038/156] Temporarily remove actions plugin --- ckanext/falkor/event_handler.py | 2 +- ckanext/falkor/plugin.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 0329133..9e4b03f 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -73,7 +73,7 @@ def __insert_pending_event( event_id: uuid.UUID, object_id: uuid.UUID, object_type: FalkorEventObjectType, - created_at: datetime.datetime + 
created_at: datetime ): session = sa.orm.Session(bind=self.engine) try: diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index bcab258..7f5c0ab 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -35,7 +35,7 @@ class FalkorPlugin(plugins.SingletonPlugin): plugins.implements(plugins.ITemplateHelpers) plugins.implements(plugins.IDomainObjectModification, inherit=True) plugins.implements(plugins.IResourceController, inherit=True) - plugins.implements(plugins.IActions) + # plugins.implements(plugins.IActions) # IConfigurer def update_config(self, config): From 3bd1df6770b6b37c1c137a5b3977315f53e5c46a Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 15 Oct 2024 14:01:06 +0100 Subject: [PATCH 039/156] Add event type to event_handler --- ckanext/falkor/event_handler.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 9e4b03f..0acdb86 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -4,7 +4,7 @@ import sqlalchemy as sa from datetime import datetime -from ckanext.falkor.model import FalkorEventObjectType, new_falkor_event +from ckanext.falkor.model import FalkorEventObjectType, FalkorEventType, new_falkor_event from ckanext.falkor.client import Client from ckan.model import meta @@ -28,6 +28,7 @@ def handle_package_create(self, package: dict): event_id=generate_event_id(), object_id=package["id"], object_type=FalkorEventObjectType.PACKAGE, + event_type=FalkorEventType.CREATE, created_at=package["created_at"] ) @@ -36,6 +37,7 @@ def handle_resource_create(self, resource: dict): event_id=generate_event_id(), object_id=resource["id"], object_type=FalkorEventObjectType.RESOURCE, + event_type=FalkorEventType.CREATE, created_at=resource["created_at"] ) @@ -49,6 +51,7 @@ def handle_resource_read( event_id=generate_event_id(), object_id=resource_id, object_type=FalkorEventObjectType.RESOURCE, + 
event_type=FalkorEventType.READ, created_at=created_at ) @@ -57,6 +60,7 @@ def handle_resource_update(self, resource: dict): event_id=generate_event_id(), object_id=resource["id"], object_type=FalkorEventObjectType.RESOURCE, + event_type=FalkorEventType.UPDATE, created_at=resource["created_at"] ) @@ -65,6 +69,7 @@ def handle_resource_delete(self, resource: dict): event_id=generate_event_id(), object_id=resource["id"], object_type=FalkorEventObjectType.RESOURCE, + event_type=FalkorEventType.DELETE, created_at=resource["created_at"] ) @@ -73,6 +78,7 @@ def __insert_pending_event( event_id: uuid.UUID, object_id: uuid.UUID, object_type: FalkorEventObjectType, + event_type: FalkorEventType, created_at: datetime ): session = sa.orm.Session(bind=self.engine) @@ -81,6 +87,7 @@ def __insert_pending_event( id=event_id, object_id=object_id, object_type=object_type, + event_type=event_type, created_at=created_at, ) session.add(event) From 31edc47d60ec5d20fffac9d469f0894b4a0666fa Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 15 Oct 2024 14:43:01 +0100 Subject: [PATCH 040/156] Improve package create event --- ckanext/falkor/client.py | 27 ++++++------- ckanext/falkor/event_handler.py | 39 +++++++++++-------- .../falkor/versions/376615bb5319_init.py | 6 ++- ckanext/falkor/model.py | 19 +++++---- ckanext/falkor/plugin.py | 22 +++++++---- 5 files changed, 63 insertions(+), 50 deletions(-) diff --git a/ckanext/falkor/client.py b/ckanext/falkor/client.py index 713d0a5..6e82391 100644 --- a/ckanext/falkor/client.py +++ b/ckanext/falkor/client.py @@ -24,11 +24,6 @@ def base_headers(access_token: str, user_id: str) -> HttpHeaders: } -def get_user_id() -> str: - user = toolkit.g.userobj - return "guest" if not user else user.id - - def falkor_post(url: str, payload: dict, auth: auth.Auth, user_id: str): response = requests.post(url, headers=base_headers( auth.access_token, user_id), json=payload, timeout=120) @@ -90,9 +85,9 @@ def dataset_create(self, package_id: str): # run async 
request log.debug(f"Create dataset with id {package_id}") - jobs.enqueue( - falkor_post, [url, payload, self.__auth, get_user_id()] - ) + # jobs.enqueue( + # falkor_post, [url, payload, self.__auth, get_user_id()] + # ) def document_read(self, package_id: str, resource_id: str): url = ( @@ -106,7 +101,7 @@ def document_read(self, package_id: str, resource_id: str): ) log.debug(f"Read for document with id {resource_id}") - jobs.enqueue(falkor_get, [url, self.__auth, get_user_id()]) + # jobs.enqueue(falkor_get, [url, self.__auth, get_user_id()]) def document_create( self, @@ -132,9 +127,9 @@ def document_create( } log.debug(f"Creating document with id {resource.id}") - jobs.enqueue( - falkor_post, [url, payload, self.__auth, get_user_id()] - ) + # jobs.enqueue( + # falkor_post, [url, payload, self.__auth, get_user_id()] + # ) def document_update(self, resource: model.Resource): url = ( @@ -148,9 +143,9 @@ def document_update(self, resource: model.Resource): ) log.debug(f"Updating document with id {resource.id}") - jobs.enqueue( - falkor_put, [url, resource.as_dict(), self.__auth, get_user_id()] - ) + # jobs.enqueue( + # falkor_put, [url, resource.as_dict(), self.__auth, get_user_id()] + # ) def document_delete(self, resource: model.Resource): url = ( @@ -163,4 +158,4 @@ def document_delete(self, resource: model.Resource): ) log.debug(f"Deleting document with id {resource.id}") - jobs.enqueue(falkor_delete, [url, self.__auth, get_user_id()]) + # jobs.enqueue(falkor_delete, [url, self.__auth, get_user_id()]) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 0acdb86..ae6b157 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -1,9 +1,9 @@ import logging -import uuid import sqlalchemy as sa from datetime import datetime +from uuid import UUID, uuid4 from ckanext.falkor.model import FalkorEventObjectType, FalkorEventType, new_falkor_event from ckanext.falkor.client import Client from ckan.model import 
meta @@ -11,8 +11,8 @@ log = logging.getLogger(__name__) -def generate_event_id() -> str: - return uuid.uuid4() +def generate_event_id() -> UUID: + return uuid4() class EventHandler: @@ -23,19 +23,21 @@ def __init__(self, falkor: Client): self.falkor = falkor self.engine = meta.engine - def handle_package_create(self, package: dict): + def handle_package_create(self, package: dict, user_id: str): + log.info(package) self.__insert_pending_event( event_id=generate_event_id(), - object_id=package["id"], + object_id=UUID(package["id"]), object_type=FalkorEventObjectType.PACKAGE, event_type=FalkorEventType.CREATE, - created_at=package["created_at"] + user_id=user_id, + created_at=package["metadata_created"] ) - def handle_resource_create(self, resource: dict): + def handle_resource_create(self, resource: dict, user_id: str): self.__insert_pending_event( event_id=generate_event_id(), - object_id=resource["id"], + object_id=UUID(resource["id"]), object_type=FalkorEventObjectType.RESOURCE, event_type=FalkorEventType.CREATE, created_at=resource["created_at"] @@ -43,8 +45,9 @@ def handle_resource_create(self, resource: dict): def handle_resource_read( self, - resource_id: str, - package_id: str, + resource_id: UUID, + package_id: UUID, + user_id: str, created_at: datetime = datetime.now() ): self.__insert_pending_event( @@ -55,19 +58,19 @@ def handle_resource_read( created_at=created_at ) - def handle_resource_update(self, resource: dict): + def handle_resource_update(self, resource: dict, user_id: str): self.__insert_pending_event( event_id=generate_event_id(), - object_id=resource["id"], + object_id=UUID(resource["id"]), object_type=FalkorEventObjectType.RESOURCE, event_type=FalkorEventType.UPDATE, created_at=resource["created_at"] ) - def handle_resource_delete(self, resource: dict): + def handle_resource_delete(self, resource: dict, user_id: str): self.__insert_pending_event( event_id=generate_event_id(), - object_id=resource["id"], + object_id=UUID(resource["id"]), 
object_type=FalkorEventObjectType.RESOURCE, event_type=FalkorEventType.DELETE, created_at=resource["created_at"] @@ -75,11 +78,12 @@ def handle_resource_delete(self, resource: dict): def __insert_pending_event( self, - event_id: uuid.UUID, - object_id: uuid.UUID, + event_id: UUID, + object_id: UUID, object_type: FalkorEventObjectType, event_type: FalkorEventType, - created_at: datetime + user_id: str, + created_at: datetime, ): session = sa.orm.Session(bind=self.engine) try: @@ -88,6 +92,7 @@ def __insert_pending_event( object_id=object_id, object_type=object_type, event_type=event_type, + user_id=user_id, created_at=created_at, ) session.add(event) diff --git a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py index de65c12..170e1d8 100644 --- a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py +++ b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py @@ -46,12 +46,13 @@ def upgrade(): meta.MetaData(), sa.Column( "id", - sa.dialects.postgresql.UUID, + sa.dialects.postgresql.UUID(as_uuid=True), primary_key=True, nullable=False, default=uuid.uuid4 ), - sa.Column("object_id", sa.dialects.postgresql.UUID, nullable=False), + sa.Column("object_id", sa.dialects.postgresql.UUID( + as_uuid=True), nullable=False), sa.Column( "object_type", sa.Enum(FalkorEventObjectType), @@ -74,4 +75,5 @@ def downgrade(): "falkor_event" ) op.execute('DROP TYPE IF EXISTS falkoreventobjecttype;') + op.execute('DROP TYPE IF EXISTS falkoreventtype;') op.execute('DROP TYPE IF EXISTS falkoreventstatus;') diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 9ebc077..28735de 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -1,9 +1,9 @@ -import uuid import sqlalchemy as sa import ckan.model as model from enum import Enum -from typing import Union, Optional +from uuid import UUID, uuid4 +from typing import Optional from sqlalchemy.ext.declarative import 
declarative_base Base = declarative_base(metadata=model.meta.metadata) @@ -31,12 +31,13 @@ class FalkorEvent(Base): __tablename__ = "falkor_event" id = sa.Column( - sa.dialects.postgresql.UUID, + sa.dialects.postgresql.UUID(as_uuid=True), primary_key=True, nullable=False, - default=uuid.uuid4 + default=uuid4 ) - object_id = sa.Column(sa.dialects.postgresql.UUID, nullable=False) + object_id = sa.Column(sa.dialects.postgresql.UUID( + as_uuid=True), nullable=False) object_type = sa.Column(sa.Enum(FalkorEventObjectType), nullable=False) event_type = sa.Column(sa.Enum(FalkorEventType), nullable=False) user_id = sa.Column(sa.TEXT, nullable=False, default="guest") @@ -49,10 +50,11 @@ class FalkorEvent(Base): def new_falkor_event( - id: uuid.UUID, - object_id: uuid.UUID, + id: UUID, + object_id: UUID, object_type: FalkorEventObjectType, - user_id: Union[uuid.UUID, str], + event_type: FalkorEventType, + user_id: str, created_at: sa.DateTime, status: FalkorEventStatus = FalkorEventStatus.PENDING, synced_at: Optional[sa.DateTime] = None @@ -61,6 +63,7 @@ def new_falkor_event( id=id, object_id=object_id, object_type=object_type, + event_type=event_type, user_id=user_id, status=status, created_at=created_at, diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 7f5c0ab..cd83f57 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -75,10 +75,11 @@ def configure(self, config): # IResourceController def before_show(self, resource_dict): - self.event_handler.handle_resource_read( - resource_id=resource_dict["id"], - package_id=resource_dict["package_id"] - ) + # self.event_handler.handle_resource_read( + # resource_id=resource_dict["id"], + # package_id=resource_dict["package_id"], + # user_id=get_user_id() + # ) self.get_helpers() def notify(self, entity, operation=None): @@ -89,13 +90,20 @@ def notify(self, entity, operation=None): } if isinstance(entity, ckan_model.Package): package = table_dictize(entity, context) + + # We do not want to 
create datasets on Falkor that are still + # in draft on CKAN. + if package["state"] != "active": + return + self.event_handler.handle_package_create( - id=package["id"], - created_at=package["metadata_created"] + package=package, + user_id=get_user_id() ) + elif isinstance(entity, ckan_model.Resource): resource = table_dictize(entity, context) - self.handle_resource_modification_event(resource, operation) + # self.handle_resource_modification_event(resource, operation) def handle_resource_modification_event( self, From 2d3b5c3cff3101f44b5bcdd0cff90f603b8ad61a Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 15 Oct 2024 14:44:02 +0100 Subject: [PATCH 041/156] Add TODO --- ckanext/falkor/plugin.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index cd83f57..ced5b01 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -21,6 +21,7 @@ def get_config_value(config, key: str) -> str: def get_user_id() -> str: + # TODO: Make this work outside of application context user = toolkit.g.userobj return "guest" if not user else user.id From 731934d55fa71e2bdb58a1d44b8b168721537d4f Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 15 Oct 2024 14:50:53 +0100 Subject: [PATCH 042/156] Add todo and check resource state before logging event --- ckanext/falkor/plugin.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index ced5b01..78f0ce1 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -89,6 +89,9 @@ def notify(self, entity, operation=None): "ignore_auth": True, "defer_commit": True } + + # TODO: Figure out a way to filter out package create events that are + # a result of a new resource if isinstance(entity, ckan_model.Package): package = table_dictize(entity, context) @@ -104,6 +107,13 @@ def notify(self, entity, operation=None): elif isinstance(entity, ckan_model.Resource): resource = table_dictize(entity, context) 
+ + # We do not want to create documents on Falkor that are still + # in draft on CKAN. + if resource["state"] != "active": + return + + log.info(resource) # self.handle_resource_modification_event(resource, operation) def handle_resource_modification_event( From 437f6c49e7c24927fab73e46c8b12116c75dbb16 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 15 Oct 2024 15:28:41 +0100 Subject: [PATCH 043/156] Capture resource events --- ckanext/falkor/event_handler.py | 10 +++++++--- ckanext/falkor/plugin.py | 28 +++++++++++++++++++--------- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index ae6b157..38a004b 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -40,7 +40,8 @@ def handle_resource_create(self, resource: dict, user_id: str): object_id=UUID(resource["id"]), object_type=FalkorEventObjectType.RESOURCE, event_type=FalkorEventType.CREATE, - created_at=resource["created_at"] + user_id=user_id, + created_at=resource["created"] ) def handle_resource_read( @@ -55,6 +56,7 @@ def handle_resource_read( object_id=resource_id, object_type=FalkorEventObjectType.RESOURCE, event_type=FalkorEventType.READ, + user_id=user_id, created_at=created_at ) @@ -64,7 +66,8 @@ def handle_resource_update(self, resource: dict, user_id: str): object_id=UUID(resource["id"]), object_type=FalkorEventObjectType.RESOURCE, event_type=FalkorEventType.UPDATE, - created_at=resource["created_at"] + user_id=user_id, + created_at=resource["created"] ) def handle_resource_delete(self, resource: dict, user_id: str): @@ -73,7 +76,8 @@ def handle_resource_delete(self, resource: dict, user_id: str): object_id=UUID(resource["id"]), object_type=FalkorEventObjectType.RESOURCE, event_type=FalkorEventType.DELETE, - created_at=resource["created_at"] + user_id=user_id, + created_at=resource["created"] ) def __insert_pending_event( diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py 
index 78f0ce1..ea00089 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -90,11 +90,17 @@ def notify(self, entity, operation=None): "defer_commit": True } - # TODO: Figure out a way to filter out package create events that are - # a result of a new resource + user_id = get_user_id() + if isinstance(entity, ckan_model.Package): package = table_dictize(entity, context) + # Currently Falkor does not track changes to packages. + # We only use the create event to create the dataset and ignore + # any further events. + if operation != DomainObjectOperation.new: + return + # We do not want to create datasets on Falkor that are still # in draft on CKAN. if package["state"] != "active": @@ -102,7 +108,7 @@ def notify(self, entity, operation=None): self.event_handler.handle_package_create( package=package, - user_id=get_user_id() + user_id=user_id ) elif isinstance(entity, ckan_model.Resource): @@ -113,20 +119,24 @@ def notify(self, entity, operation=None): if resource["state"] != "active": return - log.info(resource) - # self.handle_resource_modification_event(resource, operation) + self.handle_resource_modification_event( + resource=resource, + operation=operation, + user_id=user_id + ) def handle_resource_modification_event( self, resource: dict, - operation: DomainObjectOperation + operation: DomainObjectOperation, + user_id: str, ): if operation == DomainObjectOperation.new: - self.event_handler.handle_resource_create() + self.event_handler.handle_resource_create(resource, user_id) elif operation == DomainObjectOperation.changed: - self.event_handler.handle_resource_update() + self.event_handler.handle_resource_update(resource, user_id) elif operation == DomainObjectOperation.deleted: - self.event_handler.handle_resource_delete() + self.event_handler.handle_resource_delete(resource, user_id) def construct_falkor_url(self, resource): resource_id = resource["id"] From b2e29bf8e319acf5025eab2a6123e4744e132615 Mon Sep 17 00:00:00 2001 From: wajones98 
Date: Thu, 17 Oct 2024 14:59:59 +0100 Subject: [PATCH 044/156] Add seq to read events --- ckanext/falkor/event_handler.py | 47 +++++---------- .../versions/ebe0938c411a_object_seq.py | 53 ++++++++++++++++ ckanext/falkor/model.py | 60 ++++++++++++++++++- ckanext/falkor/plugin.py | 10 ++-- 4 files changed, 132 insertions(+), 38 deletions(-) create mode 100644 ckanext/falkor/migration/falkor/versions/ebe0938c411a_object_seq.py diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 38a004b..f440513 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -1,12 +1,17 @@ +from ckan.model import meta +from ckanext.falkor.client import Client import logging import sqlalchemy as sa from datetime import datetime from uuid import UUID, uuid4 -from ckanext.falkor.model import FalkorEventObjectType, FalkorEventType, new_falkor_event -from ckanext.falkor.client import Client -from ckan.model import meta +from ckanext.falkor.model import ( + FalkorEventType, + FalkorEventObjectType, + insert_pending_event, + get_sequence_number, +) log = logging.getLogger(__name__) @@ -24,7 +29,6 @@ def __init__(self, falkor: Client): self.engine = meta.engine def handle_package_create(self, package: dict, user_id: str): - log.info(package) self.__insert_pending_event( event_id=generate_event_id(), object_id=UUID(package["id"]), @@ -51,15 +55,21 @@ def handle_resource_read( user_id: str, created_at: datetime = datetime.now() ): - self.__insert_pending_event( + session = sa.orm.Session(bind=self.engine) + sequence = get_sequence_number(session, resource_id) + insert_pending_event( + session=session, event_id=generate_event_id(), object_id=resource_id, object_type=FalkorEventObjectType.RESOURCE, event_type=FalkorEventType.READ, user_id=user_id, + sequence=sequence, created_at=created_at ) + session.commit() + def handle_resource_update(self, resource: dict, user_id: str): self.__insert_pending_event( event_id=generate_event_id(), @@ -79,30 
+89,3 @@ def handle_resource_delete(self, resource: dict, user_id: str): user_id=user_id, created_at=resource["created"] ) - - def __insert_pending_event( - self, - event_id: UUID, - object_id: UUID, - object_type: FalkorEventObjectType, - event_type: FalkorEventType, - user_id: str, - created_at: datetime, - ): - session = sa.orm.Session(bind=self.engine) - try: - event = new_falkor_event( - id=event_id, - object_id=object_id, - object_type=object_type, - event_type=event_type, - user_id=user_id, - created_at=created_at, - ) - session.add(event) - session.commit() - except Exception as e: - logging.critical(e, exc_info=True) - session.rollback() - finally: - session.close() diff --git a/ckanext/falkor/migration/falkor/versions/ebe0938c411a_object_seq.py b/ckanext/falkor/migration/falkor/versions/ebe0938c411a_object_seq.py new file mode 100644 index 0000000..e8dba88 --- /dev/null +++ b/ckanext/falkor/migration/falkor/versions/ebe0938c411a_object_seq.py @@ -0,0 +1,53 @@ +"""object_seq + +Revision ID: ebe0938c411a +Revises: 376615bb5319 +Create Date: 2024-10-15 14:30:36.471133 + +""" +import sqlalchemy as sa + +from alembic import op +from ckan.model import meta + + +# revision identifiers, used by Alembic. 
+revision = 'ebe0938c411a' +down_revision = '376615bb5319' +branch_labels = None +depends_on = None + + +def upgrade(): + op.create_table( + "falkor_object_event_sequence", + meta.MetaData(), + sa.Column( + "id", + sa.dialects.postgresql.UUID(as_uuid=True), + primary_key=True, + nullable=False, + ), + sa.Column("sequence", sa.INTEGER, nullable=False) + ) + + op.create_foreign_key( + "fk_falkor_event_object_id_falkor_object_event_sequence", + "falkor_event", + "falkor_object_event_sequence", + ["object_id"], + ["id"], + ) + + op.add_column("falkor_event", sa.Column( + "sequence", sa.INTEGER, nullable=False)) + + +def downgrade(): + op.drop_constraint( + "fk_falkor_event_object_id_falkor_object_event_sequence", + "falkor_event", + type_="foreignkey" + ) + op.drop_table("falkor_object_event_sequence") + op.drop_column("falkor_event", "sequence") diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 28735de..3220f89 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -1,13 +1,17 @@ +import logging import sqlalchemy as sa import ckan.model as model from enum import Enum from uuid import UUID, uuid4 -from typing import Optional +from datetime import datetime +from typing import Optional, Union from sqlalchemy.ext.declarative import declarative_base Base = declarative_base(metadata=model.meta.metadata) +log = logging.getLogger(__name__) + class FalkorEventObjectType(Enum): PACKAGE = 'package' @@ -45,6 +49,7 @@ class FalkorEvent(Base): sa.Enum(FalkorEventStatus), default=FalkorEventStatus.PENDING ) + sequence = sa.Column(sa.INTEGER, nullable=False) created_at = sa.Column(sa.DateTime, nullable=False) synced_at = sa.Column(sa.DateTime, nullable=True) @@ -55,6 +60,7 @@ def new_falkor_event( object_type: FalkorEventObjectType, event_type: FalkorEventType, user_id: str, + sequence: sa.INTEGER, created_at: sa.DateTime, status: FalkorEventStatus = FalkorEventStatus.PENDING, synced_at: Optional[sa.DateTime] = None @@ -66,6 +72,58 @@ def 
new_falkor_event( event_type=event_type, user_id=user_id, status=status, + sequence=sequence, created_at=created_at, synced_at=synced_at ) + + +def insert_pending_event( + session: sa.orm.Session, + event_id: UUID, + object_id: UUID, + object_type: FalkorEventObjectType, + event_type: FalkorEventType, + user_id: str, + sequence: int, + created_at: datetime, +): + session.add( + new_falkor_event( + id=event_id, + object_id=object_id, + object_type=object_type, + event_type=event_type, + user_id=user_id, + sequence=sequence, + created_at=created_at, + ) + ) + + +class FalkorObjectEventSequence(Base): + __tablename__ = "falkor_object_event_sequence" + + id = sa.Column( + sa.dialects.postgresql.UUID(as_uuid=True), + primary_key=True, + nullable=False, + ) + sequence = sa.Column(sa.INTEGER, nullable=False) + + +def new_falkor_object_event_sequence(object_id: UUID, sequence: sa.Integer = 0) -> FalkorObjectEventSequence: + return FalkorObjectEventSequence(id=object_id, sequence=sequence) + + +def get_sequence_number(session: sa.orm.Session, object_id: UUID): + object_event_sequence: Union[FalkorObjectEventSequence, None] = session.query( + FalkorObjectEventSequence).get(object_id) + + if object_event_sequence is None: + object_event_sequence = new_falkor_object_event_sequence( + object_id=object_id) + session.add(object_event_sequence) + + object_event_sequence.sequence += 1 + return object_event_sequence.sequence diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index ea00089..87c8777 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -76,11 +76,11 @@ def configure(self, config): # IResourceController def before_show(self, resource_dict): - # self.event_handler.handle_resource_read( - # resource_id=resource_dict["id"], - # package_id=resource_dict["package_id"], - # user_id=get_user_id() - # ) + self.event_handler.handle_resource_read( + resource_id=resource_dict["id"], + package_id=resource_dict["package_id"], + user_id=get_user_id() 
+ ) self.get_helpers() def notify(self, entity, operation=None): From a0af9ef62c8bb9c224834b0912b2b00aa54c1336 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 17 Oct 2024 15:07:51 +0100 Subject: [PATCH 045/156] Add some TODOS and clean up comments --- ckanext/falkor/model.py | 1 + ckanext/falkor/plugin.py | 31 +------------------------------ 2 files changed, 2 insertions(+), 30 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 3220f89..bf25f20 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -120,6 +120,7 @@ def get_sequence_number(session: sa.orm.Session, object_id: UUID): object_event_sequence: Union[FalkorObjectEventSequence, None] = session.query( FalkorObjectEventSequence).get(object_id) + # TODO: Seq should always be 1 for CREATE events if object_event_sequence is None: object_event_sequence = new_falkor_object_event_sequence( object_id=object_id) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 87c8777..43cecab 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -36,7 +36,6 @@ class FalkorPlugin(plugins.SingletonPlugin): plugins.implements(plugins.ITemplateHelpers) plugins.implements(plugins.IDomainObjectModification, inherit=True) plugins.implements(plugins.IResourceController, inherit=True) - # plugins.implements(plugins.IActions) # IConfigurer def update_config(self, config): @@ -44,6 +43,7 @@ def update_config(self, config): toolkit.add_public_directory(config, "public") def configure(self, config): + # TODO: Check if plugins has been initialised before tracking events self.config = config endpoint = get_config_value(config, "ckanext.falkor.auth.endpoint") client_id = get_config_value(config, "ckanext.falkor.auth.client_id") @@ -161,32 +161,3 @@ def construct_falkor_url(self, resource): def get_helpers(self): return {"construct_falkor_url": self.construct_falkor_url} - - # def handle_resource_read(self, resource_id: str, package_id: str): - # session = 
sa.orm.Session(bind=self.engine) - # try: - # event = new_falkor_event( - # id=uuid.uuid4(), - # object_id=resource_id, - # object_type="resource", - # status="pending", - # created_at=datetime.datetime.now(), - # synced_at=None, - # ) - # session.add(event) - # session.commit() - # - # self.falkor.document_read( - # package_id=resource_id, - # resource_id=package_id - # ) - # log.info(session.query(FalkorEvent).all()) - # except: - # session.rollback() - # finally: - # session.close() - - -# @toolkit.side_effect_free -# def hello_world(context, data_dict: Optional[dict] = None) -> str: -# return {"message": f"Hello, {data_dict['name'] if 'name' in data_dict else 'World'}!"} From af370fd4189a9ea098b9b64c3f5148ef2ce747ab Mon Sep 17 00:00:00 2001 From: wajones98 Date: Mon, 21 Oct 2024 11:37:25 +0100 Subject: [PATCH 046/156] Remove event sequence --- ckanext/falkor/event_handler.py | 3 -- .../falkor/versions/6b860291073e_config.py | 24 +++++++++ .../versions/ebe0938c411a_object_seq.py | 53 ------------------- ckanext/falkor/model.py | 35 +----------- 4 files changed, 25 insertions(+), 90 deletions(-) create mode 100644 ckanext/falkor/migration/falkor/versions/6b860291073e_config.py delete mode 100644 ckanext/falkor/migration/falkor/versions/ebe0938c411a_object_seq.py diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index f440513..5de7050 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -10,7 +10,6 @@ FalkorEventType, FalkorEventObjectType, insert_pending_event, - get_sequence_number, ) log = logging.getLogger(__name__) @@ -56,7 +55,6 @@ def handle_resource_read( created_at: datetime = datetime.now() ): session = sa.orm.Session(bind=self.engine) - sequence = get_sequence_number(session, resource_id) insert_pending_event( session=session, event_id=generate_event_id(), @@ -64,7 +62,6 @@ def handle_resource_read( object_type=FalkorEventObjectType.RESOURCE, event_type=FalkorEventType.READ, user_id=user_id, 
- sequence=sequence, created_at=created_at ) diff --git a/ckanext/falkor/migration/falkor/versions/6b860291073e_config.py b/ckanext/falkor/migration/falkor/versions/6b860291073e_config.py new file mode 100644 index 0000000..91bc0f8 --- /dev/null +++ b/ckanext/falkor/migration/falkor/versions/6b860291073e_config.py @@ -0,0 +1,24 @@ +"""config + +Revision ID: 6b860291073e +Revises: 376615bb5319 +Create Date: 2024-10-21 10:36:57.994360 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '6b860291073e' +down_revision = '376615bb5319' +branch_labels = None +depends_on = None + + +def upgrade(): + pass + + +def downgrade(): + pass diff --git a/ckanext/falkor/migration/falkor/versions/ebe0938c411a_object_seq.py b/ckanext/falkor/migration/falkor/versions/ebe0938c411a_object_seq.py deleted file mode 100644 index e8dba88..0000000 --- a/ckanext/falkor/migration/falkor/versions/ebe0938c411a_object_seq.py +++ /dev/null @@ -1,53 +0,0 @@ -"""object_seq - -Revision ID: ebe0938c411a -Revises: 376615bb5319 -Create Date: 2024-10-15 14:30:36.471133 - -""" -import sqlalchemy as sa - -from alembic import op -from ckan.model import meta - - -# revision identifiers, used by Alembic. 
-revision = 'ebe0938c411a' -down_revision = '376615bb5319' -branch_labels = None -depends_on = None - - -def upgrade(): - op.create_table( - "falkor_object_event_sequence", - meta.MetaData(), - sa.Column( - "id", - sa.dialects.postgresql.UUID(as_uuid=True), - primary_key=True, - nullable=False, - ), - sa.Column("sequence", sa.INTEGER, nullable=False) - ) - - op.create_foreign_key( - "fk_falkor_event_object_id_falkor_object_event_sequence", - "falkor_event", - "falkor_object_event_sequence", - ["object_id"], - ["id"], - ) - - op.add_column("falkor_event", sa.Column( - "sequence", sa.INTEGER, nullable=False)) - - -def downgrade(): - op.drop_constraint( - "fk_falkor_event_object_id_falkor_object_event_sequence", - "falkor_event", - type_="foreignkey" - ) - op.drop_table("falkor_object_event_sequence") - op.drop_column("falkor_event", "sequence") diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index bf25f20..c22e942 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -5,7 +5,7 @@ from enum import Enum from uuid import UUID, uuid4 from datetime import datetime -from typing import Optional, Union +from typing import Optional from sqlalchemy.ext.declarative import declarative_base Base = declarative_base(metadata=model.meta.metadata) @@ -60,7 +60,6 @@ def new_falkor_event( object_type: FalkorEventObjectType, event_type: FalkorEventType, user_id: str, - sequence: sa.INTEGER, created_at: sa.DateTime, status: FalkorEventStatus = FalkorEventStatus.PENDING, synced_at: Optional[sa.DateTime] = None @@ -72,7 +71,6 @@ def new_falkor_event( event_type=event_type, user_id=user_id, status=status, - sequence=sequence, created_at=created_at, synced_at=synced_at ) @@ -85,7 +83,6 @@ def insert_pending_event( object_type: FalkorEventObjectType, event_type: FalkorEventType, user_id: str, - sequence: int, created_at: datetime, ): session.add( @@ -95,36 +92,6 @@ def insert_pending_event( object_type=object_type, event_type=event_type, user_id=user_id, - 
sequence=sequence, created_at=created_at, ) ) - - -class FalkorObjectEventSequence(Base): - __tablename__ = "falkor_object_event_sequence" - - id = sa.Column( - sa.dialects.postgresql.UUID(as_uuid=True), - primary_key=True, - nullable=False, - ) - sequence = sa.Column(sa.INTEGER, nullable=False) - - -def new_falkor_object_event_sequence(object_id: UUID, sequence: sa.Integer = 0) -> FalkorObjectEventSequence: - return FalkorObjectEventSequence(id=object_id, sequence=sequence) - - -def get_sequence_number(session: sa.orm.Session, object_id: UUID): - object_event_sequence: Union[FalkorObjectEventSequence, None] = session.query( - FalkorObjectEventSequence).get(object_id) - - # TODO: Seq should always be 1 for CREATE events - if object_event_sequence is None: - object_event_sequence = new_falkor_object_event_sequence( - object_id=object_id) - session.add(object_event_sequence) - - object_event_sequence.sequence += 1 - return object_event_sequence.sequence From 691f9cd9f671f1dce96bb47fad674f1aa1c9cc3a Mon Sep 17 00:00:00 2001 From: wajones98 Date: Mon, 21 Oct 2024 13:30:23 +0100 Subject: [PATCH 047/156] config migration --- .../falkor/versions/6b860291073e_config.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/ckanext/falkor/migration/falkor/versions/6b860291073e_config.py b/ckanext/falkor/migration/falkor/versions/6b860291073e_config.py index 91bc0f8..e7c753a 100644 --- a/ckanext/falkor/migration/falkor/versions/6b860291073e_config.py +++ b/ckanext/falkor/migration/falkor/versions/6b860291073e_config.py @@ -6,6 +6,7 @@ """ from alembic import op +from ckan.model import meta import sqlalchemy as sa @@ -16,9 +17,20 @@ depends_on = None +TABLE_NAME = "falkor_config" + + def upgrade(): - pass + op.create_table( + TABLE_NAME, + meta.MetaData(), + sa.Column("initialised", sa.Boolean, nullable=False), + ) + op.execute(""" + INSERT INTO falkor_config(initialised) + VALUES (false) + """) def downgrade(): - pass + op.drop_table(TABLE_NAME) 
From 213dbcf97cb8217a443634cfabb4f1e540acbb28 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Mon, 21 Oct 2024 15:27:43 +0100 Subject: [PATCH 048/156] Remove sequence from event model --- ckanext/falkor/model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index c22e942..7b004c7 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -49,7 +49,6 @@ class FalkorEvent(Base): sa.Enum(FalkorEventStatus), default=FalkorEventStatus.PENDING ) - sequence = sa.Column(sa.INTEGER, nullable=False) created_at = sa.Column(sa.DateTime, nullable=False) synced_at = sa.Column(sa.DateTime, nullable=True) From 69fd68a4fe8c608a397cc0c6425ce14fa0f7229d Mon Sep 17 00:00:00 2001 From: wajones98 Date: Mon, 21 Oct 2024 16:30:20 +0100 Subject: [PATCH 049/156] Move event processing back to worker --- ckanext/falkor/event_handler.py | 95 ++++++++++++++++++++++++--------- ckanext/falkor/plugin.py | 80 ++++++++------------------- 2 files changed, 93 insertions(+), 82 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 5de7050..66b1e15 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -1,17 +1,19 @@ - -from ckan.model import meta -from ckanext.falkor.client import Client import logging import sqlalchemy as sa from datetime import datetime from uuid import UUID, uuid4 +from typing import Union from ckanext.falkor.model import ( FalkorEventType, FalkorEventObjectType, insert_pending_event, ) +from ckan.model import meta, Package, Resource, State +from ckanext.falkor.client import Client +from ckan.model.domain_object import DomainObjectOperation + log = logging.getLogger(__name__) @@ -21,31 +23,35 @@ def generate_event_id() -> UUID: class EventHandler: falkor: Client - engine: sa.engine.Engine def __init__(self, falkor: Client): self.falkor = falkor - self.engine = meta.engine - def handle_package_create(self, package: dict, user_id: str): - 
self.__insert_pending_event( + def handle_package_create(self, package: Package, user_id: str): + session = meta.create_local_session() + insert_pending_event( + session, event_id=generate_event_id(), - object_id=UUID(package["id"]), + object_id=UUID(package.id), object_type=FalkorEventObjectType.PACKAGE, event_type=FalkorEventType.CREATE, user_id=user_id, - created_at=package["metadata_created"] + created_at=package.metadata_created ) + session.commit() - def handle_resource_create(self, resource: dict, user_id: str): - self.__insert_pending_event( + def handle_resource_create(self, resource: Resource, user_id: str): + session = meta.create_local_session() + insert_pending_event( + session, event_id=generate_event_id(), - object_id=UUID(resource["id"]), + object_id=UUID(resource.id), object_type=FalkorEventObjectType.RESOURCE, event_type=FalkorEventType.CREATE, user_id=user_id, - created_at=resource["created"] + created_at=resource.created ) + session.commit() def handle_resource_read( self, @@ -54,9 +60,9 @@ def handle_resource_read( user_id: str, created_at: datetime = datetime.now() ): - session = sa.orm.Session(bind=self.engine) + session = meta.create_local_session() insert_pending_event( - session=session, + session, event_id=generate_event_id(), object_id=resource_id, object_type=FalkorEventObjectType.RESOURCE, @@ -64,25 +70,66 @@ def handle_resource_read( user_id=user_id, created_at=created_at ) - session.commit() - def handle_resource_update(self, resource: dict, user_id: str): - self.__insert_pending_event( + def handle_resource_update(self, resource: Resource, user_id: str): + session = meta.create_local_session() + insert_pending_event( + session, event_id=generate_event_id(), - object_id=UUID(resource["id"]), + object_id=UUID(resource.id), object_type=FalkorEventObjectType.RESOURCE, event_type=FalkorEventType.UPDATE, user_id=user_id, - created_at=resource["created"] + created_at=resource.created ) + session.commit() - def handle_resource_delete(self, 
resource: dict, user_id: str): - self.__insert_pending_event( + def handle_resource_delete(self, resource: Resource, user_id: str): + session = meta.create_local_session() + insert_pending_event( + session, event_id=generate_event_id(), - object_id=UUID(resource["id"]), + object_id=UUID(resource.id), object_type=FalkorEventObjectType.RESOURCE, event_type=FalkorEventType.DELETE, user_id=user_id, - created_at=resource["created"] + created_at=resource.created ) + session.commit() + + +def handle_read_event( + handler: EventHandler, + resource: dict, + user_id: str, +): + handler.handle_resource_read( + resource["id"], + resource["package_id"], + user_id + ) + + +def handle_modification_event( + handler: EventHandler, + entity: Union[Package, Resource], + operation: DomainObjectOperation, + user_id: str, +): + if isinstance(entity, Package): + # Currently Falkor does not track changes to packages. + # We only use the create event to create the dataset + # and ignore any further changes. + if operation != DomainObjectOperation.new: + return + + handler.handle_package_create(entity, user_id) + + elif isinstance(entity, Resource): + if operation == DomainObjectOperation.new: + handler.handle_resource_create(entity, user_id) + elif operation == DomainObjectOperation.changed: + handler.handle_resource_update(entity, user_id) + elif operation == DomainObjectOperation.deleted: + handler.handle_resource_delete(entity, user_id) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 43cecab..f16b713 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -8,7 +8,7 @@ from ckan.lib.dictization import table_dictize from ckanext.falkor import client, auth, event_handler from ckan.model.domain_object import DomainObjectOperation - +from ckan.lib import jobs log = logging.getLogger(__name__) @@ -21,7 +21,6 @@ def get_config_value(config, key: str) -> str: def get_user_id() -> str: - # TODO: Make this work outside of application context user = 
toolkit.g.userobj return "guest" if not user else user.id @@ -76,67 +75,32 @@ def configure(self, config): # IResourceController def before_show(self, resource_dict): - self.event_handler.handle_resource_read( - resource_id=resource_dict["id"], - package_id=resource_dict["package_id"], - user_id=get_user_id() + jobs.enqueue( + event_handler.handle_read_event, + [ + self.event_handler, + resource_dict, + get_user_id() + ] ) + self.get_helpers() - def notify(self, entity, operation=None): - context = { - "model": ckan_model, - "ignore_auth": True, - "defer_commit": True - } - - user_id = get_user_id() - - if isinstance(entity, ckan_model.Package): - package = table_dictize(entity, context) - - # Currently Falkor does not track changes to packages. - # We only use the create event to create the dataset and ignore - # any further events. - if operation != DomainObjectOperation.new: - return - - # We do not want to create datasets on Falkor that are still - # in draft on CKAN. - if package["state"] != "active": - return - - self.event_handler.handle_package_create( - package=package, - user_id=user_id - ) - - elif isinstance(entity, ckan_model.Resource): - resource = table_dictize(entity, context) - - # We do not want to create documents on Falkor that are still - # in draft on CKAN. 
- if resource["state"] != "active": - return - - self.handle_resource_modification_event( - resource=resource, - operation=operation, - user_id=user_id - ) - - def handle_resource_modification_event( + def notify( self, - resource: dict, - operation: DomainObjectOperation, - user_id: str, + entity, + operation=None ): - if operation == DomainObjectOperation.new: - self.event_handler.handle_resource_create(resource, user_id) - elif operation == DomainObjectOperation.changed: - self.event_handler.handle_resource_update(resource, user_id) - elif operation == DomainObjectOperation.deleted: - self.event_handler.handle_resource_delete(resource, user_id) + if operation is None: + return + + jobs.enqueue( + event_handler.handle_modification_event, + args=[ + self.event_handler, + entity, operation, get_user_id() + ] + ) def construct_falkor_url(self, resource): resource_id = resource["id"] From 5aee3506252c95d00684269ae7a9896c6ba2548c Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 22 Oct 2024 11:07:16 +0100 Subject: [PATCH 050/156] Check plugin is initialised before tracking events --- ckanext/falkor/event_handler.py | 2 +- .../falkor/versions/6b860291073e_config.py | 2 +- ckanext/falkor/model.py | 22 +++++++++++++++-- ckanext/falkor/plugin.py | 24 +++++++++++++++---- 4 files changed, 42 insertions(+), 8 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 66b1e15..25b3986 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -10,7 +10,7 @@ insert_pending_event, ) -from ckan.model import meta, Package, Resource, State +from ckan.model import meta, Package, Resource from ckanext.falkor.client import Client from ckan.model.domain_object import DomainObjectOperation diff --git a/ckanext/falkor/migration/falkor/versions/6b860291073e_config.py b/ckanext/falkor/migration/falkor/versions/6b860291073e_config.py index e7c753a..d71ec21 100644 --- 
a/ckanext/falkor/migration/falkor/versions/6b860291073e_config.py +++ b/ckanext/falkor/migration/falkor/versions/6b860291073e_config.py @@ -24,7 +24,7 @@ def upgrade(): op.create_table( TABLE_NAME, meta.MetaData(), - sa.Column("initialised", sa.Boolean, nullable=False), + sa.Column("initialised", sa.Boolean, nullable=False, primary_key=True), ) op.execute(""" INSERT INTO falkor_config(initialised) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 7b004c7..a4b3337 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -1,14 +1,14 @@ import logging import sqlalchemy as sa -import ckan.model as model from enum import Enum from uuid import UUID, uuid4 from datetime import datetime from typing import Optional from sqlalchemy.ext.declarative import declarative_base +from ckan.model import meta -Base = declarative_base(metadata=model.meta.metadata) +Base = declarative_base(metadata=meta.metadata) log = logging.getLogger(__name__) @@ -94,3 +94,21 @@ def insert_pending_event( created_at=created_at, ) ) + + +class FalkorConfig(Base): + __tablename__ = "falkor_config" + + initialised = sa.Column(sa.Boolean, nullable=False, primary_key=True) + + +def get_falkor_config(session: sa.orm.Session) -> FalkorConfig: + return session.query(FalkorConfig).first() + + +def validate_falkor_config(session: sa.orm.Session): + row_count = session.query(FalkorConfig).count() + if row_count != 1: + raise Exception( + f"falkor_config should have exactly 1 row. 
Has {row_count}" + ) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index f16b713..c85a076 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -3,11 +3,8 @@ import sqlalchemy as sa import ckan.plugins as plugins import ckan.plugins.toolkit as toolkit -import ckan.model as ckan_model -from ckan.lib.dictization import table_dictize -from ckanext.falkor import client, auth, event_handler -from ckan.model.domain_object import DomainObjectOperation +from ckanext.falkor import client, auth, event_handler, model from ckan.lib import jobs log = logging.getLogger(__name__) @@ -29,6 +26,7 @@ class FalkorPlugin(plugins.SingletonPlugin): falkor: client.Client engine: sa.engine.Engine event_handler: event_handler.EventHandler + __initialised: bool plugins.implements(plugins.IConfigurer) plugins.implements(plugins.IConfigurable, inherit=True) @@ -72,9 +70,24 @@ def configure(self, config): ) self.event_handler = event_handler.EventHandler(self.falkor) + self.__initialised = model.get_falkor_config( + model.meta.Session + ).initialised + + @property + def initialised(self): + if not self.__initialised: + # TODO: Can this be retrieved from redis? 
+ self.__initialised = model.get_falkor_config( + model.meta.Session + ).initialised + return self.__initialised # IResourceController def before_show(self, resource_dict): + if not self.initialised: + return + jobs.enqueue( event_handler.handle_read_event, [ @@ -91,6 +104,9 @@ def notify( entity, operation=None ): + if not self.initialised: + return + if operation is None: return From 26380b2a7ac1205e4a0051ab28161da724d9e315 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 22 Oct 2024 11:08:54 +0100 Subject: [PATCH 051/156] Validate config on plugin startup --- ckanext/falkor/plugin.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index c85a076..d31b01b 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -70,8 +70,10 @@ def configure(self, config): ) self.event_handler = event_handler.EventHandler(self.falkor) + session = model.meta.Session + model.validate_falkor_config(session) self.__initialised = model.get_falkor_config( - model.meta.Session + session ).initialised @property From 2cf4d47ccec28fbd567fb8bed2b90e2ae4b6e0c8 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 22 Oct 2024 12:05:22 +0100 Subject: [PATCH 052/156] initialise plugin function --- ckanext/falkor/client.py | 2 -- ckanext/falkor/event_handler.py | 3 +-- ckanext/falkor/model.py | 5 +++++ ckanext/falkor/plugin.py | 21 ++++++++++++++++++++- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/ckanext/falkor/client.py b/ckanext/falkor/client.py index 6e82391..012c08a 100644 --- a/ckanext/falkor/client.py +++ b/ckanext/falkor/client.py @@ -1,8 +1,6 @@ import requests import logging import json -import ckan.lib.jobs as jobs -import ckan.plugins.toolkit as toolkit import ckan.model as model from typing import TypedDict diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 25b3986..98ec452 100644 --- a/ckanext/falkor/event_handler.py +++ 
b/ckanext/falkor/event_handler.py @@ -1,8 +1,7 @@ import logging -import sqlalchemy as sa -from datetime import datetime from uuid import UUID, uuid4 +from datetime import datetime from typing import Union from ckanext.falkor.model import ( FalkorEventType, diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index a4b3337..bbfa09d 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -112,3 +112,8 @@ def validate_falkor_config(session: sa.orm.Session): raise Exception( f"falkor_config should have exactly 1 row. Has {row_count}" ) + + +def initialise_plugin(session: sa.orm.Session): + config = get_falkor_config(session) + config.initialised = True diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index d31b01b..de108a3 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -3,6 +3,7 @@ import sqlalchemy as sa import ckan.plugins as plugins import ckan.plugins.toolkit as toolkit +import ckan.model as ckan_model from ckanext.falkor import client, auth, event_handler, model from ckan.lib import jobs @@ -73,7 +74,7 @@ def configure(self, config): session = model.meta.Session model.validate_falkor_config(session) self.__initialised = model.get_falkor_config( - session + session ).initialised @property @@ -85,7 +86,25 @@ def initialised(self): ).initialised return self.__initialised + def initialise_plugin(self): + if self.initialised: + log.warning("Plugin already initialised") + return + session: sa.orm.Session = model.meta.create_local_session() + + packages = session.query(ckan_model.Package).all() + for package in packages: + self.event_handler.handle_package_create(package, "plugin") + + resources = session.query(ckan_model.Resource).all() + for resource in resources: + self.event_handler.handle_resource_create(resource, "plugin") + + model.initialise_plugin(session) + session.commit() + # IResourceController + def before_show(self, resource_dict): if not self.initialised: return From 
0662a0f872b91efd0340c8a067649c55dc22fd86 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 22 Oct 2024 15:45:06 +0100 Subject: [PATCH 053/156] Remove falkor config table and introduce sync job --- ckanext/falkor/model.py | 63 +++++++++++++++++++++++++--------------- ckanext/falkor/plugin.py | 49 ++++++++++--------------------- 2 files changed, 55 insertions(+), 57 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index bbfa09d..d197d99 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -4,9 +4,9 @@ from enum import Enum from uuid import UUID, uuid4 from datetime import datetime -from typing import Optional +from typing import Optional, List from sqlalchemy.ext.declarative import declarative_base -from ckan.model import meta +from ckan.model import meta, Package, Resource Base = declarative_base(metadata=meta.metadata) @@ -96,24 +96,41 @@ def insert_pending_event( ) -class FalkorConfig(Base): - __tablename__ = "falkor_config" - - initialised = sa.Column(sa.Boolean, nullable=False, primary_key=True) - - -def get_falkor_config(session: sa.orm.Session) -> FalkorConfig: - return session.query(FalkorConfig).first() - - -def validate_falkor_config(session: sa.orm.Session): - row_count = session.query(FalkorConfig).count() - if row_count != 1: - raise Exception( - f"falkor_config should have exactly 1 row. 
Has {row_count}" - ) - - -def initialise_plugin(session: sa.orm.Session): - config = get_falkor_config(session) - config.initialised = True +def get_packages_without_create_events(session: sa.orm.Session) -> List[Package]: + distinct_package_creates = session.query( + FalkorEvent + ).filter( + FalkorEvent.object_type == FalkorEventObjectType.PACKAGE + ).filter( + FalkorEvent.event_type == FalkorEventType.CREATE + ).subquery() + + return session.query( + Package + ).outerjoin( + distinct_package_creates, + Package.id == sa.cast( + distinct_package_creates.c.object_id, sa.TEXT) + ).filter( + sa.cast(distinct_package_creates.c.object_id, sa.TEXT) == None + ).all() + + +def get_resources_without_create_events(session: sa.orm.Session) -> List[Resource]: + distinct_resource_creates = session.query( + FalkorEvent + ).filter( + FalkorEvent.object_type == FalkorEventObjectType.RESOURCE + ).filter( + FalkorEvent.event_type == FalkorEventType.CREATE + ).subquery() + + return session.query( + Resource + ).outerjoin( + distinct_resource_creates, + Resource.id == sa.cast( + distinct_resource_creates.c.object_id, sa.TEXT) + ).filter( + sa.cast(distinct_resource_creates.c.object_id, sa.TEXT) == None + ).all() diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index de108a3..409277f 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -71,44 +71,28 @@ def configure(self, config): ) self.event_handler = event_handler.EventHandler(self.falkor) - session = model.meta.Session - model.validate_falkor_config(session) - self.__initialised = model.get_falkor_config( - session - ).initialised - - @property - def initialised(self): - if not self.__initialised: - # TODO: Can this be retrieved from redis? 
- self.__initialised = model.get_falkor_config( - model.meta.Session - ).initialised - return self.__initialised - - def initialise_plugin(self): - if self.initialised: - log.warning("Plugin already initialised") - return - session: sa.orm.Session = model.meta.create_local_session() + self.sync() + + def sync(self): + try: + session: sa.orm.Session = ckan_model.meta.create_local_session() - packages = session.query(ckan_model.Package).all() - for package in packages: - self.event_handler.handle_package_create(package, "plugin") + packages = model.get_packages_without_create_events(session) + for package in packages: + self.event_handler.handle_package_create(package, "sync_job") - resources = session.query(ckan_model.Resource).all() - for resource in resources: - self.event_handler.handle_resource_create(resource, "plugin") + resources = model.get_resources_without_create_events(session) + for resource in resources: + self.event_handler.handle_resource_create(resource, "sync_job") - model.initialise_plugin(session) - session.commit() + session.commit() + + except Exception as e: + log.exception(e) # IResourceController def before_show(self, resource_dict): - if not self.initialised: - return - jobs.enqueue( event_handler.handle_read_event, [ @@ -125,9 +109,6 @@ def notify( entity, operation=None ): - if not self.initialised: - return - if operation is None: return From 83307ceab11f7bb9fdc52f8b8465e13a35c96db8 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 22 Oct 2024 15:53:38 +0100 Subject: [PATCH 054/156] Add migration for sync job history --- .../falkor/versions/6b860291073e_config.py | 36 ---------- .../versions/a3c1de54a0d9_falkor_sync_job.py | 68 +++++++++++++++++++ 2 files changed, 68 insertions(+), 36 deletions(-) delete mode 100644 ckanext/falkor/migration/falkor/versions/6b860291073e_config.py create mode 100644 ckanext/falkor/migration/falkor/versions/a3c1de54a0d9_falkor_sync_job.py diff --git 
a/ckanext/falkor/migration/falkor/versions/6b860291073e_config.py b/ckanext/falkor/migration/falkor/versions/6b860291073e_config.py deleted file mode 100644 index d71ec21..0000000 --- a/ckanext/falkor/migration/falkor/versions/6b860291073e_config.py +++ /dev/null @@ -1,36 +0,0 @@ -"""config - -Revision ID: 6b860291073e -Revises: 376615bb5319 -Create Date: 2024-10-21 10:36:57.994360 - -""" -from alembic import op -from ckan.model import meta -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '6b860291073e' -down_revision = '376615bb5319' -branch_labels = None -depends_on = None - - -TABLE_NAME = "falkor_config" - - -def upgrade(): - op.create_table( - TABLE_NAME, - meta.MetaData(), - sa.Column("initialised", sa.Boolean, nullable=False, primary_key=True), - ) - op.execute(""" - INSERT INTO falkor_config(initialised) - VALUES (false) - """) - - -def downgrade(): - op.drop_table(TABLE_NAME) diff --git a/ckanext/falkor/migration/falkor/versions/a3c1de54a0d9_falkor_sync_job.py b/ckanext/falkor/migration/falkor/versions/a3c1de54a0d9_falkor_sync_job.py new file mode 100644 index 0000000..fe380ec --- /dev/null +++ b/ckanext/falkor/migration/falkor/versions/a3c1de54a0d9_falkor_sync_job.py @@ -0,0 +1,68 @@ +"""falkor_sync_job + +Revision ID: a3c1de54a0d9 +Revises: 376615bb5319 +Create Date: 2024-10-22 14:44:44.219739 + +""" +from alembic import op +from ckan.model import meta +from uuid import uuid4 +from enum import Enum + +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = 'a3c1de54a0d9' +down_revision = '376615bb5319' +branch_labels = None +depends_on = None + + +class FalkorSyncJobStatus(Enum): + RUNNING = "running" + FINISHED = "finished" + FAILED = "failed" + + +def upgrade(): + op.create_table( + "falkor_sync_job", + meta.MetaData(), + sa.Column( + "id", + sa.dialects.postgresql.UUID(as_uuid=True), + primary_key=True, + nullable=False, + default=uuid4 + ), + sa.Column( + "status", + sa.Enum(FalkorSyncJobStatus), + nullable=False, + ), + sa.Column( + "is_latest", + sa.Boolean, + nullable=False + ), + sa.Column( + "start", + sa.DateTime, + nullable=False + ), + sa.Column( + "end", + sa.DateTime, + nullable=True, + default=None + ) + ) + + +def downgrade(): + op.drop_table( + "falkor_sync_job" + ) + op.execute("DROP TYPE IF EXISTS falkorsyncjobstatus;") From 32d79d4fe36526852942696e29b4590932e1edfc Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 22 Oct 2024 16:20:43 +0100 Subject: [PATCH 055/156] insert sync job --- ckanext/falkor/model.py | 65 ++++++++++++++++++++++++++++++++++++++++ ckanext/falkor/plugin.py | 16 +++++++--- 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index d197d99..2214742 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -134,3 +134,68 @@ def get_resources_without_create_events(session: sa.orm.Session) -> List[Resourc ).filter( sa.cast(distinct_resource_creates.c.object_id, sa.TEXT) == None ).all() + + +class FalkorSyncJobStatus(Enum): + RUNNING = "running" + FINISHED = "finished" + FAILED = "failed" + + +class FalkorSyncJob(Base): + __tablename__ = "falkor_sync_job" + + id = sa.Column( + sa.dialects.postgresql.UUID(as_uuid=True), + primary_key=True, + nullable=False, + default=uuid4 + ) + status = sa.Column( + sa.Enum(FalkorSyncJobStatus), + nullable=False, + ) + is_latest = sa.Column( + sa.Boolean, + nullable=False + ) + start = sa.Column( + sa.DateTime, + nullable=False + ) + end = sa.Column( + sa.DateTime, 
+ nullable=True, + default=None + ) + + +def new_falkor_sync_job( + id: UUID = uuid4(), + status: FalkorSyncJobStatus = FalkorSyncJobStatus.RUNNING, + is_latest: bool = True, + start: datetime = datetime.now(), + end: Optional[datetime] = None +) -> FalkorSyncJob: + return FalkorSyncJob( + id=id, + status=status, + is_latest=is_latest, + start=start, + end=end + ) + + +def insert_new_falkor_sync_job(session: sa.orm.Session, job: FalkorSyncJob): + running_job = session.query(FalkorSyncJob).filter( + FalkorSyncJob.status == FalkorSyncJobStatus.RUNNING + ).first() + + if running_job is not None: + raise Exception(f"Falkor sync job is already running. ID: {running_job.id}") + + session.query(FalkorSyncJob).filter( + FalkorSyncJob.is_latest == True + ).update({FalkorSyncJob.is_latest: False}) + session.add(job) + session.commit() diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 409277f..d84dc60 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -7,6 +7,7 @@ from ckanext.falkor import client, auth, event_handler, model from ckan.lib import jobs +from datetime import datetime log = logging.getLogger(__name__) @@ -74,8 +75,10 @@ def configure(self, config): self.sync() def sync(self): + session: sa.orm.Session = ckan_model.meta.create_local_session() + job = model.new_falkor_sync_job() try: - session: sa.orm.Session = ckan_model.meta.create_local_session() + model.insert_new_falkor_sync_job(session, job) packages = model.get_packages_without_create_events(session) for package in packages: @@ -85,10 +88,15 @@ def sync(self): for resource in resources: self.event_handler.handle_resource_create(resource, "sync_job") - session.commit() - + job.status = model.FalkorSyncJobStatus.FINISHED except Exception as e: - log.exception(e) + log.exception(e, extra={"job_id": job.id}) + session.rollback() + job.status = model.FalkorSyncJobStatus.FAILED + finally: + job.end = datetime.now() + session.commit() + session.close() # IResourceController 
From 8fee68a5e6c3c0f0f75acb1020b4f7a58ab5a9d9 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 22 Oct 2024 16:42:59 +0100 Subject: [PATCH 056/156] Refactor package_create to reprocess events --- ckanext/falkor/event_handler.py | 33 +++++++++++++++++++++------------ ckanext/falkor/model.py | 4 ++++ ckanext/falkor/plugin.py | 16 +++++++++++++++- 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 98ec452..13b031e 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -2,8 +2,9 @@ from uuid import UUID, uuid4 from datetime import datetime -from typing import Union +from typing import Union, Optional from ckanext.falkor.model import ( + FalkorEvent, FalkorEventType, FalkorEventObjectType, insert_pending_event, @@ -26,17 +27,24 @@ class EventHandler: def __init__(self, falkor: Client): self.falkor = falkor - def handle_package_create(self, package: Package, user_id: str): + def handle_package_create( + self, + package_id: str, + metadata_created: datetime, + user_id: str, + event: Optional[FalkorEvent] = None + ): session = meta.create_local_session() - insert_pending_event( - session, - event_id=generate_event_id(), - object_id=UUID(package.id), - object_type=FalkorEventObjectType.PACKAGE, - event_type=FalkorEventType.CREATE, - user_id=user_id, - created_at=package.metadata_created - ) + if event is None: + event = insert_pending_event( + session, + event_id=generate_event_id(), + object_id=UUID(package_id), + object_type=FalkorEventObjectType.PACKAGE, + event_type=FalkorEventType.CREATE, + user_id=user_id, + created_at=metadata_created + ) session.commit() def handle_resource_create(self, resource: Resource, user_id: str): @@ -123,7 +131,8 @@ def handle_modification_event( if operation != DomainObjectOperation.new: return - handler.handle_package_create(entity, user_id) + handler.handle_package_create( + entity["id"], entity["metadata_created"], 
user_id) elif isinstance(entity, Resource): if operation == DomainObjectOperation.new: diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 2214742..10933b3 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -96,6 +96,10 @@ def insert_pending_event( ) +def get_pending_events(session: sa.orm.Session) -> List[FalkorEvent]: + return session.query(FalkorEvent).filter(FalkorEvent.status == FalkorEventStatus.PENDING).all() + + def get_packages_without_create_events(session: sa.orm.Session) -> List[Package]: distinct_package_creates = session.query( FalkorEvent diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index d84dc60..989c85f 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -82,12 +82,26 @@ def sync(self): packages = model.get_packages_without_create_events(session) for package in packages: - self.event_handler.handle_package_create(package, "sync_job") + self.event_handler.handle_package_create( + package_id=package.id, + metadata_created=package.metadata_created, + user_id="sync_job" + ) resources = model.get_resources_without_create_events(session) for resource in resources: self.event_handler.handle_resource_create(resource, "sync_job") + pending_events = model.get_pending_events(session) + for event in pending_events: + if event.object_type == model.FalkorEventObjectType.PACKAGE \ + and event.event_type == model.FalkorEventType.CREATE: + self.event_handler.handle_package_create( + package_id=event.object_id, + metadata_created=event.created_at, + user_id=event.user_id, event=event + ) + job.status = model.FalkorSyncJobStatus.FINISHED except Exception as e: log.exception(e, extra={"job_id": job.id}) From 668983f62b7606c79517df6ae34b7bbc5259b22f Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 23 Oct 2024 13:09:59 +0100 Subject: [PATCH 057/156] Make event_handler handlers compatible with sync job --- ckanext/falkor/event_handler.py | 152 ++++++++++++++++++++------------ 
ckanext/falkor/plugin.py | 41 ++++++++- 2 files changed, 134 insertions(+), 59 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 13b031e..2708839 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -34,8 +34,8 @@ def handle_package_create( user_id: str, event: Optional[FalkorEvent] = None ): - session = meta.create_local_session() if event is None: + session = meta.create_local_session() event = insert_pending_event( session, event_id=generate_event_id(), @@ -45,65 +45,88 @@ def handle_package_create( user_id=user_id, created_at=metadata_created ) - session.commit() - - def handle_resource_create(self, resource: Resource, user_id: str): - session = meta.create_local_session() - insert_pending_event( - session, - event_id=generate_event_id(), - object_id=UUID(resource.id), - object_type=FalkorEventObjectType.RESOURCE, - event_type=FalkorEventType.CREATE, - user_id=user_id, - created_at=resource.created - ) - session.commit() + session.commit() + + def handle_resource_create( + self, + resource_id: str, + created_at: datetime, + user_id: str, + event: Optional[FalkorEvent] = None + ): + if event is None: + session = meta.create_local_session() + insert_pending_event( + session, + event_id=generate_event_id(), + object_id=UUID(resource_id), + object_type=FalkorEventObjectType.RESOURCE, + event_type=FalkorEventType.CREATE, + user_id=user_id, + created_at=created_at + ) + session.commit() def handle_resource_read( self, resource_id: UUID, package_id: UUID, user_id: str, - created_at: datetime = datetime.now() + created_at: datetime = datetime.now(), + event: Optional[FalkorEvent] = None ): - session = meta.create_local_session() - insert_pending_event( - session, - event_id=generate_event_id(), - object_id=resource_id, - object_type=FalkorEventObjectType.RESOURCE, - event_type=FalkorEventType.READ, - user_id=user_id, - created_at=created_at - ) - session.commit() - - def 
handle_resource_update(self, resource: Resource, user_id: str): - session = meta.create_local_session() - insert_pending_event( - session, - event_id=generate_event_id(), - object_id=UUID(resource.id), - object_type=FalkorEventObjectType.RESOURCE, - event_type=FalkorEventType.UPDATE, - user_id=user_id, - created_at=resource.created - ) - session.commit() - - def handle_resource_delete(self, resource: Resource, user_id: str): - session = meta.create_local_session() - insert_pending_event( - session, - event_id=generate_event_id(), - object_id=UUID(resource.id), - object_type=FalkorEventObjectType.RESOURCE, - event_type=FalkorEventType.DELETE, - user_id=user_id, - created_at=resource.created - ) - session.commit() + if event is None: + session = meta.create_local_session() + insert_pending_event( + session, + event_id=generate_event_id(), + object_id=resource_id, + object_type=FalkorEventObjectType.RESOURCE, + event_type=FalkorEventType.READ, + user_id=user_id, + created_at=created_at + ) + session.commit() + + def handle_resource_update( + self, + resource_id: str, + created_at: datetime, + user_id: str, + event: Optional[FalkorEvent] = None + ): + if event is None: + session = meta.create_local_session() + insert_pending_event( + session, + event_id=generate_event_id(), + object_id=UUID(resource_id), + object_type=FalkorEventObjectType.RESOURCE, + event_type=FalkorEventType.UPDATE, + user_id=user_id, + created_at=created_at + ) + session.commit() + + def handle_resource_delete( + self, + resource_id: str, + created_at: datetime, + user_id: str, + event: Optional[FalkorEvent] = None + ): + if event is None: + session = meta.create_local_session() + insert_pending_event( + session, + event_id=generate_event_id(), + object_id=UUID(resource_id), + object_type=FalkorEventObjectType.RESOURCE, + event_type=FalkorEventType.DELETE, + user_id=user_id, + created_at=created_at + ) + session.commit() def handle_read_event( @@ -132,12 +155,27 @@ def handle_modification_event( 
return handler.handle_package_create( - entity["id"], entity["metadata_created"], user_id) + package_id=entity.id, + metadata_created=entity.metadata_created, + user_id=user_id + ) elif isinstance(entity, Resource): if operation == DomainObjectOperation.new: - handler.handle_resource_create(entity, user_id) + handler.handle_resource_create( + resource_id=entity.id, + created_at=entity.created, + user_id=user_id + ) elif operation == DomainObjectOperation.changed: - handler.handle_resource_update(entity, user_id) + handler.handle_resource_update( + resource_id=entity.id, + created_at=entity.created, + user_id=user_id + ) elif operation == DomainObjectOperation.deleted: - handler.handle_resource_delete(entity, user_id) + handler.handle_resource_delete( + resource_id=entity.id, + created_at=entity.created, + user_id=user_id + ) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 989c85f..1f44617 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -8,6 +8,7 @@ from ckanext.falkor import client, auth, event_handler, model from ckan.lib import jobs from datetime import datetime +from flask import request log = logging.getLogger(__name__) @@ -90,7 +91,11 @@ def sync(self): resources = model.get_resources_without_create_events(session) for resource in resources: - self.event_handler.handle_resource_create(resource, "sync_job") + self.event_handler.handle_resource_create( + resource_id=resource.id, + created_at=resource.created, + user_id="sync_job" + ) pending_events = model.get_pending_events(session) for event in pending_events: @@ -99,8 +104,34 @@ def sync(self): self.event_handler.handle_package_create( package_id=event.object_id, metadata_created=event.created_at, - user_id=event.user_id, event=event + user_id=event.user_id, + event=event ) + elif event.object_type == model.FalkorEventObjectType.RESOURCE: + if model.FalkorEventType.CREATE: + self.event_handler.handle_resource_create( + resource_id=event.id, + 
created_at=event.created_at, + user_id=event.user_id + ) + elif model.FalkorEventType.READ: + self.event_handler.handle_resource_read( + resource_id=event.id, + created_at=event.created_at, + user_id=event.user_id + ) + elif model.FalkorEventType.UPDATE: + self.event_handler.handle_resource_update( + resource_id=event.id, + created_at=event.created_at, + user_id=event.user_id + ) + elif model.FalkorEventType.DELETE: + self.event_handler.handle_resource_delete( + resource_id=event.id, + created_at=event.created_at, + user_id=event.user_id + ) job.status = model.FalkorSyncJobStatus.FINISHED except Exception as e: @@ -115,6 +146,12 @@ def sync(self): # IResourceController def before_show(self, resource_dict): + # TODO: See whether we should expand on this idea as we are currently + # generating a lot of reads. For now use to reduce noise of READ events + # during development. + if resource_dict["id"] not in request.url: + return + jobs.enqueue( event_handler.handle_read_event, [ From f72c20f009f7b5d7b22afebc183aa71b40fcf028 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 23 Oct 2024 14:34:33 +0100 Subject: [PATCH 058/156] Simplify event handling --- ckanext/falkor/event_handler.py | 193 ++++---------------------------- ckanext/falkor/model.py | 47 +------- ckanext/falkor/plugin.py | 152 ++++++++++++++----------- 3 files changed, 113 insertions(+), 279 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 2708839..6742571 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -1,181 +1,34 @@ import logging +import sqlalchemy as sa -from uuid import UUID, uuid4 -from datetime import datetime -from typing import Union, Optional from ckanext.falkor.model import ( FalkorEvent, FalkorEventType, - FalkorEventObjectType, - insert_pending_event, + FalkorEventStatus, ) -from ckan.model import meta, Package, Resource -from ckanext.falkor.client import Client +from ckan.model import meta from 
ckan.model.domain_object import DomainObjectOperation log = logging.getLogger(__name__) - -def generate_event_id() -> UUID: - return uuid4() - - -class EventHandler: - falkor: Client - - def __init__(self, falkor: Client): - self.falkor = falkor - - def handle_package_create( - self, - package_id: str, - metadata_created: datetime, - user_id: str, - event: Optional[FalkorEvent] = None - ): - if event is None: - session = meta.create_local_session() - event = insert_pending_event( - session, - event_id=generate_event_id(), - object_id=UUID(package_id), - object_type=FalkorEventObjectType.PACKAGE, - event_type=FalkorEventType.CREATE, - user_id=user_id, - created_at=metadata_created - ) - session.commit() - - def handle_resource_create( - self, - resource_id: str, - created_at: datetime, - user_id: str, - event: Optional[FalkorEvent] = None - ): - if event is None: - session = meta.create_local_session() - insert_pending_event( - session, - event_id=generate_event_id(), - object_id=UUID(resource_id), - object_type=FalkorEventObjectType.RESOURCE, - event_type=FalkorEventType.CREATE, - user_id=user_id, - created_at=created_at - ) - session.commit() - - def handle_resource_read( - self, - resource_id: UUID, - package_id: UUID, - user_id: str, - created_at: datetime = datetime.now(), - event: Optional[FalkorEvent] = None - ): - if event is None: - session = meta.create_local_session() - insert_pending_event( - session, - event_id=generate_event_id(), - object_id=resource_id, - object_type=FalkorEventObjectType.RESOURCE, - event_type=FalkorEventType.READ, - user_id=user_id, - created_at=created_at - ) - session.commit() - - def handle_resource_update( - self, - resource_id: str, - created_at: datetime, - user_id: str, - event: Optional[FalkorEvent] = None - ): - if event is None: - session = meta.create_local_session() - insert_pending_event( - session, - event_id=generate_event_id(), - object_id=UUID(resource_id), - object_type=FalkorEventObjectType.RESOURCE, - 
event_type=FalkorEventType.UPDATE, - user_id=user_id, - created_at=created_at - ) - session.commit() - - def handle_resource_delete( - self, - resource_id: str, - created_at: datetime, - user_id: str, - event: Optional[FalkorEvent] = None - ): - if event is None: - session = meta.create_local_session() - insert_pending_event( - session, - event_id=generate_event_id(), - object_id=UUID(resource_id), - object_type=FalkorEventObjectType.RESOURCE, - event_type=FalkorEventType.DELETE, - user_id=user_id, - created_at=created_at - ) - session.commit() - - -def handle_read_event( - handler: EventHandler, - resource: dict, - user_id: str, -): - handler.handle_resource_read( - resource["id"], - resource["package_id"], - user_id - ) - - -def handle_modification_event( - handler: EventHandler, - entity: Union[Package, Resource], - operation: DomainObjectOperation, - user_id: str, -): - if isinstance(entity, Package): - # Currently Falkor does not track changes to packages. - # We only use the create event to create the dataset - # and ignore any further changes. 
- if operation != DomainObjectOperation.new: - return - - handler.handle_package_create( - package_id=entity.id, - metadata_created=entity.metadata_created, - user_id=user_id - ) - - elif isinstance(entity, Resource): - if operation == DomainObjectOperation.new: - handler.handle_resource_create( - resource_id=entity.id, - created_at=entity.created, - user_id=user_id - ) - elif operation == DomainObjectOperation.changed: - handler.handle_resource_update( - resource_id=entity.id, - created_at=entity.created, - user_id=user_id - ) - elif operation == DomainObjectOperation.deleted: - handler.handle_resource_delete( - resource_id=entity.id, - created_at=entity.created, - user_id=user_id - ) +DomainObjectOperationToFalkorEventTypeMap = { + DomainObjectOperation.new: FalkorEventType.CREATE, + DomainObjectOperation.changed: FalkorEventType.UPDATE, + DomainObjectOperation.deleted: FalkorEventType.DELETE +} + + +def handle_event(event: FalkorEvent): + session: sa.orm.Session = meta.create_local_session() + session.add(event) + session.commit() + try: + pass + except Exception as e: + log.exception(e) + session.rollback() + event.status = FalkorEventStatus.FAILED + session.commit() + finally: + session.close() diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 10933b3..dc8e4ec 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -53,49 +53,6 @@ class FalkorEvent(Base): synced_at = sa.Column(sa.DateTime, nullable=True) -def new_falkor_event( - id: UUID, - object_id: UUID, - object_type: FalkorEventObjectType, - event_type: FalkorEventType, - user_id: str, - created_at: sa.DateTime, - status: FalkorEventStatus = FalkorEventStatus.PENDING, - synced_at: Optional[sa.DateTime] = None -) -> FalkorEvent: - return FalkorEvent( - id=id, - object_id=object_id, - object_type=object_type, - event_type=event_type, - user_id=user_id, - status=status, - created_at=created_at, - synced_at=synced_at - ) - - -def insert_pending_event( - session: 
sa.orm.Session, - event_id: UUID, - object_id: UUID, - object_type: FalkorEventObjectType, - event_type: FalkorEventType, - user_id: str, - created_at: datetime, -): - session.add( - new_falkor_event( - id=event_id, - object_id=object_id, - object_type=object_type, - event_type=event_type, - user_id=user_id, - created_at=created_at, - ) - ) - - def get_pending_events(session: sa.orm.Session) -> List[FalkorEvent]: return session.query(FalkorEvent).filter(FalkorEvent.status == FalkorEventStatus.PENDING).all() @@ -196,9 +153,11 @@ def insert_new_falkor_sync_job(session: sa.orm.Session, job: FalkorSyncJob): ).first() if running_job is not None: - raise Exception(f"Falkor sync job is already running. ID: {running_job.id}") + raise Exception( + f"Falkor sync job is already running. ID: {running_job.id}") session.query(FalkorSyncJob).filter( + # Using "is True" doesn't seem to work here FalkorSyncJob.is_latest == True ).update({FalkorSyncJob.is_latest: False}) session.add(job) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 1f44617..fd2db19 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -1,3 +1,6 @@ +from flask import request +from datetime import datetime +from ckan.lib import jobs import logging import sqlalchemy as sa @@ -5,10 +8,18 @@ import ckan.plugins.toolkit as toolkit import ckan.model as ckan_model -from ckanext.falkor import client, auth, event_handler, model -from ckan.lib import jobs -from datetime import datetime -from flask import request +from ckanext.falkor import client, auth, event_handler +from ckanext.falkor.model import ( + FalkorEvent, + FalkorEventType, + FalkorEventObjectType, + FalkorSyncJobStatus, + new_falkor_sync_job, + get_pending_events, + get_packages_without_create_events, + get_resources_without_create_events, + insert_new_falkor_sync_job +) log = logging.getLogger(__name__) @@ -77,67 +88,50 @@ def configure(self, config): def sync(self): session: sa.orm.Session = 
ckan_model.meta.create_local_session() - job = model.new_falkor_sync_job() + job = new_falkor_sync_job() try: - model.insert_new_falkor_sync_job(session, job) + insert_new_falkor_sync_job(session, job) - packages = model.get_packages_without_create_events(session) + packages = get_packages_without_create_events(session) for package in packages: - self.event_handler.handle_package_create( - package_id=package.id, - metadata_created=package.metadata_created, - user_id="sync_job" + event = FalkorEvent( + object_id=package.id, + object_type=FalkorEventObjectType.PACKAGE, + event_type=FalkorEventType.CREATE, + user_id="sync_job", + created_at=package.metadata_created + ) + jobs.enqueue( + event_handler.handle_event, + [event] ) - resources = model.get_resources_without_create_events(session) + resources = get_resources_without_create_events(session) for resource in resources: - self.event_handler.handle_resource_create( - resource_id=resource.id, - created_at=resource.created, - user_id="sync_job" + event = FalkorEvent( + object_id=resource.id, + object_type=FalkorEventObjectType.RESOURCE, + event_type=FalkorEventType.CREATE, + user_id="sync_job", + created_at=resource.created + ) + jobs.enqueue( + event_handler.handle_event, + [event] ) - pending_events = model.get_pending_events(session) + pending_events = get_pending_events(session) for event in pending_events: - if event.object_type == model.FalkorEventObjectType.PACKAGE \ - and event.event_type == model.FalkorEventType.CREATE: - self.event_handler.handle_package_create( - package_id=event.object_id, - metadata_created=event.created_at, - user_id=event.user_id, - event=event - ) - elif event.object_type == model.FalkorEventObjectType.RESOURCE: - if model.FalkorEventType.CREATE: - self.event_handler.handle_resource_create( - resource_id=event.id, - created_at=event.created_at, - user_id=event.user_id - ) - elif model.FalkorEventType.READ: - self.event_handler.handle_resource_read( - resource_id=event.id, - 
created_at=event.created_at, - user_id=event.user_id - ) - elif model.FalkorEventType.UPDATE: - self.event_handler.handle_resource_update( - resource_id=event.id, - created_at=event.created_at, - user_id=event.user_id - ) - elif model.FalkorEventType.DELETE: - self.event_handler.handle_resource_delete( - resource_id=event.id, - created_at=event.created_at, - user_id=event.user_id - ) - - job.status = model.FalkorSyncJobStatus.FINISHED + jobs.enqueue( + event_handler.handle_event, + [event] + ) + + job.status = FalkorSyncJobStatus.FINISHED except Exception as e: log.exception(e, extra={"job_id": job.id}) session.rollback() - job.status = model.FalkorSyncJobStatus.FAILED + job.status = FalkorSyncJobStatus.FAILED finally: job.end = datetime.now() session.commit() @@ -146,19 +140,26 @@ def sync(self): # IResourceController def before_show(self, resource_dict): + resource_id = resource_dict["id"] + created_at = resource_dict["created"] + # TODO: See whether we should expand on this idea as we are currently # generating a lot of reads. For now use to reduce noise of READ events # during development. - if resource_dict["id"] not in request.url: + if resource_id not in request.url: return + event = FalkorEvent( + object_id=resource_id, + object_type=FalkorEventObjectType.RESOURCE, + event_type=FalkorEventType.READ, + user_id=get_user_id(), + created_at=created_at + ) + jobs.enqueue( - event_handler.handle_read_event, - [ - self.event_handler, - resource_dict, - get_user_id() - ] + event_handler.handle_event, + [event] ) self.get_helpers() @@ -171,12 +172,33 @@ def notify( if operation is None: return + event = FalkorEvent( + object_id=entity.id, + event_type=event_handler.DomainObjectOperationToFalkorEventTypeMap[ + operation + ], + user_id=get_user_id(), + ) + + if isinstance(entity, ckan_model.Package): + # Currently Falkor does not track changes to packages. + # We only use the create event to create the dataset + # and ignore any further changes. 
+ if event.event_type != FalkorEventType.CREATE: + return + + event.object_type = FalkorEventObjectType.PACKAGE + event.created_at = entity.metadata_created + + elif isinstance(entity, ckan_model.Resource): + event.object_type = FalkorEventObjectType.RESOURCE + event.created_at = entity.created + else: + return + jobs.enqueue( - event_handler.handle_modification_event, - args=[ - self.event_handler, - entity, operation, get_user_id() - ] + event_handler.handle_event, + args=[event] ) def construct_falkor_url(self, resource): From 06e40b4b675d767806b28bf3d036b5b8ed97a2f3 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 23 Oct 2024 14:35:07 +0100 Subject: [PATCH 059/156] Remove sync job run in plugin startup --- ckanext/falkor/plugin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index fd2db19..71efaf3 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -84,7 +84,6 @@ def configure(self, config): ) self.event_handler = event_handler.EventHandler(self.falkor) - self.sync() def sync(self): session: sa.orm.Session = ckan_model.meta.create_local_session() From 1cb9dc69ef000bb32c87a53005bfa5b304e94e07 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 23 Oct 2024 14:41:06 +0100 Subject: [PATCH 060/156] Remove unused plugin properties --- ckanext/falkor/plugin.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 71efaf3..f99331b 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -38,9 +38,6 @@ def get_user_id() -> str: class FalkorPlugin(plugins.SingletonPlugin): falkor: client.Client - engine: sa.engine.Engine - event_handler: event_handler.EventHandler - __initialised: bool plugins.implements(plugins.IConfigurer) plugins.implements(plugins.IConfigurable, inherit=True) @@ -83,8 +80,6 @@ def configure(self, config): auth_client, tenant_id, core_api_url, admin_api_url ) - self.event_handler = 
event_handler.EventHandler(self.falkor) - def sync(self): session: sa.orm.Session = ckan_model.meta.create_local_session() job = new_falkor_sync_job() From e919d1dcd86f47b4850d3f73f7133a108c33120f Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 23 Oct 2024 15:14:03 +0100 Subject: [PATCH 061/156] Add processing status to events --- ckanext/falkor/event_handler.py | 9 ++++++++- .../migration/falkor/versions/376615bb5319_init.py | 1 + ckanext/falkor/model.py | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 6742571..f8247d9 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -1,6 +1,8 @@ import logging import sqlalchemy as sa +from datetime import datetime + from ckanext.falkor.model import ( FalkorEvent, FalkorEventType, @@ -24,7 +26,12 @@ def handle_event(event: FalkorEvent): session.add(event) session.commit() try: - pass + event.status = FalkorEventStatus.PROCESSING + session.commit() + + event.status = FalkorEventStatus.SYNCED + event.synced_at = datetime.now() + session.commit() except Exception as e: log.exception(e) session.rollback() diff --git a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py index 170e1d8..9d8094c 100644 --- a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py +++ b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py @@ -29,6 +29,7 @@ class FalkorEventObjectType(Enum): class FalkorEventStatus(Enum): PENDING = 'pending' + PROCESSING = 'processing' FAILED = 'failed' SYNCED = 'synced' diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index dc8e4ec..3ffdac2 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -20,6 +20,7 @@ class FalkorEventObjectType(Enum): class FalkorEventStatus(Enum): PENDING = 'pending' + PROCESSING = 'processing' FAILED = 'failed' SYNCED = 'synced' From 
bc70d70d5ec9c1adcdec2765ac978b700e754e34 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 23 Oct 2024 15:33:04 +0100 Subject: [PATCH 062/156] Change event_handler to class --- ckanext/falkor/event_handler.py | 38 +++++++++++++++++++-------------- ckanext/falkor/plugin.py | 22 +++++++++++++------ 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index f8247d9..f051c8f 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -8,6 +8,7 @@ FalkorEventType, FalkorEventStatus, ) +from ckanext.falkor.client import Client from ckan.model import meta from ckan.model.domain_object import DomainObjectOperation @@ -21,21 +22,26 @@ } -def handle_event(event: FalkorEvent): - session: sa.orm.Session = meta.create_local_session() - session.add(event) - session.commit() - try: - event.status = FalkorEventStatus.PROCESSING - session.commit() +class EventHandler: + falkor: Client - event.status = FalkorEventStatus.SYNCED - event.synced_at = datetime.now() - session.commit() - except Exception as e: - log.exception(e) - session.rollback() - event.status = FalkorEventStatus.FAILED + def __init__(self, falkor: Client): + self.falkor = falkor + + def handle(self, event: FalkorEvent): + session: sa.orm.Session = meta.create_local_session() + session.add(event) session.commit() - finally: - session.close() + try: + event.status = FalkorEventStatus.PROCESSING + session.commit() + + event.status = FalkorEventStatus.SYNCED + event.synced_at = datetime.now() + session.commit() + except Exception as e: + log.exception(e) + event.status = FalkorEventStatus.FAILED + session.commit() + finally: + session.close() diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index f99331b..01d1e30 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -8,7 +8,7 @@ import ckan.plugins.toolkit as toolkit import ckan.model as ckan_model -from ckanext.falkor import client, 
auth, event_handler +from ckanext.falkor import client, auth from ckanext.falkor.model import ( FalkorEvent, FalkorEventType, @@ -20,6 +20,10 @@ get_resources_without_create_events, insert_new_falkor_sync_job ) +from ckanext.falkor.event_handler import ( + EventHandler, + DomainObjectOperationToFalkorEventTypeMap +) log = logging.getLogger(__name__) @@ -38,6 +42,7 @@ def get_user_id() -> str: class FalkorPlugin(plugins.SingletonPlugin): falkor: client.Client + event_handler: EventHandler plugins.implements(plugins.IConfigurer) plugins.implements(plugins.IConfigurable, inherit=True) @@ -80,6 +85,9 @@ def configure(self, config): auth_client, tenant_id, core_api_url, admin_api_url ) + self.event_handler = EventHandler(self.falkor) + self.sync() + def sync(self): session: sa.orm.Session = ckan_model.meta.create_local_session() job = new_falkor_sync_job() @@ -96,7 +104,7 @@ def sync(self): created_at=package.metadata_created ) jobs.enqueue( - event_handler.handle_event, + self.event_handler.handle, [event] ) @@ -110,14 +118,14 @@ def sync(self): created_at=resource.created ) jobs.enqueue( - event_handler.handle_event, + self.event_handler.handle, [event] ) pending_events = get_pending_events(session) for event in pending_events: jobs.enqueue( - event_handler.handle_event, + self.event_handler.handle, [event] ) @@ -152,7 +160,7 @@ def before_show(self, resource_dict): ) jobs.enqueue( - event_handler.handle_event, + self.event_handler.handle, [event] ) @@ -168,7 +176,7 @@ def notify( event = FalkorEvent( object_id=entity.id, - event_type=event_handler.DomainObjectOperationToFalkorEventTypeMap[ + event_type=DomainObjectOperationToFalkorEventTypeMap[ operation ], user_id=get_user_id(), @@ -191,7 +199,7 @@ def notify( return jobs.enqueue( - event_handler.handle_event, + self.event_handler.handle, args=[event] ) From 1dbb1d17054c971edfdf0fd3ebf7a05e9e70487c Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 23 Oct 2024 15:49:37 +0100 Subject: [PATCH 063/156] Add timeout to 
auth post requests --- ckanext/falkor/auth.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ckanext/falkor/auth.py b/ckanext/falkor/auth.py index c59dc10..5befe3d 100644 --- a/ckanext/falkor/auth.py +++ b/ckanext/falkor/auth.py @@ -76,7 +76,7 @@ def __login(self) -> None: "password": self.__credentials.password, "grant_type": "password", } - response = requests.post(self.__endpoint, request) + response = requests.post(self.__endpoint, request, timeout=10) body = response.json() self.__set_token(body) @@ -88,7 +88,7 @@ def __refresh(self) -> None: "refresh_token": self.__refresh_token.token, } - response = requests.post(self.__endpoint, request) + response = requests.post(self.__endpoint, request, timeout=10) body = response.json() self.__set_token(body) From 00939c7ad04ed33d24f79f90344ac307ae3ad255 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 23 Oct 2024 15:49:50 +0100 Subject: [PATCH 064/156] Add return type to falkor api requests --- ckanext/falkor/client.py | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/ckanext/falkor/client.py b/ckanext/falkor/client.py index 012c08a..b2389e8 100644 --- a/ckanext/falkor/client.py +++ b/ckanext/falkor/client.py @@ -22,28 +22,46 @@ def base_headers(access_token: str, user_id: str) -> HttpHeaders: } -def falkor_post(url: str, payload: dict, auth: auth.Auth, user_id: str): +def falkor_post( + url: str, + payload: dict, + auth: auth.Auth, + user_id: str +) -> requests.Response: response = requests.post(url, headers=base_headers( auth.access_token, user_id), json=payload, timeout=120) log.debug(response.json()) return response -def falkor_put(url: str, payload: dict, auth: auth.Auth, user_id: str): +def falkor_put( + url: str, + payload: dict, + auth: auth.Auth, + user_id: str +) -> requests.Response: response = requests.put(url, headers=base_headers( auth.access_token, user_id), json=payload, timeout=120) log.debug(response.json()) return response 
-def falkor_get(url: str, auth: auth.Auth, user_id: str): +def falkor_get( + url: str, + auth: auth.Auth, + user_id: str +) -> requests.Response: response = requests.get(url, headers=base_headers( auth.access_token, user_id), timeout=120) log.debug(response.json()) return response -def falkor_delete(url: str, auth: auth.Auth, user_id: str): +def falkor_delete( + url: str, + auth: auth.Auth, + user_id: str +) -> requests.Response: response = requests.delete(url, headers=base_headers( auth.access_token, user_id), timeout=120) log.debug(response.json()) @@ -68,7 +86,7 @@ def __init__( self.__core_base_url = core_base_url self.__admin_base_url = admin_base_url - def dataset_create(self, package_id: str): + def dataset_create(self, package_id: str, user_id: str): url = self.__admin_base_url + self.__tenant_id + "/dataset" payload = { "datasetId": package_id, @@ -81,11 +99,7 @@ def dataset_create(self, package_id: str): "tokensEnabled": "false", } - # run async request - log.debug(f"Create dataset with id {package_id}") - # jobs.enqueue( - # falkor_post, [url, payload, self.__auth, get_user_id()] - # ) + return falkor_post(url, payload, self.__auth, user_id) def document_read(self, package_id: str, resource_id: str): url = ( From 79ba78ca3b0555b592e3b13961142e0a82d8b24b Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 23 Oct 2024 15:50:10 +0100 Subject: [PATCH 065/156] Reintroduce package create events for falkor --- ckanext/falkor/event_handler.py | 4 ++++ ckanext/falkor/plugin.py | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index f051c8f..e6dd6be 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -7,6 +7,7 @@ FalkorEvent, FalkorEventType, FalkorEventStatus, + FalkorEventObjectType ) from ckanext.falkor.client import Client @@ -36,6 +37,9 @@ def handle(self, event: FalkorEvent): event.status = FalkorEventStatus.PROCESSING session.commit() + if 
event.object_type == FalkorEventObjectType.PACKAGE: + self.falkor.dataset_create(event.object_id, event.user_id) + event.status = FalkorEventStatus.SYNCED event.synced_at = datetime.now() session.commit() diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 01d1e30..e4d0ee1 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -86,7 +86,6 @@ def configure(self, config): ) self.event_handler = EventHandler(self.falkor) - self.sync() def sync(self): session: sa.orm.Session = ckan_model.meta.create_local_session() From bef3be729a0d2e48b37607f3f66162a6370fd85b Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 23 Oct 2024 16:09:46 +0100 Subject: [PATCH 066/156] Do not process resources that still have a pending create package event --- ckanext/falkor/event_handler.py | 17 +++++++++++++---- ckanext/falkor/model.py | 18 +++++++++++++++++- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index e6dd6be..9684800 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -7,7 +7,8 @@ FalkorEvent, FalkorEventType, FalkorEventStatus, - FalkorEventObjectType + FalkorEventObjectType, + get_package_create_event_status_for_resource ) from ckanext.falkor.client import Client @@ -34,11 +35,19 @@ def handle(self, event: FalkorEvent): session.add(event) session.commit() try: - event.status = FalkorEventStatus.PROCESSING - session.commit() - + # TODO: Clean up nesting. if event.object_type == FalkorEventObjectType.PACKAGE: + # TODO: Is there a way to avoid setting PROCESSING in both branches? 
+ event.status = FalkorEventStatus.PROCESSING + session.commit() self.falkor.dataset_create(event.object_id, event.user_id) + elif event.object_type == FalkorEventObjectType.RESOURCE: + status = get_package_create_event_status_for_resource( + session, event.object_id) + if status != FalkorEventStatus.SYNCED: + return + event.status = FalkorEventStatus.PROCESSING + session.commit() event.status = FalkorEventStatus.SYNCED event.synced_at = datetime.now() diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 3ffdac2..67007d9 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -98,6 +98,23 @@ def get_resources_without_create_events(session: sa.orm.Session) -> List[Resourc ).all() +def get_package_create_event_status_for_resource( + session: sa.orm.Session, + resource_id: UUID +) -> FalkorEventStatus: + resource = session.query(Resource).filter(Resource.id == resource_id).first() + + package = session.query(FalkorEvent).filter( + FalkorEvent.object_id == resource.package_id + ).filter( + FalkorEvent.object_type == FalkorEventObjectType.PACKAGE + ).filter( + FalkorEvent.event_type == FalkorEventType.CREATE + ).first() + + return package.status + + class FalkorSyncJobStatus(Enum): RUNNING = "running" FINISHED = "finished" @@ -162,4 +179,3 @@ def insert_new_falkor_sync_job(session: sa.orm.Session, job: FalkorSyncJob): FalkorSyncJob.is_latest == True ).update({FalkorSyncJob.is_latest: False}) session.add(job) - session.commit() From e253b8f92a7a6c071a53df4bd98787755ea59602 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 24 Oct 2024 12:02:45 +0100 Subject: [PATCH 067/156] Events for new documents --- ckanext/falkor/client.py | 62 ++++++++++++++++----------------- ckanext/falkor/event_handler.py | 27 +++++++++++--- ckanext/falkor/model.py | 15 ++++---- 3 files changed, 62 insertions(+), 42 deletions(-) diff --git a/ckanext/falkor/client.py b/ckanext/falkor/client.py index b2389e8..9f0352b 100644 --- a/ckanext/falkor/client.py +++ 
b/ckanext/falkor/client.py @@ -5,6 +5,7 @@ from typing import TypedDict from ckanext.falkor import auth +from ckanext.falkor.model import FalkorEvent log = logging.getLogger(__name__) @@ -13,12 +14,11 @@ ) -def base_headers(access_token: str, user_id: str) -> HttpHeaders: +def base_headers(access_token: str) -> HttpHeaders: return { "Content-Type": "application/json", "accept": "application/json", "Authorization": "Bearer " + access_token, - "x-user": user_id, } @@ -26,10 +26,9 @@ def falkor_post( url: str, payload: dict, auth: auth.Auth, - user_id: str ) -> requests.Response: response = requests.post(url, headers=base_headers( - auth.access_token, user_id), json=payload, timeout=120) + auth.access_token), json=payload, timeout=120) log.debug(response.json()) return response @@ -38,10 +37,9 @@ def falkor_put( url: str, payload: dict, auth: auth.Auth, - user_id: str ) -> requests.Response: response = requests.put(url, headers=base_headers( - auth.access_token, user_id), json=payload, timeout=120) + auth.access_token), json=payload, timeout=120) log.debug(response.json()) return response @@ -49,10 +47,9 @@ def falkor_put( def falkor_get( url: str, auth: auth.Auth, - user_id: str ) -> requests.Response: response = requests.get(url, headers=base_headers( - auth.access_token, user_id), timeout=120) + auth.access_token), timeout=120) log.debug(response.json()) return response @@ -60,10 +57,9 @@ def falkor_get( def falkor_delete( url: str, auth: auth.Auth, - user_id: str ) -> requests.Response: response = requests.delete(url, headers=base_headers( - auth.access_token, user_id), timeout=120) + auth.access_token), timeout=120) log.debug(response.json()) return response @@ -86,22 +82,21 @@ def __init__( self.__core_base_url = core_base_url self.__admin_base_url = admin_base_url - def dataset_create(self, package_id: str, user_id: str): + def dataset_create(self, package_id: str): url = self.__admin_base_url + self.__tenant_id + "/dataset" payload = { "datasetId": package_id, 
"encryptionType": "none", "externalStorage": "false", "permissionEnabled": "false", - "taggingEnabled": "true", - "linkedContract": "none", + "taggingEnabled": "false", "iotaEnabled": "false", "tokensEnabled": "false", } - return falkor_post(url, payload, self.__auth, user_id) + falkor_post(url, payload, self.__auth).raise_for_status() - def document_read(self, package_id: str, resource_id: str): + def document_get(self, package_id: str, resource_id: str): url = ( self.__core_base_url + self.__tenant_id @@ -112,36 +107,41 @@ def document_read(self, package_id: str, resource_id: str): + "/body" ) - log.debug(f"Read for document with id {resource_id}") - # jobs.enqueue(falkor_get, [url, self.__auth, get_user_id()]) + resp = falkor_get(url, self.__auth) + resp.raise_for_status() + return resp.json() def document_create( self, - resource: model.Resource, - organisation_id: str, + package_id: str, + event: FalkorEvent, + # organisation_id: str, ): url = ( self.__core_base_url + self.__tenant_id + "/dataset/" - + resource.package_id + + package_id + "/create" ) payload = { - "documentId": resource.id, - "data": json.dumps(resource.as_dict()), - "tags": { - "organisation_id": organisation_id, - "package_id": resource.package_id, - "resource_id": resource.id, + "documentId": str(event.object_id), + "data": json.dumps([{ + "id": str(event.id), + "event_type": event.event_type, + "user_id": event.user_id, + "created_at": str(event.created_at), + }]), + "documentMetadata": { + # "organisation_id": organisation_id, + "package_id": package_id, + "resource_id": str(event.object_id), }, } - log.debug(f"Creating document with id {resource.id}") - # jobs.enqueue( - # falkor_post, [url, payload, self.__auth, get_user_id()] - # ) + # log.debug(f"Creating document with id {resource.id}") + falkor_post(url, payload, self.__auth).raise_for_status() def document_update(self, resource: model.Resource): url = ( @@ -156,7 +156,7 @@ def document_update(self, resource: model.Resource): 
log.debug(f"Updating document with id {resource.id}") # jobs.enqueue( - # falkor_put, [url, resource.as_dict(), self.__auth, get_user_id()] + # falkor_put, [url, resource.as_dict(), self.__auth] # ) def document_delete(self, resource: model.Resource): diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 9684800..a846dc0 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -2,13 +2,14 @@ import sqlalchemy as sa from datetime import datetime +from requests import HTTPError from ckanext.falkor.model import ( FalkorEvent, FalkorEventType, FalkorEventStatus, FalkorEventObjectType, - get_package_create_event_status_for_resource + get_package_create_event_for_resource ) from ckanext.falkor.client import Client @@ -40,12 +41,29 @@ def handle(self, event: FalkorEvent): # TODO: Is there a way to avoid setting PROCESSING in both branches? event.status = FalkorEventStatus.PROCESSING session.commit() - self.falkor.dataset_create(event.object_id, event.user_id) + + self.falkor.dataset_create(event.object_id) + elif event.object_type == FalkorEventObjectType.RESOURCE: - status = get_package_create_event_status_for_resource( + package_create_event = get_package_create_event_for_resource( session, event.object_id) - if status != FalkorEventStatus.SYNCED: + + # TODO: Add retry here in case resource was created shortly after + # package and it is still processing. 
+ if package_create_event.status != FalkorEventStatus.SYNCED: return + + package_id = str(package_create_event.object_id) + + try: + document = self.falkor.document_get( + package_id, str(event.object_id)) + except HTTPError as e: + if e.response.status_code == 404: + self.falkor.document_create(package_id, event) + else: + raise e + event.status = FalkorEventStatus.PROCESSING session.commit() @@ -54,6 +72,7 @@ def handle(self, event: FalkorEvent): session.commit() except Exception as e: log.exception(e) + log.debug(e.response.json()) event.status = FalkorEventStatus.FAILED session.commit() finally: diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 67007d9..17e2138 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -13,19 +13,19 @@ log = logging.getLogger(__name__) -class FalkorEventObjectType(Enum): +class FalkorEventObjectType(str, Enum): PACKAGE = 'package' RESOURCE = 'resource' -class FalkorEventStatus(Enum): +class FalkorEventStatus(str, Enum): PENDING = 'pending' PROCESSING = 'processing' FAILED = 'failed' SYNCED = 'synced' -class FalkorEventType(Enum): +class FalkorEventType(str, Enum): CREATE = "create" READ = "read" UPDATE = "update" @@ -98,11 +98,12 @@ def get_resources_without_create_events(session: sa.orm.Session) -> List[Resourc ).all() -def get_package_create_event_status_for_resource( +def get_package_create_event_for_resource( session: sa.orm.Session, resource_id: UUID -) -> FalkorEventStatus: - resource = session.query(Resource).filter(Resource.id == resource_id).first() +) -> FalkorEvent: + resource = session.query(Resource).filter( + Resource.id == resource_id).first() package = session.query(FalkorEvent).filter( FalkorEvent.object_id == resource.package_id @@ -112,7 +113,7 @@ def get_package_create_event_status_for_resource( FalkorEvent.event_type == FalkorEventType.CREATE ).first() - return package.status + return package class FalkorSyncJobStatus(Enum): From 5be0bd92c652ad7bde1722d5bdb0d969acf667fb Mon Sep 
17 00:00:00 2001 From: wajones98 Date: Thu, 24 Oct 2024 12:27:05 +0100 Subject: [PATCH 068/156] Add events for existing documents --- ckanext/falkor/client.py | 30 +++++++++--------------------- ckanext/falkor/event_handler.py | 21 +++++++++++++++++++-- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/ckanext/falkor/client.py b/ckanext/falkor/client.py index 9f0352b..0bef896 100644 --- a/ckanext/falkor/client.py +++ b/ckanext/falkor/client.py @@ -140,34 +140,22 @@ def document_create( }, } - # log.debug(f"Creating document with id {resource.id}") falkor_post(url, payload, self.__auth).raise_for_status() - def document_update(self, resource: model.Resource): + def document_update( + self, + resource_id: str, + package_id: str, + data: str + ): url = ( self.__core_base_url + self.__tenant_id + "/dataset/" - + resource.package_id + + package_id + "/" - + resource.id + + resource_id + "/body" ) - log.debug(f"Updating document with id {resource.id}") - # jobs.enqueue( - # falkor_put, [url, resource.as_dict(), self.__auth] - # ) - - def document_delete(self, resource: model.Resource): - url = ( - self.__core_base_url - + self.__tenant_id - + "/dataset/" - + resource.package_id - + "/" - + resource.id - ) - - log.debug(f"Deleting document with id {resource.id}") - # jobs.enqueue(falkor_delete, [url, self.__auth, get_user_id()]) + falkor_put(url, data, self.__auth).raise_for_status() diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index a846dc0..6eea9c3 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -1,8 +1,10 @@ import logging import sqlalchemy as sa +import json from datetime import datetime from requests import HTTPError +from typing import List from ckanext.falkor.model import ( FalkorEvent, @@ -56,8 +58,23 @@ def handle(self, event: FalkorEvent): package_id = str(package_create_event.object_id) try: - document = self.falkor.document_get( - package_id, str(event.object_id)) + 
document_events: List[dict] = self.falkor.document_get( + package_id, + str(event.object_id) + ) + + document_events.append({ + "id": str(event.id), + "event_type": event.event_type, + "user_id": event.user_id, + "created_at": str(event.created_at), + }) + + self.falkor.document_update( + str(event.object_id), + package_id, + document_events + ) except HTTPError as e: if e.response.status_code == 404: self.falkor.document_create(package_id, event) From 417dbf8c9823297c7597c3f6f0b6b805d9c888e2 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 24 Oct 2024 12:44:29 +0100 Subject: [PATCH 069/156] Move processing location and remove response() from exception --- ckanext/falkor/event_handler.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 6eea9c3..8c7fc50 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -55,6 +55,9 @@ def handle(self, event: FalkorEvent): if package_create_event.status != FalkorEventStatus.SYNCED: return + event.status = FalkorEventStatus.PROCESSING + session.commit() + package_id = str(package_create_event.object_id) try: @@ -81,15 +84,11 @@ def handle(self, event: FalkorEvent): else: raise e - event.status = FalkorEventStatus.PROCESSING - session.commit() - event.status = FalkorEventStatus.SYNCED event.synced_at = datetime.now() session.commit() except Exception as e: log.exception(e) - log.debug(e.response.json()) event.status = FalkorEventStatus.FAILED session.commit() finally: From e1cc8dcaf81a7f3235ac6490fdc4c87dea86aa27 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 24 Oct 2024 12:55:54 +0100 Subject: [PATCH 070/156] Pass entity to event handler and fix created_at --- ckanext/falkor/event_handler.py | 7 +++---- ckanext/falkor/plugin.py | 17 +++++++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 
8c7fc50..352979b 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -1,10 +1,9 @@ import logging import sqlalchemy as sa -import json from datetime import datetime from requests import HTTPError -from typing import List +from typing import List, Union from ckanext.falkor.model import ( FalkorEvent, @@ -15,7 +14,7 @@ ) from ckanext.falkor.client import Client -from ckan.model import meta +from ckan.model import meta, Package, Resource from ckan.model.domain_object import DomainObjectOperation log = logging.getLogger(__name__) @@ -33,7 +32,7 @@ class EventHandler: def __init__(self, falkor: Client): self.falkor = falkor - def handle(self, event: FalkorEvent): + def handle(self, event: FalkorEvent, entity: Union[Package, Resource]): session: sa.orm.Session = meta.create_local_session() session.add(event) session.commit() diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index e4d0ee1..a85aee6 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -7,6 +7,7 @@ import ckan.plugins as plugins import ckan.plugins.toolkit as toolkit import ckan.model as ckan_model +from ckan.model.domain_object import DomainObjectOperation from ckanext.falkor import client, auth from ckanext.falkor.model import ( @@ -150,17 +151,20 @@ def before_show(self, resource_dict): if resource_id not in request.url: return + session = ckan_model.meta.create_local_session() + resource = session.query(ckan_model.Resource).get(resource_id) + event = FalkorEvent( object_id=resource_id, object_type=FalkorEventObjectType.RESOURCE, event_type=FalkorEventType.READ, user_id=get_user_id(), - created_at=created_at + created_at=datetime.now() ) jobs.enqueue( self.event_handler.handle, - [event] + [event, resource] ) self.get_helpers() @@ -193,13 +197,18 @@ def notify( elif isinstance(entity, ckan_model.Resource): event.object_type = FalkorEventObjectType.RESOURCE - event.created_at = entity.created + if operation == DomainObjectOperation.new: + 
event.created_at = entity.created + elif operation == DomainObjectOperation.update: + event.created_at = entity.last_modified + else: + event.created_at = datetime.now() else: return jobs.enqueue( self.event_handler.handle, - args=[event] + args=[event, entity] ) def construct_falkor_url(self, resource): From ded98c26e0ead23e40cdf1dfd7de8f5693003f62 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 24 Oct 2024 12:56:22 +0100 Subject: [PATCH 071/156] Remove created_at from before_show --- ckanext/falkor/plugin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index a85aee6..8a70e90 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -143,7 +143,6 @@ def sync(self): def before_show(self, resource_dict): resource_id = resource_dict["id"] - created_at = resource_dict["created"] # TODO: See whether we should expand on this idea as we are currently # generating a lot of reads. For now use to reduce noise of READ events From 5ada2017eefd6522239c4de5081c9ea7375a3e4d Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 24 Oct 2024 13:54:44 +0100 Subject: [PATCH 072/156] Update temp read filter to use regex --- ckanext/falkor/plugin.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 8a70e90..4fcb8d3 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -1,7 +1,9 @@ +import logging +import re + from flask import request from datetime import datetime from ckan.lib import jobs -import logging import sqlalchemy as sa import ckan.plugins as plugins @@ -147,7 +149,11 @@ def before_show(self, resource_dict): # TODO: See whether we should expand on this idea as we are currently # generating a lot of reads. For now use to reduce noise of READ events # during development. 
- if resource_id not in request.url: + valid_url_pattern = re.compile(r'^.*?/dataset/[^/]+/resource/[^/]+/?$') + + log.debug( + f"URL: {request.url}\nMatched: {valid_url_pattern.match(request.url)}") + if not valid_url_pattern.match(request.url): return session = ckan_model.meta.create_local_session() From 50f1363d9bdf0403bd0879f02ba3f9e7aba1984b Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 24 Oct 2024 16:34:38 +0100 Subject: [PATCH 073/156] Reintroduce dict and org id --- ckanext/falkor/client.py | 4 ++-- ckanext/falkor/event_handler.py | 29 +++++++++++++++++++---------- ckanext/falkor/model.py | 7 ++----- ckanext/falkor/plugin.py | 19 ++++++++++++------- 4 files changed, 35 insertions(+), 24 deletions(-) diff --git a/ckanext/falkor/client.py b/ckanext/falkor/client.py index 0bef896..9ed1e85 100644 --- a/ckanext/falkor/client.py +++ b/ckanext/falkor/client.py @@ -115,7 +115,7 @@ def document_create( self, package_id: str, event: FalkorEvent, - # organisation_id: str, + organisation_id: str, ): url = ( @@ -134,7 +134,7 @@ def document_create( "created_at": str(event.created_at), }]), "documentMetadata": { - # "organisation_id": organisation_id, + "organisation_id": organisation_id, "package_id": package_id, "resource_id": str(event.object_id), }, diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 352979b..92b1e49 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -16,6 +16,7 @@ from ckan.model import meta, Package, Resource from ckan.model.domain_object import DomainObjectOperation +import ckan.plugins.toolkit as toolkit log = logging.getLogger(__name__) @@ -32,7 +33,7 @@ class EventHandler: def __init__(self, falkor: Client): self.falkor = falkor - def handle(self, event: FalkorEvent, entity: Union[Package, Resource]): + def handle(self, event: FalkorEvent, entity: dict): session: sa.orm.Session = meta.create_local_session() session.add(event) session.commit() @@ -43,26 +44,26 @@ def 
handle(self, event: FalkorEvent, entity: Union[Package, Resource]): event.status = FalkorEventStatus.PROCESSING session.commit() - self.falkor.dataset_create(event.object_id) + self.falkor.dataset_create(entity["id"]) elif event.object_type == FalkorEventObjectType.RESOURCE: + # TODO: Would it be better to check Falkor for the existence of the dataset instead? + # Check Falkor for dataset, if not exists then fire create event if no create event already pending. package_create_event = get_package_create_event_for_resource( - session, event.object_id) + session, entity["package_id"]) # TODO: Add retry here in case resource was created shortly after # package and it is still processing. - if package_create_event.status != FalkorEventStatus.SYNCED: + if package_create_event is None or package_create_event.status != FalkorEventStatus.SYNCED: return event.status = FalkorEventStatus.PROCESSING session.commit() - package_id = str(package_create_event.object_id) - try: document_events: List[dict] = self.falkor.document_get( - package_id, - str(event.object_id) + entity["package_id"], + entity["id"] ) document_events.append({ @@ -74,12 +75,20 @@ def handle(self, event: FalkorEvent, entity: Union[Package, Resource]): self.falkor.document_update( str(event.object_id), - package_id, + entity["package_id"], document_events ) except HTTPError as e: if e.response.status_code == 404: - self.falkor.document_create(package_id, event) + log.debug(entity) + package_info = toolkit.get_action("package_show")( + data_dict={"id": entity["package_id"]} + ) + self.falkor.document_create( + entity["package_id"], + event, + package_info["organization"]["id"] + ) else: raise e diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 17e2138..8d41916 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -100,13 +100,10 @@ def get_resources_without_create_events(session: sa.orm.Session) -> List[Resourc def get_package_create_event_for_resource( session: sa.orm.Session, 
- resource_id: UUID + package_id: UUID ) -> FalkorEvent: - resource = session.query(Resource).filter( - Resource.id == resource_id).first() - package = session.query(FalkorEvent).filter( - FalkorEvent.object_id == resource.package_id + FalkorEvent.object_id == package_id ).filter( FalkorEvent.object_type == FalkorEventObjectType.PACKAGE ).filter( diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 4fcb8d3..4b38b6e 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -10,6 +10,7 @@ import ckan.plugins.toolkit as toolkit import ckan.model as ckan_model from ckan.model.domain_object import DomainObjectOperation +from ckan.lib.dictization import table_dictize from ckanext.falkor import client, auth from ckanext.falkor.model import ( @@ -149,16 +150,14 @@ def before_show(self, resource_dict): # TODO: See whether we should expand on this idea as we are currently # generating a lot of reads. For now use to reduce noise of READ events # during development. - valid_url_pattern = re.compile(r'^.*?/dataset/[^/]+/resource/[^/]+/?$') + valid_url_pattern = re.compile( + r'^.*?/dataset/[^/]+/resource/(?!new)[^/]+/?$') log.debug( - f"URL: {request.url}\nMatched: {valid_url_pattern.match(request.url)}") + f"URL: {request.url}\nMatched: {bool(valid_url_pattern.match(request.url))}") if not valid_url_pattern.match(request.url): return - session = ckan_model.meta.create_local_session() - resource = session.query(ckan_model.Resource).get(resource_id) - event = FalkorEvent( object_id=resource_id, object_type=FalkorEventObjectType.RESOURCE, @@ -169,7 +168,7 @@ def before_show(self, resource_dict): jobs.enqueue( self.event_handler.handle, - [event, resource] + [event, resource_dict] ) self.get_helpers() @@ -211,9 +210,15 @@ def notify( else: return + context = { + "model": ckan_model, + "ignore_auth": True, + "defer_commit": True + } + jobs.enqueue( self.event_handler.handle, - args=[event, entity] + args=[event, table_dictize(entity, context)] ) def 
construct_falkor_url(self, resource): From cde1a98e04657f5d716525b6e4b2b38e0513aa8d Mon Sep 17 00:00:00 2001 From: wajones98 Date: Mon, 28 Oct 2024 11:23:11 +0000 Subject: [PATCH 074/156] Add entity to sync jobs --- ckanext/falkor/client.py | 1 - ckanext/falkor/event_handler.py | 4 ++-- ckanext/falkor/model.py | 19 ++++++++++++++++++- ckanext/falkor/plugin.py | 30 +++++++++++++++++++----------- 4 files changed, 39 insertions(+), 15 deletions(-) diff --git a/ckanext/falkor/client.py b/ckanext/falkor/client.py index 9ed1e85..9825f4f 100644 --- a/ckanext/falkor/client.py +++ b/ckanext/falkor/client.py @@ -1,7 +1,6 @@ import requests import logging import json -import ckan.model as model from typing import TypedDict from ckanext.falkor import auth diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 92b1e49..227ec66 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -3,7 +3,7 @@ from datetime import datetime from requests import HTTPError -from typing import List, Union +from typing import List from ckanext.falkor.model import ( FalkorEvent, @@ -14,7 +14,7 @@ ) from ckanext.falkor.client import Client -from ckan.model import meta, Package, Resource +from ckan.model import meta, Resource from ckan.model.domain_object import DomainObjectOperation import ckan.plugins.toolkit as toolkit diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 8d41916..357ba9e 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -4,9 +4,10 @@ from enum import Enum from uuid import UUID, uuid4 from datetime import datetime -from typing import Optional, List +from typing import Optional, List, Union from sqlalchemy.ext.declarative import declarative_base from ckan.model import meta, Package, Resource +from ckan.lib.dictization import table_dictize Base = declarative_base(metadata=meta.metadata) @@ -98,6 +99,22 @@ def get_resources_without_create_events(session: sa.orm.Session) -> List[Resourc 
).all() +def get_dictized_entity( + session: sa.orm.Session, + context: dict, id: str, + object_type: FalkorEventObjectType +) -> dict: + ckan_model_type: Union[Package, Resource] + if object_type == FalkorEventObjectType.RESOURCE: + ckan_model_type = Resource + elif object_type == FalkorEventObjectType.PACKAGE: + ckan_model_type = Package + else: + raise Exception("Invalid object type for retrieving dictized object") + + return table_dictize(session.query(ckan_model_type).get(id), context) + + def get_package_create_event_for_resource( session: sa.orm.Session, package_id: UUID diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 4b38b6e..d3a3281 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -22,13 +22,20 @@ get_pending_events, get_packages_without_create_events, get_resources_without_create_events, - insert_new_falkor_sync_job + insert_new_falkor_sync_job, + get_dictized_entity ) from ckanext.falkor.event_handler import ( EventHandler, DomainObjectOperationToFalkorEventTypeMap ) +CONTEXT = { + "model": ckan_model, + "ignore_auth": True, + "defer_commit": True +} + log = logging.getLogger(__name__) @@ -90,6 +97,7 @@ def configure(self, config): ) self.event_handler = EventHandler(self.falkor) + self.sync() def sync(self): session: sa.orm.Session = ckan_model.meta.create_local_session() @@ -108,7 +116,7 @@ def sync(self): ) jobs.enqueue( self.event_handler.handle, - [event] + [event, table_dictize(package, CONTEXT)] ) resources = get_resources_without_create_events(session) @@ -122,14 +130,20 @@ def sync(self): ) jobs.enqueue( self.event_handler.handle, - [event] + [event, table_dictize(resource, CONTEXT)] ) pending_events = get_pending_events(session) for event in pending_events: + entity = get_dictized_entity( + session, + CONTEXT, + str(event.object_id), + event.object_type + ) jobs.enqueue( self.event_handler.handle, - [event] + [event, entity] ) job.status = FalkorSyncJobStatus.FINISHED @@ -210,15 +224,9 @@ def 
notify( else: return - context = { - "model": ckan_model, - "ignore_auth": True, - "defer_commit": True - } - jobs.enqueue( self.event_handler.handle, - args=[event, table_dictize(entity, context)] + args=[event, table_dictize(entity, CONTEXT)] ) def construct_falkor_url(self, resource): From 22997e7021b6dc1b993f17c3c8fd35b1f17940d1 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Mon, 28 Oct 2024 13:30:50 +0000 Subject: [PATCH 075/156] Add admin tab --- ckanext/falkor/blueprint.py | 16 ++++++++++++++++ ckanext/falkor/plugin.py | 9 ++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 ckanext/falkor/blueprint.py diff --git a/ckanext/falkor/blueprint.py b/ckanext/falkor/blueprint.py new file mode 100644 index 0000000..472f67e --- /dev/null +++ b/ckanext/falkor/blueprint.py @@ -0,0 +1,16 @@ +import ckan.lib.base as base +from flask import Blueprint +render = base.render + +falkor_blueprint = Blueprint(u'falkor_blueprint', __name__) + + +def falkor_audit(): + return render( + "falkor-audit.html", + ) + + +falkor_blueprint.add_url_rule( + u'/ckan-admin/falkor-audit', view_func=falkor_audit +) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index d3a3281..b69ccc5 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -29,6 +29,7 @@ EventHandler, DomainObjectOperationToFalkorEventTypeMap ) +from ckanext.falkor.blueprint import falkor_blueprint CONTEXT = { "model": ckan_model, @@ -57,6 +58,7 @@ class FalkorPlugin(plugins.SingletonPlugin): plugins.implements(plugins.IConfigurer) plugins.implements(plugins.IConfigurable, inherit=True) + plugins.implements(plugins.IBlueprint) plugins.implements(plugins.ITemplateHelpers) plugins.implements(plugins.IDomainObjectModification, inherit=True) plugins.implements(plugins.IResourceController, inherit=True) @@ -66,6 +68,9 @@ def update_config(self, config): toolkit.add_template_directory(config, "templates") toolkit.add_public_directory(config, "public") + 
toolkit.add_ckan_admin_tab( + config, "falkor_blueprint.falkor_audit", "Falkor", icon="gavel") + def configure(self, config): # TODO: Check if plugins has been initialised before tracking events self.config = config @@ -97,7 +102,9 @@ def configure(self, config): ) self.event_handler = EventHandler(self.falkor) - self.sync() + + def get_blueprint(self): + return falkor_blueprint def sync(self): session: sa.orm.Session = ckan_model.meta.create_local_session() From c9ed66db71a44f39b85ddd007cbc8e114809a558 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Mon, 28 Oct 2024 14:00:53 +0000 Subject: [PATCH 076/156] Properly inheirt admin page --- ckanext/falkor/blueprint.py | 6 +++--- ckanext/falkor/plugin.py | 2 +- ckanext/falkor/templates/admin/base.html | 1 + ckanext/falkor/templates/falkor-audit.html | 1 - 4 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 ckanext/falkor/templates/admin/base.html delete mode 100644 ckanext/falkor/templates/falkor-audit.html diff --git a/ckanext/falkor/blueprint.py b/ckanext/falkor/blueprint.py index 472f67e..4573d49 100644 --- a/ckanext/falkor/blueprint.py +++ b/ckanext/falkor/blueprint.py @@ -5,12 +5,12 @@ falkor_blueprint = Blueprint(u'falkor_blueprint', __name__) -def falkor_audit(): +def admin_tab(): return render( - "falkor-audit.html", + "admin/base.html", ) falkor_blueprint.add_url_rule( - u'/ckan-admin/falkor-audit', view_func=falkor_audit + u'/ckan-admin/falkor', view_func=admin_tab ) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index b69ccc5..58e984a 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -69,7 +69,7 @@ def update_config(self, config): toolkit.add_public_directory(config, "public") toolkit.add_ckan_admin_tab( - config, "falkor_blueprint.falkor_audit", "Falkor", icon="gavel") + config, "falkor_blueprint.admin_tab", "Falkor", icon="gavel") def configure(self, config): # TODO: Check if plugins has been initialised before tracking events diff --git 
a/ckanext/falkor/templates/admin/base.html b/ckanext/falkor/templates/admin/base.html new file mode 100644 index 0000000..c58cded --- /dev/null +++ b/ckanext/falkor/templates/admin/base.html @@ -0,0 +1 @@ +{% ckan_extends %} diff --git a/ckanext/falkor/templates/falkor-audit.html b/ckanext/falkor/templates/falkor-audit.html deleted file mode 100644 index 34295ee..0000000 --- a/ckanext/falkor/templates/falkor-audit.html +++ /dev/null @@ -1 +0,0 @@ -

    This is a test

    From be410287af09836979fda60d7755cf17909287da Mon Sep 17 00:00:00 2001 From: wajones98 Date: Mon, 28 Oct 2024 16:08:09 +0000 Subject: [PATCH 077/156] Start sync job from admin button --- ckanext/falkor/blueprint.py | 16 ------------ ckanext/falkor/plugin.py | 32 +++++++++++++++++++++--- ckanext/falkor/templates/admin/base.html | 13 ++++++++++ 3 files changed, 41 insertions(+), 20 deletions(-) delete mode 100644 ckanext/falkor/blueprint.py diff --git a/ckanext/falkor/blueprint.py b/ckanext/falkor/blueprint.py deleted file mode 100644 index 4573d49..0000000 --- a/ckanext/falkor/blueprint.py +++ /dev/null @@ -1,16 +0,0 @@ -import ckan.lib.base as base -from flask import Blueprint -render = base.render - -falkor_blueprint = Blueprint(u'falkor_blueprint', __name__) - - -def admin_tab(): - return render( - "admin/base.html", - ) - - -falkor_blueprint.add_url_rule( - u'/ckan-admin/falkor', view_func=admin_tab -) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 58e984a..9dcec97 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -1,7 +1,7 @@ import logging import re -from flask import request +from flask import request, Blueprint from datetime import datetime from ckan.lib import jobs @@ -9,6 +9,7 @@ import ckan.plugins as plugins import ckan.plugins.toolkit as toolkit import ckan.model as ckan_model +import ckan.lib.base as base from ckan.model.domain_object import DomainObjectOperation from ckan.lib.dictization import table_dictize @@ -29,7 +30,8 @@ EventHandler, DomainObjectOperationToFalkorEventTypeMap ) -from ckanext.falkor.blueprint import falkor_blueprint + +render = base.render CONTEXT = { "model": ckan_model, @@ -55,6 +57,7 @@ def get_user_id() -> str: class FalkorPlugin(plugins.SingletonPlugin): falkor: client.Client event_handler: EventHandler + blueprint: Blueprint plugins.implements(plugins.IConfigurer) plugins.implements(plugins.IConfigurable, inherit=True) @@ -69,7 +72,7 @@ def update_config(self, config): 
toolkit.add_public_directory(config, "public") toolkit.add_ckan_admin_tab( - config, "falkor_blueprint.admin_tab", "Falkor", icon="gavel") + config, "falkor_admin.admin_tab", "Falkor", icon="gavel") def configure(self, config): # TODO: Check if plugins has been initialised before tracking events @@ -102,11 +105,28 @@ def configure(self, config): ) self.event_handler = EventHandler(self.falkor) + self.blueprint = Blueprint(u'falkor_admin', __name__) + self.blueprint.add_url_rule( + "/ckan-admin/falkor", + view_func=self.admin_tab, + methods=["GET"] + ) + self.blueprint.add_url_rule( + "/ckan-admin/falkor/sync", + view_func=self.sync, + methods=["POST"] + ) def get_blueprint(self): - return falkor_blueprint + return self.blueprint + + def admin_tab(self): + return render( + "admin/base.html", + ) def sync(self): + # TODO: Verify user is sys admin session: sa.orm.Session = ckan_model.meta.create_local_session() job = new_falkor_sync_job() try: @@ -154,15 +174,19 @@ def sync(self): ) job.status = FalkorSyncJobStatus.FINISHED + toolkit.h.flash_success("Sync job started") except Exception as e: log.exception(e, extra={"job_id": job.id}) session.rollback() job.status = FalkorSyncJobStatus.FAILED + toolkit.h.flash_error("There was an error starting the sync job") finally: job.end = datetime.now() session.commit() session.close() + return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) + # IResourceController def before_show(self, resource_dict): diff --git a/ckanext/falkor/templates/admin/base.html b/ckanext/falkor/templates/admin/base.html index c58cded..c94597a 100644 --- a/ckanext/falkor/templates/admin/base.html +++ b/ckanext/falkor/templates/admin/base.html @@ -1 +1,14 @@ {% ckan_extends %} + +{% block primary_content_inner %} + {% import 'macros/form.html' as form %} +
    +
    + +
    +
    +{% endblock %} From f6edacfec7e7236f4e9d61c40c1aef1c72b05ac6 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 29 Oct 2024 14:24:28 +0000 Subject: [PATCH 078/156] UI for Falkor admin tab --- ckanext/falkor/model.py | 13 ++++ ckanext/falkor/plugin.py | 14 ++++- ckanext/falkor/templates/admin/base.html | 79 ++++++++++++++++++++++-- 3 files changed, 100 insertions(+), 6 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 357ba9e..7ea9af8 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -130,6 +130,12 @@ def get_package_create_event_for_resource( return package +def get_failed_events( + session: sa.orm.Session, +) -> List[FalkorEvent]: + return session.query(FalkorEvent).filter(FalkorEvent.status == FalkorEventStatus.FAILED).all() + + class FalkorSyncJobStatus(Enum): RUNNING = "running" FINISHED = "finished" @@ -194,3 +200,10 @@ def insert_new_falkor_sync_job(session: sa.orm.Session, job: FalkorSyncJob): FalkorSyncJob.is_latest == True ).update({FalkorSyncJob.is_latest: False}) session.add(job) + + +def get_sync_job_history(session: sa.orm.Session, limit: Optional[int] = None) -> List[FalkorSyncJob]: + query = session.query(FalkorSyncJob).order_by(FalkorSyncJob.start.desc()) + if limit is not None: + query = query.limit(limit) + return query.all() diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 9dcec97..2bfaff2 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -24,7 +24,9 @@ get_packages_without_create_events, get_resources_without_create_events, insert_new_falkor_sync_job, - get_dictized_entity + get_dictized_entity, + get_sync_job_history, + get_failed_events ) from ckanext.falkor.event_handler import ( EventHandler, @@ -121,8 +123,18 @@ def get_blueprint(self): return self.blueprint def admin_tab(self): + session: sa.orm.Session = ckan_model.meta.create_local_session() + recent_job_limit = 10 + sync_jobs = get_sync_job_history(session, recent_job_limit) + 
failed_events = get_failed_events(session) + session.close() return render( "admin/base.html", + extra_vars={ + "latest_job_run": sync_jobs[0].start, + "sync_jobs": sync_jobs, + "failed_events": failed_events + } ) def sync(self): diff --git a/ckanext/falkor/templates/admin/base.html b/ckanext/falkor/templates/admin/base.html index c94597a..d083158 100644 --- a/ckanext/falkor/templates/admin/base.html +++ b/ckanext/falkor/templates/admin/base.html @@ -1,14 +1,83 @@ {% ckan_extends %} {% block primary_content_inner %} - {% import 'macros/form.html' as form %} +{% import 'macros/form.html' as form %} +

    {{ _('Last sync job run: ' + h.render_datetime(latest_job_run, with_hours=True, with_seconds=True)) }}

    -
    + +

    {{ _('Most recent runs') }}

    + + + + + + + + + + + {% for sync_job in sync_jobs %} + + + + + + + {% endfor %} + +
    Job IDStatusStartedFinished
    {{ sync_job.id }}{{ sync_job.status.value }}{{ h.render_datetime(sync_job.start, with_hours=True, with_seconds=True) or _('Never') }}{{ h.render_datetime(sync_job.end, with_hours=True, with_seconds=True) or _('Never') }}
    +

    Failed events

    + + + + + + + + + + + + + {% for event in failed_events %} + + + + + + + + + {% endfor %} + +
    Event IDObject IDObject TypeEvent TypeUser IDCreated At
    {{ event.id }}{{ event.object_id }}{{ event.object_type.value }}{{ event.event_type.value }}{{ event.user_id }}{{ h.render_datetime(event.created_at, with_hours=True, with_seconds=True) }}
    +{% endblock %} + + +{% block secondary_content %} +
    +

    + + {{ _('Falkor Admin') }} +

    +
    + {% block admin_form_help %} + {% trans %} +

    This page can be used to:

    +
      +
    • Manually run sync jobs
    • +
    • View recent sync job runs
    • +
    • View failed events
    • +
    + {% endtrans %} + {% endblock %}
    - +
    {% endblock %} From 590fc25bfacf487f9ffc08dc5fa48246d4f2e7d4 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 29 Oct 2024 14:26:04 +0000 Subject: [PATCH 079/156] Add order by to failed events --- ckanext/falkor/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 7ea9af8..57bc987 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -133,7 +133,7 @@ def get_package_create_event_for_resource( def get_failed_events( session: sa.orm.Session, ) -> List[FalkorEvent]: - return session.query(FalkorEvent).filter(FalkorEvent.status == FalkorEventStatus.FAILED).all() + return session.query(FalkorEvent).filter(FalkorEvent.status == FalkorEventStatus.FAILED).order_by(FalkorEvent.created_at.desc()).all() class FalkorSyncJobStatus(Enum): From 548c992bc76628c2eab9f444622ac26e787d45eb Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 30 Oct 2024 16:13:19 +0000 Subject: [PATCH 080/156] Refactor metadata --- ckanext/falkor/client.py | 13 ++++--------- ckanext/falkor/event_handler.py | 14 ++++++++++---- ckanext/falkor/plugin.py | 22 +++++----------------- ckanext/falkor/templates/admin/base.html | 4 +++- 4 files changed, 22 insertions(+), 31 deletions(-) diff --git a/ckanext/falkor/client.py b/ckanext/falkor/client.py index 9825f4f..fc07d53 100644 --- a/ckanext/falkor/client.py +++ b/ckanext/falkor/client.py @@ -112,16 +112,15 @@ def document_get(self, package_id: str, resource_id: str): def document_create( self, - package_id: str, event: FalkorEvent, - organisation_id: str, + metadata: dict, ): url = ( self.__core_base_url + self.__tenant_id + "/dataset/" - + package_id + + metadata["package_id"] + "/create" ) payload = { @@ -130,13 +129,9 @@ def document_create( "id": str(event.id), "event_type": event.event_type, "user_id": event.user_id, - "created_at": str(event.created_at), + "created_at": event.created_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ"), }]), - "documentMetadata": { - 
"organisation_id": organisation_id, - "package_id": package_id, - "resource_id": str(event.object_id), - }, + "documentMetadata": metadata, } falkor_post(url, payload, self.__auth).raise_for_status() diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 227ec66..39b463b 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -14,7 +14,7 @@ ) from ckanext.falkor.client import Client -from ckan.model import meta, Resource +from ckan.model import meta from ckan.model.domain_object import DomainObjectOperation import ckan.plugins.toolkit as toolkit @@ -70,7 +70,7 @@ def handle(self, event: FalkorEvent, entity: dict): "id": str(event.id), "event_type": event.event_type, "user_id": event.user_id, - "created_at": str(event.created_at), + "created_at": event.created_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ"), }) self.falkor.document_update( @@ -85,9 +85,15 @@ def handle(self, event: FalkorEvent, entity: dict): data_dict={"id": entity["package_id"]} ) self.falkor.document_create( - entity["package_id"], event, - package_info["organization"]["id"] + { + "org_id": package_info["organization"]["id"], + "org_name": package_info["organization"]["title"], + "package_id": entity["package_id"], + "package_name": package_info["name"], + "resource_id": str(event.object_id), + "resource_name": entity["name"] + } ) else: raise e diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 2bfaff2..50225c9 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -131,7 +131,7 @@ def admin_tab(self): return render( "admin/base.html", extra_vars={ - "latest_job_run": sync_jobs[0].start, + "latest_job_run": sync_jobs[0].start if len(sync_jobs) else None, "sync_jobs": sync_jobs, "failed_events": failed_events } @@ -210,11 +210,11 @@ def before_show(self, resource_dict): valid_url_pattern = re.compile( r'^.*?/dataset/[^/]+/resource/(?!new)[^/]+/?$') - log.debug( - f"URL: {request.url}\nMatched: 
{bool(valid_url_pattern.match(request.url))}") if not valid_url_pattern.match(request.url): return + log.debug(resource_dict) + event = FalkorEvent( object_id=resource_id, object_type=FalkorEventObjectType.RESOURCE, @@ -274,24 +274,12 @@ def notify( def construct_falkor_url(self, resource): resource_id = resource["id"] - resource_name = resource["name"] - package_id = resource["package_id"] - package_info = toolkit.get_action( "package_show")(data_dict={"id": package_id}) - package_name = package_info["name"] + org_id = package_info["organization"]["id"] - organisation_info = package_info["organization"] - organisation_name = organisation_info["title"] - - url = f"{self.audit_base_url}{package_id}/{resource_id}" - query = ( - f"?dataset_name={package_name}" - f"&org_name={organisation_name}" - f"&doc_name={resource_name}" - ) - return url + query + return f"{self.audit_base_url}{org_id}/{package_id}/{resource_id}" def get_helpers(self): return {"construct_falkor_url": self.construct_falkor_url} diff --git a/ckanext/falkor/templates/admin/base.html b/ckanext/falkor/templates/admin/base.html index d083158..d781bfe 100644 --- a/ckanext/falkor/templates/admin/base.html +++ b/ckanext/falkor/templates/admin/base.html @@ -2,7 +2,9 @@ {% block primary_content_inner %} {% import 'macros/form.html' as form %} -

    {{ _('Last sync job run: ' + h.render_datetime(latest_job_run, with_hours=True, with_seconds=True)) }}

    +{% if latest_job_run != None %} +

    {{ _('Last sync job run: ' + h.render_datetime(latest_job_run, with_hours=True, with_seconds=True)) }}

    +{% endif %}
    -

    {{ _('Most recent runs') }}

    - - - - - - - - - - - {% for sync_job in sync_jobs %} - - - - - - - {% endfor %} - -
    Job IDStatusStartedFinished
    {{ sync_job.id }}{{ sync_job.status.value }}{{ h.render_datetime(sync_job.start, with_hours=True, with_seconds=True) or _('Never') }}{{ h.render_datetime(sync_job.end, with_hours=True, with_seconds=True) or _('Never') }}

    Failed events

    @@ -45,6 +24,19 @@

    Failed events

    + @@ -56,6 +48,38 @@

    Failed events

    + + + {% endfor %} + +
    Event Type User ID Created At + {% if failed_events | length > 0 %} +
    + +
    + {% endif %} +
    {{ event.event_type.value }} {{ event.user_id }} {{ h.render_datetime(event.created_at, with_hours=True, with_seconds=True) }} +
    + +
    +
    +

    Most recent runs

    + + + + + + + + + + + {% for sync_job in sync_jobs %} + + + + + {% endfor %} From 49b3099b86fbaa30ef2273c24860893109139d2a Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 31 Oct 2024 17:03:51 +0000 Subject: [PATCH 085/156] Remove TODOs and only sync unique events --- ckanext/falkor/event_handler.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 563a8ca..0ba8133 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -38,22 +38,16 @@ def handle(self, event: FalkorEvent, entity: dict): session.add(event) session.commit() try: - # TODO: Clean up nesting. if event.object_type == FalkorEventObjectType.PACKAGE: - # TODO: Is there a way to avoid setting PROCESSING in both branches? event.status = FalkorEventStatus.PROCESSING session.commit() self.falkor.dataset_create(entity["id"]) elif event.object_type == FalkorEventObjectType.RESOURCE: - # TODO: Would it be better to check Falkor for the existence of the dataset instead? - # Check Falkor for dataset, if not exists then fire create event if no create event already pending. package_create_event = get_package_create_event_for_resource( session, entity["package_id"]) - # TODO: Add retry here in case resource was created shortly after - # package and it is still processing. 
if package_create_event is None or package_create_event.status != FalkorEventStatus.SYNCED: return @@ -66,12 +60,22 @@ def handle(self, event: FalkorEvent, entity: dict): entity["id"] ) - document_events.append({ + document_event = { "id": str(event.id), "event_type": event.event_type, "user_id": event.user_id, "created_at": event.created_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ"), - }) + } + + if document_event in document_events: + log.warning( + f"[Event ID: {event.id}] Already synced to Falkor") + event.status = FalkorEventStatus.SYNCED + event.synced_at = datetime.now() + session.commit() + return + + document_events.append(document_event) self.falkor.document_update( str(event.object_id), @@ -102,7 +106,8 @@ def handle(self, event: FalkorEvent, entity: dict): event.synced_at = datetime.now() session.commit() except Exception as e: - log.exception(f"[Event ID: {event.id}] {e}") + log.exception( + f"[Event ID: {event.id}] {e}") event.status = FalkorEventStatus.FAILED session.commit() finally: From 137c3d1f9145a57880264bba72a874ca407efebd Mon Sep 17 00:00:00 2001 From: wajones98 Date: Fri, 1 Nov 2024 14:03:19 +0000 Subject: [PATCH 086/156] Check resource id in request url --- ckanext/falkor/plugin.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 317f318..7ac4e16 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -265,13 +265,11 @@ def reprocess(self, event_id: str): def before_show(self, resource_dict): resource_id = resource_dict["id"] - # TODO: See whether we should expand on this idea as we are currently - # generating a lot of reads. For now use to reduce noise of READ events - # during development. 
+ # This regex pattern will only match /dataset//resource/ valid_url_pattern = re.compile( r'^.*?/dataset/[^/]+/resource/(?!new)[^/]+/?$') - if not valid_url_pattern.match(request.url): + if not valid_url_pattern.match(request.url) or resource_id not in request.url: return log.debug(resource_dict) From e026453448ffd588c20d14bbfb159d147f1a3b94 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Fri, 1 Nov 2024 14:06:41 +0000 Subject: [PATCH 087/156] Add sysadmin check to reprocessing jobs --- ckanext/falkor/plugin.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 7ac4e16..a639873 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -154,12 +154,10 @@ def admin_tab(self): def sync(self): if toolkit.g.userobj is None: logging.warning("Sync attempted by unauthorised user") - toolkit.h.flash_error("There was an error starting the sync job") return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) elif not toolkit.g.userobj.sysadmin: logging.warning( f"Sync attempted by non sysadmin user {toolkit.g.userobj.id}") - toolkit.h.flash_error("There was an error starting the sync job") return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) session: sa.orm.Session = ckan_model.meta.create_local_session() @@ -224,6 +222,14 @@ def sync(self): return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) def reprocess_all(self): + if toolkit.g.userobj is None: + logging.warning("Batch reprocess attempted by unauthorised user") + return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) + elif not toolkit.g.userobj.sysadmin: + logging.warning( + f"Batch reprocess attempted by non sysadmin user {toolkit.g.userobj.id}") + return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) + log.debug("Reprocessing all failed events") session: sa.orm.Session = ckan_model.meta.create_local_session() 
failed_events = get_failed_events(session) @@ -244,6 +250,15 @@ def reprocess_all(self): return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) def reprocess(self, event_id: str): + if toolkit.g.userobj is None: + logging.warning( + f"Reprocessing of event {event_id} attempted by unauthorised user") + return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) + elif not toolkit.g.userobj.sysadmin: + logging.warning( + f"Reprocessing of event {event_id} attempted by non sysadmin user {toolkit.g.userobj.id}") + return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) + log.debug(f"Reprocessing {event_id}") session: sa.orm.Session = ckan_model.meta.create_local_session() event = get_event(session, event_id) From 3c15e25c66496fcc356326f97857fc6d1b51f40b Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 12 Nov 2024 12:06:15 +0000 Subject: [PATCH 088/156] Check sysadmin role for plugin endpoints --- ckanext/falkor/plugin.py | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index a639873..2b9955e 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -58,6 +58,15 @@ def get_user_id() -> str: return "guest" if not user else user.id +def check_access(): + context = {"model": ckan_model, + "user": toolkit.g.user, "auth_user_obj": toolkit.g.userobj} + try: + toolkit.check_access('sysadmin', context, {}) + except toolkit.NotAuthorized: + toolkit.abort(403, "Need to be system administrator to administer") + + class FalkorPlugin(plugins.SingletonPlugin): falkor: client.Client event_handler: EventHandler @@ -137,6 +146,7 @@ def get_blueprint(self): return self.blueprint def admin_tab(self): + check_access() session: sa.orm.Session = ckan_model.meta.create_local_session() recent_job_limit = 10 sync_jobs = get_sync_job_history(session, recent_job_limit) @@ -152,14 +162,7 @@ def admin_tab(self): ) def 
sync(self): - if toolkit.g.userobj is None: - logging.warning("Sync attempted by unauthorised user") - return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) - elif not toolkit.g.userobj.sysadmin: - logging.warning( - f"Sync attempted by non sysadmin user {toolkit.g.userobj.id}") - return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) - + check_access() session: sa.orm.Session = ckan_model.meta.create_local_session() job_id = uuid4() job = new_falkor_sync_job(job_id, start=datetime.now()) @@ -222,14 +225,7 @@ def sync(self): return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) def reprocess_all(self): - if toolkit.g.userobj is None: - logging.warning("Batch reprocess attempted by unauthorised user") - return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) - elif not toolkit.g.userobj.sysadmin: - logging.warning( - f"Batch reprocess attempted by non sysadmin user {toolkit.g.userobj.id}") - return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) - + check_access() log.debug("Reprocessing all failed events") session: sa.orm.Session = ckan_model.meta.create_local_session() failed_events = get_failed_events(session) @@ -250,15 +246,7 @@ def reprocess_all(self): return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) def reprocess(self, event_id: str): - if toolkit.g.userobj is None: - logging.warning( - f"Reprocessing of event {event_id} attempted by unauthorised user") - return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) - elif not toolkit.g.userobj.sysadmin: - logging.warning( - f"Reprocessing of event {event_id} attempted by non sysadmin user {toolkit.g.userobj.id}") - return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) - + check_access() log.debug(f"Reprocessing {event_id}") session: sa.orm.Session = ckan_model.meta.create_local_session() event = get_event(session, event_id) From 
13c5e6358b074a576cced870f48b4c5fcf110055 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 15:47:40 +0000 Subject: [PATCH 089/156] Update FalkorEvent --- .../falkor/versions/376615bb5319_init.py | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py index 9d8094c..ec93981 100644 --- a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py +++ b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py @@ -22,9 +22,9 @@ depends_on = None -class FalkorEventObjectType(Enum): - PACKAGE = 'package' - RESOURCE = 'resource' +class FalkorEventResourceType(Enum): + default = 'default' + stream = 'stream' class FalkorEventStatus(Enum): @@ -47,20 +47,26 @@ def upgrade(): meta.MetaData(), sa.Column( "id", - sa.dialects.postgresql.UUID(as_uuid=True), + sa.dialects.postgresql.UUID, primary_key=True, nullable=False, default=uuid.uuid4 ), - sa.Column("object_id", sa.dialects.postgresql.UUID( - as_uuid=True), nullable=False), + sa.Column("org_id", sa.dialects.postgresql.UUID, nullable=False), + sa.Column("org_name", sa.TEXT, nullable=False), + sa.Column("package_id", sa.dialects.postgresql.UUID, nullable=False), + sa.Column("package_name", sa.TEXT, nullable=False), + sa.Column("resource_id", sa.dialects.postgresql.UUID, nullable=False), + sa.Column("resource_name", sa.TEXT, nullable=False), sa.Column( - "object_type", - sa.Enum(FalkorEventObjectType), - nullable=False + "resource_type", + sa.Enum(FalkorEventResourceType), + nullable=False, + default=FalkorEventResourceType.default ), sa.Column("event_type", sa.Enum(FalkorEventType), nullable=False), sa.Column("user_id", sa.TEXT, nullable=False, default="guest"), + sa.Column("user_email", sa.TEXT, nullable=False, default="guest"), sa.Column( "status", sa.Enum(FalkorEventStatus), From c3d7a22838ce4fcada204630fc634088cf7290b9 Mon Sep 17 00:00:00 2001 
From: wajones98 Date: Tue, 19 Nov 2024 15:48:06 +0000 Subject: [PATCH 090/156] Replace FalkorEventObjectType Replace the FalkorEventObjectType with the FalkorEventResourceType --- ckanext/falkor/model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 51d9bd2..28c0d49 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -14,9 +14,9 @@ log = logging.getLogger(__name__) -class FalkorEventObjectType(str, Enum): - PACKAGE = 'package' - RESOURCE = 'resource' +class FalkorEventResourceType(Enum): + default = 'default' + stream = 'stream' class FalkorEventStatus(str, Enum): From ca5af33b308164f986b6a7245cb5ee683d74ab8f Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 15:52:54 +0000 Subject: [PATCH 091/156] Update FalkorEvent model and remove get_package_without_create_events func --- ckanext/falkor/model.py | 39 ++++++++++++--------------------------- 1 file changed, 12 insertions(+), 27 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 28c0d49..4571414 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -42,15 +42,20 @@ class FalkorEvent(Base): nullable=False, default=uuid4 ) - object_id = sa.Column(sa.dialects.postgresql.UUID( - as_uuid=True), nullable=False) - object_type = sa.Column(sa.Enum(FalkorEventObjectType), nullable=False) + org_id = sa.Column(sa.dialects.postgresql.UUID, nullable=False) + org_name = sa.Column(sa.TEXT, nullable=False) + package_id = sa.Column(sa.dialects.postgresql.UUID, nullable=False) + package_name = sa.Column(sa.TEXT, nullable=False) + resource_id = sa.Column(sa.dialects.postgresql.UUID, nullable=False) + resource_name = sa.Column(sa.TEXT, nullable=False) + resource_type = sa.Column( + sa.Enum(FalkorEventResourceType), + nullable=False, + default=FalkorEventResourceType.default + ) event_type = sa.Column(sa.Enum(FalkorEventType), nullable=False) user_id = sa.Column(sa.TEXT, 
nullable=False, default="guest") - status = sa.Column( - sa.Enum(FalkorEventStatus), - default=FalkorEventStatus.PENDING - ) + user_email = sa.Column(sa.TEXT, nullable=False, default="guest") created_at = sa.Column(sa.DateTime, nullable=False) synced_at = sa.Column(sa.DateTime, nullable=True) @@ -63,26 +68,6 @@ def get_event(session: sa.orm.Session, event_id: str) -> FalkorEvent: return session.query(FalkorEvent).get(event_id) -def get_packages_without_create_events(session: sa.orm.Session) -> List[Package]: - distinct_package_creates = session.query( - FalkorEvent - ).filter( - FalkorEvent.object_type == FalkorEventObjectType.PACKAGE - ).filter( - FalkorEvent.event_type == FalkorEventType.CREATE - ).subquery() - - return session.query( - Package - ).outerjoin( - distinct_package_creates, - Package.id == sa.cast( - distinct_package_creates.c.object_id, sa.TEXT) - ).filter( - sa.cast(distinct_package_creates.c.object_id, sa.TEXT) == None - ).all() - - def get_resources_without_create_events(session: sa.orm.Session) -> List[Resource]: distinct_resource_creates = session.query( FalkorEvent From 5556e18c8c3bb0adbe8b1bf950b3d4127f35f256 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 15:53:13 +0000 Subject: [PATCH 092/156] notify refactor Remove the package create events and change created_at setting logic to use the FalkorEventType enum --- ckanext/falkor/plugin.py | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 2b9955e..3b239b7 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -17,12 +17,10 @@ from ckanext.falkor.model import ( FalkorEvent, FalkorEventType, - FalkorEventObjectType, FalkorSyncJobStatus, new_falkor_sync_job, get_event, get_pending_events, - get_packages_without_create_events, get_resources_without_create_events, insert_new_falkor_sync_job, get_dictized_entity, @@ -300,6 +298,9 @@ def notify( if operation is 
None: return + if not isinstance(entity, ckan_model.Resource): + return + event = FalkorEvent( object_id=entity.id, event_type=DomainObjectOperationToFalkorEventTypeMap[ @@ -308,26 +309,12 @@ def notify( user_id=get_user_id(), ) - if isinstance(entity, ckan_model.Package): - # Currently Falkor does not track changes to packages. - # We only use the create event to create the dataset - # and ignore any further changes. - if event.event_type != FalkorEventType.CREATE: - return - - event.object_type = FalkorEventObjectType.PACKAGE - event.created_at = entity.metadata_created - - elif isinstance(entity, ckan_model.Resource): - event.object_type = FalkorEventObjectType.RESOURCE - if operation == DomainObjectOperation.new: - event.created_at = entity.created - elif operation == DomainObjectOperation.update: - event.created_at = entity.last_modified - else: - event.created_at = datetime.now() + if event.event_type == FalkorEventType.CREATE: + event.created_at = entity.created + elif event.event_type == FalkorEventType.UPDATE: + event.created_at = entity.last_modified else: - return + event.created_at = datetime.now() jobs.enqueue( self.event_handler.handle, From 84fe5806405197826c2523002a17e0d22fc0b751 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 15:55:01 +0000 Subject: [PATCH 093/156] remove object_id and type from FalkorEvent in before_show --- ckanext/falkor/plugin.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 3b239b7..0f7198c 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -273,11 +273,8 @@ def before_show(self, resource_dict): if not valid_url_pattern.match(request.url) or resource_id not in request.url: return - log.debug(resource_dict) - event = FalkorEvent( - object_id=resource_id, - object_type=FalkorEventObjectType.RESOURCE, + resource_id=resource_id, event_type=FalkorEventType.READ, user_id=get_user_id(), created_at=datetime.now() From 
dfa394ee22bb80352bd32d0d4b2e9914e861fe10 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:01:36 +0000 Subject: [PATCH 094/156] Change instance check to elif --- ckanext/falkor/plugin.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 0f7198c..056f730 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -294,8 +294,7 @@ def notify( ): if operation is None: return - - if not isinstance(entity, ckan_model.Resource): + elif not isinstance(entity, ckan_model.Resource): return event = FalkorEvent( From a1ae957fe2e6f0f7368efbc1f8c5b1f2f29864fc Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:03:23 +0000 Subject: [PATCH 095/156] Remove create packages without create events for sync --- ckanext/falkor/plugin.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 056f730..00ff2a0 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -167,20 +167,6 @@ def sync(self): try: insert_new_falkor_sync_job(session, job) - packages = get_packages_without_create_events(session) - for package in packages: - event = FalkorEvent( - object_id=package.id, - object_type=FalkorEventObjectType.PACKAGE, - event_type=FalkorEventType.CREATE, - user_id="sync_job", - created_at=package.metadata_created - ) - jobs.enqueue( - self.event_handler.handle, - [event, table_dictize(package, CONTEXT)] - ) - resources = get_resources_without_create_events(session) for resource in resources: event = FalkorEvent( From ff53b8e94168f1b5201cc531247eddda4db0c808 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:07:06 +0000 Subject: [PATCH 096/156] Refactor client Add dataset_exists and document_exists functions Remove FalkorEvent object as parameter for document create --- ckanext/falkor/client.py | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 
deletions(-) diff --git a/ckanext/falkor/client.py b/ckanext/falkor/client.py index fc07d53..152634b 100644 --- a/ckanext/falkor/client.py +++ b/ckanext/falkor/client.py @@ -95,6 +95,29 @@ def dataset_create(self, package_id: str): falkor_post(url, payload, self.__auth).raise_for_status() + def dataset_exists(self, package_id: str) -> bool: + url = self.__core_base_url + self.__tenant_id + "/dataset/" + package_id + "/info" + try: + falkor_get(url, self.__auth).raise_for_status() + return True + except HTTPError as e: + if e.response.status_code == 404: + return False + else: + raise e + + def document_exists(self, package_id: str, resource_id: str) -> bool: + url = self.__core_base_url + self.__tenant_id + \ + "/dataset/" + package_id + "/" + resource_id + "/info" + try: + falkor_get(url, self.__auth).raise_for_status() + return True + except HTTPError as e: + if e.response.status_code == 404: + return False + else: + raise e + def document_get(self, package_id: str, resource_id: str): url = ( self.__core_base_url @@ -112,7 +135,9 @@ def document_get(self, package_id: str, resource_id: str): def document_create( self, - event: FalkorEvent, + dataset_id: str, + document_id: str, + data: str, metadata: dict, ): @@ -120,17 +145,12 @@ def document_create( self.__core_base_url + self.__tenant_id + "/dataset/" - + metadata["package_id"] + + dataset_id + "/create" ) payload = { - "documentId": str(event.object_id), - "data": json.dumps([{ - "id": str(event.id), - "event_type": event.event_type, - "user_id": event.user_id, - "created_at": event.created_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ"), - }]), + "documentId": document_id, + "data": data, "documentMetadata": metadata, } From d4e3affc1482c91cd31c3d11dd41983b035dec00 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:09:00 +0000 Subject: [PATCH 097/156] Import HttpError from requests --- ckanext/falkor/client.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ckanext/falkor/client.py 
b/ckanext/falkor/client.py index 152634b..8cce6c8 100644 --- a/ckanext/falkor/client.py +++ b/ckanext/falkor/client.py @@ -1,10 +1,9 @@ import requests import logging -import json from typing import TypedDict from ckanext.falkor import auth -from ckanext.falkor.model import FalkorEvent +from requests import HTTPError log = logging.getLogger(__name__) @@ -100,7 +99,7 @@ def dataset_exists(self, package_id: str) -> bool: try: falkor_get(url, self.__auth).raise_for_status() return True - except HTTPError as e: + except HttpError as e: if e.response.status_code == 404: return False else: From 78ad6f506a4f214278f249bdf5c3df9fc4fb647a Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:09:11 +0000 Subject: [PATCH 098/156] Remove old object type filter from get resources func --- ckanext/falkor/model.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 4571414..e3d66de 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -71,8 +71,6 @@ def get_event(session: sa.orm.Session, event_id: str) -> FalkorEvent: def get_resources_without_create_events(session: sa.orm.Session) -> List[Resource]: distinct_resource_creates = session.query( FalkorEvent - ).filter( - FalkorEvent.object_type == FalkorEventObjectType.RESOURCE ).filter( FalkorEvent.event_type == FalkorEventType.CREATE ).subquery() From ba905e79418c8174098572ee436e9ab6dbd4765c Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:10:43 +0000 Subject: [PATCH 099/156] get_dictized_entity -> get_dictized_resource --- ckanext/falkor/model.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index e3d66de..02428d5 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -86,20 +86,12 @@ def get_resources_without_create_events(session: sa.orm.Session) -> List[Resourc ).all() -def get_dictized_entity( +def get_dictized_resource( 
session: sa.orm.Session, - context: dict, id: str, - object_type: FalkorEventObjectType + context: dict, + id: str, ) -> dict: - ckan_model_type: Union[Package, Resource] - if object_type == FalkorEventObjectType.RESOURCE: - ckan_model_type = Resource - elif object_type == FalkorEventObjectType.PACKAGE: - ckan_model_type = Package - else: - raise Exception("Invalid object type for retrieving dictized object") - - return table_dictize(session.query(ckan_model_type).get(id), context) + return table_dictize(session.query(Resource).get(id), context) def get_package_create_event_for_resource( @@ -108,8 +100,6 @@ def get_package_create_event_for_resource( ) -> FalkorEvent: package = session.query(FalkorEvent).filter( FalkorEvent.object_id == package_id - ).filter( - FalkorEvent.object_type == FalkorEventObjectType.PACKAGE ).filter( FalkorEvent.event_type == FalkorEventType.CREATE ).first() From ffc03983bbec6d268722b646804651eb90d59664 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:12:57 +0000 Subject: [PATCH 100/156] Remove package import from model --- ckanext/falkor/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 02428d5..e996588 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -6,7 +6,7 @@ from datetime import datetime from typing import Optional, List, Union from sqlalchemy.ext.declarative import declarative_base -from ckan.model import meta, Package, Resource +from ckan.model import meta, Resource from ckan.lib.dictization import table_dictize Base = declarative_base(metadata=meta.metadata) From ab0107392211719cbe2b4667d319d804515a6ca9 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:14:29 +0000 Subject: [PATCH 101/156] New job queue name enum --- ckanext/falkor/model.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index e996588..7678e3c 100644 --- 
a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -184,3 +184,9 @@ def get_sync_job_history(session: sa.orm.Session, limit: Optional[int] = None) - if limit is not None: query = query.limit(limit) return query.all() + + +class JobQueueName(Enum): + REPROCESS = "reprocess" + SYNC = "sync" + EVENT = "event" From b69121eae9458ada41d14555354797dd014f7e18 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:15:20 +0000 Subject: [PATCH 102/156] Remove Union import from model --- ckanext/falkor/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 7678e3c..027568e 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -4,7 +4,7 @@ from enum import Enum from uuid import UUID, uuid4 from datetime import datetime -from typing import Optional, List, Union +from typing import Optional, List from sqlalchemy.ext.declarative import declarative_base from ckan.model import meta, Resource from ckan.lib.dictization import table_dictize From 9837be819ef352154035d1a9810a74e4df461f18 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:16:02 +0000 Subject: [PATCH 103/156] Remove FalkorEventObjectType import --- ckanext/falkor/event_handler.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 0ba8133..14086b6 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -9,7 +9,6 @@ FalkorEvent, FalkorEventType, FalkorEventStatus, - FalkorEventObjectType, get_package_create_event_for_resource ) from ckanext.falkor.client import Client @@ -33,7 +32,7 @@ class EventHandler: def __init__(self, falkor: Client): self.falkor = falkor - def handle(self, event: FalkorEvent, entity: dict): + def handle_event(self, event: FalkorEvent): session: sa.orm.Session = meta.create_local_session() session.add(event) session.commit() From 
3b245fdbf620c2d18d9052e015fc92090ab1fcf1 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:34:59 +0000 Subject: [PATCH 104/156] Capitalise names for FalkorEventResourceType Enum --- ckanext/falkor/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 027568e..0b656b4 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -15,8 +15,8 @@ class FalkorEventResourceType(Enum): - default = 'default' - stream = 'stream' + DEFAULT = 'default' + STREAM = 'stream' class FalkorEventStatus(str, Enum): From c468035491206a47b2f34469b0a8aec4bbf33756 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:35:11 +0000 Subject: [PATCH 105/156] refactor handle to handle_event --- ckanext/falkor/event_handler.py | 118 +++++++++++++++----------------- 1 file changed, 55 insertions(+), 63 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 14086b6..98d9264 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -2,20 +2,18 @@ import sqlalchemy as sa from datetime import datetime -from requests import HTTPError from typing import List from ckanext.falkor.model import ( FalkorEvent, FalkorEventType, FalkorEventStatus, - get_package_create_event_for_resource + FalkorEventResourceType, ) from ckanext.falkor.client import Client from ckan.model import meta from ckan.model.domain_object import DomainObjectOperation -import ckan.plugins.toolkit as toolkit log = logging.getLogger(__name__) @@ -37,69 +35,63 @@ def handle_event(self, event: FalkorEvent): session.add(event) session.commit() try: - if event.object_type == FalkorEventObjectType.PACKAGE: - event.status = FalkorEventStatus.PROCESSING - session.commit() - self.falkor.dataset_create(entity["id"]) - - elif event.object_type == FalkorEventObjectType.RESOURCE: - package_create_event = get_package_create_event_for_resource( - session, 
entity["package_id"]) - - if package_create_event is None or package_create_event.status != FalkorEventStatus.SYNCED: + event.status = FalkorEventStatus.PROCESSING + session.commit() + if not self.falkor.dataset_exists(event.package_id): + self.falkor.dataset_create(event.package_id) + + document_event = { + "id": str(event.id), + "event_type": event.event_type, + "user_id": event.user_id, + "user_email": event.user_email, + "created_at": event.created_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ"), + } + + metadata = { + "org_id": event.org_id, + "org_name": event.org_name, + "package_id": event.package_id, + "package_name": event.package_name, + "resource_id": event.resource_id, + "resource_name": event.resource_name, + } + + if event.resource_type == FalkorEventResourceType.STREAM: + document_event["user_id"] = event.user_email + metadata["org_id"] = event.org_name + metadata["package_id"] = event.package_name + metadata["resource_name"] = event.resource_name + + if not self.falkor.document_exists(event.package_id, event.resource_id): + self.falkor.document_create( + event.package_id, + event.resource_id, + [document_event], + metadata + ) + else: + document_events: List[dict] = self.falkor.document_get( + event.package_id, + event.resource_id + ) + + if document_event in document_events: + log.warning( + f"[Event ID: {event.id}] Already synced to Falkor") + event.status = FalkorEventStatus.SYNCED + event.synced_at = datetime.now() + session.commit() return - event.status = FalkorEventStatus.PROCESSING - session.commit() - - try: - document_events: List[dict] = self.falkor.document_get( - entity["package_id"], - entity["id"] - ) - - document_event = { - "id": str(event.id), - "event_type": event.event_type, - "user_id": event.user_id, - "created_at": event.created_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ"), - } - - if document_event in document_events: - log.warning( - f"[Event ID: {event.id}] Already synced to Falkor") - event.status = FalkorEventStatus.SYNCED - 
event.synced_at = datetime.now() - session.commit() - return - - document_events.append(document_event) - - self.falkor.document_update( - str(event.object_id), - entity["package_id"], - document_events - ) - except HTTPError as e: - if e.response.status_code == 404: - log.debug(entity) - package_info = toolkit.get_action("package_show")( - data_dict={"id": entity["package_id"]} - ) - self.falkor.document_create( - event, - { - "org_id": package_info["organization"]["id"], - "org_name": package_info["organization"]["title"], - "package_id": entity["package_id"], - "package_name": package_info["name"], - "resource_id": str(event.object_id), - "resource_name": entity["name"] - } - ) - else: - raise e + document_events.append(document_event) + + self.falkor.document_update( + str(event.resource_id), + event.package_id, + document_events + ) event.status = FalkorEventStatus.SYNCED event.synced_at = datetime.now() From 49122ff89832ee56a05b5e8dd1ebc7d57be49416 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:36:28 +0000 Subject: [PATCH 106/156] Remove package_create_event_for_resource --- ckanext/falkor/model.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 0b656b4..b8756c4 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -94,19 +94,6 @@ def get_dictized_resource( return table_dictize(session.query(Resource).get(id), context) -def get_package_create_event_for_resource( - session: sa.orm.Session, - package_id: UUID -) -> FalkorEvent: - package = session.query(FalkorEvent).filter( - FalkorEvent.object_id == package_id - ).filter( - FalkorEvent.event_type == FalkorEventType.CREATE - ).first() - - return package - - def get_failed_events( session: sa.orm.Session, ) -> List[FalkorEvent]: From 4399449173bfdeb4182d71ed0700fa0b07f34c7a Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:38:59 +0000 Subject: [PATCH 107/156] Move CONTEXT to model as 
TOOLKIT_CONTEXT --- ckanext/falkor/model.py | 10 ++++++++++ ckanext/falkor/plugin.py | 8 +------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index b8756c4..33fea24 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -14,6 +14,13 @@ log = logging.getLogger(__name__) +TOOLKIT_CONTEXT = { + "model": ckan_model, + "ignore_auth": True, + "defer_commit": True +} + + class FalkorEventResourceType(Enum): DEFAULT = 'default' STREAM = 'stream' @@ -68,6 +75,9 @@ def get_event(session: sa.orm.Session, event_id: str) -> FalkorEvent: return session.query(FalkorEvent).get(event_id) +def create_new_event(session: sa.orm.Session, entity: dict) -> FalkorEvent: + + def get_resources_without_create_events(session: sa.orm.Session) -> List[Resource]: distinct_resource_creates = session.query( FalkorEvent diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 00ff2a0..4765326 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -10,11 +10,11 @@ import ckan.plugins.toolkit as toolkit import ckan.model as ckan_model import ckan.lib.base as base -from ckan.model.domain_object import DomainObjectOperation from ckan.lib.dictization import table_dictize from ckanext.falkor import client, auth from ckanext.falkor.model import ( + TOOLKIT_CONTEXT, FalkorEvent, FalkorEventType, FalkorSyncJobStatus, @@ -35,12 +35,6 @@ render = base.render -CONTEXT = { - "model": ckan_model, - "ignore_auth": True, - "defer_commit": True -} - log = logging.getLogger(__name__) From c532bb0508dbab7fde23dec10c5acf55c4156891 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:39:37 +0000 Subject: [PATCH 108/156] Rename CONTEXT to TOOLKIT_CONTEXT in plugin --- ckanext/falkor/plugin.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 4765326..946a26a 100644 --- a/ckanext/falkor/plugin.py +++ 
b/ckanext/falkor/plugin.py @@ -172,14 +172,14 @@ def sync(self): ) jobs.enqueue( self.event_handler.handle, - [event, table_dictize(resource, CONTEXT)] + [event, table_dictize(resource, TOOLKIT_CONTEXT)] ) pending_events = get_pending_events(session) for event in pending_events: entity = get_dictized_entity( session, - CONTEXT, + TOOLKIT_CONTEXT, str(event.object_id), event.object_type ) @@ -212,7 +212,7 @@ def reprocess_all(self): for event in failed_events: entity = get_dictized_entity( session, - CONTEXT, + TOOLKIT_CONTEXT, str(event.object_id), event.object_type ) @@ -230,7 +230,7 @@ def reprocess(self, event_id: str): event = get_event(session, event_id) entity = get_dictized_entity( session, - CONTEXT, + TOOLKIT_CONTEXT, str(event.object_id), event.object_type ) @@ -294,7 +294,7 @@ def notify( jobs.enqueue( self.event_handler.handle, - args=[event, table_dictize(entity, CONTEXT)] + args=[event, table_dictize(entity, TOOLKIT_CONTEXT)] ) def construct_falkor_url(self, resource): From 70eaee34b9305f4a0236595e41519387ad20ce0f Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:53:26 +0000 Subject: [PATCH 109/156] Add create_new_event function --- ckanext/falkor/model.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 33fea24..1432cbc 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -1,5 +1,6 @@ import logging import sqlalchemy as sa +import ckan.plugins.toolkit as toolkit from enum import Enum from uuid import UUID, uuid4 @@ -75,7 +76,32 @@ def get_event(session: sa.orm.Session, event_id: str) -> FalkorEvent: return session.query(FalkorEvent).get(event_id) -def create_new_event(session: sa.orm.Session, entity: dict) -> FalkorEvent: +def create_new_event(event_type: FalkorEventType, resource: dict, user: dict) -> FalkorEvent: + package = toolkit.get_action( + "package_show")(context=TOOLKIT_CONTEXT, data_dict={"id": 
resource["package_id"]}) + org = package["organization"] + + event = FalkorEvent( + org_id=org["id"], + org_name=org["name"], + package_id=package["id"], + package_name=package["name"], + resource_id=resource["id"], + resource_name=resource["name"], + user_id=user["id"], + user_email=user["email"], + event_type=event_type, + ) + + if event.event_type == FalkorEventType.CREATE: + event.created_at = resource["created"] + elif event.event_type == FalkorEventType.UPDATE: + event.created_at = resource["last_modified"] + else: + event.created_at = datetime.now() + + if resource["resource_type"] == FalkorEventResourceType.STREAM: + event.resource_type = FalkorEventResourceType.STREAM def get_resources_without_create_events(session: sa.orm.Session) -> List[Resource]: From 5210845542c3635d30952adac6b6ace86e84cc4e Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 18:58:31 +0000 Subject: [PATCH 110/156] Simplify event handling --- ckanext/falkor/event_handler.py | 1 + ckanext/falkor/plugin.py | 51 +++++++++++++++++---------------- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 98d9264..00ab077 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -9,6 +9,7 @@ FalkorEventType, FalkorEventStatus, FalkorEventResourceType, + JobQueueName ) from ckanext.falkor.client import Client diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 946a26a..01d0344 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -15,10 +15,12 @@ from ckanext.falkor import client, auth from ckanext.falkor.model import ( TOOLKIT_CONTEXT, + JobQueueName, FalkorEvent, FalkorEventType, FalkorSyncJobStatus, new_falkor_sync_job, + create_new_event, get_event, get_pending_events, get_resources_without_create_events, @@ -45,9 +47,18 @@ def get_config_value(config, key: str) -> str: return value -def get_user_id() -> str: +def get_user() -> dict: 
user = toolkit.g.userobj - return "guest" if not user else user.id + if not user: + return { + "id": "guest", + "email": "guest" + } + + return { + "id": user.id, + "email": user.email + } def check_access(): @@ -253,16 +264,16 @@ def before_show(self, resource_dict): if not valid_url_pattern.match(request.url) or resource_id not in request.url: return - event = FalkorEvent( - resource_id=resource_id, - event_type=FalkorEventType.READ, - user_id=get_user_id(), - created_at=datetime.now() + event = create_new_event( + FalkorEventType.READ, + resource_dict, + get_user() ) jobs.enqueue( - self.event_handler.handle, - [event, resource_dict] + self.event_handler.handle_event, + args=[event], + queue=JobQueueName.EVENT ) self.get_helpers() @@ -277,24 +288,16 @@ def notify( elif not isinstance(entity, ckan_model.Resource): return - event = FalkorEvent( - object_id=entity.id, - event_type=DomainObjectOperationToFalkorEventTypeMap[ - operation - ], - user_id=get_user_id(), + event = create_new_event( + DomainObjectOperationToFalkorEventTypeMap[operation], + table_dictize(entity, TOOLKIT_CONTEXT), + get_user() ) - if event.event_type == FalkorEventType.CREATE: - event.created_at = entity.created - elif event.event_type == FalkorEventType.UPDATE: - event.created_at = entity.last_modified - else: - event.created_at = datetime.now() - jobs.enqueue( - self.event_handler.handle, - args=[event, table_dictize(entity, TOOLKIT_CONTEXT)] + self.event_handler.handle_event, + args=[event], + queue=JobQueueName.EVENT ) def construct_falkor_url(self, resource): From 0a5a3b558fa55f3c16cfa13d151462168d2cf188 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 19:12:35 +0000 Subject: [PATCH 111/156] Handle stream resource types --- ckanext/falkor/event_handler.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 00ab077..d00acb8 100644 --- 
a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -36,11 +36,8 @@ def handle_event(self, event: FalkorEvent): session.add(event) session.commit() try: - event.status = FalkorEventStatus.PROCESSING session.commit() - if not self.falkor.dataset_exists(event.package_id): - self.falkor.dataset_create(event.package_id) document_event = { "id": str(event.id), @@ -59,23 +56,33 @@ def handle_event(self, event: FalkorEvent): "resource_name": event.resource_name, } + package_id = event.package_id + resource_id = event.resource_id + if event.resource_type == FalkorEventResourceType.STREAM: document_event["user_id"] = event.user_email + metadata["org_id"] = event.org_name metadata["package_id"] = event.package_name metadata["resource_name"] = event.resource_name - if not self.falkor.document_exists(event.package_id, event.resource_id): + package_id = event.package_name + resource_id = event.resource_name + + if not self.falkor.dataset_exists(package_id): + self.falkor.dataset_create(package_id) + + if not self.falkor.document_exists(package_id, resource_id): self.falkor.document_create( - event.package_id, - event.resource_id, + package_id, + resource_id, [document_event], metadata ) else: document_events: List[dict] = self.falkor.document_get( - event.package_id, - event.resource_id + package_id, + resource_id ) if document_event in document_events: @@ -89,8 +96,8 @@ def handle_event(self, event: FalkorEvent): document_events.append(document_event) self.falkor.document_update( - str(event.resource_id), - event.package_id, + resource_id, + package_id, document_events ) From 01a4230935c217407727a7a1a369349514807c67 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 19:13:54 +0000 Subject: [PATCH 112/156] Add ckan.model import --- ckanext/falkor/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 1432cbc..8699590 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py 
@@ -1,6 +1,7 @@ import logging import sqlalchemy as sa import ckan.plugins.toolkit as toolkit +import ckan.model as ckan_model from enum import Enum from uuid import UUID, uuid4 From 8df4c8acd4eeb43fcff2e60ea8537d8eded4c417 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 19:16:27 +0000 Subject: [PATCH 113/156] Fix ResourceType enum for migration --- .../falkor/migration/falkor/versions/376615bb5319_init.py | 6 +++--- ckanext/falkor/model.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py index ec93981..5546acc 100644 --- a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py +++ b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py @@ -23,8 +23,8 @@ class FalkorEventResourceType(Enum): - default = 'default' - stream = 'stream' + DEFAULT = 'default' + STREAM = 'stream' class FalkorEventStatus(Enum): @@ -81,6 +81,6 @@ def downgrade(): op.drop_table( "falkor_event" ) - op.execute('DROP TYPE IF EXISTS falkoreventobjecttype;') + op.execute('DROP TYPE IF EXISTS falkoreventresourcetype;') op.execute('DROP TYPE IF EXISTS falkoreventtype;') op.execute('DROP TYPE IF EXISTS falkoreventstatus;') diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 8699590..5fb0c2b 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -60,7 +60,7 @@ class FalkorEvent(Base): resource_type = sa.Column( sa.Enum(FalkorEventResourceType), nullable=False, - default=FalkorEventResourceType.default + default=FalkorEventResourceType.DEFAULT ) event_type = sa.Column(sa.Enum(FalkorEventType), nullable=False) user_id = sa.Column(sa.TEXT, nullable=False, default="guest") From 5b6ee2d39c7d1e94db7d6f150a3de1b9c9dc5984 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 19:18:36 +0000 Subject: [PATCH 114/156] comment out funcs to refactor --- ckanext/falkor/plugin.py | 96 
++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 49 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 01d0344..d08b206 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -23,9 +23,7 @@ create_new_event, get_event, get_pending_events, - get_resources_without_create_events, insert_new_falkor_sync_job, - get_dictized_entity, get_sync_job_history, get_failed_events ) @@ -172,32 +170,32 @@ def sync(self): try: insert_new_falkor_sync_job(session, job) - resources = get_resources_without_create_events(session) - for resource in resources: - event = FalkorEvent( - object_id=resource.id, - object_type=FalkorEventObjectType.RESOURCE, - event_type=FalkorEventType.CREATE, - user_id="sync_job", - created_at=resource.created - ) - jobs.enqueue( - self.event_handler.handle, - [event, table_dictize(resource, TOOLKIT_CONTEXT)] - ) - - pending_events = get_pending_events(session) - for event in pending_events: - entity = get_dictized_entity( - session, - TOOLKIT_CONTEXT, - str(event.object_id), - event.object_type - ) - jobs.enqueue( - self.event_handler.handle, - [event, entity] - ) + # resources = get_resources_without_create_events(session) + # for resource in resources: + # event = FalkorEvent( + # object_id=resource.id, + # object_type=FalkorEventObjectType.RESOURCE, + # event_type=FalkorEventType.CREATE, + # user_id="sync_job", + # created_at=resource.created + # ) + # jobs.enqueue( + # self.event_handler.handle, + # [event, table_dictize(resource, TOOLKIT_CONTEXT)] + # ) + # + # pending_events = get_pending_events(session) + # for event in pending_events: + # entity = get_dictized_entity( + # session, + # TOOLKIT_CONTEXT, + # str(event.object_id), + # event.object_type + # ) + # jobs.enqueue( + # self.event_handler.handle, + # [event, entity] + # ) job.status = FalkorSyncJobStatus.FINISHED toolkit.h.flash_success("Sync job started") @@ -220,17 +218,17 @@ def reprocess_all(self): failed_events = 
get_failed_events(session) session.close() - for event in failed_events: - entity = get_dictized_entity( - session, - TOOLKIT_CONTEXT, - str(event.object_id), - event.object_type - ) - jobs.enqueue( - self.event_handler.handle, - [event, entity] - ) + # for event in failed_events: + # entity = get_dictized_entity( + # session, + # TOOLKIT_CONTEXT, + # str(event.object_id), + # event.object_type + # ) + # jobs.enqueue( + # self.event_handler.handle, + # [event, entity] + # ) return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) @@ -239,17 +237,17 @@ def reprocess(self, event_id: str): log.debug(f"Reprocessing {event_id}") session: sa.orm.Session = ckan_model.meta.create_local_session() event = get_event(session, event_id) - entity = get_dictized_entity( - session, - TOOLKIT_CONTEXT, - str(event.object_id), - event.object_type - ) + # entity = get_dictized_entity( + # session, + # TOOLKIT_CONTEXT, + # str(event.object_id), + # event.object_type + # ) session.close() - jobs.enqueue( - self.event_handler.handle, - [event, entity] - ) + # jobs.enqueue( + # self.event_handler.handle, + # [event, entity] + # ) return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) # IResourceController From 71aa410304404c0a353f1c89d4cfe71a795ffeb4 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 19:19:40 +0000 Subject: [PATCH 115/156] Fix name for resourceType in migration --- ckanext/falkor/migration/falkor/versions/376615bb5319_init.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py index 5546acc..c794e06 100644 --- a/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py +++ b/ckanext/falkor/migration/falkor/versions/376615bb5319_init.py @@ -62,7 +62,7 @@ def upgrade(): "resource_type", sa.Enum(FalkorEventResourceType), nullable=False, - default=FalkorEventResourceType.default + 
default=FalkorEventResourceType.DEFAULT ), sa.Column("event_type", sa.Enum(FalkorEventType), nullable=False), sa.Column("user_id", sa.TEXT, nullable=False, default="guest"), From 256090f4fa2e6d7870167fb2cf5dbb2f93205238 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 19:34:00 +0000 Subject: [PATCH 116/156] QUery db directly to avoid recursive calls to "package_show" --- ckanext/falkor/model.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 5fb0c2b..9dc7d0d 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -8,7 +8,7 @@ from datetime import datetime from typing import Optional, List from sqlalchemy.ext.declarative import declarative_base -from ckan.model import meta, Resource +from ckan.model import meta, Resource, Package from ckan.lib.dictization import table_dictize Base = declarative_base(metadata=meta.metadata) @@ -78,9 +78,8 @@ def get_event(session: sa.orm.Session, event_id: str) -> FalkorEvent: def create_new_event(event_type: FalkorEventType, resource: dict, user: dict) -> FalkorEvent: - package = toolkit.get_action( - "package_show")(context=TOOLKIT_CONTEXT, data_dict={"id": resource["package_id"]}) - org = package["organization"] + package = get_dictized_package(resource["package_id"]) + org = package["org"] event = FalkorEvent( org_id=org["id"], @@ -131,6 +130,21 @@ def get_dictized_resource( return table_dictize(session.query(Resource).get(id), context) +def get_dictized_package( + id: str +) -> Package: + session = meta.create_local_session() + try: + return table_dictize(session.query(Package).get(id), TOOLKIT_CONTEXT) + finally: + session.close() + + +def get_dictized_org( + id: str +) -> + + def get_failed_events( session: sa.orm.Session, ) -> List[FalkorEvent]: From 9f5bc7d3463dbe143514e0f98cbe9ab961da0f69 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 19:58:38 +0000 Subject: [PATCH 117/156] Get 
package and org directly from db --- ckanext/falkor/model.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 9dc7d0d..317ad04 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -1,6 +1,5 @@ import logging import sqlalchemy as sa -import ckan.plugins.toolkit as toolkit import ckan.model as ckan_model from enum import Enum @@ -8,7 +7,7 @@ from datetime import datetime from typing import Optional, List from sqlalchemy.ext.declarative import declarative_base -from ckan.model import meta, Resource, Package +from ckan.model import meta, Resource, Package, Group from ckan.lib.dictization import table_dictize Base = declarative_base(metadata=meta.metadata) @@ -78,8 +77,10 @@ def get_event(session: sa.orm.Session, event_id: str) -> FalkorEvent: def create_new_event(event_type: FalkorEventType, resource: dict, user: dict) -> FalkorEvent: - package = get_dictized_package(resource["package_id"]) - org = package["org"] + package = table_dictize(Package.get( + resource["package_id"]), TOOLKIT_CONTEXT) + org = table_dictize(Group.get(package["owner_org"]), TOOLKIT_CONTEXT) + log.debug(org) event = FalkorEvent( org_id=org["id"], @@ -94,9 +95,9 @@ def create_new_event(event_type: FalkorEventType, resource: dict, user: dict) -> ) if event.event_type == FalkorEventType.CREATE: - event.created_at = resource["created"] + event.created_at = datetime.fromtimestamp(resource["created"]) elif event.event_type == FalkorEventType.UPDATE: - event.created_at = resource["last_modified"] + event.created_at = datetime.fromtimestamp(resource["last_modified"]) else: event.created_at = datetime.now() @@ -126,6 +127,8 @@ def get_dictized_resource( session: sa.orm.Session, context: dict, id: str, + + ) -> dict: return table_dictize(session.query(Resource).get(id), context) @@ -140,11 +143,6 @@ def get_dictized_package( session.close() -def get_dictized_org( - id: str -) -> - - def 
get_failed_events( session: sa.orm.Session, ) -> List[FalkorEvent]: From 2f892018c64893bc0367629440cb39bf07f057ab Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 19:58:51 +0000 Subject: [PATCH 118/156] Remove debug line for org --- ckanext/falkor/model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 317ad04..2652e03 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -80,7 +80,6 @@ def create_new_event(event_type: FalkorEventType, resource: dict, user: dict) -> package = table_dictize(Package.get( resource["package_id"]), TOOLKIT_CONTEXT) org = table_dictize(Group.get(package["owner_org"]), TOOLKIT_CONTEXT) - log.debug(org) event = FalkorEvent( org_id=org["id"], From c9895cf1b07bc074242f9112716bb792f9a4b6d7 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 20:08:58 +0000 Subject: [PATCH 119/156] Fix typo for HTTPError exception --- ckanext/falkor/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/falkor/client.py b/ckanext/falkor/client.py index 8cce6c8..d674f84 100644 --- a/ckanext/falkor/client.py +++ b/ckanext/falkor/client.py @@ -99,7 +99,7 @@ def dataset_exists(self, package_id: str) -> bool: try: falkor_get(url, self.__auth).raise_for_status() return True - except HttpError as e: + except HTTPError as e: if e.response.status_code == 404: return False else: From e206f1c4efbed213c79e062d03e643e11424d197 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 20:09:11 +0000 Subject: [PATCH 120/156] Return result from create_new_event --- ckanext/falkor/model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 2652e03..bad9067 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -103,6 +103,8 @@ def create_new_event(event_type: FalkorEventType, resource: dict, user: dict) -> if resource["resource_type"] == FalkorEventResourceType.STREAM: 
event.resource_type = FalkorEventResourceType.STREAM + return event + def get_resources_without_create_events(session: sa.orm.Session) -> List[Resource]: distinct_resource_creates = session.query( From e844f02816729708e48a63e8f0bd88d9a32c2653 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 20:11:28 +0000 Subject: [PATCH 121/156] Add missing status column to FalkorEvent --- ckanext/falkor/model.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index bad9067..fcfc05e 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -64,6 +64,10 @@ class FalkorEvent(Base): event_type = sa.Column(sa.Enum(FalkorEventType), nullable=False) user_id = sa.Column(sa.TEXT, nullable=False, default="guest") user_email = sa.Column(sa.TEXT, nullable=False, default="guest") + status = sa.Column( + sa.Enum(FalkorEventStatus), + default=FalkorEventStatus.PENDING + ) created_at = sa.Column(sa.DateTime, nullable=False) synced_at = sa.Column(sa.DateTime, nullable=True) From 0d08da7907b371c8c8a729c72268317e7bd46751 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 20:18:58 +0000 Subject: [PATCH 122/156] Remove commit before setting event status --- ckanext/falkor/event_handler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index d00acb8..de1d989 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -34,7 +34,6 @@ def __init__(self, falkor: Client): def handle_event(self, event: FalkorEvent): session: sa.orm.Session = meta.create_local_session() session.add(event) - session.commit() try: event.status = FalkorEventStatus.PROCESSING session.commit() From f8370166eda24b143e6b4afb84f8ce3e1614af67 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 20:19:15 +0000 Subject: [PATCH 123/156] formatting --- ckanext/falkor/model.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff 
--git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index fcfc05e..285b42b 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -64,10 +64,7 @@ class FalkorEvent(Base): event_type = sa.Column(sa.Enum(FalkorEventType), nullable=False) user_id = sa.Column(sa.TEXT, nullable=False, default="guest") user_email = sa.Column(sa.TEXT, nullable=False, default="guest") - status = sa.Column( - sa.Enum(FalkorEventStatus), - default=FalkorEventStatus.PENDING - ) + status = sa.Column(sa.Enum(FalkorEventStatus), default=FalkorEventStatus.PENDING) created_at = sa.Column(sa.DateTime, nullable=False) synced_at = sa.Column(sa.DateTime, nullable=True) From 472773b64d86c612130bfaca7ba03433256853a5 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 20:19:21 +0000 Subject: [PATCH 124/156] Remove queue name for now --- ckanext/falkor/plugin.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index d08b206..d6acbd1 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -271,7 +271,6 @@ def before_show(self, resource_dict): jobs.enqueue( self.event_handler.handle_event, args=[event], - queue=JobQueueName.EVENT ) self.get_helpers() @@ -295,7 +294,6 @@ def notify( jobs.enqueue( self.event_handler.handle_event, args=[event], - queue=JobQueueName.EVENT ) def construct_falkor_url(self, resource): From 3db146a9edb4d30a4b8e37c9ef57eebbef4c12f7 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 20:19:32 +0000 Subject: [PATCH 125/156] formatting --- ckanext/falkor/model.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 285b42b..42e7386 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -64,7 +64,8 @@ class FalkorEvent(Base): event_type = sa.Column(sa.Enum(FalkorEventType), nullable=False) user_id = sa.Column(sa.TEXT, nullable=False, default="guest") user_email = sa.Column(sa.TEXT, 
nullable=False, default="guest") - status = sa.Column(sa.Enum(FalkorEventStatus), default=FalkorEventStatus.PENDING) + status = sa.Column(sa.Enum(FalkorEventStatus), + default=FalkorEventStatus.PENDING) created_at = sa.Column(sa.DateTime, nullable=False) synced_at = sa.Column(sa.DateTime, nullable=True) @@ -129,8 +130,6 @@ def get_dictized_resource( session: sa.orm.Session, context: dict, id: str, - - ) -> dict: return table_dictize(session.query(Resource).get(id), context) From 3a73989e640f79ae6b703b5643f985cabf0882b6 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 20:22:51 +0000 Subject: [PATCH 126/156] convert document events to json --- ckanext/falkor/event_handler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index de1d989..e48165e 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -1,4 +1,5 @@ import logging +import json import sqlalchemy as sa from datetime import datetime @@ -75,7 +76,7 @@ def handle_event(self, event: FalkorEvent): self.falkor.document_create( package_id, resource_id, - [document_event], + json.dumps([document_event]), metadata ) else: @@ -97,7 +98,7 @@ def handle_event(self, event: FalkorEvent): self.falkor.document_update( resource_id, package_id, - document_events + json.dumps(document_events) ) event.status = FalkorEventStatus.SYNCED From 115c306c60058ab71936a1125ae61a245d7370b0 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 20:28:38 +0000 Subject: [PATCH 127/156] parse datetime from string --- ckanext/falkor/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 42e7386..1fda293 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -96,9 +96,9 @@ def create_new_event(event_type: FalkorEventType, resource: dict, user: dict) -> ) if event.event_type == FalkorEventType.CREATE: - 
event.created_at = datetime.fromtimestamp(resource["created"]) + event.created_at = datetime.fromisoformat(resource["created"]) elif event.event_type == FalkorEventType.UPDATE: - event.created_at = datetime.fromtimestamp(resource["last_modified"]) + event.created_at = datetime.fromisoformat(resource["last_modified"]) else: event.created_at = datetime.now() From 8361a0c861c36997f0778b6efe1c2c34c6c10366 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Tue, 19 Nov 2024 20:45:02 +0000 Subject: [PATCH 128/156] Fix issues with parsing document body json --- ckanext/falkor/client.py | 3 ++- ckanext/falkor/event_handler.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ckanext/falkor/client.py b/ckanext/falkor/client.py index d674f84..7a9e5bf 100644 --- a/ckanext/falkor/client.py +++ b/ckanext/falkor/client.py @@ -1,5 +1,6 @@ import requests import logging +import json from typing import TypedDict from ckanext.falkor import auth @@ -149,7 +150,7 @@ def document_create( ) payload = { "documentId": document_id, - "data": data, + "data": json.dumps(data), "documentMetadata": metadata, } diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index e48165e..84254eb 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -44,7 +44,7 @@ def handle_event(self, event: FalkorEvent): "event_type": event.event_type, "user_id": event.user_id, "user_email": event.user_email, - "created_at": event.created_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ"), + "created_at": event.created_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ") } metadata = { @@ -76,7 +76,7 @@ def handle_event(self, event: FalkorEvent): self.falkor.document_create( package_id, resource_id, - json.dumps([document_event]), + [document_event], metadata ) else: @@ -98,7 +98,7 @@ def handle_event(self, event: FalkorEvent): self.falkor.document_update( resource_id, package_id, - json.dumps(document_events) + document_events ) event.status = FalkorEventStatus.SYNCED From 
9cae957bc6007e5629d987161318ba11b6b7971d Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 09:48:16 +0000 Subject: [PATCH 129/156] Just use datetime.now for updated event --- ckanext/falkor/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 1fda293..e4c974c 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -95,10 +95,10 @@ def create_new_event(event_type: FalkorEventType, resource: dict, user: dict) -> event_type=event_type, ) + log.debug("LAST MODIFIED " + str(resource["last_modified"])) + if event.event_type == FalkorEventType.CREATE: event.created_at = datetime.fromisoformat(resource["created"]) - elif event.event_type == FalkorEventType.UPDATE: - event.created_at = datetime.fromisoformat(resource["last_modified"]) else: event.created_at = datetime.now() From 2428fe092165a942da64914995f68c20040f2061 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 09:48:31 +0000 Subject: [PATCH 130/156] Update admin template with new FalkorEvent model --- ckanext/falkor/templates/admin/base.html | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ckanext/falkor/templates/admin/base.html b/ckanext/falkor/templates/admin/base.html index a35d9c2..4c84b8c 100644 --- a/ckanext/falkor/templates/admin/base.html +++ b/ckanext/falkor/templates/admin/base.html @@ -19,8 +19,8 @@

    Failed events

    - - + + @@ -43,8 +43,8 @@

    Failed events

    {% for event in failed_events %} - - + + From f65988b1cd30b1d03a2d83c585e84ac49bf896ac Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 09:56:43 +0000 Subject: [PATCH 131/156] Make reprocessing single event synchronous --- ckanext/falkor/event_handler.py | 1 + ckanext/falkor/plugin.py | 26 ++++++++++++-------------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 84254eb..15407b6 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -109,5 +109,6 @@ def handle_event(self, event: FalkorEvent): f"[Event ID: {event.id}] {e}") event.status = FalkorEventStatus.FAILED session.commit() + raise e finally: session.close() diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index d6acbd1..0a4ec90 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -234,20 +234,18 @@ def reprocess_all(self): def reprocess(self, event_id: str): check_access() - log.debug(f"Reprocessing {event_id}") - session: sa.orm.Session = ckan_model.meta.create_local_session() - event = get_event(session, event_id) - # entity = get_dictized_entity( - # session, - # TOOLKIT_CONTEXT, - # str(event.object_id), - # event.object_type - # ) - session.close() - # jobs.enqueue( - # self.event_handler.handle, - # [event, entity] - # ) + try: + log.debug(f"Reprocessing {event_id}") + session: sa.orm.Session = ckan_model.meta.create_local_session() + event = get_event(session, event_id) + session.close() + self.event_handler.handle_event(event) + toolkit.h.flash_success(f"Event {event_id} reprocessed") + except Exception as e: + toolkit.h.flash_error( + f"Could not reprocess event {event_id}. 
Please check the logs") + log.exception(e) + return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) # IResourceController From f9da5293ea9d870b353f7232e0d0d5010210e87d Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 10:10:52 +0000 Subject: [PATCH 132/156] Reprocess all button --- ckanext/falkor/plugin.py | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 0a4ec90..2042b0c 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -17,6 +17,7 @@ TOOLKIT_CONTEXT, JobQueueName, FalkorEvent, + FalkorEventStatus, FalkorEventType, FalkorSyncJobStatus, new_falkor_sync_job, @@ -213,38 +214,45 @@ def sync(self): def reprocess_all(self): check_access() - log.debug("Reprocessing all failed events") session: sa.orm.Session = ckan_model.meta.create_local_session() - failed_events = get_failed_events(session) - session.close() + try: + failed_events = get_failed_events(session) - # for event in failed_events: - # entity = get_dictized_entity( - # session, - # TOOLKIT_CONTEXT, - # str(event.object_id), - # event.object_type - # ) - # jobs.enqueue( - # self.event_handler.handle, - # [event, entity] - # ) + for event in failed_events: + session.add(event) + event.status = FalkorEventStatus.PENDING + jobs.enqueue( + self.event_handler.handle_event, + [event] + ) + + session.commit() + toolkit.h.flash_success( + f"Reprocessing {len(failed_events)} failed events") + except Exception as e: + session.rollback() + toolkit.h.flash_error( + "Something went wrong when trying to failed events") + log.exception(e) + finally: + session.close() return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) def reprocess(self, event_id: str): check_access() + session: sa.orm.Session = ckan_model.meta.create_local_session() try: log.debug(f"Reprocessing {event_id}") - session: sa.orm.Session = 
ckan_model.meta.create_local_session() event = get_event(session, event_id) - session.close() self.event_handler.handle_event(event) toolkit.h.flash_success(f"Event {event_id} reprocessed") except Exception as e: toolkit.h.flash_error( f"Could not reprocess event {event_id}. Please check the logs") log.exception(e) + finally: + session.close() return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) From b13132c8c7b6397de4bb7a5ca4d1fa7bcf7f2767 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 10:29:40 +0000 Subject: [PATCH 133/156] Reintroduce sync job with latest changes --- ckanext/falkor/model.py | 6 ++--- ckanext/falkor/plugin.py | 48 ++++++++++++++++++---------------------- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index e4c974c..ad1dc37 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -102,7 +102,7 @@ def create_new_event(event_type: FalkorEventType, resource: dict, user: dict) -> else: event.created_at = datetime.now() - if resource["resource_type"] == FalkorEventResourceType.STREAM: + if resource["resource_type"] == FalkorEventResourceType.STREAM.value: event.resource_type = FalkorEventResourceType.STREAM return event @@ -120,9 +120,9 @@ def get_resources_without_create_events(session: sa.orm.Session) -> List[Resourc ).outerjoin( distinct_resource_creates, Resource.id == sa.cast( - distinct_resource_creates.c.object_id, sa.TEXT) + distinct_resource_creates.c.resource_id, sa.TEXT) ).filter( - sa.cast(distinct_resource_creates.c.object_id, sa.TEXT) == None + sa.cast(distinct_resource_creates.c.resource_id, sa.TEXT) == None ).all() diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 2042b0c..e0bda1f 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -24,6 +24,7 @@ create_new_event, get_event, get_pending_events, + get_resources_without_create_events, insert_new_falkor_sync_job, 
get_sync_job_history, get_failed_events @@ -171,35 +172,28 @@ def sync(self): try: insert_new_falkor_sync_job(session, job) - # resources = get_resources_without_create_events(session) - # for resource in resources: - # event = FalkorEvent( - # object_id=resource.id, - # object_type=FalkorEventObjectType.RESOURCE, - # event_type=FalkorEventType.CREATE, - # user_id="sync_job", - # created_at=resource.created - # ) - # jobs.enqueue( - # self.event_handler.handle, - # [event, table_dictize(resource, TOOLKIT_CONTEXT)] - # ) - # - # pending_events = get_pending_events(session) - # for event in pending_events: - # entity = get_dictized_entity( - # session, - # TOOLKIT_CONTEXT, - # str(event.object_id), - # event.object_type - # ) - # jobs.enqueue( - # self.event_handler.handle, - # [event, entity] - # ) + resources = get_resources_without_create_events(session) + for resource in resources: + event = create_new_event( + FalkorEventType.CREATE, + table_dictize(resource, TOOLKIT_CONTEXT), + {"id": "sync_job", "email": "sync_job"} + ) + jobs.enqueue( + self.event_handler.handle_event, + [event] + ) + + pending_events = get_pending_events(session) + for event in pending_events: + jobs.enqueue( + self.event_handler.handle_event, + [event] + ) job.status = FalkorSyncJobStatus.FINISHED - toolkit.h.flash_success("Sync job started") + toolkit.h.flash_success( + f"Sync job started to process {len(resources) + len(pending_events)} pending events") except Exception as e: log.exception(f"[Job ID: {job_id}] {e}") session.rollback() From a83553565fb2eccfdc406c4a195eb61972f88780 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 10:40:53 +0000 Subject: [PATCH 134/156] Add retries to falkor calls --- ckanext/falkor/client.py | 44 ++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/ckanext/falkor/client.py b/ckanext/falkor/client.py index 7a9e5bf..25e5c70 100644 --- a/ckanext/falkor/client.py +++ b/ckanext/falkor/client.py @@ 
-4,7 +4,8 @@ from typing import TypedDict from ckanext.falkor import auth -from requests import HTTPError +from requests import HTTPError, Session +from requests.adapters import HTTPAdapter, Retry log = logging.getLogger(__name__) @@ -22,42 +23,46 @@ def base_headers(access_token: str) -> HttpHeaders: def falkor_post( + session: Session, url: str, payload: dict, auth: auth.Auth, ) -> requests.Response: - response = requests.post(url, headers=base_headers( + response = session.post(url, headers=base_headers( auth.access_token), json=payload, timeout=120) log.debug(response.json()) return response def falkor_put( + session: Session, url: str, payload: dict, auth: auth.Auth, ) -> requests.Response: - response = requests.put(url, headers=base_headers( + response = session.put(url, headers=base_headers( auth.access_token), json=payload, timeout=120) log.debug(response.json()) return response def falkor_get( + session: Session, url: str, auth: auth.Auth, ) -> requests.Response: - response = requests.get(url, headers=base_headers( + response = session.get(url, headers=base_headers( auth.access_token), timeout=120) log.debug(response.json()) return response def falkor_delete( + session: Session, url: str, auth: auth.Auth, ) -> requests.Response: - response = requests.delete(url, headers=base_headers( + response = session.delete(url, headers=base_headers( auth.access_token), timeout=120) log.debug(response.json()) return response @@ -68,6 +73,7 @@ class Client: __core_base_url: str __admin_base_url: str __tenant_id: str + __http_session: Session def __init__( self, @@ -81,6 +87,17 @@ def __init__( self.__core_base_url = core_base_url self.__admin_base_url = admin_base_url + http_session = requests.Session() + retries = Retry(total=5, + backoff_factor=0.1, + status_forcelist=[500, 502, 503, 504]) + http_session.mount( + self.__core_base_url, HTTPAdapter(max_retries=retries)) + http_session.mount( + self.__admin_base_url, HTTPAdapter(max_retries=retries)) + + 
self.__http_session = http_session + def dataset_create(self, package_id: str): url = self.__admin_base_url + self.__tenant_id + "/dataset" payload = { @@ -93,12 +110,14 @@ def dataset_create(self, package_id: str): "tokensEnabled": "false", } - falkor_post(url, payload, self.__auth).raise_for_status() + falkor_post(self.__http_session, url, payload, + self.__auth).raise_for_status() def dataset_exists(self, package_id: str) -> bool: url = self.__core_base_url + self.__tenant_id + "/dataset/" + package_id + "/info" try: - falkor_get(url, self.__auth).raise_for_status() + falkor_get(self.__http_session, url, + self.__auth).raise_for_status() return True except HTTPError as e: if e.response.status_code == 404: @@ -110,7 +129,8 @@ def document_exists(self, package_id: str, resource_id: str) -> bool: url = self.__core_base_url + self.__tenant_id + \ "/dataset/" + package_id + "/" + resource_id + "/info" try: - falkor_get(url, self.__auth).raise_for_status() + falkor_get(self.__http_session, url, + self.__auth).raise_for_status() return True except HTTPError as e: if e.response.status_code == 404: @@ -129,7 +149,7 @@ def document_get(self, package_id: str, resource_id: str): + "/body" ) - resp = falkor_get(url, self.__auth) + resp = falkor_get(self.__http_session, url, self.__auth) resp.raise_for_status() return resp.json() @@ -154,7 +174,8 @@ def document_create( "documentMetadata": metadata, } - falkor_post(url, payload, self.__auth).raise_for_status() + falkor_post(self.__http_session, url, payload, + self.__auth).raise_for_status() def document_update( self, @@ -172,4 +193,5 @@ def document_update( + "/body" ) - falkor_put(url, data, self.__auth).raise_for_status() + falkor_put(self.__http_session, url, data, + self.__auth).raise_for_status() From 69a8b41f78c34747e1b6f2638e9ddc1b4fbfd03f Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 10:53:51 +0000 Subject: [PATCH 135/156] ADd commit to add a "running" sync job --- ckanext/falkor/plugin.py | 1 + 1 
file changed, 1 insertion(+) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index e0bda1f..165f061 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -171,6 +171,7 @@ def sync(self): job = new_falkor_sync_job(job_id, start=datetime.now()) try: insert_new_falkor_sync_job(session, job) + session.commit() resources = get_resources_without_create_events(session) for resource in resources: From 81bcd348c8e9b6e73e8a76a0e1c35d91282f8e45 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 10:54:13 +0000 Subject: [PATCH 136/156] Remove unused imports --- ckanext/falkor/plugin.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 165f061..020d57b 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -15,8 +15,6 @@ from ckanext.falkor import client, auth from ckanext.falkor.model import ( TOOLKIT_CONTEXT, - JobQueueName, - FalkorEvent, FalkorEventStatus, FalkorEventType, FalkorSyncJobStatus, From 673f490dbe4fb7cc12d25865788c14598fa6bc39 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 12:28:34 +0000 Subject: [PATCH 137/156] Remove job queue name --- ckanext/falkor/model.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index ad1dc37..6146560 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -221,9 +221,3 @@ def get_sync_job_history(session: sa.orm.Session, limit: Optional[int] = None) - if limit is not None: query = query.limit(limit) return query.all() - - -class JobQueueName(Enum): - REPROCESS = "reprocess" - SYNC = "sync" - EVENT = "event" From 7874cd5c838320431005fab9d61bbe72c1469fa8 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 12:59:44 +0000 Subject: [PATCH 138/156] Remove unused code --- ckanext/falkor/event_handler.py | 2 -- ckanext/falkor/model.py | 10 ---------- 2 files changed, 12 deletions(-) diff --git 
a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 15407b6..b24afe3 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -1,5 +1,4 @@ import logging -import json import sqlalchemy as sa from datetime import datetime @@ -10,7 +9,6 @@ FalkorEventType, FalkorEventStatus, FalkorEventResourceType, - JobQueueName ) from ckanext.falkor.client import Client diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 6146560..3f483a9 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -95,8 +95,6 @@ def create_new_event(event_type: FalkorEventType, resource: dict, user: dict) -> event_type=event_type, ) - log.debug("LAST MODIFIED " + str(resource["last_modified"])) - if event.event_type == FalkorEventType.CREATE: event.created_at = datetime.fromisoformat(resource["created"]) else: @@ -126,14 +124,6 @@ def get_resources_without_create_events(session: sa.orm.Session) -> List[Resourc ).all() -def get_dictized_resource( - session: sa.orm.Session, - context: dict, - id: str, -) -> dict: - return table_dictize(session.query(Resource).get(id), context) - - def get_dictized_package( id: str ) -> Package: From 06b9d964d3eece721dd678652fc02a8bafcfc40e Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 13:02:03 +0000 Subject: [PATCH 139/156] Remove outdated TODO --- ckanext/falkor/plugin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 020d57b..f80c8cd 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -89,7 +89,6 @@ def update_config(self, config): config, "falkor_admin.admin_tab", "Falkor", icon="gavel") def configure(self, config): - # TODO: Check if plugins has been initialised before tracking events self.config = config endpoint = get_config_value(config, "ckanext.falkor.auth.endpoint") client_id = get_config_value(config, "ckanext.falkor.auth.client_id") From 33deeeb542897570d6fadf0e279b87c5679084a8 Mon 
Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 13:14:39 +0000 Subject: [PATCH 140/156] Remove org id from audit path --- ckanext/falkor/plugin.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index f80c8cd..2ae51bc 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -297,11 +297,8 @@ def notify( def construct_falkor_url(self, resource): resource_id = resource["id"] package_id = resource["package_id"] - package_info = toolkit.get_action( - "package_show")(data_dict={"id": package_id}) - org_id = package_info["organization"]["id"] - return f"{self.audit_base_url}{org_id}/{package_id}/{resource_id}" + return f"{self.audit_base_url}{package_id}/{resource_id}" def get_helpers(self): return {"construct_falkor_url": self.construct_falkor_url} From 3633e42edae454d4e7690806864d020125a41e59 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 13:32:41 +0000 Subject: [PATCH 141/156] refactor audit url --- ckanext/falkor/plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 2ae51bc..fc6aaa4 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -298,7 +298,7 @@ def construct_falkor_url(self, resource): resource_id = resource["id"] package_id = resource["package_id"] - return f"{self.audit_base_url}{package_id}/{resource_id}" + return f"{self.audit_base_url}dataset/{package_id}/document/{resource_id}" def get_helpers(self): return {"construct_falkor_url": self.construct_falkor_url} From 83b815958ff802666b2945dc8811696168230da4 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 13:34:19 +0000 Subject: [PATCH 142/156] Fix bug with setting resource_id for stream type and add resource type to metadata --- ckanext/falkor/event_handler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckanext/falkor/event_handler.py 
b/ckanext/falkor/event_handler.py index b24afe3..d1f10b2 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -52,6 +52,7 @@ def handle_event(self, event: FalkorEvent): "package_name": event.package_name, "resource_id": event.resource_id, "resource_name": event.resource_name, + "resource_type": event.resource_type } package_id = event.package_id @@ -62,7 +63,7 @@ def handle_event(self, event: FalkorEvent): metadata["org_id"] = event.org_name metadata["package_id"] = event.package_name - metadata["resource_name"] = event.resource_name + metadata["resource_id"] = event.resource_name package_id = event.package_name resource_id = event.resource_name From 3df89af62ce3173107dd32ce2b62a77d45ad6a96 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 13:44:42 +0000 Subject: [PATCH 143/156] get raw value from resource type for metadata --- ckanext/falkor/event_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index d1f10b2..499d996 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -52,7 +52,7 @@ def handle_event(self, event: FalkorEvent): "package_name": event.package_name, "resource_id": event.resource_id, "resource_name": event.resource_name, - "resource_type": event.resource_type + "resource_type": event.resource_type.value } package_id = event.package_id From 6ca51309e3e44bae25cfb6663096b2aa5d9098b8 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 13:51:09 +0000 Subject: [PATCH 144/156] Handle session from within get_event func --- ckanext/falkor/model.py | 8 ++++++-- ckanext/falkor/plugin.py | 5 +---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 3f483a9..b23e81e 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -74,8 +74,12 @@ def get_pending_events(session: sa.orm.Session) -> 
List[FalkorEvent]: return session.query(FalkorEvent).filter(FalkorEvent.status == FalkorEventStatus.PENDING).all() -def get_event(session: sa.orm.Session, event_id: str) -> FalkorEvent: - return session.query(FalkorEvent).get(event_id) +def get_event(event_id: str) -> FalkorEvent: + session = meta.create_local_session() + try: + return session.query(FalkorEvent).get(event_id) + finally: + session.close() def create_new_event(event_type: FalkorEventType, resource: dict, user: dict) -> FalkorEvent: diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index fc6aaa4..0027e28 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -233,18 +233,15 @@ def reprocess_all(self): def reprocess(self, event_id: str): check_access() - session: sa.orm.Session = ckan_model.meta.create_local_session() try: log.debug(f"Reprocessing {event_id}") - event = get_event(session, event_id) + event = get_event(event_id) self.event_handler.handle_event(event) toolkit.h.flash_success(f"Event {event_id} reprocessed") except Exception as e: toolkit.h.flash_error( f"Could not reprocess event {event_id}. 
Please check the logs") log.exception(e) - finally: - session.close() return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) From e577bfe08081aab5b5f3980364937db1736e49d9 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 14:27:35 +0000 Subject: [PATCH 145/156] Use get_events with filter input instead of separate functiosn for each event status --- ckanext/falkor/model.py | 23 +++++++++++++++++------ ckanext/falkor/plugin.py | 9 ++++----- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index b23e81e..b135b73 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -74,6 +74,23 @@ def get_pending_events(session: sa.orm.Session) -> List[FalkorEvent]: return session.query(FalkorEvent).filter(FalkorEvent.status == FalkorEventStatus.PENDING).all() +def get_failed_events( + session: sa.orm.Session, +) -> List[FalkorEvent]: + return session.query(FalkorEvent).filter(FalkorEvent.status == FalkorEventStatus.FAILED).order_by(FalkorEvent.created_at.desc()).all() + + +def get_events(status: Optional[FalkorEventStatus] = None) -> FalkorEvent: + session = meta.create_local_session() + try: + query = session.query(FalkorEvent).order_by(FalkorEvent.created_at.desc()) + if status is not None: + query = query.filter(FalkorEvent.status == status) + return query.all() + finally: + session.close() + + def get_event(event_id: str) -> FalkorEvent: session = meta.create_local_session() try: @@ -138,12 +155,6 @@ def get_dictized_package( session.close() -def get_failed_events( - session: sa.orm.Session, -) -> List[FalkorEvent]: - return session.query(FalkorEvent).filter(FalkorEvent.status == FalkorEventStatus.FAILED).order_by(FalkorEvent.created_at.desc()).all() - - class FalkorSyncJobStatus(Enum): RUNNING = "running" FINISHED = "finished" diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 0027e28..15cea32 100644 --- a/ckanext/falkor/plugin.py +++ 
b/ckanext/falkor/plugin.py @@ -20,12 +20,11 @@ FalkorSyncJobStatus, new_falkor_sync_job, create_new_event, + get_events, get_event, - get_pending_events, get_resources_without_create_events, insert_new_falkor_sync_job, get_sync_job_history, - get_failed_events ) from ckanext.falkor.event_handler import ( EventHandler, @@ -150,7 +149,7 @@ def admin_tab(self): session: sa.orm.Session = ckan_model.meta.create_local_session() recent_job_limit = 10 sync_jobs = get_sync_job_history(session, recent_job_limit) - failed_events = get_failed_events(session) + failed_events = get_events(FalkorEventStatus.FAILED) session.close() return render( "admin/base.html", @@ -182,7 +181,7 @@ def sync(self): [event] ) - pending_events = get_pending_events(session) + pending_events = get_events(FalkorEventStatus.PENDING) for event in pending_events: jobs.enqueue( self.event_handler.handle_event, @@ -208,7 +207,7 @@ def reprocess_all(self): check_access() session: sa.orm.Session = ckan_model.meta.create_local_session() try: - failed_events = get_failed_events(session) + failed_events = get_events(FalkorEventStatus.FAILED) for event in failed_events: session.add(event) From 2e99afb27cfa744cb398e677edd7a3d9eaaeaca5 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 14:52:18 +0000 Subject: [PATCH 146/156] Add tabs for event statuses to falkor admin tab --- ckanext/falkor/plugin.py | 11 +++++++++-- ckanext/falkor/templates/admin/base.html | 16 +++++++++++----- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 15cea32..afed7cb 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -149,14 +149,21 @@ def admin_tab(self): session: sa.orm.Session = ckan_model.meta.create_local_session() recent_job_limit = 10 sync_jobs = get_sync_job_history(session, recent_job_limit) - failed_events = get_events(FalkorEventStatus.FAILED) + event_status = FalkorEventStatus.FAILED + + if "event_status" in 
request.args: + event_status = FalkorEventStatus[ + request.args["event_status"].upper() + ] + session.close() return render( "admin/base.html", extra_vars={ "latest_job_run": sync_jobs[0].start if len(sync_jobs) else None, "sync_jobs": sync_jobs, - "failed_events": failed_events + "events": get_events(event_status), + "event_status": event_status.value } ) diff --git a/ckanext/falkor/templates/admin/base.html b/ckanext/falkor/templates/admin/base.html index 4c84b8c..fec2cc1 100644 --- a/ckanext/falkor/templates/admin/base.html +++ b/ckanext/falkor/templates/admin/base.html @@ -14,7 +14,13 @@ {{ _('Sync') }} -

    Failed events

    +

    Events

    +
    Job IDStatusStartedFinished
    {{ sync_job.id }}{{ sync_job.status.value }}{{ h.render_datetime(sync_job.start, with_hours=True, with_seconds=True) or _('Never') }}{{ h.render_datetime(sync_job.end, with_hours=True, with_seconds=True) or _('Never') }}
    Event IDObject IDObject TypeResource IDResource Type Event Type User ID Created At
    {{ event.id }}{{ event.object_id }}{{ event.object_type.value }}{{ event.resource_id }}{{ event.resource_type.value }} {{ event.event_type.value }} {{ event.user_id }} {{ h.render_datetime(event.created_at, with_hours=True, with_seconds=True) }}
    @@ -25,12 +31,12 @@

    Failed events

    - {% for event in failed_events %} + {% for event in events %} @@ -53,7 +59,7 @@

    Failed events

    From c33457c1c196cb9a4c0e7a22211cec31fb7ea635 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 15:10:24 +0000 Subject: [PATCH 147/156] Properly construct audit url based off of resource type --- ckanext/falkor/plugin.py | 9 +++++++-- ckanext/falkor/templates/package/resource_read.html | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index afed7cb..dfa8127 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -18,6 +18,7 @@ FalkorEventStatus, FalkorEventType, FalkorSyncJobStatus, + FalkorEventResourceType, new_falkor_sync_job, create_new_event, get_events, @@ -297,9 +298,13 @@ def notify( args=[event], ) - def construct_falkor_url(self, resource): + def construct_falkor_url(self, resource, package): resource_id = resource["id"] - package_id = resource["package_id"] + package_id = package["id"] + + if "resource_type" in resource and resource["resource_type"].lower() == FalkorEventResourceType.STREAM.value: + resource_id = resource["name"] + package_id = package["name"] return f"{self.audit_base_url}dataset/{package_id}/document/{resource_id}" diff --git a/ckanext/falkor/templates/package/resource_read.html b/ckanext/falkor/templates/package/resource_read.html index af41a65..46970f8 100644 --- a/ckanext/falkor/templates/package/resource_read.html +++ b/ckanext/falkor/templates/package/resource_read.html @@ -2,7 +2,7 @@ {% block resource_actions_inner %}
  • - {% set url = h.construct_falkor_url(res) %} + {% set url = h.construct_falkor_url(res, package) %} Audit
  • {{ super() }} From 79565d4ceb611c42fc7f0bd55c014fc813673b2b Mon Sep 17 00:00:00 2001 From: wajones98 Date: Wed, 20 Nov 2024 15:16:52 +0000 Subject: [PATCH 148/156] Check event_status query value --- ckanext/falkor/plugin.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index dfa8127..05639ef 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -153,9 +153,17 @@ def admin_tab(self): event_status = FalkorEventStatus.FAILED if "event_status" in request.args: - event_status = FalkorEventStatus[ - request.args["event_status"].upper() - ] + event_query = request.args["event_status"].upper() + try: + event_status = FalkorEventStatus[event_query] + except KeyError: + toolkit.h.flash_error( + f""" +Invalid event type: \"{event_query.lower()}\". +Must be one of \"pending\", \"processing\", \"synced\"or \"failed\". +Defaulting to failed. +""" + ) session.close() return render( From a164dccbaeb36a420d3b70724c2f87e44ea3aa64 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 21 Nov 2024 10:17:57 +0000 Subject: [PATCH 149/156] Remove debug log --- ckanext/falkor/auth.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ckanext/falkor/auth.py b/ckanext/falkor/auth.py index 5befe3d..ba24f0e 100644 --- a/ckanext/falkor/auth.py +++ b/ckanext/falkor/auth.py @@ -60,12 +60,6 @@ def access_token(self) -> str: def __is_token_expired(self, token: Token) -> bool: expires_at = self.__timestamp + token.expires_in current_time = time.time() - log.debug( - "TOKEN EXPIRE INFO: Expires at: " - + str(expires_at) - + " - Current Time: " - + str(current_time) - ) return current_time >= expires_at def __login(self) -> None: From 9c10c9d8321153c43ff58270c65d9f31e6d23c72 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 21 Nov 2024 10:48:21 +0000 Subject: [PATCH 150/156] Rename parameters in falkor client methods --- ckanext/falkor/client.py | 26 +++++++++++++------------- 
ckanext/falkor/event_handler.py | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/ckanext/falkor/client.py b/ckanext/falkor/client.py index 25e5c70..40029dd 100644 --- a/ckanext/falkor/client.py +++ b/ckanext/falkor/client.py @@ -98,10 +98,10 @@ def __init__( self.__http_session = http_session - def dataset_create(self, package_id: str): + def dataset_create(self, dataset_id: str): url = self.__admin_base_url + self.__tenant_id + "/dataset" payload = { - "datasetId": package_id, + "datasetId": dataset_id, "encryptionType": "none", "externalStorage": "false", "permissionEnabled": "false", @@ -113,8 +113,8 @@ def dataset_create(self, package_id: str): falkor_post(self.__http_session, url, payload, self.__auth).raise_for_status() - def dataset_exists(self, package_id: str) -> bool: - url = self.__core_base_url + self.__tenant_id + "/dataset/" + package_id + "/info" + def dataset_exists(self, dataset_id: str) -> bool: + url = self.__core_base_url + self.__tenant_id + "/dataset/" + dataset_id + "/info" try: falkor_get(self.__http_session, url, self.__auth).raise_for_status() @@ -125,9 +125,9 @@ def dataset_exists(self, package_id: str) -> bool: else: raise e - def document_exists(self, package_id: str, resource_id: str) -> bool: + def document_exists(self, dataset_id: str, document_id: str) -> bool: url = self.__core_base_url + self.__tenant_id + \ - "/dataset/" + package_id + "/" + resource_id + "/info" + "/dataset/" + dataset_id + "/" + document_id + "/info" try: falkor_get(self.__http_session, url, self.__auth).raise_for_status() @@ -138,14 +138,14 @@ def document_exists(self, package_id: str, resource_id: str) -> bool: else: raise e - def document_get(self, package_id: str, resource_id: str): + def document_get(self, dataset_id: str, document_id: str): url = ( self.__core_base_url + self.__tenant_id + "/dataset/" - + package_id + + dataset_id + "/" - + resource_id + + document_id + "/body" ) @@ -179,17 +179,17 @@ def document_create( def 
document_update( self, - resource_id: str, - package_id: str, + dataset_id: str, + document_id: str, data: str ): url = ( self.__core_base_url + self.__tenant_id + "/dataset/" - + package_id + + dataset_id + "/" - + resource_id + + document_id + "/body" ) diff --git a/ckanext/falkor/event_handler.py b/ckanext/falkor/event_handler.py index 499d996..c4c249e 100644 --- a/ckanext/falkor/event_handler.py +++ b/ckanext/falkor/event_handler.py @@ -95,8 +95,8 @@ def handle_event(self, event: FalkorEvent): document_events.append(document_event) self.falkor.document_update( - resource_id, package_id, + resource_id, document_events ) From f96f5843eedd3b899871042dbddf18c47ae7b902 Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 21 Nov 2024 11:28:47 +0000 Subject: [PATCH 151/156] Remove unused func --- ckanext/falkor/model.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index b135b73..4bcb9c0 100644 --- a/ckanext/falkor/model.py +++ b/ckanext/falkor/model.py @@ -83,7 +83,8 @@ def get_failed_events( def get_events(status: Optional[FalkorEventStatus] = None) -> FalkorEvent: session = meta.create_local_session() try: - query = session.query(FalkorEvent).order_by(FalkorEvent.created_at.desc()) + query = session.query(FalkorEvent).order_by( + FalkorEvent.created_at.desc()) if status is not None: query = query.filter(FalkorEvent.status == status) return query.all() @@ -145,16 +146,6 @@ def get_resources_without_create_events(session: sa.orm.Session) -> List[Resourc ).all() -def get_dictized_package( - id: str -) -> Package: - session = meta.create_local_session() - try: - return table_dictize(session.query(Package).get(id), TOOLKIT_CONTEXT) - finally: - session.close() - - class FalkorSyncJobStatus(Enum): RUNNING = "running" FINISHED = "finished" From a909ab80d0826b6058bbb8449193348d42d3b24f Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 21 Nov 2024 11:33:49 +0000 Subject: [PATCH 152/156] 
Check resource_type is not None --- ckanext/falkor/plugin.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 05639ef..79b7fa4 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -310,7 +310,9 @@ def construct_falkor_url(self, resource, package): resource_id = resource["id"] package_id = package["id"] - if "resource_type" in resource and resource["resource_type"].lower() == FalkorEventResourceType.STREAM.value: + if "resource_type" in resource \ + and resource["resource_type"] is not None \ + and resource["resource_type"].lower() == FalkorEventResourceType.STREAM.value: resource_id = resource["name"] package_id = package["name"] From 6de11ab2248bdc574a8d9baca221129aa13945bf Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 21 Nov 2024 11:41:04 +0000 Subject: [PATCH 153/156] Adjust comment --- ckanext/falkor/plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 79b7fa4..9b1e067 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -265,7 +265,7 @@ def reprocess(self, event_id: str): def before_show(self, resource_dict): resource_id = resource_dict["id"] - # This regex pattern will only match /dataset//resource/ + # This regex pattern will only match /dataset//resource/ valid_url_pattern = re.compile( r'^.*?/dataset/[^/]+/resource/(?!new)[^/]+/?$') From 2672d0c72986a03b2ae8644c39bd656d15d47c7e Mon Sep 17 00:00:00 2001 From: wajones98 Date: Thu, 21 Nov 2024 15:21:05 +0000 Subject: [PATCH 154/156] Add support for all event statuses to reprocess_all --- ckanext/falkor/model.py | 13 +++++++++ ckanext/falkor/plugin.py | 34 ++++++++++++------------ ckanext/falkor/templates/admin/base.html | 4 +-- 3 files changed, 32 insertions(+), 19 deletions(-) diff --git a/ckanext/falkor/model.py b/ckanext/falkor/model.py index 4bcb9c0..28503e2 100644 --- a/ckanext/falkor/model.py +++ 
b/ckanext/falkor/model.py @@ -33,6 +33,19 @@ class FalkorEventStatus(str, Enum): FAILED = 'failed' SYNCED = 'synced' + @classmethod + def from_str(cls, value: str): + try: + return cls[value.upper()] + except KeyError: + raise ValueError( + f""" +Invalid event type: \"{value}\". +Must be one of \"pending\", \"processing\", \"synced\"or \"failed\". +Defaulting to failed. +""" + ) + class FalkorEventType(str, Enum): CREATE = "create" diff --git a/ckanext/falkor/plugin.py b/ckanext/falkor/plugin.py index 9b1e067..2914ac4 100644 --- a/ckanext/falkor/plugin.py +++ b/ckanext/falkor/plugin.py @@ -131,13 +131,13 @@ def configure(self, config): ) self.blueprint.add_url_rule( - "/ckan-admin/falkor/reprocess", + "/ckan-admin/falkor/reprocess/", view_func=self.reprocess_all, methods=["POST"] ) self.blueprint.add_url_rule( - "/ckan-admin/falkor/reprocess/", + "/ckan-admin/falkor/event/reprocess/event/", view_func=self.reprocess, methods=["POST"] ) @@ -153,17 +153,11 @@ def admin_tab(self): event_status = FalkorEventStatus.FAILED if "event_status" in request.args: - event_query = request.args["event_status"].upper() try: - event_status = FalkorEventStatus[event_query] - except KeyError: - toolkit.h.flash_error( - f""" -Invalid event type: \"{event_query.lower()}\". -Must be one of \"pending\", \"processing\", \"synced\"or \"failed\". -Defaulting to failed. 
-""" - ) + event_status = FalkorEventStatus.from_str( + request.args["event_status"]) + except ValueError as e: + toolkit.h.flash_error(str(e)) session.close() return render( @@ -219,13 +213,15 @@ def sync(self): return toolkit.h.redirect_to(toolkit.h.url_for("falkor_admin.admin_tab")) - def reprocess_all(self): + def reprocess_all(self, event_status: str): check_access() session: sa.orm.Session = ckan_model.meta.create_local_session() try: - failed_events = get_events(FalkorEventStatus.FAILED) + event_status = FalkorEventStatus.from_str(event_status) + + events = get_events(event_status) - for event in failed_events: + for event in events: session.add(event) event.status = FalkorEventStatus.PENDING jobs.enqueue( @@ -235,11 +231,15 @@ def reprocess_all(self): session.commit() toolkit.h.flash_success( - f"Reprocessing {len(failed_events)} failed events") + f"Reprocessing {len(events)} events") + except ValueError as e: + session.rollback() + toolkit.h.flash_error(str(e)) + log.exception(e) except Exception as e: session.rollback() toolkit.h.flash_error( - "Something went wrong when trying to failed events") + "Something went wrong when trying to reprocess events") log.exception(e) finally: session.close() diff --git a/ckanext/falkor/templates/admin/base.html b/ckanext/falkor/templates/admin/base.html index fec2cc1..757959b 100644 --- a/ckanext/falkor/templates/admin/base.html +++ b/ckanext/falkor/templates/admin/base.html @@ -32,7 +32,7 @@

    Events

    User ID Created At - {% if failed_events | length > 0 %} + {% if events | length > 0 %}
    @@ -40,7 +46,7 @@

    Failed events

    {{ event.id }} {{ event.resource_id }} Created At {% if events | length > 0 %} - +