Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: FileBone(public=True) for public files #1241

Merged
merged 39 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
42c8936
feat: added public flag to filebone and file entity. default: public …
Aug 15, 2024
b703f1b
fix: missing public flag
Aug 20, 2024
74dac89
feat: added servingurls for publicfiles
Aug 20, 2024
466728c
feat: added public bucket support.
Aug 20, 2024
740b3ea
chore: pep8
Aug 20, 2024
2678224
Merge branch 'main' into feat/public-files
akelch Aug 20, 2024
abc5110
Merge branch 'main' into feat/public-files
phorward Aug 20, 2024
0abb466
Merge branch 'develop' into feat/public-files
phorward Aug 27, 2024
8fc1475
Refactoring the new public file repo
phorward Aug 27, 2024
abd1ae7
Merge branch 'develop' into feat/public-files
phorward Aug 27, 2024
68281db
Cleaning up code for working order
phorward Aug 27, 2024
5c65dbd
Minor fixes
phorward Aug 27, 2024
dbc347c
Provide refactored File.serve function
phorward Aug 27, 2024
d8841b9
Ha ha ha
phorward Aug 27, 2024
67b09c7
clean code
phorward Aug 27, 2024
b8ba387
Consequent use of PUBLIC_DLKEY_POSTFIX
phorward Aug 28, 2024
14854d7
Fixed File.read() and some split calls.
phorward Aug 28, 2024
c8f8c21
fix: added public attribute to structure
Aug 28, 2024
610f59c
Always write Exception to log
phorward Aug 29, 2024
32bde87
Renamed PUBLIC_DLKEY_POSTFIX into PUBLIC_DLKEY_SUFFIX
phorward Aug 30, 2024
e19a204
Keep bucket lookups low
phorward Aug 30, 2024
eb4bc77
Fixed missing trailing commas
phorward Sep 2, 2024
6511311
Raise UnprocessableEntity on invalid format parameter
phorward Sep 2, 2024
9aadd06
Provide filename Content-Disposition with quotes
phorward Sep 2, 2024
6d835c0
Eliminated poor error handling with try...except
phorward Sep 2, 2024
9579523
Fixed pep-8 issues and broken suggestion
phorward Sep 2, 2024
d2d8cc5
Move serve-endpoint validity dicts to File
phorward Sep 2, 2024
e539538
Merge remote-tracking branch 'origin/develop' into feat/public-files
Sep 4, 2024
74ae93e
fix: unlimit image sizes
Sep 4, 2024
935c03f
fix: serve now takes a host and a key parameter
Sep 4, 2024
599adfb
fix: added create_serve_parameters and create_serve_url function
Sep 4, 2024
eab49a1
chore: linting
Sep 4, 2024
716a9ec
Apply suggestions from code review
phorward Sep 18, 2024
ed7bca6
Merge branch 'develop' into feat/public-files
phorward Sep 20, 2024
f254a86
create_internal_serving_url() and Jinja wrapper
phorward Sep 20, 2024
f9d7166
fix: bucket object
Sep 20, 2024
7bffdee
Merge branch 'develop' into feat/public-files
phorward Sep 23, 2024
a94b922
Simplifying `create_internal_serving_url`
phorward Sep 23, 2024
fd1cb6b
Remove unused import for itertools
phorward Sep 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions src/viur/core/bones/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ class FileBone(TreeLeafBone):

kind = "file"
"""The kind of this bone is 'file'"""

type = "relational.tree.leaf.file"
"""The type of this bone is 'relational.tree.leaf.file'."""

Expand All @@ -137,7 +138,15 @@ def __init__(
derive: None | dict[str, t.Any] = None,
maxFileSize: None | int = None,
validMimeTypes: None | list[str] = None,
refKeys: t.Optional[t.Iterable[str]] = ("name", "mimetype", "size", "width", "height", "derived", "public"),
refKeys: t.Optional[t.Iterable[str]] = (
"name",
"mimetype",
"size",
"width",
"height",
"derived",
"public",
),
public: bool = False,
**kwargs
):
Expand Down Expand Up @@ -193,8 +202,10 @@ def isInvalid(self, value):
if self.maxFileSize:
if value["dest"]["size"] > self.maxFileSize:
return "File too large."

if value["dest"]["public"] != self.public:
return f"Only files which are marked as public: {self.public} are allowed"
return f"Only files marked public={self.public!r} are allowed."

return None

def postSavedHandler(self, skel, boneName, key):
Expand Down
112 changes: 67 additions & 45 deletions src/viur/core/modules/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,14 @@

_CREDENTIALS, __PROJECT_ID = google.auth.default()
GOOGLE_STORAGE_CLIENT = storage.Client(__PROJECT_ID, _CREDENTIALS)
GOOGLE_STORAGE_BUCKET = GOOGLE_STORAGE_CLIENT.lookup_bucket(f"""{__PROJECT_ID}.appspot.com""")
PUBLIC_GOOGLE_STORAGE_BUCKET = GOOGLE_STORAGE_CLIENT.lookup_bucket(f"""public-dot-{__PROJECT_ID}""")
PUBLIC_DLKEY_POSTFIX = "_pub"

# FilePath is a descriptor for ViUR file components
FilePath = namedtuple("FilePath", ("dlkey", "is_derived", "filename"))


def get_current_bucket(dlkey: str) -> google.cloud.storage.bucket.Bucket:
    """
    Select the storage bucket that belongs to a dlkey.

    :param dlkey: Download key of a file. Keys ending with ``PUBLIC_DLKEY_POSTFIX``
        belong to the public bucket.
    :return: ``PUBLIC_GOOGLE_STORAGE_BUCKET`` for public dlkeys, ``GOOGLE_STORAGE_BUCKET`` otherwise.
    :raises ValueError: If a public dlkey is given but the module-level lookup of the
        public bucket did not yield a bucket.
    """
    # Use the shared suffix constant instead of repeating the "_pub" literal,
    # so the marker for public files is defined in exactly one place.
    if not dlkey.endswith(PUBLIC_DLKEY_POSTFIX):
        return GOOGLE_STORAGE_BUCKET

    if PUBLIC_GOOGLE_STORAGE_BUCKET:
        return PUBLIC_GOOGLE_STORAGE_BUCKET

    raise ValueError(f"""the bucket: public-dot-{__PROJECT_ID} does not exist! Please create it with ACL access.""")


def importBlobFromViur2(dlKey, fileName):
current_bucket = get_current_bucket(dlKey)
bucket = File.get_bucket(dlKey)

if not conf.viur2import_blobsource:
return False
Expand All @@ -80,19 +71,19 @@ def importBlobFromViur2(dlKey, fileName):
return False
importData = json.loads(importDataReq.read())
oldBlobName = conf.viur2import_blobsource["gsdir"] + "/" + importData["key"]
srcBlob = storage.Blob(bucket=current_bucket,
srcBlob = storage.Blob(bucket=bucket,
name=conf.viur2import_blobsource["gsdir"] + "/" + importData["key"])
else:
oldBlobName = conf.viur2import_blobsource["gsdir"] + "/" + dlKey
srcBlob = storage.Blob(bucket=current_bucket, name=conf.viur2import_blobsource["gsdir"] + "/" + dlKey)
srcBlob = storage.Blob(bucket=bucket, name=conf.viur2import_blobsource["gsdir"] + "/" + dlKey)
if not srcBlob.exists():
marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
marker["success"] = False
marker["error"] = "Local SRC-Blob missing"
marker["oldBlobName"] = oldBlobName
db.Put(marker)
return False
current_bucket.rename_blob(srcBlob, f"{dlKey}/source/{fileName}")
bucket.rename_blob(srcBlob, f"{dlKey}/source/{fileName}")
marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
marker["success"] = True
marker["old_src_key"] = dlKey
Expand All @@ -104,8 +95,8 @@ def importBlobFromViur2(dlKey, fileName):

def thumbnailer(fileSkel, existingFiles, params):
file_name = html.unescape(fileSkel["name"])
current_bucket = get_current_bucket(fileSkel["dlkey"])
blob = current_bucket.get_blob(f"""{fileSkel["dlkey"]}/source/{file_name}""")
bucket = File.get_bucket(fileSkel["dlkey"])
blob = bucket.get_blob(f"""{fileSkel["dlkey"]}/source/{file_name}""")
if not blob:
logging.warning(f"""Blob {fileSkel["dlkey"]}/source/{file_name} is missing from cloud storage!""")
return
Expand Down Expand Up @@ -151,7 +142,7 @@ def thumbnailer(fileSkel, existingFiles, params):
img.save(outData, fileExtension)
outSize = outData.tell()
outData.seek(0)
targetBlob = current_bucket.blob(f"""{fileSkel["dlkey"]}/derived/{targetName}""")
targetBlob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{targetName}""")
targetBlob.upload_from_file(outData, content_type=mimeType)
resList.append((targetName, outSize, mimeType, {"mimetype": mimeType, "width": width, "height": height}))
return resList
Expand Down Expand Up @@ -186,14 +177,14 @@ def cloudfunction_thumbnailer(fileSkel, existingFiles, params):
if not conf.file_thumbnailer_url:
raise ValueError("conf.file_thumbnailer_url is not set")

current_bucket = get_current_bucket(fileSkel["dlkey"])
bucket = File.get_bucket(fileSkel["dlkey"])

def getsignedurl():
if conf.instance.is_dev_server:
signedUrl = File.create_download_url(fileSkel["dlkey"], fileSkel["name"])
else:
path = f"""{fileSkel["dlkey"]}/source/{file_name}"""
if not (blob := current_bucket.get_blob(path)):
if not (blob := bucket.get_blob(path)):
logging.warning(f"Blob {path} is missing from cloud storage!")
return None
authRequest = google.auth.transport.requests.Request()
Expand Down Expand Up @@ -258,7 +249,7 @@ def make_request():
uploadUrls = {}
for data in derivedData["values"]:
fileName = File.sanitize_filename(data["name"])
blob = current_bucket.blob(f"""{fileSkel["dlkey"]}/derived/{fileName}""")
blob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{fileName}""")
uploadUrls[fileSkel["dlkey"] + fileName] = blob.create_resumable_upload_session(timeout=60,
content_type=data["mimeType"])

Expand Down Expand Up @@ -451,6 +442,20 @@ class File(Tree):

# Helper functions currently reside here

@staticmethod
def get_bucket(dlkey: str) -> google.cloud.storage.bucket.Bucket:
    """
    Retrieves the Google Cloud Storage bucket for the given dlkey.

    A dlkey ending with ``PUBLIC_DLKEY_POSTFIX`` selects the public bucket
    ``public-dot-<project>``; any other value (including an empty one) selects
    the default bucket ``<project>.appspot.com``.

    :param dlkey: The download key of a file; may be empty.
    :return: The bucket found for the dlkey.
        NOTE(review): the default bucket is returned exactly as delivered by
        ``lookup_bucket`` without a check - presumably ``None`` if it is missing; confirm.
    :raises ValueError: If the public bucket is requested but the lookup finds none.
    """
    # Read the project id from the client instead of the module-level
    # ``__PROJECT_ID``: inside the body of class ``File`` that identifier is
    # name-mangled to ``_File__PROJECT_ID`` and the lookup fails with NameError.
    project_id = GOOGLE_STORAGE_CLIENT.project

    if dlkey and dlkey.endswith(PUBLIC_DLKEY_POSTFIX):
        public_name = f"public-dot-{project_id}"

        if public_bucket := GOOGLE_STORAGE_CLIENT.lookup_bucket(public_name):
            return public_bucket

        raise ValueError(f"The bucket '{public_name}' does not exist! Please create it with ACL access.")

    return GOOGLE_STORAGE_CLIENT.lookup_bucket(f"{project_id}.appspot.com")

@staticmethod
def is_valid_filename(filename: str) -> bool:
"""
Expand Down Expand Up @@ -629,8 +634,15 @@ def create_src_set(

return ", ".join(src_set)

def write(self, filename: str, content: t.Any, mimetype: str = "text/plain", width: int = None,
height: int = None, public: bool = False) -> db.Key:
def write(
self,
filename: str,
content: t.Any,
mimetype: str = "text/plain",
width: int = None,
height: int = None,
public: bool = False,
) -> db.Key:
"""
Write a file from any buffer into the file module.

Expand All @@ -648,10 +660,11 @@ def write(self, filename: str, content: t.Any, mimetype: str = "text/plain", wid
dl_key = utils.string.random()

if public:
dl_key += "_pub" # mark folder as public
current_bucket = get_current_bucket(dl_key)
dl_key += "_pub" # mark file as public

bucket = File.get_bucket(dl_key)

blob = current_bucket.blob(f"{dl_key}/source/{filename}")
blob = bucket.blob(f"{dl_key}/source/{filename}")
blob.upload_from_file(io.BytesIO(content), content_type=mimetype)

skel = self.addSkel("leaf")
Expand All @@ -668,7 +681,12 @@ def write(self, filename: str, content: t.Any, mimetype: str = "text/plain", wid

return skel.toDB()

def read(self, key: db.Key | int | str | None = None, path: str | None = None) -> tuple[io.BytesIO, str]:
def read(
self,
key: db.Key | int | str | None = None,
path: str | None = None,
public: bool = False,
) -> tuple[io.BytesIO, str]:
"""
Read a file from the Cloud Storage.

Expand All @@ -677,22 +695,26 @@ def read(self, key: db.Key | int | str | None = None, path: str | None = None) -

:param key: Key of the LeafSkel that contains the "dlkey" and the "name".
:param path: The path of the file in the Cloud Storage Bucket.
:param public: Defines whether the file path should be taken from private or public repo.

:return: Returns the file as a io.BytesIO buffer and the content-type
"""
current_bucket = GOOGLE_STORAGE_BUCKET
if not key and not path:
raise ValueError("Please provide a key or a path")

if key:
skel = self.viewSkel("leaf")
if not skel.fromDB(db.keyHelper(key, skel.kindName)):
if not path:
raise ValueError("This skeleton is not in the database!")
else:
path = f"""{skel["dlkey"]}/source/{skel["name"]}"""
current_bucket = get_current_bucket(skel["dlkey"])

blob = current_bucket.blob(path)
bucket = File.get_bucket(skel["dlkey"])
else:
bucket = File.get_bucket(PUBLIC_DLKEY_POSTFIX if public else "")

blob = bucket.blob(path)
return io.BytesIO(blob.download_as_bytes()), blob.content_type

@CallDeferred
Expand Down Expand Up @@ -783,7 +805,7 @@ def getUploadURL(
if public:
dlkey += "_pub" # mark folder as public

blob = get_current_bucket(dlkey).blob(f"{dlkey}/source/{filename}")
blob = File.get_bucket(dlkey).blob(f"{dlkey}/source/{filename}")
upload_url = blob.create_resumable_upload_session(content_type=mimeType, size=size, timeout=60)

# Create a corresponding file-lock object early, otherwise we would have to ensure that the file-lock object
Expand Down Expand Up @@ -846,7 +868,7 @@ def download(self, blobKey: str, fileName: str = "", download: bool = False, sig
dlPath, validUntil = base64.urlsafe_b64decode(blobKey).decode(
"UTF-8").split("\0")

current_bucket = get_current_bucket(dlPath.split("/")[0])
bucket = File.get_bucket(dlPath.split("/")[0])

if not sig:
# Check if the current user has the right to download *any* blob present in this application.
Expand All @@ -856,7 +878,7 @@ def download(self, blobKey: str, fileName: str = "", download: bool = False, sig
if "root" not in usr["access"] and "file-view" not in usr["access"]:
raise errors.Forbidden()
validUntil = "-1" # Prevent this from being cached down below
blob = current_bucket.get_blob(blobKey)
blob = bucket.get_blob(blobKey)

else:
# We got an request including a signature (probably a guest or a user without file-view access)
Expand All @@ -867,7 +889,7 @@ def download(self, blobKey: str, fileName: str = "", download: bool = False, sig
if validUntil != "0" and datetime.datetime.strptime(validUntil, "%Y%m%d%H%M") < datetime.datetime.now():
blob = None
else:
blob = current_bucket.get_blob(dlPath)
blob = bucket.get_blob(dlPath)

if not blob:
raise errors.Gone("The requested blob has expired.")
Expand Down Expand Up @@ -1028,9 +1050,9 @@ def add(self, skelType: SkelType, node: db.Key | int | str | None = None, *args,
session.markChanged()

# Now read the blob from the dlkey folder
current_bucket = get_current_bucket(skel["dlkey"])
bucket = File.get_bucket(skel["dlkey"])

blobs = list(current_bucket.list_blobs(prefix=f"""{skel["dlkey"]}/"""))
blobs = list(bucket.list_blobs(prefix=f"""{skel["dlkey"]}/"""))
if len(blobs) != 1:
logging.error("Invalid number of blobs in folder")
logging.error(targetKey)
Expand Down Expand Up @@ -1072,13 +1094,13 @@ def onEdit(self, skelType: SkelType, skel: SkeletonInstance):
old_path = f"{skel['dlkey']}/source/{html.unescape(old_skel['name'])}"
new_path = f"{skel['dlkey']}/source/{html.unescape(skel['name'])}"

current_bucket = get_current_bucket(skel['dlkey'])
bucket = File.get_bucket(skel['dlkey'])

if not (old_blob := current_bucket.get_blob(old_path)):
if not (old_blob := bucket.get_blob(old_path)):
raise errors.Gone()

current_bucket.copy_blob(old_blob, current_bucket, new_path, if_generation_match=0)
current_bucket.delete_blob(old_path)
bucket.copy_blob(old_blob, bucket, new_path, if_generation_match=0)
bucket.delete_blob(old_path)

self.create_serving_url(skel)

Expand Down Expand Up @@ -1112,11 +1134,11 @@ def create_serving_url(self, skel: SkeletonInstance) -> SkeletonInstance:
and skel["mimetype"].startswith("image/") and not skel["serving_url"]:

try:
current_bucket = get_current_bucket(skel['dlkey'])
bucket = File.get_bucket(skel['dlkey'])
skel["serving_url"] = images.get_serving_url(
None,
secure_url=True,
filename=f"/gs/{current_bucket.name}/{skel['dlkey']}/source/{skel['name']}",
filename=f"/gs/{bucket.name}/{skel['dlkey']}/source/{skel['name']}",
)
except Exception as e:
logging.warning("Error while creating serving url")
Expand Down Expand Up @@ -1197,8 +1219,8 @@ def doCleanupDeletedFiles(cursor=None):
else:
if file["itercount"] > maxIterCount:
logging.info(f"""Finally deleting, {file["dlkey"]}""")
current_bucket = get_current_bucket(file["dlkey"])
blobs = current_bucket.list_blobs(prefix=f"""{file["dlkey"]}/""")
bucket = File.get_bucket(file["dlkey"])
blobs = bucket.list_blobs(prefix=f"""{file["dlkey"]}/""")
for blob in blobs:
blob.delete()
db.Delete(file.key)
Expand All @@ -1207,9 +1229,9 @@ def doCleanupDeletedFiles(cursor=None):
f.delete()

if f["serving_url"]:
current_bucket = get_current_bucket(f["dlkey"])
bucket = File.get_bucket(f["dlkey"])
blob_key = blobstore.create_gs_key(
f"/gs/{current_bucket.name}/{f['dlkey']}/source/{f['name']}"
f"/gs/{bucket.name}/{f['dlkey']}/source/{f['name']}"
)
images.delete_serving_url(blob_key) # delete serving url
else:
Expand Down