Coverage for /home/runner/work/viur-core/viur-core/viur/src/viur/core/modules/file.py: 0%
790 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-13 11:04 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-13 11:04 +0000
1import base64
2import datetime
3import hashlib
4import hmac
5import html
6import io
7import json
8import logging
9import re
10import string
11import typing as t
12import warnings
13from collections import namedtuple
14from urllib.parse import quote as urlquote, urlencode
15from urllib.request import urlopen
17import PIL
18import PIL.ImageCms
19import google.auth
20import requests
21from PIL import Image
22from google.appengine.api import blobstore, images
23from google.cloud import storage
24from google.oauth2.service_account import Credentials as ServiceAccountCredentials
26from viur.core import conf, current, db, errors, utils, i18n
27from viur.core.bones import BaseBone, BooleanBone, JsonBone, KeyBone, NumericBone, StringBone
29from viur.core.decorators import *
30from viur.core.prototypes.tree import SkelType, Tree, TreeSkel
31from viur.core.skeleton import SkeletonInstance, skeletonByKind
32from viur.core.tasks import CallDeferred, DeleteEntitiesIter, PeriodicTask
34# Globals for connectivity
36VALID_FILENAME_REGEX = re.compile(
37 # || MAY NOT BE THE NAME | MADE OF SPECIAL CHARS | SPECIAL CHARS + `. `|`
38 r"^(?!^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$)[^\x00-\x1F<>:\"\/\\|?*]*[^\x00-\x1F<>:\"\/\\|?*. ]$",
39 re.IGNORECASE
40)
42_CREDENTIALS, _PROJECT_ID = google.auth.default()
43GOOGLE_STORAGE_CLIENT = storage.Client(_PROJECT_ID, _CREDENTIALS)
45PRIVATE_BUCKET_NAME = f"""{_PROJECT_ID}.appspot.com"""
46PUBLIC_BUCKET_NAME = f"""public-dot-{_PROJECT_ID}"""
47PUBLIC_DLKEY_SUFFIX = "_pub"
49_private_bucket = GOOGLE_STORAGE_CLIENT.lookup_bucket(PRIVATE_BUCKET_NAME)
50_public_bucket = None
52# FilePath is a descriptor for ViUR file components
53FilePath = namedtuple("FilePath", ("dlkey", "is_derived", "filename"))
56def importBlobFromViur2(dlKey, fileName):
57 bucket = conf.main_app.file.get_bucket(dlKey)
59 if not conf.viur2import_blobsource:
60 return False
61 existingImport = db.get(db.Key("viur-viur2-blobimport", dlKey))
62 if existingImport:
63 if existingImport["success"]:
64 return existingImport["dlurl"]
65 return False
66 if conf.viur2import_blobsource["infoURL"]:
67 try:
68 importDataReq = urlopen(conf.viur2import_blobsource["infoURL"] + dlKey)
69 except Exception as e:
70 marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
71 marker["success"] = False
72 marker["error"] = "Failed URL-FETCH 1"
73 db.put(marker)
74 return False
75 if importDataReq.status != 200:
76 marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
77 marker["success"] = False
78 marker["error"] = "Failed URL-FETCH 2"
79 db.put(marker)
80 return False
81 importData = json.loads(importDataReq.read())
82 oldBlobName = conf.viur2import_blobsource["gsdir"] + "/" + importData["key"]
83 srcBlob = storage.Blob(bucket=bucket,
84 name=conf.viur2import_blobsource["gsdir"] + "/" + importData["key"])
85 else:
86 oldBlobName = conf.viur2import_blobsource["gsdir"] + "/" + dlKey
87 srcBlob = storage.Blob(bucket=bucket, name=conf.viur2import_blobsource["gsdir"] + "/" + dlKey)
88 if not srcBlob.exists():
89 marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
90 marker["success"] = False
91 marker["error"] = "Local SRC-Blob missing"
92 marker["oldBlobName"] = oldBlobName
93 db.put(marker)
94 return False
95 bucket.rename_blob(srcBlob, f"{dlKey}/source/{fileName}")
96 marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
97 marker["success"] = True
98 marker["old_src_key"] = dlKey
99 marker["old_src_name"] = fileName
100 marker["dlurl"] = conf.main_app.file.create_download_url(dlKey, fileName, False, None)
101 db.put(marker)
102 return marker["dlurl"]
105def thumbnailer(fileSkel, existingFiles, params):
106 file_name = html.unescape(fileSkel["name"])
107 bucket = conf.main_app.file.get_bucket(fileSkel["dlkey"])
108 blob = bucket.get_blob(f"""{fileSkel["dlkey"]}/source/{file_name}""")
109 if not blob:
110 logging.warning(f"""Blob {fileSkel["dlkey"]}/source/{file_name} is missing from cloud storage!""")
111 return
112 fileData = io.BytesIO()
113 blob.download_to_file(fileData)
114 resList = []
115 for sizeDict in params:
116 fileData.seek(0)
117 outData = io.BytesIO()
118 try:
119 img = PIL.Image.open(fileData)
120 except PIL.Image.UnidentifiedImageError: # Can't load this image; so there's no need to try other resolutions
121 return []
122 iccProfile = img.info.get('icc_profile')
123 if iccProfile:
124 # JPEGs might be encoded with a non-standard color-profile; we need to compensate for this if we convert
125 # to WEBp as we'll loose this color-profile information
126 f = io.BytesIO(iccProfile)
127 src_profile = PIL.ImageCms.ImageCmsProfile(f)
128 dst_profile = PIL.ImageCms.createProfile('sRGB')
129 try:
130 img = PIL.ImageCms.profileToProfile(
131 img,
132 inputProfile=src_profile,
133 outputProfile=dst_profile,
134 outputMode="RGBA" if img.has_transparency_data else "RGB")
135 except Exception as e:
136 logging.exception(e)
137 continue
138 fileExtension = sizeDict.get("fileExtension", "webp")
139 if "width" in sizeDict and "height" in sizeDict:
140 width = sizeDict["width"]
141 height = sizeDict["height"]
142 targetName = f"thumbnail-{width}-{height}.{fileExtension}"
143 elif "width" in sizeDict:
144 width = sizeDict["width"]
145 height = int((float(img.size[1]) * float(width / float(img.size[0]))))
146 targetName = f"thumbnail-w{width}.{fileExtension}"
147 else: # No default fallback - ignore
148 continue
149 mimeType = sizeDict.get("mimeType", "image/webp")
150 img = img.resize((width, height), PIL.Image.LANCZOS)
151 img.save(outData, fileExtension)
152 outSize = outData.tell()
153 outData.seek(0)
154 targetBlob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{targetName}""")
155 targetBlob.upload_from_file(outData, content_type=mimeType)
156 resList.append((targetName, outSize, mimeType, {"mimetype": mimeType, "width": width, "height": height}))
157 return resList
160def cloudfunction_thumbnailer(fileSkel, existingFiles, params):
161 """External Thumbnailer for images.
163 The corresponding cloudfunction can be found here .
164 https://github.com/viur-framework/viur-cloudfunctions/tree/main/thumbnailer
166 You can use it like so:
167 main.py:
169 .. code-block:: python
171 from viur.core.modules.file import cloudfunction_thumbnailer
173 conf.file_thumbnailer_url = "https://xxxxx.cloudfunctions.net/imagerenderer"
174 conf.file_derivations = {"thumbnail": cloudfunction_thumbnailer}
176 conf.derives_pdf = {
177 "thumbnail": [{"width": 1920,"sites":"1,2"}]
178 }
180 skeletons/xxx.py:
181 .. code-block:: python
183 test = FileBone(derive=conf.derives_pdf)
184 """
186 if not conf.file_thumbnailer_url:
187 raise ValueError("conf.file_thumbnailer_url is not set")
189 bucket = conf.main_app.file.get_bucket(fileSkel["dlkey"])
191 def getsignedurl():
192 if conf.instance.is_dev_server:
193 signedUrl = conf.main_app.file.create_download_url(fileSkel["dlkey"], fileSkel["name"])
194 else:
195 path = f"""{fileSkel["dlkey"]}/source/{file_name}"""
196 if not (blob := bucket.get_blob(path)):
197 logging.warning(f"Blob {path} is missing from cloud storage!")
198 return None
199 authRequest = google.auth.transport.requests.Request()
200 expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
201 signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "")
202 content_disposition = utils.build_content_disposition_header(fileSkel["name"])
203 signedUrl = blob.generate_signed_url(
204 expiresAt,
205 credentials=signing_credentials,
206 response_disposition=content_disposition,
207 version="v4")
208 return signedUrl
210 def make_request():
211 headers = {"Content-Type": "application/json"}
212 data_str = base64.b64encode(json.dumps(dataDict).encode("UTF-8"))
213 sig = conf.main_app.file.hmac_sign(data_str)
214 datadump = json.dumps({"dataStr": data_str.decode('ASCII'), "sign": sig})
215 resp = requests.post(conf.file_thumbnailer_url, data=datadump, headers=headers, allow_redirects=False)
216 if resp.status_code != 200: # Error Handling
217 match resp.status_code:
218 case 302:
219 # The problem is Google resposen 302 to an auth Site when the cloudfunction was not found
220 # https://cloud.google.com/functions/docs/troubleshooting#login
221 logging.error("Cloudfunction not found")
222 case 404:
223 logging.error("Cloudfunction not found")
224 case 403:
225 logging.error("No permission for the Cloudfunction")
226 case _:
227 logging.error(
228 f"cloudfunction_thumbnailer failed with code: {resp.status_code} and data: {resp.content}")
229 return
231 try:
232 response_data = resp.json()
233 except Exception as e:
234 logging.error(f"response could not be converted in json failed with: {e=}")
235 return
236 if "error" in response_data:
237 logging.error(f"cloudfunction_thumbnailer failed with: {response_data.get('error')}")
238 return
240 return response_data
242 file_name = html.unescape(fileSkel["name"])
244 if not (url := getsignedurl()):
245 return
246 dataDict = {
247 "url": url,
248 "name": fileSkel["name"],
249 "params": params,
250 "minetype": fileSkel["mimetype"],
251 "baseUrl": current.request.get().request.host_url.lower(),
252 "targetKey": fileSkel["dlkey"],
253 "nameOnly": True
254 }
255 if not (derivedData := make_request()):
256 return
258 uploadUrls = {}
259 for data in derivedData["values"]:
260 fileName = conf.main_app.file.sanitize_filename(data["name"])
261 blob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{fileName}""")
262 uploadUrls[fileSkel["dlkey"] + fileName] = blob.create_resumable_upload_session(timeout=60,
263 content_type=data["mimeType"])
265 if not (url := getsignedurl()):
266 return
268 dataDict["url"] = url
269 dataDict["nameOnly"] = False
270 dataDict["uploadUrls"] = uploadUrls
272 if not (derivedData := make_request()):
273 return
274 reslist = []
275 try:
276 for derived in derivedData["values"]:
277 for key, value in derived.items():
278 reslist.append((key, value["size"], value["mimetype"], value["customData"]))
280 except Exception as e:
281 logging.error(f"cloudfunction_thumbnailer failed with: {e=}")
282 return reslist
285class DownloadUrlBone(BaseBone):
286 """
287 This bone is used to inject a freshly signed download url into a FileSkel.
288 """
290 def unserialize(self, skel, name):
291 if "dlkey" in skel.dbEntity and "name" in skel.dbEntity:
292 skel.accessedValues[name] = conf.main_app.file.create_download_url(
293 skel["dlkey"], skel["name"], expires=conf.render_json_download_url_expiration
294 )
295 return True
297 return False
300class FileLeafSkel(TreeSkel):
301 """
302 Default file leaf skeleton.
303 """
304 kindName = "file"
306 name = StringBone(
307 descr="Filename",
308 caseSensitive=False,
309 searchable=True,
310 vfunc=lambda val: None if File.is_valid_filename(val) else "Invalid filename provided",
311 )
313 alt = StringBone(
314 descr=i18n.translate(
315 "viur.core.image.alt",
316 defaultText="Alternative description",
317 ),
318 searchable=True,
319 languages=conf.i18n.available_languages,
320 )
322 size = NumericBone(
323 descr="Filesize in Bytes",
324 readOnly=True,
325 searchable=True,
326 )
328 dlkey = StringBone(
329 descr="Download-Key",
330 readOnly=True,
331 )
333 mimetype = StringBone(
334 descr="MIME-Type",
335 readOnly=True,
336 )
338 weak = BooleanBone(
339 descr="Weak reference",
340 readOnly=True,
341 visible=False,
342 )
344 pending = BooleanBone(
345 descr="Pending upload",
346 readOnly=True,
347 visible=False,
348 defaultValue=False,
349 )
351 width = NumericBone(
352 descr="Width",
353 readOnly=True,
354 searchable=True,
355 )
357 height = NumericBone(
358 descr="Height",
359 readOnly=True,
360 searchable=True,
361 )
363 downloadUrl = DownloadUrlBone(
364 descr="Download-URL",
365 readOnly=True,
366 visible=False,
367 )
369 derived = JsonBone(
370 descr="Derived Files",
371 readOnly=True,
372 visible=False,
373 )
375 pendingparententry = KeyBone(
376 descr="Pending key Reference",
377 readOnly=True,
378 visible=False,
379 )
381 crc32c_checksum = StringBone(
382 descr="CRC32C checksum",
383 readOnly=True,
384 )
386 md5_checksum = StringBone(
387 descr="MD5 checksum",
388 readOnly=True,
389 )
391 public = BooleanBone(
392 descr="Public File",
393 readOnly=True,
394 defaultValue=False,
395 )
397 serving_url = StringBone(
398 descr="Serving-URL",
399 readOnly=True,
400 params={
401 "tooltip": "The 'serving_url' is only available in public file repositories.",
402 }
403 )
405 @classmethod
406 def _inject_serving_url(cls, skel: SkeletonInstance) -> None:
407 """Inject the serving url for public image files into a FileSkel"""
408 if (
409 skel["public"]
410 and skel["mimetype"]
411 and skel["mimetype"].startswith("image/")
412 and not skel["serving_url"]
413 ):
414 bucket = File.get_bucket(skel["dlkey"])
415 filename = f"/gs/{bucket.name}/{skel['dlkey']}/source/{skel['name']}"
417 # Trying this on local development server will raise a
418 # `google.appengine.runtime.apiproxy_errors.RPCFailedError`
419 if conf.instance.is_dev_server:
420 logging.warning(f"Can't inject serving_url for {filename!r} on local development server")
421 return
423 try:
424 skel["serving_url"] = images.get_serving_url(None, secure_url=True, filename=filename)
426 except Exception as e:
427 logging.warning(f"Failed to create serving_url for {filename!r} with exception {e!r}")
428 logging.exception(e)
430 def preProcessBlobLocks(self, locks):
431 """
432 Ensure that our dlkey is locked even if we don't have a filebone here
433 """
434 if not self["weak"] and self["dlkey"]:
435 locks.add(self["dlkey"])
436 return locks
438 @classmethod
439 def refresh(cls, skel):
440 super().refresh(skel)
441 if conf.viur2import_blobsource:
442 importData = importBlobFromViur2(skel["dlkey"], skel["name"])
443 if importData:
444 if not skel["downloadUrl"]:
445 skel["downloadUrl"] = importData
446 skel["pendingparententry"] = None
448 cls._inject_serving_url(skel)
450 @classmethod
451 def write(cls, skel, **kwargs):
452 cls._inject_serving_url(skel)
453 return super().write(skel, **kwargs)
456class FileNodeSkel(TreeSkel):
457 """
458 Default file node skeleton.
459 """
460 kindName = "file_rootNode" # FIXME: VIUR4, don't use "_rootNode" kindname
462 name = StringBone(
463 descr="Name",
464 required=True,
465 searchable=True
466 )
468 rootNode = BooleanBone(
469 descr="Is RootNode",
470 defaultValue=False,
471 readOnly=True,
472 visible=False,
473 )
475 public = BooleanBone(
476 descr="Is public?",
477 defaultValue=False,
478 readOnly=True,
479 visible=False,
480 )
482 viurCurrentSeoKeys = None
485class File(Tree):
486 PENDING_POSTFIX = " (pending)"
487 DOWNLOAD_URL_PREFIX = "/file/download/"
488 INTERNAL_SERVING_URL_PREFIX = "/file/serve/"
489 MAX_FILENAME_LEN = 256
490 IMAGE_META_MAX_SIZE: t.Final[int] = 10 * 1024 ** 2
491 """Maximum size of image files that should be analysed in :meth:`set_image_meta`.
492 Default: 10 MiB"""
494 leafSkelCls = FileLeafSkel
495 nodeSkelCls = FileNodeSkel
497 handler = "tree.simple.file"
498 adminInfo = {
499 "icon": "folder-fill",
500 "handler": handler, # fixme: Use static handler; Remove with VIUR4!
501 }
503 roles = {
504 "*": "view",
505 "editor": ("add", "edit"),
506 "admin": "*",
507 }
509 default_order = "name"
511 # Helper functions currently resist here
513 @staticmethod
514 def get_bucket(dlkey: str) -> google.cloud.storage.bucket.Bucket:
515 """
516 Retrieves a Google Cloud Storage bucket for the given dlkey.
517 """
518 global _public_bucket
519 if dlkey and dlkey.endswith(PUBLIC_DLKEY_SUFFIX):
520 if _public_bucket or (_public_bucket := GOOGLE_STORAGE_CLIENT.lookup_bucket(PUBLIC_BUCKET_NAME)):
521 return _public_bucket
523 raise ValueError(
524 f"""The bucket '{PUBLIC_BUCKET_NAME}' does not exist! Please create it with ACL access."""
525 )
527 return _private_bucket
529 @classmethod
530 def is_valid_filename(cls, filename: str) -> bool:
531 """
532 Verifies a valid filename.
534 The filename should be valid on Linux, Mac OS and Windows.
535 It should not be longer than MAX_FILENAME_LEN chars.
537 Rule set: https://stackoverflow.com/a/31976060/3749896
538 Regex test: https://regex101.com/r/iBYpoC/1
539 """
540 if not filename.strip():
541 return False
543 if len(filename) > cls.MAX_FILENAME_LEN:
544 return False
546 return bool(re.match(VALID_FILENAME_REGEX, filename))
548 @staticmethod
549 def hmac_sign(data: t.Any) -> str:
550 assert conf.file_hmac_key is not None, "No hmac-key set!"
551 if not isinstance(data, bytes):
552 data = str(data).encode("UTF-8")
553 return hmac.new(conf.file_hmac_key, msg=data, digestmod=hashlib.sha3_384).hexdigest()
555 @classmethod
556 def hmac_verify(cls, data: t.Any, signature: str) -> bool:
557 return hmac.compare_digest(cls.hmac_sign(data.encode("ASCII")), signature)
559 @classmethod
560 def create_internal_serving_url(
561 cls,
562 serving_url: str,
563 size: int = 0,
564 filename: str = "",
565 options: str = "",
566 download: bool = False
567 ) -> str:
568 """
569 Helper function to generate an internal serving url (endpoint: /file/serve) from a Google serving url.
571 This is needed to hide requests to Google as they are internally be routed, and can be the result of a
572 legal requirement like GDPR.
574 :param serving_url: Is the original serving URL as generated from FileLeafSkel._inject_serving_url()
575 :param size: Optional size setting
576 :param filename: Optonal filename setting
577 :param options: Additional options parameter-pass through to /file/serve
578 :param download: Download parameter-pass through to /file/serve
579 """
581 # Split a serving URL into its components, used by serve function.
582 res = re.match(
583 r"^https:\/\/(.*?)\.googleusercontent\.com\/(.*?)$",
584 serving_url
585 )
587 if not res:
588 raise ValueError(f"Invalid {serving_url=!r} provided")
590 # Create internal serving URL
591 serving_url = cls.INTERNAL_SERVING_URL_PREFIX + "/".join(res.groups())
593 # Append additional parameters
594 if params := {
595 k: v for k, v in {
596 "download": download,
597 "filename": filename,
598 "options": options,
599 "size": size,
600 }.items() if v
601 }:
602 serving_url += f"?{urlencode(params)}"
604 return serving_url
606 @classmethod
607 def create_download_url(
608 cls,
609 dlkey: str,
610 filename: str,
611 derived: bool = False,
612 expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1),
613 download_filename: t.Optional[str] = None
614 ) -> str:
615 """
616 Utility function that creates a signed download-url for the given folder/filename combination
618 :param folder: The GCS-Folder (= the download-key) for that file
619 :param filename: The name of the file. Either the original filename or the name of a derived file.
620 :param derived: True, if it points to a derived file, False if it points to the original uploaded file
621 :param expires:
622 None if the file is supposed to be public (which causes it to be cached on the google ede caches),
623 otherwise a datetime.timedelta of how long that link should be valid
624 :param download_filename: If set, browser is enforced to download this blob with the given alternate
625 filename
626 :return: The signed download-url relative to the current domain (eg /download/...)
627 """
628 if isinstance(expires, int):
629 expires = datetime.timedelta(minutes=expires)
631 # Undo escaping on ()= performed on fileNames
632 filename = filename.replace("(", "(").replace(")", ")").replace("=", "=")
633 filepath = f"""{dlkey}/{"derived" if derived else "source"}/{filename}"""
635 if download_filename:
636 if not cls.is_valid_filename(download_filename):
637 raise errors.UnprocessableEntity(f"Invalid download_filename {download_filename!r} provided")
639 download_filename = urlquote(download_filename)
641 expires = (datetime.datetime.now() + expires).strftime("%Y%m%d%H%M") if expires else 0
643 data = base64.urlsafe_b64encode(f"""{filepath}\0{expires}\0{download_filename or ""}""".encode("UTF-8"))
644 sig = cls.hmac_sign(data)
646 return f"""{cls.DOWNLOAD_URL_PREFIX}{data.decode("ASCII")}?sig={sig}"""
648 @classmethod
649 def parse_download_url(cls, url) -> t.Optional[FilePath]:
650 """
651 Parses a file download URL in the format `/file/download/xxxx?sig=yyyy` into its FilePath.
653 If the URL cannot be parsed, the function returns None.
655 :param url: The file download URL to be parsed.
656 :return: A FilePath on success, None otherwise.
657 """
658 if not url.startswith(cls.DOWNLOAD_URL_PREFIX) or "?" not in url:
659 return None
661 data, sig = url.removeprefix(cls.DOWNLOAD_URL_PREFIX).split("?", 1) # Strip "/file/download/" and split on "?"
662 sig = sig.removeprefix("sig=")
664 if not cls.hmac_verify(data, sig):
665 # Invalid signature
666 return None
668 # Split the blobKey into the individual fields it should contain
669 data = base64.urlsafe_b64decode(data).decode("UTF-8")
671 match data.count("\0"):
672 case 2:
673 dlpath, valid_until, _ = data.split("\0")
674 case 1:
675 # It's the old format, without an downloadFileName
676 dlpath, valid_until = data.split("\0")
677 case _:
678 # Invalid path
679 return None
681 if valid_until != "0" and datetime.strptime(valid_until, "%Y%m%d%H%M") < datetime.now():
682 # Signature expired
683 return None
685 if dlpath.count("/") != 2:
686 # Invalid path
687 return None
689 dlkey, derived, filename = dlpath.split("/")
690 return FilePath(dlkey, derived != "source", filename)
692 @classmethod
693 def create_src_set(
694 cls,
695 file: t.Union["SkeletonInstance", dict, str],
696 expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1),
697 width: t.Optional[int] = None,
698 height: t.Optional[int] = None,
699 language: t.Optional[str] = None,
700 ) -> str:
701 """
702 Generates a string suitable for use as the srcset tag in html. This functionality provides the browser
703 with a list of images in different sizes and allows it to choose the smallest file that will fill it's
704 viewport without upscaling.
706 :param file: The file skeleton (or if multiple=True a single value from it) to generate the srcset.
707 :param expires:
708 None if the file is supposed to be public (which causes it to be cached on the google edecaches),
709 otherwise it's lifetime in seconds
710 :param width:
711 A list of widths that should be included in the srcset.
712 If a given width is not available, it will be skipped.
713 :param height: A list of heights that should be included in the srcset. If a given height is not available,
714 it will be skipped.
715 :param language: Language overwrite if file has multiple languages, and we want to explicitly specify one
716 :return: The srctag generated or an empty string if a invalid file object was supplied
717 """
718 if not width and not height:
719 logging.error("Neither width or height supplied")
720 return ""
722 if isinstance(file, str):
723 file = db.Query("file").filter("dlkey =", file).order(("creationdate", db.SortOrder.Ascending)).getEntry()
725 if not file:
726 return ""
728 if isinstance(file, i18n.LanguageWrapper):
729 language = language or current.language.get()
730 if not language or not (file := cls.get(language)):
731 return ""
733 if "dlkey" not in file and "dest" in file:
734 file = file["dest"]
736 from viur.core.skeleton import SkeletonInstance # avoid circular imports
738 if not (
739 isinstance(file, (SkeletonInstance, dict))
740 and "dlkey" in file
741 and "derived" in file
742 ):
743 logging.error("Invalid file supplied")
744 return ""
746 if not isinstance(file["derived"], dict):
747 logging.error("No derives available")
748 return ""
750 src_set = []
751 for filename, derivate in file["derived"]["files"].items():
752 customData = derivate.get("customData", {})
754 if width and customData.get("width") in width:
755 src_set.append(
756 f"""{cls.create_download_url(file["dlkey"], filename, True, expires)} {customData["width"]}w"""
757 )
759 if height and customData.get("height") in height:
760 src_set.append(
761 f"""{cls.create_download_url(file["dlkey"], filename, True, expires)} {customData["height"]}h"""
762 )
764 return ", ".join(src_set)
766 def write(
767 self,
768 filename: str,
769 content: t.Any,
770 mimetype: str = "text/plain",
771 *,
772 width: int = None,
773 height: int = None,
774 public: bool = False,
775 rootnode: t.Optional[db.Key] = None,
776 folder: t.Iterable[str] | str = (),
777 ) -> db.Key:
778 """
779 Write a file from any bytes-like object into the file module.
781 If *folder* and *rootnode* are both set, the file is added to the repository in that folder.
782 If only *folder* is set, the file is added to the default repository in that folder.
783 If only *rootnode* is set, the file is added to that repository in the root folder.
785 If both are not set, the file is added without a path or repository as a weak file.
786 It will not be visible in admin in this case.
788 :param filename: Filename to be written.
789 :param content: The file content to be written, as bytes-like object.
790 :param mimetype: The file's mimetype.
791 :param width: Optional width information for the file.
792 :param height: Optional height information for the file.
793 :param public: True if the file should be publicly accessible.
794 :param rootnode: Optional root-node of the repository to add the file to
795 :param folder: Optional folder the file should be written into.
797 :return: Returns the key of the file object written. This can be associated e.g. with a FileBone.
798 """
799 # logging.info(f"{filename=} {mimetype=} {width=} {height=} {public=}")
800 if not self.is_valid_filename(filename):
801 raise ValueError(f"{filename=} is invalid")
803 # Folder mode?
804 if folder:
805 # Validate correct folder naming
806 if isinstance(folder, str):
807 folder = folder, # make it a tuple
809 for foldername in folder:
810 if not self.is_valid_filename(foldername):
811 raise ValueError(f"{foldername=} is invalid")
813 # When in folder-mode, a rootnode must exist!
814 if rootnode is None:
815 rootnode = self.ensureOwnModuleRootNode()
817 parentrepokey = rootnode.key
818 parentfolderkey = rootnode.key
820 for foldername in folder:
821 query = self.addSkel("node").all()
822 query.filter("parentrepo", parentrepokey)
823 query.filter("parententry", parentfolderkey)
824 query.filter("name", foldername)
826 if folder_skel := query.getSkel():
827 # Skip existing folder
828 parentfolderkey = folder_skel["key"]
829 else:
830 # Create new folder
831 folder_skel = self.addSkel("node")
833 folder_skel["name"] = foldername
834 folder_skel["parentrepo"] = parentrepokey
835 folder_skel["parententry"] = parentfolderkey
836 folder_skel.write()
838 parentfolderkey = folder_skel["key"]
840 else:
841 parentrepokey = None
842 parentfolderkey = None
844 # Write the file
845 dl_key = utils.string.random()
847 if public:
848 dl_key += PUBLIC_DLKEY_SUFFIX # mark file as public
850 bucket = self.get_bucket(dl_key)
852 blob = bucket.blob(f"{dl_key}/source/{filename}")
853 blob.upload_from_file(io.BytesIO(content), content_type=mimetype)
855 fileskel = self.addSkel("leaf")
857 fileskel["parentrepo"] = parentrepokey
858 fileskel["parententry"] = parentfolderkey
859 fileskel["name"] = filename
860 fileskel["size"] = blob.size
861 fileskel["mimetype"] = mimetype
862 fileskel["dlkey"] = dl_key
863 fileskel["weak"] = bool(parentrepokey)
864 fileskel["public"] = public
865 fileskel["width"] = width
866 fileskel["height"] = height
867 fileskel["crc32c_checksum"] = base64.b64decode(blob.crc32c).hex()
868 fileskel["md5_checksum"] = base64.b64decode(blob.md5_hash).hex()
869 fileskel["pending"] = False
871 return fileskel.write()["key"]
873 def read(
874 self,
875 key: db.Key | int | str | None = None,
876 path: str | None = None,
877 ) -> tuple[io.BytesIO, str]:
878 """
879 Read a file from the Cloud Storage.
881 If a key and a path are provided, the key is preferred.
882 This means that the entry in the db is searched first and if this is not found, the path is used.
884 :param key: Key of the LeafSkel that contains the "dlkey" and the "name".
885 :param path: The path of the file in the Cloud Storage Bucket.
887 :return: Returns the file as a io.BytesIO buffer and the content-type
888 """
889 if not key and not path:
890 raise ValueError("Please provide a key or a path")
892 if key:
893 skel = self.viewSkel("leaf")
894 if not skel.read(db.key_helper(key, skel.kindName)):
895 if not path:
896 raise ValueError("This skeleton is not in the database!")
897 else:
898 path = f"""{skel["dlkey"]}/source/{skel["name"]}"""
900 bucket = self.get_bucket(skel["dlkey"])
901 else:
902 bucket = self.get_bucket(path.split("/", 1)[0]) # path's first part is dlkey plus eventual postfix
904 blob = bucket.blob(path)
905 return io.BytesIO(blob.download_as_bytes()), blob.content_type
907 @CallDeferred
908 def deleteRecursive(self, parentKey):
909 files = db.Query(self.leafSkelCls().kindName).filter("parentdir =", parentKey).iter()
910 for fileEntry in files:
911 self.mark_for_deletion(fileEntry["dlkey"])
912 skel = self.leafSkelCls()
914 if skel.read(str(fileEntry.key())):
915 skel.delete()
916 dirs = db.Query(self.nodeSkelCls().kindName).filter("parentdir", parentKey).iter()
917 for d in dirs:
918 self.deleteRecursive(d.key)
919 skel = self.nodeSkelCls()
920 if skel.read(d.key):
921 skel.delete()
923 @exposed
924 @skey
925 def getUploadURL(
926 self,
927 fileName: str,
928 mimeType: str,
929 size: t.Optional[int] = None,
930 node: t.Optional[str | db.Key] = None,
931 authData: t.Optional[str] = None,
932 authSig: t.Optional[str] = None,
933 public: bool = False,
934 ):
935 filename = fileName.strip() # VIUR4 FIXME: just for compatiblity of the parameter names
937 if not self.is_valid_filename(filename):
938 raise errors.UnprocessableEntity(f"Invalid filename {filename!r} provided")
940 # Validate the mimetype from the client seems legit
941 mimetype = mimeType.strip().lower()
942 if not (
943 mimetype
944 and mimetype.count("/") == 1
945 and all(ch in string.ascii_letters + string.digits + "/-.+" for ch in mimetype)
946 ):
947 raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype!r} provided")
949 # Validate authentication data
950 if authData and authSig:
951 # First, validate the signature, otherwise we don't need to proceed further
952 if not self.hmac_verify(authData, authSig):
953 raise errors.Unauthorized()
955 authData = json.loads(base64.b64decode(authData.encode("ASCII")).decode("UTF-8"))
957 if datetime.datetime.strptime(authData["validUntil"], "%Y%m%d%H%M") < datetime.datetime.now():
958 raise errors.Gone("The upload URL has expired")
960 if authData["validMimeTypes"]:
961 for validMimeType in authData["validMimeTypes"]:
962 if (
963 validMimeType == mimetype
964 or (validMimeType.endswith("*") and mimetype.startswith(validMimeType[:-1]))
965 ):
966 break
967 else:
968 raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype} provided")
970 node = authData["node"]
971 maxSize = authData["maxSize"]
973 else:
974 rootNode = None
975 if node and not (rootNode := self.getRootNode(node)):
976 raise errors.NotFound(f"No valid root node found for {node=}")
978 if not self.canAdd("leaf", rootNode):
979 raise errors.Forbidden()
981 if rootNode and public != bool(rootNode.get("public")):
982 raise errors.Forbidden("Cannot upload a public file into private repository or vice versa")
984 maxSize = None # The user has some file/add permissions, don't restrict fileSize
986 if maxSize:
987 if size > maxSize:
988 raise errors.UnprocessableEntity(f"Size {size} exceeds maximum size {maxSize}")
989 else:
990 size = None
992 # Create upload-URL and download key
993 dlkey = utils.string.random() # let's roll a random key
995 if public:
996 dlkey += PUBLIC_DLKEY_SUFFIX # mark file as public
998 blob = self.get_bucket(dlkey).blob(f"{dlkey}/source/{filename}")
999 upload_url = blob.create_resumable_upload_session(content_type=mimeType, size=size, timeout=60)
1001 # Create a corresponding file-lock object early, otherwise we would have to ensure that the file-lock object
1002 # the user creates matches the file he had uploaded
1003 file_skel = self.addSkel("leaf")
1005 file_skel["name"] = filename + self.PENDING_POSTFIX
1006 file_skel["size"] = 0
1007 file_skel["mimetype"] = "application/octetstream"
1008 file_skel["dlkey"] = dlkey
1009 file_skel["parentdir"] = None
1010 file_skel["pendingparententry"] = db.key_helper(node, self.addSkel("node").kindName) if node else None
1011 file_skel["pending"] = True
1012 file_skel["weak"] = True
1013 file_skel["public"] = public
1014 file_skel["width"] = 0
1015 file_skel["height"] = 0
1017 file_skel.write()
1018 key = str(file_skel["key"])
1020 # Mark that entry dirty as we might never receive an add
1021 self.mark_for_deletion(dlkey)
1023 # In this case, we'd have to store the key in the users session so he can call add() later on
1024 if authData and authSig:
1025 session = current.session.get()
1027 if "pendingFileUploadKeys" not in session:
1028 session["pendingFileUploadKeys"] = []
1030 session["pendingFileUploadKeys"].append(key)
1032 # Clamp to the latest 50 pending uploads
1033 session["pendingFileUploadKeys"] = session["pendingFileUploadKeys"][-50:]
1034 session.markChanged()
1036 return self.render.view({
1037 "uploadKey": key,
1038 "uploadUrl": upload_url,
1039 })
1041 @exposed
1042 def download(self, blobKey: str, fileName: str = "", download: bool = False, sig: str = "", *args, **kwargs):
1043 """
1044 Download a file.
1045 :param blobKey: The unique blob key of the file.
1046 :param fileName: Optional filename to provide in the header.
1047 :param download: Set header to attachment retrival, set explictly to "1" if download is wanted.
1048 """
1049 if filename := fileName.strip():
1050 if not self.is_valid_filename(filename):
1051 raise errors.UnprocessableEntity(f"The provided filename {filename!r} is invalid!")
1053 try:
1054 values = base64.urlsafe_b64decode(blobKey).decode("UTF-8").split("\0")
1055 except ValueError:
1056 raise errors.BadRequest(f"Invalid encoding of blob key {blobKey!r}!")
1057 try:
1058 dlPath, validUntil, *download_filename = values
1059 # Maybe it's the old format, without a download_filename
1060 download_filename = download_filename[0] if download_filename else ""
1061 except ValueError:
1062 logging.error(f"Encoding of {blobKey=!r} OK. {values=} invalid.")
1063 raise errors.BadRequest(f"The blob key {blobKey!r} has an invalid amount of encoded values!")
1065 bucket = self.get_bucket(dlPath.split("/", 1)[0])
1067 if not sig:
1068 # Check if the current user has the right to download *any* blob present in this application.
1069 # blobKey is then the path inside cloudstore - not a base64 encoded tuple
1070 if not (usr := current.user.get()):
1071 raise errors.Unauthorized()
1072 if "root" not in usr["access"] and "file-view" not in usr["access"]:
1073 raise errors.Forbidden()
1074 validUntil = "-1" # Prevent this from being cached down below
1075 blob = bucket.get_blob(blobKey)
1077 else:
1078 # We got an request including a signature (probably a guest or a user without file-view access)
1079 # First, validate the signature, otherwise we don't need to proceed any further
1080 if not self.hmac_verify(blobKey, sig):
1081 raise errors.Forbidden()
1083 if validUntil != "0" and datetime.datetime.strptime(validUntil, "%Y%m%d%H%M") < datetime.datetime.now():
1084 blob = None
1085 else:
1086 blob = bucket.get_blob(dlPath)
1088 if not blob:
1089 raise errors.Gone("The requested blob has expired.")
1091 if not filename:
1092 filename = download_filename or urlquote(blob.name.rsplit("/", 1)[-1])
1094 content_disposition = utils.build_content_disposition_header(filename, attachment=download)
1096 if isinstance(_CREDENTIALS, ServiceAccountCredentials):
1097 expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
1098 signedUrl = blob.generate_signed_url(expiresAt, response_disposition=content_disposition, version="v4")
1099 raise errors.Redirect(signedUrl)
1101 elif conf.instance.is_dev_server: # No Service-Account to sign with - Serve everything directly
1102 response = current.request.get().response
1103 response.headers["Content-Type"] = blob.content_type
1104 if content_disposition:
1105 response.headers["Content-Disposition"] = content_disposition
1106 return blob.download_as_bytes()
1108 if validUntil == "0" or blobKey.endswith(PUBLIC_DLKEY_SUFFIX): # Its an indefinitely valid URL
1109 if blob.size < 5 * 1024 * 1024: # Less than 5 MB - Serve directly and push it into the ede caches
1110 response = current.request.get().response
1111 response.headers["Content-Type"] = blob.content_type
1112 response.headers["Cache-Control"] = "public, max-age=604800" # 7 Days
1113 if content_disposition:
1114 response.headers["Content-Disposition"] = content_disposition
1115 return blob.download_as_bytes()
1117 # Default fallback - create a signed URL and redirect
1118 authRequest = google.auth.transport.requests.Request()
1119 expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
1120 signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "")
1121 signedUrl = blob.generate_signed_url(
1122 expiresAt,
1123 credentials=signing_credentials,
1124 response_disposition=content_disposition,
1125 version="v4")
1127 raise errors.Redirect(signedUrl)
1129 SERVE_VALID_OPTIONS = {
1130 "c",
1131 "p",
1132 "fv",
1133 "fh",
1134 "r90",
1135 "r180",
1136 "r270",
1137 "nu",
1138 }
1139 """
1140 Valid modification option shorts for the serve-function.
1141 This is passed-through to the Google UserContent API, and hast to be supported there.
1142 """
1144 SERVE_VALID_FORMATS = {
1145 "jpg": "rj",
1146 "jpeg": "rj",
1147 "png": "rp",
1148 "webp": "rw",
1149 }
1150 """
1151 Valid file-formats to the serve-function.
1152 This is passed-through to the Google UserContent API, and hast to be supported there.
1153 """
1155 @exposed
1156 def serve(
1157 self,
1158 host: str,
1159 key: str,
1160 size: t.Optional[int] = None,
1161 filename: t.Optional[str] = None,
1162 options: str = "",
1163 download: bool = False,
1164 ):
1165 """
1166 Requests an image using the serving url to bypass direct Google requests.
1168 :param host: the google host prefix i.e. lh3
1169 :param key: the serving url key
1170 :param size: the target image size
1171 :param filename: a random string with an extention, valid extentions are (defined in File.SERVE_VALID_FORMATS).
1172 :param options: - seperated options (defined in File.SERVE_VALID_OPTIONS).
1173 c - crop
1174 p - face crop
1175 fv - vertrical flip
1176 fh - horizontal flip
1177 rXXX - rotate 90, 180, 270
1178 nu - no upscale
1179 :param download: Serves the content as download (Content-Disposition) or not.
1181 :return: Returns the requested content on success, raises a proper HTTP exception otherwise.
1182 """
1184 if any(c not in conf.search_valid_chars for c in host):
1185 raise errors.BadRequest("key contains invalid characters")
1187 # extract format from filename
1188 file_fmt = "webp"
1190 if filename:
1191 fmt = filename.rsplit(".", 1)[-1].lower()
1192 if fmt in self.SERVE_VALID_FORMATS:
1193 file_fmt = fmt
1194 else:
1195 raise errors.UnprocessableEntity(f"Unsupported filetype {fmt}")
1197 url = f"https://{host}.googleusercontent.com/{key}"
1199 if options and not all(param in self.SERVE_VALID_OPTIONS for param in options.split("-")):
1200 raise errors.BadRequest("Invalid options provided")
1202 options += f"-{self.SERVE_VALID_FORMATS[file_fmt]}"
1204 if size:
1205 options = f"s{size}-" + options
1207 url += "=" + options
1209 response = current.request.get().response
1210 response.headers["Content-Type"] = f"image/{file_fmt}"
1211 response.headers["Cache-Control"] = "public, max-age=604800" # 7 Days
1212 response.headers["Content-Disposition"] = utils.build_content_disposition_header(filename, attachment=download)
1214 answ = requests.get(url, timeout=20)
1215 if not answ.ok:
1216 logging.error(f"{answ.status_code} {answ.text}")
1217 raise errors.BadRequest("Unable to fetch a file with these parameters")
1219 return answ.content
1221 @exposed
1222 @force_ssl
1223 @force_post
1224 @skey(allow_empty=True)
1225 def add(self, skelType: SkelType, node: db.Key | int | str | None = None, *args, **kwargs):
1226 # We can't add files directly (they need to be uploaded
1227 if skelType == "leaf": # We need to handle leafs separately here
1228 targetKey = kwargs.get("key")
1229 skel = self.addSkel("leaf")
1231 if not skel.read(targetKey):
1232 raise errors.NotFound()
1234 if not skel["pending"]:
1235 raise errors.PreconditionFailed()
1237 skel["pending"] = False
1238 skel["parententry"] = skel["pendingparententry"]
1240 if skel["parententry"]:
1241 rootNode = self.getRootNode(skel["parententry"])
1242 else:
1243 rootNode = None
1245 if not self.canAdd("leaf", rootNode):
1246 # Check for a marker in this session (created if using a signed upload URL)
1247 session = current.session.get()
1248 if targetKey not in (session.get("pendingFileUploadKeys") or []):
1249 raise errors.Forbidden()
1250 session["pendingFileUploadKeys"].remove(targetKey)
1251 session.markChanged()
1253 # Now read the blob from the dlkey folder
1254 bucket = self.get_bucket(skel["dlkey"])
1256 blobs = list(bucket.list_blobs(prefix=f"""{skel["dlkey"]}/"""))
1257 if len(blobs) != 1:
1258 logging.error("Invalid number of blobs in folder")
1259 logging.error(targetKey)
1260 raise errors.PreconditionFailed()
1262 # only one item is allowed here!
1263 blob = blobs[0]
1265 # update the corresponding file skeleton
1266 skel["name"] = skel["name"].removesuffix(self.PENDING_POSTFIX)
1267 skel["mimetype"] = utils.string.escape(blob.content_type)
1268 skel["size"] = blob.size
1269 skel["parentrepo"] = rootNode["key"] if rootNode else None
1270 skel["weak"] = rootNode is None
1271 skel["crc32c_checksum"] = base64.b64decode(blob.crc32c).hex()
1272 skel["md5_checksum"] = base64.b64decode(blob.md5_hash).hex()
1273 self.onAdd("leaf", skel)
1274 skel.write()
1275 self.onAdded("leaf", skel)
1277 # Add updated download-URL as the auto-generated isn't valid yet
1278 skel["downloadUrl"] = self.create_download_url(skel["dlkey"], skel["name"])
1280 return self.render.addSuccess(skel)
1282 return super().add(skelType, node, *args, **kwargs)
1284 @exposed
1285 def get_download_url(
1286 self,
1287 key: t.Optional[db.Key] = None,
1288 dlkey: t.Optional[str] = None,
1289 filename: t.Optional[str] = None,
1290 derived: bool = False,
1291 ):
1292 """
1293 Request a download url for a given file
1294 :param key: The key of the file
1295 :param dlkey: The download key of the file
1296 :param filename: The filename to be given. If no filename is provided
1297 downloadUrls for all derived files are returned in case of `derived=True`.
1298 :param derived: True, if a derived file download URL is being requested.
1299 """
1300 skel = self.viewSkel("leaf")
1301 if dlkey is not None:
1302 skel = skel.all().filter("dlkey", dlkey).getSkel()
1303 elif key is None and dlkey is None:
1304 raise errors.BadRequest("No key or dlkey provided")
1306 if not (skel and skel.read(key)):
1307 raise errors.NotFound()
1309 if not self.canView("leaf", skel):
1310 raise errors.Unauthorized()
1312 dlkey = skel["dlkey"]
1314 if derived and filename is None:
1315 res = {}
1316 for filename in skel["derived"]["files"]:
1317 res[filename] = self.create_download_url(dlkey, filename, derived)
1318 else:
1319 if derived:
1320 # Check if Filename exist in the Derives. We sign nothing that not exist.
1321 if filename not in skel["derived"]["files"]:
1322 raise errors.NotFound("File not in derives")
1323 else:
1324 if filename is None:
1325 filename = skel["name"]
1326 elif filename != skel["name"]:
1327 raise errors.NotFound("Filename not match")
1329 res = self.create_download_url(dlkey, filename, derived)
1331 return self.render.view(res)
1333 def onEdit(self, skelType: SkelType, skel: SkeletonInstance):
1334 super().onEdit(skelType, skel)
1336 if skelType == "leaf":
1337 old_skel = self.editSkel(skelType)
1338 old_skel.setEntity(skel.dbEntity)
1340 if old_skel["name"] == skel["name"]: # name not changed we can return
1341 return
1343 # Move Blob to new name
1344 # https://cloud.google.com/storage/docs/copying-renaming-moving-objects
1345 old_path = f"""{skel["dlkey"]}/source/{html.unescape(old_skel["name"])}"""
1346 new_path = f"""{skel["dlkey"]}/source/{html.unescape(skel["name"])}"""
1348 bucket = self.get_bucket(skel["dlkey"])
1350 if not (old_blob := bucket.get_blob(old_path)):
1351 raise errors.Gone()
1353 bucket.copy_blob(old_blob, bucket, new_path, if_generation_match=0)
1354 bucket.delete_blob(old_path)
1356 def onAdded(self, skelType: SkelType, skel: SkeletonInstance) -> None:
1357 if skelType == "leaf" and skel["mimetype"].startswith("image/"):
1358 if skel["size"] > self.IMAGE_META_MAX_SIZE:
1359 logging.warning(f"File size {skel['size']} exceeds limit {self.IMAGE_META_MAX_SIZE=}")
1360 return
1361 self.set_image_meta(skel["key"])
1363 super().onAdded(skelType, skel)
1365 @CallDeferred
1366 def set_image_meta(self, key: db.Key) -> None:
1367 """Write image metadata (height and width) to FileSkel"""
1368 skel = self.editSkel("leaf", key)
1369 if not skel.read(key):
1370 logging.error(f"File {key} does not exist")
1371 return
1372 if skel["width"] and skel["height"]:
1373 logging.info(f'File {skel["key"]} has already {skel["width"]=} and {skel["height"]=}')
1374 return
1375 file_name = html.unescape(skel["name"])
1376 blob = self.get_bucket(skel["dlkey"]).get_blob(f"""{skel["dlkey"]}/source/{file_name}""")
1377 if not blob:
1378 logging.error(f'Blob {skel["dlkey"]}/source/{file_name} is missing in Cloud Storage!')
1379 return
1381 file_obj = io.BytesIO()
1382 blob.download_to_file(file_obj)
1383 file_obj.seek(0)
1384 try:
1385 img = Image.open(file_obj)
1386 except Image.UnidentifiedImageError as e: # Can't load this image
1387 logging.exception(f'Cannot open {skel["key"]} | {skel["name"]} to set image meta data: {e}')
1388 return
1390 skel.patch(
1391 values={
1392 "width": img.width,
1393 "height": img.height,
1394 },
1395 )
1397 def mark_for_deletion(self, dlkey: str) -> None:
1398 """
1399 Adds a marker to the datastore that the file specified as *dlkey* can be deleted.
1401 Once the mark has been set, the data store is checked four times (default: every 4 hours)
1402 if the file is in use somewhere. If it is still in use, the mark goes away, otherwise
1403 the mark and the file are removed from the datastore. These delayed checks are necessary
1404 due to database inconsistency.
1406 :param dlkey: Unique download-key of the file that shall be marked for deletion.
1407 """
1408 fileObj = db.Query("viur-deleted-files").filter("dlkey", dlkey).getEntry()
1410 if fileObj: # Its allready marked
1411 return
1413 fileObj = db.Entity(db.Key("viur-deleted-files"))
1414 fileObj["itercount"] = 0
1415 fileObj["dlkey"] = str(dlkey)
1417 db.put(fileObj)
1420@PeriodicTask(interval=datetime.timedelta(hours=4))
1421def startCheckForUnreferencedBlobs():
1422 """
1423 Start searching for blob locks that have been recently freed
1424 """
1425 doCheckForUnreferencedBlobs()
1428@CallDeferred
1429def doCheckForUnreferencedBlobs(cursor=None):
1430 def getOldBlobKeysTxn(dbKey):
1431 obj = db.get(dbKey)
1432 res = obj["old_blob_references"] or []
1433 if obj["is_stale"]:
1434 db.delete(dbKey)
1435 else:
1436 obj["has_old_blob_references"] = False
1437 obj["old_blob_references"] = []
1438 db.put(obj)
1439 return res
1441 query = db.Query("viur-blob-locks").filter("has_old_blob_references", True).setCursor(cursor)
1442 for lockObj in query.run(100):
1443 oldBlobKeys = db.run_in_transaction(getOldBlobKeysTxn, lockObj.key)
1444 for blobKey in oldBlobKeys:
1445 if db.Query("viur-blob-locks").filter("active_blob_references =", blobKey).getEntry():
1446 # This blob is referenced elsewhere
1447 logging.info(f"Stale blob is still referenced, {blobKey}")
1448 continue
1449 # Add a marker and schedule it for deletion
1450 fileObj = db.Query("viur-deleted-files").filter("dlkey", blobKey).getEntry()
1451 if fileObj: # Its already marked
1452 logging.info(f"Stale blob already marked for deletion, {blobKey}")
1453 return
1454 fileObj = db.Entity(db.Key("viur-deleted-files"))
1455 fileObj["itercount"] = 0
1456 fileObj["dlkey"] = str(blobKey)
1457 logging.info(f"Stale blob marked dirty, {blobKey}")
1458 db.put(fileObj)
1459 newCursor = query.getCursor()
1460 if newCursor:
1461 doCheckForUnreferencedBlobs(newCursor)
1464@PeriodicTask(interval=datetime.timedelta(hours=4))
1465def startCleanupDeletedFiles():
1466 """
1467 Increase deletion counter on each blob currently not referenced and delete
1468 it if that counter reaches maxIterCount
1469 """
1470 doCleanupDeletedFiles()
1473@CallDeferred
1474def doCleanupDeletedFiles(cursor=None):
1475 maxIterCount = 2 # How often a file will be checked for deletion
1476 query = db.Query("viur-deleted-files")
1477 if cursor:
1478 query.setCursor(cursor)
1479 for file in query.run(100):
1480 if "dlkey" not in file:
1481 db.delete(file.key)
1482 elif db.Query("viur-blob-locks").filter("active_blob_references =", file["dlkey"]).getEntry():
1483 logging.info(f"""is referenced, {file["dlkey"]}""")
1484 db.delete(file.key)
1485 else:
1486 if file["itercount"] > maxIterCount:
1487 logging.info(f"""Finally deleting, {file["dlkey"]}""")
1488 bucket = conf.main_app.file.get_bucket(file["dlkey"])
1489 blobs = bucket.list_blobs(prefix=f"""{file["dlkey"]}/""")
1490 for blob in blobs:
1491 blob.delete()
1492 db.delete(file.key)
1493 # There should be exactly 1 or 0 of these
1494 for f in skeletonByKind("file")().all().filter("dlkey =", file["dlkey"]).fetch(99):
1495 f.delete()
1497 if f["serving_url"]:
1498 bucket = conf.main_app.file.get_bucket(f["dlkey"])
1499 blob_key = blobstore.create_gs_key(
1500 f"/gs/{bucket.name}/{f['dlkey']}/source/{f['name']}"
1501 )
1502 images.delete_serving_url(blob_key) # delete serving url
1503 else:
1504 logging.debug(f"""Increasing count, {file["dlkey"]}""")
1505 file["itercount"] += 1
1506 db.put(file)
1507 newCursor = query.getCursor()
1508 if newCursor:
1509 doCleanupDeletedFiles(newCursor)
1512@PeriodicTask(interval=datetime.timedelta(hours=4))
1513def start_delete_pending_files():
1514 """
1515 Start deletion of pending FileSkels that are older than 7 days.
1516 """
1517 DeleteEntitiesIter.startIterOnQuery(
1518 FileLeafSkel().all()
1519 .filter("pending =", True)
1520 .filter("creationdate <", utils.utcNow() - datetime.timedelta(days=7))
1521 )
1524# DEPRECATED ATTRIBUTES HANDLING
1526def __getattr__(attr: str) -> object:
1527 if entry := {
1528 # stuff prior viur-core < 3.7
1529 "GOOGLE_STORAGE_BUCKET": ("conf.main_app.file.get_bucket()", _private_bucket),
1530 }.get(attr):
1531 msg = f"{attr} was replaced by {entry[0]}"
1532 warnings.warn(msg, DeprecationWarning, stacklevel=2)
1533 logging.warning(msg, stacklevel=2)
1534 return entry[1]
1536 return super(__import__(__name__).__class__).__getattribute__(attr)