Coverage for / home / runner / work / viur-core / viur-core / viur / src / viur / core / modules / file.py: 0%
798 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 12:35 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 12:35 +0000
1import base64
2import datetime
3import hashlib
4import hmac
5import html
6import io
7import json
8import logging
9import re
10import string
11import typing as t
12import warnings
13from collections import namedtuple
14from urllib.parse import quote as urlquote, urlencode
15from urllib.request import urlopen
17import PIL
18import PIL.ImageCms
19import google.auth
20import requests
21from PIL import Image
22from google.appengine.api import blobstore, images
23from google.cloud import storage
24from google.oauth2.service_account import Credentials as ServiceAccountCredentials
26from viur.core import conf, current, db, errors, utils, i18n
27from viur.core.bones import BaseBone, BooleanBone, JsonBone, KeyBone, NumericBone, StringBone
29from viur.core.decorators import *
30from viur.core.prototypes.tree import SkelType, Tree, TreeSkel
31from viur.core.skeleton import SkeletonInstance, skeletonByKind
32from viur.core.tasks import CallDeferred, DeleteEntitiesIter, PeriodicTask
# Module-wide state: filename validation and Cloud Storage connectivity

# A filename passes when it
#   - is not exactly one of the names CON, PRN, AUX, NUL, COM1-9, LPT1-9 (case-insensitive),
#   - contains neither control characters (\x00-\x1F) nor any of  < > : " / \ | ? *
#   - does not end with a dot or a space.
VALID_FILENAME_REGEX = re.compile(
    r"^(?!^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$)[^\x00-\x1F<>:\"\/\\|?*]*[^\x00-\x1F<>:\"\/\\|?*. ]$",
    flags=re.IGNORECASE,
)

# Default credentials and project of the running application
_CREDENTIALS, _PROJECT_ID = google.auth.default()
GOOGLE_STORAGE_CLIENT = storage.Client(_PROJECT_ID, _CREDENTIALS)

# Bucket names are derived from the project id
PRIVATE_BUCKET_NAME = f"{_PROJECT_ID}.appspot.com"
PUBLIC_BUCKET_NAME = f"public-dot-{_PROJECT_ID}"
# Download keys ending with this suffix are resolved to the public bucket (see File.get_bucket)
PUBLIC_DLKEY_SUFFIX = "_pub"

# The private bucket is looked up at import time, the public one on first use
_private_bucket = GOOGLE_STORAGE_CLIENT.lookup_bucket(PRIVATE_BUCKET_NAME)
_public_bucket = None

# FilePath is a descriptor for ViUR file components
FilePath = namedtuple("FilePath", ["dlkey", "is_derived", "filename"])
def importBlobFromViur2(dlKey, fileName):
    """
    Import a blob from the configured ViUR 2 blob source into ``<dlKey>/source/<fileName>``.

    The outcome of every attempt is stored in a ``viur-viur2-blobimport`` entity keyed by *dlKey*.
    If such an entity already exists, its stored result is returned and no new import is attempted.

    :param dlKey: Download key of the file to import.
    :param fileName: Name the blob gets below ``<dlKey>/source/``.
    :return: The download URL of the imported blob, or False if no blob source is configured or the
        import failed (now or on a previous attempt).
    """
    bucket = conf.main_app.file.get_bucket(dlKey)

    blob_source = conf.viur2import_blobsource
    if not blob_source:
        return False

    marker_key = db.Key("viur-viur2-blobimport", dlKey)

    def store_marker(success, **values):
        # Persist the outcome of this import attempt
        marker = db.Entity(marker_key)
        marker["success"] = success
        for name, value in values.items():
            marker[name] = value
        db.put(marker)
        return marker

    if existing_import := db.get(marker_key):
        return existing_import["dlurl"] if existing_import["success"] else False

    if blob_source["infoURL"]:
        try:
            response = urlopen(blob_source["infoURL"] + dlKey)
        except Exception:
            logging.exception(f"Fetching the import information for {dlKey=} failed")
            store_marker(False, error="Failed URL-FETCH 1")
            return False

        # Make sure the HTTP response is closed on every path
        with response:
            if response.status != 200:
                store_marker(False, error="Failed URL-FETCH 2")
                return False

            import_data = json.loads(response.read())

        old_blob_name = blob_source["gsdir"] + "/" + import_data["key"]
    else:
        old_blob_name = blob_source["gsdir"] + "/" + dlKey

    src_blob = storage.Blob(bucket=bucket, name=old_blob_name)
    if not src_blob.exists():
        store_marker(False, error="Local SRC-Blob missing", oldBlobName=old_blob_name)
        return False

    bucket.rename_blob(src_blob, f"{dlKey}/source/{fileName}")

    marker = store_marker(
        True,
        old_src_key=dlKey,
        old_src_name=fileName,
        dlurl=conf.main_app.file.create_download_url(dlKey, fileName, False, None),
    )
    return marker["dlurl"]
def thumbnailer(fileSkel, existingFiles, params):
    """
    Create resized versions of an image and store them as derived files.

    :param fileSkel: File skeleton providing at least "dlkey" and "name".
    :param existingFiles: Not used by this implementation.
    :param params: Iterable of dicts, each describing one thumbnail. Recognized keys are "width",
        "height", "fileExtension" (default "webp") and "mimeType" (default "image/webp").
        Entries without "width" are ignored.
    :return: A list of ``(filename, size, mimetype, customData)`` tuples, one per thumbnail written,
        or None if the source blob does not exist.
    """
    dlkey = fileSkel["dlkey"]
    file_name = html.unescape(fileSkel["name"])
    bucket = conf.main_app.file.get_bucket(dlkey)

    source_path = f"{dlkey}/source/{file_name}"
    blob = bucket.get_blob(source_path)
    if not blob:
        logging.warning(f"Blob {source_path} is missing from cloud storage!")
        return

    source = io.BytesIO()
    blob.download_to_file(source)

    result = []

    for info in params:
        # Every thumbnail starts from a freshly loaded source image
        source.seek(0)
        try:
            img = PIL.Image.open(source)
        except PIL.Image.UnidentifiedImageError:
            # The image can't be loaded at all, so other resolutions won't work either
            break

        icc_profile = img.info.get("icc_profile")
        if icc_profile:
            # The source may use a non-standard color profile; convert to sRGB,
            # as the profile information is lost in the derived file
            src_profile = PIL.ImageCms.ImageCmsProfile(io.BytesIO(icc_profile))
            dst_profile = PIL.ImageCms.createProfile("sRGB")
            output_mode = "RGBA" if img.has_transparency_data else "RGB"
            try:
                img = PIL.ImageCms.profileToProfile(
                    img,
                    inputProfile=src_profile,
                    outputProfile=dst_profile,
                    outputMode=output_mode,
                )
            except Exception as e:
                logging.debug(f"{info=}")
                logging.exception(e)
                continue

        file_extension = info.get("fileExtension", "webp")
        mimetype = info.get("mimeType", "image/webp")

        if "width" not in info:
            # No default fallback - ignore
            continue

        width = info["width"]
        if "height" in info:
            height = info["height"]
            target_filename = f"thumbnail-{width}-{height}.{file_extension}"
        else:
            # Derive the height from the aspect ratio of the source
            src_width, src_height = img.size[0], img.size[1]
            height = int(float(src_height) * float(width / float(src_width)))
            target_filename = f"thumbnail-w{width}.{file_extension}"

        try:
            img = img.resize((width, height), PIL.Image.LANCZOS)
        except ValueError as e:
            # Usually happens to some files, like TIFF-images.
            logging.debug(f"{info=}")
            logging.exception(e)
            break

        # Render the resized image into memory
        target = io.BytesIO()
        img.save(target, file_extension)
        target_size = target.tell()
        target.seek(0)

        # Save the derived target file
        derived_blob = bucket.blob(f"{dlkey}/derived/{target_filename}")
        derived_blob.upload_from_file(target, content_type=mimetype)

        custom_data = {"mimetype": mimetype, "width": width, "height": height}
        result.append((target_filename, target_size, mimetype, custom_data))

    return result
def cloudfunction_thumbnailer(fileSkel, existingFiles, params):
    """External Thumbnailer for images.

    The corresponding cloudfunction can be found here:
    https://github.com/viur-framework/viur-cloudfunctions/tree/main/thumbnailer

    You can use it like so:
    main.py:

    .. code-block:: python

        from viur.core.modules.file import cloudfunction_thumbnailer

        conf.file_thumbnailer_url = "https://xxxxx.cloudfunctions.net/imagerenderer"
        conf.file_derivations = {"thumbnail": cloudfunction_thumbnailer}

        conf.derives_pdf = {
            "thumbnail": [{"width": 1920,"sites":"1,2"}]
        }

    skeletons/xxx.py:

    .. code-block:: python

        test = FileBone(derive=conf.derives_pdf)

    The cloudfunction is called twice: first with "nameOnly" set to obtain the names of the derived
    files, then again with one resumable upload URL per derived file.

    :param fileSkel: File skeleton providing "dlkey", "name" and "mimetype".
    :param existingFiles: Not used by this implementation.
    :param params: Passed through to the cloudfunction as "params".
    :return: A list of ``(name, size, mimetype, customData)`` tuples, or None if any step failed.
    :raises ValueError: If conf.file_thumbnailer_url is not set.
    """
    if not conf.file_thumbnailer_url:
        raise ValueError("conf.file_thumbnailer_url is not set")

    bucket = conf.main_app.file.get_bucket(fileSkel["dlkey"])
    file_name = html.unescape(fileSkel["name"])

    def getsignedurl():
        # On the development server, an ordinary download URL is used
        if conf.instance.is_dev_server:
            return conf.main_app.file.create_download_url(fileSkel["dlkey"], fileSkel["name"])

        path = f"""{fileSkel["dlkey"]}/source/{file_name}"""
        blob = bucket.get_blob(path)
        if not blob:
            logging.warning(f"Blob {path} is missing from cloud storage!")
            return None

        auth_request = google.auth.transport.requests.Request()
        expires_at = datetime.datetime.now() + datetime.timedelta(seconds=60)
        signing_credentials = google.auth.compute_engine.IDTokenCredentials(auth_request, "")
        content_disposition = utils.build_content_disposition_header(fileSkel["name"])
        return blob.generate_signed_url(
            expires_at,
            credentials=signing_credentials,
            response_disposition=content_disposition,
            version="v4",
        )

    def make_request():
        # Send the current dataDict, base64-encoded and HMAC-signed, to the cloudfunction
        data_str = base64.b64encode(json.dumps(dataDict).encode("UTF-8"))
        sig = conf.main_app.file.hmac_sign(data_str)
        payload = json.dumps({"dataStr": data_str.decode('ASCII'), "sign": sig})
        resp = requests.post(
            conf.file_thumbnailer_url,
            data=payload,
            headers={"Content-Type": "application/json"},
            allow_redirects=False,
        )

        status = resp.status_code
        if status != 200:  # Error Handling
            if status in (302, 404):
                # Google responds with a 302 to an auth site when the cloudfunction was not found
                # https://cloud.google.com/functions/docs/troubleshooting#login
                logging.error("Cloudfunction not found")
            elif status == 403:
                logging.error("No permission for the Cloudfunction")
            else:
                logging.error(
                    f"cloudfunction_thumbnailer failed with code: {resp.status_code} and data: {resp.content}")
            return

        try:
            response_data = resp.json()
        except Exception as e:
            logging.error(f"response could not be converted in json failed with: {e=}")
            return

        if "error" in response_data:
            logging.error(f"cloudfunction_thumbnailer failed with: {response_data.get('error')}")
            return

        return response_data

    # First round-trip: only ask for the names of the derived files
    url = getsignedurl()
    if not url:
        return

    dataDict = {
        "url": url,
        "name": fileSkel["name"],
        "params": params,
        "minetype": fileSkel["mimetype"],
        "baseUrl": current.request.get().request.host_url.lower(),
        "targetKey": fileSkel["dlkey"],
        "nameOnly": True
    }

    derivedData = make_request()
    if not derivedData:
        return

    # Open one resumable upload session per announced derived file
    uploadUrls = {}
    for entry in derivedData["values"]:
        derived_name = conf.main_app.file.sanitize_filename(entry["name"])
        derived_blob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{derived_name}""")
        uploadUrls[fileSkel["dlkey"] + derived_name] = derived_blob.create_resumable_upload_session(
            timeout=60,
            content_type=entry["mimeType"],
        )

    # Second round-trip: let the cloudfunction render and upload the files
    url = getsignedurl()
    if not url:
        return

    dataDict["url"] = url
    dataDict["nameOnly"] = False
    dataDict["uploadUrls"] = uploadUrls

    derivedData = make_request()
    if not derivedData:
        return

    reslist = []
    try:
        for derived in derivedData["values"]:
            for key, value in derived.items():
                reslist.append((key, value["size"], value["mimetype"], value["customData"]))
    except Exception as e:
        # Entries collected before the failure are still returned
        logging.error(f"cloudfunction_thumbnailer failed with: {e=}")

    return reslist
class DownloadUrlBone(BaseBone):
    """
    Bone that injects a freshly signed download URL into a FileSkel whenever it is unserialized.
    """

    def unserialize(self, skel, name):
        """
        Compute the download URL from the "dlkey" and "name" values of *skel*.

        :param skel: The skeleton instance this bone belongs to.
        :param name: The name of this bone within *skel*.
        :return: True if a URL was created, False if the entity has no "dlkey" or no "name".
        """
        entity = skel.dbEntity
        if "dlkey" not in entity or "name" not in entity:
            return False

        skel.accessedValues[name] = conf.main_app.file.create_download_url(
            skel["dlkey"],
            skel["name"],
            expires=conf.render_json_download_url_expiration,
        )
        return True
class FileLeafSkel(TreeSkel):
    """
    Default skeleton for files (the leafs of the file tree).
    """
    kindName = "file"

    name = StringBone(
        descr="Filename",
        caseSensitive=False,
        searchable=True,
        # vfunc returns an error message for invalid values, None otherwise
        vfunc=lambda val: "Invalid filename provided" if not File.is_valid_filename(val) else None,
    )

    alt = StringBone(
        descr=i18n.translate("viur.core.image.alt", defaultText="Alternative description"),
        searchable=True,
        languages=conf.i18n.available_languages,
    )

    size = NumericBone(descr="Filesize in Bytes", readOnly=True, searchable=True)

    dlkey = StringBone(descr="Download-Key", readOnly=True)

    mimetype = StringBone(descr="MIME-Type", readOnly=True)

    weak = BooleanBone(descr="Weak reference", readOnly=True, visible=False)

    pending = BooleanBone(descr="Pending upload", readOnly=True, visible=False, defaultValue=False)

    width = NumericBone(descr="Width", readOnly=True, searchable=True)

    height = NumericBone(descr="Height", readOnly=True, searchable=True)

    downloadUrl = DownloadUrlBone(descr="Download-URL", readOnly=True, visible=False)

    derived = JsonBone(descr="Derived Files", readOnly=True, visible=False)

    pendingparententry = KeyBone(descr="Pending key Reference", readOnly=True, visible=False)

    crc32c_checksum = StringBone(descr="CRC32C checksum", readOnly=True)

    md5_checksum = StringBone(descr="MD5 checksum", readOnly=True)

    public = BooleanBone(descr="Public File", readOnly=True, defaultValue=False)

    serving_url = StringBone(
        descr="Serving-URL",
        readOnly=True,
        params={
            "tooltip": "The 'serving_url' is only available in public file repositories.",
        },
    )

    @classmethod
    def _inject_serving_url(cls, skel: SkeletonInstance) -> None:
        """
        Set "serving_url" on public image files that do not have one yet.

        Nothing happens on the local development server; failures are logged and not raised.
        """
        if not skel["public"]:
            return

        mimetype = skel["mimetype"]
        if not mimetype or not mimetype.startswith("image/"):
            return

        if skel["serving_url"]:
            return

        bucket = File.get_bucket(skel["dlkey"])
        filename = f"/gs/{bucket.name}/{skel['dlkey']}/source/{utils.string.unescape(skel['name'])}"

        # Trying this on local development server will raise a
        # `google.appengine.runtime.apiproxy_errors.RPCFailedError`
        if conf.instance.is_dev_server:
            logging.warning(f"Can't inject serving_url for {filename!r} on local development server")
            return

        try:
            skel["serving_url"] = images.get_serving_url(None, secure_url=True, filename=filename)
        except Exception as e:
            logging.warning(f"Failed to create serving_url for {filename!r} with exception {e!r}")
            logging.exception(e)

    def preProcessBlobLocks(self, locks):
        """
        Add our own dlkey to *locks* unless this file is weak, so that it stays locked
        even if no filebone references it.
        """
        if self["dlkey"] and not self["weak"]:
            locks.add(self["dlkey"])

        return locks

    @classmethod
    def refresh(cls, skel):
        """
        Refresh the skeleton; optionally imports the blob from a ViUR 2 source and injects the serving URL.
        """
        super().refresh(skel)

        if conf.viur2import_blobsource:
            imported_url = importBlobFromViur2(skel["dlkey"], skel["name"])
            if imported_url:
                if not skel["downloadUrl"]:
                    skel["downloadUrl"] = imported_url

                skel["pendingparententry"] = None

        cls._inject_serving_url(skel)

    @classmethod
    def write(cls, skel, **kwargs):
        """
        Inject the serving URL (if applicable), then write the skeleton.
        """
        cls._inject_serving_url(skel)
        return super().write(skel, **kwargs)
class FileNodeSkel(TreeSkel):
    """
    Default skeleton for folders (the nodes of the file tree).
    """
    kindName = "file_rootNode"  # FIXME: VIUR4, don't use "_rootNode" kindname

    name = StringBone(descr="Name", required=True, searchable=True)

    rootNode = BooleanBone(descr="Is RootNode", defaultValue=False, readOnly=True, visible=False)

    public = BooleanBone(descr="Is public?", defaultValue=False, readOnly=True, visible=False)

    # Explicitly unset for file nodes
    viurCurrentSeoKeys = None
511class File(Tree):
512 PENDING_POSTFIX = " (pending)"
513 DOWNLOAD_URL_PREFIX = "/file/download/"
514 INTERNAL_SERVING_URL_PREFIX = "/file/serve/"
515 MAX_FILENAME_LEN = 256
516 IMAGE_META_MAX_SIZE: t.Final[int] = 10 * 1024 ** 2
517 """Maximum size of image files that should be analysed in :meth:`set_image_meta`.
518 Default: 10 MiB"""
520 leafSkelCls = FileLeafSkel
521 nodeSkelCls = FileNodeSkel
523 handler = "tree.simple.file"
524 adminInfo = {
525 "icon": "folder-fill",
526 "handler": handler, # fixme: Use static handler; Remove with VIUR4!
527 }
529 roles = {
530 "*": "view",
531 "editor": ("add", "edit"),
532 "admin": "*",
533 }
535 default_order = "name"
537 # Helper functions currently resist here
539 @staticmethod
540 def get_bucket(dlkey: str) -> google.cloud.storage.bucket.Bucket:
541 """
542 Retrieves a Google Cloud Storage bucket for the given dlkey.
543 """
544 global _public_bucket
545 if dlkey and dlkey.endswith(PUBLIC_DLKEY_SUFFIX):
546 if _public_bucket or (_public_bucket := GOOGLE_STORAGE_CLIENT.lookup_bucket(PUBLIC_BUCKET_NAME)):
547 return _public_bucket
549 raise ValueError(
550 f"""The bucket '{PUBLIC_BUCKET_NAME}' does not exist! Please create it with ACL access."""
551 )
553 return _private_bucket
555 @classmethod
556 def is_valid_filename(cls, filename: str) -> bool:
557 """
558 Verifies a valid filename.
560 The filename should be valid on Linux, Mac OS and Windows.
561 It should not be longer than MAX_FILENAME_LEN chars.
563 Rule set: https://stackoverflow.com/a/31976060/3749896
564 Regex test: https://regex101.com/r/iBYpoC/1
565 """
566 if not filename.strip():
567 return False
569 if len(filename) > cls.MAX_FILENAME_LEN:
570 return False
572 return bool(re.match(VALID_FILENAME_REGEX, filename))
574 @staticmethod
575 def hmac_sign(data: t.Any) -> str:
576 assert conf.file_hmac_key is not None, "No hmac-key set!"
577 if not isinstance(data, bytes):
578 data = str(data).encode("UTF-8")
579 return hmac.new(conf.file_hmac_key, msg=data, digestmod=hashlib.sha3_384).hexdigest()
581 @classmethod
582 def hmac_verify(cls, data: t.Any, signature: str) -> bool:
583 try:
584 return hmac.compare_digest(cls.hmac_sign(data.encode("ASCII")), signature)
585 except (TypeError, UnicodeEncodeError):
586 return False
588 @classmethod
589 def create_internal_serving_url(
590 cls,
591 serving_url: str,
592 size: int = 0,
593 filename: str = "",
594 options: str = "",
595 download: bool = False
596 ) -> str:
597 """
598 Helper function to generate an internal serving url (endpoint: /file/serve) from a Google serving url.
600 This is needed to hide requests to Google as they are internally be routed, and can be the result of a
601 legal requirement like GDPR.
603 :param serving_url: Is the original serving URL as generated from FileLeafSkel._inject_serving_url()
604 :param size: Optional size setting
605 :param filename: Optonal filename setting
606 :param options: Additional options parameter-pass through to /file/serve
607 :param download: Download parameter-pass through to /file/serve
608 """
610 # Split a serving URL into its components, used by serve function.
611 res = re.match(
612 r"^https:\/\/(.*?)\.googleusercontent\.com\/(.*?)$",
613 serving_url
614 )
616 if not res:
617 raise ValueError(f"Invalid {serving_url=!r} provided")
619 # Create internal serving URL
620 serving_url = cls.INTERNAL_SERVING_URL_PREFIX + "/".join(res.groups())
622 # Append additional parameters
623 if params := {
624 k: v for k, v in {
625 "download": download,
626 "filename": filename,
627 "options": options,
628 "size": size,
629 }.items() if v
630 }:
631 serving_url += f"?{urlencode(params)}"
633 return serving_url
635 @classmethod
636 def create_download_url(
637 cls,
638 dlkey: str,
639 filename: str,
640 derived: bool = False,
641 expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1),
642 download_filename: t.Optional[str] = None
643 ) -> str:
644 """
645 Utility function that creates a signed download-url for the given folder/filename combination
647 :param folder: The GCS-Folder (= the download-key) for that file
648 :param filename: The name of the file. Either the original filename or the name of a derived file.
649 :param derived: True, if it points to a derived file, False if it points to the original uploaded file
650 :param expires:
651 None if the file is supposed to be public (which causes it to be cached on the google ede caches),
652 otherwise a datetime.timedelta of how long that link should be valid
653 :param download_filename: If set, browser is enforced to download this blob with the given alternate
654 filename
655 :return: The signed download-url relative to the current domain (eg /download/...)
656 """
657 if isinstance(expires, int):
658 expires = datetime.timedelta(minutes=expires)
660 # Undo escaping on ()= performed on fileNames
661 filename = filename.replace("(", "(").replace(")", ")").replace("=", "=")
662 filepath = f"""{dlkey}/{"derived" if derived else "source"}/{filename}"""
664 if download_filename:
665 if not cls.is_valid_filename(download_filename):
666 raise errors.UnprocessableEntity(f"Invalid download_filename {download_filename!r} provided")
668 download_filename = urlquote(download_filename)
670 expires = (datetime.datetime.now() + expires).strftime("%Y%m%d%H%M") if expires else 0
672 data = base64.urlsafe_b64encode(f"""{filepath}\0{expires}\0{download_filename or ""}""".encode("UTF-8"))
673 sig = cls.hmac_sign(data)
675 return f"""{cls.DOWNLOAD_URL_PREFIX}{data.decode("ASCII")}?sig={sig}"""
677 @classmethod
678 def parse_download_url(cls, url) -> t.Optional[FilePath]:
679 """
680 Parses a file download URL in the format `/file/download/xxxx?sig=yyyy` into its FilePath.
682 If the URL cannot be parsed, the function returns None.
684 :param url: The file download URL to be parsed.
685 :return: A FilePath on success, None otherwise.
686 """
687 if not url.startswith(cls.DOWNLOAD_URL_PREFIX) or "?" not in url:
688 return None
690 data, sig = url.removeprefix(cls.DOWNLOAD_URL_PREFIX).split("?", 1) # Strip "/file/download/" and split on "?"
691 sig = sig.removeprefix("sig=")
693 if not cls.hmac_verify(data, sig):
694 # Invalid signature
695 return None
697 # Split the blobKey into the individual fields it should contain
698 data = base64.urlsafe_b64decode(data).decode("UTF-8")
700 match data.count("\0"):
701 case 2:
702 dlpath, valid_until, _ = data.split("\0")
703 case 1:
704 # It's the old format, without an downloadFileName
705 dlpath, valid_until = data.split("\0")
706 case _:
707 # Invalid path
708 return None
710 if valid_until != "0" and datetime.strptime(valid_until, "%Y%m%d%H%M") < datetime.now():
711 # Signature expired
712 return None
714 if dlpath.count("/") != 2:
715 # Invalid path
716 return None
718 dlkey, derived, filename = dlpath.split("/")
719 return FilePath(dlkey, derived != "source", filename)
721 @classmethod
722 def create_src_set(
723 cls,
724 file: t.Union["SkeletonInstance", dict, str],
725 expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1),
726 width: t.Optional[int] = None,
727 height: t.Optional[int] = None,
728 language: t.Optional[str] = None,
729 ) -> str:
730 """
731 Generates a string suitable for use as the srcset tag in html. This functionality provides the browser
732 with a list of images in different sizes and allows it to choose the smallest file that will fill it's
733 viewport without upscaling.
735 :param file: The file skeleton (or if multiple=True a single value from it) to generate the srcset.
736 :param expires:
737 None if the file is supposed to be public (which causes it to be cached on the google edecaches),
738 otherwise it's lifetime in seconds
739 :param width:
740 A list of widths that should be included in the srcset.
741 If a given width is not available, it will be skipped.
742 :param height: A list of heights that should be included in the srcset. If a given height is not available,
743 it will be skipped.
744 :param language: Language overwrite if file has multiple languages, and we want to explicitly specify one
745 :return: The srctag generated or an empty string if a invalid file object was supplied
746 """
747 if not width and not height:
748 logging.error("Neither width or height supplied")
749 return ""
751 if isinstance(file, str):
752 file = db.Query("file").filter("dlkey =", file).order(("creationdate", db.SortOrder.Ascending)).getEntry()
754 if not file:
755 return ""
757 if isinstance(file, i18n.LanguageWrapper):
758 language = language or current.language.get()
759 if not language or not (file := cls.get(language)):
760 return ""
762 if "dlkey" not in file and "dest" in file:
763 file = file["dest"]
765 from viur.core.skeleton import SkeletonInstance # avoid circular imports
767 if not (
768 isinstance(file, (SkeletonInstance, dict))
769 and "dlkey" in file
770 and "derived" in file
771 ):
772 logging.error("Invalid file supplied")
773 return ""
775 if not isinstance(file["derived"], dict):
776 logging.error("No derives available")
777 return ""
779 src_set = []
780 for filename, derivate in file["derived"]["files"].items():
781 customData = derivate.get("customData", {})
783 if width and customData.get("width") in width:
784 src_set.append(
785 f"""{cls.create_download_url(file["dlkey"], filename, True, expires)} {customData["width"]}w"""
786 )
788 if height and customData.get("height") in height:
789 src_set.append(
790 f"""{cls.create_download_url(file["dlkey"], filename, True, expires)} {customData["height"]}h"""
791 )
793 return ", ".join(src_set)
795 def write(
796 self,
797 filename: str,
798 content: t.Any,
799 mimetype: str = "text/plain",
800 *,
801 width: int = None,
802 height: int = None,
803 public: bool = False,
804 rootnode: t.Optional[db.Key] = None,
805 folder: t.Iterable[str] | str = (),
806 ) -> db.Key:
807 """
808 Write a file from any bytes-like object into the file module.
810 If *folder* and *rootnode* are both set, the file is added to the repository in that folder.
811 If only *folder* is set, the file is added to the default repository in that folder.
812 If only *rootnode* is set, the file is added to that repository in the root folder.
814 If both are not set, the file is added without a path or repository as a weak file.
815 It will not be visible in admin in this case.
817 :param filename: Filename to be written.
818 :param content: The file content to be written, as bytes-like object.
819 :param mimetype: The file's mimetype.
820 :param width: Optional width information for the file.
821 :param height: Optional height information for the file.
822 :param public: True if the file should be publicly accessible.
823 :param rootnode: Optional root-node of the repository to add the file to
824 :param folder: Optional folder the file should be written into.
826 :return: Returns the key of the file object written. This can be associated e.g. with a FileBone.
827 """
828 # logging.info(f"{filename=} {mimetype=} {width=} {height=} {public=}")
829 if not self.is_valid_filename(filename):
830 raise ValueError(f"{filename=} is invalid")
832 # Folder mode?
833 if folder:
834 # Validate correct folder naming
835 if isinstance(folder, str):
836 folder = folder, # make it a tuple
838 for foldername in folder:
839 if not self.is_valid_filename(foldername):
840 raise ValueError(f"{foldername=} is invalid")
842 # When in folder-mode, a rootnode must exist!
843 if rootnode is None:
844 rootnode = self.ensureOwnModuleRootNode()
846 parentrepokey = rootnode.key
847 parentfolderkey = rootnode.key
849 for foldername in folder:
850 query = self.addSkel("node").all()
851 query.filter("parentrepo", parentrepokey)
852 query.filter("parententry", parentfolderkey)
853 query.filter("name", foldername)
855 if folder_skel := query.getSkel():
856 # Skip existing folder
857 parentfolderkey = folder_skel["key"]
858 else:
859 # Create new folder
860 folder_skel = self.addSkel("node")
862 folder_skel["name"] = foldername
863 folder_skel["parentrepo"] = parentrepokey
864 folder_skel["parententry"] = parentfolderkey
865 folder_skel.write()
867 parentfolderkey = folder_skel["key"]
869 else:
870 parentrepokey = None
871 parentfolderkey = None
873 # Write the file
874 dl_key = utils.string.random()
876 if public:
877 dl_key += PUBLIC_DLKEY_SUFFIX # mark file as public
879 bucket = self.get_bucket(dl_key)
881 blob = bucket.blob(f"{dl_key}/source/{filename}")
882 blob.upload_from_file(io.BytesIO(content), content_type=mimetype)
884 fileskel = self.addSkel("leaf")
886 fileskel["parentrepo"] = parentrepokey
887 fileskel["parententry"] = parentfolderkey
888 fileskel["name"] = filename
889 fileskel["size"] = blob.size
890 fileskel["mimetype"] = mimetype
891 fileskel["dlkey"] = dl_key
892 fileskel["weak"] = bool(parentrepokey)
893 fileskel["public"] = public
894 fileskel["width"] = width
895 fileskel["height"] = height
896 fileskel["crc32c_checksum"] = base64.b64decode(blob.crc32c).hex()
897 fileskel["md5_checksum"] = base64.b64decode(blob.md5_hash).hex()
898 fileskel["pending"] = False
900 return fileskel.write()["key"]
902 def read(
903 self,
904 key: db.Key | int | str | None = None,
905 path: str | None = None,
906 ) -> tuple[io.BytesIO, str]:
907 """
908 Read a file from the Cloud Storage.
910 If a key and a path are provided, the key is preferred.
911 This means that the entry in the db is searched first and if this is not found, the path is used.
913 :param key: Key of the LeafSkel that contains the "dlkey" and the "name".
914 :param path: The path of the file in the Cloud Storage Bucket.
916 :return: Returns the file as a io.BytesIO buffer and the content-type
917 """
918 if not key and not path:
919 raise ValueError("Please provide a key or a path")
921 if key:
922 skel = self.viewSkel("leaf")
923 if not skel.read(db.key_helper(key, skel.kindName)):
924 if not path:
925 raise ValueError("This skeleton is not in the database!")
926 else:
927 path = f"""{skel["dlkey"]}/source/{skel["name"]}"""
929 bucket = self.get_bucket(skel["dlkey"])
930 else:
931 bucket = self.get_bucket(path.split("/", 1)[0]) # path's first part is dlkey plus eventual postfix
933 blob = bucket.blob(path)
934 return io.BytesIO(blob.download_as_bytes()), blob.content_type
936 @CallDeferred
937 def deleteRecursive(self, parentKey):
938 files = db.Query(self.leafSkelCls().kindName).filter("parentdir =", parentKey).iter()
939 for fileEntry in files:
940 self.mark_for_deletion(fileEntry["dlkey"])
941 skel = self.leafSkelCls()
943 if skel.read(str(fileEntry.key())):
944 skel.delete()
945 dirs = db.Query(self.nodeSkelCls().kindName).filter("parentdir", parentKey).iter()
946 for d in dirs:
947 self.deleteRecursive(d.key)
948 skel = self.nodeSkelCls()
949 if skel.read(d.key):
950 skel.delete()
952 @exposed
953 @skey
954 def getUploadURL(
955 self,
956 fileName: str,
957 mimeType: str,
958 size: t.Optional[int] = None,
959 node: t.Optional[str | db.Key] = None,
960 authData: t.Optional[str] = None,
961 authSig: t.Optional[str] = None,
962 public: bool = False,
963 ):
964 filename = fileName.strip() # VIUR4 FIXME: just for compatiblity of the parameter names
966 if not self.is_valid_filename(filename):
967 raise errors.UnprocessableEntity(f"Invalid filename {filename!r} provided")
969 # Validate the mimetype from the client seems legit
970 mimetype = mimeType.strip().lower()
971 if not (
972 mimetype
973 and mimetype.count("/") == 1
974 and all(ch in string.ascii_letters + string.digits + "/-.+" for ch in mimetype)
975 ):
976 raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype!r} provided")
978 # Validate authentication data
979 if authData and authSig:
980 # First, validate the signature, otherwise we don't need to proceed further
981 if not self.hmac_verify(authData, authSig):
982 raise errors.Unauthorized()
984 authData = json.loads(base64.b64decode(authData.encode("ASCII")).decode("UTF-8"))
986 if datetime.datetime.strptime(authData["validUntil"], "%Y%m%d%H%M") < datetime.datetime.now():
987 raise errors.Gone("The upload URL has expired")
989 if authData["validMimeTypes"]:
990 for validMimeType in authData["validMimeTypes"]:
991 if (
992 validMimeType == mimetype
993 or (validMimeType.endswith("*") and mimetype.startswith(validMimeType[:-1]))
994 ):
995 break
996 else:
997 raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype} provided")
999 node = authData["node"]
1000 maxSize = authData["maxSize"]
1002 else:
1003 rootNode = None
1004 if node and not (rootNode := self.getRootNode(node)):
1005 raise errors.NotFound(f"No valid root node found for {node=}")
1007 if not self.canAdd("leaf", rootNode):
1008 raise errors.Forbidden()
1010 if rootNode and public != bool(rootNode.get("public")):
1011 raise errors.Forbidden("Cannot upload a public file into private repository or vice versa")
1013 maxSize = None # The user has some file/add permissions, don't restrict fileSize
1015 if maxSize:
1016 if size > maxSize:
1017 raise errors.UnprocessableEntity(f"Size {size} exceeds maximum size {maxSize}")
1018 else:
1019 size = None
1021 # Create upload-URL and download key
1022 dlkey = utils.string.random() # let's roll a random key
1024 if public:
1025 dlkey += PUBLIC_DLKEY_SUFFIX # mark file as public
1027 blob = self.get_bucket(dlkey).blob(f"{dlkey}/source/{filename}")
1028 upload_url = blob.create_resumable_upload_session(content_type=mimeType, size=size, timeout=60)
1030 # Create a corresponding file-lock object early, otherwise we would have to ensure that the file-lock object
1031 # the user creates matches the file he had uploaded
1032 file_skel = self.addSkel("leaf")
1034 file_skel["name"] = filename + self.PENDING_POSTFIX
1035 file_skel["size"] = 0
1036 file_skel["mimetype"] = "application/octetstream"
1037 file_skel["dlkey"] = dlkey
1038 file_skel["parentdir"] = None
1039 file_skel["pendingparententry"] = db.key_helper(node, self.addSkel("node").kindName) if node else None
1040 file_skel["pending"] = True
1041 file_skel["weak"] = True
1042 file_skel["public"] = public
1043 file_skel["width"] = 0
1044 file_skel["height"] = 0
1046 file_skel.write()
1047 key = str(file_skel["key"])
1049 # Mark that entry dirty as we might never receive an add
1050 self.mark_for_deletion(dlkey)
1052 # In this case, we'd have to store the key in the users session so he can call add() later on
1053 if authData and authSig:
1054 session = current.session.get()
1056 if "pendingFileUploadKeys" not in session:
1057 session["pendingFileUploadKeys"] = []
1059 session["pendingFileUploadKeys"].append(key)
1061 # Clamp to the latest 50 pending uploads
1062 session["pendingFileUploadKeys"] = session["pendingFileUploadKeys"][-50:]
1063 session.markChanged()
1065 return self.render.view({
1066 "uploadKey": key,
1067 "uploadUrl": upload_url,
1068 })
@exposed
def download(self, blobKey: str, fileName: str = "", download: bool = False, sig: str = "", *args, **kwargs):
    """
    Download a file.

    :param blobKey: The unique blob key of the file. It is decoded as url-safe base64 into
        NUL-separated values: download path, valid-until timestamp and an optional download filename.
    :param fileName: Optional filename to provide in the header.
    :param download: Set header to attachment retrieval, set explicitly to "1" if download is wanted.
    :param sig: Signature of *blobKey*, checked with ``hmac_verify``. Without a signature,
        the current user must have "root" or "file-view" access.

    :raises errors.UnprocessableEntity: The provided filename is invalid.
    :raises errors.BadRequest: The blob key cannot be decoded or has too few values.
    :raises errors.Unauthorized: No signature given and nobody is logged in.
    :raises errors.Forbidden: Missing access rights or invalid signature.
    :raises errors.Gone: The link has expired or the blob does not exist.
    :raises errors.Redirect: Redirection to a signed URL of the blob.
    """
    filename = fileName.strip()
    if filename and not self.is_valid_filename(filename):
        raise errors.UnprocessableEntity(f"The provided filename {filename!r} is invalid!")

    try:
        values = base64.urlsafe_b64decode(blobKey).decode("UTF-8").split("\0")
    except ValueError:
        raise errors.BadRequest(f"Invalid encoding of blob key {blobKey!r}!")

    try:
        dlPath, validUntil, *remainder = values
    except ValueError:
        logging.error(f"Encoding of {blobKey=!r} OK. {values=} invalid.")
        raise errors.BadRequest(f"The blob key {blobKey!r} has an invalid amount of encoded values!")

    # The old format has no download filename encoded
    download_filename = remainder[0] if remainder else ""

    bucket = self.get_bucket(dlPath.split("/", 1)[0])

    if sig:
        # Signed request (probably a guest or a user without file-view access):
        # the signature has to be valid before anything else is looked at.
        if not self.hmac_verify(blobKey, sig):
            raise errors.Forbidden()

        is_expired = (
            validUntil != "0"
            and datetime.datetime.strptime(validUntil, "%Y%m%d%H%M") < datetime.datetime.now()
        )
        blob = None if is_expired else bucket.get_blob(dlPath)

    else:
        # Unsigned request: only users allowed to download *any* blob of this application may pass.
        # blobKey is then the path inside cloudstore - not a base64 encoded tuple
        user = current.user.get()
        if not user:
            raise errors.Unauthorized()

        if "root" not in user["access"] and "file-view" not in user["access"]:
            raise errors.Forbidden()

        validUntil = "-1"  # Prevent this from being cached down below
        blob = bucket.get_blob(blobKey)

    if not blob:
        raise errors.Gone("The requested blob has expired.")

    if not filename:
        filename = download_filename or urlquote(blob.name.rsplit("/", 1)[-1])

    content_disposition = utils.build_content_disposition_header(filename, attachment=download)

    def deliver_directly(cache_control=None):
        """Sets the response headers and returns the blob content itself."""
        response = current.request.get().response
        response.headers["Content-Type"] = blob.content_type
        if cache_control:
            response.headers["Cache-Control"] = cache_control
        if content_disposition:
            response.headers["Content-Disposition"] = content_disposition
        return blob.download_as_bytes()

    if isinstance(_CREDENTIALS, ServiceAccountCredentials):
        signed_url = blob.generate_signed_url(
            datetime.datetime.now() + datetime.timedelta(seconds=60),
            response_disposition=content_disposition,
            version="v4",
        )
        raise errors.Redirect(signed_url)

    elif conf.instance.is_dev_server:  # No Service-Account to sign with - Serve everything directly
        return deliver_directly()

    # Indefinitely valid URLs of blobs smaller than 5 MB are served directly and may be cached for 7 days
    if (validUntil == "0" or blobKey.endswith(PUBLIC_DLKEY_SUFFIX)) and blob.size < 5 * 1024 * 1024:
        return deliver_directly("public, max-age=604800")

    # Default fallback - create a signed URL and redirect
    auth_request = google.auth.transport.requests.Request()
    expires_at = datetime.datetime.now() + datetime.timedelta(seconds=60)
    signing_credentials = google.auth.compute_engine.IDTokenCredentials(auth_request, "")
    signed_url = blob.generate_signed_url(
        expires_at,
        credentials=signing_credentials,
        response_disposition=content_disposition,
        version="v4",
    )

    raise errors.Redirect(signed_url)
# Option shorts accepted by serve(); the `options` parameter is split at "-"
# and every part has to be a member of this set.
SERVE_VALID_OPTIONS = {
    "c",
    "p",
    "fv",
    "fh",
    "r90",
    "r180",
    "r270",
    "nu",
}
"""
Valid modification option shorts for the serve-function.
This is passed-through to the Google UserContent API, and hast to be supported there.
"""

# Maps a filename extension accepted by serve() to the option short
# that is appended to the requested URL for this format.
SERVE_VALID_FORMATS = {
    "jpg": "rj",
    "jpeg": "rj",
    "png": "rp",
    "webp": "rw",
}
"""
Valid file-formats to the serve-function.
This is passed-through to the Google UserContent API, and hast to be supported there.
"""
@exposed
def serve(
    self,
    host: str,
    key: str,
    size: t.Optional[int] = None,
    filename: t.Optional[str] = None,
    options: str = "",
    download: bool = False,
):
    """
    Requests an image using the serving url to bypass direct Google requests.

    :param host: the google host prefix i.e. lh3
    :param key: the serving url key
    :param size: the target image size
    :param filename: a random string with an extension, valid extensions are
        defined in File.SERVE_VALID_FORMATS. Defaults to the webp format when omitted.
    :param options: - separated options (defined in File.SERVE_VALID_OPTIONS).
        c - crop
        p - face crop
        fv - vertical flip
        fh - horizontal flip
        rXXX - rotate 90, 180, 270
        nu - no upscale
    :param download: Serves the content as download (Content-Disposition) or not.

    :return: Returns the requested content on success, raises a proper HTTP exception otherwise.

    :raises errors.BadRequest: Invalid host or options, or the image could not be fetched.
    :raises errors.UnprocessableEntity: The filename has an unsupported extension.
    """
    # The host becomes part of the requested domain, so it must be restricted
    if any(c not in conf.search_valid_chars for c in host):
        raise errors.BadRequest("host contains invalid characters")

    # extract format from filename
    file_fmt = "webp"

    if filename:
        fmt = filename.rsplit(".", 1)[-1].lower()
        if fmt not in self.SERVE_VALID_FORMATS:
            raise errors.UnprocessableEntity(f"Unsupported filetype {fmt}")

        file_fmt = fmt

    requested_options = options.split("-") if options else []
    if not all(param in self.SERVE_VALID_OPTIONS for param in requested_options):
        raise errors.BadRequest("Invalid options provided")

    # Assemble the URL parameters from the parts that are actually present;
    # this avoids empty segments (leading or double "-") when size or options are omitted.
    url_params = []
    if size:
        url_params.append(f"s{size}")
    url_params.extend(requested_options)
    url_params.append(self.SERVE_VALID_FORMATS[file_fmt])

    url = f"https://{host}.googleusercontent.com/{key}=" + "-".join(url_params)

    response = current.request.get().response
    response.headers["Content-Type"] = f"image/{file_fmt}"
    response.headers["Cache-Control"] = "public, max-age=604800"  # 7 Days

    # Only set the header when there is a value for it, same as download() does
    content_disposition = utils.build_content_disposition_header(filename, attachment=download)
    if content_disposition:
        response.headers["Content-Disposition"] = content_disposition

    answ = requests.get(url, timeout=20)
    if not answ.ok:
        logging.error(f"{answ.status_code} {answ.text}")
        raise errors.BadRequest("Unable to fetch a file with these parameters")

    return answ.content
@exposed
@force_ssl
@force_post
@skey(allow_empty=True)
def add(self, skelType: SkelType, node: db.Key | int | str | None = None, *args, **kwargs):
    """
    Adds a node, or finalizes a previously uploaded file.

    Leafs cannot be added directly. For `skelType == "leaf"`, the pending file skeleton
    given by the keyword argument `key` is completed with the data of the one blob
    found in its dlkey folder. Any other skelType is passed to the parent implementation.

    :raises errors.NotFound: The pending file skeleton does not exist.
    :raises errors.PreconditionFailed: The skeleton is not pending, or the dlkey folder
        does not contain exactly one blob.
    :raises errors.Forbidden: Neither add permission nor a matching upload marker in the session.
    """
    if skelType != "leaf":
        return super().add(skelType, node, *args, **kwargs)

    targetKey = kwargs.get("key")
    skel = self.addSkel("leaf")

    if not skel.read(targetKey):
        raise errors.NotFound()

    if not skel["pending"]:
        raise errors.PreconditionFailed()

    skel["pending"] = False
    skel["parententry"] = skel["pendingparententry"]

    rootNode = self.getRootNode(skel["parententry"]) if skel["parententry"] else None

    if not self.canAdd("leaf", rootNode):
        # Without add permission, the upload must have been registered in this session
        # (this happens when a signed upload URL was used)
        session = current.session.get()
        pending_keys = session.get("pendingFileUploadKeys") or []
        if targetKey not in pending_keys:
            raise errors.Forbidden()

        session["pendingFileUploadKeys"].remove(targetKey)
        session.markChanged()

    # Fetch the uploaded blob from the dlkey folder
    dlkey = skel["dlkey"]
    bucket = self.get_bucket(dlkey)
    blobs = list(bucket.list_blobs(prefix=f"{dlkey}/"))

    if len(blobs) != 1:
        logging.error("Invalid number of blobs in folder")
        logging.error(targetKey)
        raise errors.PreconditionFailed()

    (blob,) = blobs  # exactly one item, as verified above

    # Transfer the blob's properties into the file skeleton
    skel["name"] = skel["name"].removesuffix(self.PENDING_POSTFIX)
    skel["mimetype"] = utils.string.escape(blob.content_type)
    skel["size"] = blob.size
    skel["parentrepo"] = rootNode["key"] if rootNode else None
    skel["weak"] = rootNode is None
    skel["crc32c_checksum"] = base64.b64decode(blob.crc32c).hex()
    skel["md5_checksum"] = base64.b64decode(blob.md5_hash).hex()

    self.onAdd("leaf", skel)
    skel.write()
    self.onAdded("leaf", skel)

    # The auto-generated download-URL isn't valid yet, so provide an updated one
    skel["downloadUrl"] = self.create_download_url(skel["dlkey"], skel["name"])

    return self.render.addSuccess(skel)
@exposed
def get_download_url(
    self,
    key: t.Optional[db.Key] = None,
    dlkey: t.Optional[str] = None,
    filename: t.Optional[str] = None,
    derived: bool = False,
):
    """
    Request a download url for a given file

    :param key: The key of the file
    :param dlkey: The download key of the file
    :param filename: The filename to be given. If no filename is provided
        downloadUrls for all derived files are returned in case of `derived=True`.
    :param derived: True, if a derived file download URL is being requested.

    :return: The rendered download URL, or the rendered mapping of derived filenames
        to download URLs when `derived=True` and no filename is given.

    :raises errors.BadRequest: Neither key nor dlkey was provided.
    :raises errors.NotFound: The file does not exist, or the filename does not match
        the file or any of its derives.
    :raises errors.Unauthorized: The file may not be viewed.
    """
    skel = self.viewSkel("leaf")
    if dlkey is not None:
        skel = skel.all().filter("dlkey", dlkey).getSkel()
    elif key is None:
        raise errors.BadRequest("No key or dlkey provided")

    if not (skel and skel.read(key)):
        raise errors.NotFound()

    if not self.canView("leaf", skel):
        raise errors.Unauthorized()

    dlkey = skel["dlkey"]

    if derived:
        # A file without any derives may have no "derived" value or no "files" entry at all;
        # treat this as "no derived files" instead of failing on the lookup.
        derived_files = (skel["derived"] or {}).get("files") or {}

        if filename is None:
            res = {name: self.create_download_url(dlkey, name, derived) for name in derived_files}
            return self.render.view(res)

        # Only derives that really exist are signed
        if filename not in derived_files:
            raise errors.NotFound("File not in derives")

    elif filename is None:
        filename = skel["name"]

    elif filename != skel["name"]:
        raise errors.NotFound("Filename not match")

    return self.render.view(self.create_download_url(dlkey, filename, derived))
def onEdit(self, skelType: SkelType, skel: SkeletonInstance):
    """
    Renames the blob in the bucket when the name of a leaf is about to change.

    :raises errors.Gone: The blob with the previous name does not exist.
    """
    super().onEdit(skelType, skel)

    if skelType != "leaf":
        return

    # Load the stored state into a fresh skeleton to compare it with the edited one
    previous = self.editSkel(skelType)
    previous.setEntity(skel.dbEntity)

    if previous["name"] == skel["name"]:
        return  # nothing to rename

    # A rename is performed as copy + delete, see
    # https://cloud.google.com/storage/docs/copying-renaming-moving-objects
    dlkey = skel["dlkey"]
    old_path = f"{dlkey}/source/{html.unescape(previous['name'])}"
    new_path = f"{dlkey}/source/{html.unescape(skel['name'])}"

    bucket = self.get_bucket(dlkey)
    old_blob = bucket.get_blob(old_path)
    if not old_blob:
        raise errors.Gone()

    bucket.copy_blob(old_blob, bucket, new_path, if_generation_match=0)
    bucket.delete_blob(old_path)
def onAdded(self, skelType: SkelType, skel: SkeletonInstance) -> None:
    """
    Triggers the image metadata detection for added image leafs.

    Images larger than `IMAGE_META_MAX_SIZE` are skipped with a warning.
    The parent implementation is called in every case, including for oversized images.
    """
    if skelType == "leaf" and skel["mimetype"].startswith("image/"):
        if skel["size"] > self.IMAGE_META_MAX_SIZE:
            logging.warning(f"File size {skel['size']} exceeds limit {self.IMAGE_META_MAX_SIZE=}")
        else:
            self.set_image_meta(skel["key"])

    super().onAdded(skelType, skel)
@CallDeferred
def set_image_meta(self, key: db.Key) -> None:
    """
    Reads width and height from the image blob and patches them into the FileSkel.

    Nothing is written if the file does not exist, already has both values,
    the blob is missing, or the image cannot be identified.

    :param key: Key of the file skeleton to update.
    """
    skel = self.editSkel("leaf", key)

    if not skel.read(key):
        logging.error(f"File {key} does not exist")
        return

    if skel["width"] and skel["height"]:
        logging.info(f'File {skel["key"]} has already {skel["width"]=} and {skel["height"]=}')
        return

    file_name = html.unescape(skel["name"])
    bucket = self.get_bucket(skel["dlkey"])
    blob = bucket.get_blob(f"""{skel["dlkey"]}/source/{file_name}""")

    if not blob:
        logging.error(f'Blob {skel["dlkey"]}/source/{file_name} is missing in Cloud Storage!')
        return

    # Load the blob into memory and rewind, so the image can be read from the start
    buffer = io.BytesIO()
    blob.download_to_file(buffer)
    buffer.seek(0)

    try:
        img = Image.open(buffer)
    except Image.UnidentifiedImageError as e:  # Can't load this image
        logging.exception(f'Cannot open {skel["key"]} | {skel["name"]} to set image meta data: {e}')
    else:
        skel.patch(values={"width": img.width, "height": img.height})
def mark_for_deletion(self, dlkey: str) -> None:
    """
    Adds a marker to the datastore that the file specified as *dlkey* can be deleted.

    The marker is an entity of kind "viur-deleted-files" with an iteration counter of 0.
    If such a marker already exists for *dlkey*, nothing is written.

    :param dlkey: Unique download-key of the file that shall be marked for deletion.
    """
    if db.Query("viur-deleted-files").filter("dlkey", dlkey).getEntry():
        return  # a marker for this dlkey exists already

    marker = db.Entity(db.Key("viur-deleted-files"))
    marker["itercount"] = 0
    marker["dlkey"] = str(dlkey)

    db.put(marker)
@PeriodicTask(interval=datetime.timedelta(hours=4))
def startCheckForUnreferencedBlobs():
    """
    Periodic entry point (every 4 hours): kicks off the deferred search
    for blob references that have been released by their lock entries.
    """
    doCheckForUnreferencedBlobs()
@CallDeferred
def doCheckForUnreferencedBlobs(cursor=None):
    """
    Marks blobs for deletion that are no longer referenced by any blob lock.

    Processes up to 100 "viur-blob-locks" entries having old blob references per call
    and continues with the next batch by calling itself with the query's cursor.

    :param cursor: Cursor to continue a previous run from.
    """

    def getOldBlobKeysTxn(dbKey):
        """Fetches the old blob references of a lock entry and resets them (or drops a stale entry)."""
        obj = db.get(dbKey)
        res = obj["old_blob_references"] or []
        if obj["is_stale"]:
            db.delete(dbKey)
        else:
            obj["has_old_blob_references"] = False
            obj["old_blob_references"] = []
            db.put(obj)
        return res

    query = db.Query("viur-blob-locks").filter("has_old_blob_references", True).setCursor(cursor)
    for lockObj in query.run(100):
        oldBlobKeys = db.run_in_transaction(getOldBlobKeysTxn, lockObj.key)
        for blobKey in oldBlobKeys:
            if db.Query("viur-blob-locks").filter("active_blob_references =", blobKey).getEntry():
                # This blob is referenced elsewhere
                logging.info(f"Stale blob is still referenced, {blobKey}")
                continue

            # Add a marker and schedule it for deletion
            fileObj = db.Query("viur-deleted-files").filter("dlkey", blobKey).getEntry()
            if fileObj:  # It's already marked
                logging.info(f"Stale blob already marked for deletion, {blobKey}")
                # Only skip this blob: the references were already cleared by the transaction above,
                # so leaving the function here would lose all remaining blobs and lock entries.
                continue

            fileObj = db.Entity(db.Key("viur-deleted-files"))
            fileObj["itercount"] = 0
            fileObj["dlkey"] = str(blobKey)
            logging.info(f"Stale blob marked dirty, {blobKey}")
            db.put(fileObj)

    newCursor = query.getCursor()
    if newCursor:
        doCheckForUnreferencedBlobs(newCursor)
@PeriodicTask(interval=datetime.timedelta(hours=4))
def startCleanupDeletedFiles():
    """
    Periodic entry point (every 4 hours): kicks off the deferred cleanup, which
    counts up the deletion markers of unreferenced blobs and finally deletes them.
    """
    doCleanupDeletedFiles()
@CallDeferred
def doCleanupDeletedFiles(cursor=None):
    """
    Works through the deletion markers stored as "viur-deleted-files".

    For each of up to 100 markers per call:

    - a marker without a dlkey is removed,
    - a marker whose dlkey is referenced by a blob lock is removed,
    - a marker whose counter exceeds `maxIterCount` leads to the deletion of all blobs
      below the dlkey, the marker, the file skeletons and their serving urls,
    - otherwise, the counter of the marker is increased.

    Continues with the next batch by calling itself with the query's cursor.

    :param cursor: Cursor to continue a previous run from.
    """
    maxIterCount = 2  # How often a file will be checked for deletion

    query = db.Query("viur-deleted-files")
    if cursor:
        query.setCursor(cursor)

    for marker in query.run(100):
        if "dlkey" not in marker:
            db.delete(marker.key)
            continue

        dlkey = marker["dlkey"]

        if db.Query("viur-blob-locks").filter("active_blob_references =", dlkey).getEntry():
            logging.info(f"is referenced, {dlkey}")
            db.delete(marker.key)
            continue

        if marker["itercount"] <= maxIterCount:
            logging.debug(f"Increasing count, {dlkey}")
            marker["itercount"] += 1
            db.put(marker)
            continue

        logging.info(f"Finally deleting, {dlkey}")

        bucket = conf.main_app.file.get_bucket(dlkey)
        for blob in bucket.list_blobs(prefix=f"{dlkey}/"):
            blob.delete()

        db.delete(marker.key)

        # There should be exactly 1 or 0 of these
        for file_skel in skeletonByKind("file")().all().filter("dlkey =", dlkey).fetch(99):
            file_skel.delete()

            if not file_skel["serving_url"]:
                continue

            bucket = conf.main_app.file.get_bucket(file_skel["dlkey"])
            gs_path = f"/gs/{bucket.name}/{file_skel['dlkey']}/source/{file_skel['name']}"
            images.delete_serving_url(blobstore.create_gs_key(gs_path))  # delete serving url

    if next_cursor := query.getCursor():
        doCleanupDeletedFiles(next_cursor)
@PeriodicTask(interval=datetime.timedelta(hours=4))
def start_delete_pending_files():
    """
    Periodic entry point (every 4 hours): starts the deletion of all FileSkels
    which are still pending and were created more than 7 days ago.
    """
    created_before = utils.utcNow() - datetime.timedelta(days=7)

    pending_files = FileLeafSkel().all()
    pending_files = pending_files.filter("pending =", True)
    pending_files = pending_files.filter("creationdate <", created_before)

    DeleteEntitiesIter.startIterOnQuery(pending_files)
1553# DEPRECATED ATTRIBUTES HANDLING
def __getattr__(attr: str) -> object:
    """
    Module-level attribute fallback (PEP 562) that keeps deprecated names available.

    A deprecated name is answered with its replacement value; the deprecation is
    reported both as `DeprecationWarning` and in the log.

    :param attr: Name of the module attribute that was not found the regular way.
    :return: The value provided for the deprecated name.
    :raises AttributeError: The name is not a known deprecated attribute.
    """
    deprecated = {
        # stuff prior viur-core < 3.7
        "GOOGLE_STORAGE_BUCKET": ("conf.main_app.file.get_bucket()", _private_bucket),
    }

    if entry := deprecated.get(attr):
        msg = f"{attr} was replaced by {entry[0]}"
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        logging.warning(msg, stacklevel=2)
        return entry[1]

    # A module-level __getattr__ has to signal unknown names with an AttributeError
    raise AttributeError(f"module {__name__!r} has no attribute {attr!r}")