import base64
import datetime
import hashlib
import hmac
import html
import io
import json
import logging
import re
import string
import typing as t
import warnings
from collections import namedtuple
from urllib.parse import quote as urlquote, urlencode
from urllib.request import urlopen

import PIL
import PIL.ImageCms
import google.auth
import requests
from PIL import Image
from google.appengine.api import blobstore, images
from google.cloud import storage
from google.oauth2.service_account import Credentials as ServiceAccountCredentials

from viur.core import conf, current, db, errors, utils
from viur.core.bones import BaseBone, BooleanBone, KeyBone, NumericBone, StringBone
from viur.core.decorators import *
from viur.core.i18n import LanguageWrapper
from viur.core.prototypes.tree import SkelType, Tree, TreeSkel
from viur.core.skeleton import SkeletonInstance, skeletonByKind
from viur.core.tasks import CallDeferred, DeleteEntitiesIter, PeriodicTask

# Globals for connectivity

VALID_FILENAME_REGEX = re.compile(
    # A valid filename must not be a reserved device name (CON, PRN, AUX, NUL, COM1-9, LPT1-9),
    # must not contain control or special characters, and must not end with a special char, "." or " ".
    r"^(?!^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$)[^\x00-\x1F<>:\"\/\\|?*]*[^\x00-\x1F<>:\"\/\\|?*. ]$",
    re.IGNORECASE
)

_CREDENTIALS, _PROJECT_ID = google.auth.default()
GOOGLE_STORAGE_CLIENT = storage.Client(_PROJECT_ID, _CREDENTIALS)

PRIVATE_BUCKET_NAME = f"""{_PROJECT_ID}.appspot.com"""
PUBLIC_BUCKET_NAME = f"""public-dot-{_PROJECT_ID}"""
PUBLIC_DLKEY_SUFFIX = "_pub"

_private_bucket = GOOGLE_STORAGE_CLIENT.lookup_bucket(PRIVATE_BUCKET_NAME)
_public_bucket = None

# FilePath is a descriptor for ViUR file components
FilePath = namedtuple("FilePath", ("dlkey", "is_derived", "filename"))


def importBlobFromViur2(dlKey, fileName):
    bucket = File.get_bucket(dlKey)

    if not conf.viur2import_blobsource:
        return False
    existingImport = db.Get(db.Key("viur-viur2-blobimport", dlKey))
    if existingImport:
        if existingImport["success"]:
            return existingImport["dlurl"]
        return False
    if conf.viur2import_blobsource["infoURL"]:
        try:
            importDataReq = urlopen(conf.viur2import_blobsource["infoURL"] + dlKey)
        except Exception as e:
            marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
            marker["success"] = False
            marker["error"] = "Failed URL-FETCH 1"
            db.Put(marker)
            return False
        if importDataReq.status != 200:
            marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
            marker["success"] = False
            marker["error"] = "Failed URL-FETCH 2"
            db.Put(marker)
            return False
        importData = json.loads(importDataReq.read())
        oldBlobName = conf.viur2import_blobsource["gsdir"] + "/" + importData["key"]
        srcBlob = storage.Blob(
            bucket=bucket,
            name=conf.viur2import_blobsource["gsdir"] + "/" + importData["key"]
        )
    else:
        oldBlobName = conf.viur2import_blobsource["gsdir"] + "/" + dlKey
        srcBlob = storage.Blob(bucket=bucket, name=conf.viur2import_blobsource["gsdir"] + "/" + dlKey)
    if not srcBlob.exists():
        marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
        marker["success"] = False
        marker["error"] = "Local SRC-Blob missing"
        marker["oldBlobName"] = oldBlobName
        db.Put(marker)
        return False
    bucket.rename_blob(srcBlob, f"{dlKey}/source/{fileName}")
    marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
    marker["success"] = True
    marker["old_src_key"] = dlKey
    marker["old_src_name"] = fileName
    marker["dlurl"] = File.create_download_url(dlKey, fileName, False, None)
    db.Put(marker)
    return marker["dlurl"]


def thumbnailer(fileSkel, existingFiles, params):
    file_name = html.unescape(fileSkel["name"])
    bucket = File.get_bucket(fileSkel["dlkey"])
    blob = bucket.get_blob(f"""{fileSkel["dlkey"]}/source/{file_name}""")
    if not blob:
        logging.warning(f"""Blob {fileSkel["dlkey"]}/source/{file_name} is missing from cloud storage!""")
        return
    fileData = io.BytesIO()
    blob.download_to_file(fileData)
    resList = []
    for sizeDict in params:
        fileData.seek(0)
        outData = io.BytesIO()
        try:
            img = PIL.Image.open(fileData)
        except PIL.Image.UnidentifiedImageError:  # Can't load this image; so there's no need to try other resolutions
            return []
        iccProfile = img.info.get('icc_profile')
        if iccProfile:
            # JPEGs might be encoded with a non-standard color-profile; we need to compensate for this when
            # converting to WebP, as we would lose this color-profile information otherwise
            f = io.BytesIO(iccProfile)
            src_profile = PIL.ImageCms.ImageCmsProfile(f)
            dst_profile = PIL.ImageCms.createProfile('sRGB')
            try:
                img = PIL.ImageCms.profileToProfile(
                    img,
                    inputProfile=src_profile,
                    outputProfile=dst_profile,
                    outputMode="RGBA" if img.has_transparency_data else "RGB")
            except Exception as e:
                logging.exception(e)
                continue
        fileExtension = sizeDict.get("fileExtension", "webp")
        if "width" in sizeDict and "height" in sizeDict:
            width = sizeDict["width"]
            height = sizeDict["height"]
            targetName = f"thumbnail-{width}-{height}.{fileExtension}"
        elif "width" in sizeDict:
            width = sizeDict["width"]
            height = int((float(img.size[1]) * float(width / float(img.size[0]))))
            targetName = f"thumbnail-w{width}.{fileExtension}"
        else:  # No default fallback - ignore
            continue
        mimeType = sizeDict.get("mimeType", "image/webp")
        img = img.resize((width, height), PIL.Image.LANCZOS)
        img.save(outData, fileExtension)
        outSize = outData.tell()
        outData.seek(0)
        targetBlob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{targetName}""")
        targetBlob.upload_from_file(outData, content_type=mimeType)
        resList.append((targetName, outSize, mimeType, {"mimetype": mimeType, "width": width, "height": height}))
    return resList
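
# A minimal usage sketch (the skeleton module and bone name below are hypothetical):
# the thumbnailer above is registered as a derive function in conf.file_derivations
# and referenced from a FileBone's `derive` parameter, e.g.:
#
#   from viur.core import conf
#   from viur.core.modules.file import thumbnailer
#
#   conf.file_derivations = {"thumbnail": thumbnailer}
#
#   # skeletons/example.py (hypothetical):
#   image = FileBone(derive={"thumbnail": [{"width": 256, "height": 256}, {"width": 1024}]})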


def cloudfunction_thumbnailer(fileSkel, existingFiles, params):
    """External thumbnailer for images.

    The corresponding cloud function can be found at
    https://github.com/viur-framework/viur-cloudfunctions/tree/main/thumbnailer

    You can use it like this:

    main.py:

    .. code-block:: python

        from viur.core.modules.file import cloudfunction_thumbnailer

        conf.file_thumbnailer_url = "https://xxxxx.cloudfunctions.net/imagerenderer"
        conf.file_derivations = {"thumbnail": cloudfunction_thumbnailer}

        conf.derives_pdf = {
            "thumbnail": [{"width": 1920, "sites": "1,2"}]
        }

    skeletons/xxx.py:

    .. code-block:: python

        test = FileBone(derive=conf.derives_pdf)
    """
    if not conf.file_thumbnailer_url:
        raise ValueError("conf.file_thumbnailer_url is not set")

    bucket = File.get_bucket(fileSkel["dlkey"])

    def getsignedurl():
        if conf.instance.is_dev_server:
            signedUrl = File.create_download_url(fileSkel["dlkey"], fileSkel["name"])
        else:
            path = f"""{fileSkel["dlkey"]}/source/{file_name}"""
            if not (blob := bucket.get_blob(path)):
                logging.warning(f"Blob {path} is missing from cloud storage!")
                return None
            authRequest = google.auth.transport.requests.Request()
            expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
            signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "")
            content_disposition = utils.build_content_disposition_header(fileSkel["name"])
            signedUrl = blob.generate_signed_url(
                expiresAt,
                credentials=signing_credentials,
                response_disposition=content_disposition,
                version="v4")
        return signedUrl

    def make_request():
        headers = {"Content-Type": "application/json"}
        data_str = base64.b64encode(json.dumps(dataDict).encode("UTF-8"))
        sig = File.hmac_sign(data_str)
        datadump = json.dumps({"dataStr": data_str.decode('ASCII'), "sign": sig})
        resp = requests.post(conf.file_thumbnailer_url, data=datadump, headers=headers, allow_redirects=False)
        if resp.status_code != 200:  # Error handling
            match resp.status_code:
                case 302:
                    # Google responds with a 302 to an auth site when the cloud function was not found
                    # https://cloud.google.com/functions/docs/troubleshooting#login
                    logging.error("Cloudfunction not found")
                case 404:
                    logging.error("Cloudfunction not found")
                case 403:
                    logging.error("No permission for the Cloudfunction")
                case _:
                    logging.error(
                        f"cloudfunction_thumbnailer failed with code: {resp.status_code} and data: {resp.content}")
            return

        try:
            response_data = resp.json()
        except Exception as e:
            logging.error(f"Response could not be converted to JSON: {e=}")
            return
        if "error" in response_data:
            logging.error(f"cloudfunction_thumbnailer failed with: {response_data.get('error')}")
            return

        return response_data

    file_name = html.unescape(fileSkel["name"])

    if not (url := getsignedurl()):
        return
    dataDict = {
        "url": url,
        "name": fileSkel["name"],
        "params": params,
        "minetype": fileSkel["mimetype"],
        "baseUrl": current.request.get().request.host_url.lower(),
        "targetKey": fileSkel["dlkey"],
        "nameOnly": True
    }
    if not (derivedData := make_request()):
        return

    uploadUrls = {}
    for data in derivedData["values"]:
        fileName = File.sanitize_filename(data["name"])
        blob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{fileName}""")
        uploadUrls[fileSkel["dlkey"] + fileName] = blob.create_resumable_upload_session(
            timeout=60,
            content_type=data["mimeType"]
        )

    if not (url := getsignedurl()):
        return

    dataDict["url"] = url
    dataDict["nameOnly"] = False
    dataDict["uploadUrls"] = uploadUrls

    if not (derivedData := make_request()):
        return
    reslist = []
    try:
        for derived in derivedData["values"]:
            for key, value in derived.items():
                reslist.append((key, value["size"], value["mimetype"], value["customData"]))
    except Exception as e:
        logging.error(f"cloudfunction_thumbnailer failed with: {e=}")
    return reslist


class DownloadUrlBone(BaseBone):
    """
    This bone is used to inject a freshly signed download url into a FileSkel.
    """

    def unserialize(self, skel, name):
        if "dlkey" in skel.dbEntity and "name" in skel.dbEntity:
            skel.accessedValues[name] = File.create_download_url(
                skel["dlkey"], skel["name"], expires=conf.render_json_download_url_expiration
            )
            return True

        return False


class FileLeafSkel(TreeSkel):
    """
    Default file leaf skeleton.
    """
    kindName = "file"

    size = StringBone(
        descr="Size",
        readOnly=True,
        searchable=True,
    )

    dlkey = StringBone(
        descr="Download-Key",
        readOnly=True,
    )

    name = StringBone(
        descr="Filename",
        caseSensitive=False,
        searchable=True,
        vfunc=lambda val: None if File.is_valid_filename(val) else "Invalid filename provided",
    )

    mimetype = StringBone(
        descr="MIME-Type",
        readOnly=True,
    )

    weak = BooleanBone(
        descr="Weak reference",
        readOnly=True,
        visible=False,
    )

    pending = BooleanBone(
        descr="Pending upload",
        readOnly=True,
        visible=False,
        defaultValue=False,
    )

    width = NumericBone(
        descr="Width",
        readOnly=True,
        searchable=True,
    )

    height = NumericBone(
        descr="Height",
        readOnly=True,
        searchable=True,
    )

    downloadUrl = DownloadUrlBone(
        descr="Download-URL",
        readOnly=True,
        visible=False,
    )

    derived = BaseBone(
        descr="Derived Files",
        readOnly=True,
        visible=False,
    )

    pendingparententry = KeyBone(
        descr="Pending key Reference",
        readOnly=True,
        visible=False,
    )

    crc32c_checksum = StringBone(
        descr="CRC32C checksum",
        readOnly=True,
    )

    md5_checksum = StringBone(
        descr="MD5 checksum",
        readOnly=True,
    )

    public = BooleanBone(
        descr="Public File",
        readOnly=True,
        defaultValue=False,
    )

    serving_url = StringBone(
        descr="Serving-URL",
        readOnly=True,
        params={
            "tooltip": "The 'serving_url' is only available in public file repositories.",
        }
    )

    @classmethod
    def _inject_serving_url(cls, skel: SkeletonInstance) -> None:
        """Inject the serving url for public image files into a FileSkel"""
        if (
            skel["public"]
            and skel["mimetype"]
            and skel["mimetype"].startswith("image/")
            and not skel["serving_url"]
        ):
            bucket = File.get_bucket(skel["dlkey"])
            filename = f"/gs/{bucket.name}/{skel['dlkey']}/source/{skel['name']}"

            # Trying this on the local development server will raise a
            # `google.appengine.runtime.apiproxy_errors.RPCFailedError`
            if conf.instance.is_dev_server:
                logging.warning(f"Can't inject serving_url for {filename!r} on local development server")
                return

            try:
                skel["serving_url"] = images.get_serving_url(None, secure_url=True, filename=filename)
            except Exception as e:
                logging.warning(f"Failed to create serving_url for {filename!r} with exception {e!r}")
                logging.exception(e)

    def preProcessBlobLocks(self, locks):
        """
        Ensure that our dlkey is locked even if we don't have a filebone here
        """
        if not self["weak"] and self["dlkey"]:
            locks.add(self["dlkey"])
        return locks

    @classmethod
    def refresh(cls, skel):
        super().refresh(skel)
        if conf.viur2import_blobsource:
            importData = importBlobFromViur2(skel["dlkey"], skel["name"])
            if importData:
                if not skel["downloadUrl"]:
                    skel["downloadUrl"] = importData
                skel["pendingparententry"] = None

        cls._inject_serving_url(skel)

    @classmethod
    def write(cls, skel, **kwargs):
        cls._inject_serving_url(skel)
        return super().write(skel, **kwargs)


class FileNodeSkel(TreeSkel):
    """
    Default file node skeleton.
    """
    kindName = "file_rootNode"  # FIXME: VIUR4, don't use "_rootNode" kindname

    name = StringBone(
        descr="Name",
        required=True,
        searchable=True
    )

    rootNode = BooleanBone(
        descr="Is RootNode",
        defaultValue=False,
        readOnly=True,
        visible=False,
    )

    public = BooleanBone(
        descr="Is public?",
        defaultValue=False,
        readOnly=True,
        visible=False,
    )

    viurCurrentSeoKeys = None


class File(Tree):
    PENDING_POSTFIX = " (pending)"
    DOWNLOAD_URL_PREFIX = "/file/download/"
    INTERNAL_SERVING_URL_PREFIX = "/file/serve/"
    MAX_FILENAME_LEN = 256
    IMAGE_META_MAX_SIZE: t.Final[int] = 10 * 1024 ** 2
    """Maximum size of image files that should be analysed in :meth:`set_image_meta`.
    Default: 10 MiB"""

    leafSkelCls = FileLeafSkel
    nodeSkelCls = FileNodeSkel

    handler = "tree.simple.file"
    adminInfo = {
        "icon": "folder-fill",
        "handler": handler,  # FIXME: Use static handler; remove with VIUR4!
    }

    roles = {
        "*": "view",
        "editor": ("add", "edit"),
        "admin": "*",
    }

    default_order = "name"

    # Helper functions currently reside here

    @staticmethod
    def get_bucket(dlkey: str) -> google.cloud.storage.bucket.Bucket:
        """
        Retrieves a Google Cloud Storage bucket for the given dlkey.
        """
        global _public_bucket
        if dlkey and dlkey.endswith(PUBLIC_DLKEY_SUFFIX):
            if _public_bucket or (_public_bucket := GOOGLE_STORAGE_CLIENT.lookup_bucket(PUBLIC_BUCKET_NAME)):
                return _public_bucket

            raise ValueError(
                f"""The bucket '{PUBLIC_BUCKET_NAME}' does not exist! Please create it with ACL access."""
            )

        return _private_bucket

    @staticmethod
    def is_valid_filename(filename: str) -> bool:
        """
        Verifies a valid filename.

        The filename should be valid on Linux, Mac OS and Windows.
        It should not be longer than MAX_FILENAME_LEN chars.

        Rule set: https://stackoverflow.com/a/31976060/3749896
        Regex test: https://regex101.com/r/iBYpoC/1
        """
        if len(filename) > File.MAX_FILENAME_LEN:
            return False

        return bool(re.match(VALID_FILENAME_REGEX, filename))
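
    # Illustrative examples (a doctest-style sketch; behavior follows VALID_FILENAME_REGEX above):
    #
    #   >>> File.is_valid_filename("report-2024.pdf")
    #   True
    #   >>> File.is_valid_filename("CON")        # reserved device name on Windows
    #   False
    #   >>> File.is_valid_filename("draft.")     # must not end with "." or " "
    #   False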

    @staticmethod
    def hmac_sign(data: t.Any) -> str:
        assert conf.file_hmac_key is not None, "No hmac-key set!"
        if not isinstance(data, bytes):
            data = str(data).encode("UTF-8")
        return hmac.new(conf.file_hmac_key, msg=data, digestmod=hashlib.sha3_384).hexdigest()

    @staticmethod
    def hmac_verify(data: t.Any, signature: str) -> bool:
        return hmac.compare_digest(File.hmac_sign(data.encode("ASCII")), signature)
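
    # Sign/verify round-trip sketch (assumes conf.file_hmac_key is configured):
    #
    #   payload = base64.urlsafe_b64encode(b"some/blob/path")
    #   sig = File.hmac_sign(payload)                    # hex digest over the payload bytes
    #   File.hmac_verify(payload.decode("ASCII"), sig)   # -> True
    #
    # Note that hmac_verify() re-encodes its input as ASCII, so it expects the same
    # string form that was originally signed.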

    @staticmethod
    def create_internal_serving_url(
        serving_url: str,
        size: int = 0,
        filename: str = "",
        options: str = "",
        download: bool = False
    ) -> str:
        """
        Helper function to generate an internal serving url (endpoint: /file/serve) from a Google serving url.

        This is needed to hide requests to Google, as they are routed internally; this can be the result of a
        legal requirement like GDPR.

        :param serving_url: The original serving URL as generated by FileLeafSkel._inject_serving_url()
        :param size: Optional size setting
        :param filename: Optional filename setting
        :param options: Additional options parameter passed through to /file/serve
        :param download: Download parameter passed through to /file/serve
        """
        # Split a serving URL into its components, used by the serve function.
        res = re.match(
            r"^https:\/\/(.*?)\.googleusercontent\.com\/(.*?)$",
            serving_url
        )

        if not res:
            raise ValueError(f"Invalid {serving_url=!r} provided")

        # Create internal serving URL
        serving_url = File.INTERNAL_SERVING_URL_PREFIX + "/".join(res.groups())

        # Append additional parameters
        if params := {
            k: v for k, v in {
                "download": download,
                "filename": filename,
                "options": options,
                "size": size,
            }.items() if v
        }:
            serving_url += f"?{urlencode(params)}"

        return serving_url
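
    # Sketch of the transformation (the host and key below are made up):
    #
    #   File.create_internal_serving_url("https://lh3.googleusercontent.com/abc123", size=512)
    #   # -> "/file/serve/lh3/abc123?size=512"
    #
    # The /file/serve endpoint then proxies the request to Google UserContent, so the
    # client never talks to googleusercontent.com directly.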

    @staticmethod
    def create_download_url(
        dlkey: str,
        filename: str,
        derived: bool = False,
        expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1),
        download_filename: t.Optional[str] = None
    ) -> str:
        """
        Utility function that creates a signed download-url for the given folder/filename combination

        :param dlkey: The GCS folder (= the download-key) for that file
        :param filename: The name of the file. Either the original filename or the name of a derived file.
        :param derived: True, if it points to a derived file, False if it points to the original uploaded file
        :param expires:
            None if the file is supposed to be public (which causes it to be cached in the Google edge caches),
            otherwise a datetime.timedelta of how long that link should be valid
        :param download_filename: If set, the browser is enforced to download this blob with the given alternate
            filename
        :return: The signed download-url relative to the current domain (e.g. /file/download/...)
        """
        if isinstance(expires, int):
            expires = datetime.timedelta(minutes=expires)

        # Undo escaping on ()= performed on fileNames
        filename = filename.replace("&#040;", "(").replace("&#041;", ")").replace("&#061;", "=")
        filepath = f"""{dlkey}/{"derived" if derived else "source"}/{filename}"""

        if download_filename:
            if not File.is_valid_filename(download_filename):
                raise errors.UnprocessableEntity(f"Invalid download_filename {download_filename!r} provided")

            download_filename = urlquote(download_filename)

        expires = (datetime.datetime.now() + expires).strftime("%Y%m%d%H%M") if expires else 0

        data = base64.urlsafe_b64encode(f"""{filepath}\0{expires}\0{download_filename or ""}""".encode("UTF-8"))
        sig = File.hmac_sign(data)

        return f"""{File.DOWNLOAD_URL_PREFIX}{data.decode("ASCII")}?sig={sig}"""
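
    # Usage sketch: a one-hour signed URL for an original upload (the dlkey is made up):
    #
    #   url = File.create_download_url("abc123xyz", "photo.jpg")
    #   # -> "/file/download/<urlsafe-b64(filepath\0expires\0)>?sig=<hmac>"
    #
    # Passing expires=None yields a non-expiring URL (expires is encoded as 0), which
    # download() below then allows to be cached publicly.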

    @staticmethod
    def parse_download_url(url) -> t.Optional[FilePath]:
        """
        Parses a file download URL in the format `/file/download/xxxx?sig=yyyy` into its FilePath.

        If the URL cannot be parsed, the function returns None.

        :param url: The file download URL to be parsed.
        :return: A FilePath on success, None otherwise.
        """
        if not url.startswith(File.DOWNLOAD_URL_PREFIX) or "?" not in url:
            return None

        data, sig = url.removeprefix(File.DOWNLOAD_URL_PREFIX).split("?", 1)  # Strip "/file/download/", split on "?"
        sig = sig.removeprefix("sig=")

        if not File.hmac_verify(data, sig):
            # Invalid signature
            return None

        # Split the blobKey into the individual fields it should contain
        data = base64.urlsafe_b64decode(data).decode("UTF-8")

        match data.count("\0"):
            case 2:
                dlpath, valid_until, _ = data.split("\0")
            case 1:
                # It's the old format, without a download filename
                dlpath, valid_until = data.split("\0")
            case _:
                # Invalid path
                return None

        if valid_until != "0" and datetime.datetime.strptime(valid_until, "%Y%m%d%H%M") < datetime.datetime.now():
            # Signature expired
            return None

        if dlpath.count("/") != 2:
            # Invalid path
            return None

        dlkey, derived, filename = dlpath.split("/")
        return FilePath(dlkey, derived != "source", filename)
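
    # Round-trip sketch (dlkey and filename are made up):
    #
    #   url = File.create_download_url("abc123xyz", "photo.jpg")
    #   File.parse_download_url(url)
    #   # -> FilePath(dlkey="abc123xyz", is_derived=False, filename="photo.jpg")
    #
    # Tampered or expired URLs return None instead of raising.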

    @staticmethod
    def create_src_set(
        file: t.Union["SkeletonInstance", dict, str],
        expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1),
        width: t.Optional[int] = None,
        height: t.Optional[int] = None,
        language: t.Optional[str] = None,
    ) -> str:
        """
        Generates a string suitable for use as the srcset tag in html. This functionality provides the browser
        with a list of images in different sizes and allows it to choose the smallest file that will fill its
        viewport without upscaling.

        :param file: The file skeleton (or if multiple=True, a single value from it) to generate the srcset for.
        :param expires:
            None if the file is supposed to be public (which causes it to be cached in the Google edge caches),
            otherwise its lifetime in seconds
        :param width:
            A list of widths that should be included in the srcset.
            If a given width is not available, it will be skipped.
        :param height: A list of heights that should be included in the srcset. If a given height is not available,
            it will be skipped.
        :param language: Language overwrite if the file has multiple languages, and we want to explicitly specify one
        :return: The srcset generated, or an empty string if an invalid file object was supplied
        """
        if not width and not height:
            logging.error("Neither width nor height supplied")
            return ""

        if isinstance(file, str):
            file = db.Query("file").filter("dlkey =", file).order(("creationdate", db.SortOrder.Ascending)).getEntry()

        if not file:
            return ""

        if isinstance(file, LanguageWrapper):
            language = language or current.language.get()
            if not language or not (file := file.get(language)):
                return ""

        if "dlkey" not in file and "dest" in file:
            file = file["dest"]

        from viur.core.skeleton import SkeletonInstance  # avoid circular imports

        if not (
            isinstance(file, (SkeletonInstance, dict))
            and "dlkey" in file
            and "derived" in file
        ):
            logging.error("Invalid file supplied")
            return ""

        if not isinstance(file["derived"], dict):
            logging.error("No derives available")
            return ""

        src_set = []
        for filename, derivate in file["derived"]["files"].items():
            customData = derivate.get("customData", {})

            if width and customData.get("width") in width:
                src_set.append(
                    f"""{File.create_download_url(file["dlkey"], filename, True, expires)} {customData["width"]}w"""
                )

            if height and customData.get("height") in height:
                src_set.append(
                    f"""{File.create_download_url(file["dlkey"], filename, True, expires)} {customData["height"]}h"""
                )

        return ", ".join(src_set)
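
    # Usage sketch (the widths correspond to a hypothetical derive configuration):
    #
    #   srcset = File.create_src_set(file_skel, width=[256, 512, 1024])
    #   # -> "/file/download/... 256w, /file/download/... 512w, /file/download/... 1024w"
    #
    # Note that `width` and `height` are lists of the sizes to include; only derives
    # whose customData matches one of those sizes end up in the srcset.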

    def write(
        self,
        filename: str,
        content: t.Any,
        mimetype: str = "text/plain",
        width: int = None,
        height: int = None,
        public: bool = False,
    ) -> db.Key:
        """
        Write a file from any buffer into the file module.

        :param filename: Filename to be written.
        :param content: The file content to be written, as bytes-like object.
        :param mimetype: The file's mimetype.
        :param width: Optional width information for the file.
        :param height: Optional height information for the file.
        :param public: True if the file should be publicly accessible.
        :return: Returns the key of the file object written. This can be associated e.g. with a FileBone.
        """
        # logging.info(f"{filename=} {mimetype=} {width=} {height=} {public=}")
        if not File.is_valid_filename(filename):
            raise ValueError(f"{filename=} is invalid")

        dl_key = utils.string.random()

        if public:
            dl_key += PUBLIC_DLKEY_SUFFIX  # mark file as public

        bucket = File.get_bucket(dl_key)

        blob = bucket.blob(f"{dl_key}/source/{filename}")
        blob.upload_from_file(io.BytesIO(content), content_type=mimetype)

        skel = self.addSkel("leaf")
        skel["name"] = filename
        skel["size"] = blob.size
        skel["mimetype"] = mimetype
        skel["dlkey"] = dl_key
        skel["weak"] = True
        skel["public"] = public
        skel["width"] = width
        skel["height"] = height
        skel["crc32c_checksum"] = base64.b64decode(blob.crc32c).hex()
        skel["md5_checksum"] = base64.b64decode(blob.md5_hash).hex()

        skel.write()
        return skel["key"]
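
    # Usage sketch (assumes the File module instance is reachable as usual in a ViUR
    # project, e.g. via conf.main_app; the payload is made up):
    #
    #   file_module = conf.main_app.file
    #   key = file_module.write("hello.txt", b"Hello, World!", mimetype="text/plain")
    #   # `key` can now be assigned to a FileBone value.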

    def read(
        self,
        key: db.Key | int | str | None = None,
        path: str | None = None,
    ) -> tuple[io.BytesIO, str]:
        """
        Read a file from the Cloud Storage.

        If a key and a path are provided, the key is preferred.
        This means that the entry in the db is searched first, and only if it is not found, the path is used.

        :param key: Key of the LeafSkel that contains the "dlkey" and the "name".
        :param path: The path of the file in the Cloud Storage Bucket.

        :return: Returns the file as an io.BytesIO buffer and the content-type
        """
        if not key and not path:
            raise ValueError("Please provide a key or a path")

        if key:
            skel = self.viewSkel("leaf")
            if not skel.read(db.keyHelper(key, skel.kindName)):
                if not path:
                    raise ValueError("This skeleton is not in the database!")
            else:
                path = f"""{skel["dlkey"]}/source/{skel["name"]}"""

            bucket = File.get_bucket(skel["dlkey"])
        else:
            bucket = File.get_bucket(path.split("/", 1)[0])  # the path's first part is the dlkey plus an eventual postfix

        blob = bucket.blob(path)
        return io.BytesIO(blob.download_as_bytes()), blob.content_type
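
    # Usage sketch (the key or path would come from an existing FileSkel):
    #
    #   buffer, content_type = file_module.read(key=some_leaf_key)
    #   # or, addressed directly by its storage path:
    #   buffer, content_type = file_module.read(path="abc123xyz/source/hello.txt")
    #   data = buffer.getvalue()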

    @CallDeferred
    def deleteRecursive(self, parentKey):
        files = db.Query(self.leafSkelCls().kindName).filter("parentdir =", parentKey).iter()
        for fileEntry in files:
            self.mark_for_deletion(fileEntry["dlkey"])
            skel = self.leafSkelCls()

            if skel.read(str(fileEntry.key())):
                skel.delete()
        dirs = db.Query(self.nodeSkelCls().kindName).filter("parentdir", parentKey).iter()
        for d in dirs:
            self.deleteRecursive(d.key)
            skel = self.nodeSkelCls()
            if skel.read(d.key):
                skel.delete()

    @exposed
    @skey
    def getUploadURL(
        self,
        fileName: str,
        mimeType: str,
        size: t.Optional[int] = None,
        node: t.Optional[str | db.Key] = None,
        authData: t.Optional[str] = None,
        authSig: t.Optional[str] = None,
        public: bool = False,
    ):
        filename = fileName.strip()  # VIUR4 FIXME: just for compatibility of the parameter names

        if not File.is_valid_filename(filename):
            raise errors.UnprocessableEntity(f"Invalid filename {filename!r} provided")

        # Validate that the mimetype from the client seems legit
        mimetype = mimeType.strip().lower()
        if not (
            mimetype
            and mimetype.count("/") == 1
            and all(ch in string.ascii_letters + string.digits + "/-.+" for ch in mimetype)
        ):
            raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype!r} provided")

        # Validate authentication data
        if authData and authSig:
            # First, validate the signature, otherwise we don't need to proceed further
            if not self.hmac_verify(authData, authSig):
                raise errors.Unauthorized()

            authData = json.loads(base64.b64decode(authData.encode("ASCII")).decode("UTF-8"))

            if datetime.datetime.strptime(authData["validUntil"], "%Y%m%d%H%M") < datetime.datetime.now():
                raise errors.Gone("The upload URL has expired")

            if authData["validMimeTypes"]:
                for validMimeType in authData["validMimeTypes"]:
                    if (
                        validMimeType == mimetype
                        or (validMimeType.endswith("*") and mimetype.startswith(validMimeType[:-1]))
                    ):
                        break
                else:
                    raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype} provided")

            node = authData["node"]
            maxSize = authData["maxSize"]

        else:
            rootNode = None
            if node and not (rootNode := self.getRootNode(node)):
                raise errors.NotFound(f"No valid root node found for {node=}")

            if not self.canAdd("leaf", rootNode):
                raise errors.Forbidden()

            if rootNode and public != bool(rootNode.get("public")):
                raise errors.Forbidden("Cannot upload a public file into a private repository or vice versa")

            maxSize = None  # The user has some file/add permissions, don't restrict the file size

        if maxSize:
            if size > maxSize:
                raise errors.UnprocessableEntity(f"Size {size} exceeds maximum size {maxSize}")
        else:
            size = None

        # Create upload-URL and download key
        dlkey = utils.string.random()  # let's roll a random key

        if public:
            dlkey += PUBLIC_DLKEY_SUFFIX  # mark file as public

        blob = File.get_bucket(dlkey).blob(f"{dlkey}/source/{filename}")
        upload_url = blob.create_resumable_upload_session(content_type=mimeType, size=size, timeout=60)

        # Create a corresponding file-lock object early, otherwise we would have to ensure that the file-lock
        # object the user creates matches the file that was uploaded
        file_skel = self.addSkel("leaf")

        file_skel["name"] = filename + self.PENDING_POSTFIX
        file_skel["size"] = 0
        file_skel["mimetype"] = "application/octetstream"
        file_skel["dlkey"] = dlkey
        file_skel["parentdir"] = None
        file_skel["pendingparententry"] = db.keyHelper(node, self.addSkel("node").kindName) if node else None
        file_skel["pending"] = True
        file_skel["weak"] = True
        file_skel["public"] = public
        file_skel["width"] = 0
        file_skel["height"] = 0

        file_skel.write()
        key = str(file_skel["key"])

        # Mark that entry dirty, as we might never receive an add
        self.mark_for_deletion(dlkey)

        # In this case, we have to store the key in the user's session so add() can be called later on
        if authData and authSig:
            session = current.session.get()

            if "pendingFileUploadKeys" not in session:
                session["pendingFileUploadKeys"] = []

            session["pendingFileUploadKeys"].append(key)

            # Clamp to the latest 50 pending uploads
            session["pendingFileUploadKeys"] = session["pendingFileUploadKeys"][-50:]
            session.markChanged()

        return self.render.view({
            "uploadKey": key,
            "uploadUrl": upload_url,
        })
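
    # Typical client flow, as a sketch (the endpoint paths follow the exposed methods
    # of this module; filenames and keys are made up):
    #
    #   1. POST /file/getUploadURL with fileName and mimeType (plus a valid skey)
    #      -> returns {"uploadKey": ..., "uploadUrl": ...}
    #   2. Upload the file's bytes to `uploadUrl` (a GCS resumable upload session).
    #   3. POST /file/add with skelType="leaf" and key=<uploadKey> to finalize the
    #      pending FileSkel (see add() below).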

    @exposed
    def download(self, blobKey: str, fileName: str = "", download: bool = False, sig: str = "", *args, **kwargs):
        """
        Download a file.

        :param blobKey: The unique blob key of the file.
        :param fileName: Optional filename to provide in the header.
        :param download: Set header to attachment retrieval; set explicitly to "1" if a download is wanted.
        """
        if filename := fileName.strip():
            if not File.is_valid_filename(filename):
                raise errors.UnprocessableEntity(f"The provided filename {filename!r} is invalid!")

        try:
            values = base64.urlsafe_b64decode(blobKey).decode("UTF-8").split("\0")
        except ValueError:
            raise errors.BadRequest(f"Invalid encoding of blob key {blobKey!r}!")
        try:
            dlPath, validUntil, *download_filename = values
            # Maybe it's the old format, without a download_filename
            download_filename = download_filename[0] if download_filename else ""
        except ValueError:
            logging.error(f"Encoding of {blobKey=!r} OK. {values=} invalid.")
            raise errors.BadRequest(f"The blob key {blobKey!r} has an invalid amount of encoded values!")

        bucket = File.get_bucket(dlPath.split("/", 1)[0])

        if not sig:
            # Check if the current user has the right to download *any* blob present in this application.
            # blobKey is then the path inside cloud storage - not a base64 encoded tuple
            if not (usr := current.user.get()):
                raise errors.Unauthorized()
            if "root" not in usr["access"] and "file-view" not in usr["access"]:
                raise errors.Forbidden()
            validUntil = "-1"  # Prevent this from being cached down below
            blob = bucket.get_blob(blobKey)

        else:
            # We got a request including a signature (probably a guest or a user without file-view access)
            # First, validate the signature, otherwise we don't need to proceed any further
            if not self.hmac_verify(blobKey, sig):
                raise errors.Forbidden()

            if validUntil != "0" and datetime.datetime.strptime(validUntil, "%Y%m%d%H%M") < datetime.datetime.now():
                blob = None
            else:
                blob = bucket.get_blob(dlPath)

        if not blob:
            raise errors.Gone("The requested blob has expired.")

        if not filename:
            filename = download_filename or urlquote(blob.name.rsplit("/", 1)[-1])

        content_disposition = utils.build_content_disposition_header(filename, attachment=download)

        if isinstance(_CREDENTIALS, ServiceAccountCredentials):
            expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
            signedUrl = blob.generate_signed_url(expiresAt, response_disposition=content_disposition, version="v4")
            raise errors.Redirect(signedUrl)

        elif conf.instance.is_dev_server:  # No service account to sign with - serve everything directly
            response = current.request.get().response
            response.headers["Content-Type"] = blob.content_type
            if content_disposition:
                response.headers["Content-Disposition"] = content_disposition
            return blob.download_as_bytes()

        if validUntil == "0" or blobKey.endswith(PUBLIC_DLKEY_SUFFIX):  # It's an indefinitely valid URL
            if blob.size < 5 * 1024 * 1024:  # Less than 5 MB - serve directly and push it into the edge caches
                response = current.request.get().response
                response.headers["Content-Type"] = blob.content_type
                response.headers["Cache-Control"] = "public, max-age=604800"  # 7 days
                if content_disposition:
                    response.headers["Content-Disposition"] = content_disposition
                return blob.download_as_bytes()

        # Default fallback - create a signed URL and redirect
        authRequest = google.auth.transport.requests.Request()
        expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
        signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "")
        signedUrl = blob.generate_signed_url(
            expiresAt,
            credentials=signing_credentials,
            response_disposition=content_disposition,
            version="v4")

        raise errors.Redirect(signedUrl)

    SERVE_VALID_OPTIONS = {
        "c",
        "p",
        "fv",
        "fh",
        "r90",
        "r180",
        "r270",
        "nu",
    }
    """
    Valid modification option shorts for the serve-function.
    This is passed through to the Google UserContent API, and has to be supported there.
    """

    SERVE_VALID_FORMATS = {
        "jpg": "rj",
        "jpeg": "rj",
        "png": "rp",
        "webp": "rw",
    }
    """
    Valid file-formats for the serve-function.
    This is passed through to the Google UserContent API, and has to be supported there.
    """

    @exposed
    def serve(
        self,
        host: str,
        key: str,
        size: t.Optional[int] = None,
        filename: t.Optional[str] = None,
        options: str = "",
        download: bool = False,
    ):
        """
        Requests an image using the serving url, to bypass direct Google requests.

        :param host: The Google host prefix, i.e. lh3
        :param key: The serving url key
        :param size: The target image size
        :param filename: A random string with an extension; valid extensions are defined in File.SERVE_VALID_FORMATS.
        :param options: "-"-separated options (defined in File.SERVE_VALID_OPTIONS):
            c - crop
            p - face crop
            fv - vertical flip
            fh - horizontal flip
            rXXX - rotate by 90, 180 or 270 degrees
            nu - no upscale
        :param download: Serves the content as download (Content-Disposition) or not.

        :return: Returns the requested content on success, raises a proper HTTP exception otherwise.
        """
        if any(c not in conf.search_valid_chars for c in host):
            raise errors.BadRequest("host contains invalid characters")

        # extract the format from the filename
        file_fmt = "webp"

        if filename:
            fmt = filename.rsplit(".", 1)[-1].lower()
            if fmt in self.SERVE_VALID_FORMATS:
                file_fmt = fmt
            else:
                raise errors.UnprocessableEntity(f"Unsupported filetype {fmt}")

        url = f"https://{host}.googleusercontent.com/{key}"

        if options and not all(param in self.SERVE_VALID_OPTIONS for param in options.split("-")):
            raise errors.BadRequest("Invalid options provided")

        options += f"-{self.SERVE_VALID_FORMATS[file_fmt]}"

        if size:
            options = f"s{size}-" + options

        url += "=" + options

        response = current.request.get().response
        response.headers["Content-Type"] = f"image/{file_fmt}"
        response.headers["Cache-Control"] = "public, max-age=604800"  # 7 days
        response.headers["Content-Disposition"] = utils.build_content_disposition_header(filename, attachment=download)

        answ = requests.get(url, timeout=20)
        if not answ.ok:
            logging.error(f"{answ.status_code} {answ.text}")
            raise errors.BadRequest("Unable to fetch a file with these parameters")

        return answ.content

    @exposed
    @force_ssl
    @force_post
    @skey(allow_empty=True)
    def add(self, skelType: SkelType, node: db.Key | int | str | None = None, *args, **kwargs):
        # We can't add files directly (they need to be uploaded first)
        if skelType == "leaf":  # We need to handle leafs separately here
            targetKey = kwargs.get("key")
            skel = self.addSkel("leaf")

            if not skel.read(targetKey):
                raise errors.NotFound()

            if not skel["pending"]:
                raise errors.PreconditionFailed()

            skel["pending"] = False
            skel["parententry"] = skel["pendingparententry"]

            if skel["parententry"]:
                rootNode = self.getRootNode(skel["parententry"])
            else:
                rootNode = None

            if not self.canAdd("leaf", rootNode):
                # Check for a marker in this session (created if using a signed upload URL)
                session = current.session.get()
                if targetKey not in (session.get("pendingFileUploadKeys") or []):
                    raise errors.Forbidden()
                session["pendingFileUploadKeys"].remove(targetKey)
                session.markChanged()

            # Now read the blob from the dlkey folder
            bucket = File.get_bucket(skel["dlkey"])

            blobs = list(bucket.list_blobs(prefix=f"""{skel["dlkey"]}/"""))
            if len(blobs) != 1:
                logging.error("Invalid number of blobs in folder")
                logging.error(targetKey)
                raise errors.PreconditionFailed()

            # only one item is allowed here!
            blob = blobs[0]

            # update the corresponding file skeleton
            skel["name"] = skel["name"].removesuffix(self.PENDING_POSTFIX)
            skel["mimetype"] = utils.string.escape(blob.content_type)
            skel["size"] = blob.size
            skel["parentrepo"] = rootNode["key"] if rootNode else None
            skel["weak"] = rootNode is None
            skel["crc32c_checksum"] = base64.b64decode(blob.crc32c).hex()
            skel["md5_checksum"] = base64.b64decode(blob.md5_hash).hex()
            self.onAdd("leaf", skel)
            skel.write()
            self.onAdded("leaf", skel)

            # Add an updated download-URL, as the auto-generated one isn't valid yet
            skel["downloadUrl"] = self.create_download_url(skel["dlkey"], skel["name"])

            return self.render.addSuccess(skel)

        return super().add(skelType, node, *args, **kwargs)

    @exposed
    def get_download_url(
        self,
        key: t.Optional[db.Key] = None,
        dlkey: t.Optional[str] = None,
        filename: t.Optional[str] = None,
        derived: bool = False,
    ):
        """
        Request a download url for a given file.

        :param key: The key of the file.
        :param dlkey: The download key of the file.
        :param filename: The filename to be given. If no filename is provided,
            downloadUrls for all derived files are returned in case of `derived=True`.
        :param derived: True, if a derived file download URL is being requested.
        """
        skel = self.viewSkel("leaf")
        if dlkey is not None:
            skel = skel.all().filter("dlkey", dlkey).getSkel()
        elif key is None and dlkey is None:
            raise errors.BadRequest("No key or dlkey provided")

        if not (skel and skel.read(key)):
            raise errors.NotFound()

        if not self.canView("leaf", skel):
            raise errors.Unauthorized()

        dlkey = skel["dlkey"]

        if derived and filename is None:
            res = {}
            for filename in skel["derived"]["files"]:
                res[filename] = self.create_download_url(dlkey, filename, derived)
        else:
            if derived:
                # Check if the filename exists in the derives; we sign nothing that does not exist.
                if filename not in skel["derived"]["files"]:
                    raise errors.NotFound("File not in derives")
            else:
                if filename is None:
                    filename = skel["name"]
                elif filename != skel["name"]:
                    raise errors.NotFound("Filename does not match")

            res = self.create_download_url(dlkey, filename, derived)

        return self.render.view(res)
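
    # Example calls against this endpoint, as a sketch (the keys are made up):
    #
    #   GET /file/get_download_url?key=<file-key>
    #   -> one signed URL for the original file
    #
    #   GET /file/get_download_url?key=<file-key>&derived=1
    #   -> a mapping of every derived filename to its signed URL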

    def onEdit(self, skelType: SkelType, skel: SkeletonInstance):
        super().onEdit(skelType, skel)

        if skelType == "leaf":
            old_skel = self.editSkel(skelType)
            old_skel.setEntity(skel.dbEntity)

            if old_skel["name"] == skel["name"]:  # name unchanged, we can return
                return

            # Move the blob to the new name
            # https://cloud.google.com/storage/docs/copying-renaming-moving-objects
            old_path = f"{skel['dlkey']}/source/{html.unescape(old_skel['name'])}"
            new_path = f"{skel['dlkey']}/source/{html.unescape(skel['name'])}"

            bucket = File.get_bucket(skel['dlkey'])

            if not (old_blob := bucket.get_blob(old_path)):
                raise errors.Gone()

            bucket.copy_blob(old_blob, bucket, new_path, if_generation_match=0)
            bucket.delete_blob(old_path)

    def onAdded(self, skelType: SkelType, skel: SkeletonInstance) -> None:
        if skelType == "leaf" and skel["mimetype"].startswith("image/"):
            if skel["size"] > self.IMAGE_META_MAX_SIZE:
                logging.warning(f"File size {skel['size']} exceeds limit {self.IMAGE_META_MAX_SIZE=}")
                return
            self.set_image_meta(skel["key"])

        super().onAdded(skelType, skel)

    @CallDeferred
    def set_image_meta(self, key: db.Key) -> None:
        """Write image metadata (height and width) to the FileSkel"""
        skel = self.editSkel("leaf", key)
        if not skel.read(key):
            logging.error(f"File {key} does not exist")
            return
        if skel["width"] and skel["height"]:
            logging.info(f'File {skel["key"]} already has {skel["width"]=} and {skel["height"]=}')
            return
        file_name = html.unescape(skel["name"])
        blob = self.get_bucket(skel["dlkey"]).get_blob(f"""{skel["dlkey"]}/source/{file_name}""")
        if not blob:
            logging.error(f'Blob {skel["dlkey"]}/source/{file_name} is missing in Cloud Storage!')
            return

        file_obj = io.BytesIO()
        blob.download_to_file(file_obj)
        file_obj.seek(0)
        try:
            img = Image.open(file_obj)
        except Image.UnidentifiedImageError as e:  # Can't load this image
            logging.exception(f'Cannot open {skel["key"]} | {skel["name"]} to set image meta data: {e}')
            return

        skel.patch(
            values={
                "width": img.width,
                "height": img.height,
            },
        )

    def mark_for_deletion(self, dlkey: str) -> None:
        """
        Adds a marker to the datastore that the file specified as *dlkey* can be deleted.

        Once the mark has been set, the datastore is checked periodically (default: every 4 hours)
        whether the file is in use somewhere. If it is still in use, the mark goes away; otherwise,
        the mark and the file are removed from the datastore. These delayed checks are necessary
        due to database inconsistency.

        :param dlkey: Unique download-key of the file that shall be marked for deletion.
        """
        fileObj = db.Query("viur-deleted-files").filter("dlkey", dlkey).getEntry()

        if fileObj:  # It's already marked
            return

        fileObj = db.Entity(db.Key("viur-deleted-files"))
        fileObj["itercount"] = 0
        fileObj["dlkey"] = str(dlkey)

        db.Put(fileObj)


@PeriodicTask(interval=datetime.timedelta(hours=4))
def startCheckForUnreferencedBlobs():
    """
    Start searching for blob locks that have been recently freed
    """
    doCheckForUnreferencedBlobs()


@CallDeferred
def doCheckForUnreferencedBlobs(cursor=None):
    def getOldBlobKeysTxn(dbKey):
        obj = db.Get(dbKey)
        res = obj["old_blob_references"] or []
        if obj["is_stale"]:
            db.Delete(dbKey)
        else:
            obj["has_old_blob_references"] = False
            obj["old_blob_references"] = []
            db.Put(obj)
        return res

    query = db.Query("viur-blob-locks").filter("has_old_blob_references", True).setCursor(cursor)
    for lockObj in query.run(100):
        oldBlobKeys = db.RunInTransaction(getOldBlobKeysTxn, lockObj.key)
        for blobKey in oldBlobKeys:
            if db.Query("viur-blob-locks").filter("active_blob_references =", blobKey).getEntry():
                # This blob is referenced elsewhere
                logging.info(f"Stale blob is still referenced, {blobKey}")
                continue
            # Add a marker and schedule it for deletion
            fileObj = db.Query("viur-deleted-files").filter("dlkey", blobKey).getEntry()
            if fileObj:  # It's already marked
                logging.info(f"Stale blob already marked for deletion, {blobKey}")
                return
            fileObj = db.Entity(db.Key("viur-deleted-files"))
            fileObj["itercount"] = 0
            fileObj["dlkey"] = str(blobKey)
            logging.info(f"Stale blob marked dirty, {blobKey}")
            db.Put(fileObj)
    newCursor = query.getCursor()
    if newCursor:
        doCheckForUnreferencedBlobs(newCursor)


@PeriodicTask(interval=datetime.timedelta(hours=4))
def startCleanupDeletedFiles():
    """
    Increase the deletion counter on each blob currently not referenced, and delete
    it if that counter reaches maxIterCount
    """
    doCleanupDeletedFiles()


@CallDeferred
def doCleanupDeletedFiles(cursor=None):
    maxIterCount = 2  # How often a file will be checked for deletion
    query = db.Query("viur-deleted-files")
    if cursor:
        query.setCursor(cursor)
    for file in query.run(100):
        if "dlkey" not in file:
            db.Delete(file.key)
        elif db.Query("viur-blob-locks").filter("active_blob_references =", file["dlkey"]).getEntry():
            logging.info(f"""is referenced, {file["dlkey"]}""")
            db.Delete(file.key)
        else:
            if file["itercount"] > maxIterCount:
                logging.info(f"""Finally deleting, {file["dlkey"]}""")
                bucket = File.get_bucket(file["dlkey"])
                blobs = bucket.list_blobs(prefix=f"""{file["dlkey"]}/""")
                for blob in blobs:
                    blob.delete()
                db.Delete(file.key)
                # There should be exactly 1 or 0 of these
                for f in skeletonByKind("file")().all().filter("dlkey =", file["dlkey"]).fetch(99):
                    f.delete()

                    if f["serving_url"]:
                        bucket = File.get_bucket(f["dlkey"])
                        blob_key = blobstore.create_gs_key(
                            f"/gs/{bucket.name}/{f['dlkey']}/source/{f['name']}"
                        )
                        images.delete_serving_url(blob_key)  # delete the serving url
            else:
                logging.debug(f"""Increasing count, {file["dlkey"]}""")
                file["itercount"] += 1
                db.Put(file)
    newCursor = query.getCursor()
    if newCursor:
        doCleanupDeletedFiles(newCursor)


@PeriodicTask(interval=datetime.timedelta(hours=4))
def start_delete_pending_files():
    """
    Start deletion of pending FileSkels that are older than 7 days.
    """
    DeleteEntitiesIter.startIterOnQuery(
        FileLeafSkel().all()
        .filter("pending =", True)
        .filter("creationdate <", utils.utcNow() - datetime.timedelta(days=7))
    )


# DEPRECATED ATTRIBUTES HANDLING

def __getattr__(attr: str) -> object:
    if entry := {
        # stuff prior to viur-core 3.7
        "GOOGLE_STORAGE_BUCKET": ("File.get_bucket()", _private_bucket),
    }.get(attr):
        msg = f"{attr} was replaced by {entry[0]}"
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        logging.warning(msg, stacklevel=2)
        return entry[1]

    return super(__import__(__name__).__class__).__getattribute__(attr)