Coverage for /home/runner/work/viur-core/viur-core/viur/src/viur/core/modules/file.py: 0%

761 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-03 12:27 +0000

1import base64 

2import datetime 

3import hashlib 

4import hmac 

5import html 

6import io 

7import json 

8import logging 

9import re 

10import string 

11import typing as t 

12import warnings 

13from collections import namedtuple 

14from urllib.parse import quote as urlquote, urlencode 

15from urllib.request import urlopen 

16 

17import PIL 

18import PIL.ImageCms 

19import google.auth 

20import requests 

21from PIL import Image 

22from google.appengine.api import blobstore, images 

23from google.cloud import storage 

24from google.oauth2.service_account import Credentials as ServiceAccountCredentials 

25 

26from viur.core import conf, current, db, errors, utils 

27from viur.core.bones import BaseBone, BooleanBone, KeyBone, NumericBone, StringBone 

28from viur.core.decorators import * 

29from viur.core.i18n import LanguageWrapper 

30from viur.core.prototypes.tree import SkelType, Tree, TreeSkel 

31from viur.core.skeleton import SkeletonInstance, skeletonByKind 

32from viur.core.tasks import CallDeferred, DeleteEntitiesIter, PeriodicTask 

33 

34# Globals for connectivity 

35 

# Pattern used by File.is_valid_filename() to accept or reject a filename.
# Matching is case-insensitive (re.IGNORECASE).
VALID_FILENAME_REGEX = re.compile(
    # Three parts, left to right:
    #   1. negative lookahead: the whole name may not be one of CON, PRN, AUX, NUL, COM1-9, LPT1-9
    #   2. any number of characters that are neither control characters (\x00-\x1F) nor one of <>:"/\|?*
    #   3. a final character from the same set which additionally may be neither a dot nor a space
    r"^(?!^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$)[^\x00-\x1F<>:\"\/\\|?*]*[^\x00-\x1F<>:\"\/\\|?*. ]$",
    re.IGNORECASE
)

# Default credentials and project id are resolved once, at import time.
_CREDENTIALS, _PROJECT_ID = google.auth.default()
GOOGLE_STORAGE_CLIENT = storage.Client(_PROJECT_ID, _CREDENTIALS)

# Bucket names are derived from the project id.
PRIVATE_BUCKET_NAME = f"""{_PROJECT_ID}.appspot.com"""
PUBLIC_BUCKET_NAME = f"""public-dot-{_PROJECT_ID}"""
# Download keys ending in this suffix are routed to the public bucket by File.get_bucket().
PUBLIC_DLKEY_SUFFIX = "_pub"

# The private bucket is looked up at import time; the public one is looked up lazily
# by File.get_bucket() the first time a public download key is requested.
_private_bucket = GOOGLE_STORAGE_CLIENT.lookup_bucket(PRIVATE_BUCKET_NAME)
_public_bucket = None

# FilePath is a descriptor for ViUR file components
FilePath = namedtuple("FilePath", ("dlkey", "is_derived", "filename"))

54 

55 

def importBlobFromViur2(dlKey, fileName):
    """
    Import a single blob from a ViUR 2 blob source into the ``<dlKey>/source/<fileName>`` layout.

    The outcome of every attempt is stored in a ``viur-viur2-blobimport`` marker entity keyed by
    ``dlKey``. If such a marker already exists, the stored result is returned and no new attempt
    is made.

    :param dlKey: The download key of the blob to import.
    :param fileName: The file name the blob is renamed to below ``<dlKey>/source/``.
    :return: The download URL of the imported blob, or False if the import is disabled
        (``conf.viur2import_blobsource`` is falsy) or failed.
    """
    bucket = File.get_bucket(dlKey)

    def mark_failed(error, **extra):
        # Persist the failure so the import is not retried, then signal failure to the caller.
        marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
        marker["success"] = False
        marker["error"] = error
        for name, value in extra.items():
            marker[name] = value
        db.Put(marker)
        return False

    blobsource = conf.viur2import_blobsource
    if not blobsource:
        return False

    existingImport = db.Get(db.Key("viur-viur2-blobimport", dlKey))
    if existingImport:
        # A previous attempt was recorded; replay its result
        if existingImport["success"]:
            return existingImport["dlurl"]
        return False

    if blobsource["infoURL"]:
        # The old installation is asked for the blob's real storage key
        try:
            importDataReq = urlopen(blobsource["infoURL"] + dlKey)
        except Exception:
            return mark_failed("Failed URL-FETCH 1")

        # Make sure the HTTP response is closed on every path
        with importDataReq:
            if importDataReq.status != 200:
                return mark_failed("Failed URL-FETCH 2")
            importData = json.loads(importDataReq.read())

        oldBlobName = blobsource["gsdir"] + "/" + importData["key"]
    else:
        oldBlobName = blobsource["gsdir"] + "/" + dlKey

    srcBlob = storage.Blob(bucket=bucket, name=oldBlobName)
    if not srcBlob.exists():
        return mark_failed("Local SRC-Blob missing", oldBlobName=oldBlobName)

    bucket.rename_blob(srcBlob, f"{dlKey}/source/{fileName}")

    marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
    marker["success"] = True
    marker["old_src_key"] = dlKey
    marker["old_src_name"] = fileName
    marker["dlurl"] = File.create_download_url(dlKey, fileName, False, None)
    db.Put(marker)
    return marker["dlurl"]

103 

104 

def thumbnailer(fileSkel, existingFiles, params):
    """
    Render resized variants of an image file and upload them as derived blobs.

    :param fileSkel: The file entry; ``name`` and ``dlkey`` are read from it.
    :param existingFiles: Not used by this implementation.
    :param params: An iterable of dicts. Each needs at least ``width``; ``height``,
        ``fileExtension`` (default ``webp``) and ``mimeType`` (default ``image/webp``) are optional.
        Entries without ``width`` are ignored.
    :return: A list of ``(name, size, mimetype, customData)`` tuples, one per uploaded file.
        An empty list if the source cannot be identified as an image, None if the source blob
        is missing.
    """
    source_name = html.unescape(fileSkel["name"])
    bucket = File.get_bucket(fileSkel["dlkey"])
    source_blob = bucket.get_blob(f"""{fileSkel["dlkey"]}/source/{source_name}""")
    if not source_blob:
        logging.warning(f"""Blob {fileSkel["dlkey"]}/source/{source_name} is missing from cloud storage!""")
        return

    source_data = io.BytesIO()
    source_blob.download_to_file(source_data)

    results = []
    for spec in params:
        # Every size is rendered from a freshly opened copy of the source
        source_data.seek(0)
        rendered = io.BytesIO()
        try:
            image = PIL.Image.open(source_data)
        except PIL.Image.UnidentifiedImageError:
            # Can't load this image; so there's no need to try other resolutions
            return []

        if icc_profile := image.info.get('icc_profile'):
            # JPEGs might be encoded with a non-standard color-profile; we need to compensate for this
            # if we convert to WEBp as we'll lose this color-profile information
            source_profile = PIL.ImageCms.ImageCmsProfile(io.BytesIO(icc_profile))
            target_profile = PIL.ImageCms.createProfile('sRGB')
            try:
                image = PIL.ImageCms.profileToProfile(
                    image,
                    inputProfile=source_profile,
                    outputProfile=target_profile,
                    outputMode="RGBA" if image.has_transparency_data else "RGB")
            except Exception as exc:
                logging.exception(exc)
                continue

        extension = spec.get("fileExtension", "webp")
        if "width" not in spec:
            # No default fallback - ignore
            continue

        width = spec["width"]
        if "height" in spec:
            height = spec["height"]
            target_name = f"thumbnail-{width}-{height}.{extension}"
        else:
            # Only the width is given: scale the height proportionally
            height = int((float(image.size[1]) * float(width / float(image.size[0]))))
            target_name = f"thumbnail-w{width}.{extension}"

        mime_type = spec.get("mimeType", "image/webp")
        image = image.resize((width, height), PIL.Image.LANCZOS)
        image.save(rendered, extension)
        rendered_size = rendered.tell()
        rendered.seek(0)

        target_blob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{target_name}""")
        target_blob.upload_from_file(rendered, content_type=mime_type)
        custom_data = {"mimetype": mime_type, "width": width, "height": height}
        results.append((target_name, rendered_size, mime_type, custom_data))

    return results

158 

159 

def cloudfunction_thumbnailer(fileSkel, existingFiles, params):
    """External Thumbnailer for images.

    Instead of rendering locally, the derived files are produced by an HTTP endpoint configured in
    ``conf.file_thumbnailer_url``. The endpoint is called twice: first with ``nameOnly=True`` to
    learn the names of the files it would create, then - after a resumable upload session has been
    created for each of them - with ``nameOnly=False`` and the upload URLs.

    The corresponding cloudfunction can be found here:
    https://github.com/viur-framework/viur-cloudfunctions/tree/main/thumbnailer

    You can use it like so:
    main.py:

    .. code-block:: python

        from viur.core.modules.file import cloudfunction_thumbnailer

        conf.file_thumbnailer_url = "https://xxxxx.cloudfunctions.net/imagerenderer"
        conf.file_derivations = {"thumbnail": cloudfunction_thumbnailer}

        conf.derives_pdf = {
            "thumbnail": [{"width": 1920,"sites":"1,2"}]
        }

    skeletons/xxx.py:
    .. code-block:: python

        test = FileBone(derive=conf.derives_pdf)

    :param fileSkel: The file entry; ``dlkey``, ``name`` and ``mimetype`` are read from it.
    :param existingFiles: Not used by this implementation.
    :param params: Passed through unchanged to the cloud function as ``params``.
    :return: A list of ``(name, size, mimetype, customData)`` tuples built from the second response,
        or None if a signed URL could not be created or a request failed.
    :raises ValueError: If ``conf.file_thumbnailer_url`` is not set.
    """

    if not conf.file_thumbnailer_url:
        raise ValueError("conf.file_thumbnailer_url is not set")

    bucket = File.get_bucket(fileSkel["dlkey"])

    def getsignedurl():
        # Builds the URL under which the cloud function can fetch the source file.
        # Reads `file_name`, which is assigned further down, before the first call.
        if conf.instance.is_dev_server:
            signedUrl = File.create_download_url(fileSkel["dlkey"], fileSkel["name"])
        else:
            path = f"""{fileSkel["dlkey"]}/source/{file_name}"""
            if not (blob := bucket.get_blob(path)):
                logging.warning(f"Blob {path} is missing from cloud storage!")
                return None
            authRequest = google.auth.transport.requests.Request()
            # The signed URL is only valid for 60 seconds
            expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
            signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "")
            content_disposition = utils.build_content_disposition_header(fileSkel["name"])
            signedUrl = blob.generate_signed_url(
                expiresAt,
                credentials=signing_credentials,
                response_disposition=content_disposition,
                version="v4")
        return signedUrl

    def make_request():
        # Sends the current content of `dataDict` (assigned further down) to the cloud function.
        # The payload is base64-encoded JSON accompanied by its HMAC signature.
        # Returns the decoded JSON response, or None on any failure (which is logged).
        headers = {"Content-Type": "application/json"}
        data_str = base64.b64encode(json.dumps(dataDict).encode("UTF-8"))
        sig = File.hmac_sign(data_str)
        datadump = json.dumps({"dataStr": data_str.decode('ASCII'), "sign": sig})
        # Redirects are not followed, so a 302 can be detected below
        resp = requests.post(conf.file_thumbnailer_url, data=datadump, headers=headers, allow_redirects=False)
        if resp.status_code != 200:  # Error Handling
            match resp.status_code:
                case 302:
                    # Google responds with a 302 to an auth site when the cloudfunction was not found
                    # https://cloud.google.com/functions/docs/troubleshooting#login
                    logging.error("Cloudfunction not found")
                case 404:
                    logging.error("Cloudfunction not found")
                case 403:
                    logging.error("No permission for the Cloudfunction")
                case _:
                    logging.error(
                        f"cloudfunction_thumbnailer failed with code: {resp.status_code} and data: {resp.content}")
            return

        try:
            response_data = resp.json()
        except Exception as e:
            logging.error(f"response could not be converted in json failed with: {e=}")
            return
        if "error" in response_data:
            logging.error(f"cloudfunction_thumbnailer failed with: {response_data.get('error')}")
            return

        return response_data

    file_name = html.unescape(fileSkel["name"])

    # Phase 1: ask only for the names of the files that would be derived
    if not (url := getsignedurl()):
        return
    dataDict = {
        "url": url,
        "name": fileSkel["name"],
        "params": params,
        # NOTE(review): the key is spelled "minetype"; presumably the cloud function expects
        # exactly this spelling - verify before changing it.
        "minetype": fileSkel["mimetype"],
        "baseUrl": current.request.get().request.host_url.lower(),
        "targetKey": fileSkel["dlkey"],
        "nameOnly": True
    }
    if not (derivedData := make_request()):
        return

    # Create one resumable upload session per announced file, keyed by dlkey + sanitized file name
    uploadUrls = {}
    for data in derivedData["values"]:
        fileName = File.sanitize_filename(data["name"])
        blob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{fileName}""")
        uploadUrls[fileSkel["dlkey"] + fileName] = blob.create_resumable_upload_session(timeout=60,
                                                                                        content_type=data["mimeType"])

    # Phase 2: a fresh signed URL is requested and the real rendering is triggered
    if not (url := getsignedurl()):
        return

    dataDict["url"] = url
    dataDict["nameOnly"] = False
    dataDict["uploadUrls"] = uploadUrls

    if not (derivedData := make_request()):
        return
    reslist = []
    try:
        for derived in derivedData["values"]:
            for key, value in derived.items():
                reslist.append((key, value["size"], value["mimetype"], value["customData"]))

    except Exception as e:
        # A malformed response is only logged; whatever was collected so far is still returned
        logging.error(f"cloudfunction_thumbnailer failed with: {e=}")
    return reslist

283 

284 

class DownloadUrlBone(BaseBone):
    """
    Bone which does not store a value of its own: on every unserialize, a newly signed
    download url is computed from the entry's ``dlkey`` and ``name``.
    """

    def unserialize(self, skel, name):
        """
        Put a fresh download url into ``skel.accessedValues[name]``.

        :return: True if the url was created, False if the stored entity has no ``dlkey`` or no ``name``.
        """
        if "dlkey" not in skel.dbEntity or "name" not in skel.dbEntity:
            return False

        lifetime = conf.render_json_download_url_expiration
        skel.accessedValues[name] = File.create_download_url(skel["dlkey"], skel["name"], expires=lifetime)
        return True

298 

299 

class FileLeafSkel(TreeSkel):
    """
    Default file leaf skeleton.

    Describes one file entry (kind ``file``): its name, size, mimetype, download key, image
    dimensions, checksums and upload state. For public image files, a serving url is injected
    when the skeleton is written or refreshed.
    """
    kindName = "file"

    # Size of the file; File.write() stores blob.size here
    size = StringBone(
        descr="Size",
        readOnly=True,
        searchable=True,
    )

    # Download key; the blob lives below "<dlkey>/source/" in the bucket
    dlkey = StringBone(
        descr="Download-Key",
        readOnly=True,
    )

    # The only bone in this skeleton which is not read-only; validated by File.is_valid_filename()
    name = StringBone(
        descr="Filename",
        caseSensitive=False,
        searchable=True,
        vfunc=lambda val: None if File.is_valid_filename(val) else "Invalid filename provided",
    )

    mimetype = StringBone(
        descr="MIME-Type",
        readOnly=True,
    )

    # When set, preProcessBlobLocks() does not lock the dlkey of this entry
    weak = BooleanBone(
        descr="Weak reference",
        readOnly=True,
        visible=False,
    )

    # Set to True by File.getUploadURL() for entries created ahead of the actual upload
    pending = BooleanBone(
        descr="Pending upload",
        readOnly=True,
        visible=False,
        defaultValue=False,
    )

    width = NumericBone(
        descr="Width",
        readOnly=True,
        searchable=True,
    )

    height = NumericBone(
        descr="Height",
        readOnly=True,
        searchable=True,
    )

    # Signed on every unserialize, see DownloadUrlBone
    downloadUrl = DownloadUrlBone(
        descr="Download-URL",
        readOnly=True,
        visible=False,
    )

    # File.create_src_set() expects a dict with a "files" mapping here
    derived = BaseBone(
        descr="Derived Files",
        readOnly=True,
        visible=False,
    )

    # Target node of a pending upload, set by File.getUploadURL()
    pendingparententry = KeyBone(
        descr="Pending key Reference",
        readOnly=True,
        visible=False,
    )

    crc32c_checksum = StringBone(
        descr="CRC32C checksum",
        readOnly=True,
    )

    md5_checksum = StringBone(
        descr="MD5 checksum",
        readOnly=True,
    )

    public = BooleanBone(
        descr="Public File",
        readOnly=True,
        defaultValue=False,
    )

    serving_url = StringBone(
        descr="Serving-URL",
        readOnly=True,
        params={
            "tooltip": "The 'serving_url' is only available in public file repositories.",
        }
    )

    @classmethod
    def _inject_serving_url(cls, skel: SkeletonInstance) -> None:
        """
        Inject the serving url for public image files into a FileSkel.

        Does nothing unless the file is public, has a mimetype starting with ``image/`` and has no
        serving url yet. Failures are logged and never raised.
        """
        if (
            skel["public"]
            and skel["mimetype"]
            and skel["mimetype"].startswith("image/")
            and not skel["serving_url"]
        ):
            bucket = File.get_bucket(skel["dlkey"])
            filename = f"/gs/{bucket.name}/{skel['dlkey']}/source/{skel['name']}"

            # Trying this on local development server will raise a
            # `google.appengine.runtime.apiproxy_errors.RPCFailedError`
            if conf.instance.is_dev_server:
                logging.warning(f"Can't inject serving_url for {filename!r} on local development server")
                return

            try:
                skel["serving_url"] = images.get_serving_url(None, secure_url=True, filename=filename)

            except Exception as e:
                # Best effort: the skeleton is still usable without a serving url
                logging.warning(f"Failed to create serving_url for {filename!r} with exception {e!r}")
                logging.exception(e)

    def preProcessBlobLocks(self, locks):
        """
        Ensure that our dlkey is locked even if we don't have a filebone here

        :param locks: The set of download keys collected so far; it is extended in place.
        :return: The same ``locks`` object.
        """
        if not self["weak"] and self["dlkey"]:
            locks.add(self["dlkey"])
        return locks

    @classmethod
    def refresh(cls, skel):
        """
        Refresh the skeleton; additionally try the ViUR 2 blob import (if configured) and
        inject the serving url.
        """
        super().refresh(skel)
        if conf.viur2import_blobsource:
            importData = importBlobFromViur2(skel["dlkey"], skel["name"])
            if importData:
                # importData is the download url of the imported blob
                if not skel["downloadUrl"]:
                    skel["downloadUrl"] = importData
                skel["pendingparententry"] = None

        cls._inject_serving_url(skel)

    @classmethod
    def write(cls, skel, **kwargs):
        """Inject the serving url (if applicable) before delegating to the parent's write."""
        cls._inject_serving_url(skel)
        return super().write(skel, **kwargs)

445 

446 

class FileNodeSkel(TreeSkel):
    """
    Default file node skeleton.

    Describes a node of the file tree with a required name and two hidden, read-only flags.
    """
    kindName = "file_rootNode"  # FIXME: VIUR4, don't use "_rootNode" kindname

    name = StringBone(
        descr="Name",
        required=True,
        searchable=True
    )

    rootNode = BooleanBone(
        descr="Is RootNode",
        defaultValue=False,
        readOnly=True,
        visible=False,
    )

    # File.getUploadURL() compares this flag of the root node with the requested `public` parameter
    public = BooleanBone(
        descr="Is public?",
        defaultValue=False,
        readOnly=True,
        visible=False,
    )

    # NOTE(review): presumably removes an SEO-key bone inherited from the base skeleton - confirm
    viurCurrentSeoKeys = None

474 

475 

476class File(Tree): 

477 PENDING_POSTFIX = " (pending)" 

478 DOWNLOAD_URL_PREFIX = "/file/download/" 

479 INTERNAL_SERVING_URL_PREFIX = "/file/serve/" 

480 MAX_FILENAME_LEN = 256 

481 IMAGE_META_MAX_SIZE: t.Final[int] = 10 * 1024 ** 2 

482 """Maximum size of image files that should be analysed in :meth:`set_image_meta`. 

483 Default: 10 MiB""" 

484 

485 leafSkelCls = FileLeafSkel 

486 nodeSkelCls = FileNodeSkel 

487 

488 handler = "tree.simple.file" 

489 adminInfo = { 

490 "icon": "folder-fill", 

491 "handler": handler, # fixme: Use static handler; Remove with VIUR4! 

492 } 

493 

494 roles = { 

495 "*": "view", 

496 "editor": ("add", "edit"), 

497 "admin": "*", 

498 } 

499 

500 default_order = "name" 

501 

502 # Helper functions currently resist here 

503 

@staticmethod
def get_bucket(dlkey: str) -> google.cloud.storage.bucket.Bucket:
    """
    Return the Google Cloud Storage bucket responsible for the given dlkey.

    Download keys ending in ``PUBLIC_DLKEY_SUFFIX`` belong to the public bucket, which is looked
    up on first use and then kept in a module global. Everything else (including an empty or
    missing dlkey) is served from the private bucket.

    :raises ValueError: If a public dlkey is given, but the public bucket does not exist.
    """
    global _public_bucket

    if not (dlkey and dlkey.endswith(PUBLIC_DLKEY_SUFFIX)):
        return _private_bucket

    if not _public_bucket:
        _public_bucket = GOOGLE_STORAGE_CLIENT.lookup_bucket(PUBLIC_BUCKET_NAME)

    if _public_bucket:
        return _public_bucket

    raise ValueError(
        f"""The bucket '{PUBLIC_BUCKET_NAME}' does not exist! Please create it with ACL access."""
    )

519 

@staticmethod
def is_valid_filename(filename: str) -> bool:
    """
    Check whether a filename is acceptable.

    A name is accepted if it has at most ``MAX_FILENAME_LEN`` characters and matches
    ``VALID_FILENAME_REGEX``, which is meant to describe names valid on Linux, Mac OS and Windows.

    Rule set: https://stackoverflow.com/a/31976060/3749896
    Regex test: https://regex101.com/r/iBYpoC/1
    """
    short_enough = len(filename) <= File.MAX_FILENAME_LEN
    return short_enough and VALID_FILENAME_REGEX.match(filename) is not None

535 

@staticmethod
def hmac_sign(data: t.Any) -> str:
    """
    Sign ``data`` with ``conf.file_hmac_key`` using HMAC-SHA3-384.

    :param data: Bytes are signed as they are; anything else is converted with ``str()`` and
        encoded as UTF-8 first.
    :return: The signature as hex digest.
    """
    assert conf.file_hmac_key is not None, "No hmac-key set!"
    payload = data if isinstance(data, bytes) else str(data).encode("UTF-8")
    signer = hmac.new(conf.file_hmac_key, msg=payload, digestmod=hashlib.sha3_384)
    return signer.hexdigest()

542 

@staticmethod
def hmac_verify(data: t.Any, signature: str) -> bool:
    """
    Check ``signature`` against the HMAC of ``data``.

    :param data: The signed text; it is encoded as ASCII before signing, so it must offer ``encode``.
    :param signature: The hex digest to verify.
    :return: True if the signature matches. The comparison uses ``hmac.compare_digest``.
    """
    expected = File.hmac_sign(data.encode("ASCII"))
    return hmac.compare_digest(expected, signature)

546 

@staticmethod
def create_internal_serving_url(
    serving_url: str,
    size: int = 0,
    filename: str = "",
    options: str = "",
    download: bool = False
) -> str:
    """
    Translate a Google serving url into an internal serving url (endpoint: /file/serve).

    This hides requests to Google, as they are routed internally; this can be the result of a
    legal requirement like GDPR.

    :param serving_url: The original serving URL as generated from FileLeafSkel._inject_serving_url()
    :param size: Optional size setting
    :param filename: Optional filename setting
    :param options: Additional options parameter-pass through to /file/serve
    :param download: Download parameter-pass through to /file/serve
    :return: The internal url; parameters with a falsy value are left out of the query string.
    :raises ValueError: If ``serving_url`` is not a ``https://*.googleusercontent.com/*`` url.
    """
    # Split a serving URL into its components, used by serve function.
    parts = re.match(r"^https:\/\/(.*?)\.googleusercontent\.com\/(.*?)$", serving_url)
    if parts is None:
        raise ValueError(f"Invalid {serving_url=!r} provided")

    host, key = parts.groups()
    internal_url = f"{File.INTERNAL_SERVING_URL_PREFIX}{host}/{key}"

    # Only parameters which are actually set become part of the query string
    candidates = (
        ("download", download),
        ("filename", filename),
        ("options", options),
        ("size", size),
    )
    query = [(name, value) for name, value in candidates if value]
    if query:
        internal_url += "?" + urlencode(query)

    return internal_url

592 

@staticmethod
def create_download_url(
    dlkey: str,
    filename: str,
    derived: bool = False,
    expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1),
    download_filename: t.Optional[str] = None
) -> str:
    """
    Create a signed download url for the given dlkey/filename combination.

    :param dlkey: The download key (= the folder in the bucket) of that file
    :param filename: The name of the file. Either the original filename or the name of a derived file.
    :param derived: True, if it points to a derived file, False if it points to the original uploaded file
    :param expires:
        A datetime.timedelta of how long that link should be valid; an int is interpreted as minutes.
        A falsy value (like None) creates a link without expiration.
    :param download_filename: If set, browser is enforced to download this blob with the given alternate
        filename
    :return: The signed download url relative to the current domain (eg /file/download/...)
    :raises errors.UnprocessableEntity: If ``download_filename`` is not a valid filename.
    """
    lifetime = datetime.timedelta(minutes=expires) if isinstance(expires, int) else expires

    # Undo escaping on ()= performed on fileNames
    for escaped, plain in (("&#040;", "("), ("&#041;", ")"), ("&#061;", "=")):
        filename = filename.replace(escaped, plain)

    folder = "derived" if derived else "source"
    filepath = f"{dlkey}/{folder}/{filename}"

    if download_filename:
        if not File.is_valid_filename(download_filename):
            raise errors.UnprocessableEntity(f"Invalid download_filename {download_filename!r} provided")

        download_filename = urlquote(download_filename)

    # 0 marks a link which never expires
    if lifetime:
        valid_until = (datetime.datetime.now() + lifetime).strftime("%Y%m%d%H%M")
    else:
        valid_until = 0

    # The signed payload consists of three NUL-separated fields
    payload = "\0".join((filepath, str(valid_until), download_filename or ""))
    data = base64.urlsafe_b64encode(payload.encode("UTF-8"))
    sig = File.hmac_sign(data)

    return f"""{File.DOWNLOAD_URL_PREFIX}{data.decode("ASCII")}?sig={sig}"""

633 

634 @staticmethod 

635 def parse_download_url(url) -> t.Optional[FilePath]: 

636 """ 

637 Parses a file download URL in the format `/file/download/xxxx?sig=yyyy` into its FilePath. 

638 

639 If the URL cannot be parsed, the function returns None. 

640 

641 :param url: The file download URL to be parsed. 

642 :return: A FilePath on success, None otherwise. 

643 """ 

644 if not url.startswith(File.DOWNLOAD_URL_PREFIX) or "?" not in url: 

645 return None 

646 

647 data, sig = url.removeprefix(File.DOWNLOAD_URL_PREFIX).split("?", 1) # Strip "/file/download/" and split on "?" 

648 sig = sig.removeprefix("sig=") 

649 

650 if not File.hmac_verify(data, sig): 

651 # Invalid signature 

652 return None 

653 

654 # Split the blobKey into the individual fields it should contain 

655 data = base64.urlsafe_b64decode(data).decode("UTF-8") 

656 

657 match data.count("\0"): 

658 case 2: 

659 dlpath, valid_until, _ = data.split("\0") 

660 case 1: 

661 # It's the old format, without an downloadFileName 

662 dlpath, valid_until = data.split("\0") 

663 case _: 

664 # Invalid path 

665 return None 

666 

667 if valid_until != "0" and datetime.strptime(valid_until, "%Y%m%d%H%M") < datetime.now(): 

668 # Signature expired 

669 return None 

670 

671 if dlpath.count("/") != 2: 

672 # Invalid path 

673 return None 

674 

675 dlkey, derived, filename = dlpath.split("/") 

676 return FilePath(dlkey, derived != "source", filename) 

677 

@staticmethod
def create_src_set(
    file: t.Union["SkeletonInstance", dict, str],
    expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1),
    width: t.Optional[t.Collection[int] | int] = None,
    height: t.Optional[t.Collection[int] | int] = None,
    language: t.Optional[str] = None,
) -> str:
    """
    Generates a string suitable for use as the srcset tag in html. This functionality provides the browser
    with a list of images in different sizes and allows it to choose the smallest file that will fill its
    viewport without upscaling.

    :param file: The file skeleton (or if multiple=True a single value from it) to generate the srcset.
        A string is treated as a dlkey and resolved to the oldest matching file entry.
    :param expires:
        Passed on to :meth:`create_download_url`: a falsy value (like None) creates links without
        expiration, otherwise a datetime.timedelta, or an int which is interpreted as minutes there.
    :param width:
        A collection of widths (or a single width) that should be included in the srcset.
        If a given width is not available, it will be skipped.
    :param height: A collection of heights (or a single height) that should be included in the srcset.
        If a given height is not available, it will be skipped.
    :param language: Language overwrite if file has multiple languages, and we want to explicitly specify one
    :return: The srcset generated or an empty string if an invalid file object was supplied
    """
    if not width and not height:
        logging.error("Neither width or height supplied")
        return ""

    # Allow a single number as shorthand for a one-element collection
    if width and isinstance(width, int):
        width = (width,)
    if height and isinstance(height, int):
        height = (height,)

    if isinstance(file, str):
        file = db.Query("file").filter("dlkey =", file).order(("creationdate", db.SortOrder.Ascending)).getEntry()

    if not file:
        return ""

    if isinstance(file, LanguageWrapper):
        language = language or current.language.get()
        if not language or not (file := file.get(language)):
            return ""

    # A relation value carries the actual file entry in "dest"
    if "dlkey" not in file and "dest" in file:
        file = file["dest"]

    from viur.core.skeleton import SkeletonInstance  # avoid circular imports

    if not (
        isinstance(file, (SkeletonInstance, dict))
        and "dlkey" in file
        and "derived" in file
    ):
        logging.error("Invalid file supplied")
        return ""

    if not isinstance(file["derived"], dict):
        logging.error("No derives available")
        return ""

    src_set = []
    # An entry without any derived files yields an empty srcset instead of a KeyError
    for filename, derivate in (file["derived"].get("files") or {}).items():
        customData = derivate.get("customData", {})

        if width and customData.get("width") in width:
            src_set.append(
                f"""{File.create_download_url(file["dlkey"], filename, True, expires)} {customData["width"]}w"""
            )

        if height and customData.get("height") in height:
            src_set.append(
                f"""{File.create_download_url(file["dlkey"], filename, True, expires)} {customData["height"]}h"""
            )

    return ", ".join(src_set)

750 

def write(
    self,
    filename: str,
    content: t.Any,
    mimetype: str = "text/plain",
    width: int = None,
    height: int = None,
    public: bool = False,
) -> db.Key:
    """
    Store the content of a buffer as a new file in the file module.

    The content is uploaded to ``<dlkey>/source/<filename>`` under a newly rolled download key,
    then a leaf entry describing the upload is written.

    :param filename: Filename to be written.
    :param content: The file content to be written, as bytes-like object.
    :param mimetype: The file's mimetype.
    :param width: Optional width information for the file.
    :param height: Optional height information for the file.
    :param public: True if the file should be publicly accessible.
    :return: Returns the key of the file object written. This can be associated e.g. with a FileBone.
    :raises ValueError: If ``filename`` is not a valid filename.
    """
    if not File.is_valid_filename(filename):
        raise ValueError(f"{filename=} is invalid")

    # Public files are recognized by the suffix of their download key
    dl_key = utils.string.random() + (PUBLIC_DLKEY_SUFFIX if public else "")

    blob = File.get_bucket(dl_key).blob(f"{dl_key}/source/{filename}")
    blob.upload_from_file(io.BytesIO(content), content_type=mimetype)

    skel = self.addSkel("leaf")
    values = {
        "name": filename,
        "size": blob.size,
        "mimetype": mimetype,
        "dlkey": dl_key,
        "weak": True,
        "public": public,
        "width": width,
        "height": height,
        # The blob reports its checksums base64-encoded; they are stored as hex
        "crc32c_checksum": base64.b64decode(blob.crc32c).hex(),
        "md5_checksum": base64.b64decode(blob.md5_hash).hex(),
    }
    for bone_name, value in values.items():
        skel[bone_name] = value

    skel.write()
    return skel["key"]

799 

def read(
    self,
    key: db.Key | int | str | None = None,
    path: str | None = None,
) -> tuple[io.BytesIO, str]:
    """
    Read a file from the Cloud Storage.

    If a key and a path are provided, the key is preferred.
    This means that the entry in the db is searched first and if this is not found, the path is used.

    :param key: Key of the LeafSkel that contains the "dlkey" and the "name".
    :param path: The path of the file in the Cloud Storage Bucket.

    :return: Returns the file as a io.BytesIO buffer and the content-type
    :raises ValueError: If neither key nor path is given, or if the key is unknown and no path is given.
    """
    if not key and not path:
        raise ValueError("Please provide a key or a path")

    if key:
        skel = self.viewSkel("leaf")
        if skel.read(db.keyHelper(key, skel.kindName)):
            path = f"""{skel["dlkey"]}/source/{skel["name"]}"""
        elif not path:
            raise ValueError("This skeleton is not in the database!")

    # The bucket is always chosen from the path that is actually read; its first part is the dlkey
    # plus eventual postfix. This also covers the fallback from an unknown key to the given path.
    bucket = File.get_bucket(path.split("/", 1)[0])

    blob = bucket.blob(path)
    return io.BytesIO(blob.download_as_bytes()), blob.content_type

833 

@CallDeferred
def deleteRecursive(self, parentKey):
    """
    Delete all files and nodes below ``parentKey``.

    The blobs of the files are marked for deletion, the file entries are deleted, and the
    function is called again for every node found below ``parentKey`` before that node is deleted.

    :param parentKey: The key of the node whose content is deleted.
    """
    files = db.Query(self.leafSkelCls().kindName).filter("parentdir =", parentKey).iter()
    for fileEntry in files:
        self.mark_for_deletion(fileEntry["dlkey"])
        skel = self.leafSkelCls()

        # NOTE(review): the key is accessed as a call (`.key()`) here, but as an attribute (`d.key`)
        # in the loop below - confirm which one matches the db API in use.
        if skel.read(str(fileEntry.key())):
            skel.delete()
    # NOTE(review): this filter is written without the " =" used for the files above - confirm
    # that both spellings are equivalent.
    dirs = db.Query(self.nodeSkelCls().kindName).filter("parentdir", parentKey).iter()
    for d in dirs:
        self.deleteRecursive(d.key)
        skel = self.nodeSkelCls()
        if skel.read(d.key):
            skel.delete()

849 

@exposed
@skey
def getUploadURL(
    self,
    fileName: str,
    mimeType: str,
    size: t.Optional[int] = None,
    node: t.Optional[str | db.Key] = None,
    authData: t.Optional[str] = None,
    authSig: t.Optional[str] = None,
    public: bool = False,
):
    """
    Create a resumable upload session and a pending file entry for a file about to be uploaded.

    The upload is authorized in one of two ways: either by signed ``authData`` (which then
    dictates the target node, the allowed mime-types and the maximum size), or by the
    permission of the current user to add a leaf below ``node``.

    :param fileName: Name of the file to be uploaded.
    :param mimeType: Mime-type of the file to be uploaded.
    :param size: Size of the upload. Required if the signed ``authData`` limits the size,
        ignored otherwise.
    :param node: Key of the node the file is going to be added to.
    :param authData: Base64-encoded JSON with ``validUntil``, ``validMimeTypes``, ``node`` and ``maxSize``.
    :param authSig: HMAC signature of ``authData``.
    :param public: True if the file is uploaded into the public bucket.
    :return: The rendered view of ``uploadKey`` and ``uploadUrl``.
    :raises errors.UnprocessableEntity: On an invalid filename or mime-type, or a missing or exceeding size.
    :raises errors.Unauthorized: If the signature of ``authData`` is invalid.
    :raises errors.Gone: If the signed ``authData`` has expired.
    :raises errors.NotFound: If no root node can be found for ``node``.
    :raises errors.Forbidden: If the user may not add files, or ``public`` contradicts the repository.
    """
    filename = fileName.strip()  # VIUR4 FIXME: just for compatiblity of the parameter names

    if not File.is_valid_filename(filename):
        raise errors.UnprocessableEntity(f"Invalid filename {filename!r} provided")

    # Validate the mimetype from the client seems legit
    mimetype = mimeType.strip().lower()
    allowed_chars = string.ascii_letters + string.digits + "/-.+"
    if not (
        mimetype
        and mimetype.count("/") == 1
        and all(ch in allowed_chars for ch in mimetype)
    ):
        raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype!r} provided")

    # Validate authentication data
    if authData and authSig:
        # First, validate the signature, otherwise we don't need to proceed further
        if not self.hmac_verify(authData, authSig):
            raise errors.Unauthorized()

        authData = json.loads(base64.b64decode(authData.encode("ASCII")).decode("UTF-8"))

        if datetime.datetime.strptime(authData["validUntil"], "%Y%m%d%H%M") < datetime.datetime.now():
            raise errors.Gone("The upload URL has expired")

        if authData["validMimeTypes"]:
            # An entry is either an exact mime-type, or a prefix ending in "*"
            for validMimeType in authData["validMimeTypes"]:
                if (
                    validMimeType == mimetype
                    or (validMimeType.endswith("*") and mimetype.startswith(validMimeType[:-1]))
                ):
                    break
            else:
                raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype} provided")

        node = authData["node"]
        maxSize = authData["maxSize"]

    else:
        rootNode = None
        if node and not (rootNode := self.getRootNode(node)):
            raise errors.NotFound(f"No valid root node found for {node=}")

        if not self.canAdd("leaf", rootNode):
            raise errors.Forbidden()

        if rootNode and public != bool(rootNode.get("public")):
            raise errors.Forbidden("Cannot upload a public file into private repository or vice versa")

        maxSize = None  # The user has some file/add permissions, don't restrict fileSize

    if maxSize:
        # A limit can only be enforced against a declared size
        if size is None:
            raise errors.UnprocessableEntity(f"Size must be provided, maximum size is {maxSize}")

        if size > maxSize:
            raise errors.UnprocessableEntity(f"Size {size} exceeds maximum size {maxSize}")
    else:
        size = None

    # Create upload-URL and download key
    dlkey = utils.string.random()  # let's roll a random key

    if public:
        dlkey += PUBLIC_DLKEY_SUFFIX  # mark file as public

    blob = File.get_bucket(dlkey).blob(f"{dlkey}/source/{filename}")
    # NOTE(review): the session is created with the raw `mimeType`, not the normalized `mimetype`
    # validated above - confirm whether this is intended.
    upload_url = blob.create_resumable_upload_session(content_type=mimeType, size=size, timeout=60)

    # Create a corresponding file-lock object early, otherwise we would have to ensure that the file-lock object
    # the user creates matches the file he had uploaded
    file_skel = self.addSkel("leaf")

    file_skel["name"] = filename + self.PENDING_POSTFIX
    file_skel["size"] = 0
    file_skel["mimetype"] = "application/octetstream"
    file_skel["dlkey"] = dlkey
    file_skel["parentdir"] = None
    file_skel["pendingparententry"] = db.keyHelper(node, self.addSkel("node").kindName) if node else None
    file_skel["pending"] = True
    file_skel["weak"] = True
    file_skel["public"] = public
    file_skel["width"] = 0
    file_skel["height"] = 0

    file_skel.write()
    key = str(file_skel["key"])

    # Mark that entry dirty as we might never receive an add
    self.mark_for_deletion(dlkey)

    # In this case, we'd have to store the key in the users session so he can call add() later on
    if authData and authSig:
        session = current.session.get()

        if "pendingFileUploadKeys" not in session:
            session["pendingFileUploadKeys"] = []

        session["pendingFileUploadKeys"].append(key)

        # Clamp to the latest 50 pending uploads
        session["pendingFileUploadKeys"] = session["pendingFileUploadKeys"][-50:]
        session.markChanged()

    return self.render.view({
        "uploadKey": key,
        "uploadUrl": upload_url,
    })

967 

@exposed
def download(self, blobKey: str, fileName: str = "", download: bool = False, sig: str = "", *args, **kwargs):
    """
    Download a file.

    :param blobKey: URL-safe base64 encoding of the NUL-separated values blob path, expiry
        (``"0"`` for never, otherwise formatted as ``%Y%m%d%H%M``) and, optionally, a download filename.
    :param fileName: Optional filename to provide in the header.
    :param download: Set header to attachment retrieval, set explicitly to "1" if download is wanted.
    :param sig: HMAC signature of ``blobKey``. Without a signature, the current user needs
        ``root`` or ``file-view`` access.

    :raises errors.UnprocessableEntity: If ``fileName`` is not a valid filename.
    :raises errors.BadRequest: If ``blobKey`` cannot be decoded or holds too few values.
    :raises errors.Unauthorized: If no signature was given and no user is logged in.
    :raises errors.Forbidden: If the user lacks the required access or the signature is invalid.
    :raises errors.Gone: If the link has expired or the blob does not exist.
    :raises errors.Redirect: To a signed URL (valid for 60 seconds) whenever the content is not served directly.
    """
    if filename := fileName.strip():
        if not File.is_valid_filename(filename):
            raise errors.UnprocessableEntity(f"The provided filename {filename!r} is invalid!")

    try:
        values = base64.urlsafe_b64decode(blobKey).decode("UTF-8").split("\0")
    except ValueError:
        raise errors.BadRequest(f"Invalid encoding of blob key {blobKey!r}!")
    try:
        dlPath, validUntil, *download_filename = values
        # Maybe it's the old format, without a download_filename
        download_filename = download_filename[0] if download_filename else ""
    except ValueError:
        logging.error(f"Encoding of {blobKey=!r} OK. {values=} invalid.")
        raise errors.BadRequest(f"The blob key {blobKey!r} has an invalid amount of encoded values!")

    # The first path component is the download key; it selects the bucket and marks public files.
    dlkey = dlPath.split("/", 1)[0]
    bucket = File.get_bucket(dlkey)

    if not sig:
        # Check if the current user has the right to download *any* blob present in this application.
        # blobKey is then the path inside cloudstore - not a base64 encoded tuple
        if not (usr := current.user.get()):
            raise errors.Unauthorized()
        if "root" not in usr["access"] and "file-view" not in usr["access"]:
            raise errors.Forbidden()
        validUntil = "-1"  # Prevent this from being cached down below
        blob = bucket.get_blob(blobKey)

    else:
        # We got an request including a signature (probably a guest or a user without file-view access)
        # First, validate the signature, otherwise we don't need to proceed any further
        if not self.hmac_verify(blobKey, sig):
            raise errors.Forbidden()

        if validUntil != "0" and datetime.datetime.strptime(validUntil, "%Y%m%d%H%M") < datetime.datetime.now():
            blob = None
        else:
            blob = bucket.get_blob(dlPath)

    if not blob:
        raise errors.Gone("The requested blob has expired.")

    if not filename:
        filename = download_filename or urlquote(blob.name.rsplit("/", 1)[-1])

    content_disposition = utils.build_content_disposition_header(filename, attachment=download)

    if isinstance(_CREDENTIALS, ServiceAccountCredentials):
        expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
        signedUrl = blob.generate_signed_url(expiresAt, response_disposition=content_disposition, version="v4")
        raise errors.Redirect(signedUrl)

    elif conf.instance.is_dev_server:  # No Service-Account to sign with - Serve everything directly
        response = current.request.get().response
        response.headers["Content-Type"] = blob.content_type
        if content_disposition:
            response.headers["Content-Disposition"] = content_disposition
        return blob.download_as_bytes()

    # The public marker is a suffix of the download key, not of the base64 encoded blobKey envelope,
    # so it has to be tested on the decoded key.
    if validUntil == "0" or dlkey.endswith(PUBLIC_DLKEY_SUFFIX):  # Its an indefinitely valid URL
        if blob.size < 5 * 1024 * 1024:  # Less than 5 MB - Serve directly and push it into the edge caches
            response = current.request.get().response
            response.headers["Content-Type"] = blob.content_type
            response.headers["Cache-Control"] = "public, max-age=604800"  # 7 Days
            if content_disposition:
                response.headers["Content-Disposition"] = content_disposition
            return blob.download_as_bytes()

    # Default fallback - create a signed URL and redirect
    authRequest = google.auth.transport.requests.Request()
    expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
    signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "")
    signedUrl = blob.generate_signed_url(
        expiresAt,
        credentials=signing_credentials,
        response_disposition=content_disposition,
        version="v4")

    raise errors.Redirect(signedUrl)

1055 

SERVE_VALID_OPTIONS = {
    # cropping
    "c", "p",
    # flipping
    "fv", "fh",
    # rotation
    "r90", "r180", "r270",
    # scaling
    "nu",
}
"""
Valid modification option shorts for the serve-function.
This is passed-through to the Google UserContent API, and has to be supported there.
"""

# Maps an accepted filename extension to the format option short appended to the serving URL.
SERVE_VALID_FORMATS = {
    "jpg": "rj",
    "jpeg": "rj",
    "png": "rp",
    "webp": "rw",
}
"""
Valid file-formats to the serve-function.
This is passed-through to the Google UserContent API, and has to be supported there.
"""

1081 

@exposed
def serve(
    self,
    host: str,
    key: str,
    size: t.Optional[int] = None,
    filename: t.Optional[str] = None,
    options: str = "",
    download: bool = False,
):
    """
    Requests an image using the serving url to bypass direct Google requests.

    :param host: the google host prefix i.e. lh3
    :param key: the serving url key
    :param size: the target image size
    :param filename: a random string with an extension, valid extensions are defined in File.SERVE_VALID_FORMATS.
        Without a filename, the image is requested as webp.
    :param options: ``-``-separated options (defined in File.SERVE_VALID_OPTIONS).
        c - crop
        p - face crop
        fv - vertical flip
        fh - horizontal flip
        rXXX - rotate 90, 180, 270
        nu - no upscale
    :param download: Serves the content as download (Content-Disposition) or not.

    :return: Returns the requested content on success, raises a proper HTTP exception otherwise.

    :raises errors.BadRequest: If ``host`` contains invalid characters, ``options`` contains an unknown
        option, or the upstream request did not succeed.
    :raises errors.UnprocessableEntity: If the extension of ``filename`` is not supported.
    """
    if any(c not in conf.search_valid_chars for c in host):
        raise errors.BadRequest("host contains invalid characters")

    # extract format from filename
    file_fmt = "webp"

    if filename:
        fmt = filename.rsplit(".", 1)[-1].lower()
        if fmt not in self.SERVE_VALID_FORMATS:
            raise errors.UnprocessableEntity(f"Unsupported filetype {fmt}")
        file_fmt = fmt

    if options and not all(param in self.SERVE_VALID_OPTIONS for param in options.split("-")):
        raise errors.BadRequest("Invalid options provided")

    # Assemble "<size>-<options>-<format>" from the parts that are present only,
    # so an empty `options` does not produce a stray dash such as "=-rw" or "=s100--rw".
    url_options = []
    if size:
        url_options.append(f"s{size}")
    if options:
        url_options.append(options)
    url_options.append(self.SERVE_VALID_FORMATS[file_fmt])

    url = f"https://{host}.googleusercontent.com/{key}=" + "-".join(url_options)

    answ = requests.get(url, timeout=20)
    if not answ.ok:
        logging.error(f"{answ.status_code} {answ.text}")
        raise errors.BadRequest("Unable to fetch a file with these parameters")

    # Decorate the response only after the fetch succeeded, so the error path
    # above never carries the image content type or the long-lived caching header.
    response = current.request.get().response
    response.headers["Content-Type"] = f"image/{file_fmt}"
    response.headers["Cache-Control"] = "public, max-age=604800"  # 7 Days
    # Same handling as in download(): only emit the header when there is something to emit.
    if content_disposition := utils.build_content_disposition_header(filename, attachment=download):
        response.headers["Content-Disposition"] = content_disposition

    return answ.content

1147 

@exposed
@force_ssl
@force_post
@skey(allow_empty=True)
def add(self, skelType: SkelType, node: db.Key | int | str | None = None, *args, **kwargs):
    """
    Add a new entry; for leafs, this finalizes a previously started upload.

    Files cannot be added directly, they have to be uploaded first. For ``skelType == "leaf"``,
    the pending file entry referenced by the ``key`` keyword argument is read, checked against
    the single blob stored below its download key, completed with the blob's metadata and written.
    Any other ``skelType`` is delegated to the parent implementation.

    :raises errors.NotFound: If the pending entry cannot be read.
    :raises errors.PreconditionFailed: If the entry is not pending, or there is not exactly
        one blob below its download key.
    :raises errors.Forbidden: If the user may not add leafs and the key is not listed as a
        pending upload in the current session.
    """
    if skelType != "leaf":
        return super().add(skelType, node, *args, **kwargs)

    # Leafs need to be handled separately from here on
    upload_key = kwargs.get("key")
    skel = self.addSkel("leaf")

    if not skel.read(upload_key):
        raise errors.NotFound()

    if not skel["pending"]:
        raise errors.PreconditionFailed()

    skel["pending"] = False
    skel["parententry"] = skel["pendingparententry"]

    root_node = self.getRootNode(skel["parententry"]) if skel["parententry"] else None

    if not self.canAdd("leaf", root_node):
        # Check for a marker in this session (created if using a signed upload URL)
        session = current.session.get()
        pending_keys = session.get("pendingFileUploadKeys") or []
        if upload_key not in pending_keys:
            raise errors.Forbidden()

        session["pendingFileUploadKeys"].remove(upload_key)
        session.markChanged()

    # Now read the blob from the dlkey folder
    bucket = File.get_bucket(skel["dlkey"])
    found_blobs = list(bucket.list_blobs(prefix=f"""{skel["dlkey"]}/"""))

    # only one item is allowed here!
    if len(found_blobs) != 1:
        logging.error("Invalid number of blobs in folder")
        logging.error(upload_key)
        raise errors.PreconditionFailed()

    (blob,) = found_blobs

    # update the corresponding file skeleton
    skel["name"] = skel["name"].removesuffix(self.PENDING_POSTFIX)
    skel["mimetype"] = utils.string.escape(blob.content_type)
    skel["size"] = blob.size
    skel["parentrepo"] = root_node["key"] if root_node else None
    skel["weak"] = root_node is None
    skel["crc32c_checksum"] = base64.b64decode(blob.crc32c).hex()
    skel["md5_checksum"] = base64.b64decode(blob.md5_hash).hex()

    self.onAdd("leaf", skel)
    skel.write()
    self.onAdded("leaf", skel)

    # Add updated download-URL as the auto-generated isn't valid yet
    skel["downloadUrl"] = self.create_download_url(skel["dlkey"], skel["name"])

    return self.render.addSuccess(skel)

1210 

@exposed
def get_download_url(
    self,
    key: t.Optional[db.Key] = None,
    dlkey: t.Optional[str] = None,
    filename: t.Optional[str] = None,
    derived: bool = False,
):
    """
    Request a download url for a given file

    :param key: The key of the file
    :param dlkey: The download key of the file
    :param filename: The filename to be given. If no filename is provided
        downloadUrls for all derived files are returned in case of `derived=True`.
    :param derived: True, if a derived file download URL is being requested.

    :raises errors.BadRequest: If neither ``key`` nor ``dlkey`` is provided.
    :raises errors.NotFound: If the file cannot be read, or ``filename`` matches neither the
        file's name nor (for ``derived``) one of its derived files.
    :raises errors.Unauthorized: If the file may not be viewed.
    """
    skel = self.viewSkel("leaf")

    if dlkey is not None:
        skel = skel.all().filter("dlkey", dlkey).getSkel()
    elif key is None:  # dlkey is known to be None in this branch
        raise errors.BadRequest("No key or dlkey provided")

    if not skel or not skel.read(key):
        raise errors.NotFound()

    if not self.canView("leaf", skel):
        raise errors.Unauthorized()

    dlkey = skel["dlkey"]

    if derived and filename is None:
        # No specific file requested: sign every derived file
        return self.render.view({
            derived_name: self.create_download_url(dlkey, derived_name, derived)
            for derived_name in skel["derived"]["files"]
        })

    if derived:
        # Check if Filename exist in the Derives. We sign nothing that not exist.
        if filename not in skel["derived"]["files"]:
            raise errors.NotFound("File not in derives")
    elif filename is None:
        filename = skel["name"]
    elif filename != skel["name"]:
        raise errors.NotFound("Filename not match")

    return self.render.view(self.create_download_url(dlkey, filename, derived))

1259 

def onEdit(self, skelType: SkelType, skel: SkeletonInstance):
    """
    Hook called on edit; moves the stored blob when a leaf has been renamed.

    The previous name is taken from a fresh edit skeleton that is filled from ``skel.dbEntity``.
    If it differs from the current name, the blob is copied to the new name inside the same
    bucket and the old blob is deleted afterwards.

    :raises errors.Gone: If the blob is not found under its old name.
    """
    super().onEdit(skelType, skel)

    if skelType != "leaf":
        return

    previous = self.editSkel(skelType)
    previous.setEntity(skel.dbEntity)

    if previous["name"] == skel["name"]:
        return  # name not changed, nothing to move

    # Move Blob to new name
    # https://cloud.google.com/storage/docs/copying-renaming-moving-objects
    old_path = f"{skel['dlkey']}/source/{html.unescape(previous['name'])}"
    new_path = f"{skel['dlkey']}/source/{html.unescape(skel['name'])}"

    bucket = File.get_bucket(skel['dlkey'])
    old_blob = bucket.get_blob(old_path)

    if not old_blob:
        raise errors.Gone()

    bucket.copy_blob(old_blob, bucket, new_path, if_generation_match=0)
    bucket.delete_blob(old_path)

1282 

def onAdded(self, skelType: SkelType, skel: SkeletonInstance) -> None:
    """
    Hook called after an entry was added; triggers the image metadata extraction for images.

    For leafs with an ``image/*`` mimetype, :meth:`set_image_meta` is called with the entry's key,
    unless the file is larger than ``IMAGE_META_MAX_SIZE``, in which case only a warning is logged.
    The parent hook is called in every case, including for oversized images.
    """
    if skelType == "leaf" and skel["mimetype"].startswith("image/"):
        if skel["size"] > self.IMAGE_META_MAX_SIZE:
            # Too large to inspect; skip the metadata only, not the parent hook below.
            logging.warning(f"File size {skel['size']} exceeds limit {self.IMAGE_META_MAX_SIZE=}")
        else:
            self.set_image_meta(skel["key"])

    super().onAdded(skelType, skel)

1291 

@CallDeferred
def set_image_meta(self, key: db.Key) -> None:
    """
    Write image metadata (height and width) to FileSkel

    Does nothing but logging if the file entry cannot be read, already has both dimensions,
    has no blob in the bucket, or cannot be identified as an image.

    :param key: Key of the file entry to update.
    """
    skel = self.editSkel("leaf", key)

    if not skel.read(key):
        logging.error(f"File {key} does not exist")
        return

    if skel["width"] and skel["height"]:
        logging.info(f'File {skel["key"]} has already {skel["width"]=} and {skel["height"]=}')
        return

    file_name = html.unescape(skel["name"])
    bucket = self.get_bucket(skel["dlkey"])
    blob = bucket.get_blob(f"""{skel["dlkey"]}/source/{file_name}""")

    if not blob:
        logging.error(f'Blob {skel["dlkey"]}/source/{file_name} is missing in Cloud Storage!')
        return

    # Load the blob's content into memory and rewind, so that it can be read from the start
    buffer = io.BytesIO()
    blob.download_to_file(buffer)
    buffer.seek(0)

    try:
        image = Image.open(buffer)
    except Image.UnidentifiedImageError as e:  # Can't load this image
        logging.exception(f'Cannot open {skel["key"]} | {skel["name"]} to set image meta data: {e}')
    else:
        skel.patch(values={"width": image.width, "height": image.height})

1323 

def mark_for_deletion(self, dlkey: str) -> None:
    """
    Adds a marker to the datastore that the file specified as *dlkey* can be deleted.

    A marker is a ``viur-deleted-files`` entity holding the download key and an iteration
    counter starting at 0. If a marker for *dlkey* already exists, nothing is written.
    The periodic cleanup task of this module (running every 4 hours) re-checks marked files:
    a marker whose file is referenced again is dropped, otherwise its counter is increased
    until the file is finally removed. These delayed checks are necessary due to database
    inconsistency.

    :param dlkey: Unique download-key of the file that shall be marked for deletion.
    """
    existing_marker = db.Query("viur-deleted-files").filter("dlkey", dlkey).getEntry()

    if not existing_marker:  # Otherwise it's already marked
        marker = db.Entity(db.Key("viur-deleted-files"))
        marker["itercount"] = 0
        marker["dlkey"] = str(dlkey)

        db.Put(marker)

1345 

1346 

@PeriodicTask(interval=datetime.timedelta(hours=4))
def startCheckForUnreferencedBlobs():
    """
    Periodic entry point (every 4 hours): kicks off the search for blob locks
    that have recently released blob references.
    """
    doCheckForUnreferencedBlobs()

1353 

1354 

@CallDeferred
def doCheckForUnreferencedBlobs(cursor=None):
    """
    Mark blobs for deletion that were released by their blob lock and are not referenced elsewhere.

    Processes up to 100 ``viur-blob-locks`` entities that have old blob references. For each of them,
    the old references are fetched and cleared within a transaction (stale locks are deleted instead).
    Every released blob key that is not an active reference of any lock, and that has no marker yet,
    gets a ``viur-deleted-files`` marker. If the query provides a cursor, the function calls
    itself again with it to continue.

    :param cursor: Query cursor to continue from; None starts from the beginning.
    """
    def getOldBlobKeysTxn(dbKey):
        # Returns the old references of the lock and resets them (or removes a stale lock entirely)
        obj = db.Get(dbKey)
        res = obj["old_blob_references"] or []
        if obj["is_stale"]:
            db.Delete(dbKey)
        else:
            obj["has_old_blob_references"] = False
            obj["old_blob_references"] = []
            db.Put(obj)
        return res

    query = db.Query("viur-blob-locks").filter("has_old_blob_references", True).setCursor(cursor)
    for lockObj in query.run(100):
        oldBlobKeys = db.RunInTransaction(getOldBlobKeysTxn, lockObj.key)
        for blobKey in oldBlobKeys:
            if db.Query("viur-blob-locks").filter("active_blob_references =", blobKey).getEntry():
                # This blob is referenced elsewhere
                logging.info(f"Stale blob is still referenced, {blobKey}")
                continue
            # Add a marker and schedule it for deletion
            fileObj = db.Query("viur-deleted-files").filter("dlkey", blobKey).getEntry()
            if fileObj:  # Its already marked
                logging.info(f"Stale blob already marked for deletion, {blobKey}")
                # Only skip this blob. The transaction above has already cleared the lock's old
                # references, so leaving the function here would drop all remaining keys for good
                # and would also skip the remaining locks and pages.
                continue
            fileObj = db.Entity(db.Key("viur-deleted-files"))
            fileObj["itercount"] = 0
            fileObj["dlkey"] = str(blobKey)
            logging.info(f"Stale blob marked dirty, {blobKey}")
            db.Put(fileObj)
    newCursor = query.getCursor()
    if newCursor:
        doCheckForUnreferencedBlobs(newCursor)

1389 

1390 

@PeriodicTask(interval=datetime.timedelta(hours=4))
def startCleanupDeletedFiles():
    """
    Periodic entry point (every 4 hours): increases the deletion counter of each blob
    that is currently not referenced, and deletes it once that counter exceeds the
    limit defined in doCleanupDeletedFiles.
    """
    doCleanupDeletedFiles()

1398 

1399 

@CallDeferred
def doCleanupDeletedFiles(cursor=None):
    """
    Process up to 100 ``viur-deleted-files`` markers and delete files that stayed unreferenced.

    For each marker:

    - without a ``dlkey``, the marker is deleted;
    - whose ``dlkey`` is an active reference of a blob lock, the marker is deleted (file is in use);
    - whose counter exceeds the limit, all blobs below the ``dlkey`` prefix, the marker, the matching
      file entries and their serving URLs (if any) are deleted;
    - otherwise, the counter is increased by one.

    If the query provides a cursor, the function calls itself again with it to continue.

    :param cursor: Query cursor to continue from; None starts from the beginning.
    """
    maxIterCount = 2  # How often a file will be checked for deletion

    query = db.Query("viur-deleted-files")
    if cursor:
        query.setCursor(cursor)

    for marker in query.run(100):
        if "dlkey" not in marker:
            db.Delete(marker.key)

        elif db.Query("viur-blob-locks").filter("active_blob_references =", marker["dlkey"]).getEntry():
            logging.info(f"""is referenced, {marker["dlkey"]}""")
            db.Delete(marker.key)

        elif marker["itercount"] > maxIterCount:
            logging.info(f"""Finally deleting, {marker["dlkey"]}""")

            bucket = File.get_bucket(marker["dlkey"])
            for blob in bucket.list_blobs(prefix=f"""{marker["dlkey"]}/"""):
                blob.delete()

            db.Delete(marker.key)

            # There should be exactly 1 or 0 of these
            for file_skel in skeletonByKind("file")().all().filter("dlkey =", marker["dlkey"]).fetch(99):
                file_skel.delete()

                if file_skel["serving_url"]:
                    bucket = File.get_bucket(file_skel["dlkey"])
                    blob_key = blobstore.create_gs_key(
                        f"/gs/{bucket.name}/{file_skel['dlkey']}/source/{file_skel['name']}"
                    )
                    images.delete_serving_url(blob_key)  # delete serving url

        else:
            logging.debug(f"""Increasing count, {marker["dlkey"]}""")
            marker["itercount"] += 1
            db.Put(marker)

    if next_cursor := query.getCursor():
        doCleanupDeletedFiles(next_cursor)

1437 

1438 

@PeriodicTask(interval=datetime.timedelta(hours=4))
def start_delete_pending_files():
    """
    Periodic entry point (every 4 hours): starts the deletion of FileSkels
    that are still pending and were created more than 7 days ago.
    """
    pending_files = FileLeafSkel().all().filter("pending =", True)
    created_before = utils.utcNow() - datetime.timedelta(days=7)

    DeleteEntitiesIter.startIterOnQuery(pending_files.filter("creationdate <", created_before))

1449 

1450 

1451# DEPRECATED ATTRIBUTES HANDLING 

1452 

def __getattr__(attr: str) -> object:
    """
    Module-level attribute fallback for deprecated names.

    Deprecated names are still resolved, but emit a DeprecationWarning and a log warning
    naming the replacement.

    :param attr: Name of the requested module attribute.
    :return: The value of the deprecated attribute.
    :raises AttributeError: If ``attr`` is not a known deprecated name.
    """
    deprecated = {
        # stuff prior viur-core < 3.7
        "GOOGLE_STORAGE_BUCKET": ("File.get_bucket()", _private_bucket),
    }

    if entry := deprecated.get(attr):
        replacement, value = entry
        msg = f"{attr} was replaced by {replacement}"
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        logging.warning(msg, stacklevel=2)
        return value

    # Anything else does not exist on this module; report it as such.
    raise AttributeError(f"module {__name__!r} has no attribute {attr!r}")