Coverage for /home/runner/work/viur-core/viur-core/viur/src/viur/core/modules/file.py: 0%

790 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-13 11:04 +0000

1import base64 

2import datetime 

3import hashlib 

4import hmac 

5import html 

6import io 

7import json 

8import logging 

9import re 

10import string 

11import typing as t 

12import warnings 

13from collections import namedtuple 

14from urllib.parse import quote as urlquote, urlencode 

15from urllib.request import urlopen 

16 

17import PIL 

18import PIL.ImageCms 

19import google.auth 

20import requests 

21from PIL import Image 

22from google.appengine.api import blobstore, images 

23from google.cloud import storage 

24from google.oauth2.service_account import Credentials as ServiceAccountCredentials 

25 

26from viur.core import conf, current, db, errors, utils, i18n 

27from viur.core.bones import BaseBone, BooleanBone, JsonBone, KeyBone, NumericBone, StringBone 

28 

29from viur.core.decorators import * 

30from viur.core.prototypes.tree import SkelType, Tree, TreeSkel 

31from viur.core.skeleton import SkeletonInstance, skeletonByKind 

32from viur.core.tasks import CallDeferred, DeleteEntitiesIter, PeriodicTask 

33 

34# Globals for connectivity 

35 

# Pattern a file or folder name has to match to be accepted by File.is_valid_filename()
VALID_FILENAME_REGEX = re.compile(
    # not a reserved device name | any number of allowed characters | last character is neither `.` nor ` `
    r"^(?!^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$)[^\x00-\x1F<>:\"\/\\|?*]*[^\x00-\x1F<>:\"\/\\|?*. ]$",
    flags=re.IGNORECASE,
)

# Default credentials and project of the environment, shared by the storage client below
_CREDENTIALS, _PROJECT_ID = google.auth.default()
GOOGLE_STORAGE_CLIENT = storage.Client(_PROJECT_ID, _CREDENTIALS)

# Bucket names are derived from the project id
PRIVATE_BUCKET_NAME = f"{_PROJECT_ID}.appspot.com"
PUBLIC_BUCKET_NAME = f"public-dot-{_PROJECT_ID}"
# Download keys ending with this suffix belong to the public bucket
PUBLIC_DLKEY_SUFFIX = "_pub"

_private_bucket = GOOGLE_STORAGE_CLIENT.lookup_bucket(PRIVATE_BUCKET_NAME)
_public_bucket = None  # looked up on first use by File.get_bucket()

# FilePath describes the components of a ViUR file
FilePath = namedtuple("FilePath", ("dlkey", "is_derived", "filename"))

54 

55 

def importBlobFromViur2(dlKey, fileName):
    """
    Move a blob of a ViUR 2 application to the location the file module expects it at.

    The outcome of an import is stored in a ``viur-viur2-blobimport`` entity named after *dlKey*;
    when such an entity already exists, its stored result is returned and nothing is imported again.

    :param dlKey: Download key of the blob to import.
    :param fileName: Name the blob is stored under (``<dlKey>/source/<fileName>``).
    :return: The download URL of the imported blob, or False when ``conf.viur2import_blobsource``
        is not set or the import failed.
    """
    bucket = conf.main_app.file.get_bucket(dlKey)
    blobsource = conf.viur2import_blobsource

    if not blobsource:
        return False

    def write_marker(success, **properties):
        # Persist the outcome of this import attempt
        marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
        marker["success"] = success
        for prop_name, prop_value in properties.items():
            marker[prop_name] = prop_value
        db.put(marker)
        return marker

    previous = db.get(db.Key("viur-viur2-blobimport", dlKey))
    if previous:
        return previous["dlurl"] if previous["success"] else False

    if blobsource["infoURL"]:
        # The name of the old blob has to be requested from the info URL
        try:
            response = urlopen(blobsource["infoURL"] + dlKey)
        except Exception:
            write_marker(False, error="Failed URL-FETCH 1")
            return False

        if response.status != 200:
            write_marker(False, error="Failed URL-FETCH 2")
            return False

        info = json.loads(response.read())
        oldBlobName = blobsource["gsdir"] + "/" + info["key"]
    else:
        oldBlobName = blobsource["gsdir"] + "/" + dlKey

    srcBlob = storage.Blob(bucket=bucket, name=oldBlobName)

    if not srcBlob.exists():
        write_marker(False, error="Local SRC-Blob missing", oldBlobName=oldBlobName)
        return False

    bucket.rename_blob(srcBlob, f"{dlKey}/source/{fileName}")

    marker = write_marker(
        True,
        old_src_key=dlKey,
        old_src_name=fileName,
        dlurl=conf.main_app.file.create_download_url(dlKey, fileName, False, None),
    )
    return marker["dlurl"]

103 

104 

def thumbnailer(fileSkel, existingFiles, params):
    """
    Derive resized versions of an image file and upload them next to the source blob.

    :param fileSkel: File skeleton; its "name" and "dlkey" values locate the source blob.
    :param existingFiles: Not used by this implementation.
    :param params: Iterable of dicts, each describing one target size. "width" is required, "height",
        "fileExtension" (default "webp") and "mimeType" (default "image/webp") are optional.
        Entries without "width" are ignored.
    :return: A list of ``(name, size, mimetype, customData)`` tuples, one per derived file.
        An empty list if the source cannot be opened as an image; None if the source blob is missing.
    """
    source_name = html.unescape(fileSkel["name"])
    bucket = conf.main_app.file.get_bucket(fileSkel["dlkey"])
    source_blob = bucket.get_blob(f"""{fileSkel["dlkey"]}/source/{source_name}""")

    if not source_blob:
        logging.warning(f"""Blob {fileSkel["dlkey"]}/source/{source_name} is missing from cloud storage!""")
        return

    source = io.BytesIO()
    source_blob.download_to_file(source)

    derived = []

    for spec in params:
        source.seek(0)
        target = io.BytesIO()

        try:
            image = PIL.Image.open(source)
        except PIL.Image.UnidentifiedImageError:
            # Can't load this image; so there's no need to try other resolutions
            return []

        if icc_profile := image.info.get('icc_profile'):
            # JPEGs might be encoded with a non-standard color-profile; this has to be compensated when
            # converting to WEBP, as the color-profile information is lost otherwise
            src_profile = PIL.ImageCms.ImageCmsProfile(io.BytesIO(icc_profile))
            dst_profile = PIL.ImageCms.createProfile('sRGB')

            try:
                image = PIL.ImageCms.profileToProfile(
                    image,
                    inputProfile=src_profile,
                    outputProfile=dst_profile,
                    outputMode="RGBA" if image.has_transparency_data else "RGB",
                )
            except Exception as e:
                logging.exception(e)
                continue

        extension = spec.get("fileExtension", "webp")

        if "width" not in spec:
            # No default fallback - ignore
            continue

        width = spec["width"]

        if "height" in spec:
            height = spec["height"]
            target_name = f"thumbnail-{width}-{height}.{extension}"
        else:
            # Only a width is given: keep the aspect ratio of the source image
            height = int(float(image.size[1]) * float(width / float(image.size[0])))
            target_name = f"thumbnail-w{width}.{extension}"

        mimetype = spec.get("mimeType", "image/webp")

        image = image.resize((width, height), PIL.Image.LANCZOS)
        image.save(target, extension)
        size = target.tell()
        target.seek(0)

        target_blob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{target_name}""")
        target_blob.upload_from_file(target, content_type=mimetype)

        derived.append((target_name, size, mimetype, {"mimetype": mimetype, "width": width, "height": height}))

    return derived

158 

159 

160def cloudfunction_thumbnailer(fileSkel, existingFiles, params): 

161 """External Thumbnailer for images. 

162 

163 The corresponding cloudfunction can be found here . 

164 https://github.com/viur-framework/viur-cloudfunctions/tree/main/thumbnailer 

165 

166 You can use it like so: 

167 main.py: 

168 

169 .. code-block:: python 

170 

171 from viur.core.modules.file import cloudfunction_thumbnailer 

172 

173 conf.file_thumbnailer_url = "https://xxxxx.cloudfunctions.net/imagerenderer" 

174 conf.file_derivations = {"thumbnail": cloudfunction_thumbnailer} 

175 

176 conf.derives_pdf = { 

177 "thumbnail": [{"width": 1920,"sites":"1,2"}] 

178 } 

179 

180 skeletons/xxx.py: 

181 .. code-block:: python 

182 

183 test = FileBone(derive=conf.derives_pdf) 

184 """ 

185 

186 if not conf.file_thumbnailer_url: 

187 raise ValueError("conf.file_thumbnailer_url is not set") 

188 

189 bucket = conf.main_app.file.get_bucket(fileSkel["dlkey"]) 

190 

191 def getsignedurl(): 

192 if conf.instance.is_dev_server: 

193 signedUrl = conf.main_app.file.create_download_url(fileSkel["dlkey"], fileSkel["name"]) 

194 else: 

195 path = f"""{fileSkel["dlkey"]}/source/{file_name}""" 

196 if not (blob := bucket.get_blob(path)): 

197 logging.warning(f"Blob {path} is missing from cloud storage!") 

198 return None 

199 authRequest = google.auth.transport.requests.Request() 

200 expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60) 

201 signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "") 

202 content_disposition = utils.build_content_disposition_header(fileSkel["name"]) 

203 signedUrl = blob.generate_signed_url( 

204 expiresAt, 

205 credentials=signing_credentials, 

206 response_disposition=content_disposition, 

207 version="v4") 

208 return signedUrl 

209 

210 def make_request(): 

211 headers = {"Content-Type": "application/json"} 

212 data_str = base64.b64encode(json.dumps(dataDict).encode("UTF-8")) 

213 sig = conf.main_app.file.hmac_sign(data_str) 

214 datadump = json.dumps({"dataStr": data_str.decode('ASCII'), "sign": sig}) 

215 resp = requests.post(conf.file_thumbnailer_url, data=datadump, headers=headers, allow_redirects=False) 

216 if resp.status_code != 200: # Error Handling 

217 match resp.status_code: 

218 case 302: 

219 # The problem is Google resposen 302 to an auth Site when the cloudfunction was not found 

220 # https://cloud.google.com/functions/docs/troubleshooting#login 

221 logging.error("Cloudfunction not found") 

222 case 404: 

223 logging.error("Cloudfunction not found") 

224 case 403: 

225 logging.error("No permission for the Cloudfunction") 

226 case _: 

227 logging.error( 

228 f"cloudfunction_thumbnailer failed with code: {resp.status_code} and data: {resp.content}") 

229 return 

230 

231 try: 

232 response_data = resp.json() 

233 except Exception as e: 

234 logging.error(f"response could not be converted in json failed with: {e=}") 

235 return 

236 if "error" in response_data: 

237 logging.error(f"cloudfunction_thumbnailer failed with: {response_data.get('error')}") 

238 return 

239 

240 return response_data 

241 

242 file_name = html.unescape(fileSkel["name"]) 

243 

244 if not (url := getsignedurl()): 

245 return 

246 dataDict = { 

247 "url": url, 

248 "name": fileSkel["name"], 

249 "params": params, 

250 "minetype": fileSkel["mimetype"], 

251 "baseUrl": current.request.get().request.host_url.lower(), 

252 "targetKey": fileSkel["dlkey"], 

253 "nameOnly": True 

254 } 

255 if not (derivedData := make_request()): 

256 return 

257 

258 uploadUrls = {} 

259 for data in derivedData["values"]: 

260 fileName = conf.main_app.file.sanitize_filename(data["name"]) 

261 blob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{fileName}""") 

262 uploadUrls[fileSkel["dlkey"] + fileName] = blob.create_resumable_upload_session(timeout=60, 

263 content_type=data["mimeType"]) 

264 

265 if not (url := getsignedurl()): 

266 return 

267 

268 dataDict["url"] = url 

269 dataDict["nameOnly"] = False 

270 dataDict["uploadUrls"] = uploadUrls 

271 

272 if not (derivedData := make_request()): 

273 return 

274 reslist = [] 

275 try: 

276 for derived in derivedData["values"]: 

277 for key, value in derived.items(): 

278 reslist.append((key, value["size"], value["mimetype"], value["customData"])) 

279 

280 except Exception as e: 

281 logging.error(f"cloudfunction_thumbnailer failed with: {e=}") 

282 return reslist 

283 

284 

class DownloadUrlBone(BaseBone):
    """
    Bone that provides a freshly signed download url for a FileSkel each time it is unserialized.
    """

    def unserialize(self, skel, name):
        """
        Set the value of this bone to a newly created download url.

        :param skel: The skeleton this bone is part of.
        :param name: The name of this bone within the skeleton.
        :return: True if a download url was created, False if the database entity of the skeleton
            lacks "dlkey" or "name".
        """
        entity = skel.dbEntity

        if not ("dlkey" in entity and "name" in entity):
            return False

        skel.accessedValues[name] = conf.main_app.file.create_download_url(
            skel["dlkey"],
            skel["name"],
            expires=conf.render_json_download_url_expiration,
        )
        return True

298 

299 

class FileLeafSkel(TreeSkel):
    """
    The default skeleton for leafs (files) of the file module.
    """
    kindName = "file"

    name = StringBone(
        descr="Filename",
        caseSensitive=False,
        searchable=True,
        vfunc=lambda val: "Invalid filename provided" if not File.is_valid_filename(val) else None,
    )

    alt = StringBone(
        descr=i18n.translate("viur.core.image.alt", defaultText="Alternative description"),
        searchable=True,
        languages=conf.i18n.available_languages,
    )

    size = NumericBone(descr="Filesize in Bytes", readOnly=True, searchable=True)

    dlkey = StringBone(descr="Download-Key", readOnly=True)

    mimetype = StringBone(descr="MIME-Type", readOnly=True)

    weak = BooleanBone(descr="Weak reference", readOnly=True, visible=False)

    pending = BooleanBone(descr="Pending upload", readOnly=True, visible=False, defaultValue=False)

    width = NumericBone(descr="Width", readOnly=True, searchable=True)

    height = NumericBone(descr="Height", readOnly=True, searchable=True)

    downloadUrl = DownloadUrlBone(descr="Download-URL", readOnly=True, visible=False)

    derived = JsonBone(descr="Derived Files", readOnly=True, visible=False)

    pendingparententry = KeyBone(descr="Pending key Reference", readOnly=True, visible=False)

    crc32c_checksum = StringBone(descr="CRC32C checksum", readOnly=True)

    md5_checksum = StringBone(descr="MD5 checksum", readOnly=True)

    public = BooleanBone(descr="Public File", readOnly=True, defaultValue=False)

    serving_url = StringBone(
        descr="Serving-URL",
        readOnly=True,
        params={"tooltip": "The 'serving_url' is only available in public file repositories."},
    )

    @classmethod
    def _inject_serving_url(cls, skel: SkeletonInstance) -> None:
        """
        Set "serving_url" on a FileSkel of a public image file which has none yet.

        :param skel: The file skeleton to set the serving url on.
        """
        if not skel["public"]:
            return

        if not (skel["mimetype"] and skel["mimetype"].startswith("image/")):
            return

        if skel["serving_url"]:
            return  # already set

        bucket = File.get_bucket(skel["dlkey"])
        filename = f"/gs/{bucket.name}/{skel['dlkey']}/source/{skel['name']}"

        if conf.instance.is_dev_server:
            # Requesting a serving url on the local development server would raise a
            # `google.appengine.runtime.apiproxy_errors.RPCFailedError`
            logging.warning(f"Can't inject serving_url for {filename!r} on local development server")
            return

        try:
            skel["serving_url"] = images.get_serving_url(None, secure_url=True, filename=filename)
        except Exception as e:
            logging.warning(f"Failed to create serving_url for {filename!r} with exception {e!r}")
            logging.exception(e)

    def preProcessBlobLocks(self, locks):
        """
        Add the own dlkey to the locks unless this is a weak file, so that it is locked even if
        there is no filebone here.

        :param locks: The set of download keys to extend.
        :return: The given *locks*.
        """
        if self["dlkey"] and not self["weak"]:
            locks.add(self["dlkey"])

        return locks

    @classmethod
    def refresh(cls, skel):
        """
        Refresh the skeleton; runs the ViUR 2 blob import when ``conf.viur2import_blobsource`` is set,
        and injects the serving url afterwards.

        :param skel: The file skeleton to refresh.
        """
        super().refresh(skel)

        if conf.viur2import_blobsource and (dlurl := importBlobFromViur2(skel["dlkey"], skel["name"])):
            if not skel["downloadUrl"]:
                skel["downloadUrl"] = dlurl

            skel["pendingparententry"] = None

        cls._inject_serving_url(skel)

    @classmethod
    def write(cls, skel, **kwargs):
        """
        Inject the serving url, then write the skeleton.

        :param skel: The file skeleton to write.
        :param kwargs: Passed through to the write() of the parent class.
        :return: Whatever the write() of the parent class returns.
        """
        cls._inject_serving_url(skel)
        return super().write(skel, **kwargs)

454 

455 

class FileNodeSkel(TreeSkel):
    """
    The default skeleton for nodes (folders) of the file module.
    """
    kindName = "file_rootNode"  # FIXME: VIUR4, don't use "_rootNode" kindname

    name = StringBone(descr="Name", required=True, searchable=True)

    rootNode = BooleanBone(descr="Is RootNode", defaultValue=False, readOnly=True, visible=False)

    public = BooleanBone(descr="Is public?", defaultValue=False, readOnly=True, visible=False)

    viurCurrentSeoKeys = None

483 

484 

485class File(Tree): 

486 PENDING_POSTFIX = " (pending)" 

487 DOWNLOAD_URL_PREFIX = "/file/download/" 

488 INTERNAL_SERVING_URL_PREFIX = "/file/serve/" 

489 MAX_FILENAME_LEN = 256 

490 IMAGE_META_MAX_SIZE: t.Final[int] = 10 * 1024 ** 2 

491 """Maximum size of image files that should be analysed in :meth:`set_image_meta`. 

492 Default: 10 MiB""" 

493 

494 leafSkelCls = FileLeafSkel 

495 nodeSkelCls = FileNodeSkel 

496 

497 handler = "tree.simple.file" 

498 adminInfo = { 

499 "icon": "folder-fill", 

500 "handler": handler, # fixme: Use static handler; Remove with VIUR4! 

501 } 

502 

503 roles = { 

504 "*": "view", 

505 "editor": ("add", "edit"), 

506 "admin": "*", 

507 } 

508 

509 default_order = "name" 

510 

511 # Helper functions currently reside here

512 

@staticmethod
def get_bucket(dlkey: str) -> google.cloud.storage.bucket.Bucket:
    """
    Returns the Google Cloud Storage bucket a dlkey belongs to.

    Download keys ending with PUBLIC_DLKEY_SUFFIX belong to the public bucket, which is looked up
    on first use; any other dlkey belongs to the private bucket.

    :param dlkey: The download key to get the bucket for.
    :return: The public or the private bucket.
    :raises ValueError: If the public bucket is required, but does not exist.
    """
    global _public_bucket

    if not (dlkey and dlkey.endswith(PUBLIC_DLKEY_SUFFIX)):
        return _private_bucket

    if not _public_bucket:
        _public_bucket = GOOGLE_STORAGE_CLIENT.lookup_bucket(PUBLIC_BUCKET_NAME)

    if _public_bucket:
        return _public_bucket

    raise ValueError(
        f"""The bucket '{PUBLIC_BUCKET_NAME}' does not exist! Please create it with ACL access."""
    )

528 

@classmethod
def is_valid_filename(cls, filename: str) -> bool:
    """
    Checks whether a filename is valid.

    A valid filename is usable on Linux, Mac OS and Windows, does not consist of whitespace only
    and has at most MAX_FILENAME_LEN characters.

    Rule set: https://stackoverflow.com/a/31976060/3749896
    Regex test: https://regex101.com/r/iBYpoC/1

    :param filename: The filename to check.
    :return: True if the filename is valid, False otherwise.
    """
    if not filename.strip() or len(filename) > cls.MAX_FILENAME_LEN:
        return False

    return VALID_FILENAME_REGEX.match(filename) is not None

547 

@staticmethod
def hmac_sign(data: t.Any) -> str:
    """
    Signs data with ``conf.file_hmac_key`` using HMAC with SHA3-384.

    :param data: The data to sign; anything but bytes is converted to its UTF-8 encoded string form.
    :return: The signature as hex digest.
    """
    assert conf.file_hmac_key is not None, "No hmac-key set!"

    message = data if isinstance(data, bytes) else str(data).encode("UTF-8")
    return hmac.new(conf.file_hmac_key, msg=message, digestmod=hashlib.sha3_384).hexdigest()

554 

@classmethod
def hmac_verify(cls, data: t.Any, signature: str) -> bool:
    """
    Verifies that *signature* is the signature :meth:`hmac_sign` creates for *data*.

    Both values usually originate from a request. Therefore, anything that cannot be a valid
    data/signature pair (non-ASCII characters, a signature which is not a string) is answered
    with False instead of an exception.

    :param data: The signed data; a string is ASCII-encoded before it is signed.
    :param signature: The signature to verify, as hex digest.
    :return: True if the signature matches the data, False otherwise.
    """
    if not isinstance(signature, str):
        return False

    if isinstance(data, str):
        try:
            data = data.encode("ASCII")
        except UnicodeEncodeError:
            return False

    expected = cls.hmac_sign(data)

    try:
        # compare_digest() rejects non-ASCII strings with a TypeError, so bytes are compared
        return hmac.compare_digest(expected.encode("ASCII"), signature.encode("ASCII"))
    except UnicodeEncodeError:
        return False

558 

@classmethod
def create_internal_serving_url(
    cls,
    serving_url: str,
    size: int = 0,
    filename: str = "",
    options: str = "",
    download: bool = False
) -> str:
    """
    Turns a Google serving url into an internal serving url (endpoint: /file/serve).

    Requests to the internal url are routed internally, which hides the request to Google from
    the client. This can be the result of a legal requirement like GDPR.

    :param serving_url: The original serving URL as generated from FileLeafSkel._inject_serving_url()
    :param size: Optional size setting
    :param filename: Optional filename setting
    :param options: Additional options parameter-pass through to /file/serve
    :param download: Download parameter-pass through to /file/serve
    :return: The internal serving url; only parameters with a truthy value are appended.
    :raises ValueError: If *serving_url* is not a https-URL below googleusercontent.com.
    """
    # The host prefix and the path are the components the serve function needs
    matched = re.match(r"^https:\/\/(.*?)\.googleusercontent\.com\/(.*?)$", serving_url)

    if matched is None:
        raise ValueError(f"Invalid {serving_url=!r} provided")

    host, path = matched.groups()
    internal_url = f"{cls.INTERNAL_SERVING_URL_PREFIX}{host}/{path}"

    # Collect the parameters that are actually set
    query = {}
    for param_name, param_value in (
        ("download", download),
        ("filename", filename),
        ("options", options),
        ("size", size),
    ):
        if param_value:
            query[param_name] = param_value

    if query:
        internal_url += f"?{urlencode(query)}"

    return internal_url

605 

606 @classmethod 

607 def create_download_url( 

608 cls, 

609 dlkey: str, 

610 filename: str, 

611 derived: bool = False, 

612 expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1), 

613 download_filename: t.Optional[str] = None 

614 ) -> str: 

615 """ 

616 Utility function that creates a signed download-url for the given folder/filename combination 

617 

618 :param folder: The GCS-Folder (= the download-key) for that file 

619 :param filename: The name of the file. Either the original filename or the name of a derived file. 

620 :param derived: True, if it points to a derived file, False if it points to the original uploaded file 

621 :param expires: 

622 None if the file is supposed to be public (which causes it to be cached on the google ede caches), 

623 otherwise a datetime.timedelta of how long that link should be valid 

624 :param download_filename: If set, browser is enforced to download this blob with the given alternate 

625 filename 

626 :return: The signed download-url relative to the current domain (eg /download/...) 

627 """ 

628 if isinstance(expires, int): 

629 expires = datetime.timedelta(minutes=expires) 

630 

631 # Undo escaping on ()= performed on fileNames 

632 filename = filename.replace("&#040;", "(").replace("&#041;", ")").replace("&#061;", "=") 

633 filepath = f"""{dlkey}/{"derived" if derived else "source"}/{filename}""" 

634 

635 if download_filename: 

636 if not cls.is_valid_filename(download_filename): 

637 raise errors.UnprocessableEntity(f"Invalid download_filename {download_filename!r} provided") 

638 

639 download_filename = urlquote(download_filename) 

640 

641 expires = (datetime.datetime.now() + expires).strftime("%Y%m%d%H%M") if expires else 0 

642 

643 data = base64.urlsafe_b64encode(f"""{filepath}\0{expires}\0{download_filename or ""}""".encode("UTF-8")) 

644 sig = cls.hmac_sign(data) 

645 

646 return f"""{cls.DOWNLOAD_URL_PREFIX}{data.decode("ASCII")}?sig={sig}""" 

647 

@classmethod
def parse_download_url(cls, url) -> t.Optional[FilePath]:
    """
    Parses a file download URL in the format `/file/download/xxxx?sig=yyyy` into its FilePath.

    If the URL cannot be parsed, its signature is invalid or it has expired, the function returns None.

    :param url: The file download URL to be parsed.
    :return: A FilePath on success, None otherwise.
    """
    if not url.startswith(cls.DOWNLOAD_URL_PREFIX) or "?" not in url:
        return None

    # Strip "/file/download/" and split on "?"
    data, sig = url.removeprefix(cls.DOWNLOAD_URL_PREFIX).split("?", 1)
    sig = sig.removeprefix("sig=")

    if not cls.hmac_verify(data, sig):
        # Invalid signature
        return None

    # Split the blobKey into the individual fields it should contain
    fields = base64.urlsafe_b64decode(data).decode("UTF-8").split("\0")

    # Three fields is the current format; two fields is the old format, without a download filename
    if len(fields) not in (2, 3):
        # Invalid path
        return None

    dlpath, valid_until = fields[:2]

    # `datetime` is the module here, so the class has to be addressed as `datetime.datetime`
    if (
        valid_until != "0"
        and datetime.datetime.strptime(valid_until, "%Y%m%d%H%M") < datetime.datetime.now()
    ):
        # Signature expired
        return None

    if dlpath.count("/") != 2:
        # Invalid path
        return None

    dlkey, derived, filename = dlpath.split("/")
    return FilePath(dlkey, derived != "source", filename)

691 

@classmethod
def create_src_set(
    cls,
    file: t.Union["SkeletonInstance", dict, str],
    expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1),
    width: t.Optional[t.Collection[int]] = None,
    height: t.Optional[t.Collection[int]] = None,
    language: t.Optional[str] = None,
) -> str:
    """
    Generates a string suitable for use as the srcset tag in html. This functionality provides the browser
    with a list of images in different sizes and allows it to choose the smallest file that will fill its
    viewport without upscaling.

    :param file: The file skeleton (or if multiple=True a single value from it) to generate the srcset.
        A string is taken as dlkey of the file to be looked up.
    :param expires:
        None if the links should not expire, otherwise a datetime.timedelta of how long they should be
        valid. It is passed on to :meth:`create_download_url`, which takes an integer as minutes.
    :param width:
        A list of widths that should be included in the srcset.
        If a given width is not available, it will be skipped.
    :param height: A list of heights that should be included in the srcset. If a given height is not available,
        it will be skipped.
    :param language: Language overwrite if file has multiple languages, and we want to explicitly specify one
    :return: The srcset generated or an empty string if an invalid file object was supplied
    """
    if not width and not height:
        logging.error("Neither width or height supplied")
        return ""

    if isinstance(file, str):
        file = db.Query("file").filter("dlkey =", file).order(("creationdate", db.SortOrder.Ascending)).getEntry()

    if not file:
        return ""

    if isinstance(file, i18n.LanguageWrapper):
        language = language or current.language.get()
        # The value for the language has to be taken from the wrapper itself
        if not language or not (file := file.get(language)):
            return ""

    if "dlkey" not in file and "dest" in file:
        file = file["dest"]

    from viur.core.skeleton import SkeletonInstance  # avoid circular imports

    if not (
        isinstance(file, (SkeletonInstance, dict))
        and "dlkey" in file
        and "derived" in file
    ):
        logging.error("Invalid file supplied")
        return ""

    if not isinstance(file["derived"], dict):
        logging.error("No derives available")
        return ""

    src_set = []
    # A missing or empty "files" entry simply yields an empty srcset
    for filename, derivate in (file["derived"].get("files") or {}).items():
        customData = derivate.get("customData", {})

        if width and customData.get("width") in width:
            src_set.append(
                f"""{cls.create_download_url(file["dlkey"], filename, True, expires)} {customData["width"]}w"""
            )

        if height and customData.get("height") in height:
            src_set.append(
                f"""{cls.create_download_url(file["dlkey"], filename, True, expires)} {customData["height"]}h"""
            )

    return ", ".join(src_set)

765 

def write(
    self,
    filename: str,
    content: t.Any,
    mimetype: str = "text/plain",
    *,
    width: int = None,
    height: int = None,
    public: bool = False,
    rootnode: t.Optional[db.Key] = None,
    folder: t.Iterable[str] | str = (),
) -> db.Key:
    """
    Write a file from any bytes-like object into the file module.

    If *folder* and *rootnode* are both set, the file is added to the repository in that folder.
    If only *folder* is set, the file is added to the default repository in that folder.
    If only *rootnode* is set, the file is added to that repository in the root folder.

    If both are not set, the file is added without a path or repository as a weak file.
    It will not be visible in admin in this case.

    :param filename: Filename to be written.
    :param content: The file content to be written, as bytes-like object.
    :param mimetype: The file's mimetype.
    :param width: Optional width information for the file.
    :param height: Optional height information for the file.
    :param public: True if the file should be publicly accessible.
    :param rootnode: Optional root-node of the repository to add the file to
    :param folder: Optional folder the file should be written into.

    :return: Returns the key of the file object written. This can be associated e.g. with a FileBone.
    :raises ValueError: If the filename or one of the folder names is invalid.
    """
    if not self.is_valid_filename(filename):
        raise ValueError(f"{filename=} is invalid")

    # Folder mode?
    # NOTE(review): a *rootnode* given without a *folder* is ignored here, although the docstring
    # promises that the file is added to the root folder of that repository - confirm and align.
    if folder:
        # A single name becomes a tuple; any other iterable is materialized, as it is iterated twice
        folder = (folder,) if isinstance(folder, str) else tuple(folder)

        # Validate correct folder naming
        for foldername in folder:
            if not self.is_valid_filename(foldername):
                raise ValueError(f"{foldername=} is invalid")

        # When in folder-mode, a rootnode must exist!
        if rootnode is None:
            rootnode = self.ensureOwnModuleRootNode()

        parentrepokey = rootnode.key
        parentfolderkey = rootnode.key

        # Walk down the path, creating every folder which does not exist yet
        for foldername in folder:
            query = self.addSkel("node").all()
            query.filter("parentrepo", parentrepokey)
            query.filter("parententry", parentfolderkey)
            query.filter("name", foldername)

            if folder_skel := query.getSkel():
                # Skip existing folder
                parentfolderkey = folder_skel["key"]
            else:
                # Create new folder
                folder_skel = self.addSkel("node")

                folder_skel["name"] = foldername
                folder_skel["parentrepo"] = parentrepokey
                folder_skel["parententry"] = parentfolderkey
                folder_skel.write()

                parentfolderkey = folder_skel["key"]

    else:
        parentrepokey = None
        parentfolderkey = None

    # Write the file
    dl_key = utils.string.random()

    if public:
        dl_key += PUBLIC_DLKEY_SUFFIX  # mark file as public

    bucket = self.get_bucket(dl_key)

    blob = bucket.blob(f"{dl_key}/source/{filename}")
    blob.upload_from_file(io.BytesIO(content), content_type=mimetype)

    fileskel = self.addSkel("leaf")

    fileskel["parentrepo"] = parentrepokey
    fileskel["parententry"] = parentfolderkey
    fileskel["name"] = filename
    fileskel["size"] = blob.size
    fileskel["mimetype"] = mimetype
    fileskel["dlkey"] = dl_key
    # A file is weak if - and only if - it does not belong to a repository
    fileskel["weak"] = not parentrepokey
    fileskel["public"] = public
    fileskel["width"] = width
    fileskel["height"] = height
    # The blob holds base64-encoded checksums; the skeleton stores them as hex strings
    fileskel["crc32c_checksum"] = base64.b64decode(blob.crc32c).hex()
    fileskel["md5_checksum"] = base64.b64decode(blob.md5_hash).hex()
    fileskel["pending"] = False

    return fileskel.write()["key"]

872 

def read(
    self,
    key: db.Key | int | str | None = None,
    path: str | None = None,
) -> tuple[io.BytesIO, str]:
    """
    Read a file from the Cloud Storage.

    If a key and a path are provided, the key is preferred.
    This means that the entry in the db is searched first and if this is not found, the path is used.

    :param key: Key of the LeafSkel that contains the "dlkey" and the "name".
    :param path: The path of the file in the Cloud Storage Bucket.

    :return: Returns the file as a io.BytesIO buffer and the content-type
    :raises ValueError: If neither key nor path is given, or if the key is not in the database
        and no path is given as fallback.
    """
    if not key and not path:
        raise ValueError("Please provide a key or a path")

    if key:
        skel = self.viewSkel("leaf")
        if skel.read(db.key_helper(key, skel.kindName)):
            path = f"""{skel["dlkey"]}/source/{skel["name"]}"""
        elif not path:
            raise ValueError("This skeleton is not in the database!")

    # The first part of the path is the dlkey plus eventual postfix. It selects the bucket in any
    # case, so that the fallback to a path also finds files in the public bucket.
    bucket = self.get_bucket(path.split("/", 1)[0])

    blob = bucket.blob(path)
    return io.BytesIO(blob.download_as_bytes()), blob.content_type

906 

@CallDeferred
def deleteRecursive(self, parentKey):
    """
    Deletes all files and folders below a folder, descending into the sub-folders.

    The dlkey of every file found is marked for deletion before its skeleton is deleted.

    :param parentKey: Key of the folder whose content is deleted.
    """
    # NOTE(review): both queries filter on "parentdir", while write() stores the parent of an entry
    # as "parententry" - confirm that these filters still match anything.
    files = db.Query(self.leafSkelCls().kindName).filter("parentdir =", parentKey).iter()
    for fileEntry in files:
        self.mark_for_deletion(fileEntry["dlkey"])
        skel = self.leafSkelCls()

        # `key` is used as an attribute of the entity, same as for the folders below
        if skel.read(str(fileEntry.key)):
            skel.delete()

    dirs = db.Query(self.nodeSkelCls().kindName).filter("parentdir", parentKey).iter()
    for d in dirs:
        self.deleteRecursive(d.key)
        skel = self.nodeSkelCls()
        if skel.read(d.key):
            skel.delete()

922 

@exposed
@skey
def getUploadURL(
    self,
    fileName: str,
    mimeType: str,
    size: t.Optional[int] = None,
    node: t.Optional[str | db.Key] = None,
    authData: t.Optional[str] = None,
    authSig: t.Optional[str] = None,
    public: bool = False,
):
    """
    Create a resumable upload session in the Cloud Storage together with a pending file entry.

    The request is authorized either by a signed `authData`/`authSig` pair, or by the
    `canAdd` permission of the current user on the requested node.

    :param fileName: Name of the file to be uploaded; must pass `is_valid_filename`.
    :param mimeType: Mime-type announced by the client.
    :param size: Announced size of the upload. It is only enforced (and then required)
        when the signed `authData` carries a `maxSize`; otherwise it is ignored.
    :param node: Key of the target node. For signed requests, the node from `authData` is used instead.
    :param authData: Base64 encoded JSON providing `validUntil`, `validMimeTypes`, `node` and `maxSize`.
    :param authSig: Signature for `authData`, verified by `hmac_verify`.
    :param public: Whether the file is public; its dlkey then gets the `PUBLIC_DLKEY_SUFFIX`.

    :return: The rendered view of a dict providing "uploadKey" and "uploadUrl".

    :raises errors.UnprocessableEntity: On invalid filename, mime-type or size.
    :raises errors.Unauthorized: If the signature of `authData` is invalid.
    :raises errors.Gone: If the signed upload permission has expired.
    :raises errors.NotFound: If no root node can be found for `node`.
    :raises errors.Forbidden: If the user may not add leafs, or `public` does not match the repository.
    """
    filename = fileName.strip()  # VIUR4 FIXME: just for compatiblity of the parameter names

    if not self.is_valid_filename(filename):
        raise errors.UnprocessableEntity(f"Invalid filename {filename!r} provided")

    # Validate the mimetype from the client seems legit
    mimetype = mimeType.strip().lower()
    if not (
        mimetype
        and mimetype.count("/") == 1
        and all(ch in string.ascii_letters + string.digits + "/-.+" for ch in mimetype)
    ):
        raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype!r} provided")

    # Validate authentication data
    if authData and authSig:
        # First, validate the signature, otherwise we don't need to proceed further
        if not self.hmac_verify(authData, authSig):
            raise errors.Unauthorized()

        authData = json.loads(base64.b64decode(authData.encode("ASCII")).decode("UTF-8"))

        if datetime.datetime.strptime(authData["validUntil"], "%Y%m%d%H%M") < datetime.datetime.now():
            raise errors.Gone("The upload URL has expired")

        # An entry matches either exactly or as prefix when it ends with "*" (e.g. "image/*")
        if authData["validMimeTypes"] and not any(
            validMimeType == mimetype
            or (validMimeType.endswith("*") and mimetype.startswith(validMimeType[:-1]))
            for validMimeType in authData["validMimeTypes"]
        ):
            raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype} provided")

        node = authData["node"]
        maxSize = authData["maxSize"]

    else:
        rootNode = None
        if node and not (rootNode := self.getRootNode(node)):
            raise errors.NotFound(f"No valid root node found for {node=}")

        if not self.canAdd("leaf", rootNode):
            raise errors.Forbidden()

        if rootNode and public != bool(rootNode.get("public")):
            raise errors.Forbidden("Cannot upload a public file into private repository or vice versa")

        maxSize = None  # The user has some file/add permissions, don't restrict fileSize

    if maxSize:
        # A size-limited upload needs an announced size; accepting a missing size would
        # bypass the limit (and comparing None with maxSize raises a TypeError).
        if size is None:
            raise errors.UnprocessableEntity(f"A size is required, the maximum size is {maxSize}")
        if size > maxSize:
            raise errors.UnprocessableEntity(f"Size {size} exceeds maximum size {maxSize}")
    else:
        size = None

    # Create upload-URL and download key
    dlkey = utils.string.random()  # let's roll a random key

    if public:
        dlkey += PUBLIC_DLKEY_SUFFIX  # mark file as public

    blob = self.get_bucket(dlkey).blob(f"{dlkey}/source/{filename}")
    # Use the validated and normalized mime-type, not the raw client value
    upload_url = blob.create_resumable_upload_session(content_type=mimetype, size=size, timeout=60)

    # Create a corresponding file-lock object early, otherwise we would have to ensure that the file-lock object
    # the user creates matches the file he had uploaded
    file_skel = self.addSkel("leaf")

    file_skel["name"] = filename + self.PENDING_POSTFIX
    file_skel["size"] = 0
    file_skel["mimetype"] = "application/octetstream"
    file_skel["dlkey"] = dlkey
    file_skel["parentdir"] = None
    file_skel["pendingparententry"] = db.key_helper(node, self.addSkel("node").kindName) if node else None
    file_skel["pending"] = True
    file_skel["weak"] = True
    file_skel["public"] = public
    file_skel["width"] = 0
    file_skel["height"] = 0

    file_skel.write()
    key = str(file_skel["key"])

    # Mark that entry dirty as we might never receive an add
    self.mark_for_deletion(dlkey)

    # In this case, we'd have to store the key in the users session so he can call add() later on
    if authData and authSig:
        session = current.session.get()

        if "pendingFileUploadKeys" not in session:
            session["pendingFileUploadKeys"] = []

        session["pendingFileUploadKeys"].append(key)

        # Clamp to the latest 50 pending uploads
        session["pendingFileUploadKeys"] = session["pendingFileUploadKeys"][-50:]
        session.markChanged()

    return self.render.view({
        "uploadKey": key,
        "uploadUrl": upload_url,
    })

1040 

@exposed
def download(self, blobKey: str, fileName: str = "", download: bool = False, sig: str = "", *args, **kwargs):
    """
    Download a file.

    Without a signature, `blobKey` is the path of the blob inside the Cloud Storage and the
    current user needs "root" or "file-view" access. With a signature, `blobKey` is the
    base64 encoded, NUL-separated tuple of download path, expiry and optional filename.

    :param blobKey: The unique blob key of the file.
    :param fileName: Optional filename to provide in the header.
    :param download: Set header to attachment retrival, set explictly to "1" if download is wanted.
    :param sig: Signature of `blobKey`, verified by `hmac_verify`.

    :return: The blob's content when served directly; otherwise a redirect is raised.

    :raises errors.UnprocessableEntity: If `fileName` is not a valid filename.
    :raises errors.BadRequest: If a signed `blobKey` cannot be decoded.
    :raises errors.Unauthorized: If no signature is given and no user is logged in.
    :raises errors.Forbidden: If the user lacks access or the signature is invalid.
    :raises errors.Gone: If the blob does not exist or the signed key has expired.
    :raises errors.Redirect: To a signed URL when the content is not served directly.
    """
    if filename := fileName.strip():
        if not self.is_valid_filename(filename):
            raise errors.UnprocessableEntity(f"The provided filename {filename!r} is invalid!")

    if not sig:
        # Check if the current user has the right to download *any* blob present in this application.
        # blobKey is then the path inside cloudstore - not a base64 encoded tuple.
        # Therefore it must not be base64-decoded here; the bucket is resolved from its first part.
        if not (usr := current.user.get()):
            raise errors.Unauthorized()
        if "root" not in usr["access"] and "file-view" not in usr["access"]:
            raise errors.Forbidden()
        validUntil = "-1"  # Prevent this from being cached down below
        download_filename = ""
        blob = self.get_bucket(blobKey.split("/", 1)[0]).get_blob(blobKey)

    else:
        # We got an request including a signature (probably a guest or a user without file-view access)
        try:
            values = base64.urlsafe_b64decode(blobKey).decode("UTF-8").split("\0")
        except ValueError:
            raise errors.BadRequest(f"Invalid encoding of blob key {blobKey!r}!")
        try:
            dlPath, validUntil, *download_filename = values
            # Maybe it's the old format, without a download_filename
            download_filename = download_filename[0] if download_filename else ""
        except ValueError:
            logging.error(f"Encoding of {blobKey=!r} OK. {values=} invalid.")
            raise errors.BadRequest(f"The blob key {blobKey!r} has an invalid amount of encoded values!")

        # Validate the signature, otherwise we don't need to proceed any further
        if not self.hmac_verify(blobKey, sig):
            raise errors.Forbidden()

        if validUntil != "0" and datetime.datetime.strptime(validUntil, "%Y%m%d%H%M") < datetime.datetime.now():
            blob = None
        else:
            blob = self.get_bucket(dlPath.split("/", 1)[0]).get_blob(dlPath)

    if not blob:
        raise errors.Gone("The requested blob has expired.")

    if not filename:
        filename = download_filename or urlquote(blob.name.rsplit("/", 1)[-1])

    content_disposition = utils.build_content_disposition_header(filename, attachment=download)

    if isinstance(_CREDENTIALS, ServiceAccountCredentials):
        expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
        signedUrl = blob.generate_signed_url(expiresAt, response_disposition=content_disposition, version="v4")
        raise errors.Redirect(signedUrl)

    elif conf.instance.is_dev_server:  # No Service-Account to sign with - Serve everything directly
        response = current.request.get().response
        response.headers["Content-Type"] = blob.content_type
        if content_disposition:
            response.headers["Content-Disposition"] = content_disposition
        return blob.download_as_bytes()

    # NOTE(review): blobKey is either a base64 tuple or a full blob path, so it presumably
    # never ends with PUBLIC_DLKEY_SUFFIX - confirm whether the dlkey was meant to be tested here.
    if validUntil == "0" or blobKey.endswith(PUBLIC_DLKEY_SUFFIX):  # Its an indefinitely valid URL
        if blob.size < 5 * 1024 * 1024:  # Less than 5 MB - Serve directly and push it into the edge caches
            response = current.request.get().response
            response.headers["Content-Type"] = blob.content_type
            response.headers["Cache-Control"] = "public, max-age=604800"  # 7 Days
            if content_disposition:
                response.headers["Content-Disposition"] = content_disposition
            return blob.download_as_bytes()

    # Default fallback - create a signed URL and redirect
    authRequest = google.auth.transport.requests.Request()
    expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
    signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "")
    signedUrl = blob.generate_signed_url(
        expiresAt,
        credentials=signing_credentials,
        response_disposition=content_disposition,
        version="v4")

    raise errors.Redirect(signedUrl)

1128 

SERVE_VALID_OPTIONS = {
    # cropping
    "c", "p",
    # flipping
    "fv", "fh",
    # rotation
    "r90", "r180", "r270",
    # scaling
    "nu",
}
"""
Valid modification option shorts for the serve-function.
This is passed-through to the Google UserContent API, and has to be supported there.
"""

# Maps an accepted filename extension to the format option sent to the Google UserContent API
SERVE_VALID_FORMATS = {"jpg": "rj", "jpeg": "rj", "png": "rp", "webp": "rw"}
"""
Valid file-formats to the serve-function.
This is passed-through to the Google UserContent API, and has to be supported there.
"""

1154 

@exposed
def serve(
    self,
    host: str,
    key: str,
    size: t.Optional[int] = None,
    filename: t.Optional[str] = None,
    options: str = "",
    download: bool = False,
):
    """
    Requests an image using the serving url to bypass direct Google requests.

    :param host: the google host prefix i.e. lh3
    :param key: the serving url key
    :param size: the target image size
    :param filename: a random string with an extension, valid extensions are defined in File.SERVE_VALID_FORMATS.
        Without a filename, "webp" is used as format.
    :param options: dash-separated options (defined in File.SERVE_VALID_OPTIONS).
        c - crop
        p - face crop
        fv - vertical flip
        fh - horizontal flip
        rXXX - rotate 90, 180, 270
        nu - no upscale
    :param download: Serves the content as download (Content-Disposition) or not.

    :return: Returns the requested content on success, raises a proper HTTP exception otherwise.

    :raises errors.BadRequest: On an invalid host, invalid options or when the file cannot be fetched.
    :raises errors.UnprocessableEntity: If the extension of `filename` is not supported.
    """
    if not host or any(c not in conf.search_valid_chars for c in host):
        raise errors.BadRequest("host is empty or contains invalid characters")

    # extract format from filename
    file_fmt = "webp"

    if filename:
        fmt = filename.rsplit(".", 1)[-1].lower()
        if fmt in self.SERVE_VALID_FORMATS:
            file_fmt = fmt
        else:
            raise errors.UnprocessableEntity(f"Unsupported filetype {fmt}")

    option_list = options.split("-") if options else []
    if not all(param in self.SERVE_VALID_OPTIONS for param in option_list):
        raise errors.BadRequest("Invalid options provided")

    # Assemble the modifiers as "[s<size>-][<options>-]<format>"; joining a list avoids the
    # empty segment (leading or double dash) that plain concatenation gives for empty options.
    modifiers = [f"s{size}"] if size else []
    modifiers += option_list
    modifiers.append(self.SERVE_VALID_FORMATS[file_fmt])

    url = f"https://{host}.googleusercontent.com/{key}=" + "-".join(modifiers)

    answ = requests.get(url, timeout=20)
    if not answ.ok:
        logging.error(f"{answ.status_code} {answ.text}")
        raise errors.BadRequest("Unable to fetch a file with these parameters")

    # Only announce image headers once there is image content to deliver
    response = current.request.get().response
    response.headers["Content-Type"] = f"image/{file_fmt}"
    response.headers["Cache-Control"] = "public, max-age=604800"  # 7 Days
    # Same guard as in download(): only set the header when one was built
    if content_disposition := utils.build_content_disposition_header(filename, attachment=download):
        response.headers["Content-Disposition"] = content_disposition

    return answ.content

1220 

@exposed
@force_ssl
@force_post
@skey(allow_empty=True)
def add(self, skelType: SkelType, node: db.Key | int | str | None = None, *args, **kwargs):
    """
    Finalize a pending upload (leaf) or add a node.

    Leafs cannot be added directly. Instead, the pending leaf identified by the `key`
    keyword argument is completed with the data of the one blob stored below its dlkey.
    Any other skeleton type is handed over to the parent implementation.

    :param skelType: The type of skeleton to add.
    :param node: Key of the target node, only passed through for non-leaf types.

    :return: The rendered add-success of the completed leaf, or the parent implementation's result.

    :raises errors.NotFound: If the pending leaf cannot be read.
    :raises errors.PreconditionFailed: If the leaf is not pending, or not exactly one blob is found.
    :raises errors.Forbidden: If the user may not add the leaf and the session holds no marker for it.
    """
    # Everything except leafs is handled by the parent implementation
    if skelType != "leaf":
        return super().add(skelType, node, *args, **kwargs)

    targetKey = kwargs.get("key")
    skel = self.addSkel("leaf")

    if not skel.read(targetKey):
        raise errors.NotFound()

    if not skel["pending"]:
        raise errors.PreconditionFailed()

    skel["pending"] = False
    skel["parententry"] = skel["pendingparententry"]

    rootNode = self.getRootNode(skel["parententry"]) if skel["parententry"] else None

    if not self.canAdd("leaf", rootNode):
        # Without the permission, a marker in this session is required
        # (it is created when a signed upload URL was used)
        session = current.session.get()
        pending_keys = session.get("pendingFileUploadKeys") or []
        if targetKey not in pending_keys:
            raise errors.Forbidden()
        session["pendingFileUploadKeys"].remove(targetKey)
        session.markChanged()

    # Fetch the content of the dlkey folder, which must hold exactly one blob
    dlkey_bucket = self.get_bucket(skel["dlkey"])
    blobs = list(dlkey_bucket.list_blobs(prefix=f"""{skel["dlkey"]}/"""))

    if len(blobs) != 1:
        logging.error("Invalid number of blobs in folder")
        logging.error(targetKey)
        raise errors.PreconditionFailed()

    (blob,) = blobs

    # Transfer the blob's data to the file skeleton
    skel["name"] = skel["name"].removesuffix(self.PENDING_POSTFIX)
    skel["mimetype"] = utils.string.escape(blob.content_type)
    skel["size"] = blob.size
    skel["parentrepo"] = rootNode["key"] if rootNode else None
    skel["weak"] = rootNode is None
    skel["crc32c_checksum"] = base64.b64decode(blob.crc32c).hex()
    skel["md5_checksum"] = base64.b64decode(blob.md5_hash).hex()

    self.onAdd("leaf", skel)
    skel.write()
    self.onAdded("leaf", skel)

    # The auto-generated download-URL isn't valid yet, so provide an updated one
    skel["downloadUrl"] = self.create_download_url(skel["dlkey"], skel["name"])

    return self.render.addSuccess(skel)

1283 

@exposed
def get_download_url(
    self,
    key: t.Optional[db.Key] = None,
    dlkey: t.Optional[str] = None,
    filename: t.Optional[str] = None,
    derived: bool = False,
):
    """
    Request a download url for a given file

    :param key: The key of the file
    :param dlkey: The download key of the file
    :param filename: The filename to be given. If no filename is provided
        downloadUrls for all derived files are returned in case of `derived=True`.
    :param derived: True, if a derived file download URL is being requested.

    :return: The rendered view of the download URL, or of a dict mapping every derived
        filename to its download URL.

    :raises errors.BadRequest: If neither `key` nor `dlkey` is provided.
    :raises errors.NotFound: If the file or the requested filename does not exist.
    :raises errors.Unauthorized: If the file may not be viewed.
    """
    skel = self.viewSkel("leaf")
    if dlkey is not None:
        skel = skel.all().filter("dlkey", dlkey).getSkel()
    elif key is None:
        raise errors.BadRequest("No key or dlkey provided")

    if not (skel and skel.read(key)):
        raise errors.NotFound()

    if not self.canView("leaf", skel):
        raise errors.Unauthorized()

    dlkey = skel["dlkey"]

    if derived:
        # A file without derivations has no "derived" value (or no "files" in it);
        # treat this as "no derived files" instead of failing on the lookup.
        derived_files = (skel["derived"] or {}).get("files") or {}

        if filename is None:
            res = {name: self.create_download_url(dlkey, name, derived) for name in derived_files}
        else:
            # Check if Filename exist in the Derives. We sign nothing that not exist.
            if filename not in derived_files:
                raise errors.NotFound("File not in derives")

            res = self.create_download_url(dlkey, filename, derived)
    else:
        if filename is None:
            filename = skel["name"]
        elif filename != skel["name"]:
            raise errors.NotFound("Filename not match")

        res = self.create_download_url(dlkey, filename, derived)

    return self.render.view(res)

1332 

def onEdit(self, skelType: SkelType, skel: SkeletonInstance):
    """
    Rename the blob in the Cloud Storage when the name of a leaf is changed.

    The previous name is taken from a skeleton fed with the `dbEntity` of `skel`.

    :param skelType: The type of the edited skeleton; only "leaf" is handled here.
    :param skel: The skeleton that is being edited.

    :raises errors.Gone: If the blob with the previous name does not exist.
    """
    super().onEdit(skelType, skel)

    if skelType != "leaf":
        return

    previous = self.editSkel(skelType)
    previous.setEntity(skel.dbEntity)

    if previous["name"] == skel["name"]:
        return  # name not changed, nothing to move

    # Move Blob to new name
    # https://cloud.google.com/storage/docs/copying-renaming-moving-objects
    source_dir = f"""{skel["dlkey"]}/source"""
    old_path = f"""{source_dir}/{html.unescape(previous["name"])}"""
    new_path = f"""{source_dir}/{html.unescape(skel["name"])}"""

    bucket = self.get_bucket(skel["dlkey"])

    old_blob = bucket.get_blob(old_path)
    if not old_blob:
        raise errors.Gone()

    # Renaming is done as copy followed by delete of the old blob
    bucket.copy_blob(old_blob, bucket, new_path, if_generation_match=0)
    bucket.delete_blob(old_path)

1355 

def onAdded(self, skelType: SkelType, skel: SkeletonInstance) -> None:
    """
    Trigger the image metadata extraction for added image leafs.

    Images larger than `IMAGE_META_MAX_SIZE` are skipped with a warning. The parent
    implementation is called in every case.

    :param skelType: The type of the added skeleton.
    :param skel: The skeleton that has been added.
    """
    if skelType == "leaf" and skel["mimetype"].startswith("image/"):
        if skel["size"] > self.IMAGE_META_MAX_SIZE:
            # Only skip the metadata extraction - the parent hook below must still run
            logging.warning(f"File size {skel['size']} exceeds limit {self.IMAGE_META_MAX_SIZE=}")
        else:
            self.set_image_meta(skel["key"])

    super().onAdded(skelType, skel)

1364 

@CallDeferred
def set_image_meta(self, key: db.Key) -> None:
    """
    Determine width and height of an image file and store them in its FileSkel.

    Nothing is written when the file is unknown, already has both values, its blob is
    missing in the Cloud Storage or the image cannot be identified.

    :param key: Key of the file to update.
    """
    skel = self.editSkel("leaf", key)

    if not skel.read(key):
        logging.error(f"File {key} does not exist")
        return

    if skel["width"] and skel["height"]:
        logging.info(f'File {skel["key"]} has already {skel["width"]=} and {skel["height"]=}')
        return

    # Names are unescaped to get the blob's name in the Cloud Storage
    blob_path = f"""{skel["dlkey"]}/source/{html.unescape(skel["name"])}"""
    blob = self.get_bucket(skel["dlkey"]).get_blob(blob_path)

    if not blob:
        logging.error(f"Blob {blob_path} is missing in Cloud Storage!")
        return

    # Load the blob into memory and rewind, so the image can be read from the start
    buffer = io.BytesIO()
    blob.download_to_file(buffer)
    buffer.seek(0)

    try:
        img = Image.open(buffer)
    except Image.UnidentifiedImageError as e:  # Can't load this image
        logging.exception(f'Cannot open {skel["key"]} | {skel["name"]} to set image meta data: {e}')
        return

    skel.patch(values={"width": img.width, "height": img.height})

1396 

def mark_for_deletion(self, dlkey: str) -> None:
    """
    Store a marker in the datastore stating that the file behind *dlkey* can be deleted.

    The marker is a "viur-deleted-files" entity with an "itercount" of 0. It is evaluated
    by the periodic cleanup (default: every 4 hours), which drops the marker when the file
    is still in use and otherwise removes the file after several checks. These delayed
    checks are necessary due to database inconsistency.

    If a marker for *dlkey* already exists, nothing is done.

    :param dlkey: Unique download-key of the file that shall be marked for deletion.
    """
    already_marked = db.Query("viur-deleted-files").filter("dlkey", dlkey).getEntry()
    if already_marked:
        return

    marker = db.Entity(db.Key("viur-deleted-files"))
    marker["itercount"] = 0
    marker["dlkey"] = str(dlkey)
    db.put(marker)

1418 

1419 

@PeriodicTask(interval=datetime.timedelta(hours=4))
def startCheckForUnreferencedBlobs():
    """
    Periodic entry point (every 4 hours): kick off the deferred search for
    blob locks that have been recently freed.
    """
    doCheckForUnreferencedBlobs()

1426 

1427 

@CallDeferred
def doCheckForUnreferencedBlobs(cursor=None):
    """
    Mark blobs for deletion that are no longer referenced by any blob lock.

    Processes up to 100 "viur-blob-locks" entities having old blob references. Each old
    reference that is not an active reference elsewhere gets a "viur-deleted-files" marker,
    unless it is already marked. The task re-schedules itself while the query provides a cursor.

    :param cursor: Cursor to continue the query from, None to start from the beginning.
    """

    def getOldBlobKeysTxn(dbKey):
        """Fetch the old blob references of a lock and reset them (or delete a stale lock)."""
        obj = db.get(dbKey)
        res = obj["old_blob_references"] or []
        if obj["is_stale"]:
            db.delete(dbKey)
        else:
            obj["has_old_blob_references"] = False
            obj["old_blob_references"] = []
            db.put(obj)
        return res

    query = db.Query("viur-blob-locks").filter("has_old_blob_references", True).setCursor(cursor)
    for lockObj in query.run(100):
        oldBlobKeys = db.run_in_transaction(getOldBlobKeysTxn, lockObj.key)
        for blobKey in oldBlobKeys:
            if db.Query("viur-blob-locks").filter("active_blob_references =", blobKey).getEntry():
                # This blob is referenced elsewhere
                logging.info(f"Stale blob is still referenced, {blobKey}")
                continue
            # Add a marker and schedule it for deletion
            fileObj = db.Query("viur-deleted-files").filter("dlkey", blobKey).getEntry()
            if fileObj:  # Its already marked
                logging.info(f"Stale blob already marked for deletion, {blobKey}")
                # Only skip this blob. The old references were already reset in the transaction,
                # so leaving the task here would lose all remaining ones and the continuation.
                continue
            fileObj = db.Entity(db.Key("viur-deleted-files"))
            fileObj["itercount"] = 0
            fileObj["dlkey"] = str(blobKey)
            logging.info(f"Stale blob marked dirty, {blobKey}")
            db.put(fileObj)
    newCursor = query.getCursor()
    if newCursor:
        doCheckForUnreferencedBlobs(newCursor)

1462 

1463 

@PeriodicTask(interval=datetime.timedelta(hours=4))
def startCleanupDeletedFiles():
    """
    Periodic entry point (every 4 hours): kick off the deferred cleanup, which
    increases the deletion counter on each blob currently not referenced and
    deletes it once that counter exceeds the cleanup's iteration limit.
    """
    doCleanupDeletedFiles()

1471 

1472 

@CallDeferred
def doCleanupDeletedFiles(cursor=None):
    """
    Process up to 100 "viur-deleted-files" markers.

    A marker without dlkey, or whose dlkey is an active blob reference, is dropped.
    Otherwise its "itercount" is increased, until it exceeds the limit: then the blobs
    below the dlkey, the marker, the matching file skeletons and their serving URLs
    are deleted. The task re-schedules itself while the query provides a cursor.

    :param cursor: Cursor to continue the query from, None to start from the beginning.
    """
    maxIterCount = 2  # How often a file will be checked for deletion

    query = db.Query("viur-deleted-files")
    if cursor:
        query.setCursor(cursor)

    for marker in query.run(100):
        # Broken marker, nothing can be cleaned up for it
        if "dlkey" not in marker:
            db.delete(marker.key)
            continue

        # The file is in use, so the marker goes away
        if db.Query("viur-blob-locks").filter("active_blob_references =", marker["dlkey"]).getEntry():
            logging.info(f"""is referenced, {marker["dlkey"]}""")
            db.delete(marker.key)
            continue

        # Not checked often enough yet
        if not marker["itercount"] > maxIterCount:
            logging.debug(f"""Increasing count, {marker["dlkey"]}""")
            marker["itercount"] += 1
            db.put(marker)
            continue

        logging.info(f"""Finally deleting, {marker["dlkey"]}""")
        bucket = conf.main_app.file.get_bucket(marker["dlkey"])
        for blob in bucket.list_blobs(prefix=f"""{marker["dlkey"]}/"""):
            blob.delete()
        db.delete(marker.key)

        # There should be exactly 1 or 0 of these
        file_skels = skeletonByKind("file")().all().filter("dlkey =", marker["dlkey"]).fetch(99)
        for f in file_skels:
            f.delete()

            if not f["serving_url"]:
                continue

            bucket = conf.main_app.file.get_bucket(f["dlkey"])
            gs_path = f"/gs/{bucket.name}/{f['dlkey']}/source/{f['name']}"
            images.delete_serving_url(blobstore.create_gs_key(gs_path))  # delete serving url

    newCursor = query.getCursor()
    if newCursor:
        doCleanupDeletedFiles(newCursor)

1510 

1511 

@PeriodicTask(interval=datetime.timedelta(hours=4))
def start_delete_pending_files():
    """
    Periodic entry point (every 4 hours): start the deletion of FileSkels that
    are still pending and were created more than 7 days ago.
    """
    pending_query = FileLeafSkel().all().filter("pending =", True)
    outdated_query = pending_query.filter("creationdate <", utils.utcNow() - datetime.timedelta(days=7))
    DeleteEntitiesIter.startIterOnQuery(outdated_query)

1522 

1523 

1524# DEPRECATED ATTRIBUTES HANDLING 

1525 

def __getattr__(attr: str) -> object:
    """
    Module-level attribute hook serving deprecated module attributes.

    A deprecated name is answered with its replacement value, after emitting a
    DeprecationWarning and a log warning naming the replacement.

    :param attr: Name of the attribute that was not found in the module.

    :return: The value provided for the deprecated attribute.

    :raises AttributeError: If `attr` is not a known deprecated attribute.
    """
    deprecated = {
        # stuff prior viur-core < 3.7
        "GOOGLE_STORAGE_BUCKET": ("conf.main_app.file.get_bucket()", _private_bucket),
    }

    if entry := deprecated.get(attr):
        msg = f"{attr} was replaced by {entry[0]}"
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        logging.warning(msg, stacklevel=2)
        return entry[1]

    # This hook only runs after the regular lookup failed, so the name does not exist.
    # Raise the error explicitly instead of looking the name up on a `super` object.
    raise AttributeError(f"module {__name__!r} has no attribute {attr!r}")