Coverage for /home/runner/work/viur-core/viur-core/viur/src/viur/core/modules/file.py: 0%

795 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-09-29 09:00 +0000

1import base64 

2import datetime 

3import hashlib 

4import hmac 

5import html 

6import io 

7import json 

8import logging 

9import re 

10import string 

11import typing as t 

12import warnings 

13from collections import namedtuple 

14from urllib.parse import quote as urlquote, urlencode 

15from urllib.request import urlopen 

16 

17import PIL 

18import PIL.ImageCms 

19import google.auth 

20import requests 

21from PIL import Image 

22from google.appengine.api import blobstore, images 

23from google.cloud import storage 

24from google.oauth2.service_account import Credentials as ServiceAccountCredentials 

25 

26from viur.core import conf, current, db, errors, utils, i18n 

27from viur.core.bones import BaseBone, BooleanBone, JsonBone, KeyBone, NumericBone, StringBone 

28 

29from viur.core.decorators import * 

30from viur.core.prototypes.tree import SkelType, Tree, TreeSkel 

31from viur.core.skeleton import SkeletonInstance, skeletonByKind 

32from viur.core.tasks import CallDeferred, DeleteEntitiesIter, PeriodicTask 

33 

# Globals for connectivity

# Pattern a filename has to match to be accepted:
#  - the whole name must not be one of the reserved device names (CON, PRN, AUX, NUL, COM1-9, LPT1-9),
#  - it must not contain control characters (0x00-0x1F) or any of <>:"/\|?*,
#  - its last character must additionally not be a dot or a space.
# Matching is case-insensitive.
VALID_FILENAME_REGEX = re.compile(
    # || MAY NOT BE THE NAME | MADE OF SPECIAL CHARS | SPECIAL CHARS + `. `|`
    r"^(?!^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$)[^\x00-\x1F<>:\"\/\\|?*]*[^\x00-\x1F<>:\"\/\\|?*. ]$",
    re.IGNORECASE
)

# Default credentials and project id are resolved once, at import time.
_CREDENTIALS, _PROJECT_ID = google.auth.default()
GOOGLE_STORAGE_CLIENT = storage.Client(_PROJECT_ID, _CREDENTIALS)

# Bucket names are derived from the project id.
PRIVATE_BUCKET_NAME = f"""{_PROJECT_ID}.appspot.com"""
PUBLIC_BUCKET_NAME = f"""public-dot-{_PROJECT_ID}"""
# Download keys ending with this suffix are served from the public bucket (see File.get_bucket).
PUBLIC_DLKEY_SUFFIX = "_pub"

# The private bucket is looked up at import time; the public bucket is looked up lazily on first use.
_private_bucket = GOOGLE_STORAGE_CLIENT.lookup_bucket(PRIVATE_BUCKET_NAME)
_public_bucket = None

# FilePath is a descriptor for ViUR file components
FilePath = namedtuple("FilePath", ("dlkey", "is_derived", "filename"))

54 

55 

def importBlobFromViur2(dlKey, fileName):
    """
    Import a blob from a legacy ViUR 2 storage location into the current bucket layout.

    The outcome of an attempt (success or failure) is stored in a ``viur-viur2-blobimport``
    entity keyed by ``dlKey``; if such an entity already exists, its result is returned
    without trying again.

    :param dlKey: The download key of the file to import.
    :param fileName: The name the blob gets below ``{dlKey}/source/``.
    :return: The download URL of the imported file on success, ``False`` otherwise.
    """
    bucket = conf.main_app.file.get_bucket(dlKey)

    if not conf.viur2import_blobsource:
        return False

    def fail(error, **extra):
        """Persist a failure marker for this dlKey and return False."""
        marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
        marker["success"] = False
        marker["error"] = error
        for name, value in extra.items():
            marker[name] = value
        db.put(marker)
        return False

    # A previous attempt (successful or not) is final.
    if existing_import := db.get(db.Key("viur-viur2-blobimport", dlKey)):
        return existing_import["dlurl"] if existing_import["success"] else False

    if conf.viur2import_blobsource["infoURL"]:
        # Ask the old system which blob belongs to this dlKey.
        try:
            response = urlopen(conf.viur2import_blobsource["infoURL"] + dlKey)
        except Exception:
            return fail("Failed URL-FETCH 1")

        # Make sure the HTTP response is closed again, whatever happens while reading it.
        with response:
            if response.status != 200:
                return fail("Failed URL-FETCH 2")

            import_data = json.loads(response.read())

        old_blob_name = conf.viur2import_blobsource["gsdir"] + "/" + import_data["key"]
    else:
        old_blob_name = conf.viur2import_blobsource["gsdir"] + "/" + dlKey

    src_blob = storage.Blob(bucket=bucket, name=old_blob_name)

    if not src_blob.exists():
        return fail("Local SRC-Blob missing", oldBlobName=old_blob_name)

    # Move the blob to the location the current file module expects.
    bucket.rename_blob(src_blob, f"{dlKey}/source/{fileName}")

    marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
    marker["success"] = True
    marker["old_src_key"] = dlKey
    marker["old_src_name"] = fileName
    marker["dlurl"] = conf.main_app.file.create_download_url(dlKey, fileName, False, None)
    db.put(marker)
    return marker["dlurl"]

103 

104 

def thumbnailer(fileSkel, existingFiles, params):
    """
    Built-in thumbnailer that renders resized versions of an image with PIL.

    The source blob is downloaded once; for every entry in ``params`` the image is re-opened,
    converted to sRGB if it carries an ICC profile, resized and uploaded below
    ``{dlkey}/derived/``.

    :param fileSkel: File skeleton (or dict) providing at least ``name`` and ``dlkey``.
    :param existingFiles: Not used by this implementation.
    :param params: Iterable of dicts; each needs ``width`` (and optionally ``height``), and may
        override ``fileExtension`` (default ``webp``) and ``mimeType`` (default ``image/webp``).
    :return: A list of ``(filename, size, mimetype, custom_data)`` tuples for the files written,
        or ``None`` if the source blob does not exist.
    """
    file_name = html.unescape(fileSkel["name"])
    bucket = conf.main_app.file.get_bucket(fileSkel["dlkey"])
    source_path = f"""{fileSkel["dlkey"]}/source/{file_name}"""

    blob = bucket.get_blob(source_path)
    if not blob:
        logging.warning(f"Blob {source_path} is missing from cloud storage!")
        return

    raw = io.BytesIO()
    blob.download_to_file(raw)

    derived_files = []

    for info in params:
        # Load a fresh image from the downloaded bytes for every requested size
        try:
            raw.seek(0)
            img = PIL.Image.open(raw)
        except PIL.Image.UnidentifiedImageError:
            # Not an image PIL understands, so no other resolution will work either
            break

        icc_profile = img.info.get("icc_profile")
        if icc_profile:
            # The embedded color profile would get lost on conversion (e.g. to WEBP),
            # therefore the pixels are converted to sRGB beforehand.
            src_profile = PIL.ImageCms.ImageCmsProfile(io.BytesIO(icc_profile))
            dst_profile = PIL.ImageCms.createProfile("sRGB")
            try:
                img = PIL.ImageCms.profileToProfile(
                    img,
                    inputProfile=src_profile,
                    outputProfile=dst_profile,
                    outputMode="RGBA" if img.has_transparency_data else "RGB",
                )
            except Exception as e:
                logging.debug(f"{info=}")
                logging.exception(e)
                continue

        file_extension = info.get("fileExtension", "webp")
        mimetype = info.get("mimeType", "image/webp")

        if "width" not in info:
            # Without a width there is no default fallback - ignore this entry
            continue

        width = info["width"]
        if "height" in info:
            height = info["height"]
            target_filename = f"thumbnail-{width}-{height}.{file_extension}"
        else:
            # Only the width is given: derive the height from the source's aspect ratio
            height = int(float(img.size[1]) * float(width / float(img.size[0])))
            target_filename = f"thumbnail-w{width}.{file_extension}"

        # Render the resized version into memory
        output = io.BytesIO()

        try:
            img = img.resize((width, height), PIL.Image.LANCZOS)
        except ValueError as e:
            # Usually happens to some files, like TIFF-images.
            logging.debug(f"{info=}")
            logging.exception(e)
            break

        img.save(output, file_extension)

        # Store the derived file next to the source
        target_size = output.tell()
        output.seek(0)
        target_blob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{target_filename}""")
        target_blob.upload_from_file(output, content_type=mimetype)

        custom_data = {"mimetype": mimetype, "width": width, "height": height}
        derived_files.append((target_filename, target_size, mimetype, custom_data))

    return derived_files

184 

185 

def cloudfunction_thumbnailer(fileSkel, existingFiles, params):
    """External Thumbnailer for images.

    The corresponding cloudfunction can be found here:
    https://github.com/viur-framework/viur-cloudfunctions/tree/main/thumbnailer

    You can use it like so:
    main.py:

    .. code-block:: python

        from viur.core.modules.file import cloudfunction_thumbnailer

        conf.file_thumbnailer_url = "https://xxxxx.cloudfunctions.net/imagerenderer"
        conf.file_derivations = {"thumbnail": cloudfunction_thumbnailer}

        conf.derives_pdf = {
            "thumbnail": [{"width": 1920,"sites":"1,2"}]
        }

    skeletons/xxx.py:
    .. code-block:: python

        test = FileBone(derive=conf.derives_pdf)

    The cloudfunction is called twice: first with ``nameOnly=True`` to learn the names of the
    files it is going to create, then - after a resumable upload session has been opened for each
    of these names - with ``nameOnly=False`` and the upload URLs to actually render the files.

    :param fileSkel: File skeleton (or dict) providing ``name``, ``dlkey`` and ``mimetype``.
    :param existingFiles: Not used by this implementation.
    :param params: Derivation parameters, passed on to the cloudfunction unchanged.
    :return: A list of ``(name, size, mimetype, customData)`` tuples built from the cloudfunction's
        response, or ``None`` if the source blob is missing or a request failed.
    :raises ValueError: If ``conf.file_thumbnailer_url`` is not set.
    """

    if not conf.file_thumbnailer_url:
        raise ValueError("conf.file_thumbnailer_url is not set")

    bucket = conf.main_app.file.get_bucket(fileSkel["dlkey"])

    def getsignedurl():
        # Returns a URL the cloudfunction can fetch the source file from, or None if the blob is missing.
        # NOTE: `file_name` is bound further below, before this closure is called for the first time.
        if conf.instance.is_dev_server:
            signedUrl = conf.main_app.file.create_download_url(fileSkel["dlkey"], fileSkel["name"])
        else:
            path = f"""{fileSkel["dlkey"]}/source/{file_name}"""
            if not (blob := bucket.get_blob(path)):
                logging.warning(f"Blob {path} is missing from cloud storage!")
                return None
            authRequest = google.auth.transport.requests.Request()
            # The signed URL is only valid for 60 seconds
            expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
            signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "")
            content_disposition = utils.build_content_disposition_header(fileSkel["name"])
            signedUrl = blob.generate_signed_url(
                expiresAt,
                credentials=signing_credentials,
                response_disposition=content_disposition,
                version="v4")
        return signedUrl

    def make_request():
        # Sends the current content of `dataDict` (bound further below) to the cloudfunction as
        # base64-encoded JSON together with its HMAC signature. Returns the decoded JSON response,
        # or None on any failure (which is logged).
        headers = {"Content-Type": "application/json"}
        data_str = base64.b64encode(json.dumps(dataDict).encode("UTF-8"))
        sig = conf.main_app.file.hmac_sign(data_str)
        datadump = json.dumps({"dataStr": data_str.decode('ASCII'), "sign": sig})
        resp = requests.post(conf.file_thumbnailer_url, data=datadump, headers=headers, allow_redirects=False)
        if resp.status_code != 200:  # Error Handling
            match resp.status_code:
                case 302:
                    # Google responds with a 302 redirect to an auth site when the cloudfunction was not found
                    # https://cloud.google.com/functions/docs/troubleshooting#login
                    logging.error("Cloudfunction not found")
                case 404:
                    logging.error("Cloudfunction not found")
                case 403:
                    logging.error("No permission for the Cloudfunction")
                case _:
                    logging.error(
                        f"cloudfunction_thumbnailer failed with code: {resp.status_code} and data: {resp.content}")
            return

        try:
            response_data = resp.json()
        except Exception as e:
            logging.error(f"response could not be converted in json failed with: {e=}")
            return
        if "error" in response_data:
            logging.error(f"cloudfunction_thumbnailer failed with: {response_data.get('error')}")
            return

        return response_data

    file_name = html.unescape(fileSkel["name"])

    # First pass: only ask for the names of the files that are going to be derived
    if not (url := getsignedurl()):
        return
    dataDict = {
        "url": url,
        "name": fileSkel["name"],
        "params": params,
        # NOTE(review): the key is spelled "minetype"; presumably the cloudfunction expects exactly
        # this spelling - confirm before renaming.
        "minetype": fileSkel["mimetype"],
        "baseUrl": current.request.get().request.host_url.lower(),
        "targetKey": fileSkel["dlkey"],
        "nameOnly": True
    }
    if not (derivedData := make_request()):
        return

    # Open one resumable upload session per announced file
    uploadUrls = {}
    for data in derivedData["values"]:
        fileName = conf.main_app.file.sanitize_filename(data["name"])
        blob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{fileName}""")
        uploadUrls[fileSkel["dlkey"] + fileName] = blob.create_resumable_upload_session(timeout=60,
                                                                                       content_type=data["mimeType"])

    # Second pass: a fresh source URL is requested, as the first one is short-lived
    if not (url := getsignedurl()):
        return

    dataDict["url"] = url
    dataDict["nameOnly"] = False
    dataDict["uploadUrls"] = uploadUrls

    if not (derivedData := make_request()):
        return
    reslist = []
    try:
        for derived in derivedData["values"]:
            for key, value in derived.items():
                reslist.append((key, value["size"], value["mimetype"], value["customData"]))

    except Exception as e:
        # A malformed response is logged; whatever has been collected so far is still returned
        logging.error(f"cloudfunction_thumbnailer failed with: {e=}")
    return reslist

309 

310 

class DownloadUrlBone(BaseBone):
    """
    Bone that provides a freshly signed download url for a FileSkel each time it is unserialized.
    """

    def unserialize(self, skel, name):
        """
        Compute the download url from the skeleton's ``dlkey`` and ``name``.

        :param skel: The skeleton instance this bone belongs to.
        :param name: The name of this bone inside the skeleton.
        :return: True if a url was injected, False if the entity lacks ``dlkey`` or ``name``.
        """
        if "dlkey" not in skel.dbEntity or "name" not in skel.dbEntity:
            return False

        # The lifetime of the url is taken from the configuration
        skel.accessedValues[name] = conf.main_app.file.create_download_url(
            skel["dlkey"],
            skel["name"],
            expires=conf.render_json_download_url_expiration,
        )
        return True

324 

325 

class FileLeafSkel(TreeSkel):
    """
    Default file leaf skeleton.

    Describes one file: its name, size, mimetype and checksums, the download key (``dlkey``)
    under which its blob is stored, and the files derived from it.
    """
    kindName = "file"

    # The filename; validated with File.is_valid_filename()
    name = StringBone(
        descr="Filename",
        caseSensitive=False,
        searchable=True,
        vfunc=lambda val: None if File.is_valid_filename(val) else "Invalid filename provided",
    )

    # Alternative description, one value per configured language
    alt = StringBone(
        descr=i18n.translate(
            "viur.core.image.alt",
            defaultText="Alternative description",
        ),
        searchable=True,
        languages=conf.i18n.available_languages,
    )

    size = NumericBone(
        descr="Filesize in Bytes",
        readOnly=True,
        searchable=True,
    )

    # Download key; also the first path component of the blob in the bucket
    dlkey = StringBone(
        descr="Download-Key",
        readOnly=True,
    )

    mimetype = StringBone(
        descr="MIME-Type",
        readOnly=True,
    )

    # Weak files do not lock their dlkey on their own (see preProcessBlobLocks)
    weak = BooleanBone(
        descr="Weak reference",
        readOnly=True,
        visible=False,
    )

    pending = BooleanBone(
        descr="Pending upload",
        readOnly=True,
        visible=False,
        defaultValue=False,
    )

    width = NumericBone(
        descr="Width",
        readOnly=True,
        searchable=True,
    )

    height = NumericBone(
        descr="Height",
        readOnly=True,
        searchable=True,
    )

    # Not stored as-is: DownloadUrlBone signs a fresh url whenever the skeleton is unserialized
    downloadUrl = DownloadUrlBone(
        descr="Download-URL",
        readOnly=True,
        visible=False,
    )

    # Information about derived files; File.create_src_set() reads its "files" mapping
    derived = JsonBone(
        descr="Derived Files",
        readOnly=True,
        visible=False,
    )

    pendingparententry = KeyBone(
        descr="Pending key Reference",
        readOnly=True,
        visible=False,
    )

    crc32c_checksum = StringBone(
        descr="CRC32C checksum",
        readOnly=True,
    )

    md5_checksum = StringBone(
        descr="MD5 checksum",
        readOnly=True,
    )

    public = BooleanBone(
        descr="Public File",
        readOnly=True,
        defaultValue=False,
    )

    # Filled by _inject_serving_url() for public image files
    serving_url = StringBone(
        descr="Serving-URL",
        readOnly=True,
        params={
            "tooltip": "The 'serving_url' is only available in public file repositories.",
        }
    )

    @classmethod
    def _inject_serving_url(cls, skel: SkeletonInstance) -> None:
        """Inject the serving url for public image files into a FileSkel"""
        # Only public images that do not have a serving url yet are handled
        if (
            skel["public"]
            and skel["mimetype"]
            and skel["mimetype"].startswith("image/")
            and not skel["serving_url"]
        ):
            bucket = File.get_bucket(skel["dlkey"])
            filename = f"/gs/{bucket.name}/{skel['dlkey']}/source/{skel['name']}"

            # Trying this on local development server will raise a
            # `google.appengine.runtime.apiproxy_errors.RPCFailedError`
            if conf.instance.is_dev_server:
                logging.warning(f"Can't inject serving_url for {filename!r} on local development server")
                return

            try:
                skel["serving_url"] = images.get_serving_url(None, secure_url=True, filename=filename)

            except Exception as e:
                # Best effort: a failure is logged, the skeleton simply stays without serving url
                logging.warning(f"Failed to create serving_url for {filename!r} with exception {e!r}")
                logging.exception(e)

    def preProcessBlobLocks(self, locks):
        """
        Ensure that our dlkey is locked even if we don't have a filebone here

        :param locks: The set of dlkeys to lock; the own dlkey is added unless the file is weak.
        :return: The (possibly extended) set of locks.
        """
        if not self["weak"] and self["dlkey"]:
            locks.add(self["dlkey"])
        return locks

    @classmethod
    def refresh(cls, skel):
        """
        Refresh the skeleton; additionally tries a ViUR 2 blob import if one is configured
        and injects the serving url.
        """
        super().refresh(skel)
        if conf.viur2import_blobsource:
            importData = importBlobFromViur2(skel["dlkey"], skel["name"])
            if importData:
                # importData is the download url of the imported blob
                if not skel["downloadUrl"]:
                    skel["downloadUrl"] = importData
                skel["pendingparententry"] = None

        cls._inject_serving_url(skel)

    @classmethod
    def write(cls, skel, **kwargs):
        """Write the skeleton; the serving url is injected beforehand."""
        cls._inject_serving_url(skel)
        return super().write(skel, **kwargs)

480 

481 

class FileNodeSkel(TreeSkel):
    """
    Default file node skeleton.

    Describes a node (folder) of the file tree.
    """
    kindName = "file_rootNode"  # FIXME: VIUR4, don't use "_rootNode" kindname

    # Name of the node; mandatory
    name = StringBone(
        descr="Name",
        required=True,
        searchable=True
    )

    rootNode = BooleanBone(
        descr="Is RootNode",
        defaultValue=False,
        readOnly=True,
        visible=False,
    )

    public = BooleanBone(
        descr="Is public?",
        defaultValue=False,
        readOnly=True,
        visible=False,
    )

    # NOTE(review): presumably overrides a bone inherited from the base skeleton to disable it - confirm
    viurCurrentSeoKeys = None

509 

510 

class File(Tree):
    """
    The file module: a Tree using FileLeafSkel for its leafs and FileNodeSkel for its nodes.
    """
    # NOTE(review): presumably appended to the name of files whose upload has not finished - confirm
    PENDING_POSTFIX = " (pending)"
    # Prefix of the signed download urls created by create_download_url()
    DOWNLOAD_URL_PREFIX = "/file/download/"
    # Prefix of the urls created by create_internal_serving_url()
    INTERNAL_SERVING_URL_PREFIX = "/file/serve/"
    # Maximum length of a filename accepted by is_valid_filename()
    MAX_FILENAME_LEN = 256
    IMAGE_META_MAX_SIZE: t.Final[int] = 10 * 1024 ** 2
    """Maximum size of image files that should be analysed in :meth:`set_image_meta`.
    Default: 10 MiB"""

    leafSkelCls = FileLeafSkel
    nodeSkelCls = FileNodeSkel

    handler = "tree.simple.file"
    adminInfo = {
        "icon": "folder-fill",
        "handler": handler,  # fixme: Use static handler; Remove with VIUR4!
    }

    # Maps role names to the access rights granted on this module; "*" is a wildcard
    roles = {
        "*": "view",
        "editor": ("add", "edit"),
        "admin": "*",
    }

    default_order = "name"

    # Helper functions currently reside here

538 

539 @staticmethod 

540 def get_bucket(dlkey: str) -> google.cloud.storage.bucket.Bucket: 

541 """ 

542 Retrieves a Google Cloud Storage bucket for the given dlkey. 

543 """ 

544 global _public_bucket 

545 if dlkey and dlkey.endswith(PUBLIC_DLKEY_SUFFIX): 

546 if _public_bucket or (_public_bucket := GOOGLE_STORAGE_CLIENT.lookup_bucket(PUBLIC_BUCKET_NAME)): 

547 return _public_bucket 

548 

549 raise ValueError( 

550 f"""The bucket '{PUBLIC_BUCKET_NAME}' does not exist! Please create it with ACL access.""" 

551 ) 

552 

553 return _private_bucket 

554 

555 @classmethod 

556 def is_valid_filename(cls, filename: str) -> bool: 

557 """ 

558 Verifies a valid filename. 

559 

560 The filename should be valid on Linux, Mac OS and Windows. 

561 It should not be longer than MAX_FILENAME_LEN chars. 

562 

563 Rule set: https://stackoverflow.com/a/31976060/3749896 

564 Regex test: https://regex101.com/r/iBYpoC/1 

565 """ 

566 if not filename.strip(): 

567 return False 

568 

569 if len(filename) > cls.MAX_FILENAME_LEN: 

570 return False 

571 

572 return bool(re.match(VALID_FILENAME_REGEX, filename)) 

573 

574 @staticmethod 

575 def hmac_sign(data: t.Any) -> str: 

576 assert conf.file_hmac_key is not None, "No hmac-key set!" 

577 if not isinstance(data, bytes): 

578 data = str(data).encode("UTF-8") 

579 return hmac.new(conf.file_hmac_key, msg=data, digestmod=hashlib.sha3_384).hexdigest() 

580 

581 @classmethod 

582 def hmac_verify(cls, data: t.Any, signature: str) -> bool: 

583 return hmac.compare_digest(cls.hmac_sign(data.encode("ASCII")), signature) 

584 

585 @classmethod 

586 def create_internal_serving_url( 

587 cls, 

588 serving_url: str, 

589 size: int = 0, 

590 filename: str = "", 

591 options: str = "", 

592 download: bool = False 

593 ) -> str: 

594 """ 

595 Helper function to generate an internal serving url (endpoint: /file/serve) from a Google serving url. 

596 

597 This is needed to hide requests to Google as they are internally be routed, and can be the result of a 

598 legal requirement like GDPR. 

599 

600 :param serving_url: Is the original serving URL as generated from FileLeafSkel._inject_serving_url() 

601 :param size: Optional size setting 

602 :param filename: Optonal filename setting 

603 :param options: Additional options parameter-pass through to /file/serve 

604 :param download: Download parameter-pass through to /file/serve 

605 """ 

606 

607 # Split a serving URL into its components, used by serve function. 

608 res = re.match( 

609 r"^https:\/\/(.*?)\.googleusercontent\.com\/(.*?)$", 

610 serving_url 

611 ) 

612 

613 if not res: 

614 raise ValueError(f"Invalid {serving_url=!r} provided") 

615 

616 # Create internal serving URL 

617 serving_url = cls.INTERNAL_SERVING_URL_PREFIX + "/".join(res.groups()) 

618 

619 # Append additional parameters 

620 if params := { 

621 k: v for k, v in { 

622 "download": download, 

623 "filename": filename, 

624 "options": options, 

625 "size": size, 

626 }.items() if v 

627 }: 

628 serving_url += f"?{urlencode(params)}" 

629 

630 return serving_url 

631 

632 @classmethod 

633 def create_download_url( 

634 cls, 

635 dlkey: str, 

636 filename: str, 

637 derived: bool = False, 

638 expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1), 

639 download_filename: t.Optional[str] = None 

640 ) -> str: 

641 """ 

642 Utility function that creates a signed download-url for the given folder/filename combination 

643 

644 :param folder: The GCS-Folder (= the download-key) for that file 

645 :param filename: The name of the file. Either the original filename or the name of a derived file. 

646 :param derived: True, if it points to a derived file, False if it points to the original uploaded file 

647 :param expires: 

648 None if the file is supposed to be public (which causes it to be cached on the google ede caches), 

649 otherwise a datetime.timedelta of how long that link should be valid 

650 :param download_filename: If set, browser is enforced to download this blob with the given alternate 

651 filename 

652 :return: The signed download-url relative to the current domain (eg /download/...) 

653 """ 

654 if isinstance(expires, int): 

655 expires = datetime.timedelta(minutes=expires) 

656 

657 # Undo escaping on ()= performed on fileNames 

658 filename = filename.replace("&#040;", "(").replace("&#041;", ")").replace("&#061;", "=") 

659 filepath = f"""{dlkey}/{"derived" if derived else "source"}/{filename}""" 

660 

661 if download_filename: 

662 if not cls.is_valid_filename(download_filename): 

663 raise errors.UnprocessableEntity(f"Invalid download_filename {download_filename!r} provided") 

664 

665 download_filename = urlquote(download_filename) 

666 

667 expires = (datetime.datetime.now() + expires).strftime("%Y%m%d%H%M") if expires else 0 

668 

669 data = base64.urlsafe_b64encode(f"""{filepath}\0{expires}\0{download_filename or ""}""".encode("UTF-8")) 

670 sig = cls.hmac_sign(data) 

671 

672 return f"""{cls.DOWNLOAD_URL_PREFIX}{data.decode("ASCII")}?sig={sig}""" 

673 

674 @classmethod 

675 def parse_download_url(cls, url) -> t.Optional[FilePath]: 

676 """ 

677 Parses a file download URL in the format `/file/download/xxxx?sig=yyyy` into its FilePath. 

678 

679 If the URL cannot be parsed, the function returns None. 

680 

681 :param url: The file download URL to be parsed. 

682 :return: A FilePath on success, None otherwise. 

683 """ 

684 if not url.startswith(cls.DOWNLOAD_URL_PREFIX) or "?" not in url: 

685 return None 

686 

687 data, sig = url.removeprefix(cls.DOWNLOAD_URL_PREFIX).split("?", 1) # Strip "/file/download/" and split on "?" 

688 sig = sig.removeprefix("sig=") 

689 

690 if not cls.hmac_verify(data, sig): 

691 # Invalid signature 

692 return None 

693 

694 # Split the blobKey into the individual fields it should contain 

695 data = base64.urlsafe_b64decode(data).decode("UTF-8") 

696 

697 match data.count("\0"): 

698 case 2: 

699 dlpath, valid_until, _ = data.split("\0") 

700 case 1: 

701 # It's the old format, without an downloadFileName 

702 dlpath, valid_until = data.split("\0") 

703 case _: 

704 # Invalid path 

705 return None 

706 

707 if valid_until != "0" and datetime.strptime(valid_until, "%Y%m%d%H%M") < datetime.now(): 

708 # Signature expired 

709 return None 

710 

711 if dlpath.count("/") != 2: 

712 # Invalid path 

713 return None 

714 

715 dlkey, derived, filename = dlpath.split("/") 

716 return FilePath(dlkey, derived != "source", filename) 

717 

718 @classmethod 

719 def create_src_set( 

720 cls, 

721 file: t.Union["SkeletonInstance", dict, str], 

722 expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1), 

723 width: t.Optional[int] = None, 

724 height: t.Optional[int] = None, 

725 language: t.Optional[str] = None, 

726 ) -> str: 

727 """ 

728 Generates a string suitable for use as the srcset tag in html. This functionality provides the browser 

729 with a list of images in different sizes and allows it to choose the smallest file that will fill it's 

730 viewport without upscaling. 

731 

732 :param file: The file skeleton (or if multiple=True a single value from it) to generate the srcset. 

733 :param expires: 

734 None if the file is supposed to be public (which causes it to be cached on the google edecaches), 

735 otherwise it's lifetime in seconds 

736 :param width: 

737 A list of widths that should be included in the srcset. 

738 If a given width is not available, it will be skipped. 

739 :param height: A list of heights that should be included in the srcset. If a given height is not available, 

740 it will be skipped. 

741 :param language: Language overwrite if file has multiple languages, and we want to explicitly specify one 

742 :return: The srctag generated or an empty string if a invalid file object was supplied 

743 """ 

744 if not width and not height: 

745 logging.error("Neither width or height supplied") 

746 return "" 

747 

748 if isinstance(file, str): 

749 file = db.Query("file").filter("dlkey =", file).order(("creationdate", db.SortOrder.Ascending)).getEntry() 

750 

751 if not file: 

752 return "" 

753 

754 if isinstance(file, i18n.LanguageWrapper): 

755 language = language or current.language.get() 

756 if not language or not (file := cls.get(language)): 

757 return "" 

758 

759 if "dlkey" not in file and "dest" in file: 

760 file = file["dest"] 

761 

762 from viur.core.skeleton import SkeletonInstance # avoid circular imports 

763 

764 if not ( 

765 isinstance(file, (SkeletonInstance, dict)) 

766 and "dlkey" in file 

767 and "derived" in file 

768 ): 

769 logging.error("Invalid file supplied") 

770 return "" 

771 

772 if not isinstance(file["derived"], dict): 

773 logging.error("No derives available") 

774 return "" 

775 

776 src_set = [] 

777 for filename, derivate in file["derived"]["files"].items(): 

778 customData = derivate.get("customData", {}) 

779 

780 if width and customData.get("width") in width: 

781 src_set.append( 

782 f"""{cls.create_download_url(file["dlkey"], filename, True, expires)} {customData["width"]}w""" 

783 ) 

784 

785 if height and customData.get("height") in height: 

786 src_set.append( 

787 f"""{cls.create_download_url(file["dlkey"], filename, True, expires)} {customData["height"]}h""" 

788 ) 

789 

790 return ", ".join(src_set) 

791 

792 def write( 

793 self, 

794 filename: str, 

795 content: t.Any, 

796 mimetype: str = "text/plain", 

797 *, 

798 width: int = None, 

799 height: int = None, 

800 public: bool = False, 

801 rootnode: t.Optional[db.Key] = None, 

802 folder: t.Iterable[str] | str = (), 

803 ) -> db.Key: 

804 """ 

805 Write a file from any bytes-like object into the file module. 

806 

807 If *folder* and *rootnode* are both set, the file is added to the repository in that folder. 

808 If only *folder* is set, the file is added to the default repository in that folder. 

809 If only *rootnode* is set, the file is added to that repository in the root folder. 

810 

811 If both are not set, the file is added without a path or repository as a weak file. 

812 It will not be visible in admin in this case. 

813 

814 :param filename: Filename to be written. 

815 :param content: The file content to be written, as bytes-like object. 

816 :param mimetype: The file's mimetype. 

817 :param width: Optional width information for the file. 

818 :param height: Optional height information for the file. 

819 :param public: True if the file should be publicly accessible. 

820 :param rootnode: Optional root-node of the repository to add the file to 

821 :param folder: Optional folder the file should be written into. 

822 

823 :return: Returns the key of the file object written. This can be associated e.g. with a FileBone. 

824 """ 

825 # logging.info(f"{filename=} {mimetype=} {width=} {height=} {public=}") 

826 if not self.is_valid_filename(filename): 

827 raise ValueError(f"{filename=} is invalid") 

828 

829 # Folder mode? 

830 if folder: 

831 # Validate correct folder naming 

832 if isinstance(folder, str): 

833 folder = folder, # make it a tuple 

834 

835 for foldername in folder: 

836 if not self.is_valid_filename(foldername): 

837 raise ValueError(f"{foldername=} is invalid") 

838 

839 # When in folder-mode, a rootnode must exist! 

840 if rootnode is None: 

841 rootnode = self.ensureOwnModuleRootNode() 

842 

843 parentrepokey = rootnode.key 

844 parentfolderkey = rootnode.key 

845 

846 for foldername in folder: 

847 query = self.addSkel("node").all() 

848 query.filter("parentrepo", parentrepokey) 

849 query.filter("parententry", parentfolderkey) 

850 query.filter("name", foldername) 

851 

852 if folder_skel := query.getSkel(): 

853 # Skip existing folder 

854 parentfolderkey = folder_skel["key"] 

855 else: 

856 # Create new folder 

857 folder_skel = self.addSkel("node") 

858 

859 folder_skel["name"] = foldername 

860 folder_skel["parentrepo"] = parentrepokey 

861 folder_skel["parententry"] = parentfolderkey 

862 folder_skel.write() 

863 

864 parentfolderkey = folder_skel["key"] 

865 

866 else: 

867 parentrepokey = None 

868 parentfolderkey = None 

869 

870 # Write the file 

871 dl_key = utils.string.random() 

872 

873 if public: 

874 dl_key += PUBLIC_DLKEY_SUFFIX # mark file as public 

875 

876 bucket = self.get_bucket(dl_key) 

877 

878 blob = bucket.blob(f"{dl_key}/source/{filename}") 

879 blob.upload_from_file(io.BytesIO(content), content_type=mimetype) 

880 

881 fileskel = self.addSkel("leaf") 

882 

883 fileskel["parentrepo"] = parentrepokey 

884 fileskel["parententry"] = parentfolderkey 

885 fileskel["name"] = filename 

886 fileskel["size"] = blob.size 

887 fileskel["mimetype"] = mimetype 

888 fileskel["dlkey"] = dl_key 

889 fileskel["weak"] = bool(parentrepokey) 

890 fileskel["public"] = public 

891 fileskel["width"] = width 

892 fileskel["height"] = height 

893 fileskel["crc32c_checksum"] = base64.b64decode(blob.crc32c).hex() 

894 fileskel["md5_checksum"] = base64.b64decode(blob.md5_hash).hex() 

895 fileskel["pending"] = False 

896 

897 return fileskel.write()["key"] 

898 

899 def read( 

900 self, 

901 key: db.Key | int | str | None = None, 

902 path: str | None = None, 

903 ) -> tuple[io.BytesIO, str]: 

904 """ 

905 Read a file from the Cloud Storage. 

906 

907 If a key and a path are provided, the key is preferred. 

908 This means that the entry in the db is searched first and if this is not found, the path is used. 

909 

910 :param key: Key of the LeafSkel that contains the "dlkey" and the "name". 

911 :param path: The path of the file in the Cloud Storage Bucket. 

912 

913 :return: Returns the file as a io.BytesIO buffer and the content-type 

914 """ 

915 if not key and not path: 

916 raise ValueError("Please provide a key or a path") 

917 

918 if key: 

919 skel = self.viewSkel("leaf") 

920 if not skel.read(db.key_helper(key, skel.kindName)): 

921 if not path: 

922 raise ValueError("This skeleton is not in the database!") 

923 else: 

924 path = f"""{skel["dlkey"]}/source/{skel["name"]}""" 

925 

926 bucket = self.get_bucket(skel["dlkey"]) 

927 else: 

928 bucket = self.get_bucket(path.split("/", 1)[0]) # path's first part is dlkey plus eventual postfix 

929 

930 blob = bucket.blob(path) 

931 return io.BytesIO(blob.download_as_bytes()), blob.content_type 

932 

@CallDeferred
def deleteRecursive(self, parentKey):
    """
    Delete all files and directories below the given node, descending into sub-directories.

    Every file found is marked for deletion by its dlkey before its skeleton
    is deleted. Sub-directories are handled by calling this function again.

    :param parentKey: Key of the node whose children shall be removed.
    """
    files = db.Query(self.leafSkelCls().kindName).filter("parentdir =", parentKey).iter()
    for fileEntry in files:
        self.mark_for_deletion(fileEntry["dlkey"])
        skel = self.leafSkelCls()

        # `key` is accessed as an attribute everywhere else in this module
        # (see the directory loop below); calling it would fail.
        if skel.read(fileEntry.key):
            skel.delete()

    dirs = db.Query(self.nodeSkelCls().kindName).filter("parentdir", parentKey).iter()
    for d in dirs:
        self.deleteRecursive(d.key)
        skel = self.nodeSkelCls()
        if skel.read(d.key):
            skel.delete()

948 

@exposed
@skey
def getUploadURL(
    self,
    fileName: str,
    mimeType: str,
    size: t.Optional[int] = None,
    node: t.Optional[str | db.Key] = None,
    authData: t.Optional[str] = None,
    authSig: t.Optional[str] = None,
    public: bool = False,
):
    """
    Create a resumable upload session and a pending file entry for a new upload.

    Either the request carries signed authentication data (`authData` plus
    `authSig`), which then dictates the target node, the allowed mime-types
    and the maximum size, or the current user must be allowed to add leafs.

    :param fileName: Name of the file to upload.
    :param mimeType: Mime-type of the file as announced by the client.
    :param size: Size of the file in bytes. Required when the signed
        authentication data defines a maximum size.
    :param node: Key of the node the file shall be added to.
    :param authData: Base64 encoded JSON with the upload restrictions.
    :param authSig: Signature over `authData`.
    :param public: Whether the file is uploaded as a public file.

    :return: The rendered "uploadKey" and "uploadUrl".
    """
    filename = fileName.strip()  # VIUR4 FIXME: just for compatiblity of the parameter names

    if not self.is_valid_filename(filename):
        raise errors.UnprocessableEntity(f"Invalid filename {filename!r} provided")

    # Validate the mimetype from the client seems legit
    mimetype = mimeType.strip().lower()
    allowed_chars = string.ascii_letters + string.digits + "/-.+"
    if not (
        mimetype
        and mimetype.count("/") == 1
        and all(ch in allowed_chars for ch in mimetype)
    ):
        raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype!r} provided")

    # Validate authentication data
    if authData and authSig:
        # First, validate the signature, otherwise we don't need to proceed further
        if not self.hmac_verify(authData, authSig):
            raise errors.Unauthorized()

        authData = json.loads(base64.b64decode(authData.encode("ASCII")).decode("UTF-8"))

        if datetime.datetime.strptime(authData["validUntil"], "%Y%m%d%H%M") < datetime.datetime.now():
            raise errors.Gone("The upload URL has expired")

        if authData["validMimeTypes"]:
            # An entry either matches exactly, or is a prefix pattern ending in "*"
            if not any(
                validMimeType == mimetype
                or (validMimeType.endswith("*") and mimetype.startswith(validMimeType[:-1]))
                for validMimeType in authData["validMimeTypes"]
            ):
                raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype} provided")

        node = authData["node"]
        maxSize = authData["maxSize"]

    else:
        rootNode = None
        if node and not (rootNode := self.getRootNode(node)):
            raise errors.NotFound(f"No valid root node found for {node=}")

        if not self.canAdd("leaf", rootNode):
            raise errors.Forbidden()

        if rootNode and public != bool(rootNode.get("public")):
            raise errors.Forbidden("Cannot upload a public file into private repository or vice versa")

        maxSize = None  # The user has some file/add permissions, don't restrict fileSize

    if maxSize:
        # A size limit can only be enforced when the client announces a size;
        # comparing None against the limit would otherwise fail with a TypeError.
        if size is None:
            raise errors.UnprocessableEntity(f"A size is required, the maximum size is {maxSize}")
        if size > maxSize:
            raise errors.UnprocessableEntity(f"Size {size} exceeds maximum size {maxSize}")
    else:
        size = None

    # Create upload-URL and download key
    dlkey = utils.string.random()  # let's roll a random key

    if public:
        dlkey += PUBLIC_DLKEY_SUFFIX  # mark file as public

    blob = self.get_bucket(dlkey).blob(f"{dlkey}/source/{filename}")
    # Use the validated and normalized mime-type, not the raw client input
    upload_url = blob.create_resumable_upload_session(content_type=mimetype, size=size, timeout=60)

    # Create a corresponding file-lock object early, otherwise we would have to ensure that the file-lock object
    # the user creates matches the file he had uploaded
    file_skel = self.addSkel("leaf")

    file_skel["name"] = filename + self.PENDING_POSTFIX
    file_skel["size"] = 0
    file_skel["mimetype"] = "application/octetstream"
    file_skel["dlkey"] = dlkey
    file_skel["parentdir"] = None
    file_skel["pendingparententry"] = db.key_helper(node, self.addSkel("node").kindName) if node else None
    file_skel["pending"] = True
    file_skel["weak"] = True
    file_skel["public"] = public
    file_skel["width"] = 0
    file_skel["height"] = 0

    file_skel.write()
    key = str(file_skel["key"])

    # Mark that entry dirty as we might never receive an add
    self.mark_for_deletion(dlkey)

    # In this case, we'd have to store the key in the users session so he can call add() later on
    if authData and authSig:
        session = current.session.get()

        if "pendingFileUploadKeys" not in session:
            session["pendingFileUploadKeys"] = []

        session["pendingFileUploadKeys"].append(key)

        # Clamp to the latest 50 pending uploads
        session["pendingFileUploadKeys"] = session["pendingFileUploadKeys"][-50:]
        session.markChanged()

    return self.render.view({
        "uploadKey": key,
        "uploadUrl": upload_url,
    })

1066 

@exposed
def download(self, blobKey: str, fileName: str = "", download: bool = False, sig: str = "", *args, **kwargs):
    """
    Download a file.

    The blob is either delivered directly in the response, or the request
    is redirected to a signed URL that is valid for 60 seconds.

    :param blobKey: The unique blob key of the file. It is decoded as urlsafe base64 into
        NUL-separated values: the download path, the expiry ("0" means no expiry check)
        and optionally a filename.
    :param fileName: Optional filename to provide in the header.
    :param download: Set header to attachment retrieval, set explicitly to "1" if download is wanted.
    :param sig: Signature over `blobKey`. Without a signature, a logged-in user with
        "root" or "file-view" access is required.

    :raises errors.UnprocessableEntity: If the provided filename is invalid.
    :raises errors.BadRequest: If the blob key cannot be decoded or unpacked.
    :raises errors.Unauthorized: If no signature is given and no user is logged in.
    :raises errors.Forbidden: If the user lacks access or the signature is invalid.
    :raises errors.Gone: If the link has expired or the blob does not exist.
    :raises errors.Redirect: To hand the client over to a signed URL.
    """
    if filename := fileName.strip():
        if not self.is_valid_filename(filename):
            raise errors.UnprocessableEntity(f"The provided filename {filename!r} is invalid!")

    # NOTE(review): the blob key is decoded before the signature branch below, so a plain
    # storage path (as described in the unsigned branch) only gets this far if it happens
    # to decode as base64 + UTF-8 - confirm whether that is intended.
    try:
        values = base64.urlsafe_b64decode(blobKey).decode("UTF-8").split("\0")
    except ValueError:
        raise errors.BadRequest(f"Invalid encoding of blob key {blobKey!r}!")
    try:
        dlPath, validUntil, *download_filename = values
        # Maybe it's the old format, without a download_filename
        download_filename = download_filename[0] if download_filename else ""
    except ValueError:
        logging.error(f"Encoding of {blobKey=!r} OK. {values=} invalid.")
        raise errors.BadRequest(f"The blob key {blobKey!r} has an invalid amount of encoded values!")

    # The first segment of the download path selects the bucket
    bucket = self.get_bucket(dlPath.split("/", 1)[0])

    if not sig:
        # Check if the current user has the right to download *any* blob present in this application.
        # blobKey is then the path inside cloudstore - not a base64 encoded tuple
        if not (usr := current.user.get()):
            raise errors.Unauthorized()
        if "root" not in usr["access"] and "file-view" not in usr["access"]:
            raise errors.Forbidden()
        validUntil = "-1"  # Prevent this from being cached down below
        blob = bucket.get_blob(blobKey)

    else:
        # We got a request including a signature (probably a guest or a user without file-view access)
        # First, validate the signature, otherwise we don't need to proceed any further
        if not self.hmac_verify(blobKey, sig):
            raise errors.Forbidden()

        # An expired link is treated like a missing blob
        if validUntil != "0" and datetime.datetime.strptime(validUntil, "%Y%m%d%H%M") < datetime.datetime.now():
            blob = None
        else:
            blob = bucket.get_blob(dlPath)

    if not blob:
        raise errors.Gone("The requested blob has expired.")

    # Filename precedence: request parameter, then the signed filename, then the blob's own name
    if not filename:
        filename = download_filename or urlquote(blob.name.rsplit("/", 1)[-1])

    content_disposition = utils.build_content_disposition_header(filename, attachment=download)

    if isinstance(_CREDENTIALS, ServiceAccountCredentials):
        # Service account credentials can sign the URL themselves
        expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
        signedUrl = blob.generate_signed_url(expiresAt, response_disposition=content_disposition, version="v4")
        raise errors.Redirect(signedUrl)

    elif conf.instance.is_dev_server:  # No Service-Account to sign with - Serve everything directly
        response = current.request.get().response
        response.headers["Content-Type"] = blob.content_type
        if content_disposition:
            response.headers["Content-Disposition"] = content_disposition
        return blob.download_as_bytes()

    # NOTE(review): blobKey is the encoded token (or a path ending in the filename), so the
    # suffix test on it presumably rarely matches; the dlkey part of dlPath may be meant - confirm.
    if validUntil == "0" or blobKey.endswith(PUBLIC_DLKEY_SUFFIX):  # It's an indefinitely valid URL
        if blob.size < 5 * 1024 * 1024:  # Less than 5 MB - Serve directly and push it into the edge caches
            response = current.request.get().response
            response.headers["Content-Type"] = blob.content_type
            response.headers["Cache-Control"] = "public, max-age=604800"  # 7 Days
            if content_disposition:
                response.headers["Content-Disposition"] = content_disposition
            return blob.download_as_bytes()

    # Default fallback - create a signed URL and redirect
    authRequest = google.auth.transport.requests.Request()
    expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
    signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "")
    signedUrl = blob.generate_signed_url(
        expiresAt,
        credentials=signing_credentials,
        response_disposition=content_disposition,
        version="v4")

    raise errors.Redirect(signedUrl)

1154 

# Option shorts that the serve-function accepts, joined by "-" in the request
SERVE_VALID_OPTIONS = {"c", "p", "fv", "fh", "r90", "r180", "r270", "nu"}

1165 """ 

1166 Valid modification option shorts for the serve-function. 

1167 This is passed-through to the Google UserContent API, and has to be supported there. 

1168 """ 

1169 

# Maps a file extension accepted by the serve-function to its format option short
SERVE_VALID_FORMATS = {"jpg": "rj", "jpeg": "rj", "png": "rp", "webp": "rw"}

1176 """ 

1177 Valid file-formats to the serve-function. 

1178 This is passed-through to the Google UserContent API, and has to be supported there. 

1179 """ 

1180 

@exposed
def serve(
    self,
    host: str,
    key: str,
    size: t.Optional[int] = None,
    filename: t.Optional[str] = None,
    options: str = "",
    download: bool = False,
):
    """
    Requests an image using the serving url to bypass direct Google requests.

    :param host: the google host prefix i.e. lh3
    :param key: the serving url key
    :param size: the target image size
    :param filename: a random string with an extension, valid extensions are (defined in File.SERVE_VALID_FORMATS).
        Without a filename, the image is served as webp.
    :param options: - separated options (defined in File.SERVE_VALID_OPTIONS).
        c - crop
        p - face crop
        fv - vertical flip
        fh - horizontal flip
        rXXX - rotate 90, 180, 270
        nu - no upscale
    :param download: Serves the content as download (Content-Disposition) or not.

    :return: Returns the requested content on success, raises a proper HTTP exception otherwise.
    :raises errors.BadRequest: If the host or the options are invalid, or the image cannot be fetched.
    :raises errors.UnprocessableEntity: If the extension of the filename is not supported.
    """

    # It is the host that is validated here, so the message has to name the host
    # NOTE(review): `key` is inserted into the URL below without any validation - confirm this is intended.
    if any(c not in conf.search_valid_chars for c in host):
        raise errors.BadRequest("host contains invalid characters")

    # extract format from filename
    file_fmt = "webp"

    if filename:
        fmt = filename.rsplit(".", 1)[-1].lower()
        if fmt in self.SERVE_VALID_FORMATS:
            file_fmt = fmt
        else:
            raise errors.UnprocessableEntity(f"Unsupported filetype {fmt}")

    url = f"https://{host}.googleusercontent.com/{key}"

    if options and not all(param in self.SERVE_VALID_OPTIONS for param in options.split("-")):
        raise errors.BadRequest("Invalid options provided")

    # The format option is always appended, the size option is put in front
    options += f"-{self.SERVE_VALID_FORMATS[file_fmt]}"

    if size:
        options = f"s{size}-" + options

    url += "=" + options

    response = current.request.get().response
    response.headers["Content-Type"] = f"image/{file_fmt}"
    response.headers["Cache-Control"] = "public, max-age=604800"  # 7 Days
    response.headers["Content-Disposition"] = utils.build_content_disposition_header(filename, attachment=download)

    answ = requests.get(url, timeout=20)
    if not answ.ok:
        logging.error(f"{answ.status_code} {answ.text}")
        raise errors.BadRequest("Unable to fetch a file with these parameters")

    return answ.content

1246 

@exposed
@force_ssl
@force_post
@skey(allow_empty=True)
def add(self, skelType: SkelType, node: db.Key | int | str | None = None, *args, **kwargs):
    """
    Finalize a pending upload (leafs) or add a new node.

    Leafs cannot be added directly: the file is uploaded first, and the pending
    entry created for that upload is completed here, addressed by the "key"
    keyword argument. Anything else is handed to the parent implementation.
    """
    if skelType != "leaf":
        return super().add(skelType, node, *args, **kwargs)

    # Leafs are handled separately: complete the pending entry of the upload
    targetKey = kwargs.get("key")
    skel = self.addSkel("leaf")

    if not skel.read(targetKey):
        raise errors.NotFound()

    if not skel["pending"]:
        raise errors.PreconditionFailed()

    skel["pending"] = False
    skel["parententry"] = skel["pendingparententry"]

    rootNode = self.getRootNode(skel["parententry"]) if skel["parententry"] else None

    if not self.canAdd("leaf", rootNode):
        # Without add permission, the upload must have been announced in this
        # session (which is the case when a signed upload URL was used)
        session = current.session.get()
        announced_keys = session.get("pendingFileUploadKeys") or []
        if targetKey not in announced_keys:
            raise errors.Forbidden()
        session["pendingFileUploadKeys"].remove(targetKey)
        session.markChanged()

    # The folder of the dlkey must contain exactly the one uploaded blob
    dlkey_prefix = f"""{skel["dlkey"]}/"""
    blobs = list(self.get_bucket(skel["dlkey"]).list_blobs(prefix=dlkey_prefix))
    if len(blobs) != 1:
        logging.error("Invalid number of blobs in folder")
        logging.error(targetKey)
        raise errors.PreconditionFailed()

    blob = blobs[0]

    # Transfer the facts of the uploaded blob into the file skeleton
    skel["name"] = skel["name"].removesuffix(self.PENDING_POSTFIX)
    skel["mimetype"] = utils.string.escape(blob.content_type)
    skel["size"] = blob.size
    skel["parentrepo"] = rootNode["key"] if rootNode else None
    skel["weak"] = rootNode is None
    skel["crc32c_checksum"] = base64.b64decode(blob.crc32c).hex()
    skel["md5_checksum"] = base64.b64decode(blob.md5_hash).hex()

    self.onAdd("leaf", skel)
    skel.write()
    self.onAdded("leaf", skel)

    # Add updated download-URL as the auto-generated isn't valid yet
    skel["downloadUrl"] = self.create_download_url(skel["dlkey"], skel["name"])

    return self.render.addSuccess(skel)

1309 

@exposed
def get_download_url(
    self,
    key: t.Optional[db.Key] = None,
    dlkey: t.Optional[str] = None,
    filename: t.Optional[str] = None,
    derived: bool = False,
):
    """
    Request a download url for a given file
    :param key: The key of the file
    :param dlkey: The download key of the file
    :param filename: The filename to be given. If no filename is provided
        downloadUrls for all derived files are returned in case of `derived=True`.
    :param derived: True, if a derived file download URL is being requested.

    :return: The rendered download URL, or a rendered mapping of filename to
        download URL when all derived files are requested.
    :raises errors.BadRequest: If neither key nor dlkey is provided.
    :raises errors.NotFound: If the file, the derived file or the filename does not match.
    :raises errors.Unauthorized: If the file may not be viewed.
    """
    skel = self.viewSkel("leaf")
    if dlkey is not None:
        skel = skel.all().filter("dlkey", dlkey).getSkel()
    elif key is None:
        raise errors.BadRequest("No key or dlkey provided")

    if not (skel and skel.read(key)):
        raise errors.NotFound()

    if not self.canView("leaf", skel):
        raise errors.Unauthorized()

    dlkey = skel["dlkey"]

    if derived:
        # A file without derivations has no "derived" value or no "files" in it;
        # treat both as "no derived files" instead of failing on the lookup.
        derived_files = (skel["derived"] or {}).get("files") or {}

        if filename is None:
            res = {
                derived_name: self.create_download_url(dlkey, derived_name, derived)
                for derived_name in derived_files
            }
        else:
            # Check if Filename exist in the Derives. We sign nothing that not exist.
            if filename not in derived_files:
                raise errors.NotFound("File not in derives")

            res = self.create_download_url(dlkey, filename, derived)
    else:
        if filename is None:
            filename = skel["name"]
        elif filename != skel["name"]:
            raise errors.NotFound("Filename not match")

        res = self.create_download_url(dlkey, filename, derived)

    return self.render.view(res)

1358 

def onEdit(self, skelType: SkelType, skel: SkeletonInstance):
    """
    Rename the blob in the Cloud Storage when the name of a file was changed.

    :raises errors.Gone: If the blob with the previous name does not exist.
    """
    super().onEdit(skelType, skel)

    if skelType != "leaf":
        return

    # A second skeleton on the same entity provides the stored (previous) name
    previous = self.editSkel(skelType)
    previous.setEntity(skel.dbEntity)

    if previous["name"] == skel["name"]:
        return  # name not changed, nothing to move

    # Move Blob to new name
    # https://cloud.google.com/storage/docs/copying-renaming-moving-objects
    source_folder = f"""{skel["dlkey"]}/source/"""
    old_path = source_folder + html.unescape(previous["name"])
    new_path = source_folder + html.unescape(skel["name"])

    bucket = self.get_bucket(skel["dlkey"])

    old_blob = bucket.get_blob(old_path)
    if not old_blob:
        raise errors.Gone()

    bucket.copy_blob(old_blob, bucket, new_path, if_generation_match=0)
    bucket.delete_blob(old_path)

1381 

def onAdded(self, skelType: SkelType, skel: SkeletonInstance) -> None:
    """
    Trigger reading the image dimensions for newly added image files.

    Images larger than `IMAGE_META_MAX_SIZE` are skipped with a warning.
    The parent hook is called in every case.
    """
    if skelType == "leaf" and skel["mimetype"].startswith("image/"):
        if skel["size"] > self.IMAGE_META_MAX_SIZE:
            # Only the metadata is skipped; the parent hook below must still run
            logging.warning(f"File size {skel['size']} exceeds limit {self.IMAGE_META_MAX_SIZE=}")
        else:
            self.set_image_meta(skel["key"])

    super().onAdded(skelType, skel)

1390 

@CallDeferred
def set_image_meta(self, key: db.Key) -> None:
    """
    Read width and height of an image file and store them in its FileSkel.

    Nothing is written if the file does not exist, already has both values,
    its blob is missing, or the blob cannot be opened as an image.

    :param key: Key of the file skeleton to update.
    """
    skel = self.editSkel("leaf", key)
    if not skel.read(key):
        logging.error(f"File {key} does not exist")
        return

    if skel["width"] and skel["height"]:
        logging.info(f'File {skel["key"]} has already {skel["width"]=} and {skel["height"]=}')
        return

    file_name = html.unescape(skel["name"])
    blob_path = f"""{skel["dlkey"]}/source/{file_name}"""
    blob = self.get_bucket(skel["dlkey"]).get_blob(blob_path)
    if not blob:
        logging.error(f'Blob {blob_path} is missing in Cloud Storage!')
        return

    # Load the blob into memory and rewind, so that the image can be read from the start
    buffer = io.BytesIO()
    blob.download_to_file(buffer)
    buffer.seek(0)

    try:
        img = Image.open(buffer)
    except Image.UnidentifiedImageError as err:  # Can't load this image
        logging.exception(f'Cannot open {skel["key"]} | {skel["name"]} to set image meta data: {err}')
        return

    skel.patch(values={"width": img.width, "height": img.height})

1422 

def mark_for_deletion(self, dlkey: str) -> None:
    """
    Adds a marker to the datastore that the file specified as *dlkey* can be deleted.

    The marker is an entity of the kind "viur-deleted-files" that starts with
    an "itercount" of 0. If such a marker already exists for the dlkey,
    nothing is written.

    :param dlkey: Unique download-key of the file that shall be marked for deletion.
    """
    if db.Query("viur-deleted-files").filter("dlkey", dlkey).getEntry():
        return  # There is already a marker for this dlkey

    marker = db.Entity(db.Key("viur-deleted-files"))
    marker["itercount"] = 0
    marker["dlkey"] = str(dlkey)

    db.put(marker)

1444 

1445 

@PeriodicTask(interval=datetime.timedelta(hours=4))
def startCheckForUnreferencedBlobs():
    """
    Start searching for blob locks that have been recently freed.

    Registered with an interval of 4 hours; the work itself is done by
    `doCheckForUnreferencedBlobs`.
    """
    doCheckForUnreferencedBlobs()

1452 

1453 

@CallDeferred
def doCheckForUnreferencedBlobs(cursor=None):
    """
    Mark blobs for deletion that were released by their lock objects.

    Takes up to 100 "viur-blob-locks" entities with old blob references,
    clears these references and adds a "viur-deleted-files" marker for each
    blob that is neither referenced elsewhere nor already marked. Continues
    with the next batch as long as the query provides a cursor.

    :param cursor: Cursor to continue a previous run with.
    """

    def getOldBlobKeysTxn(dbKey):
        """Fetch the old blob references of a lock object and clear them (or delete a stale lock object)."""
        obj = db.get(dbKey)
        res = obj["old_blob_references"] or []
        if obj["is_stale"]:
            db.delete(dbKey)
        else:
            obj["has_old_blob_references"] = False
            obj["old_blob_references"] = []
            db.put(obj)
        return res

    query = db.Query("viur-blob-locks").filter("has_old_blob_references", True).setCursor(cursor)
    for lockObj in query.run(100):
        oldBlobKeys = db.run_in_transaction(getOldBlobKeysTxn, lockObj.key)
        for blobKey in oldBlobKeys:
            if db.Query("viur-blob-locks").filter("active_blob_references =", blobKey).getEntry():
                # This blob is referenced elsewhere
                logging.info(f"Stale blob is still referenced, {blobKey}")
                continue
            # Add a marker and schedule it for deletion
            fileObj = db.Query("viur-deleted-files").filter("dlkey", blobKey).getEntry()
            if fileObj:  # It's already marked
                logging.info(f"Stale blob already marked for deletion, {blobKey}")
                # Only skip this blob. The references were already cleared in the
                # transaction above, so leaving the function here would drop the
                # remaining blobs of this lock object and all following lock objects.
                continue
            fileObj = db.Entity(db.Key("viur-deleted-files"))
            fileObj["itercount"] = 0
            fileObj["dlkey"] = str(blobKey)
            logging.info(f"Stale blob marked dirty, {blobKey}")
            db.put(fileObj)
    newCursor = query.getCursor()
    if newCursor:
        doCheckForUnreferencedBlobs(newCursor)

1488 

1489 

@PeriodicTask(interval=datetime.timedelta(hours=4))
def startCleanupDeletedFiles():
    """
    Increase deletion counter on each blob currently not referenced and delete
    it once that counter exceeds maxIterCount.

    Registered with an interval of 4 hours; the work itself is done by
    `doCleanupDeletedFiles`.
    """
    doCleanupDeletedFiles()

1497 

1498 

@CallDeferred
def doCleanupDeletedFiles(cursor=None):
    """
    Process up to 100 "viur-deleted-files" markers.

    A marker without dlkey, or for a blob that is referenced again, is removed.
    Otherwise its counter is increased, until it exceeds the limit: then the
    blobs, the marker, the file skeletons and their serving urls are deleted.
    Continues with the next batch as long as the query provides a cursor.

    :param cursor: Cursor to continue a previous run with.
    """
    maxIterCount = 2  # How often a file will be checked for deletion
    query = db.Query("viur-deleted-files")
    if cursor:
        query.setCursor(cursor)

    for marker in query.run(100):
        if "dlkey" not in marker:
            db.delete(marker.key)
            continue

        dlkey = marker["dlkey"]

        if db.Query("viur-blob-locks").filter("active_blob_references =", dlkey).getEntry():
            logging.info(f"is referenced, {dlkey}")
            db.delete(marker.key)
            continue

        if not marker["itercount"] > maxIterCount:
            logging.debug(f"Increasing count, {dlkey}")
            marker["itercount"] += 1
            db.put(marker)
            continue

        logging.info(f"Finally deleting, {dlkey}")
        bucket = conf.main_app.file.get_bucket(dlkey)
        for blob in bucket.list_blobs(prefix=f"{dlkey}/"):
            blob.delete()
        db.delete(marker.key)

        # There should be exactly 1 or 0 of these
        for file_skel in skeletonByKind("file")().all().filter("dlkey =", dlkey).fetch(99):
            file_skel.delete()

            if file_skel["serving_url"]:
                bucket = conf.main_app.file.get_bucket(file_skel["dlkey"])
                gs_path = f"/gs/{bucket.name}/{file_skel['dlkey']}/source/{file_skel['name']}"
                images.delete_serving_url(blobstore.create_gs_key(gs_path))  # delete serving url

    newCursor = query.getCursor()
    if newCursor:
        doCleanupDeletedFiles(newCursor)

1536 

1537 

@PeriodicTask(interval=datetime.timedelta(hours=4))
def start_delete_pending_files():
    """
    Start deletion of pending FileSkels that are older than 7 days.
    """
    oldest_allowed = utils.utcNow() - datetime.timedelta(days=7)
    outdated_pending = (
        FileLeafSkel().all()
        .filter("pending =", True)
        .filter("creationdate <", oldest_allowed)
    )
    DeleteEntitiesIter.startIterOnQuery(outdated_pending)

1548 

1549 

1550# DEPRECATED ATTRIBUTES HANDLING 

1551 

def __getattr__(attr: str) -> object:
    """
    Resolve deprecated module attributes.

    Called for names that are not found in this module. A known deprecated
    name is answered with its replacement value, together with a
    DeprecationWarning and a log entry.

    :param attr: Name of the requested module attribute.
    :return: The value for a deprecated attribute.
    :raises AttributeError: If the name is not a known deprecated attribute.
    """
    if entry := {
        # stuff prior viur-core < 3.7
        "GOOGLE_STORAGE_BUCKET": ("conf.main_app.file.get_bucket()", _private_bucket),
    }.get(attr):
        msg = f"{attr} was replaced by {entry[0]}"
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        logging.warning(msg, stacklevel=2)
        return entry[1]

    # Raise explicitly: looking the name up on a `super` object would answer
    # names like "__self__" with a value instead of reporting them as missing.
    raise AttributeError(f"module {__name__!r} has no attribute {attr!r}")