Coverage for  / home / runner / work / viur-core / viur-core / viur / src / viur / core / modules / file.py: 16%

798 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-21 21:16 +0000

1import base64 

2import datetime 

3import hashlib 

4import hmac 

5import html 

6import io 

7import json 

8import logging 

9import re 

10import string 

11import typing as t 

12import warnings 

13from collections import namedtuple 

14from urllib.parse import quote as urlquote, urlencode 

15from urllib.request import urlopen 

16 

17import PIL 

18import PIL.ImageCms 

19import google.auth 

20import requests 

21from PIL import Image 

22from google.appengine.api import blobstore, images 

23from google.cloud import storage 

24from google.oauth2.service_account import Credentials as ServiceAccountCredentials 

25 

26from viur.core import conf, current, db, errors, utils, i18n 

27from viur.core.bones import BaseBone, BooleanBone, JsonBone, KeyBone, NumericBone, StringBone 

28 

29from viur.core.decorators import * 

30from viur.core.prototypes.tree import SkelType, Tree, TreeSkel 

31from viur.core.skeleton import SkeletonInstance, skeletonByKind 

32from viur.core.tasks import CallDeferred, DeleteEntitiesIter, PeriodicTask 

33 

34# Globals for connectivity 

35 

36VALID_FILENAME_REGEX = re.compile( 

37 # || MAY NOT BE THE NAME | MADE OF SPECIAL CHARS | SPECIAL CHARS + `. `|` 

38 r"^(?!^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$)[^\x00-\x1F<>:\"\/\\|?*]*[^\x00-\x1F<>:\"\/\\|?*. ]$", 

39 re.IGNORECASE 

40) 

41 

42_CREDENTIALS, _PROJECT_ID = google.auth.default() 

43GOOGLE_STORAGE_CLIENT = storage.Client(_PROJECT_ID, _CREDENTIALS) 

44 

45PRIVATE_BUCKET_NAME = f"""{_PROJECT_ID}.appspot.com""" 

46PUBLIC_BUCKET_NAME = f"""public-dot-{_PROJECT_ID}""" 

47PUBLIC_DLKEY_SUFFIX = "_pub" 

48 

49_private_bucket = GOOGLE_STORAGE_CLIENT.lookup_bucket(PRIVATE_BUCKET_NAME) 

50_public_bucket = None 

51 

52# FilePath is a descriptor for ViUR file components 

53FilePath = namedtuple("FilePath", ("dlkey", "is_derived", "filename")) 

54 

55 

56def importBlobFromViur2(dlKey, fileName): 

57 bucket = conf.main_app.file.get_bucket(dlKey) 

58 

59 if not conf.viur2import_blobsource: 

60 return False 

61 existingImport = db.get(db.Key("viur-viur2-blobimport", dlKey)) 

62 if existingImport: 

63 if existingImport["success"]: 

64 return existingImport["dlurl"] 

65 return False 

66 if conf.viur2import_blobsource["infoURL"]: 

67 try: 

68 importDataReq = urlopen(conf.viur2import_blobsource["infoURL"] + dlKey) 

69 except Exception as e: 

70 marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey)) 

71 marker["success"] = False 

72 marker["error"] = "Failed URL-FETCH 1" 

73 db.put(marker) 

74 return False 

75 if importDataReq.status != 200: 

76 marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey)) 

77 marker["success"] = False 

78 marker["error"] = "Failed URL-FETCH 2" 

79 db.put(marker) 

80 return False 

81 importData = json.loads(importDataReq.read()) 

82 oldBlobName = conf.viur2import_blobsource["gsdir"] + "/" + importData["key"] 

83 srcBlob = storage.Blob(bucket=bucket, 

84 name=conf.viur2import_blobsource["gsdir"] + "/" + importData["key"]) 

85 else: 

86 oldBlobName = conf.viur2import_blobsource["gsdir"] + "/" + dlKey 

87 srcBlob = storage.Blob(bucket=bucket, name=conf.viur2import_blobsource["gsdir"] + "/" + dlKey) 

88 if not srcBlob.exists(): 

89 marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey)) 

90 marker["success"] = False 

91 marker["error"] = "Local SRC-Blob missing" 

92 marker["oldBlobName"] = oldBlobName 

93 db.put(marker) 

94 return False 

95 bucket.rename_blob(srcBlob, f"{dlKey}/source/{fileName}") 

96 marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey)) 

97 marker["success"] = True 

98 marker["old_src_key"] = dlKey 

99 marker["old_src_name"] = fileName 

100 marker["dlurl"] = conf.main_app.file.create_download_url(dlKey, fileName, False, None) 

101 db.put(marker) 

102 return marker["dlurl"] 

103 

104 

105def thumbnailer(fileSkel, existingFiles, params): 

106 file_name = html.unescape(fileSkel["name"]) 

107 bucket = conf.main_app.file.get_bucket(fileSkel["dlkey"]) 

108 

109 blob = bucket.get_blob(f"""{fileSkel["dlkey"]}/source/{file_name}""") 

110 if not blob: 

111 logging.warning(f"""Blob {fileSkel["dlkey"]}/source/{file_name} is missing from cloud storage!""") 

112 return 

113 

114 source = io.BytesIO() 

115 blob.download_to_file(source) 

116 

117 result = [] 

118 

119 for info in params: 

120 # Read the image into PIL 

121 try: 

122 source.seek(0) 

123 img = PIL.Image.open(source) 

124 except PIL.Image.UnidentifiedImageError: # Can't load this image; so there's no need to try other resolutions 

125 break 

126 

127 if icc_profile := img.info.get("icc_profile"): 

128 # JPEGs might be encoded with a non-standard color-profile; we need to compensate for this if we convert 

129 # to WEBp as we'll loose this color-profile information 

130 f = io.BytesIO(icc_profile) 

131 src_profile = PIL.ImageCms.ImageCmsProfile(f) 

132 dst_profile = PIL.ImageCms.createProfile("sRGB") 

133 try: 

134 img = PIL.ImageCms.profileToProfile( 

135 img, 

136 inputProfile=src_profile, 

137 outputProfile=dst_profile, 

138 outputMode="RGBA" if img.has_transparency_data else "RGB") 

139 except Exception as e: 

140 logging.debug(f"{info=}") 

141 logging.exception(e) 

142 continue 

143 

144 file_extension = info.get("fileExtension", "webp") 

145 mimetype = info.get("mimeType", "image/webp") 

146 

147 if "width" in info and "height" in info: 

148 width = info["width"] 

149 height = info["height"] 

150 target_filename = f"thumbnail-{width}-{height}.{file_extension}" 

151 

152 elif "width" in info: 

153 width = info["width"] 

154 height = int((float(img.size[1]) * float(width / float(img.size[0])))) 

155 target_filename = f"thumbnail-w{width}.{file_extension}" 

156 

157 else: # No default fallback - ignore 

158 continue 

159 

160 # Create resized version of the source 

161 target = io.BytesIO() 

162 

163 try: 

164 img = img.resize((width, height), PIL.Image.LANCZOS) 

165 except ValueError as e: 

166 # Usually happens to some files, like TIFF-images. 

167 logging.debug(f"{info=}") 

168 logging.exception(e) 

169 break 

170 

171 img.save(target, file_extension) 

172 

173 # Safe derived target file 

174 target_size = target.tell() 

175 target.seek(0) 

176 target_blob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{target_filename}""") 

177 target_blob.upload_from_file(target, content_type=mimetype) 

178 

179 result.append( 

180 (target_filename, target_size, mimetype, {"mimetype": mimetype, "width": width, "height": height}) 

181 ) 

182 

183 return result 

184 

185 

186def cloudfunction_thumbnailer(fileSkel, existingFiles, params): 

187 """External Thumbnailer for images. 

188 

189 The corresponding cloudfunction can be found here . 

190 https://github.com/viur-framework/viur-cloudfunctions/tree/main/thumbnailer 

191 

192 You can use it like so: 

193 main.py: 

194 

195 .. code-block:: python 

196 

197 from viur.core.modules.file import cloudfunction_thumbnailer 

198 

199 conf.file_thumbnailer_url = "https://xxxxx.cloudfunctions.net/imagerenderer" 

200 conf.file_derivations = {"thumbnail": cloudfunction_thumbnailer} 

201 

202 conf.derives_pdf = { 

203 "thumbnail": [{"width": 1920,"sites":"1,2"}] 

204 } 

205 

206 skeletons/xxx.py: 

207 .. code-block:: python 

208 

209 test = FileBone(derive=conf.derives_pdf) 

210 """ 

211 

212 if not conf.file_thumbnailer_url: 

213 raise ValueError("conf.file_thumbnailer_url is not set") 

214 

215 bucket = conf.main_app.file.get_bucket(fileSkel["dlkey"]) 

216 

217 def getsignedurl(): 

218 if conf.instance.is_dev_server: 

219 signedUrl = conf.main_app.file.create_download_url(fileSkel["dlkey"], fileSkel["name"]) 

220 else: 

221 path = f"""{fileSkel["dlkey"]}/source/{file_name}""" 

222 if not (blob := bucket.get_blob(path)): 

223 logging.warning(f"Blob {path} is missing from cloud storage!") 

224 return None 

225 authRequest = google.auth.transport.requests.Request() 

226 expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60) 

227 signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "") 

228 content_disposition = utils.build_content_disposition_header(fileSkel["name"]) 

229 signedUrl = blob.generate_signed_url( 

230 expiresAt, 

231 credentials=signing_credentials, 

232 response_disposition=content_disposition, 

233 version="v4") 

234 return signedUrl 

235 

236 def make_request(): 

237 headers = {"Content-Type": "application/json"} 

238 data_str = base64.b64encode(json.dumps(dataDict).encode("UTF-8")) 

239 sig = conf.main_app.file.hmac_sign(data_str) 

240 datadump = json.dumps({"dataStr": data_str.decode('ASCII'), "sign": sig}) 

241 resp = requests.post(conf.file_thumbnailer_url, data=datadump, headers=headers, allow_redirects=False) 

242 if resp.status_code != 200: # Error Handling 

243 match resp.status_code: 

244 case 302: 

245 # The problem is Google resposen 302 to an auth Site when the cloudfunction was not found 

246 # https://cloud.google.com/functions/docs/troubleshooting#login 

247 logging.error("Cloudfunction not found") 

248 case 404: 

249 logging.error("Cloudfunction not found") 

250 case 403: 

251 logging.error("No permission for the Cloudfunction") 

252 case _: 

253 logging.error( 

254 f"cloudfunction_thumbnailer failed with code: {resp.status_code} and data: {resp.content}") 

255 return 

256 

257 try: 

258 response_data = resp.json() 

259 except Exception as e: 

260 logging.error(f"response could not be converted in json failed with: {e=}") 

261 return 

262 if "error" in response_data: 

263 logging.error(f"cloudfunction_thumbnailer failed with: {response_data.get('error')}") 

264 return 

265 

266 return response_data 

267 

268 file_name = html.unescape(fileSkel["name"]) 

269 

270 if not (url := getsignedurl()): 

271 return 

272 dataDict = { 

273 "url": url, 

274 "name": fileSkel["name"], 

275 "params": params, 

276 "minetype": fileSkel["mimetype"], 

277 "baseUrl": current.request.get().request.host_url.lower(), 

278 "targetKey": fileSkel["dlkey"], 

279 "nameOnly": True 

280 } 

281 if not (derivedData := make_request()): 

282 return 

283 

284 uploadUrls = {} 

285 for data in derivedData["values"]: 

286 fileName = conf.main_app.file.sanitize_filename(data["name"]) 

287 blob = bucket.blob(f"""{fileSkel["dlkey"]}/derived/{fileName}""") 

288 uploadUrls[fileSkel["dlkey"] + fileName] = blob.create_resumable_upload_session(timeout=60, 

289 content_type=data["mimeType"]) 

290 

291 if not (url := getsignedurl()): 

292 return 

293 

294 dataDict["url"] = url 

295 dataDict["nameOnly"] = False 

296 dataDict["uploadUrls"] = uploadUrls 

297 

298 if not (derivedData := make_request()): 

299 return 

300 reslist = [] 

301 try: 

302 for derived in derivedData["values"]: 

303 for key, value in derived.items(): 

304 reslist.append((key, value["size"], value["mimetype"], value["customData"])) 

305 

306 except Exception as e: 

307 logging.error(f"cloudfunction_thumbnailer failed with: {e=}") 

308 return reslist 

309 

310 

311class DownloadUrlBone(BaseBone): 

312 """ 

313 This bone is used to inject a freshly signed download url into a FileSkel. 

314 """ 

315 

316 def unserialize(self, skel, name): 

317 if "dlkey" in skel.dbEntity and "name" in skel.dbEntity: 

318 skel.accessedValues[name] = conf.main_app.file.create_download_url( 

319 skel["dlkey"], skel["name"], expires=conf.render_json_download_url_expiration 

320 ) 

321 return True 

322 

323 return False 

324 

325 

326class FileLeafSkel(TreeSkel): 

327 """ 

328 Default file leaf skeleton. 

329 """ 

330 kindName = "file" 

331 

332 name = StringBone( 

333 descr="Filename", 

334 caseSensitive=False, 

335 searchable=True, 

336 vfunc=lambda val: None if File.is_valid_filename(val) else "Invalid filename provided", 

337 ) 

338 

339 alt = StringBone( 

340 descr=i18n.translate( 

341 "viur.core.image.alt", 

342 defaultText="Alternative description", 

343 ), 

344 searchable=True, 

345 languages=conf.i18n.available_languages, 

346 ) 

347 

348 size = NumericBone( 

349 descr="Filesize in Bytes", 

350 readOnly=True, 

351 searchable=True, 

352 ) 

353 

354 dlkey = StringBone( 

355 descr="Download-Key", 

356 readOnly=True, 

357 ) 

358 

359 mimetype = StringBone( 

360 descr="MIME-Type", 

361 readOnly=True, 

362 ) 

363 

364 weak = BooleanBone( 

365 descr="Weak reference", 

366 readOnly=True, 

367 visible=False, 

368 ) 

369 

370 pending = BooleanBone( 

371 descr="Pending upload", 

372 readOnly=True, 

373 visible=False, 

374 defaultValue=False, 

375 ) 

376 

377 width = NumericBone( 

378 descr="Width", 

379 readOnly=True, 

380 searchable=True, 

381 ) 

382 

383 height = NumericBone( 

384 descr="Height", 

385 readOnly=True, 

386 searchable=True, 

387 ) 

388 

389 downloadUrl = DownloadUrlBone( 

390 descr="Download-URL", 

391 readOnly=True, 

392 visible=False, 

393 ) 

394 

395 derived = JsonBone( 

396 descr="Derived Files", 

397 readOnly=True, 

398 visible=False, 

399 ) 

400 

401 pendingparententry = KeyBone( 

402 descr="Pending key Reference", 

403 readOnly=True, 

404 visible=False, 

405 ) 

406 

407 crc32c_checksum = StringBone( 

408 descr="CRC32C checksum", 

409 readOnly=True, 

410 ) 

411 

412 md5_checksum = StringBone( 

413 descr="MD5 checksum", 

414 readOnly=True, 

415 ) 

416 

417 public = BooleanBone( 

418 descr="Public File", 

419 readOnly=True, 

420 defaultValue=False, 

421 ) 

422 

423 serving_url = StringBone( 

424 descr="Serving-URL", 

425 readOnly=True, 

426 params={ 

427 "tooltip": "The 'serving_url' is only available in public file repositories.", 

428 } 

429 ) 

430 

431 @classmethod 

432 def _inject_serving_url(cls, skel: SkeletonInstance) -> None: 

433 """Inject the serving url for public image files into a FileSkel""" 

434 if ( 

435 skel["public"] 

436 and skel["mimetype"] 

437 and skel["mimetype"].startswith("image/") 

438 and not skel["serving_url"] 

439 ): 

440 bucket = File.get_bucket(skel["dlkey"]) 

441 filename = f"/gs/{bucket.name}/{skel['dlkey']}/source/{utils.string.unescape(skel['name'])}" 

442 

443 # Trying this on local development server will raise a 

444 # `google.appengine.runtime.apiproxy_errors.RPCFailedError` 

445 if conf.instance.is_dev_server: 

446 logging.warning(f"Can't inject serving_url for {filename!r} on local development server") 

447 return 

448 

449 try: 

450 skel["serving_url"] = images.get_serving_url(None, secure_url=True, filename=filename) 

451 

452 except Exception as e: 

453 logging.warning(f"Failed to create serving_url for {filename!r} with exception {e!r}") 

454 logging.exception(e) 

455 

456 def preProcessBlobLocks(self, locks): 

457 """ 

458 Ensure that our dlkey is locked even if we don't have a filebone here 

459 """ 

460 if not self["weak"] and self["dlkey"]: 

461 locks.add(self["dlkey"]) 

462 return locks 

463 

464 @classmethod 

465 def refresh(cls, skel): 

466 super().refresh(skel) 

467 if conf.viur2import_blobsource: 

468 importData = importBlobFromViur2(skel["dlkey"], skel["name"]) 

469 if importData: 

470 if not skel["downloadUrl"]: 

471 skel["downloadUrl"] = importData 

472 skel["pendingparententry"] = None 

473 

474 cls._inject_serving_url(skel) 

475 

476 @classmethod 

477 def write(cls, skel, **kwargs): 

478 cls._inject_serving_url(skel) 

479 return super().write(skel, **kwargs) 

480 

481 

482class FileNodeSkel(TreeSkel): 

483 """ 

484 Default file node skeleton. 

485 """ 

486 kindName = "file_rootNode" # FIXME: VIUR4, don't use "_rootNode" kindname 

487 

488 name = StringBone( 

489 descr="Name", 

490 required=True, 

491 searchable=True 

492 ) 

493 

494 rootNode = BooleanBone( 

495 descr="Is RootNode", 

496 defaultValue=False, 

497 readOnly=True, 

498 visible=False, 

499 ) 

500 

501 public = BooleanBone( 

502 descr="Is public?", 

503 defaultValue=False, 

504 readOnly=True, 

505 visible=False, 

506 ) 

507 

508 viurCurrentSeoKeys = None 

509 

510 

511class File(Tree): 

512 PENDING_POSTFIX = " (pending)" 

513 DOWNLOAD_URL_PREFIX = "/file/download/" 

514 INTERNAL_SERVING_URL_PREFIX = "/file/serve/" 

515 MAX_FILENAME_LEN = 256 

516 IMAGE_META_MAX_SIZE: t.Final[int] = 10 * 1024 ** 2 

517 """Maximum size of image files that should be analysed in :meth:`set_image_meta`. 

518 Default: 10 MiB""" 

519 

520 leafSkelCls = FileLeafSkel 

521 nodeSkelCls = FileNodeSkel 

522 

523 handler = "tree.simple.file" 

524 adminInfo = { 

525 "icon": "folder-fill", 

526 "handler": handler, # fixme: Use static handler; Remove with VIUR4! 

527 } 

528 

529 roles = { 

530 "*": "view", 

531 "editor": ("add", "edit"), 

532 "admin": "*", 

533 } 

534 

535 default_order = "name" 

536 

537 # Helper functions currently resist here 

538 

539 @staticmethod 

540 def get_bucket(dlkey: str) -> google.cloud.storage.bucket.Bucket: 

541 """ 

542 Retrieves a Google Cloud Storage bucket for the given dlkey. 

543 """ 

544 global _public_bucket 

545 if dlkey and dlkey.endswith(PUBLIC_DLKEY_SUFFIX): 

546 if _public_bucket or (_public_bucket := GOOGLE_STORAGE_CLIENT.lookup_bucket(PUBLIC_BUCKET_NAME)): 

547 return _public_bucket 

548 

549 raise ValueError( 

550 f"""The bucket '{PUBLIC_BUCKET_NAME}' does not exist! Please create it with ACL access.""" 

551 ) 

552 

553 return _private_bucket 

554 

555 @classmethod 

556 def is_valid_filename(cls, filename: str) -> bool: 

557 """ 

558 Verifies a valid filename. 

559 

560 The filename should be valid on Linux, Mac OS and Windows. 

561 It should not be longer than MAX_FILENAME_LEN chars. 

562 

563 Rule set: https://stackoverflow.com/a/31976060/3749896 

564 Regex test: https://regex101.com/r/iBYpoC/1 

565 """ 

566 if not filename.strip(): 

567 return False 

568 

569 if len(filename) > cls.MAX_FILENAME_LEN: 

570 return False 

571 

572 return bool(re.match(VALID_FILENAME_REGEX, filename)) 

573 

574 @staticmethod 

575 def hmac_sign(data: t.Any) -> str: 

576 assert conf.file_hmac_key is not None, "No hmac-key set!" 

577 if not isinstance(data, bytes): 577 ↛ 578line 577 didn't jump to line 578 because the condition on line 577 was never true

578 data = str(data).encode("UTF-8") 

579 return hmac.new(conf.file_hmac_key, msg=data, digestmod=hashlib.sha3_384).hexdigest() 

580 

581 @classmethod 

582 def hmac_verify(cls, data: t.Any, signature: str) -> bool: 

583 try: 

584 return hmac.compare_digest(cls.hmac_sign(data.encode("ASCII")), signature) 

585 except (TypeError, UnicodeEncodeError): 

586 return False 

587 

588 @classmethod 

589 def create_internal_serving_url( 

590 cls, 

591 serving_url: str, 

592 size: int = 0, 

593 filename: str = "", 

594 options: str = "", 

595 download: bool = False 

596 ) -> str: 

597 """ 

598 Helper function to generate an internal serving url (endpoint: /file/serve) from a Google serving url. 

599 

600 This is needed to hide requests to Google as they are internally be routed, and can be the result of a 

601 legal requirement like GDPR. 

602 

603 :param serving_url: Is the original serving URL as generated from FileLeafSkel._inject_serving_url() 

604 :param size: Optional size setting 

605 :param filename: Optonal filename setting 

606 :param options: Additional options parameter-pass through to /file/serve 

607 :param download: Download parameter-pass through to /file/serve 

608 """ 

609 

610 # Split a serving URL into its components, used by serve function. 

611 res = re.match( 

612 r"^https:\/\/(.*?)\.googleusercontent\.com\/(.*?)$", 

613 serving_url 

614 ) 

615 

616 if not res: 

617 raise ValueError(f"Invalid {serving_url=!r} provided") 

618 

619 # Create internal serving URL 

620 serving_url = cls.INTERNAL_SERVING_URL_PREFIX + "/".join(res.groups()) 

621 

622 # Append additional parameters 

623 if params := { 

624 k: v for k, v in { 

625 "download": download, 

626 "filename": filename, 

627 "options": options, 

628 "size": size, 

629 }.items() if v 

630 }: 

631 serving_url += f"?{urlencode(params)}" 

632 

633 return serving_url 

634 

635 @classmethod 

636 def create_download_url( 

637 cls, 

638 dlkey: str, 

639 filename: str, 

640 derived: bool = False, 

641 expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1), 

642 download_filename: t.Optional[str] = None 

643 ) -> str: 

644 """ 

645 Utility function that creates a signed download-url for the given folder/filename combination 

646 

647 :param folder: The GCS-Folder (= the download-key) for that file 

648 :param filename: The name of the file. Either the original filename or the name of a derived file. 

649 :param derived: True, if it points to a derived file, False if it points to the original uploaded file 

650 :param expires: 

651 None if the file is supposed to be public (which causes it to be cached on the google ede caches), 

652 otherwise a datetime.timedelta of how long that link should be valid 

653 :param download_filename: If set, browser is enforced to download this blob with the given alternate 

654 filename 

655 :return: The signed download-url relative to the current domain (eg /download/...) 

656 """ 

657 if isinstance(expires, int): 657 ↛ 658line 657 didn't jump to line 658 because the condition on line 657 was never true

658 expires = datetime.timedelta(minutes=expires) 

659 

660 filename = html.unescape(filename) 

661 filepath = f"""{dlkey}/{"derived" if derived else "source"}/{filename}""" 

662 

663 if download_filename: 663 ↛ 664line 663 didn't jump to line 664 because the condition on line 663 was never true

664 if not cls.is_valid_filename(download_filename): 

665 raise errors.UnprocessableEntity(f"Invalid download_filename {download_filename!r} provided") 

666 

667 download_filename = urlquote(download_filename) 

668 

669 expires = (datetime.datetime.now() + expires).strftime("%Y%m%d%H%M") if expires else 0 

670 

671 data = base64.urlsafe_b64encode(f"""{filepath}\0{expires}\0{download_filename or ""}""".encode("UTF-8")) 

672 sig = cls.hmac_sign(data) 

673 

674 return f"""{cls.DOWNLOAD_URL_PREFIX}{data.decode("ASCII")}?sig={sig}""" 

675 

676 @classmethod 

677 def parse_download_url(cls, url) -> t.Optional[FilePath]: 

678 """ 

679 Parses a file download URL in the format `/file/download/xxxx?sig=yyyy` into its FilePath. 

680 

681 If the URL cannot be parsed, the function returns None. 

682 

683 :param url: The file download URL to be parsed. 

684 :return: A FilePath on success, None otherwise. 

685 """ 

686 if not url.startswith(cls.DOWNLOAD_URL_PREFIX) or "?" not in url: 686 ↛ 687line 686 didn't jump to line 687 because the condition on line 686 was never true

687 return None 

688 

689 data, sig = url.removeprefix(cls.DOWNLOAD_URL_PREFIX).split("?", 1) # Strip "/file/download/" and split on "?" 

690 sig = sig.removeprefix("sig=") 

691 

692 if not cls.hmac_verify(data, sig): 692 ↛ 694line 692 didn't jump to line 694 because the condition on line 692 was never true

693 # Invalid signature 

694 return None 

695 

696 # Split the blobKey into the individual fields it should contain 

697 data = base64.urlsafe_b64decode(data).decode("UTF-8") 

698 

699 match data.count("\0"): 

700 case 2: 700 ↛ 702line 700 didn't jump to line 702 because the pattern on line 700 always matched

701 dlpath, valid_until, _ = data.split("\0") 

702 case 1: 

703 # It's the old format, without an downloadFileName 

704 dlpath, valid_until = data.split("\0") 

705 case _: 

706 # Invalid path 

707 return None 

708 

709 if valid_until != "0" and datetime.strptime(valid_until, "%Y%m%d%H%M") < datetime.now(): 709 ↛ 711line 709 didn't jump to line 711 because the condition on line 709 was never true

710 # Signature expired 

711 return None 

712 

713 if dlpath.count("/") != 2: 713 ↛ 715line 713 didn't jump to line 715 because the condition on line 713 was never true

714 # Invalid path 

715 return None 

716 

717 dlkey, derived, filename = dlpath.split("/") 

718 return FilePath(dlkey, derived != "source", filename) 

719 

720 @classmethod 

721 def create_src_set( 

722 cls, 

723 file: t.Union["SkeletonInstance", dict, str], 

724 expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1), 

725 width: t.Optional[int] = None, 

726 height: t.Optional[int] = None, 

727 language: t.Optional[str] = None, 

728 ) -> str: 

729 """ 

730 Generates a string suitable for use as the srcset tag in html. This functionality provides the browser 

731 with a list of images in different sizes and allows it to choose the smallest file that will fill it's 

732 viewport without upscaling. 

733 

734 :param file: The file skeleton (or if multiple=True a single value from it) to generate the srcset. 

735 :param expires: 

736 None if the file is supposed to be public (which causes it to be cached on the google edecaches), 

737 otherwise it's lifetime in seconds 

738 :param width: 

739 A list of widths that should be included in the srcset. 

740 If a given width is not available, it will be skipped. 

741 :param height: A list of heights that should be included in the srcset. If a given height is not available, 

742 it will be skipped. 

743 :param language: Language overwrite if file has multiple languages, and we want to explicitly specify one 

744 :return: The srctag generated or an empty string if a invalid file object was supplied 

745 """ 

746 if not width and not height: 

747 logging.error("Neither width or height supplied") 

748 return "" 

749 

750 if isinstance(file, str): 

751 file = db.Query("file").filter("dlkey =", file).order(("creationdate", db.SortOrder.Ascending)).getEntry() 

752 

753 if not file: 

754 return "" 

755 

756 if isinstance(file, i18n.LanguageWrapper): 

757 language = language or current.language.get() 

758 if not language or not (file := cls.get(language)): 

759 return "" 

760 

761 if "dlkey" not in file and "dest" in file: 

762 file = file["dest"] 

763 

764 from viur.core.skeleton import SkeletonInstance # avoid circular imports 

765 

766 if not ( 

767 isinstance(file, (SkeletonInstance, dict)) 

768 and "dlkey" in file 

769 and "derived" in file 

770 ): 

771 logging.error("Invalid file supplied") 

772 return "" 

773 

774 if not isinstance(file["derived"], dict): 

775 logging.error("No derives available") 

776 return "" 

777 

778 src_set = [] 

779 for filename, derivate in file["derived"]["files"].items(): 

780 customData = derivate.get("customData", {}) 

781 

782 if width and customData.get("width") in width: 

783 src_set.append( 

784 f"""{cls.create_download_url(file["dlkey"], filename, True, expires)} {customData["width"]}w""" 

785 ) 

786 

787 if height and customData.get("height") in height: 

788 src_set.append( 

789 f"""{cls.create_download_url(file["dlkey"], filename, True, expires)} {customData["height"]}h""" 

790 ) 

791 

792 return ", ".join(src_set) 

793 

794 def write( 

795 self, 

796 filename: str, 

797 content: t.Any, 

798 mimetype: str = "text/plain", 

799 *, 

800 width: int = None, 

801 height: int = None, 

802 public: bool = False, 

803 rootnode: t.Optional[db.Key] = None, 

804 folder: t.Iterable[str] | str = (), 

805 ) -> db.Key: 

806 """ 

807 Write a file from any bytes-like object into the file module. 

808 

809 If *folder* and *rootnode* are both set, the file is added to the repository in that folder. 

810 If only *folder* is set, the file is added to the default repository in that folder. 

811 If only *rootnode* is set, the file is added to that repository in the root folder. 

812 

813 If both are not set, the file is added without a path or repository as a weak file. 

814 It will not be visible in admin in this case. 

815 

816 :param filename: Filename to be written. 

817 :param content: The file content to be written, as bytes-like object. 

818 :param mimetype: The file's mimetype. 

819 :param width: Optional width information for the file. 

820 :param height: Optional height information for the file. 

821 :param public: True if the file should be publicly accessible. 

822 :param rootnode: Optional root-node of the repository to add the file to 

823 :param folder: Optional folder the file should be written into. 

824 

825 :return: Returns the key of the file object written. This can be associated e.g. with a FileBone. 

826 """ 

827 # logging.info(f"{filename=} {mimetype=} {width=} {height=} {public=}") 

828 if not self.is_valid_filename(filename): 

829 raise ValueError(f"{filename=} is invalid") 

830 

831 # Folder mode? 

832 if folder: 

833 # Validate correct folder naming 

834 if isinstance(folder, str): 

835 folder = folder, # make it a tuple 

836 

837 for foldername in folder: 

838 if not self.is_valid_filename(foldername): 

839 raise ValueError(f"{foldername=} is invalid") 

840 

841 # When in folder-mode, a rootnode must exist! 

842 if rootnode is None: 

843 rootnode = self.ensureOwnModuleRootNode() 

844 

845 parentrepokey = rootnode.key 

846 parentfolderkey = rootnode.key 

847 

848 for foldername in folder: 

849 query = self.addSkel("node").all() 

850 query.filter("parentrepo", parentrepokey) 

851 query.filter("parententry", parentfolderkey) 

852 query.filter("name", foldername) 

853 

854 if folder_skel := query.getSkel(): 

855 # Skip existing folder 

856 parentfolderkey = folder_skel["key"] 

857 else: 

858 # Create new folder 

859 folder_skel = self.addSkel("node") 

860 

861 folder_skel["name"] = foldername 

862 folder_skel["parentrepo"] = parentrepokey 

863 folder_skel["parententry"] = parentfolderkey 

864 folder_skel.write() 

865 

866 parentfolderkey = folder_skel["key"] 

867 

868 else: 

869 parentrepokey = None 

870 parentfolderkey = None 

871 

872 # Write the file 

873 dl_key = utils.string.random() 

874 

875 if public: 

876 dl_key += PUBLIC_DLKEY_SUFFIX # mark file as public 

877 

878 bucket = self.get_bucket(dl_key) 

879 

880 blob = bucket.blob(f"{dl_key}/source/{filename}") 

881 blob.upload_from_file(io.BytesIO(content), content_type=mimetype) 

882 

883 fileskel = self.addSkel("leaf") 

884 

885 fileskel["parentrepo"] = parentrepokey 

886 fileskel["parententry"] = parentfolderkey 

887 fileskel["name"] = filename 

888 fileskel["size"] = blob.size 

889 fileskel["mimetype"] = mimetype 

890 fileskel["dlkey"] = dl_key 

891 fileskel["weak"] = bool(parentrepokey) 

892 fileskel["public"] = public 

893 fileskel["width"] = width 

894 fileskel["height"] = height 

895 fileskel["crc32c_checksum"] = base64.b64decode(blob.crc32c).hex() 

896 fileskel["md5_checksum"] = base64.b64decode(blob.md5_hash).hex() 

897 fileskel["pending"] = False 

898 

899 return fileskel.write()["key"] 

900 

901 def read( 

902 self, 

903 key: db.Key | int | str | None = None, 

904 path: str | None = None, 

905 ) -> tuple[io.BytesIO, str]: 

906 """ 

907 Read a file from the Cloud Storage. 

908 

909 If a key and a path are provided, the key is preferred. 

910 This means that the entry in the db is searched first and if this is not found, the path is used. 

911 

912 :param key: Key of the LeafSkel that contains the "dlkey" and the "name". 

913 :param path: The path of the file in the Cloud Storage Bucket. 

914 

915 :return: Returns the file as a io.BytesIO buffer and the content-type 

916 """ 

917 if not key and not path: 

918 raise ValueError("Please provide a key or a path") 

919 

920 if key: 

921 skel = self.viewSkel("leaf") 

922 if not skel.read(db.key_helper(key, skel.kindName)): 

923 if not path: 

924 raise ValueError("This skeleton is not in the database!") 

925 else: 

926 path = f"""{skel["dlkey"]}/source/{skel["name"]}""" 

927 

928 bucket = self.get_bucket(skel["dlkey"]) 

929 else: 

930 bucket = self.get_bucket(path.split("/", 1)[0]) # path's first part is dlkey plus eventual postfix 

931 

932 blob = bucket.blob(path) 

933 return io.BytesIO(blob.download_as_bytes()), blob.content_type 

934 

935 @CallDeferred 

936 def deleteRecursive(self, parentKey): 

937 files = db.Query(self.leafSkelCls().kindName).filter("parentdir =", parentKey).iter() 

938 for fileEntry in files: 

939 self.mark_for_deletion(fileEntry["dlkey"]) 

940 skel = self.leafSkelCls() 

941 

942 if skel.read(str(fileEntry.key())): 

943 skel.delete() 

944 dirs = db.Query(self.nodeSkelCls().kindName).filter("parentdir", parentKey).iter() 

945 for d in dirs: 

946 self.deleteRecursive(d.key) 

947 skel = self.nodeSkelCls() 

948 if skel.read(d.key): 

949 skel.delete() 

950 

951 @exposed 

952 @skey 

953 def getUploadURL( 

954 self, 

955 fileName: str, 

956 mimeType: str, 

957 size: t.Optional[int] = None, 

958 node: t.Optional[str | db.Key] = None, 

959 authData: t.Optional[str] = None, 

960 authSig: t.Optional[str] = None, 

961 public: bool = False, 

962 ): 

963 filename = fileName.strip() # VIUR4 FIXME: just for compatiblity of the parameter names 

964 

965 if not self.is_valid_filename(filename): 

966 raise errors.UnprocessableEntity(f"Invalid filename {filename!r} provided") 

967 

968 # Validate the mimetype from the client seems legit 

969 mimetype = mimeType.strip().lower() 

970 if not ( 

971 mimetype 

972 and mimetype.count("/") == 1 

973 and all(ch in string.ascii_letters + string.digits + "/-.+" for ch in mimetype) 

974 ): 

975 raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype!r} provided") 

976 

977 # Validate authentication data 

978 if authData and authSig: 

979 # First, validate the signature, otherwise we don't need to proceed further 

980 if not self.hmac_verify(authData, authSig): 

981 raise errors.Unauthorized() 

982 

983 authData = json.loads(base64.b64decode(authData.encode("ASCII")).decode("UTF-8")) 

984 

985 if datetime.datetime.strptime(authData["validUntil"], "%Y%m%d%H%M") < datetime.datetime.now(): 

986 raise errors.Gone("The upload URL has expired") 

987 

988 if authData["validMimeTypes"]: 

989 for validMimeType in authData["validMimeTypes"]: 

990 if ( 

991 validMimeType == mimetype 

992 or (validMimeType.endswith("*") and mimetype.startswith(validMimeType[:-1])) 

993 ): 

994 break 

995 else: 

996 raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype} provided") 

997 

998 node = authData["node"] 

999 maxSize = authData["maxSize"] 

1000 

1001 else: 

1002 rootNode = None 

1003 if node and not (rootNode := self.getRootNode(node)): 

1004 raise errors.NotFound(f"No valid root node found for {node=}") 

1005 

1006 if not self.canAdd("leaf", rootNode): 

1007 raise errors.Forbidden() 

1008 

1009 if rootNode and public != bool(rootNode.get("public")): 

1010 raise errors.Forbidden("Cannot upload a public file into private repository or vice versa") 

1011 

1012 maxSize = None # The user has some file/add permissions, don't restrict fileSize 

1013 

1014 if maxSize: 

1015 if size > maxSize: 

1016 raise errors.UnprocessableEntity(f"Size {size} exceeds maximum size {maxSize}") 

1017 else: 

1018 size = None 

1019 

1020 # Create upload-URL and download key 

1021 dlkey = utils.string.random() # let's roll a random key 

1022 

1023 if public: 

1024 dlkey += PUBLIC_DLKEY_SUFFIX # mark file as public 

1025 

1026 blob = self.get_bucket(dlkey).blob(f"{dlkey}/source/{filename}") 

1027 upload_url = blob.create_resumable_upload_session(content_type=mimeType, size=size, timeout=60) 

1028 

1029 # Create a corresponding file-lock object early, otherwise we would have to ensure that the file-lock object 

1030 # the user creates matches the file he had uploaded 

1031 file_skel = self.addSkel("leaf") 

1032 

1033 file_skel["name"] = filename + self.PENDING_POSTFIX 

1034 file_skel["size"] = 0 

1035 file_skel["mimetype"] = "application/octetstream" 

1036 file_skel["dlkey"] = dlkey 

1037 file_skel["parentdir"] = None 

1038 file_skel["pendingparententry"] = db.key_helper(node, self.addSkel("node").kindName) if node else None 

1039 file_skel["pending"] = True 

1040 file_skel["weak"] = True 

1041 file_skel["public"] = public 

1042 file_skel["width"] = 0 

1043 file_skel["height"] = 0 

1044 

1045 file_skel.write() 

1046 key = str(file_skel["key"]) 

1047 

1048 # Mark that entry dirty as we might never receive an add 

1049 self.mark_for_deletion(dlkey) 

1050 

1051 # In this case, we'd have to store the key in the users session so he can call add() later on 

1052 if authData and authSig: 

1053 session = current.session.get() 

1054 

1055 if "pendingFileUploadKeys" not in session: 

1056 session["pendingFileUploadKeys"] = [] 

1057 

1058 session["pendingFileUploadKeys"].append(key) 

1059 

1060 # Clamp to the latest 50 pending uploads 

1061 session["pendingFileUploadKeys"] = session["pendingFileUploadKeys"][-50:] 

1062 session.markChanged() 

1063 

1064 return self.render.view({ 

1065 "uploadKey": key, 

1066 "uploadUrl": upload_url, 

1067 }) 

1068 

1069 @exposed 

1070 def download(self, blobKey: str, fileName: str = "", download: bool = False, sig: str = "", *args, **kwargs): 

1071 """ 

1072 Download a file. 

1073 :param blobKey: The unique blob key of the file. 

1074 :param fileName: Optional filename to provide in the header. 

1075 :param download: Set header to attachment retrival, set explictly to "1" if download is wanted. 

1076 """ 

1077 if filename := fileName.strip(): 

1078 if not self.is_valid_filename(filename): 

1079 raise errors.UnprocessableEntity(f"The provided filename {filename!r} is invalid!") 

1080 

1081 try: 

1082 values = base64.urlsafe_b64decode(blobKey).decode("UTF-8").split("\0") 

1083 except ValueError: 

1084 raise errors.BadRequest(f"Invalid encoding of blob key {blobKey!r}!") 

1085 try: 

1086 dlPath, validUntil, *download_filename = values 

1087 # Maybe it's the old format, without a download_filename 

1088 download_filename = download_filename[0] if download_filename else "" 

1089 except ValueError: 

1090 logging.error(f"Encoding of {blobKey=!r} OK. {values=} invalid.") 

1091 raise errors.BadRequest(f"The blob key {blobKey!r} has an invalid amount of encoded values!") 

1092 

1093 bucket = self.get_bucket(dlPath.split("/", 1)[0]) 

1094 

1095 if not sig: 

1096 # Check if the current user has the right to download *any* blob present in this application. 

1097 # blobKey is then the path inside cloudstore - not a base64 encoded tuple 

1098 if not (usr := current.user.get()): 

1099 raise errors.Unauthorized() 

1100 if "root" not in usr["access"] and "file-view" not in usr["access"]: 

1101 raise errors.Forbidden() 

1102 validUntil = "-1" # Prevent this from being cached down below 

1103 blob = bucket.get_blob(blobKey) 

1104 

1105 else: 

1106 # We got an request including a signature (probably a guest or a user without file-view access) 

1107 # First, validate the signature, otherwise we don't need to proceed any further 

1108 if not self.hmac_verify(blobKey, sig): 

1109 raise errors.Forbidden() 

1110 

1111 if validUntil != "0" and datetime.datetime.strptime(validUntil, "%Y%m%d%H%M") < datetime.datetime.now(): 

1112 blob = None 

1113 else: 

1114 blob = bucket.get_blob(dlPath) 

1115 

1116 if not blob: 

1117 raise errors.Gone("The requested blob has expired.") 

1118 

1119 if not filename: 

1120 filename = download_filename or urlquote(blob.name.rsplit("/", 1)[-1]) 

1121 

1122 content_disposition = utils.build_content_disposition_header(filename, attachment=download) 

1123 

1124 if isinstance(_CREDENTIALS, ServiceAccountCredentials): 

1125 expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60) 

1126 signedUrl = blob.generate_signed_url(expiresAt, response_disposition=content_disposition, version="v4") 

1127 raise errors.Redirect(signedUrl) 

1128 

1129 elif conf.instance.is_dev_server: # No Service-Account to sign with - Serve everything directly 

1130 response = current.request.get().response 

1131 response.headers["Content-Type"] = blob.content_type 

1132 if content_disposition: 

1133 response.headers["Content-Disposition"] = content_disposition 

1134 return blob.download_as_bytes() 

1135 

1136 if validUntil == "0" or blobKey.endswith(PUBLIC_DLKEY_SUFFIX): # Its an indefinitely valid URL 

1137 if blob.size < 5 * 1024 * 1024: # Less than 5 MB - Serve directly and push it into the ede caches 

1138 response = current.request.get().response 

1139 response.headers["Content-Type"] = blob.content_type 

1140 response.headers["Cache-Control"] = "public, max-age=604800" # 7 Days 

1141 if content_disposition: 

1142 response.headers["Content-Disposition"] = content_disposition 

1143 return blob.download_as_bytes() 

1144 

1145 # Default fallback - create a signed URL and redirect 

1146 authRequest = google.auth.transport.requests.Request() 

1147 expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60) 

1148 signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "") 

1149 signedUrl = blob.generate_signed_url( 

1150 expiresAt, 

1151 credentials=signing_credentials, 

1152 response_disposition=content_disposition, 

1153 version="v4") 

1154 

1155 raise errors.Redirect(signedUrl) 

1156 

1157 SERVE_VALID_OPTIONS = { 

1158 "c", 

1159 "p", 

1160 "fv", 

1161 "fh", 

1162 "r90", 

1163 "r180", 

1164 "r270", 

1165 "nu", 

1166 } 

1167 """ 

1168 Valid modification option shorts for the serve-function. 

1169 This is passed-through to the Google UserContent API, and hast to be supported there. 

1170 """ 

1171 

1172 SERVE_VALID_FORMATS = { 

1173 "jpg": "rj", 

1174 "jpeg": "rj", 

1175 "png": "rp", 

1176 "webp": "rw", 

1177 } 

1178 """ 

1179 Valid file-formats to the serve-function. 

1180 This is passed-through to the Google UserContent API, and hast to be supported there. 

1181 """ 

1182 

1183 @exposed 

1184 def serve( 

1185 self, 

1186 host: str, 

1187 key: str, 

1188 size: t.Optional[int] = None, 

1189 filename: t.Optional[str] = None, 

1190 options: str = "", 

1191 download: bool = False, 

1192 ): 

1193 """ 

1194 Requests an image using the serving url to bypass direct Google requests. 

1195 

1196 :param host: the google host prefix i.e. lh3 

1197 :param key: the serving url key 

1198 :param size: the target image size 

1199 :param filename: a random string with an extention, valid extentions are (defined in File.SERVE_VALID_FORMATS). 

1200 :param options: - seperated options (defined in File.SERVE_VALID_OPTIONS). 

1201 c - crop 

1202 p - face crop 

1203 fv - vertrical flip 

1204 fh - horizontal flip 

1205 rXXX - rotate 90, 180, 270 

1206 nu - no upscale 

1207 :param download: Serves the content as download (Content-Disposition) or not. 

1208 

1209 :return: Returns the requested content on success, raises a proper HTTP exception otherwise. 

1210 """ 

1211 

1212 if any(c not in conf.search_valid_chars for c in host): 

1213 raise errors.BadRequest("key contains invalid characters") 

1214 

1215 # extract format from filename 

1216 file_fmt = "webp" 

1217 

1218 if filename: 

1219 fmt = filename.rsplit(".", 1)[-1].lower() 

1220 if fmt in self.SERVE_VALID_FORMATS: 

1221 file_fmt = fmt 

1222 else: 

1223 raise errors.UnprocessableEntity(f"Unsupported filetype {fmt}") 

1224 

1225 url = f"https://{host}.googleusercontent.com/{key}" 

1226 

1227 if options and not all(param in self.SERVE_VALID_OPTIONS for param in options.split("-")): 

1228 raise errors.BadRequest("Invalid options provided") 

1229 

1230 options += f"-{self.SERVE_VALID_FORMATS[file_fmt]}" 

1231 

1232 if size: 

1233 options = f"s{size}-" + options 

1234 

1235 url += "=" + options 

1236 

1237 response = current.request.get().response 

1238 response.headers["Content-Type"] = f"image/{file_fmt}" 

1239 response.headers["Cache-Control"] = "public, max-age=604800" # 7 Days 

1240 response.headers["Content-Disposition"] = utils.build_content_disposition_header(filename, attachment=download) 

1241 

1242 answ = requests.get(url, timeout=20) 

1243 if not answ.ok: 

1244 logging.error(f"{answ.status_code} {answ.text}") 

1245 raise errors.BadRequest("Unable to fetch a file with these parameters") 

1246 

1247 return answ.content 

1248 

1249 @exposed 

1250 @force_ssl 

1251 @force_post 

1252 @skey(allow_empty=True) 

1253 def add(self, skelType: SkelType, node: db.Key | int | str | None = None, *args, **kwargs): 

1254 # We can't add files directly (they need to be uploaded 

1255 if skelType == "leaf": # We need to handle leafs separately here 

1256 targetKey = kwargs.get("key") 

1257 skel = self.addSkel("leaf") 

1258 

1259 if not skel.read(targetKey): 

1260 raise errors.NotFound() 

1261 

1262 if not skel["pending"]: 

1263 raise errors.PreconditionFailed() 

1264 

1265 skel["pending"] = False 

1266 skel["parententry"] = skel["pendingparententry"] 

1267 

1268 if skel["parententry"]: 

1269 rootNode = self.getRootNode(skel["parententry"]) 

1270 else: 

1271 rootNode = None 

1272 

1273 if not self.canAdd("leaf", rootNode): 

1274 # Check for a marker in this session (created if using a signed upload URL) 

1275 session = current.session.get() 

1276 if targetKey not in (session.get("pendingFileUploadKeys") or []): 

1277 raise errors.Forbidden() 

1278 session["pendingFileUploadKeys"].remove(targetKey) 

1279 session.markChanged() 

1280 

1281 # Now read the blob from the dlkey folder 

1282 bucket = self.get_bucket(skel["dlkey"]) 

1283 

1284 blobs = list(bucket.list_blobs(prefix=f"""{skel["dlkey"]}/""")) 

1285 if len(blobs) != 1: 

1286 logging.error("Invalid number of blobs in folder") 

1287 logging.error(targetKey) 

1288 raise errors.PreconditionFailed() 

1289 

1290 # only one item is allowed here! 

1291 blob = blobs[0] 

1292 

1293 # update the corresponding file skeleton 

1294 skel["name"] = skel["name"].removesuffix(self.PENDING_POSTFIX) 

1295 skel["mimetype"] = utils.string.escape(blob.content_type) 

1296 skel["size"] = blob.size 

1297 skel["parentrepo"] = rootNode["key"] if rootNode else None 

1298 skel["weak"] = rootNode is None 

1299 skel["crc32c_checksum"] = base64.b64decode(blob.crc32c).hex() 

1300 skel["md5_checksum"] = base64.b64decode(blob.md5_hash).hex() 

1301 self.onAdd("leaf", skel) 

1302 skel.write() 

1303 self.onAdded("leaf", skel) 

1304 

1305 # Add updated download-URL as the auto-generated isn't valid yet 

1306 skel["downloadUrl"] = self.create_download_url(skel["dlkey"], skel["name"]) 

1307 

1308 return self.render.addSuccess(skel) 

1309 

1310 return super().add(skelType, node, *args, **kwargs) 

1311 

1312 @exposed 

1313 def get_download_url( 

1314 self, 

1315 key: t.Optional[db.Key] = None, 

1316 dlkey: t.Optional[str] = None, 

1317 filename: t.Optional[str] = None, 

1318 derived: bool = False, 

1319 ): 

1320 """ 

1321 Request a download url for a given file 

1322 :param key: The key of the file 

1323 :param dlkey: The download key of the file 

1324 :param filename: The filename to be given. If no filename is provided 

1325 downloadUrls for all derived files are returned in case of `derived=True`. 

1326 :param derived: True, if a derived file download URL is being requested. 

1327 """ 

1328 skel = self.viewSkel("leaf") 

1329 if dlkey is not None: 

1330 skel = skel.all().filter("dlkey", dlkey).getSkel() 

1331 elif key is None and dlkey is None: 

1332 raise errors.BadRequest("No key or dlkey provided") 

1333 

1334 if not (skel and skel.read(key)): 

1335 raise errors.NotFound() 

1336 

1337 if not self.canView("leaf", skel): 

1338 raise errors.Unauthorized() 

1339 

1340 dlkey = skel["dlkey"] 

1341 

1342 if derived and filename is None: 

1343 res = {} 

1344 for filename in skel["derived"]["files"]: 

1345 res[filename] = self.create_download_url(dlkey, filename, derived) 

1346 else: 

1347 if derived: 

1348 # Check if Filename exist in the Derives. We sign nothing that not exist. 

1349 if filename not in skel["derived"]["files"]: 

1350 raise errors.NotFound("File not in derives") 

1351 else: 

1352 if filename is None: 

1353 filename = skel["name"] 

1354 elif filename != skel["name"]: 

1355 raise errors.NotFound("Filename not match") 

1356 

1357 res = self.create_download_url(dlkey, filename, derived) 

1358 

1359 return self.render.view(res) 

1360 

1361 def onEdit(self, skelType: SkelType, skel: SkeletonInstance): 

1362 super().onEdit(skelType, skel) 

1363 

1364 if skelType == "leaf": 

1365 old_skel = self.editSkel(skelType) 

1366 old_skel.setEntity(skel.dbEntity) 

1367 

1368 if old_skel["name"] == skel["name"]: # name not changed we can return 

1369 return 

1370 

1371 # Move Blob to new name 

1372 # https://cloud.google.com/storage/docs/copying-renaming-moving-objects 

1373 old_path = f"""{skel["dlkey"]}/source/{html.unescape(old_skel["name"])}""" 

1374 new_path = f"""{skel["dlkey"]}/source/{html.unescape(skel["name"])}""" 

1375 

1376 bucket = self.get_bucket(skel["dlkey"]) 

1377 

1378 if not (old_blob := bucket.get_blob(old_path)): 

1379 raise errors.Gone() 

1380 

1381 bucket.copy_blob(old_blob, bucket, new_path, if_generation_match=0) 

1382 bucket.delete_blob(old_path) 

1383 

1384 def onAdded(self, skelType: SkelType, skel: SkeletonInstance) -> None: 

1385 if skelType == "leaf" and skel["mimetype"].startswith("image/"): 

1386 if skel["size"] > self.IMAGE_META_MAX_SIZE: 

1387 logging.warning(f"File size {skel['size']} exceeds limit {self.IMAGE_META_MAX_SIZE=}") 

1388 return 

1389 self.set_image_meta(skel["key"]) 

1390 

1391 super().onAdded(skelType, skel) 

1392 

1393 @CallDeferred 

1394 def set_image_meta(self, key: db.Key) -> None: 

1395 """Write image metadata (height and width) to FileSkel""" 

1396 skel = self.editSkel("leaf", key) 

1397 if not skel.read(key): 

1398 logging.error(f"File {key} does not exist") 

1399 return 

1400 if skel["width"] and skel["height"]: 

1401 logging.info(f'File {skel["key"]} has already {skel["width"]=} and {skel["height"]=}') 

1402 return 

1403 file_name = html.unescape(skel["name"]) 

1404 blob = self.get_bucket(skel["dlkey"]).get_blob(f"""{skel["dlkey"]}/source/{file_name}""") 

1405 if not blob: 

1406 logging.error(f'Blob {skel["dlkey"]}/source/{file_name} is missing in Cloud Storage!') 

1407 return 

1408 

1409 file_obj = io.BytesIO() 

1410 blob.download_to_file(file_obj) 

1411 file_obj.seek(0) 

1412 try: 

1413 img = Image.open(file_obj) 

1414 except Image.UnidentifiedImageError as e: # Can't load this image 

1415 logging.exception(f'Cannot open {skel["key"]} | {skel["name"]} to set image meta data: {e}') 

1416 return 

1417 

1418 skel.patch( 

1419 values={ 

1420 "width": img.width, 

1421 "height": img.height, 

1422 }, 

1423 ) 

1424 

1425 def mark_for_deletion(self, dlkey: str) -> None: 

1426 """ 

1427 Adds a marker to the datastore that the file specified as *dlkey* can be deleted. 

1428 

1429 Once the mark has been set, the data store is checked four times (default: every 4 hours) 

1430 if the file is in use somewhere. If it is still in use, the mark goes away, otherwise 

1431 the mark and the file are removed from the datastore. These delayed checks are necessary 

1432 due to database inconsistency. 

1433 

1434 :param dlkey: Unique download-key of the file that shall be marked for deletion. 

1435 """ 

1436 fileObj = db.Query("viur-deleted-files").filter("dlkey", dlkey).getEntry() 

1437 

1438 if fileObj: # Its allready marked 

1439 return 

1440 

1441 fileObj = db.Entity(db.Key("viur-deleted-files")) 

1442 fileObj["itercount"] = 0 

1443 fileObj["dlkey"] = str(dlkey) 

1444 

1445 db.put(fileObj) 

1446 

1447 

1448@PeriodicTask(interval=datetime.timedelta(hours=4)) 

1449def startCheckForUnreferencedBlobs(): 

1450 """ 

1451 Start searching for blob locks that have been recently freed 

1452 """ 

1453 doCheckForUnreferencedBlobs() 

1454 

1455 

1456@CallDeferred 

1457def doCheckForUnreferencedBlobs(cursor=None): 

1458 def getOldBlobKeysTxn(dbKey): 

1459 obj = db.get(dbKey) 

1460 res = obj["old_blob_references"] or [] 

1461 if obj["is_stale"]: 

1462 db.delete(dbKey) 

1463 else: 

1464 obj["has_old_blob_references"] = False 

1465 obj["old_blob_references"] = [] 

1466 db.put(obj) 

1467 return res 

1468 

1469 query = db.Query("viur-blob-locks").filter("has_old_blob_references", True).setCursor(cursor) 

1470 for lockObj in query.run(100): 

1471 oldBlobKeys = db.run_in_transaction(getOldBlobKeysTxn, lockObj.key) 

1472 for blobKey in oldBlobKeys: 

1473 if db.Query("viur-blob-locks").filter("active_blob_references =", blobKey).getEntry(): 

1474 # This blob is referenced elsewhere 

1475 logging.info(f"Stale blob is still referenced, {blobKey}") 

1476 continue 

1477 # Add a marker and schedule it for deletion 

1478 fileObj = db.Query("viur-deleted-files").filter("dlkey", blobKey).getEntry() 

1479 if fileObj: # Its already marked 

1480 logging.info(f"Stale blob already marked for deletion, {blobKey}") 

1481 return 

1482 fileObj = db.Entity(db.Key("viur-deleted-files")) 

1483 fileObj["itercount"] = 0 

1484 fileObj["dlkey"] = str(blobKey) 

1485 logging.info(f"Stale blob marked dirty, {blobKey}") 

1486 db.put(fileObj) 

1487 newCursor = query.getCursor() 

1488 if newCursor: 

1489 doCheckForUnreferencedBlobs(newCursor) 

1490 

1491 

1492@PeriodicTask(interval=datetime.timedelta(hours=4)) 

1493def startCleanupDeletedFiles(): 

1494 """ 

1495 Increase deletion counter on each blob currently not referenced and delete 

1496 it if that counter reaches maxIterCount 

1497 """ 

1498 doCleanupDeletedFiles() 

1499 

1500 

1501@CallDeferred 

1502def doCleanupDeletedFiles(cursor=None): 

1503 maxIterCount = 2 # How often a file will be checked for deletion 

1504 query = db.Query("viur-deleted-files") 

1505 if cursor: 

1506 query.setCursor(cursor) 

1507 for file in query.run(100): 

1508 if "dlkey" not in file: 

1509 db.delete(file.key) 

1510 elif db.Query("viur-blob-locks").filter("active_blob_references =", file["dlkey"]).getEntry(): 

1511 logging.info(f"""is referenced, {file["dlkey"]}""") 

1512 db.delete(file.key) 

1513 else: 

1514 if file["itercount"] > maxIterCount: 

1515 logging.info(f"""Finally deleting, {file["dlkey"]}""") 

1516 bucket = conf.main_app.file.get_bucket(file["dlkey"]) 

1517 blobs = bucket.list_blobs(prefix=f"""{file["dlkey"]}/""") 

1518 for blob in blobs: 

1519 blob.delete() 

1520 db.delete(file.key) 

1521 # There should be exactly 1 or 0 of these 

1522 for f in skeletonByKind("file")().all().filter("dlkey =", file["dlkey"]).fetch(99): 

1523 f.delete() 

1524 

1525 if f["serving_url"]: 

1526 bucket = conf.main_app.file.get_bucket(f["dlkey"]) 

1527 blob_key = blobstore.create_gs_key( 

1528 f"/gs/{bucket.name}/{f['dlkey']}/source/{f['name']}" 

1529 ) 

1530 images.delete_serving_url(blob_key) # delete serving url 

1531 else: 

1532 logging.debug(f"""Increasing count, {file["dlkey"]}""") 

1533 file["itercount"] += 1 

1534 db.put(file) 

1535 newCursor = query.getCursor() 

1536 if newCursor: 

1537 doCleanupDeletedFiles(newCursor) 

1538 

1539 

1540@PeriodicTask(interval=datetime.timedelta(hours=4)) 

1541def start_delete_pending_files(): 

1542 """ 

1543 Start deletion of pending FileSkels that are older than 7 days. 

1544 """ 

1545 DeleteEntitiesIter.startIterOnQuery( 

1546 FileLeafSkel().all() 

1547 .filter("pending =", True) 

1548 .filter("creationdate <", utils.utcNow() - datetime.timedelta(days=7)) 

1549 ) 

1550 

1551 

1552# DEPRECATED ATTRIBUTES HANDLING 

1553 

1554def __getattr__(attr: str) -> object: 

1555 if entry := { 1555 ↛ 1559line 1555 didn't jump to line 1559 because the condition on line 1555 was never true

1556 # stuff prior viur-core < 3.7 

1557 "GOOGLE_STORAGE_BUCKET": ("conf.main_app.file.get_bucket()", _private_bucket), 

1558 }.get(attr): 

1559 msg = f"{attr} was replaced by {entry[0]}" 

1560 warnings.warn(msg, DeprecationWarning, stacklevel=2) 

1561 logging.warning(msg, stacklevel=2) 

1562 return entry[1] 

1563 

1564 return super(__import__(__name__).__class__).__getattribute__(attr)