5
5
6
6
from typing import Optional , Tuple , List
7
7
8
+ import base64
8
9
import json
9
10
import msgpack
10
11
import pyarrow .fs
13
14
import deltacat
14
15
15
16
from deltacat .constants import (
17
+ METAFILE_FORMAT ,
16
18
REVISION_DIR_NAME ,
17
19
METAFILE_EXT ,
18
20
TXN_DIR_NAME ,
22
24
from deltacat .storage .model .list_result import ListResult
23
25
from deltacat .storage .model .locator import Locator
24
26
from deltacat .storage .model .types import TransactionOperationType
25
- from deltacat .utils .common import env_string
26
27
from deltacat .utils .filesystem import (
27
28
resolve_path_and_filesystem ,
28
29
list_directory ,
29
30
get_file_info ,
30
31
)
31
32
32
- DELTACAT_METAFILE_FORMAT = env_string ("DELTACAT_METAFILE_FORMAT" , "msgpack" )
33
-
34
33
35
34
class MetafileRevisionInfo (dict ):
36
35
"""
@@ -500,7 +499,7 @@ def read(
500
499
cls ,
501
500
path : str ,
502
501
filesystem : Optional [pyarrow .fs .FileSystem ] = None ,
503
- format : str = DELTACAT_METAFILE_FORMAT ,
502
+ format : Optional [ str ] = METAFILE_FORMAT ,
504
503
) -> Metafile :
505
504
"""
506
505
Read a metadata file and return the deserialized object.
@@ -513,11 +512,19 @@ def read(
513
512
path , filesystem = resolve_path_and_filesystem (path , filesystem )
514
513
with filesystem .open_input_stream (path ) as file :
515
514
binary = file .readall ()
516
- loader = {
517
- "json" : lambda b : json .loads (b .decode ("utf-8" )),
515
+ reader = {
516
+ "json" : lambda b : json .loads (
517
+ b .decode ("utf-8" ),
518
+ object_hook = lambda obj : {
519
+ k : base64 .b64decode (v )
520
+ if isinstance (v , str ) and v .startswith ("b64:" )
521
+ else v
522
+ for k , v in obj .items ()
523
+ },
524
+ ),
518
525
"msgpack" : msgpack .loads ,
519
526
}[format ]
520
- obj = cls (** loader (binary )).from_serializable (path , filesystem )
527
+ obj = cls (** reader (binary )).from_serializable (path , filesystem )
521
528
return obj
522
529
523
530
def write_txn (
@@ -560,7 +567,7 @@ def write(
560
567
self ,
561
568
path : str ,
562
569
filesystem : Optional [pyarrow .fs .FileSystem ] = None ,
563
- format : str = DELTACAT_METAFILE_FORMAT ,
570
+ format : Optional [ str ] = METAFILE_FORMAT ,
564
571
) -> None :
565
572
"""
566
573
Serialize and write this object to a metadata file.
@@ -570,17 +577,29 @@ def write(
570
577
the catalog root path.
571
578
param: format: Format to use for serializing the metadata file.
572
579
"""
580
+ if format not in {"json" , "msgpack" }:
581
+ raise ValueError (
582
+ f"Unsupported format '{ format } '. Must be 'json' or 'msgpack'."
583
+ )
584
+
573
585
if not filesystem :
574
586
path , filesystem = resolve_path_and_filesystem (path , filesystem )
575
587
revision_dir_path = posixpath .dirname (path )
576
588
filesystem .create_dir (revision_dir_path , recursive = True )
589
+
590
+ writer = {
591
+ "json" : lambda data : json .dumps (
592
+ data ,
593
+ indent = 4 ,
594
+ default = lambda b : base64 .b64encode (b ).decode ("utf-8" )
595
+ if isinstance (b , bytes )
596
+ else b ,
597
+ ).encode ("utf-8" ),
598
+ "msgpack" : msgpack .dumps ,
599
+ }[format ]
600
+
577
601
with filesystem .open_output_stream (path ) as file :
578
- file .write (
579
- {
580
- "json" : lambda data : json .dumps (data , indent = 4 ).encode ("utf-8" ),
581
- "msgpack" : msgpack .dumps ,
582
- }[format ](self .to_serializable ())
583
- )
602
+ file .write (writer (self .to_serializable ()))
584
603
585
604
def equivalent_to (self , other : Metafile ) -> bool :
586
605
"""
0 commit comments