@@ -140,37 +140,43 @@ impl EventFormat for Event {
140
140
// also extract the arrow schema, tags and metadata from the incoming json
141
141
fn to_data (
142
142
self ,
143
- static_schema_flag : bool ,
144
- stored_schema : & HashMap < String , Arc < Field > > ,
145
143
time_partition : Option < & String > ,
146
144
time_partition_limit : Option < NonZeroU32 > ,
147
145
custom_partitions : Option < & String > ,
148
146
schema_version : SchemaVersion ,
149
- ) -> anyhow:: Result < ( Self :: Data , Vec < Arc < Field > > , bool ) > {
150
- let flattened = self . flatten_logs (
147
+ ) -> anyhow:: Result < Self :: Data > {
148
+ self . flatten_logs (
151
149
time_partition,
152
150
time_partition_limit,
153
151
custom_partitions,
154
152
schema_version,
155
- ) ?;
153
+ )
154
+ }
156
155
156
+ fn infer_schema (
157
+ data : & Self :: Data ,
158
+ stored_schema : & HashMap < String , Arc < Field > > ,
159
+ time_partition : Option < & String > ,
160
+ static_schema_flag : bool ,
161
+ schema_version : SchemaVersion ,
162
+ ) -> anyhow:: Result < ( super :: EventSchema , bool ) > {
157
163
// collect all the keys from all the json objects in the request body
158
- let fields = collect_keys ( flattened . iter ( ) ) ;
164
+ let fields = collect_keys ( data . iter ( ) ) ;
159
165
160
166
let mut is_first = false ;
161
167
let schema = if let Some ( schema) = derive_arrow_schema ( stored_schema, fields) {
162
168
schema
163
169
} else {
164
170
// TODO:
165
171
let mut infer_schema = infer_json_schema_from_iterator (
166
- flattened . iter ( ) . map ( |obj| Ok ( Value :: Object ( obj. clone ( ) ) ) ) ,
172
+ data . iter ( ) . map ( |obj| Ok ( Value :: Object ( obj. clone ( ) ) ) ) ,
167
173
)
168
174
. map_err ( |err| anyhow ! ( "Could not infer schema for this event due to err {:?}" , err) ) ?;
169
175
let new_infer_schema = super :: update_field_type_in_schema (
170
176
Arc :: new ( infer_schema) ,
171
177
Some ( stored_schema) ,
172
178
time_partition,
173
- Some ( & flattened ) ,
179
+ Some ( data ) ,
174
180
schema_version,
175
181
) ;
176
182
infer_schema = Schema :: new ( new_infer_schema. fields ( ) . clone ( ) ) ;
@@ -194,7 +200,7 @@ impl EventFormat for Event {
194
200
. collect ( )
195
201
} ;
196
202
197
- if flattened
203
+ if data
198
204
. iter ( )
199
205
. any ( |value| fields_mismatch ( & schema, value, schema_version) )
200
206
{
@@ -205,7 +211,7 @@ impl EventFormat for Event {
205
211
206
212
let schema = Self :: prepare_and_validate_schema ( schema, stored_schema, static_schema_flag) ?;
207
213
208
- Ok ( ( flattened , schema, is_first) )
214
+ Ok ( ( schema, is_first) )
209
215
}
210
216
211
217
// Convert the Data type (defined above) to arrow record batch
@@ -231,19 +237,24 @@ impl EventFormat for Event {
231
237
let static_schema_flag = stream. get_static_schema_flag ( ) ;
232
238
let custom_partitions = stream. get_custom_partition ( ) ;
233
239
let schema_version = stream. get_schema_version ( ) ;
234
- let storage_schema = stream. get_schema_raw ( ) ;
240
+ let stored_schema = stream. get_schema_raw ( ) ;
235
241
let stream_type = stream. get_stream_type ( ) ;
236
242
237
243
let p_timestamp = self . p_timestamp ;
238
244
let origin_size = self . origin_size ;
239
- let ( data, schema, is_first_event) = self . to_data (
240
- static_schema_flag,
241
- & storage_schema,
245
+ let data = self . to_data (
242
246
time_partition. as_ref ( ) ,
243
247
time_partition_limit,
244
248
custom_partitions. as_ref ( ) ,
245
249
schema_version,
246
250
) ?;
251
+ let ( schema, is_first_event) = Self :: infer_schema (
252
+ & data,
253
+ & stored_schema,
254
+ time_partition. as_ref ( ) ,
255
+ static_schema_flag,
256
+ schema_version,
257
+ ) ?;
247
258
248
259
let mut partitions = HashMap :: new ( ) ;
249
260
for json in data {
@@ -500,9 +511,11 @@ mod tests {
500
511
} ) ;
501
512
502
513
let store_schema = HashMap :: default ( ) ;
503
- let ( data, schema , _ ) = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
504
- . to_data ( false , & store_schema , None , None , None , SchemaVersion :: V0 )
514
+ let data = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
515
+ . to_data ( None , None , None , SchemaVersion :: V0 )
505
516
. unwrap ( ) ;
517
+ let ( schema, _) =
518
+ Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
506
519
let rb =
507
520
Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
508
521
@@ -531,9 +544,11 @@ mod tests {
531
544
} ) ;
532
545
533
546
let store_schema = HashMap :: default ( ) ;
534
- let ( data, schema , _ ) = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
535
- . to_data ( false , & store_schema , None , None , None , SchemaVersion :: V0 )
547
+ let data = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
548
+ . to_data ( None , None , None , SchemaVersion :: V0 )
536
549
. unwrap ( ) ;
550
+ let ( schema, _) =
551
+ Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
537
552
let rb =
538
553
Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
539
554
@@ -564,9 +579,11 @@ mod tests {
564
579
]
565
580
. into_iter ( ) ,
566
581
) ;
567
- let ( data, schema , _ ) = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
568
- . to_data ( false , & store_schema , None , None , None , SchemaVersion :: V0 )
582
+ let data = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
583
+ . to_data ( None , None , None , SchemaVersion :: V0 )
569
584
. unwrap ( ) ;
585
+ let ( schema, _) =
586
+ Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
570
587
let rb =
571
588
Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
572
589
@@ -598,9 +615,11 @@ mod tests {
598
615
. into_iter ( ) ,
599
616
) ;
600
617
601
- assert ! ( Event :: new( json, 0 /* doesn't matter */ , LogSource :: Json )
602
- . to_data( false , & store_schema, None , None , None , SchemaVersion :: V0 , )
603
- . is_err( ) ) ;
618
+ let data = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
619
+ . to_data ( None , None , None , SchemaVersion :: V0 )
620
+ . unwrap ( ) ;
621
+
622
+ assert ! ( Event :: infer_schema( & data, & store_schema, None , false , SchemaVersion :: V0 ) . is_err( ) ) ;
604
623
}
605
624
606
625
#[ test]
@@ -616,9 +635,11 @@ mod tests {
616
635
. into_iter ( ) ,
617
636
) ;
618
637
619
- let ( data, schema , _ ) = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
620
- . to_data ( false , & store_schema , None , None , None , SchemaVersion :: V0 )
638
+ let data = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
639
+ . to_data ( None , None , None , SchemaVersion :: V0 )
621
640
. unwrap ( ) ;
641
+ let ( schema, _) =
642
+ Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
622
643
let rb =
623
644
Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
624
645
@@ -645,9 +666,11 @@ mod tests {
645
666
] ) ;
646
667
647
668
let store_schema = HashMap :: new ( ) ;
648
- let ( data, schema , _ ) = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
649
- . to_data ( false , & store_schema , None , None , None , SchemaVersion :: V0 )
669
+ let data = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
670
+ . to_data ( None , None , None , SchemaVersion :: V0 )
650
671
. unwrap ( ) ;
672
+ let ( schema, _) =
673
+ Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
651
674
let rb =
652
675
Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
653
676
@@ -696,9 +719,11 @@ mod tests {
696
719
] ) ;
697
720
698
721
let store_schema = HashMap :: new ( ) ;
699
- let ( data, schema , _ ) = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
700
- . to_data ( false , & store_schema , None , None , None , SchemaVersion :: V0 )
722
+ let data = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
723
+ . to_data ( None , None , None , SchemaVersion :: V0 )
701
724
. unwrap ( ) ;
725
+ let ( schema, _) =
726
+ Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
702
727
let rb =
703
728
Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
704
729
@@ -746,9 +771,11 @@ mod tests {
746
771
]
747
772
. into_iter ( ) ,
748
773
) ;
749
- let ( data, schema , _ ) = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
750
- . to_data ( false , & store_schema , None , None , None , SchemaVersion :: V0 )
774
+ let data = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
775
+ . to_data ( None , None , None , SchemaVersion :: V0 )
751
776
. unwrap ( ) ;
777
+ let ( schema, _) =
778
+ Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
752
779
let rb =
753
780
Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
754
781
@@ -797,9 +824,11 @@ mod tests {
797
824
. into_iter ( ) ,
798
825
) ;
799
826
800
- assert ! ( Event :: new( json, 0 /* doesn't matter */ , LogSource :: Json )
801
- . to_data( false , & store_schema, None , None , None , SchemaVersion :: V0 , )
802
- . is_err( ) ) ;
827
+ let data = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
828
+ . to_data ( None , None , None , SchemaVersion :: V0 )
829
+ . unwrap ( ) ;
830
+
831
+ assert ! ( Event :: infer_schema( & data, & store_schema, None , false , SchemaVersion :: V0 ) . is_err( ) ) ;
803
832
}
804
833
805
834
#[ test]
@@ -827,9 +856,11 @@ mod tests {
827
856
] ) ;
828
857
829
858
let store_schema = HashMap :: new ( ) ;
830
- let ( data, schema , _ ) = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
831
- . to_data ( false , & store_schema , None , None , None , SchemaVersion :: V0 )
859
+ let data = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
860
+ . to_data ( None , None , None , SchemaVersion :: V0 )
832
861
. unwrap ( ) ;
862
+ let ( schema, _) =
863
+ Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
833
864
let rb =
834
865
Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
835
866
@@ -903,9 +934,11 @@ mod tests {
903
934
] ) ;
904
935
905
936
let store_schema = HashMap :: new ( ) ;
906
- let ( data, schema , _ ) = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
907
- . to_data ( false , & store_schema , None , None , None , SchemaVersion :: V1 )
937
+ let data = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
938
+ . to_data ( None , None , None , SchemaVersion :: V1 )
908
939
. unwrap ( ) ;
940
+ let ( schema, _) =
941
+ Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V1 ) . unwrap ( ) ;
909
942
let rb =
910
943
Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V1 ) . unwrap ( ) ;
911
944
0 commit comments