@@ -134,7 +134,7 @@ impl Event {
134
134
}
135
135
136
136
impl EventFormat for Event {
137
- type Data = Vec < Json > ;
137
+ type Data = Json ;
138
138
139
139
// convert the incoming json to a vector of json values
140
140
// also extract the arrow schema, tags and metadata from the incoming json
@@ -144,7 +144,7 @@ impl EventFormat for Event {
144
144
time_partition_limit : Option < NonZeroU32 > ,
145
145
custom_partitions : Option < & String > ,
146
146
schema_version : SchemaVersion ,
147
- ) -> anyhow:: Result < Self :: Data > {
147
+ ) -> anyhow:: Result < Vec < Self :: Data > > {
148
148
self . flatten_logs (
149
149
time_partition,
150
150
time_partition_limit,
@@ -161,17 +161,18 @@ impl EventFormat for Event {
161
161
schema_version : SchemaVersion ,
162
162
) -> anyhow:: Result < ( super :: EventSchema , bool ) > {
163
163
// collect all the keys from the json object being processed
164
- let fields = collect_keys ( data. iter ( ) ) ;
164
+ let fields = collect_keys ( data) ;
165
165
166
166
let mut is_first = false ;
167
167
let schema = if let Some ( schema) = derive_arrow_schema ( stored_schema, fields) {
168
168
schema
169
169
} else {
170
170
// TODO:
171
- let mut infer_schema = infer_json_schema_from_iterator (
172
- data. iter ( ) . map ( |obj| Ok ( Value :: Object ( obj. clone ( ) ) ) ) ,
173
- )
174
- . map_err ( |err| anyhow ! ( "Could not infer schema for this event due to err {:?}" , err) ) ?;
171
+ let mut infer_schema =
172
+ infer_json_schema_from_iterator ( [ Ok ( Value :: Object ( data. clone ( ) ) ) ] . into_iter ( ) )
173
+ . map_err ( |err| {
174
+ anyhow ! ( "Could not infer schema for this event due to err {:?}" , err)
175
+ } ) ?;
175
176
let new_infer_schema = super :: update_field_type_in_schema (
176
177
Arc :: new ( infer_schema) ,
177
178
Some ( stored_schema) ,
@@ -200,10 +201,7 @@ impl EventFormat for Event {
200
201
. collect ( )
201
202
} ;
202
203
203
- if data
204
- . iter ( )
205
- . any ( |value| fields_mismatch ( & schema, value, schema_version) )
206
- {
204
+ if fields_mismatch ( & schema, data, schema_version) {
207
205
return Err ( anyhow ! (
208
206
"Could not process this event due to mismatch in datatype"
209
207
) ) ;
@@ -215,14 +213,14 @@ impl EventFormat for Event {
215
213
}
216
214
217
215
// Convert a slice of Data values (type defined above) into a single arrow record batch
218
- fn decode ( data : Self :: Data , schema : Arc < Schema > ) -> anyhow:: Result < RecordBatch > {
216
+ fn decode ( data : & [ Self :: Data ] , schema : Arc < Schema > ) -> anyhow:: Result < RecordBatch > {
219
217
let array_capacity = round_upto_multiple_of_64 ( data. len ( ) ) ;
220
218
let mut reader = ReaderBuilder :: new ( schema)
221
219
. with_batch_size ( array_capacity)
222
220
. with_coerce_primitive ( false )
223
221
. build_decoder ( ) ?;
224
222
225
- reader. serialize ( & data) ?;
223
+ reader. serialize ( data) ?;
226
224
match reader. flush ( ) {
227
225
Ok ( Some ( recordbatch) ) => Ok ( recordbatch) ,
228
226
Err ( err) => Err ( anyhow ! ( "Failed to create recordbatch due to {:?}" , err) ) ,
@@ -248,16 +246,18 @@ impl EventFormat for Event {
248
246
custom_partitions. as_ref ( ) ,
249
247
schema_version,
250
248
) ?;
251
- let ( schema, is_first_event) = Self :: infer_schema (
252
- & data,
253
- & stored_schema,
254
- time_partition. as_ref ( ) ,
255
- static_schema_flag,
256
- schema_version,
257
- ) ?;
258
249
250
+ let mut is_first_event = false ;
259
251
let mut partitions = HashMap :: new ( ) ;
260
252
for json in data {
253
+ let ( schema, is_first) = Self :: infer_schema (
254
+ & json,
255
+ & stored_schema,
256
+ time_partition. as_ref ( ) ,
257
+ static_schema_flag,
258
+ schema_version,
259
+ ) ?;
260
+ is_first_event = is_first_event || is_first;
261
261
let custom_partition_values = match custom_partitions. as_ref ( ) {
262
262
Some ( custom_partitions) => {
263
263
let custom_partitions = custom_partitions. split ( ',' ) . collect_vec ( ) ;
@@ -273,7 +273,7 @@ impl EventFormat for Event {
273
273
274
274
let batch = Self :: into_recordbatch (
275
275
p_timestamp,
276
- vec ! [ json] ,
276
+ & [ json] ,
277
277
& schema,
278
278
time_partition. as_ref ( ) ,
279
279
schema_version,
@@ -368,15 +368,8 @@ fn derive_arrow_schema(
368
368
369
369
// Returns a list of keys that are present in the given iterable of JSON objects
370
370
// Returns None if even one of the value is not an Object
371
- fn collect_keys < ' a > ( objects : impl Iterator < Item = & ' a Json > ) -> HashSet < & ' a str > {
372
- let mut keys = HashSet :: new ( ) ;
373
- for object in objects {
374
- for key in object. keys ( ) {
375
- keys. insert ( key. as_str ( ) ) ;
376
- }
377
- }
378
-
379
- keys
371
+ fn collect_keys ( object : & Json ) -> HashSet < & str > {
372
+ object. keys ( ) . map ( |k| k. as_str ( ) ) . collect ( )
380
373
}
381
374
382
375
// Returns true when the field doesn't exist in schema or has an invalid type
@@ -515,9 +508,9 @@ mod tests {
515
508
. to_data ( None , None , None , SchemaVersion :: V0 )
516
509
. unwrap ( ) ;
517
510
let ( schema, _) =
518
- Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
511
+ Event :: infer_schema ( & data[ 0 ] , & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
519
512
let rb =
520
- Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
513
+ Event :: into_recordbatch ( Utc :: now ( ) , & data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
521
514
522
515
assert_eq ! ( rb. num_rows( ) , 1 ) ;
523
516
assert_eq ! ( rb. num_columns( ) , 4 ) ;
@@ -548,9 +541,9 @@ mod tests {
548
541
. to_data ( None , None , None , SchemaVersion :: V0 )
549
542
. unwrap ( ) ;
550
543
let ( schema, _) =
551
- Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
544
+ Event :: infer_schema ( & data[ 0 ] , & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
552
545
let rb =
553
- Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
546
+ Event :: into_recordbatch ( Utc :: now ( ) , & data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
554
547
555
548
assert_eq ! ( rb. num_rows( ) , 1 ) ;
556
549
assert_eq ! ( rb. num_columns( ) , 3 ) ;
@@ -583,9 +576,9 @@ mod tests {
583
576
. to_data ( None , None , None , SchemaVersion :: V0 )
584
577
. unwrap ( ) ;
585
578
let ( schema, _) =
586
- Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
579
+ Event :: infer_schema ( & data[ 0 ] , & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
587
580
let rb =
588
- Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
581
+ Event :: into_recordbatch ( Utc :: now ( ) , & data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
589
582
590
583
assert_eq ! ( rb. num_rows( ) , 1 ) ;
591
584
assert_eq ! ( rb. num_columns( ) , 3 ) ;
@@ -619,7 +612,9 @@ mod tests {
619
612
. to_data ( None , None , None , SchemaVersion :: V0 )
620
613
. unwrap ( ) ;
621
614
622
- assert ! ( Event :: infer_schema( & data, & store_schema, None , false , SchemaVersion :: V0 ) . is_err( ) ) ;
615
+ assert ! (
616
+ Event :: infer_schema( & data[ 0 ] , & store_schema, None , false , SchemaVersion :: V0 ) . is_err( )
617
+ ) ;
623
618
}
624
619
625
620
#[ test]
@@ -639,9 +634,9 @@ mod tests {
639
634
. to_data ( None , None , None , SchemaVersion :: V0 )
640
635
. unwrap ( ) ;
641
636
let ( schema, _) =
642
- Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
637
+ Event :: infer_schema ( & data[ 0 ] , & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
643
638
let rb =
644
- Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
639
+ Event :: into_recordbatch ( Utc :: now ( ) , & data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
645
640
646
641
assert_eq ! ( rb. num_rows( ) , 1 ) ;
647
642
assert_eq ! ( rb. num_columns( ) , 1 ) ;
@@ -670,9 +665,9 @@ mod tests {
670
665
. to_data ( None , None , None , SchemaVersion :: V0 )
671
666
. unwrap ( ) ;
672
667
let ( schema, _) =
673
- Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
668
+ Event :: infer_schema ( & data[ 1 ] , & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
674
669
let rb =
675
- Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
670
+ Event :: into_recordbatch ( Utc :: now ( ) , & data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
676
671
677
672
assert_eq ! ( rb. num_rows( ) , 3 ) ;
678
673
assert_eq ! ( rb. num_columns( ) , 4 ) ;
@@ -723,9 +718,9 @@ mod tests {
723
718
. to_data ( None , None , None , SchemaVersion :: V0 )
724
719
. unwrap ( ) ;
725
720
let ( schema, _) =
726
- Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
721
+ Event :: infer_schema ( & data[ 1 ] , & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
727
722
let rb =
728
- Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
723
+ Event :: into_recordbatch ( Utc :: now ( ) , & data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
729
724
730
725
assert_eq ! ( rb. num_rows( ) , 3 ) ;
731
726
assert_eq ! ( rb. num_columns( ) , 4 ) ;
@@ -775,9 +770,9 @@ mod tests {
775
770
. to_data ( None , None , None , SchemaVersion :: V0 )
776
771
. unwrap ( ) ;
777
772
let ( schema, _) =
778
- Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
773
+ Event :: infer_schema ( & data[ 0 ] , & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
779
774
let rb =
780
- Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
775
+ Event :: into_recordbatch ( Utc :: now ( ) , & data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
781
776
782
777
assert_eq ! ( rb. num_rows( ) , 3 ) ;
783
778
assert_eq ! ( rb. num_columns( ) , 4 ) ;
@@ -797,23 +792,12 @@ mod tests {
797
792
798
793
#[ test]
799
794
fn arr_schema_mismatch ( ) {
800
- let json = json ! ( [
801
- {
802
- "a" : null,
803
- "b" : "hello" ,
804
- "c" : 1.24
805
- } ,
806
- {
807
- "a" : 1 ,
808
- "b" : "hello" ,
809
- "c" : 1
810
- } ,
811
- {
812
- "a" : 1 ,
813
- "b" : "hello" ,
814
- "c" : null
815
- } ,
816
- ] ) ;
795
+ let json = json ! (
796
+ {
797
+ "a" : 1 ,
798
+ "b" : "hello" ,
799
+ "c" : 1
800
+ } ) ;
817
801
818
802
let store_schema = fields_to_map (
819
803
[
@@ -824,11 +808,14 @@ mod tests {
824
808
. into_iter ( ) ,
825
809
) ;
826
810
827
- let data = Event :: new ( json, 0 /* doesn't matter */ , LogSource :: Json )
828
- . to_data ( None , None , None , SchemaVersion :: V0 )
829
- . unwrap ( ) ;
830
-
831
- assert ! ( Event :: infer_schema( & data, & store_schema, None , false , SchemaVersion :: V0 ) . is_err( ) ) ;
811
+ assert ! ( Event :: infer_schema(
812
+ json. as_object( ) . unwrap( ) ,
813
+ & store_schema,
814
+ None ,
815
+ false ,
816
+ SchemaVersion :: V0
817
+ )
818
+ . is_err( ) ) ;
832
819
}
833
820
834
821
#[ test]
@@ -860,9 +847,9 @@ mod tests {
860
847
. to_data ( None , None , None , SchemaVersion :: V0 )
861
848
. unwrap ( ) ;
862
849
let ( schema, _) =
863
- Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
850
+ Event :: infer_schema ( & data[ 3 ] , & store_schema, None , false , SchemaVersion :: V0 ) . unwrap ( ) ;
864
851
let rb =
865
- Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
852
+ Event :: into_recordbatch ( Utc :: now ( ) , & data, & schema, None , SchemaVersion :: V0 ) . unwrap ( ) ;
866
853
867
854
assert_eq ! ( rb. num_rows( ) , 4 ) ;
868
855
assert_eq ! ( rb. num_columns( ) , 5 ) ;
@@ -938,9 +925,9 @@ mod tests {
938
925
. to_data ( None , None , None , SchemaVersion :: V1 )
939
926
. unwrap ( ) ;
940
927
let ( schema, _) =
941
- Event :: infer_schema ( & data, & store_schema, None , false , SchemaVersion :: V1 ) . unwrap ( ) ;
928
+ Event :: infer_schema ( & data[ 3 ] , & store_schema, None , false , SchemaVersion :: V1 ) . unwrap ( ) ;
942
929
let rb =
943
- Event :: into_recordbatch ( Utc :: now ( ) , data, & schema, None , SchemaVersion :: V1 ) . unwrap ( ) ;
930
+ Event :: into_recordbatch ( Utc :: now ( ) , & data, & schema, None , SchemaVersion :: V1 ) . unwrap ( ) ;
944
931
945
932
assert_eq ! ( rb. num_rows( ) , 4 ) ;
946
933
assert_eq ! ( rb. num_columns( ) , 5 ) ;
0 commit comments