Skip to content

Commit

Permalink
[fix](agg) Aggregating string types with null values may result in in…
Browse files Browse the repository at this point in the history
…correct result (#42067)

Aggregating string types with null values may result in incorrect result
because using the replace_column_data function can cause incorrect
offsets in the column.

A reproducible case:
```
CREATE TABLE `test_scan_keys_with_bool_type` (
    `col1` tinyint NOT NULL,
    `col2` int NOT NULL,
    `col3` tinyint NOT NULL,
    `col5` boolean REPLACE NOT NULL,
    `col4` datetime(2) REPLACE NOT NULL,
    `col6` double REPLACE_IF_NOT_NULL NULL,
    `col7` varchar(100) REPLACE_IF_NOT_NULL NULL
) ENGINE=OLAP
AGGREGATE KEY(`col1`, `col2`, `col3`)
DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1
PROPERTIES (
    "replication_allocation" = "tag.location.default: 1",
    "disable_auto_compaction" = "true"
);

insert into test_scan_keys_with_bool_type values
( -100 ,    1 ,  -82 ,    1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , NULL ),
( -100 ,    0 ,  -82 ,    1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , "hi" ),
( -100 ,    1 ,   92 ,    1 , '2024-02-16 04:37:37.00' ,   23423423.0324234 , NULL );

 insert into test_scan_keys_with_bool_type values
( -100 ,    1 ,  1 ,    1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , "doris" );

MySQL [test]> select * from test_scan_keys_with_bool_type;
+------+------+------+------+------------------------+---------------------+-------+
| col1 | col2 | col3 | col5 | col4                   | col6                | col7  |
+------+------+------+------+------------------------+---------------------+-------+
| -100 |    0 |  -82 |    1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | hi    |
| -100 |    1 |  -82 |    1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | NULL  |
| -100 |    1 |    1 |    1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | hidor |
| -100 |    1 |   92 |    1 | 2024-02-16 04:37:37.00 |    23423423.0324234 | NULL  |
+------+------+------+------+------------------------+---------------------+-------+
4 rows in set (0.057 sec)

MySQL [test]> set skip_storage_engine_merge = true; select * from test_scan_keys_with_bool_type;
+------+------+------+------+------------------------+---------------------+-------+
| col1 | col2 | col3 | col5 | col4                   | col6                | col7  |
+------+------+------+------+------------------------+---------------------+-------+
| -100 |    0 |  -82 |    1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | hi    |
| -100 |    1 |  -82 |    1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | NULL  |
| -100 |    1 |   92 |    1 | 2024-02-16 04:37:37.00 |    23423423.0324234 | NULL  |
| -100 |    1 |    1 |    1 | 2024-02-16 04:37:37.00 | -1299962421.9042821 | doris |
+------+------+------+------+------------------------+---------------------+-------+
4 rows in set (0.023 sec)
```
#33493 By supporting variant type
aggregation, this issue has been resolved.So versions after 2.1 do not
have this issue.
  • Loading branch information
liaoxin01 authored and xiaokang committed Nov 19, 2024
1 parent 0f65853 commit 4a6b26e
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 2 deletions.
4 changes: 2 additions & 2 deletions be/src/vec/olap/block_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -470,10 +470,10 @@ size_t BlockReader::_copy_agg_data() {
auto& dst_column = _stored_data_columns[idx];
if (_stored_has_variable_length_tag[idx]) {
//variable length type should replace ordered
dst_column->clear();
for (size_t i = 0; i < copy_size; i++) {
auto& ref = _stored_row_ref[i];
dst_column->replace_column_data(*ref.block->get_by_position(idx).column,
ref.row_pos, i);
dst_column->insert_from(*ref.block->get_by_position(idx).column, ref.row_pos);
}
} else {
for (auto& it : _temp_ref_map) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,9 @@ datetimev2_value_min_2 datetime(6) Yes false \N MIN
datetimev2_value_replace_2 datetime(6) Yes false \N REPLACE
datetimev2_value_replace_if_not_null_2 datetime(6) Yes false \N REPLACE_IF_NOT_NULL

-- !string_agg_table_with_null --
-100 0 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 hi
-100 1 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N
-100 1 1 true 2024-02-16T04:37:37 1.399962421904282E9 doris
-100 1 92 true 2024-02-16T04:37:37 2.34234230324234E7 NULL

Original file line number Diff line number Diff line change
Expand Up @@ -99,4 +99,36 @@ suite("test_aggregate_table") {
qt_desc_date_table """desc date_agg"""
sql """DROP TABLE date_agg"""

sql """DROP TABLE IF EXISTS test_string_agg_with_null"""
sql """
CREATE TABLE `test_string_agg_with_null` (
`col1` tinyint NOT NULL,
`col2` int NOT NULL,
`col3` tinyint NOT NULL,
`col5` boolean REPLACE NOT NULL,
`col4` datetime(2) REPLACE NOT NULL,
`col6` double REPLACE_IF_NOT_NULL NULL,
`col7` varchar(100) REPLACE_IF_NOT_NULL NULL
) ENGINE=OLAP
AGGREGATE KEY(`col1`, `col2`, `col3`)
DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"disable_auto_compaction" = "true"
);
"""

sql """ insert into test_string_agg_with_null values
( -100 , 1 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , NULL ),
( -100 , 0 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , "hi" ),
( -100 , 1 , 92 , 1 , '2024-02-16 04:37:37.00' , 23423423.0324234 , "NULL" );
"""

sql """ insert into test_string_agg_with_null values
( -100 , 1 , 1 , 1 , '2024-02-16 04:37:37.00' , 1399962421.904282 , "doris" );
"""

qt_string_agg_table_with_null """ select * from test_string_agg_with_null """
sql """DROP TABLE test_string_agg_with_null"""

}

0 comments on commit 4a6b26e

Please sign in to comment.