Skip to content

Commit 91eec42

Browse files
committed
Merge branch 'tickets/DM-43625'
2 parents 3214c1e + 14696fb commit 91eec42

File tree

3 files changed

+127
-7
lines changed

3 files changed

+127
-7
lines changed

src/replica/mysql/DatabaseMySQLGenerator.cc

+47-5
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,7 @@ string QueryGenerator::createTable(SqlId const& sqlId, bool ifNotExists, list<Sq
104104

105105
string QueryGenerator::insertPacked(string const& tableName, string const& packedColumns,
106106
vector<string> const& packedValues) const {
107-
if (packedValues.empty()) {
108-
string const msg = "QueryGenerator::" + string(__func__) +
109-
" the collection of the packed values can not be empty.";
110-
throw invalid_argument(msg);
111-
}
107+
_assertNotEmpty(__func__, packedValues);
112108
string sql = "INSERT INTO " + id(tableName).str + " (" + packedColumns + ") VALUES ";
113109
for (size_t i = 0, size = packedValues.size(); i < size; ++i) {
114110
if (i != 0) sql += ",";
@@ -117,6 +113,52 @@ string QueryGenerator::insertPacked(string const& tableName, string const& packe
117113
return sql;
118114
}
119115

116+
/**
 * Pack the rows into as few "INSERT INTO ... VALUES (row),(row),..." statements
 * as possible, so that the length of each statement (as reported by
 * string::size()) does not exceed 'maxQueryLength'.
 *
 * Rows are emitted in the input order. A new statement is started only when
 * appending the next row to the current one would exceed the limit.
 *
 * @param tableName The name of the table (passed through id() before use).
 * @param packedColumns The packed column names injected verbatim between "(" and ")".
 * @param packedValues The packed rows; each row is wrapped in "(" and ")".
 * @param maxQueryLength The upper limit for the length of each generated statement.
 * @return The generated statements (at least one).
 * @throws std::invalid_argument If the collection of rows is empty, or if a statement
 *   carrying a single row would still be longer than the limit.
 */
vector<string> QueryGenerator::insertPacked(string const& tableName, string const& packedColumns,
                                            vector<string> const& packedValues,
                                            size_t const maxQueryLength) const {
    _assertNotEmpty(__func__, packedValues);

    // The leading part of the statement is the same for every generated query,
    // hence it's built once rather than each time a new query is started.
    string const prefix = "INSERT INTO " + id(tableName).str + " (" + packedColumns + ") VALUES ";

    vector<string> queries;
    string sql = prefix;
    size_t numRowsPacked = 0;
    for (string const& row : packedValues) {
        // 3 extra characters are needed for appending a subsequent row: ",(" + row + ")"
        if (numRowsPacked != 0 && sql.size() + 3 + row.size() > maxQueryLength) {
            // -- The row doesn't fit: flush the current query and start the next one
            queries.push_back(std::move(sql));
            sql = prefix;
            numRowsPacked = 0;
        }
        if (numRowsPacked == 0) {
            // 2 extra characters are needed for injecting the first row: "(" + row + ")"
            size_t const projectedQueryLength = sql.size() + 2 + row.size();
            if (projectedQueryLength > maxQueryLength) {
                // Even a single-row query would violate the limit.
                string const msg = "QueryGenerator::" + string(__func__) + " the generated query length " +
                                   to_string(projectedQueryLength) + " exceeds the limit " +
                                   to_string(maxQueryLength);
                throw invalid_argument(msg);
            }
        } else {
            sql += ',';
        }
        // Append in place to avoid allocating a temporary string for each row.
        sql += '(';
        sql += row;
        sql += ')';
        ++numRowsPacked;
    }
    // -- Flush the last query. It always carries at least one row at this point since
    //    the input collection is not empty and each row is either packed or rejected.
    queries.push_back(std::move(sql));
    return queries;
}
155+
156+
// Input guard used by the insertPacked overloads: does nothing for a non-empty
// collection, and throws std::invalid_argument (with the caller's name 'func'
// embedded into the message) for an empty one.
void QueryGenerator::_assertNotEmpty(string const& func, vector<string> const& coll) {
    if (!coll.empty()) return;
    string const msg = "QueryGenerator::" + func + " the input collection is empty.";
    throw invalid_argument(msg);
}
161+
120162
string QueryGenerator::showVars(SqlVarScope scope, string const& pattern) const {
121163
string const like = pattern.empty() ? string() : " LIKE " + val(pattern).str;
122164
switch (scope) {

src/replica/mysql/DatabaseMySQLGenerator.h

+38-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#define LSST_QSERV_REPLICA_DATABASEMYSQLGENERATOR_H
2323

2424
// System headers
25+
#include <list>
2526
#include <memory>
2627
#include <string>
2728
#include <tuple>
@@ -402,6 +403,33 @@ class QueryGenerator {
402403
std::string insertPacked(std::string const& tableName, std::string const& packedColumns,
403404
std::vector<std::string> const& packedValues) const;
404405

406+
/**
407+
* Generate a collection of complete INSERT statements for the given input, where
408+
* the maximum size of each query string is determined by a value of
409+
* the parameter 'maxQueryLength'.
410+
*
411+
* Here is an example:
412+
* @code
413+
* std::size_t const maxQueryLength = 1024*1024;
414+
* QueryGenerator const g(conn);
415+
* std::vector<std::string> const queries =
416+
* g.insertPacked("table",
417+
* g.packIds("id", "timestamp", "name"),
418+
* {g.packVals(Sql::NULL_, Sql::NOW, "John Smith"),
419+
* g.packVals(Sql::NULL_, Sql::NOW, "Vera Rubin"),
420+
* g.packVals(Sql::NULL_, Sql::NOW, "Igor Gaponenko")},
* maxQueryLength);
421+
* @endcode
422+
* @param tableName The name of a table where the rows will be inserted.
423+
* @param packedColumns A collection of column names packed into a string.
424+
* @param packedValues A collection of the packed rows.
* @param maxQueryLength The maximum allowed length of each generated query string.
425+
* @return A collection of the generated queries
426+
* @throws std::invalid_argument If the collection of rows is empty, or if it has
427+
* rows which are too large for generating queries constrained by the given limit.
428+
*/
429+
std::vector<std::string> insertPacked(std::string const& tableName, std::string const& packedColumns,
430+
std::vector<std::string> const& packedValues,
431+
std::size_t const maxQueryLength) const;
432+
405433
/**
406434
* @brief Generate and return an SQL expression for a binary operator applied
407435
* over a pair of the pre-processed expressions.
@@ -1083,6 +1111,14 @@ class QueryGenerator {
10831111
std::string call(DoNotProcess const& packedProcAndArgs) const;
10841112

10851113
private:
1114+
/**
1115+
* Check if the specified collection is not empty.
1116+
* @param func A scope from which the check was requested.
1117+
* @param coll A collection to be evaluated.
1118+
* @throws std::invalid_argument If the input collection is empty.
1119+
*/
1120+
static void _assertNotEmpty(std::string const& func, std::vector<std::string> const& coll);
1121+
10861122
/// @return A string that's ready to be included into the queries.
10871123
template <typename... Targs>
10881124
std::string _values(Targs... Fargs) const {
@@ -1151,8 +1187,8 @@ class QueryGenerator {
11511187
/// @param scope The scope of the variable (SESSION, GLOBAL, etc.)
11521188
/// @param packedVars Partial SQL for setting values of the variables.
11531189
/// @return The well-formed SQL for setting the variables
1154-
/// @throws std::invalid_argument If a value of \param packedVars is empty,
1155-
/// or in case if the specified value of \param scope is not supported.
1190+
/// @throws std::invalid_argument If a value of the parameter 'packedVars' is empty,
1191+
/// or in case if the specified value of the parameter 'scope' is not supported.
11561192
std::string _setVars(SqlVarScope scope, std::string const& packedVars) const;
11571193

11581194
std::string _createIndex(SqlId const& tableId, std::string const& indexName, std::string const& spec,

src/replica/tests/testQueryGenerator.cc

+42
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@
2323
#include "lsst/log/Log.h"
2424

2525
// System headers
26+
#include <algorithm>
2627
#include <list>
28+
#include <stdexcept>
2729
#include <string>
2830
#include <tuple>
2931
#include <vector>
@@ -435,6 +437,46 @@ BOOST_AUTO_TEST_CASE(QueryGeneratorTest) {
435437
BOOST_CHECK_EQUAL(test.first, test.second);
436438
}
437439

440+
// Test bulk insert of many rows
441+
vector<string> const expectedInsertQueries1 = {"INSERT INTO `Table` (`num`,`str`) VALUES (1,'a')",
442+
"INSERT INTO `Table` (`num`,`str`) VALUES (2,'b')",
443+
"INSERT INTO `Table` (`num`,`str`) VALUES (3,'c')",
444+
"INSERT INTO `Table` (`num`,`str`) VALUES (4,'d')"};
445+
vector<string> const expectedInsertQueries2 = {
446+
"INSERT INTO `Table` (`num`,`str`) VALUES (1,'a'),(2,'b')",
447+
"INSERT INTO `Table` (`num`,`str`) VALUES (3,'c'),(4,'d')"};
448+
vector<string> const expectedInsertQueries3 = {
449+
"INSERT INTO `Table` (`num`,`str`) VALUES (1,'a'),(2,'b'),(3,'c')",
450+
"INSERT INTO `Table` (`num`,`str`) VALUES (4,'d')"};
451+
vector<string> const expectedInsertQueries4 = {
452+
"INSERT INTO `Table` (`num`,`str`) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d')"};
453+
454+
// The test should throw because the collection of rows is empty
455+
auto const packedIds = g.packIds("num", "str");
456+
vector<string> const emptyInsertData;
457+
size_t maxQueryLength = expectedInsertQueries4[0].size();
458+
BOOST_CHECK_THROW(g.insertPacked("Table", packedIds, emptyInsertData, maxQueryLength), invalid_argument);
459+
460+
// The test should throw because the generated statements would exceed a limit.
461+
vector<string> const insertData = {g.packVals(1, "a"), g.packVals(2, "b"), g.packVals(3, "c"),
462+
g.packVals(4, "d")};
463+
maxQueryLength = expectedInsertQueries1[0].size() - 1;
464+
BOOST_CHECK_THROW(g.insertPacked("Table", packedIds, insertData, maxQueryLength), invalid_argument);
465+
LOGS_INFO("QueryGenerator #1");
466+
467+
// None of the following tests should throw
468+
vector<vector<string>> const expectedInsertQueries = {expectedInsertQueries1, expectedInsertQueries2,
469+
expectedInsertQueries3, expectedInsertQueries4};
470+
for (auto const& expectedQueries : expectedInsertQueries) {
471+
LOGS_INFO("QueryGenerator #2");
472+
size_t const maxQueryLength = expectedQueries[0].size();
473+
vector<string> const generatedQueries =
474+
g.insertPacked("Table", packedIds, insertData, maxQueryLength);
475+
BOOST_CHECK_EQUAL(generatedQueries.size(), expectedQueries.size());
476+
for (size_t i = 0; i < min(generatedQueries.size(), expectedQueries.size()); ++i) {
477+
BOOST_CHECK_EQUAL(generatedQueries[i], expectedQueries[i]);
478+
}
479+
}
438480
LOGS_INFO("QueryGenerator test ends");
439481
}
440482

0 commit comments

Comments
 (0)