Skip to content
This repository was archived by the owner on Oct 18, 2023. It is now read-only.

Commit bc6cc0d

Browse files
Mark WongMark Wong
Mark Wong
authored and
Mark Wong
committed
Add table data generator
Basic functionality. Create a .ddf (data definition file) that specifies the number of rows and columns to generate data for. Currently only creates sequences.
1 parent 7d7bd27 commit bc6cc0d

17 files changed

+658
-0
lines changed

.gitignore

+6
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,15 @@ Testing
1717

1818
*.patch
1919

20+
config.h
21+
22+
src/bin/*
23+
!src/bin/CMakeLists.txt
24+
2025
src/test/*
2126
!/src/test/CMakeLists.txt
2227
!/src/test/*.c
2328
!/src/test/*.expected
2429
!/src/test/*.sh
2530
!/src/test/*.sql
31+
!/src/test/*.ddf

CMakeLists.txt

+6
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,10 @@ ENABLE_TESTING()
1313

1414
SET(CMAKE_C_FLAGS "-g -Wall")
1515

16+
IF(ENABLE_CASSERT EQUAL 1)
17+
SET(ENABLE_CASSERT 1)
18+
ELSE()
19+
UNSET(ENABLE_CASSERT)
20+
ENDIF(ENABLE_CASSERT EQUAL 1)
21+
1622
ADD_SUBDIRECTORY(src)

INSTALL

+5
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@ Installing
55
make
66
make install
77

8+
Debugging
9+
=========
10+
11+
Use `-DENABLE_CASSERT=1` when running `cmake` for additional debugging.
12+
813
Uninstalling
914
============
1015

README

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Touchstone is a library that can be used for random data generation.

doc/table.txt

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
Table definition files instruct the data generator how to generate data. Files
2+
should be named after the name of the table to generate with a .ddf extension.
3+
The resulting data files will have a .data extension. For example,
4+
tablename.ddf will generate a tablename.data file.
5+
6+
The file must contain the total number of rows to generate on the
7+
first line. Each subsequent line in the file will represent the columns to
8+
generate.
9+
10+
For example:
11+
12+
10
13+
s1
14+
15+
This will generate a data file with 10 rows and 1 column, where that column is
16+
a sequence of numbers starting from 1.

src/CMakeLists.txt

+7
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,10 @@ ADD_SUBDIRECTORY(test)
44
INSTALL(FILES touchstone.h DESTINATION "${INSTALLDEST}/include")
55
INSTALL(PROGRAMS libtouchstone.so DESTINATION "${INSTALLDEST}/lib")
66
SET_TARGET_PROPERTIES(touchstone PROPERTIES LINK_FLAGS "-lm")
7+
8+
CONFIGURE_FILE(
9+
${CMAKE_CURRENT_SOURCE_DIR}/config.h.in
10+
${CMAKE_CURRENT_BINARY_DIR}/config.h
11+
)
12+
13+
ADD_SUBDIRECTORY(bin)

src/bin/CMakeLists.txt

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
ADD_EXECUTABLE(${PROJECT_NAME}-generate-table-data ../generate-table-data.c)
2+
3+
INSTALL(PROGRAMS ${PROJECT_NAME}-generate-table-data
4+
DESTINATION "${INSTALLDEST}/bin")

src/config.h.in

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
#cmakedefine ENABLE_CASSERT @ENABLE_CASSERT@

src/generate-table-data.c

+288
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
/*
2+
* Copyright 2019 PostgreSQL Global Development Group
3+
*/
4+
5+
#include <stdio.h>
6+
#include <stdlib.h>
7+
#include <string.h>
8+
#include <getopt.h>
9+
#include <errno.h>
10+
#include <limits.h>
11+
12+
#include "config.h"
13+
14+
/* Size of the scratch buffer used to format a single column value. */
#define MAX_BUFFER_LEN 1024
/* Capacity of the column array in struct table_definition_t. */
#define MAX_COLS 255

/* Arguments of a sequence ('s') column. */
struct sequence_t
{
	/* Added to the zero-based row number to produce the emitted value. */
	long long arg1;
};

/* Per-type column arguments; sequence is the only member so far. */
union arguments_t
{
	struct sequence_t sequence;
};

/* One column definition as read from a data definition file. */
struct column_t
{
	/* First character of the definition line, e.g. 's' for a sequence. */
	char type;
	union arguments_t arguments;
};

/* Everything needed to generate the data for one table. */
struct table_definition_t
{
	/* Number of rows to generate. */
	long long rows;
	/* Number of entries of column[] that are in use. */
	int columns;
	struct column_t column[MAX_COLS];
};

/* Format value as decimal text into result. */
void sequence(char *result, long long value);
41+
42+
/*
 * Print the command line help to stdout.
 *
 * filename: program name shown on the usage line.
 */
void usage(char *filename)
{
	static const char *const option_help[] = {
		" -d <char> - column delimiter, default <tab>\n",
		" -f <filename> - data definition file\n",
		" -o <dir> - location to create data file else use stdout\n",
	};
	size_t i;

	printf("usage: %s [options]\n", filename);
	printf(" options:\n");
	for (i = 0; i < sizeof(option_help) / sizeof(option_help[0]); i++)
		printf("%s", option_help[i]);
}
50+
51+
int generate_data(FILE *stream, struct table_definition_t *table,
52+
char delimiter)
53+
{
54+
char str[MAX_BUFFER_LEN];
55+
int end = table->columns - 1;
56+
57+
for (long long row = 0; row < table->rows; row++) {
58+
for (long long col = 0; col < table->columns; col++) {
59+
switch (table->column[col].type) {
60+
case 's':
61+
sequence(str, row +
62+
((struct sequence_t *)
63+
&table->column[col].arguments)->arg1);
64+
fprintf(stream, "%s", str);
65+
break;
66+
default:
67+
fprintf(stderr, "ERROR: unhandled column definition: %c\n",
68+
table->column[col].type);
69+
return 7;
70+
}
71+
if (col < end)
72+
fprintf(stream, "%c", delimiter);
73+
}
74+
fprintf(stream, "\n");
75+
fflush(stream);
76+
}
77+
78+
return 0;
79+
}
80+
81+
int read_data_definition_file(struct table_definition_t *table, char *filename)
82+
{
83+
FILE *f;
84+
char *line = NULL;
85+
size_t len = 0;
86+
ssize_t nread;
87+
int *column;
88+
89+
fprintf(stderr, "reading %s\n", filename);
90+
91+
f = fopen(filename, "r");
92+
if (f == NULL) {
93+
fprintf(stderr, "ERROR: cannot open data definition file [%d]: %s\n",
94+
errno, filename);
95+
return 1;
96+
}
97+
98+
/* Read just the first line for the table cardinality. */
99+
100+
nread = getline(&line, &len, f);
101+
if (nread == -1) {
102+
fprintf(stderr, "ERROR: no data on first line of ddf?: %d",
103+
errno);
104+
return 2;
105+
}
106+
errno = 0;
107+
table->rows = strtoll(line, NULL, 10);
108+
if (errno != 0 || table->rows == 0) {
109+
fprintf(stderr, "ERROR: number of rows invalid [errno %d]: %s\n",
110+
errno, line);
111+
free(line);
112+
fclose(f);
113+
return 5;
114+
}
115+
116+
fprintf(stderr, "%lld row(s)\n", table->rows);
117+
118+
/* Read the column definition from rest of the file. */
119+
120+
fprintf(stderr, "column definitions:\n");
121+
while ((nread = getline(&line, &len, f)) != -1) {
122+
column = &table->columns;
123+
124+
if (nread == 0) {
125+
fprintf(stderr, "ERROR: empty line in data definition file file, "
126+
"aborting\n");
127+
free(line);
128+
fclose(f);
129+
return 4;
130+
}
131+
132+
fprintf(stderr, "[%d] %s", table->columns + 1, line);
133+
134+
table->column[table->columns].type = line[0];
135+
switch (line[0]) {
136+
case 's':
137+
errno = 0;
138+
((struct sequence_t *) &table->column[*column].arguments)->arg1 =
139+
strtoll(line + 1, NULL, 10);
140+
if (errno != 0 || table->rows == 0) {
141+
fprintf(stderr,
142+
"ERROR: invalid argument to sequence [errno %d]: %s\n",
143+
errno, line + 1);
144+
free(line);
145+
fclose(f);
146+
return 7;
147+
}
148+
break;
149+
default:
150+
fprintf(stderr, "ERROR: unrecognized column definition: %s\n",
151+
line);
152+
free(line);
153+
fclose(f);
154+
return 6;
155+
}
156+
157+
++(*column);
158+
}
159+
160+
free(line);
161+
fclose(f);
162+
163+
if (table->columns == 0) {
164+
fprintf(stderr, "ERROR: no columns defined\n");
165+
return 3;
166+
}
167+
168+
if (table->columns > MAX_COLS) {
169+
fprintf(stderr, "ERROR: more than %d columns defined\n", MAX_COLS);
170+
return 3;
171+
}
172+
173+
fprintf(stderr, "%d column(s)\n", *column);
174+
175+
return 0;
176+
}
177+
178+
void sequence(char *result, long long value)
179+
{
180+
int rc;
181+
182+
rc = snprintf(result, MAX_BUFFER_LEN - 1, "%lld", value);
183+
#ifdef ENABLE_CASSERT
184+
if (rc < 0 || rc == MAX_BUFFER_LEN - 1)
185+
fprintf(stderr, "WARNING: sequence issue converting %lld to string\n",
186+
value);
187+
#endif /* ENABLE_CASSERT */
188+
}
189+
190+
int main(int argc, char *argv[])
191+
{
192+
int c;
193+
struct table_definition_t table;
194+
char datafile[FILENAME_MAX] = "";
195+
196+
char tmp[FILENAME_MAX];
197+
char *p;
198+
199+
FILE *stream = stdout;
200+
char delimiter = '\t';
201+
char data_definition_file[FILENAME_MAX] = "";
202+
char outdir[FILENAME_MAX] = "";
203+
204+
memset(&table, 0, sizeof(struct table_definition_t));
205+
206+
if (argc == 1) {
207+
usage(argv[0]);
208+
return 1;
209+
}
210+
211+
while (1) {
212+
int option_index = 1;
213+
static struct option long_options[] = {
214+
{0, 0, 0, 0,}
215+
};
216+
217+
c = getopt_long(argc, argv, "d:f:ho:", long_options, &option_index);
218+
if (c == -1)
219+
break;
220+
221+
switch (c) {
222+
case 0:
223+
break;
224+
case 'd':
225+
delimiter = optarg[0];
226+
break;
227+
case 'f':
228+
strncpy(data_definition_file, optarg, FILENAME_MAX - 1);
229+
break;
230+
case 'h':
231+
usage(argv[0]);
232+
return 0;
233+
case 'o':
234+
strncpy(outdir, optarg, FILENAME_MAX - 1);
235+
break;
236+
default:
237+
printf("?? getopt returned character code 0%o ??\n", c);
238+
return 2;
239+
}
240+
}
241+
242+
if (data_definition_file[0] == '\0') {
243+
fprintf(stderr, "ERROR: use -f to specify data definition file\n");
244+
return 3;
245+
}
246+
247+
if (outdir[0] != '\0') {
248+
/* Naively remove any extension to the data definition file. */
249+
strncpy(tmp, data_definition_file, FILENAME_MAX -1);
250+
p = strstr(tmp, ".");
251+
*p = '\0';
252+
253+
/* Make sure the new filename doesn't exceed FILENAME_MAX. */
254+
c = FILENAME_MAX - (strlen(outdir) + strlen(tmp) + 7);
255+
if (c < 0) {
256+
fprintf(stderr, "ERROR: resulting datafile path and name is too "
257+
"long: %s/%s.data\n", outdir, tmp);
258+
return 6;
259+
}
260+
261+
strcat(datafile, outdir);
262+
strcat(datafile, "/");
263+
strcat(datafile, tmp);
264+
strcat(datafile, ".data");
265+
266+
stream = fopen(datafile, "w");
267+
if (stream == NULL) {
268+
fprintf(stderr, "ERROR: cannot open datafile [%d]: %s\n",
269+
errno, datafile);
270+
return 7;
271+
}
272+
273+
fprintf(stderr, "datafile: %s\n", datafile);
274+
}
275+
276+
c = read_data_definition_file(&table, data_definition_file);
277+
if (c != 0)
278+
return 4;
279+
280+
c = generate_data(stream, &table, delimiter);
281+
if (c != 0)
282+
return 5;
283+
if (outdir[0] != '\0') {
284+
fclose(stream);
285+
}
286+
287+
return 0;
288+
}

src/test/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ ADD_EXECUTABLE(tget_days tget_days.c)
1414
SET_TARGET_PROPERTIES(${BFILES} PROPERTIES LINK_FLAGS "-L.. -ltouchstone")
1515

1616
ADD_TEST(alpha sh tget_alpha.sh)
17+
ADD_TEST(data_generator sh tdata_generator.sh)
1718
ADD_TEST(days sh tget_days.sh)
1819
ADD_TEST(generate_analyze sh tgenerate_analyze.sh)
1920
ADD_TEST(generate_plan sh tgenerate_plan.sh)

src/test/empty.ddf

Whitespace-only changes.
+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
10
2+
0

src/test/invalid-row-line.ddf

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
this should be a number
2+
1

src/test/no-columns.ddf

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
1

0 commit comments

Comments
 (0)