-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathgenerate_csv_files.py
executable file
·120 lines (110 loc) · 4.58 KB
/
generate_csv_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python
#
# This script generates CSV files with random data. It lets you
# specify the number of rows, and the columns through a list of their
# types. The format can be tweaked to generate different kinds of data
# for testing various CSV readers.
#
# Usage:
# python generate_csv_files.py --columns <column1_type> <column2_type> ... \
# [ --nr-rows <nr_rows> ] \
# [ --no-header ] \
# [ --separator <separator> ] \
# [ --quote <quote> ] \
# [ --add-trailing-separator ] \
# [ --add-random-spaces-around-values ] \
# [ --file-type <file_type> ] \
# [ --strings-have-spaces ] \
# [ --headers-have-spaces ] \
# [ --float-format <float_format> ] \
# [ --help ]
#
# The column types can be:
# - int: generates random integers between -1_000 and 1_000
# - float: generates random floating point numbers between -1.e3 and 1.e3
# - string: generates random strings with length between 1 and 10
#
# The file types can be:
# - Windows: uses '\r\n' as end-of-line character
# - Unix: uses '\n' as end-of-line character
# - Mac: uses '\r' as end-of-line character
#
# The default values are:
# - nr-rows = 100
# - separator = ','
# - quote = '"'
# - file-type = 'Unix' (end-of-line = '\n')
# - float-format = '.6f'
import argparse
import random
import string
import sys
def generate_csv_file(args):
    """Print a CSV document filled with random data to standard output.

    Parameters
    ----------
    args : argparse.Namespace
        Options controlling the output. The attributes read are:
        ``columns`` (sequence of 'int', 'float' or anything else, which is
        treated as a string column), ``nr_rows``, ``no_header``,
        ``separator``, ``quote``, ``add_trailing_separator``,
        ``add_random_spaces_around_values``, ``file_type``,
        ``strings_have_spaces``, ``headers_have_spaces`` and
        ``float_format``.

    Side effects
    ------------
    Sets ``args.end_of_line`` to the line terminator derived from
    ``args.file_type`` and writes the header (unless ``no_header`` is set)
    followed by ``nr_rows`` data rows to ``sys.stdout`` via ``print``.
    """
    # Map the requested file type onto its end-of-line sequence; any value
    # other than 'Windows' or 'Mac' falls back to the Unix convention.
    end_of_line = {'Windows': '\r\n', 'Mac': '\r'}.get(args.file_type, '\n')
    # Also stored on the namespace so callers can inspect the terminator.
    args.end_of_line = end_of_line

    separator = args.separator
    quote = args.quote

    # Generate header
    if not args.no_header:
        column_numbers = range(1, len(args.columns) + 1)
        if args.headers_have_spaces:
            # Names containing a space are wrapped in the quote character
            header = [f'{quote}column {i}{quote}' for i in column_numbers]
        else:
            header = [f'column{i}' for i in column_numbers]
        print(separator.join(header), end=end_of_line)

    # Generate rows
    character_set = string.ascii_letters + string.digits
    if args.strings_have_spaces:
        character_set += ' '
    float_format_str = f'{{value:{args.float_format}}}'

    for _ in range(args.nr_rows):
        row = []
        for column in args.columns:
            if column == 'int':
                value = str(random.randint(-1_000, 1_000))
            elif column == 'float':
                value = float_format_str.format(value=random.uniform(-1.e3, 1.e3))
            else:
                # String column: 1 to 10 random characters, always quoted
                s = ''.join(random.choices(character_set, k=random.randint(1, 10)))
                value = f'{quote}{s}{quote}'
            if args.add_random_spaces_around_values:
                # Independent coin flips for a leading and a trailing space
                if random.choice([True, False]):
                    value = f' {value}'
                if random.choice([True, False]):
                    value = f'{value} '
            row.append(value)
        if args.add_trailing_separator:
            # An empty final field makes join() emit exactly one trailing
            # separator; appending the separator itself would emit two.
            row.append('')
        print(separator.join(row), end=end_of_line)
def main():
    """Parse the command-line options and print the requested CSV data.

    The options are read from ``sys.argv`` by ``argparse``; the resulting
    namespace is handed to ``generate_csv_file``, which writes to stdout.
    """
    parser = argparse.ArgumentParser(description='Generate CSV files with random data.')
    parser.add_argument('--columns', nargs='+', required=True,
                        choices=['int', 'float', 'string'], help='List of column types')
    parser.add_argument('--nr-rows', type=int, default=100,
                        help='Number of rows')
    parser.add_argument('--no-header', action='store_true',
                        help='Do not include header in the CSV file')
    parser.add_argument('--separator', default=',',
                        help='Separator character')
    parser.add_argument('--quote', default='"',
                        help='Quote character')
    parser.add_argument('--add-trailing-separator', action='store_true',
                        help='Add trailing separator to rows')
    parser.add_argument('--add-random-spaces-around-values', action='store_true',
                        help='Add random spaces around values')
    parser.add_argument('--file-type', default='Unix',
                        choices=['Windows', 'Unix', 'Mac'], help='Type of file')
    parser.add_argument('--strings-have-spaces', action='store_true',
                        help='Add spaces to strings')
    parser.add_argument('--headers-have-spaces', action='store_true',
                        help='Add spaces in header names')
    parser.add_argument('--float-format', default='.6f',
                        help='Format for floating point values')
    args = parser.parse_args()
    # A text-mode stdout translates every '\n' it writes into os.linesep.
    # On Windows that would turn the requested '\n' into '\r\n' and the
    # requested '\r\n' into '\r\r\n', so switch the translation off and let
    # the explicitly chosen line terminator pass through unchanged. The
    # hasattr guard covers replaced streams and interpreters without
    # TextIOWrapper.reconfigure.
    if hasattr(sys.stdout, 'reconfigure'):
        sys.stdout.reconfigure(newline='')
    # Generate CSV file
    generate_csv_file(args)
if __name__ == '__main__':
    # Run the command-line interface only when executed as a script,
    # then terminate with a success status.
    main()
    raise SystemExit(0)