|
| 1 | +""" |
| 2 | +This is a script to convert curve info in other formats to NRML, v0.5. |
| 3 | +
|
| 4 | +It is being modified on an as-needed basis. |
| 5 | +
|
| 6 | +
|
| 7 | +""" |
| 8 | + |
| 9 | +import csv |
| 10 | +import numpy |
| 11 | + |
| 12 | +import xlrd |
| 13 | +from hazimp.misc import csv2dict |
| 14 | + |
| 15 | + |
# Vulnerability-set ids written to the flood NRML files by excel_curve2nrml.
FLOOD_HOUSE_FABRIC = 'structural_domestic_flood_2012'
FLOOD_HOUSE_CONTENTS = 'contents_domestic_flood_2012'

# Loss categories paired with the fabric and contents sets respectively.
LOSS_CAT_FABRIC = 'structural_loss_ratio'
LOSS_CAT_CONTENTS = 'contents_loss_ratio'

# Intensity measure type used for every flood curve.
FLOOD_IMT = 'water depth above ground floor (m)'
| 21 | + |
| 22 | + |
def xml_write_variable(xml_h, name, value):
    """
    Write one ``name="value" `` attribute (with a trailing space) to an
    xml file.

    String values are XML-escaped so that ``&``, ``<``, ``>`` and ``"``
    cannot break the attribute. A NaN value is written as an empty
    attribute; this is what we need for blank string values, although it
    is probably not universal.

    :param xml_h: A handle to the xml file.
    :param name: The name of the variable.
    :param value: The value of the variable; a string or NaN.
    :raises TypeError: If value is neither a string nor NaN.
    """
    from xml.sax.saxutils import escape

    if isinstance(value, str):
        # The attribute is delimited by double quotes, so those need
        # escaping on top of the default & < > handling.
        text = escape(value, {'"': '&quot;'})
    else:
        try:
            is_blank = bool(numpy.isnan(value))
        except TypeError:
            # numpy.isnan rejects non-numeric input such as None.
            is_blank = False
        if not is_blank:
            raise TypeError(
                "cannot write %r as the value of attribute %r: "
                "expected a string or NaN" % (value, name))
        text = ''
    xml_h.write('%s="%s" ' % (name, text))
| 43 | + |
| 44 | + |
def write_nrml_top(xml_h, vulnerability_set_id, asset_category, loss_category):
    """
    Write the opening of an nrml file: the xml declaration, the <nrml>
    root element and the opening <vulnerabilityModel> tag with its
    attributes.

    :param xml_h: A handle to the xml file.
    :param vulnerability_set_id: String name of the vulnerability set.
    :param asset_category: String name of the asset category.
    :param loss_category: String name of the loss category.
    """
    header = (
        "<?xml version='1.0' encoding='utf-8'?>\n"
        '<nrml xmlns="http://openquake.org/xmlns/nrml/0.5"\n'
        ' xmlns:gml="http://www.opengis.net/gml">\n'
        '\n'
        ' <vulnerabilityModel '
    )
    xml_h.write(header)

    # Attributes of the <vulnerabilityModel> element, in output order.
    attributes = (
        ("id", vulnerability_set_id),
        ("assetCategory", asset_category),
        ("lossCategory", loss_category),
    )
    for attr_name, attr_value in attributes:
        xml_write_variable(xml_h, attr_name, attr_value)

    xml_h.write('>\n')
| 68 | + |
| 69 | + |
def write_nrml_curve(xml_h, vulnerability_function_id, imls: list, imt: str,
                     loss_ratio, coef_var):
    """
    Write one <vulnerabilityFunction> element of an nrml file.

    The ``dist`` attribute is always written as "LN".

    :param xml_h: A handle to the xml file.
    :param vulnerability_function_id: String name of the vuln function.
    :param imls: 1D vector of the intensity measure values (x-axis) of the
        vuln curve. NaN entries are left out of the output.
    :param imt: intensity measure type
    :param loss_ratio: The loss ratio values (y-axis) of the vuln curve,
        already formatted as the text to place inside <meanLRs>.
    :param coef_var: The coefficient of variation values of the vuln curve,
        already formatted as the text to place inside <covLRs>.
    """
    emit = xml_h.write

    # Opening tag with its id and the fixed distribution.
    emit("<vulnerabilityFunction ")
    xml_write_variable(xml_h, "id", vulnerability_function_id)
    emit('dist="LN">\n <imls ')

    # Intensity levels, space separated, NaN placeholders skipped.
    xml_write_variable(xml_h, "imt", imt)
    emit('>')
    for level in imls:
        if not numpy.isnan(level):
            emit(str(level) + ' ')
    emit('</imls>\n')

    # The two y-axis vectors are passed in pre-formatted.
    for tag, payload in (('meanLRs', loss_ratio), ('covLRs', coef_var)):
        emit('<%s>' % tag)
        emit(payload)
        emit('</%s>\n' % tag)

    emit('</vulnerabilityFunction>\n\n')
| 104 | + |
| 105 | + |
def write_nrml_close(xml_h):
    """
    Write the closing tags of an nrml file.

    Only the tags are written; the handle itself is left open for the
    caller to close.

    :param xml_h: A handle to the xml file.
    """
    for closing_tag in ('</vulnerabilityModel>\n', '</nrml>\n'):
        xml_h.write(closing_tag)
| 114 | + |
| 115 | + |
def csv_curve2nrml(csv_filename, cov_filename, xml_filename):
    """
    Read in a csv hazard curve file and convert it to an NRML file.

    Rows of the curve file and of the coefficient-of-variation file are
    paired by position; if one file has more rows than the other, the
    extra rows are ignored. Rows whose 'Alpha' column is 'N/A' have no
    model and are skipped.

    :param csv_filename: The csv file holding the loss ratio curves.
    :param cov_filename: The csv file holding the coefficient of variation
        values, laid out like csv_filename.
    :param xml_filename: The NRML file to be written.
    """
    # The curve file is read twice: once column-wise for the values shared
    # by every curve, then row-wise for the per-curve values.
    # NOTE(review): csv2dict presumably maps column name -> list of column
    # values, with blank cells as NaN - confirm against hazimp.misc.
    csv_dict = csv2dict(csv_filename)
    vulnerability_set_id = csv_dict['vulnerabilitySetID'][0]
    try:
        asset_category = csv_dict['assetCategory'][0]
    except IndexError:
        # Assume asset_category is empty
        asset_category = ''
    loss_category = csv_dict['lossCategory'][0]
    imt = csv_dict['IMT'][0]
    imls = [v for v in csv_dict['IML'] if not v == '']

    # The per-level values live in columns named after the integer form of
    # each intensity level. This is the same for every row, so work it out
    # once rather than per row.
    level_columns = [str(int(iml)) for iml in imls if not numpy.isnan(iml)]

    # Context managers make sure both input files are closed again, even
    # if a row turns out to be malformed.
    with open(csv_filename, 'r', newline='') as csv_h, \
            open(cov_filename, 'r', newline='') as cov_h, \
            open(xml_filename, 'w') as xml_h:
        write_nrml_top(xml_h, vulnerability_set_id, asset_category,
                       loss_category)

        rows = zip(csv.DictReader(csv_h), csv.DictReader(cov_h))
        for row_di, row_cov in rows:
            # Headers and cells may carry stray padding.
            row_di = {k.strip(): v.strip() for k, v in row_di.items()}
            row_cov = {k.strip(): v.strip() for k, v in row_cov.items()}
            if row_di['Alpha'] == 'N/A':
                # This row has no model
                continue
            # Each value is followed by a single space, including the last.
            loss_ratio = ''.join(row_di[col] + ' ' for col in level_columns)
            coef_var = ''.join(row_cov[col] + ' ' for col in level_columns)
            write_nrml_curve(xml_h, row_di['vulnerabilityFunctionID'],
                             imls, imt, loss_ratio, coef_var)

        write_nrml_close(xml_h)
| 163 | + |
| 164 | + |
def validate_excel_curve_data(excel_file):
    """
    Check that the titles and the water depths do not change
    from sheet to sheet.

    The first 3 rows of every sheet are treated as titles and must match
    across sheets, apart from the 2nd value on the 1st row, which is
    ignored. The first column, from the 4th row down, is the water depth
    and is checked by check_identical_depths.

    :param excel_file: The excel file to validate.
    :returns: True if all sheets agree, False otherwise (including when
        the workbook has no sheets).
    """
    wb = xlrd.open_workbook(excel_file)

    headers_by_sheet = {}
    reference = None
    for sheet in wb.sheets():
        header = []
        for row in range(3):
            cells = []
            for col in range(sheet.ncols):
                cell_value = sheet.cell(row, col).value
                # Stringified only so the mismatch printout reads well.
                try:
                    cell_value = str(cell_value)
                except TypeError:
                    pass
                cells.append(cell_value)
            header.append(cells)
        # Drop the 2nd value on the 1st row; it may differ between sheets.
        header[0].pop(1)
        headers_by_sheet[sheet.name] = header
        # The last sheet seen ends up as the reference.
        reference = header

    if reference is None:
        return False

    # Every sheet must carry the same title block as the reference.
    for header in headers_by_sheet.values():
        if header != reference:
            print(("title", header))
            print(("default", reference))
            return False

    return check_identical_depths(wb)
| 213 | + |
| 214 | + |
def check_identical_depths(wb):
    """
    Check that the depth values are the same for all worksheets.

    The depth values are the first column of each sheet, starting at the
    4th row. On the first mismatch the differing column and the reference
    column are printed.

    :param wb: The excel workbook xlrd object.
    :returns: True if every sheet has the same depth column, False
        otherwise (including when the workbook has no sheets).
    """
    depths_by_sheet = {}
    reference = None
    for sheet in wb.sheets():
        column = [sheet.cell(row, 0).value for row in range(3, sheet.nrows)]
        depths_by_sheet[sheet.name] = column
        # The last sheet seen ends up as the reference.
        reference = column

    if reference is None:
        return False

    # Every sheet must carry the same depth column as the reference.
    for column in depths_by_sheet.values():
        if column != reference:
            print(("depth", column))
            print(("default", reference))
            return False

    return True
| 247 | + |
| 248 | + |
def read_excel_curve_data(excel_file):
    """
    Read in the excel file info. Specific, undocumented format.

    The depths are taken from the first column of the first sheet only,
    from the 4th row down.

    :param excel_file: The excel workbook.
    :returns: A tuple of (depths, fabric_vuln_curves, contents_vuln_curves);
        the two curve collections are whatever read_excel_worksheet
        returns for the workbook.
    """
    wb = xlrd.open_workbook(excel_file)
    first_sheet = wb.sheets()[0]

    # Get a list of the depths
    depths = [first_sheet.cell(row, 0).value
              for row in range(3, first_sheet.nrows)]

    fabric_vuln_curves, contents_vuln_curves = read_excel_worksheet(wb)
    return depths, fabric_vuln_curves, contents_vuln_curves
| 267 | + |
| 268 | + |
def read_excel_worksheet(wb):
    """
    Read the vulnerability curves from every sheet of an excel workbook.

    The curve values are everything from the 4th row down. Curve names
    start with the first word of the 2nd value on the 1st row of the
    sheet. Column 1 is the insured fabric curve and columns 2-4 are the
    insured contents curves (save, no action, expose); the uninsured
    curves repeat that layout 4 columns to the right.

    :param wb: The excel workbook xlrd object.
    :returns: A tuple of (fabric_vuln_curves, contents_vuln_curves), both
        dictionaries of curve name to a column of the sheet's values.
    """
    # Column shift of each insurance group.
    insurance_shift = (("INSURED", 0), ("UNINSURED", 4))
    # Column of each contents curve within a group; fabric is column 1.
    contents_columns = (('_SAVE', 2), ('_NOACTION', 3), ('_EXPOSE', 4))

    fabric_vuln_curves = {}  # the keys are curve names.
    contents_vuln_curves = {}  # the keys are curve names.

    for sheet in wb.sheets():
        # All the curves of the sheet as one 2D array.
        grid = numpy.asarray(
            [[sheet.cell(row, col).value for col in range(sheet.ncols)]
             for row in range(3, sheet.nrows)])
        # Read in the structure type
        structure = sheet.cell(0, 1).value.split()[0]

        for group, shift in insurance_shift:
            curve_id_base = structure + '_' + group
            fabric_vuln_curves[curve_id_base] = grid[:, 1 + shift]
            for tag, column in contents_columns:
                contents_vuln_curves[curve_id_base + tag] = \
                    grid[:, column + shift]

    return fabric_vuln_curves, contents_vuln_curves
| 300 | + |
| 301 | + |
def excel_curve2nrml(contents_filename, fabric_filename, xls_filename):
    """
    Read in an excel flood curve file and convert it to an NRML file.

    The excel file format is specific and best understood by looking
    at the file flood_2012_test.xlsx.

    Two NRML files are written, one for the fabric curves and one for the
    contents curves. Every curve is given a coefficient of variation of 0
    at each depth. If the workbook fails validation a warning is issued
    and the conversion carries on.

    :param contents_filename: The contents NRML file to be created.
    :param fabric_filename: The fabric NRML file to be created.
    :param xls_filename: The excel file that is the basis of the NRML files.
    """
    import warnings

    # The validation result used to be thrown away. Warn rather than raise
    # so that callers relying on best-effort output keep working.
    if not validate_excel_curve_data(xls_filename):
        warnings.warn(
            "%s failed validation: the titles or depths are not the same "
            "on every sheet, so the NRML output may be wrong"
            % xls_filename)

    depths, fabric_vuln_curves, contents_vuln_curves = read_excel_curve_data(
        xls_filename)
    curve_info = [{'curves': fabric_vuln_curves,
                   'set_id': FLOOD_HOUSE_FABRIC,
                   'asset': '',
                   'loss_category': LOSS_CAT_FABRIC,
                   'file_name': fabric_filename},
                  {'curves': contents_vuln_curves,
                   'set_id': FLOOD_HOUSE_CONTENTS,
                   'asset': '',
                   'loss_category': LOSS_CAT_CONTENTS,
                   'file_name': contents_filename}]

    # One output file per curve set.
    for curve_set in curve_info:
        with open(curve_set['file_name'], 'w') as xml_h:
            write_nrml_top(
                xml_h,
                curve_set['set_id'],
                curve_set['asset'],
                curve_set['loss_category']
            )

            for curve_id, curve_values in curve_set['curves'].items():
                # Each value is followed by a single space, including the
                # last one.
                loss_ratio = ''.join(str(v) + ' ' for v in curve_values)
                # One zero per loss ratio value.
                coef_var = '0 ' * len(curve_values)
                write_nrml_curve(xml_h, curve_id, depths, FLOOD_IMT,
                                 loss_ratio, coef_var)

            write_nrml_close(xml_h)
| 352 | + |
| 353 | + |
| 354 | +# ----------------------------------------------------------- |
if __name__ == "__main__":
    # Command line entry point: convert a csv or Excel curve file to NRML.

    import os
    import argparse
    parser = argparse.ArgumentParser(
        description="Convert curve data to NRML format"
    )

    parser.add_argument("-i", "--input", required=True,
                        help="Input curve file (either Excel or csv)")
    parser.add_argument("-c", "--cov_input", required=True,
                        help="Input cov file (either Excel or csv)")
    parser.add_argument("-o", "--output", help="Output file name")
    parser.add_argument("-f", "--format", choices=['csv', 'xlsx'],
                        help="File format (inferred from input file if not given)")

    args = parser.parse_args()

    input_file = args.input
    # The cov file has its own option; it used to be read from --input.
    input_file_cov = args.cov_input
    # Default output names and the inferred format follow the curve file,
    # not the cov file.
    base, ext = os.path.splitext(input_file)

    output_file = args.output if args.output else f"{base}.xml"

    if args.format:
        informat = args.format
    elif ext.lower() == '.csv':
        informat = 'csv'
    elif ext.lower().strip('.') in ['xls', 'xlsx']:
        informat = 'xlsx'
    else:
        # Exit with a usage error instead of failing later on an
        # undefined format.
        parser.error("Not sure what the file format is. "
                     "Use the -f option to specify")

    if informat == 'csv':
        csv_curve2nrml(input_file, input_file_cov, output_file)
    elif informat == 'xlsx':
        # The Excel conversion always writes two files named after the
        # input file; --output and --cov_input are not used here.
        output_contents_file = f"{base}_contents.xml"
        output_fabric_file = f"{base}_fabric.xml"
        excel_curve2nrml(output_contents_file, output_fabric_file, input_file)
0 commit comments