Source code for openquake.hazardlib.nrml

# -*- coding: utf-8 -*-
# vim: tabstop=4 shiftwidth=4 softtabstop=4
#
# Copyright (C) 2014-2023 GEM Foundation
#
# OpenQuake is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# OpenQuake is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with OpenQuake. If not, see <http://www.gnu.org/licenses/>.
"""\
It is possible to save a Node object into a NRML file by using the
function ``write(nodes, output)`` where output is a file
object. If you want to make sure that the generated file is valid
according to the NRML schema just open it in 'w+' mode: immediately
after writing it will be read and validated. It is also possible to
convert a NRML file into a Node object with the routine
``read(input)`` where input is the path name of the
NRML file or a file object opened for reading. The file will be
validated as soon as opened.

For instance an exposure file like the following::

  <?xml version='1.0' encoding='utf-8'?>
  <nrml xmlns="http://openquake.org/xmlns/nrml/0.4"
        xmlns:gml="http://www.opengis.net/gml">
    <exposureModel
        id="my_exposure_model_for_population"
        category="population"
        taxonomySource="fake population datasource">

      <description>
        Sample population
      </description>

      <assets>
        <asset id="asset_01" number="7" taxonomy="IT-PV">
            <location lon="9.15000" lat="45.16667" />
        </asset>

        <asset id="asset_02" number="7" taxonomy="IT-CE">
            <location lon="9.15333" lat="45.12200" />
        </asset>
      </assets>
    </exposureModel>
  </nrml>

can be converted as follows:

>> nrml = read(<path_to_the_exposure_file.xml>)

Then subnodes and attributes can be conveniently accessed:

>> nrml.exposureModel.assets[0]['taxonomy']
'IT-PV'
>> nrml.exposureModel.assets[0]['id']
'asset_01'
>> nrml.exposureModel.assets[0].location['lon']
'9.15000'
>> nrml.exposureModel.assets[0].location['lat']
'45.16667'

The Node class itself provides no facility to cast strings into Python
types; this is a job for the ValidatingXmlParser class, which is
supplemented by a dictionary of validators.
"""
import io
import re
import sys
import operator
import collections.abc

import numpy

from openquake.baselib import hdf5
from openquake.baselib.general import CallableDict, groupby, gettemp
from openquake.baselib.node import (
    node_to_xml, Node, striptag, ValidatingXmlParser, floatformat)
from openquake.hazardlib import valid, sourceconverter, InvalidFile

F64 = numpy.float64
NAMESPACE = 'http://openquake.org/xmlns/nrml/0.4'
NRML05 = 'http://openquake.org/xmlns/nrml/0.5'
GML_NAMESPACE = 'http://www.opengis.net/gml'
SERIALIZE_NS_MAP = {None: NAMESPACE, 'gml': GML_NAMESPACE}
PARSE_NS_MAP = {'nrml': NAMESPACE, 'gml': GML_NAMESPACE}


class DuplicatedID(Exception):
    """
    Error raised when the same ID is found more than once, for instance
    when two sources in a source model share their source ID
    """
def check_unique(ids, msg=''):
    """
    Make sure that the given IDs are all distinct.

    :param ids: a collection of IDs, as accepted by numpy.unique
    :param msg: text appended to the offending ID in the error message
    :raises DuplicatedID:
        for the first repeated ID, in the sorted order produced by
        numpy.unique
    """
    values, occurrences = numpy.unique(ids, return_counts=True)
    # numpy.unique returns the values sorted, so repeated[0] is the same
    # ID that a scan over (value, count) pairs would hit first
    repeated = values[occurrences > 1]
    if len(repeated):
        raise DuplicatedID('%s %s' % (repeated[0], msg))
class SourceModel(collections.abc.Sequence):
    """
    A sequence of source groups carrying the attributes name,
    investigation_time and start_time. It can be serialized on hdf5
    as follows:

    >> with openquake.baselib.hdf5.File('/tmp/sm.hdf5', 'w') as f:
    ..    f['/'] = source_model
    """
    def __init__(self, src_groups, name='', investigation_time='',
                 start_time=''):
        self.src_groups = src_groups
        self.name = name
        self.investigation_time = investigation_time
        self.start_time = start_time

    def __getitem__(self, i):
        # indexing is delegated to the underlying list of groups
        return self.src_groups[i]

    def __len__(self):
        return len(self.src_groups)

    def __toh5__(self):
        """
        :returns:
            a pair (dic, attrs) where dic maps each group name to an
            hdf5.Group of (source_id, source) pairs
        :raises ValueError: if no source has a ``__toh5__`` method
        """
        groups = {}
        for idx, src_group in enumerate(self.src_groups):
            # unnamed groups get a positional name
            key = src_group.name or 'group-%d' % idx
            pairs = [(src.source_id, src) for src in src_group
                     if hasattr(src, '__toh5__')]
            if not pairs:  # nothing serializable in this group
                continue
            groups[key] = hdf5.Group(pairs, {'trt': src_group.trt})
        # falsy times are stored as the string 'NA'
        attrs = {
            'name': self.name,
            'investigation_time': self.investigation_time or 'NA',
            'start_time': self.start_time or 'NA',
        }
        if not groups:
            raise ValueError('There are no serializable sources in %s' % self)
        return groups, attrs

    def __fromh5__(self, dic, attrs):
        """
        Rebuild the source groups from the given dictionary of hdf5
        groups and set the attributes on the instance
        """
        vars(self).update(attrs)
        self.src_groups = []
        for grp_name, h5group in dic.items():
            trt = h5group.attrs['trt']
            sources = []
            for src_id in sorted(h5group):
                source = h5group[src_id]
                source.num_ruptures = source.count_ruptures()
                sources.append(source)
            self.src_groups.append(
                sourceconverter.SourceGroup(trt, sources, grp_name))
class GeometryModel(object):
    """
    Container for a dictionary of sections
    """
    def __init__(self, sections):
        """
        :param sections: the sections, checked with check_unique
        """
        # validate first, so that nothing is stored if the check fails
        check_unique(sections)
        self.src_groups = []
        self.sections = sections
def get_tag_version(nrml_node):
    """
    Return the pair (tag, version) for a node of kind NRML. For instance
    the tag '{http://openquake.org/xmlns/nrml/0.4}fragilityModel'
    produces the pair ('fragilityModel', 'nrml/0.4').

    :param nrml_node: an object with a namespace-qualified ``.tag``
    """
    match = re.search(r'(nrml/[\d\.]+)\}(\w+)', nrml_node.tag)
    # group 1 is the version (inside the braces), group 2 the bare tag
    return match.group(2), match.group(1)
def to_python(fname, *args):
    """
    Parse a NRML file and build the Python object associated to it, by
    calling nrml.read() first and node_to_obj() on the result.

    :param fname: the file to read
    :param args: extra arguments passed to node_to_obj
    """
    nrml = read(fname)
    # the nrml root must contain exactly one node
    (node,) = nrml
    return node_to_obj(node, fname, *args)
node_to_obj = CallableDict(keyfunc=get_tag_version, keymissing=lambda n, f: n) # dictionary of functions with at least two arguments, node and fname default = sourceconverter.SourceConverter(area_source_discretization=10, rupture_mesh_spacing=10)
@node_to_obj.add(('ruptureCollection', 'nrml/0.5'))
def get_rupture_collection(node, fname, converter):
    """
    Convert a ruptureCollection node (NRML 0.5) with the given converter.

    :param node: the ruptureCollection node
    :param fname: path of the file (not used here)
    :param converter: object with a ``convert_node`` method
    """
    converted = converter.convert_node(node)
    return converted
@node_to_obj.add(('geometryModel', 'nrml/0.5'))
def get_geometry_model(node, fname, converter):
    """
    Convert a geometryModel node (NRML 0.5) into a GeometryModel.

    :param node: the geometryModel node
    :param fname: path of the file (not used here)
    :param converter: object with a ``convert_node`` method
    """
    sections = converter.convert_node(node)
    return GeometryModel(sections)
@node_to_obj.add(('sourceModel', 'nrml/0.4'))
def get_source_model_04(node, fname, converter=default):
    """
    Convert a sourceModel node (NRML 0.4) into a SourceModel with one
    source group per tectonic region type.

    :param node: the sourceModel node, iterated to get the source nodes
    :param fname: path of the file, stored on the converter
    :param converter: the converter to use (the module default if missing)
    :raises DuplicatedID: if two sources have the same source_id
    """
    sources = []
    seen_ids = set()
    converter.fname = fname
    for src_node in node:
        src = converter.convert_node(src_node)
        if src is None:  # the converter discarded this source
            continue
        if src.source_id in seen_ids:
            raise DuplicatedID(
                'The source ID %s is duplicated!' % src.source_id)
        seen_ids.add(src.source_id)
        sources.append(src)
    by_trt = groupby(sources, operator.attrgetter('tectonic_region_type'))
    src_groups = [
        sourceconverter.SourceGroup(
            trt, srcs, min_mag=converter.minimum_magnitude)
        for trt, srcs in by_trt.items()]
    src_groups.sort()
    return SourceModel(src_groups, node.get('name', ''))
@node_to_obj.add(('sourceModel', 'nrml/0.5'))
def get_source_model_05(node, fname, converter=default):
    """
    Convert a sourceModel node (NRML 0.5) into a SourceModel.

    :param node: the sourceModel node, expected to contain sourceGroups
    :param fname: path of the file, stored on the converter
    :param converter: the converter to use (the module default if missing)
    :raises InvalidFile: if a child node is not a sourceGroup
    """
    converter.fname = fname
    src_groups = []
    for child in node:  # expect a sequence of sourceGroup nodes
        if 'sourceGroup' not in child.tag:
            raise InvalidFile(
                '%s: you have an incorrect declaration '
                'xmlns="http://openquake.org/xmlns/nrml/0.5"; it should be '
                'xmlns="http://openquake.org/xmlns/nrml/0.4"' % fname)
        sg = converter.convert_node(child)
        if not sg or not len(sg):
            # a source group can be empty if the source_id filtering is on
            continue
        src_groups.append(sg)
    # the two times are optional; when present they must be positive floats
    itime = node.get('investigation_time')
    if itime is not None:
        itime = valid.positivefloat(itime)
    stime = node.get('start_time')
    if stime is not None:
        stime = valid.positivefloat(stime)
    src_groups.sort()
    return SourceModel(src_groups, node.get('name'), itime, stime)
# Mapping name -> callable applied to the corresponding text while parsing;
# this dictionary is what read() passes to ValidatingXmlParser.
# NOTE(review): keys look like NRML tag and attribute names - confirm
# against ValidatingXmlParser how they are matched.
validators = {
    'backarc': valid.boolean,
    'strike': valid.strike_range,
    'dip': valid.dip_range,
    'rake': valid.rake_range,
    'magnitude': valid.positivefloat,
    'lon': valid.longitude,
    'lat': valid.latitude,
    'depth': valid.float_,
    'upperSeismoDepth': valid.float_,
    'lowerSeismoDepth': valid.float_,
    'posList': valid.posList,
    'pos': valid.lon_lat,
    'aValue': float,
    'a_val': valid.floats,
    'bValue': valid.positivefloat,
    'b_val': valid.positivefloats,
    'cornerMag': valid.positivefloat,
    'magScaleRel': valid.mag_scale_rel,
    'tectonicRegion': str,
    'ruptAspectRatio': valid.positivefloat,
    'maxMag': valid.positivefloat,
    'minMag': valid.positivefloat,
    'min_mag': valid.positivefloats,
    'max_mag': valid.positivefloats,
    'lengths': valid.positiveints,
    'size': valid.positiveint,
    'binWidth': valid.positivefloat,
    'bin_width': valid.positivefloats,
    'probability': valid.probability,
    'occurRates': valid.positivefloats,  # they can be > 1
    'weight': valid.probability,
    'uncertaintyModel': valid.uncertainty_model,
    'uncertaintyWeight': float,
    'alongStrike': valid.probability,
    'downDip': valid.probability,
    'slipRate': valid.positivefloat,
    'slip': valid.positivefloat,
    'rigidity': valid.positivefloat,
    'totalMomentRate': valid.positivefloat,
    'characteristicRate': valid.positivefloat,
    'char_rate': valid.positivefloats,
    'characteristicMag': valid.positivefloat,
    'char_mag': valid.positivefloats,
    'magnitudes': valid.positivefloats,
    'id': valid.simple_id,
    'occurrence_rate': valid.positivefloat,
    'rupture.id': valid.positiveint,
    'ruptureId': valid.positiveint,
    'discretization': valid.compose(valid.positivefloat, valid.nonzero),
    'IML': valid.positivefloats,  # used in NRML 0.4
    'imt': valid.intensity_measure_type,
    'imls': valid.positivefloats,
    'poes': valid.positivefloats,
    'description': valid.utf8_not_empty,
    'noDamageLimit': valid.NoneOr(valid.positivefloat),
    'poEs': valid.probabilities,
    # tree paths are split on the underscore into a list of strings
    'gsimTreePath': lambda v: v.split('_'),
    'sourceModelTreePath': lambda v: v.split('_'),
    'IMT': str,
    'saPeriod': valid.positivefloat,
    'saDamping': valid.positivefloat,
    'quantileValue': valid.positivefloat,
    'investigationTime': valid.positivefloat,
    'poE': valid.probability,
    'periods': valid.positivefloats,
    'IMLs': valid.positivefloats,
    'magBinEdges': valid.integers,
    'distBinEdges': valid.integers,
    'epsBinEdges': valid.integers,
    'lonBinEdges': valid.longitudes,
    'latBinEdges': valid.latitudes,
    'type': valid.simple_id,
    'dims': valid.positiveints,
    'iml': valid.positivefloat,
    'index': valid.positiveints,
    'value': valid.positivefloat,
    'assetLifeExpectancy': valid.positivefloat,
    'interestRate': valid.positivefloat,
    'statistics': valid.Choice('mean', 'quantile'),
    'gmv': valid.positivefloat,
    'spacing': valid.positivefloat,
    'srcs_weights': valid.positivefloats,
    'grp_probability': valid.probability,
}
def read_source_models(fnames, converter):
    """
    Lazily read a list of source model files.

    :param fnames:
        list of source model files
    :param converter:
        a :class:`openquake.hazardlib.sourceconverter.SourceConverter`
        instance
    :yields:
        SourceModel instances, each with the attribute .fname set
    :raises ValueError:
        for a file not ending in .xml/.nrml, or when a model with sources
        having a ``data`` attribute declares an investigation_time
        different from the one of the converter
    """
    for fname in fnames:
        if not fname.endswith(('.xml', '.nrml')):
            raise ValueError('Unrecognized extension in %s' % fname)
        sm = to_python(fname, converter)
        sm.fname = fname

        # check investigation time for NonParametricSeismicSources,
        # recognized here by the presence of the attribute .data
        cit = converter.investigation_time
        has_nonparametric = any(
            hasattr(src, 'data') for sg in sm.src_groups for src in sg)
        if has_nonparametric and sm.investigation_time != cit:
            raise ValueError(
                'The source model %s contains an investigation_time '
                'of %s, while the job.ini has %s' % (
                    fname, sm.investigation_time, cit))
        yield sm
def read(source, stop=None):
    """
    Parse a NRML file and return the validated root Node. The entire
    tree is kept in memory.

    :param source: a file name or file object open for reading
    :param stop: passed as it is to ValidatingXmlParser
    :raises ValueError: if the root node is not of kind nrml
    """
    parser = ValidatingXmlParser(validators, stop)
    root = parser.parse_file(source)
    if striptag(root.tag) != 'nrml':
        raise ValueError('%s: expected a node of kind nrml, got %s' %
                         (source, root.tag))
    # the tag has the form '{namespace}nrml': keep what is inside the
    # braces, i.e. an URL like 'http://openquake.org/xmlns/nrml/0.5'
    braced_ns, _sep, _rest = root.tag.partition('}')
    root['xmlns'] = braced_ns[1:]
    root['xmlns:gml'] = GML_NAMESPACE
    return root
def write(nodes, output=sys.stdout, fmt='%.7E', gml=True, xmlns=None):
    """
    Serialize the given nodes as a NRML file. The output must be a file
    object open in write mode; if it is open in read-write mode the file
    is read back after the creation, as a consistency check.

    :params nodes: an iterable over Node objects
    :params output: a file-like object in write or read-write mode
    :param fmt: format used for writing the floats (default '%.7E')
    :param gml: add the http://www.opengis.net/gml namespace
    :param xmlns: NRML namespace like http://openquake.org/xmlns/nrml/0.4
    """
    root = Node('nrml', nodes=nodes)
    nrml_ns = xmlns if xmlns else NRML05  # NRML 0.5 unless told otherwise
    namespaces = {nrml_ns: ''}
    if gml:
        namespaces[GML_NAMESPACE] = 'gml:'
    with floatformat(fmt):
        node_to_xml(root, output, namespaces)
    # objects without a .mode attribute are never read back
    if '+' in getattr(output, 'mode', ''):  # read-write mode
        output.seek(0)
        read(output)  # validate the written file
def to_string(node):
    """
    Return the NRML representation of the given node as a string.

    :param node: the node to serialize, wrapped in a nrml root by write
    """
    with io.BytesIO() as buf:
        write([node], buf)
        raw = buf.getvalue()
    return raw.decode('utf-8')
def get(xml, investigation_time=50., rupture_mesh_spacing=5.,
        width_of_mfd_bin=1.0, area_source_discretization=10):
    """
    Build a source object from its XML representation, by wrapping it
    in a NRML 0.4 document which is read and then converted.

    :param xml: the XML representation of a source
    :param investigation_time: investigation time
    :param rupture_mesh_spacing: rupture mesh spacing
    :param width_of_mfd_bin: width of MFD bin
    :param area_source_discretization: area source discretization
    :returns: a python source object
    """
    template = (
        '''<?xml version='1.0' encoding='UTF-8'?> '''
        '''<nrml xmlns="http://openquake.org/xmlns/nrml/0.4" '''
        '''xmlns:gml="http://www.opengis.net/gml"> %s </nrml>''')
    text = template % xml
    # the document must contain exactly one node, the source
    (node,) = read(gettemp(text))
    converter = sourceconverter.SourceConverter(
        investigation_time,
        rupture_mesh_spacing,
        width_of_mfd_bin=width_of_mfd_bin,
        area_source_discretization=area_source_discretization)
    src = converter.convert_node(node)
    src.grp_id = src.id = 0
    return src
if __name__ == '__main__':
    # print the parsed content of each file given on the command line;
    # sys is already imported at the top of the module
    for path in sys.argv[1:]:
        banner = '****** %s ******' % path
        print(banner)
        print(read(path).to_str())
        print()