# -*- coding: utf-8 -*-
# vim: tabstop=4 shiftwidth=4 softtabstop=4
#
# Copyright (C) 2014-2017 GEM Foundation
#
# OpenQuake is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# OpenQuake is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with OpenQuake. If not, see <http://www.gnu.org/licenses/>.
from __future__ import division
import os
import sys
import abc
import pdb
import logging
import operator
import itertools
import traceback
from functools import partial
from datetime import datetime
from shapely import wkt
import numpy
from openquake.baselib import (
config, general, hdf5, datastore, __version__ as engine_version)
from openquake.baselib.performance import Monitor
from openquake.risklib import riskinput, riskmodels
from openquake.commonlib import readinput, source, calc, writers
from openquake.baselib.parallel import Starmap
from openquake.baselib.python3compat import with_metaclass
from openquake.calculators.export import export as exp
from openquake.calculators.getters import GmfDataGetter, PmapGetter
get_taxonomy = operator.attrgetter('taxonomy')
get_weight = operator.attrgetter('weight')
get_trt = operator.attrgetter('src_group_id')
get_imt = operator.attrgetter('imt')
calculators = general.CallableDict(operator.attrgetter('calculation_mode'))
F32 = numpy.float32
[docs]class InvalidCalculationID(Exception):
"""
Raised when running a post-calculation on top of an incompatible
pre-calculation
"""
logversion = True
PRECALC_MAP = dict(
classical=['psha'],
disaggregation=['psha'],
scenario_risk=['scenario'],
scenario_damage=['scenario'],
classical_risk=['classical'],
classical_bcr=['classical'],
classical_damage=['classical'],
event_based=['event_based', 'event_based_rupture', 'ebrisk',
'event_based_risk', 'ucerf_rupture'],
event_based_risk=['event_based', 'event_based_rupture', 'ucerf_rupture',
'event_based_risk', 'ucerf_hazard'],
ucerf_classical=['ucerf_psha'],
ucerf_hazard=['ucerf_rupture'])
[docs]def set_array(longarray, shortarray):
"""
:param longarray: a numpy array of floats of length L >= l
:param shortarray: a numpy array of floats of length l
Fill `longarray` with the values of `shortarray`, starting from the left.
If `shortarry` is shorter than `longarray`, then the remaining elements on
the right are filled with `numpy.nan` values.
"""
longarray[:len(shortarray)] = shortarray
longarray[len(shortarray):] = numpy.nan
[docs]def check_precalc_consistency(calc_mode, precalc_mode):
"""
Defensive programming against users providing an incorrect pre-calculation
ID (with ``--hazard-calculation-id``)
:param calc_mode:
calculation_mode of the current calculation
:param precalc_mode:
calculation_mode of the previous calculation
"""
ok_mode = PRECALC_MAP[calc_mode]
if calc_mode != precalc_mode and precalc_mode not in ok_mode:
raise InvalidCalculationID(
'In order to run a risk calculation of kind %r, '
'you need to provide a calculation of kind %r, '
'but you provided a %r instead' %
(calc_mode, ok_mode, precalc_mode))
[docs]class BaseCalculator(with_metaclass(abc.ABCMeta)):
"""
Abstract base class for all calculators.
:param oqparam: OqParam object
:param monitor: monitor object
:param calc_id: numeric calculation ID
"""
from_engine = False # set by engine.run_calc
sitecol = datastore.persistent_attribute('sitecol')
performance = datastore.persistent_attribute('performance')
pre_calculator = None # to be overridden
is_stochastic = False # True for scenario and event based calculators
def __init__(self, oqparam, monitor=Monitor(), calc_id=None):
self._monitor = monitor
self.datastore = datastore.DataStore(calc_id)
self.oqparam = oqparam
[docs] def monitor(self, operation, **kw):
"""
:returns: a new Monitor instance
"""
mon = self._monitor(operation, hdf5path=self.datastore.hdf5path)
self._monitor.calc_id = mon.calc_id = self.datastore.calc_id
vars(mon).update(kw)
return mon
[docs] def save_params(self, **kw):
"""
Update the current calculation parameters and save engine_version
"""
vars(self.oqparam).update(**kw)
self.datastore['oqparam'] = self.oqparam # save the updated oqparam
attrs = self.datastore['/'].attrs
attrs['engine_version'] = engine_version
attrs['date'] = datetime.now().isoformat()[:19]
if 'checksum32' not in attrs:
attrs['checksum32'] = readinput.get_checksum32(self.oqparam)
self.datastore.flush()
[docs] def run(self, pre_execute=True, concurrent_tasks=None, close=True, **kw):
"""
Run the calculation and return the exported outputs.
"""
global logversion
self.close = close
self.set_log_format()
if logversion: # make sure this is logged only once
logging.info('Running %s', self.oqparam.inputs['job_ini'])
logging.info('Using engine version %s', engine_version)
logversion = False
if concurrent_tasks is None: # use the job.ini parameter
ct = self.oqparam.concurrent_tasks
else: # used the parameter passed in the command-line
ct = concurrent_tasks
if ct == 0: # disable distribution temporarily
oq_distribute = os.environ.get('OQ_DISTRIBUTE')
os.environ['OQ_DISTRIBUTE'] = 'no'
if ct != self.oqparam.concurrent_tasks:
# save the used concurrent_tasks
self.oqparam.concurrent_tasks = ct
self.save_params(**kw)
Starmap.init()
try:
if pre_execute:
self.pre_execute()
self.result = self.execute()
if self.result is not None:
self.post_execute(self.result)
self.before_export()
self.export(kw.get('exports', ''))
except:
if kw.get('pdb'): # post-mortem debug
tb = sys.exc_info()[2]
traceback.print_tb(tb)
pdb.post_mortem(tb)
else:
logging.critical('', exc_info=True)
raise
finally:
if ct == 0: # restore OQ_DISTRIBUTE
if oq_distribute is None: # was not set
del os.environ['OQ_DISTRIBUTE']
else:
os.environ['OQ_DISTRIBUTE'] = oq_distribute
Starmap.shutdown()
return getattr(self, 'exported', {})
[docs] def core_task(*args):
"""
Core routine running on the workers.
"""
raise NotImplementedError
[docs] @abc.abstractmethod
def pre_execute(self):
"""
Initialization phase.
"""
[docs] @abc.abstractmethod
def execute(self):
"""
Execution phase. Usually will run in parallel the core
function and return a dictionary with the results.
"""
[docs] @abc.abstractmethod
def post_execute(self, result):
"""
Post-processing phase of the aggregated output. It must be
overridden with the export code. It will return a dictionary
of output files.
"""
[docs] def export(self, exports=None):
"""
Export all the outputs in the datastore in the given export formats.
Individual outputs are not exported if there are multiple realizations.
"""
self.exported = getattr(self.precalc, 'exported', {})
if isinstance(exports, tuple):
fmts = exports
elif exports: # is a string
fmts = exports.split(',')
elif isinstance(self.oqparam.exports, tuple):
fmts = self.oqparam.exports
else: # is a string
fmts = self.oqparam.exports.split(',')
keys = set(self.datastore)
has_hcurves = 'hcurves' in self.datastore or 'poes' in self.datastore
if has_hcurves:
keys.add('hcurves')
for fmt in fmts:
if not fmt:
continue
for key in sorted(keys): # top level keys
if 'rlzs' in key and self.R > 1:
continue # skip individual curves
self._export((key, fmt))
if has_hcurves and self.oqparam.hazard_maps:
self._export(('hmaps', fmt))
if has_hcurves and self.oqparam.uniform_hazard_spectra:
self._export(('uhs', fmt))
if self.close: # in the engine we close later
self.result = None
try:
self.datastore.close()
except (RuntimeError, ValueError):
# sometimes produces errors but they are difficult to
# reproduce
logging.warn('', exc_info=True)
def _export(self, ekey):
if ekey not in exp or self.exported.get(ekey): # already exported
return
with self.monitor('export'):
self.exported[ekey] = fnames = exp(ekey, self.datastore)
if fnames:
logging.info('exported %s: %s', ekey[0], fnames)
[docs] def before_export(self):
"""
Set the attributes nbytes
"""
# sanity check that eff_ruptures have been set, i.e. are not -1
csm_info = self.datastore['csm_info']
for sm in csm_info.source_models:
for sg in sm.src_groups:
assert sg.eff_ruptures != -1, sg
for key in self.datastore:
self.datastore.set_nbytes(key)
self.datastore.flush()
[docs]def check_time_event(oqparam, occupancy_periods):
"""
Check the `time_event` parameter in the datastore, by comparing
with the periods found in the exposure.
"""
time_event = oqparam.time_event
if time_event and time_event not in occupancy_periods:
raise ValueError(
'time_event is %s in %s, but the exposure contains %s' %
(time_event, oqparam.inputs['job_ini'],
', '.join(occupancy_periods)))
[docs]class HazardCalculator(BaseCalculator):
"""
Base class for hazard calculators based on source models
"""
precalc = None
[docs] def can_read_parent(self):
"""
:returns:
the parent datastore if it is present and can be read from the
workers, None otherwise
"""
read_access = (
config.distribution.oq_distribute in ('no', 'processpool') or
config.directory.shared_dir)
if self.oqparam.hazard_calculation_id and read_access:
self.datastore.parent.close() # make sure it is closed
return self.datastore.parent
[docs] def compute_previous(self):
precalc = calculators[self.pre_calculator](
self.oqparam, self.monitor('precalculator'),
self.datastore.calc_id)
precalc.run(close=False)
if 'scenario' not in self.oqparam.calculation_mode:
self.csm = precalc.csm
pre_attrs = vars(precalc)
for name in ('riskmodel', 'assets_by_site'):
if name in pre_attrs:
setattr(self, name, getattr(precalc, name))
return precalc
[docs] def read_previous(self, precalc_id):
"""
Read the previous calculation datastore by checking the consistency
of the calculation_mode, then read the risk data.
"""
parent = datastore.read(precalc_id)
check_precalc_consistency(
self.oqparam.calculation_mode, parent['oqparam'].calculation_mode)
self.datastore.parent = parent
# copy missing parameters from the parent
params = {name: value for name, value in
vars(parent['oqparam']).items()
if name not in vars(self.oqparam)}
self.save_params(**params)
return parent
[docs] def pre_execute(self):
"""
Check if there is a pre_calculator or a previous calculation ID.
If yes, read the inputs by invoking the precalculator or by retrieving
the previous calculation; if not, read the inputs directly.
"""
precalc_id = self.oqparam.hazard_calculation_id
if self.pre_calculator is not None:
# the parameter hazard_calculation_id is only meaningful if
# there is a precalculator
if precalc_id is None:
self.precalc = self.compute_previous()
else:
self.read_previous(precalc_id)
self.read_risk_data()
self.init()
else: # we are in a basic calculator
self.read_inputs()
self.param = {} # used in the risk calculators
[docs] def init(self):
"""
To be overridden to initialize the datasets needed by the calculation
"""
if not self.oqparam.imtls:
if self.datastore.parent:
self.oqparam.risk_imtls = (
self.datastore.parent['oqparam'].risk_imtls)
else:
raise ValueError('Missing intensity_measure_types!')
if self.precalc:
self.rlzs_assoc = self.precalc.rlzs_assoc
elif 'csm_info' in self.datastore:
self.rlzs_assoc = self.datastore['csm_info'].get_rlzs_assoc()
elif hasattr(self, 'csm'):
self.rlzs_assoc = self.csm.info.get_rlzs_assoc()
self.datastore['csm_info'] = self.csm.info
else: # build a fake; used by risk-from-file calculators
self.datastore['csm_info'] = fake = source.CompositionInfo.fake()
self.rlzs_assoc = fake.get_rlzs_assoc()
[docs] def read_exposure(self, haz_sitecol=None):
"""
Read the exposure, the riskmodel and update the attributes
.sitecol, .assetcol
"""
logging.info('Reading the exposure')
with self.monitor('reading exposure', autoflush=True):
self.sitecol, self.assetcol = readinput.get_sitecol_assetcol(
self.oqparam, haz_sitecol)
[docs] def get_min_iml(self, oq):
# set the minimum_intensity
if hasattr(self, 'riskmodel') and not oq.minimum_intensity:
# infer it from the risk models if not directly set in job.ini
oq.minimum_intensity = self.riskmodel.get_min_iml()
min_iml = calc.fix_minimum_intensity(
oq.minimum_intensity, oq.imtls)
if min_iml.sum() == 0:
logging.warn('The GMFs are not filtered: '
'you may want to set a minimum_intensity')
else:
logging.info('minimum_intensity=%s', oq.minimum_intensity)
return min_iml
[docs] def load_riskmodel(self):
"""
Read the risk model and set the attribute .riskmodel.
The riskmodel can be empty for hazard calculations.
Save the loss ratios (if any) in the datastore.
"""
logging.info('Reading the risk model if present')
self.riskmodel = rm = readinput.get_risk_model(self.oqparam)
if not self.riskmodel:
parent = self.datastore.parent
if 'composite_risk_model' in parent:
self.riskmodel = riskinput.read_composite_risk_model(parent)
return
self.save_params() # re-save oqparam
# save the risk models and loss_ratios in the datastore
self.datastore['composite_risk_model'] = rm
attrs = self.datastore.getitem('composite_risk_model').attrs
attrs['min_iml'] = hdf5.array_of_vstr(sorted(rm.get_min_iml().items()))
self.datastore.set_nbytes('composite_risk_model')
self.datastore.hdf5.flush()
[docs] def read_risk_data(self):
"""
Read the exposure (if any), the risk model (if any) and then the
site collection, possibly extracted from the exposure.
"""
oq = self.oqparam
with self.monitor('reading site collection', autoflush=True):
haz_sitecol = readinput.get_site_collection(oq)
if haz_sitecol is not None:
logging.info('There are %d hazard site(s)', len(haz_sitecol))
oq_hazard = (self.datastore.parent['oqparam']
if self.datastore.parent else None)
if 'exposure' in oq.inputs:
self.read_exposure(haz_sitecol)
self.load_riskmodel() # must be called *after* read_exposure
self.datastore['assetcol'] = self.assetcol
elif 'assetcol' in self.datastore.parent:
if self.oqparam.region_constraint:
region = wkt.loads(self.oqparam.region_constraint)
self.sitecol = haz_sitecol.within(region)
assetcol = self.datastore.parent['assetcol']
self.assetcol = assetcol.reduce(self.sitecol.sids)
self.datastore['assetcol'] = self.assetcol
logging.info('There are %d/%d assets in the region',
len(self.assetcol), len(assetcol))
else:
self.assetcol = self.datastore.parent['assetcol']
self.load_riskmodel()
else: # no exposure
self.load_riskmodel()
self.sitecol = haz_sitecol
if oq_hazard:
parent = self.datastore.parent
if 'assetcol' in parent:
check_time_event(oq, parent['assetcol'].occupancy_periods)
if oq_hazard.time_event and oq_hazard.time_event != oq.time_event:
raise ValueError(
'The risk configuration file has time_event=%s but the '
'hazard was computed with time_event=%s' % (
oq.time_event, oq_hazard.time_event))
if self.oqparam.job_type == 'risk':
taxonomies = set(taxo for taxo in self.assetcol.tagcol.taxonomy
if taxo != '?')
# check that we are covering all the taxonomies in the exposure
missing = taxonomies - set(self.riskmodel.taxonomies)
if self.riskmodel and missing:
raise RuntimeError('The exposure contains the taxonomies %s '
'which are not in the risk model' % missing)
# same check for the consequence models, if any
consequence_models = riskmodels.get_risk_models(
self.oqparam, 'consequence')
for lt, cm in consequence_models.items():
missing = taxonomies - set(cm)
if missing:
raise ValueError(
'Missing consequenceFunctions for %s' %
' '.join(missing))
[docs] def count_eff_ruptures(self, result_dict, src_group_id):
"""
Returns the number of ruptures in the src_group (after filtering)
or 0 if the src_group has been filtered away.
:param result_dict: a dictionary with keys (grp_id, gsim)
:param src_group_id: the source group ID
"""
return result_dict.eff_ruptures.get(src_group_id, 0)
[docs] def store_source_info(self, infos, acc):
# save the calculation times per each source
if infos:
rows = sorted(
infos.values(),
key=operator.attrgetter('calc_time'),
reverse=True)
array = numpy.zeros(len(rows), source.SourceInfo.dt)
for i, row in enumerate(rows):
for name in array.dtype.names:
value = getattr(row, name)
if name == 'grp_id' and isinstance(value, list):
# same ID sources; store only the first
value = value[0]
array[i][name] = value
self.datastore['source_info'] = array
infos.clear()
self.csm.info.update_eff_ruptures(
partial(self.count_eff_ruptures, acc))
self.rlzs_assoc = self.csm.info.get_rlzs_assoc(self.oqparam.sm_lt_path)
if not self.rlzs_assoc:
raise RuntimeError('Empty logic tree: too much filtering?')
self.datastore['csm_info'] = self.csm.info
if 'source_info' in self.datastore:
# the table is missing for UCERF, we should fix that
self.datastore.set_attrs(
'source_info', nbytes=array.nbytes,
has_dupl_sources=self.csm.has_dupl_sources)
self.datastore.flush()
[docs] def post_process(self):
"""For compatibility with the engine"""
[docs]class RiskCalculator(HazardCalculator):
"""
Base class for all risk calculators. A risk calculator must set the
attributes .riskmodel, .sitecol, .assetcol, .riskinputs in the
pre_execute phase.
"""
[docs] def make_eps(self, num_ruptures):
"""
:param num_ruptures: the size of the epsilon array for each asset
"""
oq = self.oqparam
with self.monitor('building epsilons', autoflush=True):
return riskinput.make_eps(
self.assetcol, num_ruptures,
oq.master_seed, oq.asset_correlation)
[docs] def execute(self):
"""
Parallelize on the riskinputs and returns a dictionary of results.
Require a `.core_task` to be defined with signature
(riskinputs, riskmodel, rlzs_assoc, monitor).
"""
mon = self.monitor('risk')
all_args = [(riskinput, self.riskmodel, self.param, mon)
for riskinput in self.riskinputs]
res = Starmap(self.core_task.__func__, all_args).reduce(self.combine)
return res
[docs] def combine(self, acc, res):
return acc + res
U16 = numpy.uint16
U32 = numpy.uint32
U64 = numpy.uint64
F32 = numpy.float32
[docs]def get_gmv_data(sids, gmfs):
"""
Convert an array of shape (R, N, E, I) into an array of type gmv_data_dt
"""
R, N, E, I = gmfs.shape
gmv_data_dt = numpy.dtype(
[('rlzi', U16), ('sid', U32), ('eid', U64), ('gmv', (F32, (I,)))])
# NB: ordering of the loops: first site, then event, then realization
# it is such that save_gmf_data saves the indices correctly for each sid
it = ((r, sids[s], eid, gmfa[s, eid])
for s, eid in itertools.product(
numpy.arange(N, dtype=U32), numpy.arange(E, dtype=U64))
for r, gmfa in enumerate(gmfs))
return numpy.fromiter(it, gmv_data_dt)
[docs]def get_gmfs(calculator):
"""
:param calculator: a scenario_risk/damage or event_based_risk calculator
:returns: a pair (eids, R) where R is the number of realizations
"""
dstore = calculator.datastore
oq = calculator.oqparam
sitecol = calculator.sitecol
if dstore.parent:
haz_sitecol = dstore.parent['sitecol'] # S sites
else:
haz_sitecol = sitecol # N sites
N = len(haz_sitecol.complete)
I = len(oq.imtls)
if 'gmfs' in oq.inputs: # from file
logging.info('Reading gmfs from file')
eids, gmfs = readinput.get_gmfs(oq)
E = len(eids)
if hasattr(oq, 'number_of_ground_motion_fields'):
if oq.number_of_ground_motion_fields != E:
raise RuntimeError(
'Expected %d ground motion fields, found %d' %
(oq.number_of_ground_motion_fields, E))
else: # set the number of GMFs from the file
oq.number_of_ground_motion_fields = E
# NB: get_gmfs redefine oq.sites in case of GMFs from XML or CSV
haz_sitecol = readinput.get_site_collection(oq) or haz_sitecol
R, N, E, I = gmfs.shape
idx = (slice(None) if haz_sitecol.indices is None
else haz_sitecol.indices)
save_gmf_data(dstore, haz_sitecol, gmfs[:, idx])
# store the events, useful when read the GMFs from a file
events = numpy.zeros(E, readinput.stored_event_dt)
events['eid'] = eids
dstore['events'] = events
return eids, len(gmfs)
elif calculator.precalc: # from previous step
num_assocs = dstore['csm_info'].get_num_rlzs()
E = oq.number_of_ground_motion_fields
eids = numpy.arange(E)
gmfs = numpy.zeros((num_assocs, N, E, I))
for g, gsim in enumerate(calculator.precalc.gsims):
gmfs[g, sitecol.sids] = calculator.precalc.gmfa[gsim]
return eids, len(gmfs)
else: # with --hc option
return (calculator.datastore['events']['eid'],
calculator.datastore['csm_info'].get_num_rlzs())
[docs]def save_gmf_data(dstore, sitecol, gmfs):
"""
:param dstore: a :class:`openquake.baselib.datastore.DataStore` instance
:param sitecol: a :class:`openquake.hazardlib.site.SiteCollection` instance
:param gmfs: an array of shape (R, N, E, I)
"""
offset = 0
dstore['gmf_data/data'] = gmfa = get_gmv_data(sitecol.sids, gmfs)
dic = general.group_array(gmfa, 'sid')
lst = []
for sid in sitecol.complete.sids:
rows = dic.get(sid, ())
n = len(rows)
lst.append(numpy.array([(offset, offset + n)], riskinput.indices_dt))
offset += n
dstore.save_vlen('gmf_data/indices', lst)
dstore.set_attrs('gmf_data', num_gmfs=len(gmfs))
[docs]def import_gmfs(dstore, fname, sids):
"""
Import in the datastore a ground motion field CSV file.
:param dstore: the datastore
:param fname: the CSV file
:param sids: the site IDs (complete)
:returns: event_ids, num_rlzs
"""
array = writers.read_composite_array(fname)
n_imts = len(array.dtype.names[3:]) # rlzi, sid, eid, gmv_PGA, ...
gmf_data_dt = numpy.dtype(
[('rlzi', U16), ('sid', U32), ('eid', U64), ('gmv', (F32, (n_imts,)))])
# store the events
eids = numpy.unique(array['eid'])
eids.sort()
events = numpy.zeros(len(eids), readinput.stored_event_dt)
events['eid'] = eids
dstore['events'] = events
# store the GMFs
dic = general.group_array(array.view(gmf_data_dt), 'sid')
lst = []
offset = 0
for sid in sids:
n = len(dic.get(sid, []))
lst.append(numpy.array([(offset, offset + n)], riskinput.indices_dt))
if n:
offset += n
dstore.extend('gmf_data/data', dic[sid])
dstore.save_vlen('gmf_data/indices', lst)
# FIXME: if there is no data for the maximum realization
# the inferred number of realizations will be wrong
num_rlzs = array['rlzi'].max() + 1
# compute gmdata
dic = general.group_array(array.view(gmf_data_dt), 'rlzi')
gmdata = {r: numpy.zeros(n_imts + 2, F32) for r in range(num_rlzs)}
for r in dic:
gmv = dic[r]['gmv']
rec = gmdata[r] # (imt1, ..., imtM, nevents, nbytes)
rec[:-2] += gmv.sum(axis=0)
rec[-2] += len(gmv)
rec[-1] += gmv.nbytes
return eids, num_rlzs, gmdata