Source code for openquake.engine.calculators.hazard.general

# -*- coding: utf-8 -*-
# vim: tabstop=4 shiftwidth=4 softtabstop=4

# Copyright (c) 2010-2014, GEM Foundation.
#
# OpenQuake is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# OpenQuake is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with OpenQuake.  If not, see <http://www.gnu.org/licenses/>.

"""Common code for the hazard calculators."""

import itertools
import collections
from operator import attrgetter

import numpy

from openquake.hazardlib.imt import from_string

# FIXME: one must import the engine before django to set DJANGO_SETTINGS_MODULE
from openquake.engine.db import models

from openquake.baselib import general
from openquake.commonlib import readinput, risk_parsers, valid
from openquake.commonlib.readinput import get_site_collection

from openquake.engine.input import exposure
from openquake.engine import logs
from openquake.engine import writer
from openquake.engine.utils import config
from openquake.engine.calculators import base
from openquake.risklib import scientific

from openquake.engine.calculators.hazard.post_processing import (
    hazard_curves_to_hazard_map, do_uhs_post_proc)

from openquake.engine.performance import EnginePerformanceMonitor
from openquake.engine.utils import tasks

QUANTILE_PARAM_NAME = "QUANTILE_LEVELS"
POES_PARAM_NAME = "POES"
# Dilation in decimal degrees (http://en.wikipedia.org/wiki/Decimal_degrees)
# 1e-5 represents the approximate distance of one meter at the equator.
DILATION_ONE_METER = 1E-5


class InputWeightLimit(Exception):
    pass


class OutputWeightLimit(Exception):
    pass

def all_equal(obj, value):
    """
    :param obj: a numpy array or something else
    :param value: a numeric value
    :returns: a boolean
    """
    eq = (obj == value)
    if isinstance(eq, numpy.ndarray):
        return eq.all()
    else:
        return eq

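# A minimal usage sketch of `all_equal` (illustrative only, not part of
# the engine API): the helper behaves uniformly on scalars and arrays:
#
#   all_equal(0.0, 0)                  # -> True
#   all_equal(numpy.zeros(3), 0)       # -> True
#   all_equal(numpy.array([0, 1]), 0)  # -> False
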
class BaseHazardCalculator(base.Calculator):
    """
    Abstract base class for hazard calculators. Contains a bunch of common
    functionality, like initialization procedures.
    """
    tilepath = ()  # set only by the tiling calculator

    def __init__(self, job):
        super(BaseHazardCalculator, self).__init__(job)
        # a dictionary trt_model_id -> num_ruptures
        self.num_ruptures = collections.defaultdict(int)
        # now a dictionary (trt_model_id, gsim) -> poes
        self.acc = general.AccumDict()
        self.mean_hazard_curves = self.oqparam.mean_hazard_curves
        self.quantile_hazard_curves = self.oqparam.quantile_hazard_curves
        self._hazard_curves = []
        self._realizations = []
        self._source_models = []

    @EnginePerformanceMonitor.monitor
    def execute(self):
        """
        Run `.core_calc_task` in parallel by using the apply_reduce
        distribution; this method can be overridden in subclasses.
        """
        csm = self.composite_model
        rlzs_assoc = csm.get_rlzs_assoc()
        # temporary hack
        if self.__class__.__name__ == 'EventBasedHazardCalculator':
            info = rlzs_assoc.csm_info
        else:
            info = rlzs_assoc.gsims_by_trt_id
        self.acc = tasks.apply_reduce(
            self.core_calc_task,
            (csm.get_sources(), self.site_collection, info, self.monitor),
            agg=self.agg_curves, acc=self.acc,
            weight=attrgetter('weight'), key=attrgetter('trt_model_id'),
            concurrent_tasks=self.concurrent_tasks)

    @EnginePerformanceMonitor.monitor
    def agg_curves(self, acc, result):
        """
        This is used to incrementally update hazard curve results by
        combining an initial value with some new results. (Each set of
        new results is computed over only a subset of the seismic sources
        defined in the calculation model.)

        :param acc:
            a dictionary of curves
        :param result:
            a dictionary `{trt_model_id: (curves_by_gsim, bbs)}`;
            `curves_by_gsim` is a list of pairs `(gsim, curves_by_imt)`
            where `curves_by_imt` is a list of 2-D numpy arrays
            representing the new results which need to be combined with
            the current value. These should be the same shape as
            `acc[trt_model_id, gsim][j]` where `gsim` is the GSIM name
            and `j` is the IMT ordinal.
        """
        for trt_model_id, (curves_by_gsim, bbs) in result.iteritems():
            for gsim, probs in curves_by_gsim:
                pnes = []
                for prob, zero in itertools.izip(probs, self.zeros):
                    pnes.append(1 - (zero if all_equal(prob, 0) else prob))
                pnes1 = numpy.array(pnes)
                pnes2 = 1 - acc.get((trt_model_id, gsim), self.zeros)
                acc[trt_model_id, gsim] = 1 - pnes1 * pnes2

            if self.oqparam.poes_disagg:
                for bb in bbs:
                    self.bb_dict[bb.lt_model_id, bb.site_id].update_bb(bb)

        return acc

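    # Note on the update rule in `agg_curves`: it combines probabilities
    # of exceedance under an independence assumption. Given the already
    # accumulated curves `p1` and the new curves `p2`, the combined curve
    # is 1 - (1 - p1) * (1 - p2). A minimal numeric sketch with made-up
    # values:
    #
    #   p1 = numpy.array([0.1, 0.2])
    #   p2 = numpy.array([0.3, 0.0])
    #   1 - (1 - p1) * (1 - p2)  # -> approximately [0.37, 0.2]
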
    def pre_execute(self):
        """
        Initialize risk models, site model and sources
        """
        self.parse_risk_model()
        self.initialize_site_collection()
        self.initialize_sources()
        info = readinput.get_job_info(
            self.oqparam, self.composite_model, self.site_collection)
        models.JobInfo.objects.create(
            oq_job=self.job,
            num_sites=info['n_sites'],
            num_realizations=info['n_realizations'],
            num_imts=info['n_imts'],
            num_levels=info['n_levels'],
            input_weight=info['input_weight'],
            output_weight=info['output_weight'])
        self.check_limits(info['input_weight'], info['output_weight'])
        self.init_zeros_ones()
        return info['input_weight'], info['output_weight']

    def init_zeros_ones(self):
        """
        Initialize the arrays `zeros` and `ones`, ordered by IMT:
        `zeros[j]` is a (num_sites, num_levels) array of zeros for the
        j-th IMT, used as the starting value when building the curves.
        """
        imtls = self.oqparam.imtls
        if None in imtls.values():  # no levels, cannot compute curves
            return
        n_sites = len(self.site_collection)
        self.zeros = numpy.array(
            [numpy.zeros((n_sites, len(imtls[imt])))
             for imt in sorted(imtls)])
        self.ones = [numpy.zeros(len(imtls[imt]), dtype=float)
                     for imt in sorted(imtls)]

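    # For instance (hypothetical values), with 2 sites and
    # imtls = {'PGA': [0.1, 0.2, 0.4], 'SA(0.1)': [0.1, 0.2]},
    # `self.zeros` contains a (2, 3) array for PGA followed by a (2, 2)
    # array for SA(0.1), since the IMTs are taken in sorted order.
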
    def check_limits(self, input_weight, output_weight):
        """
        Compute the total weight of the source model and the expected
        output size and compare them with the parameters
        `max_input_weight` and `max_output_weight` in openquake.cfg;
        if those parameters are set and a limit is exceeded, raise an
        error.
        """
        if (self.max_input_weight and
                input_weight > self.max_input_weight):
            raise InputWeightLimit(
                'A limit of %d on the maximum source model weight was set. '
                'The weight of your model is %d. Please reduce your model '
                'or raise the parameter max_input_weight in openquake.cfg'
                % (self.max_input_weight, input_weight))
        elif self.max_output_weight and output_weight > self.max_output_weight:
            raise OutputWeightLimit(
                'A limit of %d on the maximum output weight was set. '
                'The weight of your output is %d. Please reduce the number '
                'of sites, the number of IMTs, the number of realizations '
                'or the number of stochastic event sets; otherwise, '
                'raise the parameter max_output_weight in openquake.cfg'
                % (self.max_output_weight, output_weight))

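    # The limits above are read from openquake.cfg; a hypothetical
    # configuration (section name and values for illustration only)
    # could look like:
    #
    #   [hazard]
    #   max_input_weight = 100000000
    #   max_output_weight = 4000000
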
    def post_execute(self, result=None):
        """Initialize realizations"""
        self.initialize_realizations()
        # must be called after the realizations are known
        self.save_hazard_curves()

    @EnginePerformanceMonitor.monitor
    def initialize_sources(self):
        """
        Parse source models, apply uncertainties and validate source
        logic trees. Save in the database LtSourceModel and TrtModel
        objects.
        """
        logs.LOG.progress("initializing sources")
        parallel_source_splitting = valid.boolean(
            config.get('hazard', 'parallel_source_splitting') or 'false')
        self.composite_model = readinput.get_composite_source_model(
            self.oqparam, self.site_collection,
            no_distribute=not parallel_source_splitting)
        for sm in self.composite_model:
            # create an LtSourceModel for each distinct source model
            lt_model = models.LtSourceModel.objects.create(
                hazard_calculation=self.job,
                sm_lt_path=self.tilepath + sm.path,
                ordinal=sm.ordinal, sm_name=sm.name, weight=sm.weight,
                samples=sm.samples)
            self._source_models.append(lt_model)
            gsims_by_trt = sm.gsim_lt.values
            # save TrtModels for each tectonic region type
            # and store the db ID in the in-memory models
            for trt_mod in sm.trt_models:
                trt_mod.id = models.TrtModel.objects.create(
                    lt_model=lt_model,
                    tectonic_region_type=trt_mod.trt,
                    num_sources=len(trt_mod),
                    num_ruptures=trt_mod.num_ruptures,
                    min_mag=trt_mod.min_mag,
                    max_mag=trt_mod.max_mag,
                    gsims=gsims_by_trt[trt_mod.trt]).id

    @EnginePerformanceMonitor.monitor
    def parse_risk_model(self):
        """
        If any risk model is given in the hazard calculation, the
        computation will be driven by the risk data. In this case the
        locations will be extracted from the exposure file (if there is
        one) and the IMTs (and levels) will be extracted from the
        vulnerability model (if there is one).
        """
        oqparam = self.job.get_oqparam()
        if 'exposure' in oqparam.inputs:
            with logs.tracing('storing exposure'):
                exposure.ExposureDBWriter(self.job).serialize(
                    risk_parsers.ExposureModelParser(
                        oqparam.inputs['exposure']))
        models.Imt.save_new(map(from_string, oqparam.imtls))

    @EnginePerformanceMonitor.monitor
    def initialize_site_collection(self):
        """
        Populate the hazard site table and create a site_collection
        attribute.
        """
        logs.LOG.progress("initializing sites")
        points, site_ids = self.job.save_hazard_sites()
        if not site_ids:
            raise RuntimeError('No sites were imported!')
        logs.LOG.progress("initializing site collection")
        oqparam = self.job.get_oqparam()
        self.site_collection = get_site_collection(oqparam, points, site_ids)

    def initialize_realizations(self):
        """
        Create records in the `hzrdr.lt_realization` table.

        This function works either in random sampling mode (when the
        lt_realization models get the random seed value) or in
        enumeration mode (when the weight values are populated). In both
        cases we record the logic tree paths for both trees in the
        `lt_realization` record, as well as the ordinal number of the
        realization (zero-based).
        """
        logs.LOG.progress("initializing realizations")
        cm = self.composite_model
        self._realizations = []
        self.rlzs_assoc = cm.get_rlzs_assoc(
            lambda trt_model: models.TrtModel.objects.get(
                pk=trt_model.id).num_ruptures)
        gsims_by_trt_id = self.rlzs_assoc.gsims_by_trt_id
        for lt_model, rlzs in zip(
                self._source_models, self.rlzs_assoc.rlzs_by_smodel):
            logs.LOG.info('Creating %d realization(s) for model %s, %s',
                          len(rlzs), lt_model.sm_name,
                          '_'.join(lt_model.sm_lt_path))
            for rlz in rlzs:
                gsim_by_trt = self.rlzs_assoc.gsim_by_trt[rlz.ordinal]
                lt_rlz = models.LtRealization.objects.create(
                    lt_model=lt_model,
                    gsim_lt_path=rlz.gsim_rlz.lt_uid,
                    weight=rlz.weight,
                    ordinal=rlz.ordinal)
                rlz.id = lt_rlz.id
                self._realizations.append(lt_rlz)
                for trt_model in lt_model.trtmodel_set.filter(
                        pk__in=gsims_by_trt_id):
                    trt = trt_model.tectonic_region_type
                    # populate the association table rlz <-> trt_model
                    models.AssocLtRlzTrtModel.objects.create(
                        rlz=lt_rlz, trt_model=trt_model,
                        gsim=gsim_by_trt[trt])
                    trt_model.gsims = [
                        gsim.__class__.__name__
                        for gsim in gsims_by_trt_id[trt_model.id]]
                    trt_model.save()

    # this could be parallelized in the future, however in all the cases
    # I have seen until now, the serialized approach is fast enough (MS)
    @EnginePerformanceMonitor.monitor
    def save_hazard_curves(self):
        """
        Post-execution actions. At the moment, all we do is finalize the
        hazard curve results.
        """
        if not self.acc:
            return
        imtls = self.oqparam.imtls
        points = models.HazardSite.objects.filter(
            hazard_calculation=self.job).order_by('id')
        sorted_imts = sorted(imtls)
        curves_by_imt = dict((imt, []) for imt in sorted_imts)
        individual_curves = self.job.get_param(
            'individual_curves', missing=True)
        for rlz in self._realizations:
            if individual_curves:
                # create a multi-imt curve
                multicurve = models.Output.objects.create_output(
                    self.job, "hc-multi-imt-rlz-%s" % rlz.id,
                    "hazard_curve_multi")
                models.HazardCurve.objects.create(
                    output=multicurve, lt_realization=rlz,
                    investigation_time=self.oqparam.investigation_time)
            with self.monitor('building curves per realization'):
                the_curves = models.build_curves(rlz, self.acc)
                if all_equal(the_curves, 0):
                    the_curves = self.zeros
            for imt, curves in zip(sorted_imts, the_curves):
                if individual_curves:
                    self.save_curves_for_rlz_imt(
                        rlz, imt, imtls[imt], points, curves)
                curves_by_imt[imt].append(curves)

        self.acc = {}  # save memory for the post-processing phase
        if self.mean_hazard_curves or self.quantile_hazard_curves:
            self.curves_by_imt = curves_by_imt

    def save_curves_for_rlz_imt(self, rlz, imt, imls, points, curves):
        """
        Save the curves corresponding to a given realization and IMT.

        :param rlz: a LtRealization instance
        :param imt: an IMT string
        :param imls: the intensity measure levels for the given IMT
        :param points: the points associated to the curves
        :param curves: the curves
        """
        # create a new `HazardCurve` 'container' record for each
        # realization for each intensity measure type
        hc_im_type, sa_period, sa_damping = from_string(imt)

        # save output
        hco = models.Output.objects.create(
            oq_job=self.job,
            display_name="Hazard Curve rlz-%s-%s" % (rlz.id, imt),
            output_type='hazard_curve',
        )

        # save hazard_curve
        haz_curve = models.HazardCurve.objects.create(
            output=hco,
            lt_realization=rlz,
            investigation_time=self.oqparam.investigation_time,
            imt=hc_im_type,
            imls=imls,
            sa_period=sa_period,
            sa_damping=sa_damping,
        )
        self._hazard_curves.append(haz_curve)

        # save hazard_curve_data
        logs.LOG.info('saving %d hazard curves for %s, imt=%s',
                      len(points), hco, imt)
        writer.CacheInserter.saveall([models.HazardCurveData(
            hazard_curve=haz_curve,
            poes=list(poes),
            location='POINT(%s %s)' % (p.lon, p.lat),
            weight=rlz.weight)
            for p, poes in zip(points, curves)])

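    # Note on `from_string` above: for 'PGA' the returned IMT unpacks to
    # ('PGA', None, None), while 'SA(0.1)' unpacks to ('SA', 0.1, 5.0)
    # (5% damping by default); this is why sa_period and sa_damping are
    # stored as separate columns of the HazardCurve record.
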
    @EnginePerformanceMonitor.monitor
    def do_aggregate_post_proc(self):
        """
        Grab hazard data for all realizations and sites from the database
        and compute mean and/or quantile aggregates (depending on which
        options are enabled in the calculation). Post-processing results
        will be stored directly into the database.
        """
        num_rlzs = len(self._realizations)
        if not num_rlzs:
            logs.LOG.warn('No realizations for hazard_calculation_id=%d',
                          self.job.id)
            return
        elif num_rlzs == 1 and self.quantile_hazard_curves:
            logs.LOG.warn(
                'There is only one realization, the configuration '
                'parameter quantile_hazard_curves should not be set')
            return
        weights = (None if self.oqparam.number_of_logic_tree_samples
                   else [rlz.weight for rlz in self._realizations])

        if self.oqparam.mean_hazard_curves:
            # create a new `HazardCurve` 'container' record for mean
            # curves (virtual container for multiple imts)
            models.HazardCurve.objects.create(
                output=models.Output.objects.create_output(
                    self.job, "mean-curves-multi-imt",
                    "hazard_curve_multi"),
                statistics="mean", imt=None,
                investigation_time=self.oqparam.investigation_time)

        for quantile in self.quantile_hazard_curves:
            # create a new `HazardCurve` 'container' record for quantile
            # curves (virtual container for multiple imts)
            models.HazardCurve.objects.create(
                output=models.Output.objects.create_output(
                    self.job, 'quantile(%s)-curves' % quantile,
                    "hazard_curve_multi"),
                statistics="quantile", imt=None, quantile=quantile,
                investigation_time=self.oqparam.investigation_time)

        for imt, imls in self.oqparam.imtls.items():
            im_type, sa_period, sa_damping = from_string(imt)

            # prepare `output` and `hazard_curve` containers in the DB:
            container_ids = dict()
            if self.oqparam.mean_hazard_curves:
                mean_output = self.job.get_or_create_output(
                    display_name='Mean Hazard Curves %s' % imt,
                    output_type='hazard_curve')
                mean_hc = models.HazardCurve.objects.create(
                    output=mean_output,
                    investigation_time=self.oqparam.investigation_time,
                    imt=im_type, imls=imls,
                    sa_period=sa_period, sa_damping=sa_damping,
                    statistics='mean')
                self._hazard_curves.append(mean_hc)
                container_ids['mean'] = mean_hc.id

            for quantile in self.quantile_hazard_curves:
                q_output = self.job.get_or_create_output(
                    display_name=('%s quantile Hazard Curves %s'
                                  % (quantile, imt)),
                    output_type='hazard_curve')
                q_hc = models.HazardCurve.objects.create(
                    output=q_output,
                    investigation_time=self.oqparam.investigation_time,
                    imt=im_type, imls=imls,
                    sa_period=sa_period, sa_damping=sa_damping,
                    statistics='quantile', quantile=quantile)
                self._hazard_curves.append(q_hc)
                container_ids['q%s' % quantile] = q_hc.id

            # num_rlzs * num_sites * num_levels
            # NB: different IMTs can have different num_levels
            all_curves_for_imt = numpy.array(self.curves_by_imt[imt])
            del self.curves_by_imt[imt]  # save memory

            inserter = writer.CacheInserter(
                models.HazardCurveData, max_cache_size=10000)

            # curve_poes below is an array num_rlzs * num_levels
            for i, site in enumerate(self.site_collection):
                wkt = site.location.wkt2d
                curve_poes = numpy.array(
                    [c_by_rlz[i] for c_by_rlz in all_curves_for_imt])

                # calc quantiles first
                for quantile in self.quantile_hazard_curves:
                    q_curve = scientific.quantile_curve(
                        curve_poes, quantile, weights)
                    inserter.add(
                        models.HazardCurveData(
                            hazard_curve_id=(
                                container_ids['q%s' % quantile]),
                            poes=q_curve.tolist(),
                            location=wkt))

                # then means
                if self.mean_hazard_curves and len(curve_poes):
                    m_curve = scientific.mean_curve(curve_poes, weights)
                    inserter.add(
                        models.HazardCurveData(
                            hazard_curve_id=container_ids['mean'],
                            poes=m_curve.tolist(),
                            location=wkt))
            inserter.flush()

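    # Sketch of the statistics computed above, with made-up values (the
    # real `curve_poes` array comes from the calculation): given two
    # realizations over two intensity levels,
    #
    #   curve_poes = numpy.array([[0.1, 0.01], [0.3, 0.03]])
    #   scientific.mean_curve(curve_poes, weights)           # ~ [0.2, 0.02]
    #   scientific.quantile_curve(curve_poes, 0.5, weights)  # median curve
    #
    # with `weights=None` (sampling mode) the realizations are implicitly
    # equally weighted.
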
    def post_process(self):
        """
        Optionally generates aggregate curves, hazard maps and
        uniform_hazard_spectra.
        """
        # means/quantiles:
        if self.mean_hazard_curves or self.quantile_hazard_curves:
            self.do_aggregate_post_proc()

        # hazard maps:
        # required for computing UHS
        # if `hazard_maps` is false but `uniform_hazard_spectra` is true,
        # just don't export the maps
        if (self.oqparam.hazard_maps or
                self.oqparam.uniform_hazard_spectra):
            with self.monitor('generating hazard maps',
                              autoflush=True) as mon:
                tasks.apply_reduce(
                    hazard_curves_to_hazard_map,
                    (self._hazard_curves, self.oqparam.poes, mon),
                    concurrent_tasks=self.concurrent_tasks)
        if self.oqparam.uniform_hazard_spectra:
            do_uhs_post_proc(self.job)