# -*- coding: utf-8 -*-
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright (c) 2016-2017 GEM Foundation
# OpenQuake is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# OpenQuake is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
# You should have received a copy of the GNU Affero General Public License
# along with OpenQuake. If not, see <http://www.gnu.org/licenses/>.
from openquake.baselib.python3compat import zip
from openquake.hazardlib.stats import compute_stats
import numpy
F64 = numpy.float64
BYTES_PER_FLOAT = 8
class ProbabilityCurve(object):
"""
This class is a small wrapper over an array of PoEs associated with
a set of intensity measure types and levels. It provides a few operators,
including the complement operator `~`
~p = 1 - p
and the inclusive or operator `|`
p = p1 | p2 = ~(~p1 * ~p2)
Such operators are implemented efficiently at the numpy level, by
dispatching on the underlying array.
Here is an example of use:
>>> poe = ProbabilityCurve(numpy.array([0.1, 0.2, 0.3, 0, 0]))
>>> ~(poe | poe) * .5
<ProbabilityCurve
[ 0.405 0.32 0.245 0.5 0.5 ]>
"""
def __init__(self, array):
self.array = array
def __or__(self, other):
if other == 0:
return self
else:
return self.__class__(1. - (1. - self.array) * (1. - other.array))
__ror__ = __or__
def __iadd__(self, other):
# this is used when composing mutually exclusive probabilities
self.array += other.array
return self
def __mul__(self, other):
if isinstance(other, self.__class__):
return self.__class__(self.array * other.array)
elif other == 1:
return self
else:
return self.__class__(self.array * other)
__rmul__ = __mul__
def __invert__(self):
return self.__class__(1. - self.array)
def __nonzero__(self):
return bool(self.array.any())
__bool__ = __nonzero__  # Python 3 truth testing goes through __bool__
def __repr__(self):
return '<ProbabilityCurve\n%s>' % self.array
# used when exporting to HDF5
def convert(self, imtls, idx=0):
"""
Convert a probability curve into a record of dtype `imtls.dt`.
:param imtls: DictArray instance
:param idx: extract the data corresponding to the given inner index
"""
curve = numpy.zeros(1, imtls.dt)
for imt in imtls:
curve[imt] = self.array[imtls.slicedic[imt], idx]
return curve[0]
class ProbabilityMap(dict):
"""
A dictionary site_id -> ProbabilityCurve. It defines the complement
operator `~`, performing the complement on each curve
~p = 1 - p
and the "inclusive or" operator `|`:
m = m1 | m2 = {sid: m1[sid] | m2[sid] for sid in all_sids}
Such operators are implemented efficiently at the numpy level, by
dispatching on the underlying array. Moreover there is a classmethod
.build(L, I, sids, initvalue) to build initialized instances of
:class:`ProbabilityMap`. The map can be represented as 3D array of shape
(shape_x, shape_y, shape_z) = (N, L, I), where N is the number of site IDs,
L the total number of hazard levels and I the number of GSIMs.
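Here is a small, purely illustrative example (the sizes and site IDs
below are arbitrary):
>>> m1 = ProbabilityMap.build(3, 1, sids=[0, 1])
>>> m2 = ProbabilityMap.build(3, 1, sids=[1, 2])
>>> sorted(m1 | m2)
[0, 1, 2]
>>> (m1 | m2).array.shape  # (N, L, I)
(3, 3, 1)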
"""
@classmethod
def build(cls, shape_y, shape_z, sids, initvalue=0.):
"""
:param shape_y: the total number of intensity measure levels
:param shape_z: the number of inner levels
:param sids: a set of site indices
:param initvalue: the initial value of the probability (default 0)
:returns: a ProbabilityMap dictionary
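For instance (illustrative values only):
>>> pmap = ProbabilityMap.build(4, 2, sids=[1, 7], initvalue=.5)
>>> sorted(pmap)
[1, 7]
>>> pmap[7].array.shape  # (shape_y, shape_z)
(4, 2)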
"""
dic = cls(shape_y, shape_z)
for sid in sids:
dic.setdefault(sid, initvalue)
return dic
def __init__(self, shape_y, shape_z):
self.shape_y = shape_y
self.shape_z = shape_z
def setdefault(self, sid, value):
"""
Works like `dict.setdefault`: if the `sid` key is missing, a new
ProbabilityCurve filled with `value` is stored and returned.
:param sid: site ID
:param value: value used to fill the returned array
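For instance (a minimal sketch with arbitrary values):
>>> pmap = ProbabilityMap(2, 1)
>>> pc = pmap.setdefault(42, 0.)
>>> pc.array.shape
(2, 1)
>>> pmap.setdefault(42, 1.) is pc  # the existing curve is returned
True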
"""
try:
return self[sid]
except KeyError:
array = numpy.empty((self.shape_y, self.shape_z), F64)
array.fill(value)
pc = ProbabilityCurve(array)
self[sid] = pc
return pc
@property
def sids(self):
"""The ordered keys of the map as a numpy.uint32 array"""
return numpy.array(sorted(self), numpy.uint32)
@property
def array(self):
"""
The underlying array of shape (N, L, I)
"""
return numpy.array([self[sid].array for sid in sorted(self)])
@property
def nbytes(self):
"""The size of the underlying array"""
N, L, I = get_shape([self])
return BYTES_PER_FLOAT * N * L * I
# used when exporting to HDF5
def convert(self, imtls, nsites=None, idx=0):
"""
Convert a probability map into a composite array of length `nsites`
and dtype `imtls.dt`.
:param imtls: DictArray instance
:param nsites: the total number of sites (or None)
:param idx: extract the data corresponding to the given inner index
"""
if nsites is None:
nsites = len(self)
curves = numpy.zeros(nsites, imtls.dt)
for imt in curves.dtype.names:
curves_by_imt = curves[imt]
for sid in self:
curves_by_imt[sid] = self[sid].array[imtls.slicedic[imt], idx]
return curves
def filter(self, sids):
"""
Extracts a submap of self for the given sids.
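For instance (the site IDs below are arbitrary, purely for illustration):
>>> pmap = ProbabilityMap.build(3, 1, sids=[0, 1, 2])
>>> sorted(pmap.filter([1, 2, 42]))  # missing sids are silently skipped
[1, 2]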
"""
dic = self.__class__(self.shape_y, self.shape_z)
for sid in sids:
try:
dic[sid] = self[sid]
except KeyError:
pass
return dic
def __ior__(self, other):
self_sids = set(self)
other_sids = set(other)
for sid in self_sids & other_sids:
self[sid] = self[sid] | other[sid]
for sid in other_sids - self_sids:
self[sid] = other[sid]
return self
def __or__(self, other):
new = self.__class__(self.shape_y, self.shape_z)
new.update(self)
new |= other
return new
__ror__ = __or__
def __mul__(self, other):
sids = set(self) | set(other)
new = self.__class__(self.shape_y, self.shape_z)
for sid in sids:
new[sid] = self.get(sid, 1) * other.get(sid, 1)
return new
def __invert__(self):
new = self.__class__(self.shape_y, self.shape_z)
for sid in self:
if (self[sid].array != 1.).any():
new[sid] = ~self[sid] # store only nonzero probabilities
return new
def __toh5__(self):
# converts to an array of shape (num_sids, shape_y, shape_z)
size = len(self)
sids = self.sids
shape = (size, self.shape_y, self.shape_z)
array = numpy.zeros(shape, F64)
for i, sid in numpy.ndenumerate(sids):
array[i] = self[sid].array
return array, dict(sids=sids)
def __fromh5__(self, array, attrs):
# rebuild the map from sids and probs arrays
self.shape_y = array.shape[1]
self.shape_z = array.shape[2]
for sid, prob in zip(attrs['sids'], array):
self[sid] = ProbabilityCurve(prob)
def get_shape(pmaps):
"""
:param pmaps: a set of homogeneous ProbabilityMaps
:returns: the common shape (N, L, I)
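For instance (an illustrative sketch with arbitrary sizes):
>>> pmap = ProbabilityMap.build(5, 2, sids=[0, 1, 2])
>>> get_shape([pmap, pmap])
(3, 5, 2)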
"""
for pmap in pmaps:
if pmap:
sid = next(iter(pmap))
break
else:
raise ValueError('All probability maps were empty!')
return (len(pmap),) + pmap[sid].array.shape
class PmapStats(object):
"""
A class to perform statistics on ProbabilityMaps.
:param weights: a list of weights
:param quantiles: a list of floats in the range 0..1
Here is an example:
>>> pm1 = ProbabilityMap.build(3, 1, sids=[0, 1],
... initvalue=1.0)
>>> pm2 = ProbabilityMap.build(3, 1, sids=[0],
... initvalue=0.8)
>>> PmapStats(quantiles=[]).compute(sids=[0, 1], pmaps=[pm1, pm2])
[('mean', {0: <ProbabilityCurve
[[ 0.9]
[ 0.9]
[ 0.9]]>, 1: <ProbabilityCurve
[[ 0.5]
[ 0.5]
[ 0.5]]>})]
"""
def __init__(self, quantiles, weights=None):
self.quantiles = quantiles
self.weights = weights
# the tests are in the engine
def compute_pmap(self, sids, pmaps):
"""
:param sids: array of N site IDs
:param pmaps: array of R simple ProbabilityMaps
:returns: a ProbabilityMap with arrays of size (num_levels, num_stats)
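For instance, reusing maps like the ones in the class docstring
(purely illustrative values):
>>> pm1 = ProbabilityMap.build(3, 1, sids=[0, 1], initvalue=1.)
>>> pm2 = ProbabilityMap.build(3, 1, sids=[0], initvalue=.8)
>>> res = PmapStats(quantiles=[]).compute_pmap([0, 1], [pm1, pm2])
>>> res[0].array.shape  # (num_levels, num_stats)
(3, 1)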
"""
if len(pmaps) == 0:
raise ValueError('No probability maps!')
elif len(pmaps) == 1: # the mean is the only pmap
assert not self.quantiles, self.quantiles
return pmaps[0]
elif sum(len(pmap) for pmap in pmaps) == 0: # all empty pmaps
raise ValueError('All empty probability maps!')
N, L, I = get_shape(pmaps)
nstats = len(self.quantiles) + 1
stats = ProbabilityMap.build(L, nstats, sids)
curves_by_rlz = numpy.zeros((len(pmaps), len(sids), L), numpy.float64)
for i, pmap in enumerate(pmaps):
for j, sid in enumerate(sids):
if sid in pmap:
curves_by_rlz[i][j] = pmap[sid].array[:, 0]
mq = compute_stats(curves_by_rlz, self.quantiles, self.weights)
for i, array in enumerate(mq):
for j, sid in numpy.ndenumerate(sids):
stats[sid].array[:, i] = array[j]
return stats
def compute(self, sids, pmaps):
"""
:param sids:
array of N site IDs
:param pmaps:
array of R simple ProbabilityMaps
:returns:
a list of pairs [('mean', ...), ('quantile-XXX', ...), ...]
"""
stats = self.compute_pmap(sids, pmaps)
names = ['mean'] + ['quantile-%s' % q for q in self.quantiles]
return [(name, stats.extract(i)) for i, name in enumerate(names)]