mystatsfunctions
15 Apr 2021
In this post I use a Jupyter notebook to demo the package I have created for easily carrying out regression analyses and fitting distributions via L-moments.
mystatsfunctions¶
This notebook contains examples of the OLSE and LMoments modules, available here: https://github.com/njleach/mystatsfunctions
Import modules¶
# base libraries
import numpy as np
import scipy as sp
import xarray as xr
import pandas as pd
# plotting libraries
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import seaborn as sn
import cartopy
from cartopy import crs
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import cartopy.feature as cfeature
from mystatsfunctions import OLSE,LMoments
Retrieve some example data¶
In this example I'll be using maximum temperature data from E-OBS. This is available to download here: https://surfobs.climate.copernicus.eu/dataaccess/access_eobs.php. I've pre-processed the daily maximum temperature data using cdo as follows:
cdo mergetime -yearmax tx_ens_mean_0.1deg_reg_pre1950.nc -yearmax tx_ens_mean_0.1deg_reg_v23.1e.nc txx_ens_mean_0.1deg_reg_v23.1e_pre1950_merge.nc
Reference:
Cornes, R. C., van der Schrier, G., van den Besselaar, E. J. M., & Jones, P. D. (2018). An Ensemble Version of the E-OBS Temperature and Precipitation Data Sets. Journal of Geophysical Research: Atmospheres, 123(17), 9391–9409. https://doi.org/10.1029/2017JD028200
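If cdo isn't available, a rough xarray equivalent of that preprocessing is sketched below (a sketch only: the filenames are taken from the cdo command above, and the chunked reads assume dask is installed).
## rough xarray equivalent of the cdo preprocessing (sketch)
files = ['tx_ens_mean_0.1deg_reg_pre1950.nc', 'tx_ens_mean_0.1deg_reg_v23.1e.nc']
parts = [xr.open_dataset(f, chunks={'time': 365}).tx for f in files]
## annual maxima of daily maximum temperature (TXx) for each file, merged along time
txx = xr.concat([part.resample(time='1Y').max() for part in parts], dim='time')
txx.to_netcdf('txx_ens_mean_0.1deg_reg_v23.1e_pre1950_merge.nc')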
txx_eobs = xr.open_dataset('/home/leachl/Documents/Datasets/Reanalyses/E-OBS/preproc/txx_ens_mean_0.1deg_reg_v23.1e_pre1950_merge.nc').tx
I'll also use the Cowtan-Way global mean surface temperature dataset as a covariate.
Reference:
Cowtan, K., & Way, R. G. (2014). Coverage bias in the HadCRUT4 temperature series and its impact on recent temperature trends. Quarterly Journal of the Royal Meteorological Society, 140(683), 1935–1944. https://doi.org/10.1002/qj.2297
GMST = pd.read_csv('https://www-users.york.ac.uk/~kdc3/papers/coverage2013/had4_krig_annual_v2_0_0.txt',delim_whitespace=True,index_col=0,usecols=[0,1],names=['year','raw'])
## use a lowess smoother to remove the internal variability
import statsmodels.api as sm
lowess = sm.nonparametric.lowess
## I'll use 30-year blocks
GMST['smooth'] = lowess(GMST.raw,GMST.index,frac = 30/GMST.size)[:,1]
Let's check the smoothed series looks reasonable:
GMST.plot(color=['r','k'])
plt.legend(frameon=False)
plt.xlim(1850,2020)
plt.title('Cowtan-Way timeseries')
plt.ylabel('GMST / K')
sn.despine()
I. Trend in maximum temperatures¶
In this section I'll use the OLSE module to estimate the trend in the hottest day of the year, by regressing the gridpoint TXx timeseries onto the (smoothed) global mean temperature change.
## the inputs to OLSE.simple must be numpy arrays
Y = txx_eobs.values
X = GMST.loc[txx_eobs['time.year'].values,'smooth'].values
## mask out missing values, plus any gridpoint with more than 20 missing years (i.e. fewer than ~80 years of valid data)
mask = np.isnan(Y) | (np.isnan(Y).sum(axis=0)>20)
This next cell creates the simple regression object & fits it to the independent variable.
## initialise simple linear regression object with the dependent variable and mask arrays
SLR = OLSE.simple(Y,mask=mask)
## fit to (broadcasted) X array
SLR.fit(np.broadcast_to(X[:,None,None],Y.shape))
Here we demonstrate some of the attributes and methods of the simple object. The pred() method and the confidence/prediction interval methods, CI() and PI(), are evaluated at the X array values by default; the intervals are 90% by default.
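For example, pred() can also be evaluated at an explicit covariate value rather than at the fitted X array (this is how the distribution location is shifted in section II); a quick sketch:
## e.g. the linear-model estimate of TXx at every gridpoint for the 1961-1990 mean GMST
txx_6190 = SLR.pred(GMST.loc['1961':'1990','smooth'].mean())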
## We'll store the outputs in DataArrays for convenience
## model parameters
intercept = xr.zeros_like(txx_eobs.isel(time=-1)).rename('intercept')+SLR.b0
slope = xr.zeros_like(txx_eobs.isel(time=-1)).rename('trend')+SLR.b1
slope_err = xr.zeros_like(txx_eobs.isel(time=-1)).rename('trend error')+SLR.err_b1
## model correlation
cor = xr.zeros_like(txx_eobs.isel(time=-1)).rename('correlation')+SLR.cor()
## linear model estimates + confidence intervals
model=xr.zeros_like(txx_eobs).rename('model')+SLR.pred()
CI=xr.zeros_like(txx_eobs).expand_dims({'percentile':[5,95]}).rename('CI')+SLR.CI()
PI=xr.zeros_like(txx_eobs).expand_dims({'percentile':[5,95]}).rename('PI')+SLR.PI()
## set the figure layout
fig = plt.figure(figsize=(20,5))
gs = fig.add_gridspec(1,2,wspace=0.1)
map_ax = fig.add_subplot(gs[:,0],projection=crs.PlateCarree())
ts_ax = fig.add_subplot(gs[:,1])
## plot the trend map
p=slope.plot(ax=map_ax,levels=np.linspace(-5,5,11),extend='both',cmap='RdBu_r')
## add in significance stippling (stippling = not significant @ 2-sided 95 % level)
(slope_err*sp.stats.t((~mask).sum(axis=0)).ppf(0.975) > xr.ufuncs.fabs(slope)).plot.contourf(ax=map_ax,alpha=0,levels=[-0.5,0.5,1],hatches=[None,'..'],add_colorbar=False)
p.axes.coastlines('50m')
p.axes.set_title('Regression slope')
p.colorbar.set_label('trend / K K$^{-1}$')
## timeseries plot for Oxford gridpoint
ts_ax.plot(txx_eobs['time.year'].values,txx_eobs.sel(latitude=51.75,longitude=-1.25,method='nearest').values,'r.',label='E-OBS')
ts_ax.plot(model['time.year'].values,model.sel(latitude=51.75,longitude=-1.25,method='nearest').values,'k',label='regression model')
ts_ax.fill_between(model['time.year'].values,*CI.sel(latitude=51.75,longitude=-1.25,method='nearest').values,color='k',alpha=0.3,lw=0,label='90% CI')
ts_ax.plot(model['time.year'].values,PI.sel(latitude=51.75,longitude=-1.25,method='nearest').values.T,color='k',ls=':',label='90% PI')
ts_ax.set_title('Oxford gridpoint timeseries + linear model')
ts_ax.set_ylabel('TXx / \N{DEGREE SIGN}C')
ts_ax.set_xlabel('year')
ts_ax.set_xlim(1920,2020)
ts_ax.legend(frameon=False,loc=(1,0))
sn.despine(ax=ts_ax)
Here it is worth noting that the confidence & prediction intervals are computed assuming the residuals are normally distributed. For the TXx timeseries this is not true, so the computations above are for illustration only.
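We can check this quickly at the Oxford gridpoint (a minimal sketch, assuming SLR.res holds the residual array with the same (time, latitude, longitude) shape as Y):
## Shapiro-Wilk test on the regression residuals at the Oxford gridpoint
ilat = int(np.abs(txx_eobs.latitude.values - 51.75).argmin())
ilon = int(np.abs(txx_eobs.longitude.values - (-1.25)).argmin())
res_ox = np.ma.filled(SLR.res[:, ilat, ilon], np.nan)
print(sp.stats.shapiro(res_ox[~np.isnan(res_ox)]))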
II. Probability of extreme events¶
In this section I'll use the OLSE results above plus the LMoments module to compute how the likelihood of rare events (1-in-10 up to 1-in-100 year events, defined relative to a 1961-1990 climate estimate) has changed over the century.
Note that LMoments requires complete data, so we can only carry out this calculation for gridpoints where data is available for every year.
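For reference, the gridpoints with complete data can be identified directly from the Y array defined in section I (a small sketch):
## gridpoints with a valid value in every year
complete = ~np.isnan(Y).any(axis=0)
print(f'{complete.sum()} of {complete.size} gridpoints have complete data')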
First we initialise a distribution object & fit it using L-Moments to the regression residuals computed above.
## initialise a generalised extreme value distribution object
GEV = LMoments.gev()
## fit this to the best-estimate residuals
GEV.fit(SLR.res)
Here we implicitly assume that the shape & scale parameters are approximately stationary.
## we now set the location parameter equal to the model mean estimate for the 1961-1990 average
GEV.X = SLR.pred(GMST.loc['1961':'1990','smooth'].mean())
## compute the 1-in-x year event based on this distribution fit:
event_thresholds = xr.concat([xr.zeros_like(txx_eobs.isel(time=-1)).expand_dims({'quantile':[1-1/x]}) + GEV.qf(1-1/x) for x in [10,20,50,100]],dim='quantile')
import copy
## now obtain present-day & 1920 GEV estimates by shifting the distribution locations:
GEV_2020 = copy.deepcopy(GEV)
GEV_2020.X = SLR.pred(GMST.loc[2020,'smooth'])
GEV_1920 = copy.deepcopy(GEV)
GEV_1920.X = SLR.pred(GMST.loc[1920,'smooth'])
Compute the probabilities of various extreme event thresholds for the present-day and 1920 distributions.
p_1920 = xr.zeros_like(event_thresholds)
p_2020 = xr.zeros_like(event_thresholds)
for quantile in p_1920['quantile']:
    ## multiplying by GEV.k/GEV.k (equal to 1 where the fit is valid, NaN otherwise) propagates NaNs for gridpoints without a valid fit
    p_1920.loc[quantile] = 1-GEV_1920.cdf(event_thresholds.sel(quantile=quantile))*(GEV.k/GEV.k)
    p_2020.loc[quantile] = 1-GEV_2020.cdf(event_thresholds.sel(quantile=quantile))*(GEV.k/GEV.k)
Finally, plot the risk ratios for each threshold.
fig,ax=plt.subplots(1,4,figsize=(20,8),subplot_kw=dict(projection=crs.PlateCarree()))
## plot the risk ratio map for each return period
c = [ax.flatten()[i].contourf(p_2020.longitude, p_2020.latitude, (p_2020 / p_1920).sel(quantile=quantile),
                              levels=[1,2,5,10,20,50,100,200,500,1000], extend='both',
                              norm=matplotlib.colors.LogNorm(1,1000), cmap='Reds')
     for i,quantile in enumerate(p_2020['quantile'])]
axins = inset_axes(ax[-1], width="5%",height="100%",loc='lower left',bbox_to_anchor=(1.025, 0.00, 1, 1),bbox_transform=ax[-1].transAxes,borderpad=0)
cbar = plt.colorbar(c[-1],cax=axins,)
cbar.set_label('$P_{2020}/P_{1920}$',labelpad=3)
[a.coastlines() for a in ax.flatten()]
[ax[i].set_title(x) for i,x in enumerate(p_2020['quantile'].values)]
/home/leachl/miniconda3/lib/python3.7/site-packages/cartopy/mpl/geoaxes.py:1508: UserWarning: Log scale: values of z <= 0 have been masked result = matplotlib.axes.Axes.contourf(self, *args, **kwargs)
III. A more careful analysis: bootstrapping¶
The analysis above was fairly broad-brush (best-estimate only, not very careful about masked data etc.). In this final example, I demonstrate what I think is one of the key abilities of this package: rapid bootstrapped estimates.
Since both the OLSE and LMoments modules use numpy under the hood, they broadcast as expected. This means that retaining bootstrap sample correspondence is straightforward.
Here, I'll be focusing on the Oxford gridpoint data.
y_raw = txx_eobs.sel(latitude=51.75,longitude=-1.25,method='nearest').values
x_raw = GMST.loc[txx_eobs['time.year'].values,'smooth'].values
## resample with replacement over these series using numpy fancy indexing:
N_boot = 10000
indices = np.random.choice(y_raw.size,y_raw.size*N_boot)
Y = y_raw[indices].reshape(y_raw.size,-1)
X = x_raw[indices].reshape(x_raw.size,-1)
## simple linear regression (no masked values this time!):
SLR_boot = OLSE.simple(Y)
SLR_boot.fit(X)
## fit GEV
GEV_boot = LMoments.gev()
GEV_boot.fit(SLR_boot.res)
## set the location parameter equal to the 1961-1990 model mean
### NB the input data must be a plain numpy array (i.e. not a masked array etc.) or you may get odd results...
GEV_boot.X = SLR_boot.pred(GMST.loc['1961':'1990','smooth'].mean()).data
## now obtain present-day & 1920 GEV estimates by shifting the (bootstrapped) distribution locations:
GEV_boot_2020 = copy.deepcopy(GEV_boot)
GEV_boot_2020.X = SLR_boot.pred(GMST.loc[2020,'smooth']).data
GEV_boot_1920 = copy.deepcopy(GEV_boot)
GEV_boot_1920.X = SLR_boot.pred(GMST.loc[1920,'smooth']).data
## here I'll focus on the 1-in-50 year events only
p_boot_2020 = 1-GEV_boot_2020.cdf(GEV_boot.qf(0.98))
p_boot_1920 = 1-GEV_boot_1920.cdf(GEV_boot.qf(0.98))
/home/leachl/Documents/scripts/mystatsfunctions/mystatsfunctions/LMoments.py:244: RuntimeWarning: invalid value encountered in power cdf = np.exp(-1*(1-self.k*((x-self.X)/self.a))**(1/self.k))
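The warning above likely indicates that, for some bootstrap samples, the threshold falls outside the support of the shifted GEV, giving NaN probabilities. A quick check of how many samples give finite risk ratios (a sketch):
## count the bootstrap samples with a finite (non-NaN, non-infinite) risk ratio
with np.errstate(divide='ignore', invalid='ignore'):
    rr_boot = p_boot_2020 / p_boot_1920
print('finite risk ratios in', np.isfinite(rr_boot).sum(), 'of', N_boot, 'samples')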
# create a plot of all the distributions
fig,ax = plt.subplots(1,2,figsize=(12,4))
## timeseries plot
ax[0].plot(txx_eobs['time.year'].values,y_raw,'r.',label='E-OBS')
ax[0].plot(txx_eobs['time.year'].values,np.median(SLR_boot.pred(x_raw[:,None]),axis=1),'k',label='best-estimate')
ax[0].fill_between(txx_eobs['time.year'].values,*np.quantile(SLR_boot.pred(x_raw[:,None]),[0.05,0.95],axis=1),color='k',alpha=0.3,lw=0,label='90% CI')
ax[0].set_xlim(1920,2020)
ax[0].set_xlabel('year')
ax[0].set_ylabel('TXx / \N{DEGREE SIGN}C')
ax[0].set_title('Oxford timeseries')
## distribution plot
xrange=np.arange(20,50,0.1)
ax[1].plot(xrange,np.median(GEV_boot_2020.pdf(xrange[:,None]),axis=1),color='r',label='present-day')
ax[1].fill_between(xrange,*np.quantile(GEV_boot_2020.pdf(xrange[:,None]),[0.05,0.95],axis=1),color='r',alpha=0.2,lw=0)
ax[1].plot(xrange,np.median(GEV_boot_1920.pdf(xrange[:,None]),axis=1),color='g',label='1920')
ax[1].fill_between(xrange,*np.quantile(GEV_boot_1920.pdf(xrange[:,None]),[0.05,0.95],axis=1),color='g',alpha=0.2,lw=0)
ax[1].legend(frameon=False)
ax[1].set_xlabel('TXx / \N{DEGREE SIGN}C')
ax[1].set_ylabel('density')
ax[1].set_ylim(0,0.2)
ax[1].set_xlim(20,45)
ax[1].set_title('climatological distribution estimates')
sn.despine()
/home/leachl/miniconda3/lib/python3.7/site-packages/numpy/core/fromnumeric.py:753: UserWarning: Warning: 'partition' will ignore the 'mask' of the MaskedArray. a.partition(kth, axis=axis, kind=kind, order=order)
/home/leachl/Documents/scripts/mystatsfunctions/mystatsfunctions/LMoments.py:231: RuntimeWarning: invalid value encountered in power pdf = (1/self.a) * ( 1-self.k*( (x-self.X)/self.a ) )**( (1/self.k)-1 ) * np.exp( -1*(1-self.k*( (x-self.X)/self.a) )**(1/self.k) )
## finally print out some risk ratios + confidence for a 1-in-50 year event
print('1-in-50 year event RR [90 % CI]:',round(np.median(p_boot_2020/p_boot_1920),2),np.round(np.quantile(p_boot_2020/p_boot_1920,[0.05,0.95]),2),'\n')
print('1-in-50 year event FAR [90 % CI]:',round(np.median(1-p_boot_1920/p_boot_2020),2),np.round(np.quantile(1-p_boot_1920/p_boot_2020,[0.05,0.95]),2))
1-in-50 year event RR [90 % CI]: 18.17 [ 3.81 260.84]
1-in-50 year event FAR [90 % CI]: 0.94 [0.74 1. ]
/home/leachl/miniconda3/lib/python3.7/site-packages/ipykernel_launcher.py:3: RuntimeWarning: divide by zero encountered in true_divide
A final note¶
Statistically speaking, for this type of analysis it would be optimal to fit the model by maximum likelihood (i.e. assuming an explicit trend + GEV error model), which this package cannot do. However, I find it can be useful to separate out the two components (i.e. fit the trend and the distribution separately), and this package helps with that. In addition, it is considerably faster than scipy's .linregress() and distribution .fit() methods, thanks to the vectorisation.
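As a rough illustration of the speed difference, the sketch below times the vectorised OLSE fit against a gridpoint-by-gridpoint loop with scipy.stats.linregress (a sketch only: the gridded arrays from section I are re-extracted since Y and X were overwritten by the bootstrap arrays, and timings will depend on the machine and domain size).
import time
## re-extract the gridded arrays from section I
Y_grid = txx_eobs.values
X_grid = GMST.loc[txx_eobs['time.year'].values,'smooth'].values
mask_grid = np.isnan(Y_grid) | (np.isnan(Y_grid).sum(axis=0)>20)
## vectorised fit over every gridpoint at once
t0 = time.time()
OLSE.simple(Y_grid, mask=mask_grid).fit(np.broadcast_to(X_grid[:,None,None], Y_grid.shape))
print('OLSE.simple:', round(time.time()-t0, 2), 's')
## gridpoint-by-gridpoint loop with scipy for comparison
t0 = time.time()
slopes = np.full(Y_grid.shape[1:], np.nan)
for i in range(Y_grid.shape[1]):
    for j in range(Y_grid.shape[2]):
        y = Y_grid[:, i, j]
        valid = ~np.isnan(y)
        if valid.sum() >= Y_grid.shape[0] - 20:
            slopes[i, j] = sp.stats.linregress(X_grid[valid], y[valid]).slope
print('scipy loop:', round(time.time()-t0, 2), 's')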