Source code for atra.stats.network_failure_stats

"""Summarise length of edges/number of nodes within each boundary (commune, district, province)

Purpose
-------

Collect network attributes
    - Combine with boundary Polygons to collect network-boundary intersection attributes
    - Write final results to an Excel sheet

Input data requirements
-----------------------

1. Correct paths to all files and correct input parameters

2. Shapefiles of networks with attributes:
    - edge_id or node_id - String/Integer/Float Edge ID or Node ID of network
    - length - Float length of edge intersecting with hazards
    - geometry - Shapely geometry of edges as LineString or nodes as Points

3. Shapefile of administrative boundaries of Argentina with attributes:
    - province_i - String/Integer ID of Province
    - pro_name_e - String name of Province in English
    - district_i - String/Integer ID of District
    - dis_name_e - String name of District in English
    - commune_id - String/Integer ID of Commune
    - name_eng - String name of Commune in English
    - geometry - Shapely geometry of boundary Polygon

Results
-------

1. Excel sheet of network-hazard-boundary intersection with attributes:
    - edge_id/node_id - String name of intersecting edge ID or node ID
    - length - Float length of intersection of edge LineString and hazard Polygon: Only for edges
    - province_id - String/Integer ID of Province
    - province_name - String name of Province in English
    - district_id - String/Integer ID of District
    - district_name - String name of District in English
    - commune_id - String/Integer ID of Commune
    - commune_name - String name of Commune in English
"""
import os

import geopandas as gpd
import pandas as pd
from atra.utils import *
from atra.transport_flow_and_failure_functions import *
from tqdm import tqdm
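
# The boundary statistics consumed in main() (national_scale_boundary_stats.xlsx)
# come from the network-boundary spatial join described in the module docstring.
# A minimal sketch of that step, assuming hypothetical file names 'edges.shp'
# and 'boundaries.shp' (not part of this module):
#
# >>> edges = gpd.read_file('edges.shp')        # edge_id, length, geometry
# >>> bounds = gpd.read_file('boundaries.shp')  # province/district/commune attributes
# >>> joined = gpd.sjoin(edges, bounds, how='inner', op='intersects')
# >>> joined.groupby('province_i')['length'].sum()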


def risk_results_reorganise(risk_dataframe, id_column):
    """Pivot long-format risk results into one row per asset ID, with
    '<hazard> current' and '<hazard> <scenario> future/change' columns
    for each flood type and climate scenario.
    """
    risk_columns = []
    flood_types = ['fluvial flooding', 'pluvial flooding']
    climate_scenarios = ['Future_Med', 'Future_High']
    all_ids = pd.DataFrame(list(set(risk_dataframe[id_column].values.tolist())),
                           columns=[id_column])
    for ft in flood_types:
        # Current-day risk values for this flood type
        ht = risk_dataframe[risk_dataframe['hazard_type'] == ft]
        current = list(set(zip(ht[id_column].values.tolist(),
                               ht['current'].values.tolist())))
        current = pd.DataFrame(current, columns=[id_column, '{} current'.format(ft)])
        risk_columns.append('{} current'.format(ft))
        all_ids = pd.merge(all_ids, current, how='left', on=[id_column]).fillna(0)
        for cs in climate_scenarios:
            # Future risk values and changes for this flood type and scenario
            ht = risk_dataframe[(risk_dataframe['hazard_type'] == ft) &
                                (risk_dataframe['climate_scenario'] == cs)]
            future = list(set(zip(ht[id_column].values.tolist(),
                                  ht['future'].values.tolist(),
                                  ht['change'].values.tolist())))
            future = pd.DataFrame(future,
                                  columns=[id_column,
                                           '{} {} future'.format(ft, cs),
                                           '{} {} change'.format(ft, cs)])
            risk_columns.append('{} {} future'.format(ft, cs))
            risk_columns.append('{} {} change'.format(ft, cs))
            all_ids = pd.merge(all_ids, future, how='left', on=[id_column]).fillna(0)

    return all_ids, risk_columns
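
# A minimal usage sketch for risk_results_reorganise, using a hypothetical toy
# frame (the real inputs are the per-asset climate risk CSVs read in main()):
#
# >>> df = pd.DataFrame({
# ...     'edge_id': ['e1', 'e1'],
# ...     'hazard_type': ['fluvial flooding', 'fluvial flooding'],
# ...     'climate_scenario': ['Future_Med', 'Future_High'],
# ...     'current': [1.5, 1.5],
# ...     'future': [2.0, 3.0],
# ...     'change': [0.5, 1.5]})
# >>> wide, cols = risk_results_reorganise(df, 'edge_id')
#
# 'wide' holds one row per edge_id; 'cols' lists the generated
# '<hazard> current' and '<hazard> <scenario> future/change' columns.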
def risk_results_reorganise_climate_outlooks(risk_dataframe, id_column):
    """Pivot long-format risk results into one row per asset ID, keeping a
    single 'current' column and adding '<scenario> value'/'<scenario> change'
    columns for each climate scenario.
    """
    risk_columns = []
    climate_scenarios = ['Future_Med', 'Future_High']
    all_ids = risk_dataframe[[id_column, 'current']]
    risk_columns.append('current')
    for cs in climate_scenarios:
        all_ids = pd.merge(all_ids,
                           risk_dataframe[risk_dataframe['climate_scenario'] == cs][
                               [id_column, 'future', 'change']],
                           how='left', on=[id_column]).fillna(0)
        all_ids.rename(columns={'future': '{} value'.format(cs),
                                'change': '{} change'.format(cs)},
                       inplace=True)
        risk_columns.append('{} value'.format(cs))
        risk_columns.append('{} change'.format(cs))

    return all_ids, risk_columns
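
# The climate-outlook variant keeps a single 'current' column and adds
# '<scenario> value'/'<scenario> change' pairs. With the same hypothetical
# frame as above:
#
# >>> wide, cols = risk_results_reorganise_climate_outlooks(df, 'edge_id')
# >>> cols
# ['current', 'Future_Med value', 'Future_Med change',
#  'Future_High value', 'Future_High change']
#
# Unlike risk_results_reorganise, this variant does not deduplicate IDs;
# the bridge branch of main() calls drop_duplicates afterwards.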
def change_matrix(risk_dataframe, value_threshold, change_threshold):
    """Print, per hazard type and climate scenario, counts and percentages of
    assets whose risks exceed value_threshold and/or whose risk change
    exceeds change_threshold.
    """
    total_counts_df = risk_dataframe.groupby(
        ['hazard_type', 'climate_scenario']).size().reset_index(name='total_counts')
    # Assets whose risk change exceeds the change threshold
    scenario_df = risk_dataframe[risk_dataframe['change'] >= change_threshold].groupby(
        ['hazard_type', 'climate_scenario']).size().reset_index(name='change_counts')
    total_counts_df = pd.merge(total_counts_df, scenario_df, how='left',
                               on=['hazard_type', 'climate_scenario']).fillna(0)
    total_counts_df['percent'] = 100.0 * total_counts_df['change_counts'] / total_counts_df['total_counts']
    # Assets whose future risk exceeds the value threshold
    scenario_df = risk_dataframe[risk_dataframe['future'] >= value_threshold].groupby(
        ['hazard_type', 'climate_scenario']).size().reset_index(name='future_counts')
    total_counts_df = pd.merge(total_counts_df, scenario_df, how='left',
                               on=['hazard_type', 'climate_scenario']).fillna(0)
    total_counts_df['percent_future'] = 100.0 * total_counts_df['future_counts'] / total_counts_df['total_counts']
    # Assets whose current risk exceeds the value threshold
    scenario_df = risk_dataframe[risk_dataframe['current'] >= value_threshold].groupby(
        ['hazard_type', 'climate_scenario']).size().reset_index(name='current_counts')
    total_counts_df = pd.merge(total_counts_df, scenario_df, how='left',
                               on=['hazard_type', 'climate_scenario']).fillna(0)
    total_counts_df['percent_current'] = 100.0 * total_counts_df['current_counts'] / total_counts_df['total_counts']
    # Assets exceeding both thresholds
    scenario_df = risk_dataframe[(risk_dataframe['future'] >= value_threshold) &
                                 (risk_dataframe['change'] >= change_threshold)].groupby(
        ['hazard_type', 'climate_scenario']).size().reset_index(name='future_percent_counts')
    total_counts_df = pd.merge(total_counts_df, scenario_df, how='left',
                               on=['hazard_type', 'climate_scenario']).fillna(0)
    print(total_counts_df)
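
# change_matrix is a diagnostic that prints, per hazard type and climate
# scenario, the share of assets whose risk exceeds a value threshold and/or
# whose risk change exceeds a change threshold. The commented-out calls in
# main() suggest usage such as:
#
# >>> change_matrix(risk_results, value_threshold=0.1, change_threshold=100)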
def main():
    """Rank national-scale network failure, risk and adaptation results.

    1. Specify the paths from where to read and write:
        - Input data
        - Intermediate calculations data
        - Output results

    2. Supply input data and parameters:
        - Names of modes - List of strings
        - Names of risk types matched to modes - List of strings
        - Names of the value columns to rank - List of strings
        - Failure durations in days - List of integers
        - Economic growth rate scenario - String

    3. Give the paths to the input data files:
        - Network boundary statistics Excel file
        - Single-edge failure results CSV files
        - Combined climate change risk results CSV files
        - Adaptation results CSV files

    4. Specify the output files and paths to be created
    """
    tqdm.pandas()
    incoming_data_path, data_path, calc_path, output_path = (
        load_config()['paths']['incoming_data'],
        load_config()['paths']['data'],
        load_config()['paths']['calc'],
        load_config()['paths']['output'])

    # Supply input data and parameters
    modes = ['road', 'rail', 'bridge']
    risk_types = ['risks', 'eael', 'risks']  # risk type matched to each mode
    val_cols = ['min_total_tons', 'max_total_tons',
                'min_tr_loss', 'max_tr_loss',
                'min_econ_loss', 'max_econ_loss',
                'min_econ_impact', 'max_econ_impact']

    # Specify the output files and paths to be created
    od_output_excel = os.path.join(output_path, 'network_stats',
                                   'network_failures_ranked.xlsx')
    failure_excel_writer = pd.ExcelWriter(od_output_excel)

    od_output_excel = os.path.join(output_path, 'network_stats',
                                   'network_combined_risks_ranked.xlsx')
    risk_excel_writer = pd.ExcelWriter(od_output_excel)

    od_output_excel = os.path.join(output_path, 'network_stats',
                                   'network_combined_adaptations_ranked.xlsx')
    adapt_excel_writer = pd.ExcelWriter(od_output_excel)

    duration = [10, 20, 30]
    growth_rate = '2p8'
    for m in range(len(modes)):
        network_stats = pd.read_excel(
            os.path.join(output_path, 'network_stats',
                         'national_scale_boundary_stats.xlsx'),
            sheet_name=modes[m], encoding='utf-8-sig')
        if modes[m] in ['road', 'rail', 'bridge']:
            failure_results = pd.read_csv(
                os.path.join(output_path, 'failure_results',
                             'minmax_combined_scenarios',
                             'single_edge_failures_minmax_{}_100_percent_disrupt.csv'.format(modes[m])),
                encoding='utf-8-sig')
            risk_results = pd.read_csv(
                os.path.join(output_path, 'network_stats',
                             'national_{}_{}_climate_change_combined.csv'.format(
                                 modes[m], risk_types[m])),
                encoding='utf-8-sig')
            risk_results = risk_results.sort_values(by=['future'], ascending=False)
            risk_results.drop('year', axis=1, inplace=True)
            # Scale risk values to millions
            risk_results[['current', 'future']] = 1.0 * risk_results[['current', 'future']] / 1000000
            failure_results = failure_results.sort_values(by=['max_econ_impact'], ascending=False)
            failure_results.drop('no_access', axis=1, inplace=True)
            # Scale tonnages to thousands and losses to millions
            failure_results[['min_total_tons', 'max_total_tons']] = \
                1.0 * failure_results[['min_total_tons', 'max_total_tons']] / 1000.0
            failure_results[['min_tr_loss', 'max_tr_loss',
                             'min_econ_loss', 'max_econ_loss',
                             'min_econ_impact', 'max_econ_impact']] = \
                1.0 * failure_results[['min_tr_loss', 'max_tr_loss',
                                       'min_econ_loss', 'max_econ_loss',
                                       'min_econ_impact', 'max_econ_impact']] / 1000000

            if modes[m] == 'bridge':
                edges = pd.read_csv(os.path.join(data_path, 'network', 'bridges.csv'),
                                    encoding='utf-8-sig')
                edges = edges[['bridge_id', 'edge_id', 'structure_type']]
                roads = pd.read_csv(os.path.join(data_path, 'network', 'road_edges.csv'),
                                    encoding='utf-8-sig')
                roads = roads[['edge_id', 'road_name']]
                edges = pd.merge(edges, roads, how='left', on=['edge_id'])
                del roads
                edge_id = 'bridge_id'
                # print('* Changes for {}'.format(modes[m]))
                # change_matrix(risk_results, 0.1, 100)
                risk_results = risk_results[risk_results['future'] > 1.0]
                # risk_vals, risk_cols = risk_results_reorganise(risk_results, edge_id)
                risk_vals, risk_cols = risk_results_reorganise_climate_outlooks(risk_results, edge_id)
                risk_vals = pd.merge(risk_vals, edges, how='left', on=[edge_id])
                risk_vals = pd.merge(risk_vals,
                                     network_stats[[edge_id, 'department_name', 'province_name']],
                                     how='left', on=[edge_id])
                risk_vals.drop_duplicates(subset=['bridge_id'], keep='first', inplace=True)
                risk_vals['totals'] = risk_vals[
                    [cols for cols in risk_cols if 'change' not in cols]].sum(axis=1)
                risk_vals = risk_vals.sort_values(by='totals', ascending=False)
                risk_vals.drop(['bridge_id', 'edge_id', 'totals'], axis=1, inplace=True)
                risk_vals.set_index(
                    ['road_name', 'structure_type', 'province_name'] + risk_cols + ['department_name']
                ).to_excel(risk_excel_writer, modes[m], encoding='utf-8-sig')
                print('* Done with {} risks'.format(modes[m]))

                failure_results = failure_results[failure_results['max_econ_impact'] > 0.5]
                failure_results = pd.merge(failure_results, edges, how='left', on=[edge_id])
                del edges
                failure_results = pd.merge(
                    failure_results,
                    network_stats[[edge_id, 'department_name', 'province_name']],
                    how='left', on=[edge_id])
                failure_results.drop_duplicates(subset=['bridge_id'], keep='first', inplace=True)
                failure_results.drop(['bridge_id', 'edge_id'], axis=1, inplace=True)
                failure_results.set_index(
                    ['road_name', 'structure_type', 'province_name'] + val_cols + ['department_name']
                ).to_excel(failure_excel_writer, modes[m], encoding='utf-8-sig')
                print('* Done with {} failures'.format(modes[m]))

                for dur in duration:
                    adapt_results = pd.read_csv(
                        os.path.join(
                            output_path, 'adaptation_results', 'combined_climate',
                            'output_adaptation_{}_{}_days_max_{}_growth_disruption_fixed_parameters.csv'.format(
                                modes[m], dur, growth_rate)),
                        encoding='utf-8-sig')
                    adapt_results = pd.merge(
                        adapt_results,
                        network_stats[[edge_id, 'department_name', 'province_name']],
                        how='left', on=[edge_id])
                    adapt_results['min_eael'] = dur * adapt_results['min_eael_per_day']
                    adapt_results['max_eael'] = dur * adapt_results['max_eael_per_day']
                    adapt_results = adapt_results.sort_values(by='max_bc_ratio', ascending=False)
                    adapt_results.drop_duplicates(subset=[edge_id], keep='first', inplace=True)
                    adapt_results.to_csv(
                        os.path.join(output_path, 'network_stats',
                                     '{}_adaptation_summary_{}_days_combined_disruption.csv'.format(
                                         modes[m], dur)),
                        encoding='utf-8-sig')
                    adapt_results = adapt_results.head(50)
                    adapt_results.set_index(
                        ['road_name', 'structure_type',
                         'province_name', 'department_name', 'climate_scenario',
                         'max_exposure_length',
                         'max_ini_adap_cost', 'max_tot_adap_cost',
                         'min_benefit', 'max_benefit',
                         'min_bc_ratio', 'max_bc_ratio']
                    ).to_excel(adapt_excel_writer, '{}_{}_days'.format(modes[m], dur),
                               encoding='utf-8-sig')
                    adapt_excel_writer.save()
                    del adapt_results
                print('* Done with {} adaptation'.format(modes[m]))
            elif modes[m] == 'road':
                edges = pd.read_csv(os.path.join(data_path, 'network', 'road_edges.csv'),
                                    encoding='utf-8-sig')
                edges = edges[['edge_id', 'road_name', 'road_type']]
                edge_id = 'edge_id'
                # print('* Changes for {}'.format(modes[m]))
                # change_matrix(risk_results, 0.1, 100)
                risk_results = risk_results[risk_results['future'] > 1.0]
                # risk_vals, risk_cols = risk_results_reorganise(risk_results, edge_id)
                risk_vals, risk_cols = risk_results_reorganise_climate_outlooks(risk_results, edge_id)
                print('* Done with reorganising')
                risk_vals = pd.merge(risk_vals, edges, how='left', on=[edge_id])
                risk_vals = pd.merge(risk_vals,
                                     network_stats[[edge_id, 'department_name', 'province_name']],
                                     how='left', on=[edge_id])
                print('* Done with merging')
                risk_vals['totals'] = risk_vals[
                    [cols for cols in risk_cols if 'change' not in cols]].sum(axis=1)
                risk_vals = risk_vals.sort_values(by='totals', ascending=False)
                print('* Done with sorting')
                risk_vals.drop([edge_id, 'totals'], axis=1, inplace=True)
                print('* Length:', len(risk_vals.index))
                risk_vals.set_index(
                    ['road_name', 'road_type', 'province_name'] + risk_cols + ['department_name']
                ).to_excel(risk_excel_writer, modes[m], encoding='utf-8-sig')
                print('* Done with {} risks'.format(modes[m]))

                failure_results = failure_results[failure_results['max_econ_impact'] > 0.5]
                failure_results = pd.merge(failure_results, edges, how='left', on=[edge_id])
                del edges
                failure_results = pd.merge(
                    failure_results,
                    network_stats[[edge_id, 'department_name', 'province_name']],
                    how='left', on=[edge_id])
                failure_results.drop(edge_id, axis=1, inplace=True)
                failure_results.set_index(
                    ['road_name', 'road_type', 'province_name'] + val_cols + ['department_name']
                ).to_excel(failure_excel_writer, modes[m], encoding='utf-8-sig')
                print('* Done with {} failures'.format(modes[m]))

                for dur in duration:
                    adapt_results = pd.read_csv(
                        os.path.join(
                            output_path, 'adaptation_results', 'combined_climate',
                            'output_adaptation_{}_{}_days_max_{}_growth_disruption_fixed_parameters.csv'.format(
                                modes[m], dur, growth_rate)),
                        encoding='utf-8-sig')
                    adapt_results = pd.merge(
                        adapt_results,
                        network_stats[[edge_id, 'department_name', 'province_name']],
                        how='left', on=[edge_id])
                    adapt_results['min_eael'] = dur * adapt_results['min_eael_per_day']
                    adapt_results['max_eael'] = dur * adapt_results['max_eael_per_day']
                    adapt_results = adapt_results.sort_values(by='max_bc_ratio', ascending=False)
                    adapt_results.drop_duplicates(subset=[edge_id], keep='first', inplace=True)
                    adapt_results.to_csv(
                        os.path.join(output_path, 'network_stats',
                                     '{}_adaptation_summary_{}_days_combined_disruption.csv'.format(
                                         modes[m], dur)),
                        encoding='utf-8-sig')
                    adapt_results = adapt_results.head(50)
                    adapt_results.set_index(
                        ['road_name',
                         'province_name', 'department_name', 'climate_scenario',
                         'max_exposure_length',
                         'max_ini_adap_cost', 'max_tot_adap_cost',
                         'min_benefit', 'max_benefit',
                         'min_bc_ratio', 'max_bc_ratio']
                    ).to_excel(adapt_excel_writer, '{}_{}_days'.format(modes[m], dur),
                               encoding='utf-8-sig')
                    adapt_excel_writer.save()
                    del adapt_results
                print('* Done with {} adaptation'.format(modes[m]))
            else:
                # Rail
                edges = pd.read_csv(os.path.join(data_path, 'network', 'rail_edges.csv'),
                                    encoding='utf-8-sig')
                edges = edges[['edge_id', 'operador', 'linea']]
                edge_id = 'edge_id'
                # print('* Changes for {}'.format(modes[m]))
                # change_matrix(risk_results, 1.0, 100)
                risk_results = risk_results[risk_results['future'] > 1.0]
                # risk_vals, risk_cols = risk_results_reorganise(risk_results, edge_id)
                risk_vals, risk_cols = risk_results_reorganise_climate_outlooks(risk_results, edge_id)
                risk_vals = pd.merge(risk_vals, edges, how='left', on=[edge_id])
                risk_vals = pd.merge(risk_vals,
                                     network_stats[[edge_id, 'department_name', 'province_name']],
                                     how='left', on=[edge_id])
                risk_vals['totals'] = risk_vals[
                    [cols for cols in risk_cols if 'change' not in cols]].sum(axis=1)
                risk_vals = risk_vals.sort_values(by='totals', ascending=False)
                risk_vals.drop([edge_id, 'totals'], axis=1, inplace=True)
                risk_vals.set_index(
                    ['operador', 'linea', 'province_name'] + risk_cols + ['department_name']
                ).to_excel(risk_excel_writer, modes[m], encoding='utf-8-sig')
                print('* Done with {} risks'.format(modes[m]))

                failure_results = failure_results[failure_results['max_econ_impact'] > 1.0]
                failure_results = pd.merge(failure_results, edges, how='left', on=[edge_id])
                del edges
                failure_results = pd.merge(
                    failure_results,
                    network_stats[[edge_id, 'department_name', 'province_name']],
                    how='left', on=[edge_id])
                failure_results.drop(edge_id, axis=1, inplace=True)
                failure_results.set_index(
                    ['operador', 'linea', 'province_name'] + val_cols + ['department_name']
                ).to_excel(failure_excel_writer, modes[m], encoding='utf-8-sig')
                print('* Done with {} failures'.format(modes[m]))

    failure_excel_writer.save()
    risk_excel_writer.save()
if __name__ == "__main__":
    main()