"""
Source code for Global Multihazard Transport Risk Analysis (GMTRA)
Preprocessing functions.
Copyright (C) 2019 Elco Koks. All versions released under the GNU Affero General Public License v3.0 license.
"""
import os
import numpy
import pandas
import geopandas
import country_converter as coco
import urllib.request
from tqdm import tqdm
from pathos.multiprocessing import Pool,cpu_count
from shapely.geometry import MultiPolygon
from geopy.distance import vincenty
from gmtra.utils import load_config,create_folder_lookup,map_roads,line_length
from gmtra.fetch import bridges
[docs]def region_bridges(n):
"""
This function will extract all bridges from OpenStreetMap for the specified region.
Arguments:
*n* : the index ID of a region in the specified shapefile with all the regions.
Returns:
*GeoDataFrame* : A geopandas GeoDataFrame with all bridges in a region. Will also save this to a .csv file.
"""
# specify the file path where all data is located.
data_path = load_config()['paths']['data']
# load shapefile with unique information for each region
global_regions = geopandas.read_file(os.path.join(data_path,'input_data','global_regions_v2.shp'))
# grab the row of the region from the global region shapefile
x = global_regions.iloc[n]
# get name of the region
region = x.GID_2
# extract bridges from OpenStreetMAp
bridges_osm = bridges(data_path,region,regional=True)
# estimate length of each bridges in meters
bridges_osm['length'] = bridges_osm.geometry.apply(line_length)
bridges_osm['length'] = bridges_osm['length']*1000
road_dict = map_roads()
# map roads to primary, secondary, tertiary and other roads.
bridges_osm['road_type'] = bridges_osm.road_type.apply(lambda y: road_dict[y])
bridges_osm['region'] = region
bridges_osm['country'] = region[:3]
# save to .csv
bridges_osm.to_csv(os.path.join(data_path,'bridges_osm','{}.csv'.format(region)))
print('{} finished!'.format(region))
return bridges_osm
# except Exception as e:
# print('Failed to finish {} because of {}!'.format(region,e))
[docs]def remove_tiny_shapes(x,regionalized=False):
"""This function will remove the small shapes of multipolygons. Will reduce the size of the file.
Arguments:
*x* : a geometry feature (Polygon) to simplify. Countries which are very large will see larger (unhabitated) islands being removed.
Optional Arguments:
*regionalized* : Default is **False**. Set to **True** will use lower threshold settings (default: **False**).
Returns:
*MultiPolygon* : a shapely geometry MultiPolygon without tiny shapes.
"""
# if its a single polygon, just return the polygon geometry
if x.geometry.geom_type == 'Polygon':
return x.geometry
# if its a multipolygon, we start trying to simplify and remove shapes if its too big.
elif x.geometry.geom_type == 'MultiPolygon':
if regionalized == False:
area1 = 0.1
area2 = 250
elif regionalized == True:
area1 = 0.01
area2 = 50
# dont remove shapes if total area is already very small
if x.geometry.area < area1:
return x.geometry
# remove bigger shapes if country is really big
if x['GID_0'] in ['CHL','IDN']:
threshold = 0.01
elif x['GID_0'] in ['RUS','GRL','CAN','USA']:
if regionalized == True:
threshold = 0.01
else:
threshold = 0.01
elif x.geometry.area > area2:
threshold = 0.1
else:
threshold = 0.001
# save remaining polygons as new multipolygon for the specific country
new_geom = []
for y in x.geometry:
if y.area > threshold:
new_geom.append(y)
return MultiPolygon(new_geom)
[docs]def planet_osm():
"""
This function will download the planet file from the OSM servers.
"""
data_path = load_config()['paths']['data']
osm_path_in = os.path.join(data_path,'planet_osm')
# create directory to save planet osm file if that directory does not exit yet.
if not os.path.exists(osm_path_in):
os.makedirs(osm_path_in)
# if planet file is not downloaded yet, download it.
if 'planet-latest.osm.pbf' not in os.listdir(osm_path_in):
url = 'https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf'
urllib.request.urlretrieve(url, os.path.join(osm_path_in,'planet-latest.osm.pbf'))
else:
print('Planet file is already downloaded')
[docs]def global_shapefiles(regionalized=False):
"""
This function will simplify shapes and add necessary columns, to make further processing more quickly
For now, we will make use of the latest GADM data: https://gadm.org/download_world.html
Optional Arguments:
*regionalized* : Default is **False**. Set to **True** will also create the global_regions.shp file.
"""
data_path = load_config()['paths']['data']
# path to country GADM file
if regionalized == False:
# load country file
country_gadm_path = os.path.join(data_path,'GADM36','gadm36_0.shp')
gadm_level0 = geopandas.read_file(country_gadm_path)
# remove antarctica, no roads there anyways
gadm_level0 = gadm_level0.loc[~gadm_level0['NAME_0'].isin(['Antarctica'])]
# remove tiny shapes to reduce size substantially
gadm_level0['geometry'] = gadm_level0.apply(remove_tiny_shapes,axis=1)
# simplify geometries
gadm_level0['geometry'] = gadm_level0.simplify(tolerance = 0.005, preserve_topology=True).buffer(0.01).simplify(tolerance = 0.005, preserve_topology=True)
# add additional info
glob_info_path = os.path.join(data_path,'input_data','global_information.xlsx')
load_glob_info = pandas.read_excel(glob_info_path)
gadm_level0 = gadm_level0.merge(load_glob_info,left_on='GID_0',right_on='ISO_3digit')
#save to new country file
glob_ctry_path = os.path.join(data_path,'input_data','global_countries.shp')
gadm_level0.to_file(glob_ctry_path)
else:
# this is dependent on the country file, so check whether that one is already created:
glob_ctry_path = os.path.join(data_path,'input_data','global_countries.shp')
if os.path.exists(glob_ctry_path):
gadm_level0 = geopandas.read_file(os.path.join(data_path,'input_data','global_countries.shp'))
else:
print('ERROR: You need to create the country file first')
return None
# load region file
region_gadm_path = os.path.join(data_path,'GADM36','gadm36_2.shp')
gadm_level1 = geopandas.read_file(region_gadm_path)
# remove tiny shapes to reduce size substantially
gadm_level1['geometry'] = gadm_level1.apply(remove_tiny_shapes,axis=1)
# simplify geometries
gadm_level1['geometry'] = gadm_level1.simplify(tolerance = 0.005, preserve_topology=True).buffer(0.01).simplify(tolerance = 0.005, preserve_topology=True)
# add additional info
glob_info_path = os.path.join(data_path,'input_data','global_information.xlsx')
load_glob_info = pandas.read_excel(glob_info_path)
gadm_level1 = gadm_level1.merge(load_glob_info,left_on='GID_0',right_on='ISO_3digit')
gadm_level1.rename(columns={'coordinates':'coordinate'}, inplace=True)
# add some missing geometries from countries with no subregions
get_missing_countries = list(set(list(gadm_level0.GID_0.unique())).difference(list(gadm_level1.GID_0.unique())))
mis_country = gadm_level0.loc[gadm_level0['GID_0'].isin(get_missing_countries)]#
mis_country['GID_1'] = mis_country['GID_0']+'_'+str(0)+'_'+str(1)
gadm_level1 = geopandas.GeoDataFrame( pandas.concat( [gadm_level1,mis_country] ,ignore_index=True) )
gadm_level1.reset_index(drop=True,inplace=True)
#save to new country file
gadm_level1.to_file(os.path.join(data_path,'input_data','global_regions.shp'))
[docs]def poly_files(data_path,global_shape,save_shapefile=False,regionalized=False):
"""
This function will create the .poly files from the world shapefile. These
.poly files are used to extract data from the openstreetmap files.
This function is adapted from the OSMPoly function in QGIS.
Arguments:
*data_path* : base path to location of all files.
*global_shape*: exact path to the global shapefile used to create the poly files.
Optional Arguments:
*save_shape_file* : Default is **False**. Set to **True** will the new shapefile with the
countries that we include in this analysis will be saved.
*regionalized* : Default is **False**. Set to **True** will perform the analysis
on a regional level.
Returns:
*.poly file* for each country in a new dir in the working directory.
"""
# =============================================================================
# """ Create output dir for .poly files if it is doesnt exist yet"""
# =============================================================================
poly_dir = os.path.join(data_path,'country_poly_files')
if regionalized == True:
poly_dir = os.path.join(data_path,'regional_poly_files')
if not os.path.exists(poly_dir):
os.makedirs(poly_dir)
# =============================================================================
# """ Set the paths for the files we are going to use """
# =============================================================================
wb_poly_out = os.path.join(data_path,'input_data','country_shapes.shp')
if regionalized == True:
wb_poly_out = os.path.join(data_path,'input_data','regional_shapes.shp')
# =============================================================================
# """Load country shapes and country list and only keep the required countries"""
# =============================================================================
wb_poly = geopandas.read_file(global_shape)
# filter polygon file
if regionalized == True:
wb_poly = wb_poly.loc[wb_poly['GID_0'] != '-']
wb_poly = wb_poly.loc[wb_poly['TYPE_1'] != 'Water body']
else:
wb_poly = wb_poly.loc[wb_poly['ISO_3digit'] != '-']
wb_poly.crs = {'init' :'epsg:4326'}
# and save the new country shapefile if requested
if save_shapefile == True:
wb_poly.to_file(wb_poly_out)
# we need to simplify the country shapes a bit. If the polygon is too diffcult,
# osmconvert cannot handle it.
# wb_poly['geometry'] = wb_poly.simplify(tolerance = 0.1, preserve_topology=False)
# =============================================================================
# """ The important part of this function: create .poly files to clip the country
# data from the openstreetmap file """
# =============================================================================
num = 0
# iterate over the counties (rows) in the world shapefile
for f in wb_poly.iterrows():
f = f[1]
num = num + 1
geom=f.geometry
# try:
# this will create a list of the different subpolygons
if geom.geom_type == 'MultiPolygon':
polygons = geom
# the list will be lenght 1 if it is just one polygon
elif geom.geom_type == 'Polygon':
polygons = [geom]
# define the name of the output file, based on the ISO3 code
ctry = f['GID_0']
if regionalized == True:
attr=f['GID_1']
else:
attr=f['GID_0']
# start writing the .poly file
f = open(poly_dir + "/" + attr +'.poly', 'w')
f.write(attr + "\n")
i = 0
# loop over the different polygons, get their exterior and write the
# coordinates of the ring to the .poly file
for polygon in polygons:
if ctry == 'CAN':
dist = vincenty(polygon.centroid.coords[:1][0], (83.24,-79.80), ellipsoid='WGS-84').kilometers
if dist < 2000:
continue
if ctry == 'RUS':
dist = vincenty(polygon.centroid.coords[:1][0], (58.89,82.26), ellipsoid='WGS-84').kilometers
if dist < 500:
print(attr)
continue
polygon = numpy.array(polygon.exterior)
j = 0
f.write(str(i) + "\n")
for ring in polygon:
j = j + 1
f.write(" " + str(ring[0]) + " " + str(ring[1]) +"\n")
i = i + 1
# close the ring of one subpolygon if done
f.write("END" +"\n")
# close the file when done
f.write("END" +"\n")
f.close()
# except:
# print(f['GID_1'])
[docs]def clip_osm(data_path,planet_path,area_poly,area_pbf):
""" Clip the an area osm file from the larger continent (or planet) file and save to a new osm.pbf file.
This is much faster compared to clipping the osm.pbf file while extracting through ogr2ogr.
This function uses the osmconvert tool, which can be found at http://wiki.openstreetmap.org/wiki/Osmconvert.
Either add the directory where this executable is located to your environmental variables or just put it in the 'scripts' directory.
Arguments:
*continent_osm*: path string to the osm.pbf file of the continent associated with the country.
*area_poly*: path string to the .poly file, made through the 'create_poly_files' function.
*area_pbf*: path string indicating the final output dir and output name of the new .osm.pbf file.
Returns:
a clipped .osm.pbf file.
"""
print('{} started!'.format(area_pbf))
osm_convert_path = os.path.join(data_path,'osmconvert','osmconvert64')
try:
if (os.path.exists(area_pbf) is not True):
os.system('{} {} -B={} --complete-ways -o={}'.format(osm_convert_path,planet_path,area_poly,area_pbf))
print('{} finished!'.format(area_pbf))
except:
print('{} did not finish!'.format(area_pbf))
[docs]def single_country(country,regionalized=False,create_poly_files=False):
"""
Clip a country from the planet osm file and save to individual osm.pbf files
This function has the option to extract individual regions
Arguments:
*country* : The country for which we want extract the data.
Keyword Arguments:
*regionalized* : Default is **False**. Set to **True** will parallelize the extraction over all regions within a country.
*create_poly_files* : Default is **False**. Set to **True** will create new .poly files.
"""
# set data path
data_path = load_config()['paths']['data']
# path to planet file
planet_path = os.path.join(data_path,'planet_osm','planet-latest.osm.pbf')
# global shapefile path
if regionalized == True:
world_path = os.path.join(data_path,'input_data','global_regions.shp')
else:
world_path = os.path.join(data_path,'input_data','global_countries.shp')
# create poly files for all countries
if create_poly_files == True:
poly_files(data_path,world_path,save_shapefile=False,regionalized=regionalized)
if not os.path.exists(os.path.join(data_path,'country_poly_files')):
os.makedirs(os.path.join(data_path,'country_poly_files'))
if not os.path.exists(os.path.join(data_path,'country_osm')):
os.makedirs(os.path.join(data_path,'country_osm'))
ctry_poly = os.path.join(data_path,'country_poly_files','{}.poly'.format(country))
ctry_pbf = os.path.join(data_path,'country_osm','{}.osm.pbf'.format(country))
if regionalized == False:
clip_osm(data_path,planet_path,ctry_poly,ctry_pbf)
elif regionalized == True:
if (os.path.exists(ctry_pbf) is not True):
clip_osm(data_path,planet_path,ctry_poly,ctry_pbf)
if not os.path.exists(os.path.join(data_path,'regional_poly_files')):
os.makedirs(os.path.join(data_path,'regional_poly_files'))
if not os.path.exists(os.path.join(data_path,'region_osm_admin1')):
os.makedirs(os.path.join(data_path,'region_osm_admin1'))
get_poly_files = [x for x in os.listdir(os.path.join(data_path,'regional_poly_files')) if x.startswith(country)]
polyPaths = [os.path.join(data_path,'regional_poly_files',x) for x in get_poly_files]
area_pbfs = [os.path.join(data_path,'region_osm_admin1',x.split('.')[0]+'.osm.pbf') for x in get_poly_files]
data_paths = [data_path]*len(polyPaths)
planet_paths = [ctry_pbf]*len(polyPaths)
# and run all regions parallel to each other
pool = Pool(cpu_count()-1)
pool.starmap(clip_osm, zip(data_paths,planet_paths,polyPaths,area_pbfs))
[docs]def all_countries(subset = [], regionalized=False,reversed_order=False):
"""
Clip all countries from the planet osm file and save them to individual osm.pbf files
Optional Arguments:
*subset* : allow for a pre-defined subset of countries. REquires ISO3 codes. Will run all countries if left empty.
*regionalized* : Default is **False**. Set to **True** if you want to have the regions of a country as well.
*reversed_order* : Default is **False**. Set to **True** to work backwards for a second process of the same country set to prevent overlapping calculations.
Returns:
clipped osm.pbf files for the defined set of countries (either the whole world by default or the specified subset)
"""
# set data path
data_path = load_config()['paths']['data']
# path to planet file
planet_path = os.path.join(data_path,'planet_osm','planet-latest.osm.pbf')
# global shapefile path
if regionalized == True:
world_path = os.path.join(data_path,'input_data','global_regions.shp')
else:
world_path = os.path.join(data_path,'input_data','global_countries.shp')
# create poly files for all countries
poly_files(data_path,world_path,save_shapefile=False,regionalized=regionalized)
# prepare lists for multiprocessing
if not os.path.exists(os.path.join(data_path,'country_poly_files')):
os.makedirs(os.path.join(data_path,'country_poly_files'))
if not os.path.exists(os.path.join(data_path,'country_osm')):
os.makedirs(os.path.join(data_path,'country_osm'))
if regionalized == False:
get_poly_files = os.listdir(os.path.join(data_path,'country_poly_files'))
if len(subset) > 0:
polyPaths = [os.path.join(data_path,'country_poly_files',x) for x in get_poly_files if x[:3] in subset]
area_pbfs = [os.path.join(data_path,'region_osm_admin1',x.split('.')[0]+'.osm.pbf') for x in get_poly_files if x[:3] in subset]
else:
polyPaths = [os.path.join(data_path,'country_poly_files',x) for x in get_poly_files]
area_pbfs = [os.path.join(data_path,'region_osm_admin1',x.split('.')[0]+'.osm.pbf') for x in get_poly_files]
big_osm_paths = [planet_path]*len(polyPaths)
elif regionalized == True:
if not os.path.exists(os.path.join(data_path,'regional_poly_files')):
os.makedirs(os.path.join(data_path,'regional_poly_files'))
if not os.path.exists(os.path.join(data_path,'region_osm')):
os.makedirs(os.path.join(data_path,'region_osm_admin1'))
get_poly_files = os.listdir(os.path.join(data_path,'regional_poly_files'))
if len(subset) > 0:
polyPaths = [os.path.join(data_path,'regional_poly_files',x) for x in get_poly_files if x[:3] in subset]
area_pbfs = [os.path.join(data_path,'region_osm_admin1',x.split('.')[0]+'.osm.pbf') for x in get_poly_files if x[:3] in subset]
big_osm_paths = [os.path.join(data_path,'country_osm',x[:3]+'.osm.pbf') for x in get_poly_files if x[:3] in subset]
else:
polyPaths = [os.path.join(data_path,'regional_poly_files',x) for x in get_poly_files]
area_pbfs = [os.path.join(data_path,'region_osm_admin1',x.split('.')[0]+'.osm.pbf') for x in get_poly_files]
big_osm_paths = [os.path.join(data_path,'country_osm',x[:3]+'.osm.pbf') for x in get_poly_files]
data_paths = [data_path]*len(polyPaths)
# allow for reversed order if you want to run two at the same time (convenient to work backwards for the second process, to prevent overlapping calculation)
if reversed_order == True:
polyPaths = polyPaths[::-1]
area_pbfs = area_pbfs[::-1]
big_osm_paths = big_osm_paths[::-1]
# extract all country osm files through multiprocesing
pool = Pool(cpu_count()-1)
pool.starmap(clip_osm, zip(data_paths,big_osm_paths,polyPaths,area_pbfs))
[docs]def merge_SSBN_maps(country):
"""
Function to merge SSBN maps to a country level.
Arguments:
*country* : ISO3 code of the country for which we want to merge the river and surface flood maps to country level.
"""
try:
print('{} started!'.format(country))
# get path where all hazards all located
hazard_path = load_config()['paths']['hazard_data']
# get dictioniary in which we can lookup the name of the country used in the FATHOM flood files.
folder_lookup = create_folder_lookup()
# get ISO2 and full country names for each country
country_ISO2 = coco.convert(names=[country], to='ISO2')
country_full = folder_lookup[country]
rps = ['5','10','20','50','75','100','200','250','1000']
flood_types = ['fluvial_undefended','pluvial_undefended']
flood_types_abb = ['FU','PU']
flood_mapping = dict(zip(flood_types,flood_types_abb))
# merge all subcountry files into one country file for each hazard.
for flood_type in flood_types:
new_folder = os.path.join(hazard_path,'InlandFlooding',country_full,'{}_{}_merged'.format(country_ISO2,flood_type))
try:
os.mkdir(new_folder)
except:
None
path_to_all_files = os.path.join(hazard_path,'InlandFlooding',country_full,'{}_{}'.format(country_ISO2,flood_type))
full_paths = [os.path.join(path_to_all_files,x) for x in os.listdir(path_to_all_files) if x.endswith('.tif')]
for rp in tqdm(rps,desc=flood_type+'_'+country,leave=False,total=len(rps),unit='rp'):
get_one_rp = [x for x in full_paths if '-{}-'.format(rp) in x]
stringlist_rp = ' '.join(get_one_rp)
rp_out = os.path.join(new_folder,'{}-{}-{}.tif'.format(country_ISO2,flood_mapping[flood_type],rp))
os.system('gdal_merge.py -q -o {} {} -co COMPRESS=LZW -co BIGTIFF=YES -co PREDICTOR=2 -co TILED=YES'.format(rp_out,stringlist_rp))
print('{} finished!'.format(country))
except:
print('{} failed! It seems we do not have proper flood data for this country.'.format(country))