Reproduction of: Rapidly measuring spatial accessibility of COVID-19 healthcare resources: a case study of Illinois, USA
Original study by Kang, J. Y., A. Michels, F. Lyu, Shaohua Wang, N. Agbodo, V. L. Freeman, and Shaowen Wang. 2020. Rapidly measuring spatial accessibility of COVID-19 healthcare resources: a case study of Illinois, USA. International Journal of Health Geographics 19 (1):1–17. DOI:10.1186/s12942-020-00229-x.
Reproduction Authors: Joe Holler, Derrick Burt, and Kufre Udoh With contributions from Peter Kedron, Drew An-Pham, Andy Atallah, and the Spring 2021 Open Source GIScience class at Middlebury
Reproduction Materials Available at: github.com/HEGSRR/RPr-Kang-2020
Created: 2021-06-01
Revised: 2023-12-16
To perform the E2SFCA method, three types of data are required: (1) road network, (2) population, and (3) hospital information. The road network can be obtained with OSMnx, a Python library for OpenStreetMap data. The population data are available from the American Community Survey. Lastly, hospital information is publicly available from the Homeland Infrastructure Foundation-Level Data.
Import necessary libraries to run this model.
See environment.yml
for the library versions used for this analysis.
# Import modules
import numpy as np
import pandas as pd
import geopandas as gpd
import networkx as nx
import osmnx as ox
import re
from shapely.geometry import Point, LineString, Polygon
import matplotlib.pyplot as plt
from tqdm import tqdm
import multiprocessing as mp
import folium
import itertools
import os
import time
import warnings
import IPython
import requests
from IPython.display import display, clear_output
warnings.filterwarnings("ignore")
print('\n'.join(f'{m.__name__}=={m.__version__}' for m in globals().values() if getattr(m, '__version__', None)))
Because we have restructured the repository for replication, we need to check our working directory and make necessary adjustments.
# Check working directory
os.getcwd()
'/home/jovyan/work/RPr-Kang-2020'
# Use to set work directory properly
if os.path.basename(os.getcwd()) == 'code':
    os.chdir('../../')
os.getcwd()
'/home/jovyan/work/RPr-Kang-2020'
If you would like to use the data generated from the pre-processing scripts, use the following code:
covid_data = gpd.read_file('./data/raw/public/Pre-Processing/covid_pre-processed.shp')
atrisk_data = gpd.read_file('./data/raw/public/Pre-Processing/atrisk_pre-processed.shp')
# Read in at risk population data
atrisk_data = gpd.read_file('./data/raw/public/PopData/Illinois_Tract.shp')
atrisk_data.head()
| | GEOID | STATEFP | COUNTYFP | TRACTCE | NAMELSAD | Pop | Unnamed_ 0 | NAME | OverFifty | TotalPop | geometry |
|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 17091011700 | 17 | 091 | 011700 | Census Tract 117 | 3688 | 588 | Census Tract 117, Kankakee County, Illinois | 1135 | 3688 | POLYGON ((-87.88768 41.13594, -87.88764 41.136... |
1 | 17091011800 | 17 | 091 | 011800 | Census Tract 118 | 2623 | 220 | Census Tract 118, Kankakee County, Illinois | 950 | 2623 | POLYGON ((-87.89410 41.14388, -87.89400 41.143... |
2 | 17119400951 | 17 | 119 | 400951 | Census Tract 4009.51 | 5005 | 2285 | Census Tract 4009.51, Madison County, Illinois | 2481 | 5005 | POLYGON ((-90.11192 38.70281, -90.11128 38.703... |
3 | 17119400952 | 17 | 119 | 400952 | Census Tract 4009.52 | 3014 | 2299 | Census Tract 4009.52, Madison County, Illinois | 1221 | 3014 | POLYGON ((-90.09442 38.72031, -90.09360 38.720... |
4 | 17135957500 | 17 | 135 | 957500 | Census Tract 9575 | 2869 | 1026 | Census Tract 9575, Montgomery County, Illinois | 1171 | 2869 | POLYGON ((-89.70369 39.34803, -89.69928 39.348... |
# Read in covid case data
covid_data = gpd.read_file('./data/raw/public/PopData/Chicago_ZIPCODE.shp')
covid_data.head()
| | ZCTA5CE10 | County | State | Join | ZONE | ZONENAME | FIPS | pop | cases | geometry |
|---|---|---|---|---|---|---|---|---|---|---|
0 | 60660 | Cook County | IL | Cook County IL | IL_E | Illinois East | 1201 | 43242 | 78 | POLYGON ((-87.65049 41.99735, -87.65029 41.996... |
1 | 60640 | Cook County | IL | Cook County IL | IL_E | Illinois East | 1201 | 69715 | 117 | POLYGON ((-87.64645 41.97965, -87.64565 41.978... |
2 | 60614 | Cook County | IL | Cook County IL | IL_E | Illinois East | 1201 | 71308 | 134 | MULTIPOLYGON (((-87.67703 41.91845, -87.67705 ... |
3 | 60712 | Cook County | IL | Cook County IL | IL_E | Illinois East | 1201 | 12539 | 42 | MULTIPOLYGON (((-87.76181 42.00465, -87.76156 ... |
4 | 60076 | Cook County | IL | Cook County IL | IL_E | Illinois East | 1201 | 31867 | 114 | MULTIPOLYGON (((-87.74782 42.01540, -87.74526 ... |
Note that 999 is treated as a "NULL"/"NA" value, so hospitals with that value are filtered out. This data contains the number of ICU beds and ventilators at each hospital.
# Read in hospital data
hospitals = gpd.read_file('./data/raw/public/HospitalData/Chicago_Hospital_Info.shp')
hospitals.head()
| | FID | Hospital | City | ZIP_Code | X | Y | Total_Bed | Adult ICU | Total Vent | geometry |
|---|---|---|---|---|---|---|---|---|---|---|
0 | 2 | Methodist Hospital of Chicago | Chicago | 60640 | -87.671079 | 41.972800 | 145 | 36 | 12 | MULTIPOINT (-87.67108 41.97280) |
1 | 4 | Advocate Christ Medical Center | Oak Lawn | 60453 | -87.732483 | 41.720281 | 785 | 196 | 64 | MULTIPOINT (-87.73248 41.72028) |
2 | 13 | Evanston Hospital | Evanston | 60201 | -87.683288 | 42.065393 | 354 | 89 | 29 | MULTIPOINT (-87.68329 42.06539) |
3 | 24 | AMITA Health Adventist Medical Center Hinsdale | Hinsdale | 60521 | -87.920116 | 41.805613 | 261 | 65 | 21 | MULTIPOINT (-87.92012 41.80561) |
4 | 25 | Holy Cross Hospital | Chicago | 60629 | -87.690841 | 41.770001 | 264 | 66 | 21 | MULTIPOINT (-87.69084 41.77000) |
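The 999-sentinel filtering mentioned above can be sketched with a toy data frame; the column names mirror the hospital shapefile, but the values are made up for illustration.

```python
import pandas as pd

# Illustrative sketch (not the notebook's data): rows where bed or
# ventilator counts equal the 999 sentinel are treated as missing and dropped.
df = pd.DataFrame({'Hospital': ['A', 'B', 'C'],
                   'Adult ICU': [36, 999, 66],
                   'Total Vent': [12, 999, 21]})
clean = df[(df['Adult ICU'] != 999) & (df['Total Vent'] != 999)]
print(clean['Hospital'].tolist())  # ['A', 'C']
```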
# Plot hospital data
m = folium.Map(location=[41.85, -87.65], tiles='cartodbpositron', zoom_start=10)
for i in range(0, len(hospitals)):
    folium.CircleMarker(
        location=[hospitals.iloc[i]['Y'], hospitals.iloc[i]['X']],
        popup="{}{}\n{}{}\n{}{}".format('Hospital Name: ', hospitals.iloc[i]['Hospital'],
                                        'ICU Beds: ', hospitals.iloc[i]['Adult ICU'],
                                        'Ventilators: ', hospitals.iloc[i]['Total Vent']),
        radius=5,
        color='blue',
        fill=True,
        fill_opacity=0.6
    ).add_to(m)
legend_html = '''<div style="position: fixed; width: 20%; height: auto;
                             bottom: 10px; left: 10px; border: 2px solid grey;
                             z-index: 9999; font-size: 14px;">Legend<br></div>'''
m.get_root().html.add_child(folium.Element(legend_html))
m
# Read in and plot grid file for Chicago
grid_file = gpd.read_file('./data/raw/public/GridFile/Chicago_Grid.shp')
grid_file.plot(figsize=(8,8))
<AxesSubplot:>
If Chicago_Network_Buffer.graphml does not already exist, this cell will query the road network from OpenStreetMap. Each of the road network code blocks may take a few minutes to run.
%%time
# To create a new graph from OpenStreetMap, delete or rename data/raw/private/Chicago_Network_Buffer.graphml
# (if it exists), and set OSM to True
OSM = False
# if the buffered street network is not saved and OSM is preferred,
# generate a new graph from OpenStreetMap and save it
if not os.path.exists("./data/raw/private/Chicago_Network_Buffer.graphml") and OSM:
    print("Loading buffered Chicago road network from OpenStreetMap. Please wait... runtime may exceed 9min...", flush=True)
    G = ox.graph_from_place('Chicago', network_type='drive', buffer_dist=24140.2)
    print("Saving Chicago road network to raw/private/Chicago_Network_Buffer.graphml. Please wait...", flush=True)
    ox.save_graphml(G, './data/raw/private/Chicago_Network_Buffer.graphml')
    print("Data saved.")

# otherwise, if the buffered street network is not saved, download the graph from the OSF project
elif not os.path.exists("./data/raw/private/Chicago_Network_Buffer.graphml"):
    print("Downloading buffered Chicago road network from OSF...", flush=True)
    url = 'https://osf.io/download/z8ery/'
    r = requests.get(url, allow_redirects=True)
    print("Saving buffered Chicago road network to file...", flush=True)
    with open('./data/raw/private/Chicago_Network_Buffer.graphml', 'wb') as f:
        f.write(r.content)

# if the buffered street network is already saved, load it
if os.path.exists("./data/raw/private/Chicago_Network_Buffer.graphml"):
    print("Loading buffered Chicago road network from raw/private/Chicago_Network_Buffer.graphml. Please wait...", flush=True)
    G = ox.load_graphml('./data/raw/private/Chicago_Network_Buffer.graphml')
    print("Data loaded.")
else:
    print("Error: could not load the road network from file.")
Loading buffered Chicago road network from raw/private/Chicago_Network_Buffer.graphml. Please wait... Data loaded. CPU times: user 43 s, sys: 0 ns, total: 43 s Wall time: 42.9 s
%%time
ox.plot_graph(G, node_size = 1, bgcolor = 'white', node_color = 'black', edge_color = "#333333", node_alpha = 0.5, edge_linewidth = 0.5)
CPU times: user 52 s, sys: 0 ns, total: 52 s Wall time: 51.8 s
(<Figure size 576x576 with 1 Axes>, <AxesSubplot:>)
Display all the unique speed limit values and count how many network edges (road segments) have each value. We will compare this to our cleaned network later.
%%time
# Turn nodes and edges into geodataframes
nodes, edges = ox.graph_to_gdfs(G, nodes=True, edges=True)
# Get unique counts of road segments for each speed limit
#print(edges['maxspeed'].value_counts())
print(str(len(edges)) + " edges in graph")
# can we also visualize highways / roads with higher speed limits to check accuracy?
# the code above converts the graph into an edges geodataframe, which could theoretically be filtered
# by fast road segments and mapped, e.g. in folium
384240 edges in graph CPU times: user 32.1 s, sys: 0 ns, total: 32.1 s Wall time: 32.1 s
# Print types of roads
print(edges['highway'].value_counts())
residential 296481 secondary 30909 tertiary 29216 primary 19277 motorway_link 2322 unclassified 1840 motorway 1449 trunk 843 primary_link 833 secondary_link 356 living_street 238 trunk_link 157 tertiary_link 121 [residential, unclassified] 69 [tertiary, residential] 66 [secondary, primary] 15 [secondary, tertiary] 10 [motorway, motorway_link] 6 [tertiary, unclassified] 6 [motorway, trunk] 4 [residential, living_street] 4 [secondary, secondary_link] 3 busway 2 [motorway, primary] 2 [tertiary, motorway_link] 2 emergency_bay 2 [trunk, primary] 2 [tertiary, tertiary_link] 1 [trunk, motorway] 1 [primary, motorway_link] 1 [secondary, motorway_link] 1 [primary_link, residential] 1 Name: highway, dtype: int64
# Print speed limit data
print(edges['maxspeed'].value_counts())
25 mph 4793 30 mph 3555 35 mph 3364 40 mph 2093 45 mph 1418 20 mph 1155 55 mph 614 60 mph 279 50 mph 191 40 79 15 mph 76 70 mph 71 65 mph 54 10 mph 38 [40 mph, 45 mph] 27 [30 mph, 35 mph] 26 45,30 24 [40 mph, 35 mph] 22 70 21 25 20 [55 mph, 45 mph] 16 25, east 14 [45 mph, 35 mph] 13 [30 mph, 25 mph] 10 [45 mph, 50 mph] 8 50 8 [40 mph, 30 mph] 7 [35 mph, 25 mph] 6 [55 mph, 60 mph] 5 20 4 [70 mph, 60 mph] 3 [65 mph, 60 mph] 3 [40 mph, 45 mph, 35 mph] 3 [70 mph, 65 mph] 2 [70 mph, 45 mph, 5 mph] 2 [40, 45 mph] 2 [35 mph, 50 mph] 2 35 2 [55 mph, 65 mph] 2 [40 mph, 50 mph] 2 [15 mph, 25 mph] 2 [40 mph, 35 mph, 25 mph] 2 [15 mph, 40 mph, 30 mph] 2 [20 mph, 25 mph] 2 [30 mph, 25, east] 2 [65 mph, 55 mph] 2 [20 mph, 35 mph] 2 [55 mph, 55] 2 55 2 [15 mph, 30 mph] 2 [45 mph, 30 mph] 2 [15 mph, 45 mph] 2 [55 mph, 45, east, 50 mph] 2 [20 mph, 30 mph] 1 [5 mph, 45 mph, 35 mph] 1 [55 mph, 35 mph] 1 [5 mph, 35 mph] 1 [55 mph, 50 mph] 1 Name: maxspeed, dtype: int64
Cleans the OSMnx network to work better with drive-time analysis.
First, we remove all nodes with 0 outdegree because any hospital assigned to such a node would be unreachable from everywhere. Next, we remove small (under 10 node) strongly connected components to reduce erroneously small ego-centric networks. Lastly, we ensure that the max speed is set and in the correct units before calculating time.
Note: In the reanalysis, we commented out the network_setting function and replaced this step with OSMnx functions. These functions amend the road network by changing how missing speed limit values are imputed: whereas the original workflow assigned missing edge speeds a static value of 35 mph, we use the OSMnx functions to set these edge speeds to the mean observed speed for the type of road in question.
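The pruning steps described above (drop zero-outdegree nodes, then small strongly connected components) can be sketched on a toy directed graph. This is an illustration only: the threshold of 2 stands in for the notebook's threshold of 10, and the graph is made up.

```python
import networkx as nx

# Toy sketch of the network cleaning described above (not the Chicago graph).
G = nx.DiGraph([(1, 2), (2, 1), (2, 3), (4, 4)])
# 1) remove nodes with outdegree 0: a hospital snapped to such a node
#    would be unreachable from everywhere
G.remove_nodes_from([n for n, deg in G.out_degree() if deg == 0])
# 2) remove small strongly connected components (threshold 2 here, 10 in the notebook)
for comp in list(nx.strongly_connected_components(G)):
    if len(comp) < 2:
        G.remove_nodes_from(comp)
print(sorted(G.nodes))  # [1, 2]
```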
# two things about this function:
# 1) the work to remove nodes is hardly worth it now that OSMnx cleans graphs by default
# the function is now only pruning < 300 nodes
# 2) try using the OSMnx speed module for setting speeds, travel times
# https://osmnx.readthedocs.io/en/stable/user-reference.html#module-osmnx.speed
# just be careful about units of speed and time!
# the remainder of this code expects 'time' to be measured in minutes
"""
def network_setting(network):
_nodes_removed = len([n for (n, deg) in network.out_degree() if deg ==0])
network.remove_nodes_from([n for (n, deg) in network.out_degree() if deg ==0])
for component in list(nx.strongly_connected_components(network)):
if len(component)<10:
for node in component:
_nodes_removed+=1
network.remove_node(node)
for u, v, k, data in tqdm(G.edges(data=True, keys=True),position=0):
if 'maxspeed' in data.keys():
speed_type = type(data['maxspeed'])
if (speed_type==str):
# Add in try/except blocks to catch maxspeed formats that don't fit Kang et al's cases
try:
if len(data['maxspeed'].split(','))==2:
data['maxspeed_fix']=float(data['maxspeed'].split(',')[0])
elif data['maxspeed']=='signals':
data['maxspeed_fix']=30.0 # drive speed setting as 35 miles
else:
data['maxspeed_fix']=float(data['maxspeed'].split()[0])
except:
data['maxspeed_fix']=30.0 #miles
else:
try:
data['maxspeed_fix']=float(data['maxspeed'][0].split()[0])
except:
data['maxspeed_fix']=30.0 #miles
else:
data['maxspeed_fix']=30.0 #miles
data['maxspeed_meters'] = data['maxspeed_fix']*26.8223 # convert mile per hour to meters per minute
data['time'] = float(data['length'])/ data['maxspeed_meters'] # meters / meters per minute = minutes
print("Removed {} nodes ({:2.4f}%) from the OSMNX network".format(_nodes_removed, _nodes_removed/float(network.number_of_nodes())))
print("Number of nodes: {}".format(network.number_of_nodes()))
print("Number of edges: {}".format(network.number_of_edges()))
return(network)
"""
# Reanalysis changes
# Apply speeds (kph) to edges in road network
G = ox.speed.add_edge_speeds(G)
# Add travel times to edges; in this way, units (kph vs mph) do not matter
G = ox.speed.add_edge_travel_times(G)
%%time
# G, hospitals, grid_file, pop_data = file_import (population_dropdown.value)
#G = network_setting(G)
# Create point geometries for each node in the graph, to make constructing catchment area polygons easier
for node, data in G.nodes(data=True):
    data['geometry'] = Point(data['x'], data['y'])
# Modify code to react to processor dropdown (got rid of file_import function)
CPU times: user 1.44 s, sys: 0 ns, total: 1.44 s Wall time: 1.44 s
Display all the unique speed limit values and count how many network edges (road segments) have each value. Compare to the previous results.
%%time
## Get unique counts for each road network
# Turn nodes and edges in geodataframes
nodes, edges = ox.graph_to_gdfs(G, nodes=True, edges=True)
# Print number of edges in road network
print(str(len(edges)) + " edges in graph")
# Print updated speed limit values
new_speed = edges['speed_kph'].value_counts()
print(new_speed)
384240 edges in graph 39.2 291570 48.3 29823 56.7 26355 60.1 14993 40.2 5625 56.3 3364 86.4 2253 64.4 2093 32.2 1890 42.9 1828 72.4 1418 69.8 654 88.5 614 90.6 565 96.6 279 80.5 191 51.0 118 40.0 80 24.1 76 112.7 71 104.6 54 16.1 38 25.0 34 68.0 29 52.0 26 45.3 24 60.0 24 70.0 21 64.0 18 80.0 16 44.0 12 56.0 9 76.0 8 50.0 8 48.0 8 36.0 6 92.0 5 96.0 4 52.4 4 71.0 4 20.0 4 104.0 3 32.0 3 72.0 3 45.0 3 100.0 3 55.0 2 108.0 2 35.0 2 53.0 2 84.0 1 Name: speed_kph, dtype: int64 CPU times: user 32.6 s, sys: 0 ns, total: 32.6 s Wall time: 32.5 s
Note: The code above checks the speed limit values after they have been changed via the OSMnx functions. Unlike the previous values, these are in kilometers per hour, the default unit of those functions. This does not matter for the accessibility calculations, which use the edge travel times rather than the speeds.
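A quick unit check of the point above: because travel time is derived from edge length divided by speed, the speed unit cancels out. The numbers below are illustrative, not taken from the graph.

```python
# Unit check (illustrative values): a 500 m edge at 39.2 km/h.
length_m = 500.0
speed_kph = 39.2
speed_m_per_s = speed_kph * 1000 / 3600   # ~10.89 m/s
travel_time_s = length_m / speed_m_per_s  # ~45.9 seconds

# The same edge with the speed expressed in mph yields the same travel time:
speed_mph = speed_kph / 1.609344
travel_time_s_2 = (length_m / 1609.344) / speed_mph * 3600
```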
def hospital_setting(hospitals, G):
    # Create an empty column
    hospitals['nearest_osm'] = None
    # Fill the nearest_osm column with each hospital's nearest osm node
    for i in tqdm(hospitals.index, desc="Find the nearest network node from hospitals", position=0):
        hospitals['nearest_osm'][i] = ox.get_nearest_node(G, [hospitals['Y'][i], hospitals['X'][i]], method='euclidean')  # find the nearest node to the hospital location
    print('hospital setting is done')
    return hospitals
Converts geodata to centroids
def pop_centroid(pop_data, pop_type):
    pop_data = pop_data.to_crs({'init': 'epsg:4326'})
    # If pop is selected in the dropdown, select tracts with a non-negative at-risk population
    if pop_type == "pop":
        pop_data = pop_data[pop_data['OverFifty'] >= 0]
    # If covid is selected in the dropdown, select zip codes with a non-negative case count
    if pop_type == "covid":
        pop_data = pop_data[pop_data['cases'] >= 0]
    pop_cent = pop_data.centroid  # convert each polygon to its centroid point, dropping the other attributes
    # Convert to gdf
    pop_centroid = gpd.GeoDataFrame()
    i = 0
    for point in tqdm(pop_cent, desc='Pop Centroid File Setting', position=0):
        if pop_type == "pop":
            pop = pop_data.iloc[i]['OverFifty']
            code = pop_data.iloc[i]['GEOID']
        if pop_type == "covid":
            pop = pop_data.iloc[i]['cases']
            code = pop_data.iloc[i].ZCTA5CE10
        pop_centroid = pop_centroid.append({'code': code, 'pop': pop, 'geometry': point}, ignore_index=True)
        i = i + 1
    return pop_centroid
Function written by Joe Holler + Derrick Burt. It is a more efficient way to calculate distance-weighted catchment areas for each hospital, and it runs quicker than the original ("calculate_catchment_area"). It first creates a dictionary of all nodes within a 30-minute drive time, mapping each node to its drive time from the hospital (using the single_source_dijkstra_path_length function). From there, two more dictionaries are constructed by querying the first one. From these dictionaries, single-part convex hulls are created for each drive-time interval and appended into a single list (one list with 3 polygon geometries). Within the list, the polygons are differenced from each other to produce three catchment areas.
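The first step described above can be sketched on a toy weighted graph: one Dijkstra pass with a 30-minute (1800 s) cutoff, then cheap dictionary queries for the 20- and 10-minute rings. The graph and times below are made up for illustration.

```python
import networkx as nx

# Toy sketch (not the Chicago network): edge weights are travel times in seconds.
G = nx.Graph()
G.add_weighted_edges_from([('hosp', 'a', 300), ('a', 'b', 500), ('b', 'c', 1500)],
                          weight='travel_time')
# All nodes within 30 minutes of the hospital node
within_30 = nx.single_source_dijkstra_path_length(G, 'hosp', cutoff=1800, weight='travel_time')
# The 20- and 10-minute rings are subsets of the same dictionary
within_20 = {n: t for n, t in within_30.items() if t <= 1200}
within_10 = {n: t for n, t in within_30.items() if t <= 600}
print(sorted(within_30.items()))  # [('a', 300), ('b', 800), ('hosp', 0)]
```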
def dijkstra_cca_polygons(G, nearest_osm, distances, distance_unit="travel_time"):
    '''
    Before running: must assign point geometries to street nodes

    # create point geometries for the entire graph
    for node, data in G.nodes(data=True):
        data['geometry'] = Point(data['x'], data['y'])
    '''
    ## CREATE DICTIONARIES
    # create dictionary of nearest nodes
    nearest_nodes_30 = nx.single_source_dijkstra_path_length(G, nearest_osm, distances[2], distance_unit)  # the largest graph, from which the 10 and 20 minute drive times can be extracted
    # extract values within 20 and 10 (respectively) minute drive times;
    # thresholds are in seconds, matching the OSMnx travel_time units
    nearest_nodes_20 = dict()
    nearest_nodes_10 = dict()
    for key, value in nearest_nodes_30.items():
        if value <= 1200:
            nearest_nodes_20[key] = value
        if value <= 600:
            nearest_nodes_10[key] = value

    ## CREATE POLYGONS FOR 3 DISTANCE CATEGORIES (10 min, 20 min, 30 min)
    # 30 MIN
    # If the graph already has a geometry attribute with point data,
    # this line will create a GeoPandas GeoDataFrame from the nearest_nodes_30 dictionary
    points_30 = gpd.GeoDataFrame(gpd.GeoSeries(nx.get_node_attributes(G.subgraph(nearest_nodes_30), 'geometry')))
    # This line converts the nearest_nodes_30 dictionary into a Pandas data frame and joins it to points
    # left_index=True and right_index=True are options for merge() to join on the index values
    points_30 = points_30.merge(pd.Series(nearest_nodes_30).to_frame(), left_index=True, right_index=True)
    # Re-name the columns and set the geodataframe geometry to the geometry column
    points_30 = points_30.rename(columns={'0_x': 'geometry', '0_y': 'z'}).set_geometry('geometry')
    # Create a convex hull polygon from the points
    polygon_30 = gpd.GeoDataFrame(gpd.GeoSeries(points_30.unary_union.convex_hull))
    polygon_30 = polygon_30.rename(columns={0: 'geometry'}).set_geometry('geometry')

    # 20 MIN
    # Select nodes within 1200 seconds (20 minutes)
    points_20 = points_30.query("z <= 1200")
    # Create a convex hull polygon from the points
    polygon_20 = gpd.GeoDataFrame(gpd.GeoSeries(points_20.unary_union.convex_hull))
    polygon_20 = polygon_20.rename(columns={0: 'geometry'}).set_geometry('geometry')

    # 10 MIN
    # Select nodes within 600 seconds (10 minutes)
    points_10 = points_30.query("z <= 600")
    # Create a convex hull polygon from the points
    polygon_10 = gpd.GeoDataFrame(gpd.GeoSeries(points_10.unary_union.convex_hull))
    polygon_10 = polygon_10.rename(columns={0: 'geometry'}).set_geometry('geometry')

    # Collect the three polygons in one list
    polygons = [polygon_10, polygon_20, polygon_30]
    # Clip the overlapping distance polygons (create two donuts + hole)
    for i in reversed(range(1, len(distances))):
        polygons[i] = gpd.overlay(polygons[i], polygons[i-1], how="difference")
    return polygons
Measures the effect of a single hospital on the surrounding area (uses dijkstra_cca_polygons).
def hospital_measure_acc(_thread_id, hospital, pop_data, distances, weights):
    # Create polygons
    polygons = dijkstra_cca_polygons(G, hospital['nearest_osm'], distances)
    # Calculate accessibility measurements
    num_pops = []
    for j in pop_data.index:
        point = pop_data['geometry'][j]
        # Multiply polygons by weights
        for k in range(len(polygons)):
            if len(polygons[k]) > 0:  # To exclude the weirdo (convex hull is not polygon)
                if (point.within(polygons[k].iloc[0]["geometry"])):
                    num_pops.append(pop_data['pop'][j] * weights[k])
    total_pop = sum(num_pops)
    for i in range(len(distances)):
        polygons[i]['time'] = distances[i]
        polygons[i]['total_pop'] = total_pop
        polygons[i]['hospital_icu_beds'] = float(hospital['Adult ICU']) / polygons[i]['total_pop']  # ratio of ICU beds to weighted catchment population
        polygons[i]['hospital_vents'] = float(hospital['Total Vent']) / polygons[i]['total_pop']  # ratio of ventilators to weighted catchment population
        polygons[i].crs = {'init': 'epsg:4326'}
        polygons[i] = polygons[i].to_crs({'init': 'epsg:32616'})
    print('{:.0f}'.format(_thread_id), end=" ", flush=True)
    return (_thread_id, [polygon.copy(deep=True) for polygon in polygons])
Parallel implementation of accessibility measurement.
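The parallelization below relies on an argument-unpacking pattern: pool.map delivers one tuple per task, and an unpacker splats it into the worker's positional arguments. A serial sketch with a toy worker (not the real one):

```python
import itertools

# Toy worker standing in for hospital_measure_acc
def measure_toy(thread_id, pop, weight):
    return thread_id, pop * weight

# Unpacker pattern, as in hospital_acc_unpacker: splat the tuple into arguments
def unpacker(args):
    return measure_toy(*args)

# zip + itertools.repeat builds one argument tuple per hospital
tasks = zip(range(3), [100, 200, 300], itertools.repeat(0.5))
results = list(map(unpacker, tasks))  # pool.map(unpacker, tasks) in the parallel version
print(results)  # [(0, 50.0), (1, 100.0), (2, 150.0)]
```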
def hospital_acc_unpacker(args):
    return hospital_measure_acc(*args)

# WHERE THE RESULTS ARE POOLED AND THEN REAGGREGATED
def measure_acc_par(hospitals, pop_data, network, distances, weights, num_proc=4):
    catchments = []
    for distance in distances:
        catchments.append(gpd.GeoDataFrame())
    pool = mp.Pool(processes=num_proc)
    hospital_list = [hospitals.iloc[i] for i in range(len(hospitals))]
    print("Calculating", len(hospital_list), "hospital catchments...\ncompleted number:", end=" ")
    results = pool.map(hospital_acc_unpacker, zip(range(len(hospital_list)), hospital_list, itertools.repeat(pop_data), itertools.repeat(distances), itertools.repeat(weights)))
    pool.close()
    results.sort()
    results = [r[1] for r in results]
    for i in range(len(results)):
        for j in range(len(distances)):
            catchments[j] = catchments[j].append(results[i][j], sort=False)
    return catchments
Calculates and aggregates accessibility statistics for one catchment on our grid file.
from collections import Counter

def overlap_calc(_id, poly, grid_file, weight, service_type):
    # set up value_dict
    value_dict = Counter()
    # Has to have at least one of either ICU beds or ventilators
    if type(poly.iloc[0][service_type]) != type(None):
        # Multiply by the weight for the catchment (1, 0.68, 0.22)
        value = float(poly[service_type]) * weight
        # intersections of hexagon grid and catchment area polygons
        intersect = gpd.overlay(grid_file, poly, how='intersection')
        # Find the area of the parts which overlap
        intersect['overlapped'] = intersect.area
        # How much of the catchment area is within each hexagon
        intersect['percent'] = intersect['overlapped'] / intersect['area']
        # Threshold for inclusion: we are commenting this out for now
        # intersect = intersect[intersect['percent'] >= 0.5]
        value_new = intersect['percent'] * value
        intersect_region = intersect['id']
        for intersect_id in intersect_region:
            try:
                value_dict[intersect_id] += value_new
            except:
                value_dict[intersect_id] = value_new
    return (_id, value_dict)
def overlap_calc_unpacker(args):
    return overlap_calc(*args)
Calculates how all catchment areas overlap with and affect the accessibility of each grid in our grid file.
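The per-catchment results are pooled with Counter addition, which sums values key by key, so each catchment's contribution accumulates per grid cell id. A minimal sketch with made-up hexagon ids and values:

```python
from collections import Counter

# Two partial results, as returned by overlap_calc for two catchments
partial_a = Counter({'hex1': 0.10, 'hex2': 0.25})
partial_b = Counter({'hex2': 0.25, 'hex3': 0.05})

# Counter addition sums the values for shared keys
total = partial_a + partial_b
print(sorted(total.items()))  # [('hex1', 0.1), ('hex2', 0.5), ('hex3', 0.05)]
```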
def overlapping_function(grid_file, catchments, service_type, weights, num_proc=4):
    grid_file[service_type] = 0
    pool = mp.Pool(processes=num_proc)
    acc_list = []
    for i in range(len(catchments)):
        acc_list.extend([catchments[i][j:j+1] for j in range(len(catchments[i]))])
    acc_weights = []
    for i in range(len(catchments)):
        acc_weights.extend([weights[i]] * len(catchments[i]))
    results = pool.map(overlap_calc_unpacker, zip(range(len(acc_list)), acc_list, itertools.repeat(grid_file), acc_weights, itertools.repeat(service_type)))
    pool.close()
    results.sort()
    results = [r[1] for r in results]
    service_values = results[0]
    for result in results[1:]:
        service_values += result
    for intersect_id, value in service_values.items():
        grid_file.loc[grid_file['id'] == intersect_id, service_type] += value
    return grid_file
Normalizes our result (Geodataframe) for a given resource (res).
def normalization(result_new, res):
    result_new[res] = (result_new[res] - min(result_new[res])) / (max(result_new[res]) - min(result_new[res]))
    return result_new
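The min-max scaling above maps the smallest value to 0 and the largest to 1; a quick check on a toy series:

```python
import pandas as pd

# Toy series (illustrative values, not the notebook's results)
s = pd.Series([2.0, 4.0, 6.0])
# Same min-max scaling as the normalization function
norm = (s - s.min()) / (s.max() - s.min())
print(norm.tolist())  # [0.0, 0.5, 1.0]
```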
Imports all files we need to run our code and pulls the Illinois network from OpenStreetMap via OSMnx if it is not present (will take a while).
NOTE: even if we calculate accessibility for just Chicago, we want to use the Illinois network (or at least we should not use the Chicago network) because using the Chicago network will result in hospitals near but outside of Chicago having an infinite distance (unreachable because roads do not extend past Chicago).
def output_map(output_grid, base_map, hospitals, resource):
    ax = output_grid.plot(column=resource, cmap='PuBuGn', figsize=(18, 12), legend=True, zorder=1)
    # Next two lines set bounds for our x- and y-axes because it looks like there's a weird
    # point at the bottom left of the map that's messing up our frame (Maja)
    ax.set_xlim([314000, 370000])
    ax.set_ylim([540000, 616000])
    base_map.plot(ax=ax, facecolor="none", edgecolor='gray', lw=0.1)
    hospitals.plot(ax=ax, markersize=10, zorder=1, c='blue')
Below you can customize the input of the model:
import ipywidgets
from IPython.display import display
processor_dropdown = ipywidgets.Dropdown( options=[("1", 1), ("2", 2), ("3", 3), ("4", 4)],
value = 4, description = "Processor: ")
population_dropdown = ipywidgets.Dropdown( options=[("Population at Risk", "pop"), ("COVID-19 Patients", "covid") ],
value = "pop", description = "Population: ")
resource_dropdown = ipywidgets.Dropdown( options=[("ICU Beds", "hospital_icu_beds"), ("Ventilators", "hospital_vents") ],
value = "hospital_icu_beds", description = "Resource: ")
hospital_dropdown = ipywidgets.Dropdown( options=[("All hospitals", "hospitals"), ("Subset", "hospital_subset") ],
value = "hospitals", description = "Hospital:")
display(processor_dropdown,population_dropdown,resource_dropdown,hospital_dropdown)
Dropdown(description='Processor: ', index=3, options=(('1', 1), ('2', 2), ('3', 3), ('4', 4)), value=4)
Dropdown(description='Population: ', options=(('Population at Risk', 'pop'), ('COVID-19 Patients', 'covid')), …
Dropdown(description='Resource: ', options=(('ICU Beds', 'hospital_icu_beds'), ('Ventilators', 'hospital_vents…
Dropdown(description='Hospital:', options=(('All hospitals', 'hospitals'), ('Subset', 'hospital_subset')), val…
if population_dropdown.value == "pop":
    pop_data = pop_centroid(atrisk_data, population_dropdown.value)
elif population_dropdown.value == "covid":
    pop_data = pop_centroid(covid_data, population_dropdown.value)
distances = [600, 1200, 1800]  # Travel-time distances in seconds, matching the OSMnx travel_time units
weights = [1.0, 0.68, 0.22]  # Weights, where weights[0] is applied to distances[0]
# Other weighting options representing different distance decays
# weights1, weights2, weights3 = [1.0, 0.42, 0.09], [1.0, 0.75, 0.5], [1.0, 0.5, 0.1]
# it is surprising how long this function takes just to calculate centroids.
# why not do it with the geopandas/pandas functions rather than iterating through every item?
Pop Centroid File Setting: 100%|██████████| 3121/3121 [03:22<00:00, 15.39it/s]
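The distance-decay weights above determine how much a population unit contributes to a hospital's catchment total, depending on which travel-time ring it falls in. An arithmetic illustration with made-up ring populations:

```python
# Population in the 10/20/30-minute rings contributes with weights 1.0, 0.68, 0.22
weights = [1.0, 0.68, 0.22]
pops_by_ring = [1000, 2000, 3000]  # made-up ring populations
# Weighted catchment population: 1000*1.0 + 2000*0.68 + 3000*0.22
total_pop = sum(w * p for w, p in zip(weights, pops_by_ring))  # 3020.0
```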
If you have already run this code and changed the Hospital selection, rerun the Load Hospital Data block.
# Set hospitals according to hospital dropdown
if hospital_dropdown.value == "hospital_subset":
    hospitals = hospital_setting(hospitals[:1], G)
else:
    hospitals = hospital_setting(hospitals, G)
resources = ["hospital_icu_beds", "hospital_vents"] # resources
# this is also slower than it needs to be; if network nodes and hospitals are both
# geopandas data frames, it should be possible to do a much faster spatial join rather than iterating through every hospital
Find the nearest network node from hospitals: 100%|██████████| 66/66 [01:20<00:00, 1.23s/it]
hospital setting is done
# Create point geometries for entire graph
# what is the purpose of the following two lines? Can they be deleted?
# for node, data in G.nodes(data=True):
#     data['geometry'] = Point(data['x'], data['y'])
# which hospital to visualize?
fighosp = 4
# Create catchment for hospital 0
poly = dijkstra_cca_polygons(G, hospitals['nearest_osm'][fighosp], distances)
# Reproject polygons
for i in range(len(poly)):
    poly[i].crs = {'init': 'epsg:4326'}
    poly[i] = poly[i].to_crs({'init': 'epsg:32616'})
# Reproject hospitals
# Possible to map from the hospitals data rather than creating hospital_subset?
hospital_subset = hospitals.iloc[[fighosp]].to_crs(epsg=32616)
fig, ax = plt.subplots(figsize=(12,8))
min_10 = poly[0].plot(ax=ax, color="royalblue", label="10 min drive")
min_20 = poly[1].plot(ax=ax, color="cornflowerblue", label="20 min drive")
min_30 = poly[2].plot(ax=ax, color="lightsteelblue", label="30 min drive")
hospital_subset.plot(ax=ax, color="red", legend=True, label = "hospital")
# Add legend
ax.legend()
<matplotlib.legend.Legend at 0x7f7d69aa2a00>
poly
[ geometry 0 POLYGON ((443322.063 4615428.578, 438387.446 4..., geometry 0 POLYGON ((430166.403 4612556.599, 426354.176 4..., geometry 0 POLYGON ((413026.900 4616136.325, 413066.153 4...]
%%time
catchments = measure_acc_par(hospitals, pop_data, G, distances, weights, num_proc=processor_dropdown.value)
Calculating 66 hospital catchments... completed number: 15 5 0 10 6 16 1 11 7 2 17 12 3 8 18 13 4 9 19 14 20 25 30 35 21 31 26 36 32 22 27 37 28 2333 38 29 34 24 39 40 45 50 55 41 46 51 56 42 47 57 52 43 48 58 53 44 49 59 54 60 65 61 62 63 64 CPU times: user 2.75 s, sys: 0 ns, total: 2.75 s Wall time: 2min 9s
%%time
for j in range(len(catchments)):
    catchments[j] = catchments[j][catchments[j][resource_dropdown.value] != float('inf')]
# result = overlapping_function(grid_file, catchments, resource_dropdown.value, weights, num_proc=processor_dropdown.value)
CPU times: user 34.8 ms, sys: 0 ns, total: 34.8 ms Wall time: 32.6 ms
# add weight field to each catchment polygon
for i in range(len(weights)):
    catchments[i]['weight'] = weights[i]
# combine the three sets of catchment polygons into one geodataframe
geocatchments = pd.concat([catchments[0], catchments[1], catchments[2]])
geocatchments
| | geometry | time | total_pop | hospital_icu_beds | hospital_vents | weight |
|---|---|---|---|---|---|---|
0 | POLYGON ((446359.955 4637144.048, 444654.345 4... | 600 | 789023.74 | 0.000046 | 0.000015 | 1.00 |
0 | POLYGON ((438353.601 4609853.779, 432065.727 4... | 600 | 718489.92 | 0.000273 | 0.000089 | 1.00 |
0 | POLYGON ((442878.135 4648745.067, 441056.875 4... | 600 | 469346.52 | 0.000190 | 0.000062 | 1.00 |
0 | POLYGON ((423900.989 4621140.151, 421031.920 4... | 600 | 735110.64 | 0.000088 | 0.000029 | 1.00 |
0 | POLYGON ((443322.063 4615428.578, 438387.446 4... | 600 | 716375.12 | 0.000092 | 0.000029 | 1.00 |
... | ... | ... | ... | ... | ... | ... |
0 | POLYGON ((439884.526 4604782.264, 415910.447 4... | 1800 | 1018558.48 | 0.000027 | 0.000009 | 0.22 |
0 | MULTIPOLYGON (((418680.569 4620247.323, 411754... | 1800 | 757050.08 | 0.000059 | 0.000018 | 0.22 |
0 | POLYGON ((421589.871 4617483.974, 415910.447 4... | 1800 | 975802.04 | 0.000086 | 0.000028 | 0.22 |
0 | POLYGON ((415910.447 4618609.875, 410587.177 4... | 1800 | 940777.78 | 0.000065 | 0.000021 | 0.22 |
0 | POLYGON ((428248.191 4600502.152, 416049.771 4... | 1800 | 815814.84 | 0.000115 | 0.000037 | 0.22 |
198 rows × 6 columns
%%time
# set weighted to False for original 50% threshold method
# switch to True for area-weighted overlay
weighted = True
# if the value to be calculated is already in the hexagon grid, delete it;
# otherwise, the field name gets a suffix _1 in the overlay step
if resource_dropdown.value in list(grid_file.columns.values):
grid_file = grid_file.drop(resource_dropdown.value, axis = 1)
# calculate hexagon 'target' areas
grid_file['area'] = grid_file.area
# Intersection overlay of hospital catchments and hexagon grid
print("Intersecting hospital catchments with hexagon grid...")
fragments = gpd.overlay(grid_file, geocatchments, how='intersection')
# Calculate percent coverage of the hexagon by the hospital catchment as
# fragment area / target(hexagon) area
fragments['percent'] = fragments.area / fragments['area']
# if using weighted aggregation...
if weighted:
print("Calculating area-weighted value...")
# multiply the service/population ratio by the distance weight and the percent coverage
fragments['value'] = fragments[resource_dropdown.value] * fragments['weight'] * fragments['percent']
# if using the 50% coverage rule for unweighted aggregation...
else:
print("Calculating value for hexagons with >=50% overlap...")
# filter for only the fragments with > 50% coverage by hospital catchment
fragments = fragments[fragments['percent']>=0.5]
# multiply the service/population ratio by the distance weight
fragments['value'] = fragments[resource_dropdown.value] * fragments['weight']
# select just the hexagon id and value from the fragments,
# group the fragments by the (hexagon) id,
# and sum the values
print("Summarizing results by hexagon id...")
sum_results = fragments[['id', 'value']].groupby(by = ['id']).sum()
# join the results to the hexagon grid_file based on hexagon id
print("Joining results to hexagons...")
result_new = pd.merge(grid_file, sum_results, how="left", on = "id")
# rename value column name to the resource name
result_new = result_new.rename(columns = {'value' : resource_dropdown.value})
Intersecting hospital catchments with hexagon grid... Calculating area-weighted value... Summarizing results by hexagon id... Joining results to hexagons... CPU times: user 11.5 s, sys: 0 ns, total: 11.5 s Wall time: 11.5 s
print(result_new)
|   | left | top | right | bottom | id | area | geometry | hospital_icu_beds |
|---|---|---|---|---|---|---|---|---|
| 0 | 440843.416087 | 4.638515e+06 | 441420.766356 | 4.638015e+06 | 4158 | 216506.350946 | POLYGON ((440843.416 4638265.403, 440987.754 4... | 0.003558 |
| 1 | 440843.416087 | 4.638015e+06 | 441420.766356 | 4.637515e+06 | 4159 | 216506.350946 | POLYGON ((440843.416 4637765.403, 440987.754 4... | 0.003611 |
| 2 | 440843.416087 | 4.639515e+06 | 441420.766356 | 4.639015e+06 | 4156 | 216506.350946 | POLYGON ((440843.416 4639265.403, 440987.754 4... | 0.003624 |
| 3 | 440843.416087 | 4.639015e+06 | 441420.766356 | 4.638515e+06 | 4157 | 216506.350946 | POLYGON ((440843.416 4638765.403, 440987.754 4... | 0.003577 |
| 4 | 440843.416087 | 4.640515e+06 | 441420.766356 | 4.640015e+06 | 4154 | 216506.350946 | POLYGON ((440843.416 4640265.403, 440987.754 4... | 0.003670 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3274 | 440843.416087 | 4.643015e+06 | 441420.766356 | 4.642515e+06 | 4149 | 216506.350946 | POLYGON ((440843.416 4642765.403, 440987.754 4... | 0.003580 |
| 3275 | 440843.416087 | 4.644515e+06 | 441420.766356 | 4.644015e+06 | 4146 | 216506.350946 | POLYGON ((440843.416 4644265.403, 440987.754 4... | 0.003468 |
| 3276 | 440843.416087 | 4.644015e+06 | 441420.766356 | 4.643515e+06 | 4147 | 216506.350946 | POLYGON ((440843.416 4643765.403, 440987.754 4... | 0.003508 |
| 3277 | 440843.416087 | 4.645515e+06 | 441420.766356 | 4.645015e+06 | 4144 | 216506.350946 | POLYGON ((440843.416 4645265.403, 440987.754 4... | 0.003422 |
| 3278 | 440843.416087 | 4.645015e+06 | 441420.766356 | 4.644515e+06 | 4145 | 216506.350946 | POLYGON ((440843.416 4644765.403, 440987.754 4... | 0.003440 |

3279 rows × 8 columns
%%time
result_new = normalization (result_new, resource_dropdown.value)
CPU times: user 2.13 ms, sys: 0 ns, total: 2.13 ms Wall time: 1.97 ms
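The `normalization` function is defined earlier in the notebook. Assuming it performs a min-max rescaling of the accessibility measure to the [0, 1] range used in the final maps (an assumption, not the notebook's exact implementation), its core can be sketched as:

```python
import pandas as pd

def minmax_normalize(df, column):
    # Hypothetical sketch: rescale `column` to [0, 1] by min-max normalization
    cmin, cmax = df[column].min(), df[column].max()
    df[column] = (df[column] - cmin) / (cmax - cmin)
    return df

# Toy accessibility values (made up for illustration)
demo = pd.DataFrame({'hospital_icu_beds': [0.001, 0.003, 0.005]})
demo = minmax_normalize(demo, 'hospital_icu_beds')
print([round(v, 3) for v in demo['hospital_icu_beds']])  # [0.0, 0.5, 1.0]
```

Min-max scaling preserves the relative ordering of hexagons while making the classified map legends comparable between runs.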
In this reanalysis, two main changes were made to the study design. The first replaces the speed limit assignment in the network_setting function with the OSMnx speed module, and the second substitutes area-weighted reaggregation for the original method of translating hospital catchments into hexagons.
The first change was achieved by commenting out the network_setting function in the code block of the same name and replacing it with two functions from OSMnx: ox.speed.add_edge_speeds and ox.speed.add_edge_travel_times. The goal of this deviation was to impute missing speed limits from other edges of the same road type rather than from a fixed value of 35 mph. The second function supplies edge travel times, which feed directly into the accessibility calculation. Because OSMnx expresses travel times in seconds, the travel time thresholds were converted from minutes to seconds (e.g., 30 minutes became 1800 seconds). Inspiration for this change was provided by Elise Chan of GEOG 0323.
The second change was achieved by including code developed by Professor Joseph Holler. The rationale is that the original study discarded any hexagon with 50% or less of its area overlapping a hospital catchment, excluding those hexagons from the analysis entirely. Area-weighted reaggregation instead credits every hexagon in proportion to the share of its area that overlaps a catchment, with no threshold below which polygons are disregarded.
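The difference between the two aggregation rules can be shown on a toy geometry. Below, a unit-square stand-in for a grid cell is 40% covered by a catchment: the 50% rule discards it entirely, while the area-weighted rule credits it with 40% of the catchment's weighted value. The ratio and weight are made-up numbers, not the notebook's data:

```python
from shapely.geometry import box

hexagon = box(0, 0, 1, 1)       # stand-in for one grid cell, area 1.0
catchment = box(0, 0, 0.4, 1)   # catchment overlapping 40% of the cell

ratio, weight = 0.000273, 1.0   # illustrative service/population ratio and distance weight
percent = hexagon.intersection(catchment).area / hexagon.area  # 0.4

# Original 50% rule: the cell is dropped because percent < 0.5
threshold_value = ratio * weight if percent >= 0.5 else 0.0

# Area-weighted reaggregation: the cell keeps a proportional share
weighted_value = ratio * weight * percent

print(threshold_value, weighted_value)
```

Summed over all fragments per hexagon id, the area-weighted rule reproduces the `fragments['value']` calculation in the overlay cell above for the `weighted = True` branch.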
The results of the reanalysis can be observed by comparing the final accessibility maps from this workbook (with changes applied) and the workbook titled 02-COVID19Acc-Original. The same combination of parameters was used in both notebooks: 4 processors, the population at risk, and ICU beds. The shapes of the two maps are broadly similar despite differences in zoom level. It initially appears that the reanalysis has many more hexagons in the northwest of the map, but these polygons are present in the original workbook with accessibility values near 0.0; in the reanalysis they take middling values. There is also an apparent difference in the middle portion of the map: the original figure shows two main areas of the city with accessibility values close to 1.0, while in the reanalysis the area with values near 1.0 is more diffuse. The locations of the hospitals themselves do not appear to have changed, in accordance with the aim of the reanalysis.
That the accessibility map differs while the hospital points and other parameters are held fixed indicates that the reanalysis changes altered how accessibility is calculated for many polygons. Area-weighted reaggregation did not add polygons, but it may still contribute to the differences because it weights each hexagon's value by the share of its area covered, allowing hexagons to take values different from those originally assigned. Since the polygons in the northwest of the map were counted in the original workbook, albeit with very low values, the speed imputation may have assigned faster speeds to edges within their area and thereby raised their accessibility. Because these polygons were never discounted entirely, the speed and travel time adjustments are the more likely source of the observed differences.
%%time
hospitals = hospitals.to_crs(epsg=26971)
result_new = result_new.to_crs(epsg=26971)
output_map(result_new, pop_data, hospitals, resource_dropdown.value)
CPU times: user 2.37 s, sys: 0 ns, total: 2.37 s Wall time: 1.77 s
Classified Accessibility Outputs
In general, there appears to have been an overall increase in accessibility in many areas of the city, including not only the northwest portion but also the city center. Polygons in the far southeast of the map do seem to have decreased in accessibility, which could plausibly be attributed to the speed and travel time change lowering the imputed speed of certain edges relative to the original study. This reanalysis is intended to make the final map more representative of road conditions in the city and to value each hexagon in proportion to its overlapping area.
Luo, W., & Qi, Y. (2009). An enhanced two-step floating catchment area (E2SFCA) method for measuring spatial accessibility to primary care physicians. Health & place, 15(4), 1100-1107.