African Cities Density

In [1]:
import pandas as pd
import geopandas as gpd

from lets_plot import *
from lets_plot.geo_data import *
from lets_plot import tilesets
The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).
In [2]:
LetsPlot.setup_html()
In [3]:
def get_naturalearth_data(data_type, columns=None):
    import shapefile
    from shapely.geometry import shape

    naturalearth_url = "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/" + \
                       "data/naturalearth/{0}/data.shp?raw=true".format(data_type)
    sf = shapefile.Reader(naturalearth_url)

    gdf = gpd.GeoDataFrame(
        [
            dict(zip([field[0] for field in sf.fields[1:]], record))
            for record in sf.records()
        ],
        geometry=[shape(s) for s in sf.shapes()]
    )
    if columns is not None:
        gdf = gdf[columns]
        gdf.columns = [col.lower() for col in gdf.columns]

    return gdf
In [4]:
world_gdf = get_naturalearth_data("admin_0_countries", columns=["NAME", "ADM0_A3", "CONTINENT"])
print(world_gdf.shape)
world_gdf.head()
(177, 3)
Out[4]:
name adm0_a3 continent
0 Fiji FJI Oceania
1 Tanzania TZA Africa
2 W. Sahara SAH Africa
3 Canada CAN North America
4 United States of America USA North America
In [5]:
continent_gdf = world_gdf[world_gdf["continent"] == "Africa"].drop(columns=["continent"]).reset_index(drop=True)
continent_gdf["name"] = continent_gdf["name"].replace({
    "Central African Rep.": "Central African Republic",
    "Dem. Rep. Congo": "Democratic Republic of the Congo",
    "Eq. Guinea": "Equatorial Guinea",
})
continent_gdf = continent_gdf[~continent_gdf["name"].isin(["Somaliland"])]
print(continent_gdf.shape)
continent_gdf.head()
(50, 2)
Out[5]:
name adm0_a3
0 Tanzania TZA
1 W. Sahara SAH
2 Democratic Republic of the Congo COD
3 Somalia SOM
4 Kenya KEN
In [6]:
capitals_gdf = get_naturalearth_data("populated_places", columns=["NAME", "ADM0_A3", "ADM0CAP", "geometry"])
capitals_gdf = capitals_gdf[capitals_gdf["adm0cap"] == 1].drop(columns=["adm0cap"]).reset_index(drop=True)
print(capitals_gdf.shape)
capitals_gdf.head()
(199, 3)
Out[6]:
name adm0_a3 geometry
0 Vatican City VAT POINT (12.45339 41.90328)
1 San Marino SMR POINT (12.44177 43.93610)
2 Vaduz LIE POINT (9.51667 47.13372)
3 Luxembourg LUX POINT (6.13000 49.61166)
4 Palikir FSM POINT (158.14997 6.91664)
In [7]:
continent_capitals_gdf = pd.merge(continent_gdf, capitals_gdf, on="adm0_a3", how='inner').drop(columns=["adm0_a3"])
continent_capitals_gdf.columns = ["country", "capital", "geometry"]
continent_capitals_gdf = continent_capitals_gdf.sort_values(by="country").reset_index(drop=True)
print(continent_capitals_gdf.shape)
continent_capitals_gdf.head()
(51, 3)
Out[7]:
country capital geometry
0 Algeria Algiers POINT (3.04861 36.76501)
1 Angola Luanda POINT (13.23248 -8.83634)
2 Benin Cotonou POINT (2.40435 6.36298)
3 Botswana Gaborone POINT (25.91195 -24.64631)
4 Burkina Faso Ouagadougou POINT (-1.52667 12.37226)
In [8]:
def geocode_country(name, capital, geometry):
    result = geocode_cities().scope(name).ignore_not_found().get_centroids()[["found name", "geometry"]]
    result.columns = ["name", "geometry"]
    result = result.assign(is_capital = (result["name"] == capital))
    if not result["is_capital"].any():
        capital_row = gpd.GeoDataFrame({"name": [capital], "geometry": [geometry], "is_capital": True}, crs=result.crs)
        result = pd.concat([result, capital_row], ignore_index=True)
    return result.assign(country=name).sort_values(by=["is_capital", "name"])

gdf = pd.concat([
    geocode_country(row["country"], row["capital"], row["geometry"])
    for i, row in continent_capitals_gdf.iterrows()
]).reset_index(drop=True)
print(gdf.shape)
gdf.head()
(19404, 4)
Out[8]:
name geometry is_capital country
0 Abadla POINT (-2.58267 31.03055) False Algeria
1 Abalessa POINT (3.13138 21.97663) False Algeria
2 Abdelkader Azil POINT (5.01498 35.32368) False Algeria
3 Abi Youcef POINT (4.34427 36.52429) False Algeria
4 Abou El Hassen POINT (1.19316 36.40833) False Algeria
In [9]:
ggplot() + \
    geom_livemap(data_size_zoomin=1, tiles=tilesets.LETS_PLOT_LIGHT) + \
    geom_pointdensity(aes(fill="..scaled..",
                          stroke="is_capital", size="is_capital",
                          group=["country"]),
                      map=gdf, adjust=.15, shape=21, color="red", show_legend=False,
                      tooltips=layer_tooltips().title("@name\n@country")
                                               .line("neighbours count|@..count..")
                                               .line("is capital|@is_capital")) + \
    scale_fill_viridis() + \
    scale_manual("size", values=[1, 2]) + \
    scale_manual("stroke", values=[0, .5]) + \
    ggsize(800, 800)
Out[9]: