{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "65aIalqEt1LR" }, "source": [ "# Generate GeoJSON from Natural Earth Data" ] }, { "cell_type": "markdown", "metadata": { "id": "L4PY3Z15t1LS" }, "source": [ "## Install Dependencies" ] }, { "cell_type": "markdown", "metadata": { "id": "6_H7qbzIt1LS" }, "source": [ "```\n", "pip install geopandas shapely matplotlib\n", "```" ] }, { "cell_type": "markdown", "metadata": { "id": "hvA0SEXVt1LS" }, "source": [ "## Download Data\n", "\n", "Download datasets (_Admin 0 - Countries_ in [1:10](https://www.naturalearthdata.com/downloads/10m-cultural-vectors/), and _Admin 1 – States, Provinces_ in 1:10 and [1:50](https://www.naturalearthdata.com/downloads/50m-cultural-vectors/)) from Natural Earth Data:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Dependencies\n", "\n", "import os\n", "import json\n", "import requests\n", "import geopandas as gpd\n", "import matplotlib.pyplot as plt\n", "import shapely\n", "import pandas as pd\n", "import shapely.geometry\n", "import shapely.ops\n", "import shapely.affinity\n", "from shapely.geometry import Polygon, MultiPolygon\n", "import shutil" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "speed_run = False\n", "# set this to True if you want to skip all the rendering of previews in this notebook and just get an update of GeoJSON/TS/JSON files." ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "VjGrqW4Kt1LS", "outputId": "2e2accda-5ee4-4270-872e-ecb78d0d02a2" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Done. 
data_dir = os.path.expanduser("~/Downloads")
# exist_ok=True replaces the separate exists-check, so a directory created
# between the check and the mkdir can no longer raise FileExistsError.
os.makedirs(data_dir, exist_ok=True)


def download_files(skip_existing: bool):
    """Download the Natural Earth zip archives into ``data_dir``.

    Three archives are fetched: Admin 0 countries (10m) and Admin 1
    states/provinces (10m and 50m). Each file is saved under the last path
    segment of its URL.

    Args:
        skip_existing: When True, a file is not downloaded again if a local
            file with the same name already exists and its size equals the
            ``content-length`` reported by the server.

    On a non-200 response an error message is printed and the remaining
    downloads are abandoned; "Done." is printed only when the loop finishes
    without hitting that error.
    """
    for url in [
        "https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip",
        "https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_1_states_provinces.zip",
        "https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/50m/cultural/ne_50m_admin_1_states_provinces.zip"
    ]:
        file_name = url.split('/')[-1]
        full_file_name = f'{data_dir}/{file_name}'
        # temporary fix
        url = url.replace("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download", "https://naciscdn.org/naturalearth")
        with requests.get(
            url,
            headers={
                "accept-encoding": "gzip, deflate, br",
                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"
            },
            stream=True,
            # Without a timeout a stalled connection would hang the notebook forever.
            timeout=60,
        ) as res:
            # Check the status before touching the headers: error responses
            # are not guaranteed to carry a content-length header.
            if res.status_code != 200:
                print("Error downloading files. Please open the URL to download them from browser manually.")
                break
            # The header may be absent (e.g. chunked transfer); in that case
            # the size comparison is impossible and the file is re-downloaded.
            content_length = res.headers.get("content-length")
            file_size = int(content_length) if content_length is not None else None
            if (
                skip_existing and
                file_size is not None and
                os.path.exists(full_file_name) and
                file_size == os.path.getsize(full_file_name)
            ):
                print(f"Skip {file_name} because it already exists")
                continue
            print(f"Downloading {file_name}... \r", end="")
            with open(full_file_name, "wb") as fh:
                # Write chunk by chunk so stream=True actually keeps the whole
                # archive out of memory (res.content would buffer it all).
                for chunk in res.iter_content(chunk_size=1024 * 1024):
                    fh.write(chunk)
    else:
        # for/else: only reached when no download failed. The padding
        # overwrites the "\r" progress line left by the last download.
        print("Done.".ljust(80))


download_files(skip_existing=False)

# Read Natural Earth data files into GeoDataFrames
df_admin0_10m = gpd.read_file(f"{data_dir}/ne_10m_admin_0_countries.zip")
df_10m = gpd.read_file(f"{data_dir}/ne_10m_admin_1_states_provinces.zip")
df_50m = gpd.read_file(f"{data_dir}/ne_50m_admin_1_states_provinces.zip")

# Convert column names to lowercase
df_admin0_10m.columns = df_admin0_10m.columns.str.lower()
| \n", " | featurecla | \n", "scalerank | \n", "adm1_code | \n", "diss_me | \n", "iso_3166_2 | \n", "wikipedia | \n", "iso_a2 | \n", "adm0_sr | \n", "name | \n", "name_alt | \n", "... | \n", "FCLASS_ID | \n", "FCLASS_PL | \n", "FCLASS_GR | \n", "FCLASS_IT | \n", "FCLASS_NL | \n", "FCLASS_SE | \n", "FCLASS_BD | \n", "FCLASS_UA | \n", "FCLASS_TLC | \n", "geometry | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| admin | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| Australia | \n", "9 | \n", "9 | \n", "9 | \n", "9 | \n", "9 | \n", "0 | \n", "9 | \n", "9 | \n", "9 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "9 | \n", "
| Brazil | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "27 | \n", "0 | \n", "27 | \n", "27 | \n", "27 | \n", "13 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "27 | \n", "
| Canada | \n", "13 | \n", "13 | \n", "13 | \n", "13 | \n", "13 | \n", "13 | \n", "13 | \n", "13 | \n", "13 | \n", "9 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "13 | \n", "
| China | \n", "31 | \n", "31 | \n", "31 | \n", "31 | \n", "31 | \n", "0 | \n", "31 | \n", "31 | \n", "31 | \n", "30 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "31 | \n", "
| India | \n", "36 | \n", "36 | \n", "36 | \n", "36 | \n", "36 | \n", "0 | \n", "36 | \n", "36 | \n", "36 | \n", "13 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "36 | \n", "
| Indonesia | \n", "33 | \n", "33 | \n", "33 | \n", "33 | \n", "33 | \n", "0 | \n", "33 | \n", "33 | \n", "33 | \n", "30 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "33 | \n", "
| Russia | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "85 | \n", "1 | \n", "85 | \n", "85 | \n", "85 | \n", "84 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "85 | \n", "
| South Africa | \n", "9 | \n", "9 | \n", "9 | \n", "9 | \n", "9 | \n", "0 | \n", "9 | \n", "9 | \n", "9 | \n", "9 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "9 | \n", "
| United States of America | \n", "51 | \n", "51 | \n", "51 | \n", "51 | \n", "51 | \n", "51 | \n", "51 | \n", "51 | \n", "51 | \n", "51 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "51 | \n", "
9 rows × 121 columns
\n", "| \n", " | featurecla | \n", "scalerank | \n", "labelrank | \n", "sov_a3 | \n", "type | \n", "admin | \n", "adm0_a3 | \n", "gu_a3 | \n", "name | \n", "abbrev | \n", "... | \n", "name_sv | \n", "name_tr | \n", "name_uk | \n", "name_ur | \n", "name_vi | \n", "name_zh_x | \n", "name_zht | \n", "geometry | \n", "name_zh_y | \n", "iso_3166_2 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Admin-0 country | \n", "0 | \n", "4 | \n", "CH1 | \n", "Country | \n", "Hong Kong S.A.R. | \n", "HKG | \n", "HKG | \n", "Hong Kong | \n", "H.K. | \n", "... | \n", "Hongkong | \n", "Hong Kong | \n", "Гонконг | \n", "ہانگ کانگ | \n", "Hồng Kông | \n", "香港 | \n", "香港 | \n", "MULTIPOLYGON (((114.22983 22.55581, 114.23471 ... | \n", "香港特别行政区 | \n", "CN-91 | \n", "
| 1 | \n", "Admin-0 country | \n", "0 | \n", "3 | \n", "TWN | \n", "Sovereign country | \n", "Taiwan | \n", "TWN | \n", "TWN | \n", "Taiwan | \n", "Taiwan | \n", "... | \n", "Taiwan | \n", "Çin Cumhuriyeti | \n", "Республіка Китай | \n", "تائیوان | \n", "Đài Loan | \n", "中华民国 | \n", "中華民國 | \n", "MULTIPOLYGON (((121.90577 24.9501, 121.83473 2... | \n", "中国台湾 | \n", "CN-71 | \n", "
| 2 | \n", "Admin-0 country | \n", "0 | \n", "4 | \n", "CH1 | \n", "Country | \n", "Macao S.A.R | \n", "MAC | \n", "MAC | \n", "Macao | \n", "Mac. | \n", "... | \n", "Macao | \n", "Makao | \n", "Аоминь | \n", "مکاؤ | \n", "Ma Cao | \n", "澳门 | \n", "澳門 | \n", "MULTIPOLYGON (((113.5586 22.16303, 113.56943 2... | \n", "澳门特别行政区 | \n", "CN-92 | \n", "
3 rows × 51 columns
\n", "