From 74a4efbcb2a332f7655904fa3197298d414e3b11 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 13 May 2024 22:16:57 +0000 Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- notebooks/mobility/mobility.ipynb | 339 ++++++++++-------- .../01_process_data/03_aggregate.R | 36 +- .../02_analysis/ntl_analysis.html | 28 +- .../02_analysis/ntl_analysis.qmd | 68 ++-- .../libs/Proj4Leaflet-1.0.1/proj4leaflet.js | 2 +- .../libs/bootstrap/bootstrap-icons.css | 2 +- .../libs/bootstrap/bootstrap.min.js | 2 +- .../libs/clipboard/clipboard.min.js | 2 +- .../libs/jquery-3.6.0/jquery-3.6.0.min.map | 2 +- .../libs/leaflet-1.3.1/leaflet.js | 2 +- .../leaflet-providers_2.0.0.js | 6 +- .../libs/proj4-2.6.2/proj4.min.js | 2 +- .../libs/quarto-html/anchor.min.js | 2 +- .../libs/quarto-html/popper.min.js | 1 - .../libs/quarto-html/tippy.css | 2 +- .../libs/quarto-html/tippy.umd.min.js | 1 - 16 files changed, 264 insertions(+), 233 deletions(-) diff --git a/notebooks/mobility/mobility.ipynb b/notebooks/mobility/mobility.ipynb index 77d2c04..26cbf06 100644 --- a/notebooks/mobility/mobility.ipynb +++ b/notebooks/mobility/mobility.ipynb @@ -54,18 +54,19 @@ "outputs": [], "source": [ "import pandas as pd\n", + "\n", "# import geopandas as gpd\n", "import dask.dataframe as dd\n", + "\n", "# import pytz\n", - "from datetime import timedelta\n", "import datetime\n", + "\n", "# from shapely import Point\n", "# import numpy as np\n", - "import plotly.express as px\n", - "import pyarrow as pa\n", "from dask.distributed import Client\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", + "\n", "sns.set()" ] }, @@ -104,7 +105,7 @@ }, "outputs": [], "source": [ - "client = Client(n_workers = 1)\n", + "client = Client(n_workers=1)\n", "client" ] }, @@ -165,24 +166,24 @@ } ], "source": [ - "path = './data/data_*.parquet'\n", - "columns = ['uid', 'datetime', 'hex_id']\n", - "ddf = dd.read_parquet(path, columns = columns)\n", - "ddf['datetime'] = dd.to_datetime(ddf['datetime'])\n", - "ddf['local_datetime'] = ddf['datetime'].dt.tz_convert('Asia/Yangon')\n", - "ddf['local_datetime'] = ddf['local_datetime'].dt.tz_convert(None)\n", - "ddf['date'] = ddf['local_datetime'].dt.date\n", - "ddf['year'] = ddf.local_datetime.dt.year\n", - "ddf['month'] = ddf.local_datetime.dt.month\n", + "path = \"./data/data_*.parquet\"\n", + "columns = [\"uid\", \"datetime\", \"hex_id\"]\n", + "ddf = dd.read_parquet(path, columns=columns)\n", + "ddf[\"datetime\"] = dd.to_datetime(ddf[\"datetime\"])\n", + "ddf[\"local_datetime\"] = ddf[\"datetime\"].dt.tz_convert(\"Asia/Yangon\")\n", + "ddf[\"local_datetime\"] = ddf[\"local_datetime\"].dt.tz_convert(None)\n", + "ddf[\"date\"] = ddf[\"local_datetime\"].dt.date\n", + "ddf[\"year\"] = ddf.local_datetime.dt.year\n", + "ddf[\"month\"] = ddf.local_datetime.dt.month\n", "\n", - "agg_by_day = ddf.groupby(['date'])['uid'].nunique().compute()\n", + "agg_by_day = ddf.groupby([\"date\"])[\"uid\"].nunique().compute()\n", "agg_by_day = agg_by_day.to_frame()\n", - "agg_by_day.sort_values('date',inplace = True)\n", + "agg_by_day.sort_values(\"date\", inplace=True)\n", "\n", - "plt.figure(figsize=(20,6))\n", + "plt.figure(figsize=(20, 6))\n", "plt.plot(agg_by_day.index, agg_by_day.uid)\n", - "plt.ylabel('Number of users')\n", - "plt.title('Veraset users by day in Myanmar')\n", + "plt.ylabel(\"Number of users\")\n", + "plt.title(\"Veraset users by day in Myanmar\")\n", "plt.xticks(rotation=90)\n", "plt.show();" ] @@ -210,16 +211,16 @@ ], "source": [ "data = agg_by_day.reset_index()\n", - "data['date_str'] = data.date.astype('str')\n", - "data['month'] = data['date'].apply(lambda x: x.month)\n", - "data['year'] = data['date'].apply(lambda x: str(x.year))\n", - "data['month-year'] = data['date_str'].apply(lambda x: x[:7])\n", - "agg_by_year = data.groupby('year').uid.sum()\n", + "data[\"date_str\"] = data.date.astype(\"str\")\n", + "data[\"month\"] = data[\"date\"].apply(lambda x: x.month)\n", + "data[\"year\"] = data[\"date\"].apply(lambda x: str(x.year))\n", + "data[\"month-year\"] = data[\"date_str\"].apply(lambda x: x[:7])\n", + "agg_by_year = data.groupby(\"year\").uid.sum()\n", "\n", "plt.bar(agg_by_year.index, agg_by_year)\n", - "plt.title( 'Veraset users by day in Myanmar')\n", - "plt.xlabel('year')\n", - "plt.ylabel('Number of users');" + "plt.title(\"Veraset users by day in Myanmar\")\n", + "plt.xlabel(\"year\")\n", + "plt.ylabel(\"Number of users\");" ] }, { @@ -244,12 +245,12 @@ } ], "source": [ - "agg_by_month_year = data.groupby('month-year').uid.sum()\n", - "plt.figure(figsize=(18,6))\n", + "agg_by_month_year = data.groupby(\"month-year\").uid.sum()\n", + "plt.figure(figsize=(18, 6))\n", "plt.plot(agg_by_month_year.index, agg_by_month_year)\n", - "plt.ylabel('Number of users')\n", - "plt.title('Veraset users by day in Myanmar')\n", - "plt.xticks(rotation = 90);" + "plt.ylabel(\"Number of users\")\n", + "plt.title(\"Veraset users by day in Myanmar\")\n", + "plt.xticks(rotation=90);" ] }, { @@ -275,9 +276,9 @@ }, "outputs": [], "source": [ - "path = './data/data*.parquet'\n", - "columns = ['uid', 'datetime', 'id_type']\n", - "ddf = dd.read_parquet(path, columns = columns)" + "path = \"./data/data*.parquet\"\n", + "columns = [\"uid\", \"datetime\", \"id_type\"]\n", + "ddf = dd.read_parquet(path, columns=columns)" ] }, { @@ -328,9 +329,9 @@ }, "outputs": [], "source": [ - "path = './data/data*.parquet'\n", - "columns = ['uid', 'datetime', 'id_type']\n", - "ddf = dd.read_parquet(path, columns = columns)" + "path = \"./data/data*.parquet\"\n", + "columns = [\"uid\", \"datetime\", \"id_type\"]\n", + "ddf = dd.read_parquet(path, columns=columns)" ] }, { @@ -344,7 +345,7 @@ }, "outputs": [], "source": [ - "pings_per_user = ddf.groupby('uid').size().compute()" + "pings_per_user = ddf.groupby(\"uid\").size().compute()" ] }, { @@ -392,7 +393,7 @@ "outputs": [], "source": [ "pings_per_user = pings_per_user.to_frame()\n", - "pings_per_user.rename(columns = {0:'pings'}, inplace = True)" + "pings_per_user.rename(columns={0: \"pings\"}, inplace=True)" ] }, { @@ -417,13 +418,13 @@ } ], "source": [ - "#create a subplot by year\n", - "fig, ax = plt.subplots(nrows = 1, ncols = 2, figsize = (15, 8))\n", - "sns.boxplot(x = pings_per_user['pings'], ax = ax[0])\n", - "ax[0].title.set_text('Pings per user from 2020 to 2024')\n", - "sns.boxplot(x = pings_per_user['pings'], ax = ax[1])\n", + "# create a subplot by year\n", + "fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 8))\n", + "sns.boxplot(x=pings_per_user[\"pings\"], ax=ax[0])\n", + "ax[0].title.set_text(\"Pings per user from 2020 to 2024\")\n", + "sns.boxplot(x=pings_per_user[\"pings\"], ax=ax[1])\n", "ax[1].set_xlim([0, 100])\n", - "ax[1].title.set_text('Pings per user from 2020 to 2024 with x axis limited to 100')\n", + "ax[1].title.set_text(\"Pings per user from 2020 to 2024 with x axis limited to 100\")\n", "plt.show()" ] }, @@ -467,34 +468,36 @@ } ], "source": [ - "path = './data/data_*.parquet'\n", - "columns = ['uid', 'datetime', 'hex_id']\n", - "ddf = dd.read_parquet(path, columns = columns)\n", - "ddf['datetime'] = dd.to_datetime(ddf['datetime'])\n", - "ddf['local_datetime'] = ddf['datetime'].dt.tz_convert('Asia/Yangon')\n", - "ddf['local_datetime'] = ddf['local_datetime'].dt.tz_convert(None)\n", - "ddf['date'] = ddf['local_datetime'].dt.date\n", - "ddf['year'] = ddf.local_datetime.dt.year\n", - "ddf['month'] = ddf.local_datetime.dt.month\n", - "agg_by_year_month = ddf.groupby(['year', 'month','uid']).size().compute()\n", + "path = \"./data/data_*.parquet\"\n", + "columns = [\"uid\", \"datetime\", \"hex_id\"]\n", + "ddf = dd.read_parquet(path, columns=columns)\n", + "ddf[\"datetime\"] = dd.to_datetime(ddf[\"datetime\"])\n", + "ddf[\"local_datetime\"] = ddf[\"datetime\"].dt.tz_convert(\"Asia/Yangon\")\n", + "ddf[\"local_datetime\"] = ddf[\"local_datetime\"].dt.tz_convert(None)\n", + "ddf[\"date\"] = ddf[\"local_datetime\"].dt.date\n", + "ddf[\"year\"] = ddf.local_datetime.dt.year\n", + "ddf[\"month\"] = ddf.local_datetime.dt.month\n", + "agg_by_year_month = ddf.groupby([\"year\", \"month\", \"uid\"]).size().compute()\n", "agg_by_year_month = agg_by_year_month.reset_index()\n", - "agg_by_year_month.rename(columns = {0: 'pings/user'}, inplace = True)\n", - "agg_by_year_month['year-month'] = agg_by_year_month.apply(lambda row: '{}-{}'.format(row.year, row.month), axis = 1)\n", + "agg_by_year_month.rename(columns={0: \"pings/user\"}, inplace=True)\n", + "agg_by_year_month[\"year-month\"] = agg_by_year_month.apply(\n", + " lambda row: \"{}-{}\".format(row.year, row.month), axis=1\n", + ")\n", "\n", - "#create a subplot by year\n", - "fig, ax = plt.subplots(nrows = 2, ncols = 1, figsize = (15, 15))\n", - "sns.boxplot(data=agg_by_year_month, x = 'year-month', y = 'pings/user', ax = ax[0])\n", - "ax[0].tick_params(axis = 'x', rotation = 45)\n", - "ax[0].set_xlabel('year-month')\n", - "ax[0].set_ylabel('pings per user')\n", - "ax[0].title.set_text('Pings per users across the years')\n", + "# create a subplot by year\n", + "fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 15))\n", + "sns.boxplot(data=agg_by_year_month, x=\"year-month\", y=\"pings/user\", ax=ax[0])\n", + "ax[0].tick_params(axis=\"x\", rotation=45)\n", + "ax[0].set_xlabel(\"year-month\")\n", + "ax[0].set_ylabel(\"pings per user\")\n", + "ax[0].title.set_text(\"Pings per users across the years\")\n", "\n", - "sns.boxplot(data=agg_by_year_month, x = 'year-month', y = 'pings/user', ax = ax[1])\n", + "sns.boxplot(data=agg_by_year_month, x=\"year-month\", y=\"pings/user\", ax=ax[1])\n", "ax[1].set_ylim([0, 200])\n", - "ax[1].tick_params(axis = 'x', rotation = 45)\n", - "ax[1].set_xlabel('year-month')\n", - "ax[1].set_ylabel('pings per user')\n", - "ax[1].title.set_text('Pings per users across the years (y limited to 200)')\n", + "ax[1].tick_params(axis=\"x\", rotation=45)\n", + "ax[1].set_xlabel(\"year-month\")\n", + "ax[1].set_ylabel(\"pings per user\")\n", + "ax[1].title.set_text(\"Pings per users across the years (y limited to 200)\")\n", "\n", "plt.show()" ] @@ -539,7 +542,7 @@ } ], "source": [ - "len(agg_by_year_month[agg_by_year_month['pings/user']>30])" + "len(agg_by_year_month[agg_by_year_month[\"pings/user\"] > 30])" ] }, { @@ -570,7 +573,7 @@ } ], "source": [ - "agg_by_year_month[agg_by_year_month['pings/user']>30].groupby('uid').size().describe()" + "agg_by_year_month[agg_by_year_month[\"pings/user\"] > 30].groupby(\"uid\").size().describe()" ] }, { @@ -595,7 +598,7 @@ }, "outputs": [], "source": [ - "pings_by_date = ddf.groupby(['date'])['uid'].size().compute()" + "pings_by_date = ddf.groupby([\"date\"])[\"uid\"].size().compute()" ] }, { @@ -610,8 +613,8 @@ "outputs": [], "source": [ "pings_by_date = pings_by_date.reset_index()\n", - "pings_by_date.rename(columns = {'uid':'pings'}, inplace = True)\n", - "pings_by_date.sort_values('date',inplace = True)" + "pings_by_date.rename(columns={\"uid\": \"pings\"}, inplace=True)\n", + "pings_by_date.sort_values(\"date\", inplace=True)" ] }, { @@ -636,11 +639,18 @@ } ], "source": [ - "plt.figure(figsize=(20,6))\n", - "plt.plot(pings_by_date.date, pings_by_date.pings, label = 'pings')\n", - "plt.plot([datetime.date(year=2021, month=4, day = 2), datetime.date(year=2021, month=4, day = 2)], [0,6500000], label = '2nd of April')\n", - "plt.ylabel('Number of pings')\n", - "plt.title('Veraset registered pings by day in Myanmar')\n", + "plt.figure(figsize=(20, 6))\n", + "plt.plot(pings_by_date.date, pings_by_date.pings, label=\"pings\")\n", + "plt.plot(\n", + " [\n", + " datetime.date(year=2021, month=4, day=2),\n", + " datetime.date(year=2021, month=4, day=2),\n", + " ],\n", + " [0, 6500000],\n", + " label=\"2nd of April\",\n", + ")\n", + "plt.ylabel(\"Number of pings\")\n", + "plt.title(\"Veraset registered pings by day in Myanmar\")\n", "plt.xticks(rotation=90)\n", "plt.legend()\n", "plt.show();" @@ -688,7 +698,7 @@ }, "outputs": [], "source": [ - "admin1 = gpd.read_file('./data/mmr_polbnda_adm2_250k_mimu_1.shp')" + "admin1 = gpd.read_file(\"./data/mmr_polbnda_adm2_250k_mimu_1.shp\")" ] }, { @@ -748,15 +758,15 @@ }, "outputs": [], "source": [ - "path = './data/data_*.parquet'\n", - "columns = ['uid', 'datetime', 'hex_id']\n", - "ddf = dd.read_parquet(path, columns = columns)\n", - "ddf['datetime'] = dd.to_datetime(ddf['datetime'])\n", - "ddf['local_datetime'] = ddf['datetime'].dt.tz_convert('Asia/Yangon')\n", - "ddf['local_datetime'] = ddf['local_datetime'].dt.tz_convert(None)\n", - "ddf['date'] = ddf['local_datetime'].dt.date\n", - "ddf['year'] = ddf.local_datetime.dt.year\n", - "ddf['month'] = ddf.local_datetime.dt.month" + "path = \"./data/data_*.parquet\"\n", + "columns = [\"uid\", \"datetime\", \"hex_id\"]\n", + "ddf = dd.read_parquet(path, columns=columns)\n", + "ddf[\"datetime\"] = dd.to_datetime(ddf[\"datetime\"])\n", + "ddf[\"local_datetime\"] = ddf[\"datetime\"].dt.tz_convert(\"Asia/Yangon\")\n", + "ddf[\"local_datetime\"] = ddf[\"local_datetime\"].dt.tz_convert(None)\n", + "ddf[\"date\"] = ddf[\"local_datetime\"].dt.date\n", + "ddf[\"year\"] = ddf.local_datetime.dt.year\n", + "ddf[\"month\"] = ddf.local_datetime.dt.month" ] }, { @@ -770,7 +780,7 @@ }, "outputs": [], "source": [ - "mapper, h3_tessellation = get_h3_tessellation(admin1, name=\"ST\", resolution=7)" + "mapper, h3_tessellation = get_h3_tessellation(admin1, name=\"ST\", resolution=7)" ] }, { @@ -784,8 +794,8 @@ }, "outputs": [], "source": [ - "mapper_df = pd.DataFrame.from_dict(mapper, orient = 'index')\n", - "mapper_df.rename(columns = {0:'admin1'}, inplace = True)" + "mapper_df = pd.DataFrame.from_dict(mapper, orient=\"index\")\n", + "mapper_df.rename(columns={0: \"admin1\"}, inplace=True)" ] }, { @@ -799,12 +809,26 @@ }, "outputs": [], "source": [ - "region_dict = {'Kachin':'North', 'Shan (North)':'North', \n", - " 'Sagaing':'Central', 'Magway':'Central', 'Mandalay':'Central', \n", - " 'Mon':'South', 'Kayin':'South', 'Kayah':'South', 'Tanintharyi':'South','Bago (East)':'South', \n", - " 'Rakhine':'West', 'Chin':'West',\n", - " 'Yangon':'Other', 'Ayeyarwady':'Other','Nay Pyi Taw':'Other', 'Bago (West)': 'Other',\n", - " 'Shan (South)': 'Other', 'Shan (East)':'Other'}" + "region_dict = {\n", + " \"Kachin\": \"North\",\n", + " \"Shan (North)\": \"North\",\n", + " \"Sagaing\": \"Central\",\n", + " \"Magway\": \"Central\",\n", + " \"Mandalay\": \"Central\",\n", + " \"Mon\": \"South\",\n", + " \"Kayin\": \"South\",\n", + " \"Kayah\": \"South\",\n", + " \"Tanintharyi\": \"South\",\n", + " \"Bago (East)\": \"South\",\n", + " \"Rakhine\": \"West\",\n", + " \"Chin\": \"West\",\n", + " \"Yangon\": \"Other\",\n", + " \"Ayeyarwady\": \"Other\",\n", + " \"Nay Pyi Taw\": \"Other\",\n", + " \"Bago (West)\": \"Other\",\n", + " \"Shan (South)\": \"Other\",\n", + " \"Shan (East)\": \"Other\",\n", + "}" ] }, { @@ -818,7 +842,7 @@ }, "outputs": [], "source": [ - "mapper_df['region'] = mapper_df['admin1'].apply(lambda x: region_dict[x])" + "mapper_df[\"region\"] = mapper_df[\"admin1\"].apply(lambda x: region_dict[x])" ] }, { @@ -841,11 +865,13 @@ } ], "source": [ - "ddf_ping_admin1 = dd.merge(ddf, mapper_df, how = 'left', left_on = 'hex_id', right_index = True)\n", - "agg_pings_by_date = ddf_ping_admin1.groupby(['region', 'date'])['uid'].size().compute()\n", + "ddf_ping_admin1 = dd.merge(\n", + " ddf, mapper_df, how=\"left\", left_on=\"hex_id\", right_index=True\n", + ")\n", + "agg_pings_by_date = ddf_ping_admin1.groupby([\"region\", \"date\"])[\"uid\"].size().compute()\n", "agg_pings_by_date = agg_pings_by_date.to_frame()\n", - "agg_pings_by_date.sort_values('date',inplace = True)\n", - "agg_pings_by_date.rename(columns = {'uid':'pings'}, inplace = True)\n", + "agg_pings_by_date.sort_values(\"date\", inplace=True)\n", + "agg_pings_by_date.rename(columns={\"uid\": \"pings\"}, inplace=True)\n", "agg_pings_by_date = agg_pings_by_date.reset_index()" ] }, @@ -871,13 +897,13 @@ } ], "source": [ - "plt.figure(figsize=(20,6))\n", - "for region in ['North', 'South', 'West', 'Central', 'Other']:\n", - " data = agg_pings_by_date[agg_pings_by_date['region']==region].copy()\n", - " plt.plot(data.date, data.pings, label = region)\n", - " \n", - "plt.ylabel('Pings')\n", - "plt.title('Veraset pings by day in Myanmar by region')\n", + "plt.figure(figsize=(20, 6))\n", + "for region in [\"North\", \"South\", \"West\", \"Central\", \"Other\"]:\n", + " data = agg_pings_by_date[agg_pings_by_date[\"region\"] == region].copy()\n", + " plt.plot(data.date, data.pings, label=region)\n", + "\n", + "plt.ylabel(\"Pings\")\n", + "plt.title(\"Veraset pings by day in Myanmar by region\")\n", "plt.xticks(rotation=90)\n", "plt.legend()\n", "plt.show();" @@ -905,19 +931,22 @@ } ], "source": [ - "#create a subplot by year\n", - "fig, ax = plt.subplots(nrows = 4, ncols = 1, figsize = (20, 15))\n", - "year_grid = {2021:[0,1], 2022:[1,1], 2023:[2,1], 2024:[3,1]}\n", - "agg_pings_by_date['year'] = agg_pings_by_date['date'].apply(lambda x: x.year)\n", + "# create a subplot by year\n", + "fig, ax = plt.subplots(nrows=4, ncols=1, figsize=(20, 15))\n", + "year_grid = {2021: [0, 1], 2022: [1, 1], 2023: [2, 1], 2024: [3, 1]}\n", + "agg_pings_by_date[\"year\"] = agg_pings_by_date[\"date\"].apply(lambda x: x.year)\n", "for year, grid in year_grid.items():\n", - " for region in ['North', 'South', 'West', 'Central', 'Other']:\n", - " data = agg_pings_by_date[(agg_pings_by_date['year']==year)&(agg_pings_by_date['region']==region)].copy()\n", - " ax[grid[0]].plot(data.date, data.pings, label = region)\n", - " \n", - " ax[grid[0]].set_ylabel('Pings')\n", - " ax[grid[0]].set_title('Veraset pings by day in Myanmar by region')\n", + " for region in [\"North\", \"South\", \"West\", \"Central\", \"Other\"]:\n", + " data = agg_pings_by_date[\n", + " (agg_pings_by_date[\"year\"] == year)\n", + " & (agg_pings_by_date[\"region\"] == region)\n", + " ].copy()\n", + " ax[grid[0]].plot(data.date, data.pings, label=region)\n", + "\n", + " ax[grid[0]].set_ylabel(\"Pings\")\n", + " ax[grid[0]].set_title(\"Veraset pings by day in Myanmar by region\")\n", " ax[grid[0]].legend()\n", - "plt.savefig('number_users_by_year.png')\n", + "plt.savefig(\"number_users_by_year.png\")\n", "plt.show();" ] }, @@ -932,7 +961,7 @@ }, "outputs": [], "source": [ - "ddf_admin1 = dd.merge(ddf, mapper_df, how = 'left', left_on = 'hex_id', right_index = True)" + "ddf_admin1 = dd.merge(ddf, mapper_df, how=\"left\", left_on=\"hex_id\", right_index=True)" ] }, { @@ -946,7 +975,7 @@ }, "outputs": [], "source": [ - "agg_region_date = ddf_admin1.groupby(['region', 'date'])['uid'].nunique()\n", + "agg_region_date = ddf_admin1.groupby([\"region\", \"date\"])[\"uid\"].nunique()\n", "agg_region_date = agg_region_date.compute()" ] }, @@ -975,7 +1004,7 @@ }, "outputs": [], "source": [ - "agg_region_date.rename(columns = {'uid':'number_users'}, inplace = True)" + "agg_region_date.rename(columns={\"uid\": \"number_users\"}, inplace=True)" ] }, { @@ -989,7 +1018,7 @@ }, "outputs": [], "source": [ - "agg_region_date.sort_values('date',inplace = True)" + "agg_region_date.sort_values(\"date\", inplace=True)" ] }, { @@ -1014,13 +1043,13 @@ } ], "source": [ - "plt.figure(figsize=(20,6))\n", - "for region in ['North', 'South', 'West', 'Central', 'Other']:\n", - " data = agg_region_date[agg_region_date['region']==region].copy()\n", - " plt.plot(data.date, data.number_users, label = region)\n", - " \n", - "plt.ylabel('Number of users')\n", - "plt.title('Veraset users by day in Myanmar by region')\n", + "plt.figure(figsize=(20, 6))\n", + "for region in [\"North\", \"South\", \"West\", \"Central\", \"Other\"]:\n", + " data = agg_region_date[agg_region_date[\"region\"] == region].copy()\n", + " plt.plot(data.date, data.number_users, label=region)\n", + "\n", + "plt.ylabel(\"Number of users\")\n", + "plt.title(\"Veraset users by day in Myanmar by region\")\n", "plt.xticks(rotation=90)\n", "plt.legend()\n", "plt.show();" @@ -1048,22 +1077,22 @@ } ], "source": [ - "#create a subplot by year\n", - "fig, ax = plt.subplots(nrows = 4, ncols = 1, figsize = (20, 15))\n", - "year_grid = {2021:[0,1], 2022:[1,1], 2023:[2,1], 2024:[3,1]}\n", - "agg_region_date['year'] = agg_region_date['date'].apply(lambda x: x.year)\n", + "# create a subplot by year\n", + "fig, ax = plt.subplots(nrows=4, ncols=1, figsize=(20, 15))\n", + "year_grid = {2021: [0, 1], 2022: [1, 1], 2023: [2, 1], 2024: [3, 1]}\n", + "agg_region_date[\"year\"] = agg_region_date[\"date\"].apply(lambda x: x.year)\n", "for year, grid in year_grid.items():\n", - " for region in ['North', 'South', 'West', 'Central', 'Other']:\n", - " data = agg_region_date[(agg_region_date['year']==year)&(agg_region_date['region']==region)].copy()\n", - " ax[grid[0]].plot(data.date, data.number_users, label = region)\n", - " \n", - " ax[grid[0]].set_ylabel('Number of users')\n", - " ax[grid[0]].set_title('Veraset users by day in Myanmar by region')\n", - " ax[grid[0]].legend()\n", - "plt.savefig('number_users_by_year.png')\n", - "plt.show();\n", + " for region in [\"North\", \"South\", \"West\", \"Central\", \"Other\"]:\n", + " data = agg_region_date[\n", + " (agg_region_date[\"year\"] == year) & (agg_region_date[\"region\"] == region)\n", + " ].copy()\n", + " ax[grid[0]].plot(data.date, data.number_users, label=region)\n", "\n", - " " + " ax[grid[0]].set_ylabel(\"Number of users\")\n", + " ax[grid[0]].set_title(\"Veraset users by day in Myanmar by region\")\n", + " ax[grid[0]].legend()\n", + "plt.savefig(\"number_users_by_year.png\")\n", + "plt.show();" ] }, { @@ -1077,7 +1106,11 @@ }, "outputs": [], "source": [ - "conflict = pd.read_excel('./data/Analysis_ACLED_01_Jan_2021_29_Mar_2024_regional_weekly.xlsx', sheet_name = 'Normalized_Conflict', skiprows = 2,)" + "conflict = pd.read_excel(\n", + " \"./data/Analysis_ACLED_01_Jan_2021_29_Mar_2024_regional_weekly.xlsx\",\n", + " sheet_name=\"Normalized_Conflict\",\n", + " skiprows=2,\n", + ")" ] }, { @@ -1102,13 +1135,13 @@ } ], "source": [ - "plt.figure(figsize=(20,6))\n", - "for region in ['The North', 'The South', 'The West', 'The Central', 'Other']:\n", - " plt.plot(conflict.Week, conflict[region], label = region)\n", + "plt.figure(figsize=(20, 6))\n", + "for region in [\"The North\", \"The South\", \"The West\", \"The Central\", \"Other\"]:\n", + " plt.plot(conflict.Week, conflict[region], label=region)\n", "plt.legend()\n", - "plt.title('Conflict Intensity Index (Normalized)')\n", - "plt.ylabel('Conflict Intensity Index')\n", - "plt.xticks(rotation = 90);" + "plt.title(\"Conflict Intensity Index (Normalized)\")\n", + "plt.ylabel(\"Conflict Intensity Index\")\n", + "plt.xticks(rotation=90);" ] }, { diff --git a/notebooks/nighttime-lights/analysis-2024/01_process_data/03_aggregate.R b/notebooks/nighttime-lights/analysis-2024/01_process_data/03_aggregate.R index 01b5b7a..eba55de 100644 --- a/notebooks/nighttime-lights/analysis-2024/01_process_data/03_aggregate.R +++ b/notebooks/nighttime-lights/analysis-2024/01_process_data/03_aggregate.R @@ -40,7 +40,7 @@ for(adm_level in c("bound1", "bound2", "sez", "0", "1", "2", "3", "0_rm_border_2_5km", "0_rm_border_5km", "0_rm_border_10km")){ - + if(adm_level == "sez"){ roi_sf <- sez_sf } else if(adm_level == "bound1"){ @@ -56,12 +56,12 @@ for(adm_level in c("bound1", "bound2", "sez", "0", "1", "2", "3", } else if(adm_level == "border_10km"){ roi_sf <- border_10km_sf } else if(adm_level == "rwi"){ - + source("https://raw.githubusercontent.com/ramarty/fast-functions/master/R/functions_in_chunks.R") rwi_df <- read_csv(file.path(data_dir, "Relative Wealth", "MMR_relative_wealth_index.csv")) rwi_sf <- st_as_sf(x = rwi_df, coords = c("longitude", "latitude"), crs = 4326) rwi_buff_sf <- rwi_sf %>% st_buffer_chunks(dist = 1200, chunk_size = 500) - + roi_sf <- rwi_buff_sf } else if(adm_level == "0_rm_sez"){ roi_sf <- adm0_rm_sez_sf @@ -76,41 +76,41 @@ for(adm_level in c("bound1", "bound2", "sez", "0", "1", "2", "3", } else{ roi_sf <- read_sf(file.path(gadm_dir, "rawdata", paste0("gadm41_MMR_",adm_level,".json"))) } - + # Loop through product ------------------------------------------------------- # VNP46A2 = daily # VNP46A3 = monthly # VNP46A4 = annually - + for(product in c("VNP46A4", "VNP46A3")){ - + ## Make directory to export files - organized by ROI and prduct OUT_DIR <- file.path(ntl_bm_dir, "FinalData", "aggregated", paste0("adm", adm_level, "_", product)) dir.create(OUT_DIR) - + # Loop through rasters ----------------------------------------------------- r_name_vec <- file.path(ntl_bm_dir, "FinalData", paste0(product, "_rasters")) %>% list.files() - + for(r_name_i in r_name_vec){ - + OUT_FILE <- file.path(OUT_DIR, r_name_i %>% str_replace_all(".tif", ".Rds")) - + ## Check if file exists if(!file.exists(OUT_FILE)){ - + ## Load raster and create rasters for just gas flaring and non gas flaring locations r <- raster(file.path(ntl_bm_dir, "FinalData", paste0(product, "_rasters"), r_name_i)) - + ## Extract data ntl_df <- exact_extract(r, roi_sf, fun = c("mean", "median", "sum")) roi_sf$ntl_bm_mean <- ntl_df$mean roi_sf$ntl_bm_median <- ntl_df$median roi_sf$ntl_bm_sum <- ntl_df$sum - + ## Prep for export roi_df <- roi_sf %>% st_drop_geometry() - + ## Add date if(product == "VNP46A2"){ year <- r_name_i %>% substring(12,15) %>% as.numeric() @@ -122,7 +122,7 @@ for(adm_level in c("bound1", "bound2", "sez", "0", "1", "2", "3", paste0("-01") %>% ymd() } - + if(product == "VNP46A3"){ date_r <- r_name_i %>% str_replace_all("VNP46A3_t", "") %>% @@ -131,7 +131,7 @@ for(adm_level in c("bound1", "bound2", "sez", "0", "1", "2", "3", paste0("-01") %>% ymd() } - + if(product == "VNP46A4"){ # Just grab year date_r <- r_name_i %>% @@ -139,9 +139,9 @@ for(adm_level in c("bound1", "bound2", "sez", "0", "1", "2", "3", str_replace_all(".tif", "") %>% as.numeric() } - + roi_df$date <- date_r - + ## Export saveRDS(roi_df, OUT_FILE) } diff --git a/notebooks/nighttime-lights/analysis-2024/02_analysis/ntl_analysis.html b/notebooks/nighttime-lights/analysis-2024/02_analysis/ntl_analysis.html index d66a23c..1dd089f 100644 --- a/notebooks/nighttime-lights/analysis-2024/02_analysis/ntl_analysis.html +++ b/notebooks/nighttime-lights/analysis-2024/02_analysis/ntl_analysis.html @@ -19,7 +19,7 @@ ul.task-list{list-style: none;} ul.task-list li input[type="checkbox"] { width: 0.8em; - margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ + margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ vertical-align: middle; } /* CSS for syntax highlighting */ @@ -90,7 +90,7 @@
- +