From fe8b58e248e017a69eb905c7c581187444526de8 Mon Sep 17 00:00:00 2001 From: Nelofar Qulizada Date: Wed, 6 Aug 2025 01:27:20 +0000 Subject: [PATCH 1/2] Update environment.yml with analysis dependencies --- environment.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/environment.yml b/environment.yml index d91c3c80..001ec318 100644 --- a/environment.yml +++ b/environment.yml @@ -1,7 +1,11 @@ -name: cookbook-dev +name: biascorr channels: - conda-forge dependencies: - - jupyterlab - - mystmd - - jupyterlab-myst + - python=3.11 + - pandas + - numpy + - matplotlib + - statsmodels + - scikit-learn + - jupyter \ No newline at end of file From 7288ed5b6ad5479fad2249127a8ddc9c3d4c6a95 Mon Sep 17 00:00:00 2001 From: Nelofar Qulizada Date: Wed, 6 Aug 2025 01:53:37 +0000 Subject: [PATCH 2/2] Update bias-correction model notebook --- .../03_bias_correction_model_robust.ipynb | 2580 +++++++++++++++++ 1 file changed, 2580 insertions(+) create mode 100644 notebooks/03_bias_correction_model_robust.ipynb diff --git a/notebooks/03_bias_correction_model_robust.ipynb b/notebooks/03_bias_correction_model_robust.ipynb new file mode 100644 index 00000000..3cf41654 --- /dev/null +++ b/notebooks/03_bias_correction_model_robust.ipynb @@ -0,0 +1,2580 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e21c8a3d", + "metadata": {}, + "source": [ + "# 03 — Bias Correction Model (Robust: sparse-safe + impute + de-collinear)\n", + "\n", + "This notebook is designed to be robust for large, sparse covariate sets:\n", + "\n", + "- Loads `../processed_data/weekly_with_covariates.csv`\n", + "- Target: `EcoCntr_weekly_SUM`\n", + "- Predictors: `SUM_total_trip_count` + all other covariates (excluding IDs/dates)\n", + "- Drops covariate **columns** with > **80% NaN** (too sparse)\n", + "- Train/test split, then **impute** remaining NaNs with **train means** (no leakage)\n", + "- Removes **constant** and **duplicate** columns to reduce multicollinearity\n", + "- 
Fits OLS (HC3 robust SEs), evaluates, and exports predictions + metrics\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "5fc8309b", + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import mean_squared_error, r2_score\n", + "from sklearn.impute import SimpleImputer\n", + "\n", + "pd.set_option(\"display.max_columns\", 200)" + ] + }, + { + "cell_type": "markdown", + "id": "1bbf14ef", + "metadata": {}, + "source": [ + "## 1) Load merged weekly dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "926f80e0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded: (3377, 278)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GRID_ID.1STARTweek_stravaDATE_castGRID_IDENDweek_stravaDATE_castSUM_total_trip_countEcoCntr_weekly_SUMMAX_slopePctSTARTweek_TimeHEXid_weekIDHEXid_WeekIDENDweek_Timeweek_idweek_start_GRID_JOIN_OBJECTID *Shape *SOURCE_IDGRID_ID_covslopePctTrailCounterSqMetersASOSIDAR_asosID_hexIDSlope_10m_MEANOBJECTIDGRID_ID.1_covTIMESTEP_stravaDATE_castSTARTTIME_stravaDATE_castENDTIME_stravaDATE_castSUM_EcoCounter_WeeklyAggregationSUM_total_trip_count_covSUM_ride_countMAX_activity_typeMAX_slopePct_covSUM_forward_people_countSUM_reverse_people_countSUM_forward_commute_trip_countSUM_reverse_commute_trip_countSUM_forward_leisure_trip_countSUM_reverse_leisure_trip_countSUM_ebike_ride_countSUM_forward_morning_trip_countSUM_reverse_morning_trip_countSUM_forward_midday_trip_countSUM_reverse_midday_trip_countSUM_forward_evening_trip_countSUM_reverse_evening_trip_countSUM_forward_male_people_countSUM_reverse_male_people_countSUM_forward_female_people_countSUM_reverse_female_people_countSUM_forward_18_34_people_countSUM_reverse_18_34_people_countSUM_forward_35_54_people_countSUM_reverse_35_54_people_countSUM_forward_55_64_people_countSUM_reverse_55_64_people_countMIN_slopePcthexid_weekid_joinASOS_IDHexIDstationASOS ID WeekID JoinasosID_encodeSTEPTIMESTEP_weekIDSTARTTIME_WeekdayENDTIME_WeekdaystationMAX_max_temp_fMIN_min_temp_fMAX_max_dewpoint_fMIN_min_dewpoint_fSUM_precip_inMEAN_avg_wind_speed_ktsMEAN_avg_wind_drctMIN_min_rhMEAN_avg_rhMAX_max_rhSUM_snow_inMIN_min_feelMEAN_avg_feelMAX_max_feelMAX_max_wind_speed_ktsMAX_max_wind_gust_ktsMAX_climo_high_fMIN_climo_low_fSUM_climo_precip_inLocation of EcocounterCOUNTMEAN_forward_average_speed_meters_per_secondMEAN_reverse_average_speed_meters_per_secondSUM_forward_overnight_trip_countSUM_reverse_overnight_trip_countSUM_forward_unspecified_people_countSUM_reverse_unspecified_people_countSUM_forward_65_plus_people_countSUM_reverse_65_plus_people_countOBJECTID.1HEX GRID_IDSlope Pct CategorySlope MAXpct...2029 Other Race Population2029 Other Race 
Population: Percent2029 Diversity Index2022 Pop 25+: HS Diploma (ACS 5-Yr)2022 Pop 25+: HS Diploma (ACS 5-Yr): Percent2022 Pop 25+: Some College (ACS 5-Yr)2022 Pop 25+: Some College (ACS 5-Yr): Percent2022 Pop 25+: Assoc Degree (ACS 5-Yr)2022 Pop 25+: Assoc Degree (ACS 5-Yr): Percent2022 Pop 25+: Bach Degree (ACS 5-Yr)2022 Pop 25+: Bach Degree (ACS 5-Yr): Percent2022 Pop 25+: Master`s Deg (ACS 5-Yr)2022 Pop 25+: Master`s Deg (ACS 5-Yr): Percent2022 Pop 25+: Prof Sch Deg (ACS 5-Yr)2022 Pop 25+: Prof Sch Deg (ACS 5-Yr): Percent2022 Pop 25+: Doctorate (ACS 5-Yr)2022 Pop 25+: Doctorate (ACS 5-Yr): Percent2024 Pop Age 25+: High School Diploma2024 Pop Age 25+: High School Diploma: Percent2024 Pop Age 25+: GED2024 Pop Age 25+: GED: Percent2020 Total Population2024 Total Population2024 Population Density2022 Civilian Pop 18+: Veteran (ACS 5-Yr)2022 Civilian Pop 18+: Veteran (ACS 5-Yr): Percent2022 Civilian Pop 18+: Nonveteran (ACS 5-Yr)2022 Civilian Pop 18+: Nonveteran (ACS 5-Yr): Percent2022 Workers 16+: Bicycle (ACS 5-Yr)2022 Workers 16+: Bicycle (ACS 5-Yr): Percent2022 Workers 16+: Walked (ACS 5-Yr)2022 Workers 16+: Walked (ACS 5-Yr): Percent2022 Commute to Work: 15-19 Min (ACS 5-Yr)2022 Commute to Work: 15-19 Min (ACS 5-Yr): Percent2022 Commute to Work: 10-14 Min (ACS 5-Yr)2022 Commute to Work: 10-14 Min (ACS 5-Yr): Percent2022 Commute to Work: 5-9 Min (ACS 5-Yr)2022 Commute to Work: 5-9 Min (ACS 5-Yr): Percent2022 Commute to Work: <5 Min (ACS 5-Yr)2022 Commute to Work: <5 Min (ACS 5-Yr): Percent2022 Avg Commute to Work (ACS 5-Yr)2024 Median Age2024 Median Age: Index2024 Senior Population2024 Senior Population: Percent2020 Multiple Races Pop 35-392020 Multiple Races Pop 35-39: Percent2024 Population Age 0-42024 Population Age 0-4: Percent2022 Poverty Index (ACS 5-Yr)2022 HHs: Inc Below Poverty Level (ACS 5-Yr)2022 HHs: Inc Below Poverty Level (ACS 5-Yr): Percent2022 HHs w/Public Assist Income (ACS 5-Yr)2022 HHs w/Public Assist Income (ACS 5-Yr): Percent2022 Race: White 
(ACS 5-Yr)2022 Race: White (ACS 5-Yr): Percent2022 Race: Black (ACS 5-Yr)2022 Race: Black (ACS 5-Yr): PercentGRID_ID.2Trail Counter NameSq_MetersAREAShape_LengthShape_AreaSpatial Component 1Spatial Component 2Spatial Component 3Spatial Component 4Spatial Component 5Spatial Component 6Spatial Component 7Spatial Component 8Spatial Component 9Spatial Component 10Spatial Component 11Spatial Component 12Spatial Component 13Spatial Component 14Spatial Component 15Spatial Component 16Spatial Component 17Spatial Component 18Spatial Component 19Spatial Component 20Spatial Component 21Spatial Component 22Spatial Component 23Spatial Component 24Spatial Component 25Spatial Component 26Spatial Component 27Spatial Component 28Spatial Component 29Spatial Component 30Spatial Component 31Spatial Component 32Spatial Component 33Spatial Component 34Spatial Component 35Spatial Component 36Spatial Component 37
0BD-172022-10-17BD-172022-10-23 23:59:002765.01181NaN2022-10-17BD-17_198BD-17_1982022-10-23 23:59:00BD-17_1982022-10-17BD-178134Polygon8134BD-1712 to 25%N Walton - Bentonville489982.325556VBTBD-17_VBT16.163683<Null><Null>922020-10-05 06:00:002020-10-12 05:59:59183730503005Ride,EBikeRide12 to 25%133016150014051690351952056656905107751070125022027530535580094011516512 to 25%BD-17_92VBTBD-17_VBTVBT_92VBT_92922020-10-05 06:00:002020-10-12 05:59:59VBT8843.066.939.00.00004.139276162.92353633.99657074.086262100.00.041.26356067.06006987.30833415.00000019.11747775.447.20.63N Walton - Bentonville653.1540004.02630800030200<Null>BD-1712 to 25%25...241.4039.6907.7320717.78615.2455147.3418816.15332.84272.32867.9000.00161516611043.11289.60120590.40101.1500.0013918.3921027.788611.38486.350.044.111226115.71139.03875.2430213.7171.24176394.58402.15<Null>N Walton - Bentonville489982.3255562604.986301489734.0191180.0157830.0408550.0272910.017883-0.0038950.0135020.021565-0.0198810.026586-0.008986-0.001549-0.014623-0.013211-0.0129380.017165-0.017385-0.0107530.0037790.007025-0.0011570.0142340.0057930.016226-0.0164560.0263060.0071630.0172730.0009700.005897-0.008076-0.005118-0.004417-0.019202-0.001520-0.0046880.012160-0.010374
1BD-172020-10-05BD-172020-10-11 23:59:003050.01837NaN2020-10-05BD-17_92BD-17_922020-10-11 23:59:00BD-17_922020-10-05BD-178134Polygon8134BD-1712 to 25%N Walton - Bentonville489982.325556VBTBD-17_VBT16.163683<Null><Null>922020-10-05 06:00:002020-10-12 05:59:59183730503005Ride,EBikeRide12 to 25%133016150014051690351952056656905107751070125022027530535580094011516512 to 25%BD-17_92VBTBD-17_VBTVBT_92VBT_92922020-10-05 06:00:002020-10-12 05:59:59VBT8843.066.939.00.00004.139276162.92353633.99657074.086262100.00.041.26356067.06006987.30833415.00000019.11747775.447.20.63N Walton - Bentonville653.1540004.02630800030200<Null>BD-1712 to 25%25...241.4039.6907.7320717.78615.2455147.3418816.15332.84272.32867.9000.00161516611043.11289.60120590.40101.1500.0013918.3921027.788611.38486.350.044.111226115.71139.03875.2430213.7171.24176394.58402.15<Null>N Walton - Bentonville489982.3255562604.986301489734.0191180.0157830.0408550.0272910.017883-0.0038950.0135020.021565-0.0198810.026586-0.008986-0.001549-0.014623-0.013211-0.0129380.017165-0.017385-0.0107530.0037790.007025-0.0011570.0142340.0057930.016226-0.0164560.0263060.0071630.0172730.0009700.005897-0.008076-0.005118-0.004417-0.019202-0.001520-0.0046880.012160-0.010374
2BE-152022-04-11BE-152022-04-17 23:59:0020680.03440NaN2022-04-11BE-15_171BE-15_1712022-04-17 23:59:00BE-15_1712022-04-11BE-158442Polygon8442BE-158 to 12%North Trail (RBG) - Bentonville489983.971087VBTBE-15_VBT14.190971<Null><Null>02018-12-31 06:00:002019-01-07 05:59:59303556155565Ride,EBikeRide8 to 12%228530057510523803100011015015052060635770197525751653257501030102014101102158 to 12%BE-15_0VBTBE-15_VBTVBT_0VBT_002018-12-31 06:00:002019-01-07 05:59:59VBT6527.045.026.11.94015.524672235.68964439.35653781.974665100.00.019.90337636.27100964.90000021.72440526.06928646.324.60.67North Trail (RGB) - Bentonville2022.6995542.724752000000<Null>BE-158 to 12%12...72.5243.01914.181914.1853.733828.363022.3953.7332.242614.8610.57264270321.574.6414596.0310.9132.733130.691413.861514.8565.940.034.488259.2614.35197.0491711.2900.0017779.0262.68<Null>North Trail (RBG) - Bentonville489983.9710872604.986127489733.9539930.0199260.0322650.0327380.014478-0.0087400.0085350.028416-0.0181120.023882-0.019235-0.006239-0.0051240.004725-0.0083640.018894-0.011290-0.0148840.0058780.003777-0.0008410.0117530.0007910.009333-0.0194130.0275400.0105740.000171-0.002385-0.005621-0.004850-0.007405-0.004869-0.022160-0.010040-0.0090150.0060140.005778
3BH-262020-12-21BH-262020-12-27 23:59:002820.0668NaN2020-12-21BH-26_103BH-26_1032020-12-27 23:59:00BH-26_1032020-12-21BH-267367Polygon7367BH-261 to 3%Horsebarn - Rogers489989.142339VBTBH-26_VBT2.903954<Null><Null>1522021-11-29 06:00:002021-12-06 05:59:5984224502365Ride,EBikeRide1 to 3%121012559550116012501001001506756803303708308503004002753005656002801901 to 3%BH-26_152VBTBH-26_VBTVBT_152VBT_1521522021-11-29 06:00:002021-12-06 05:59:59VBT7534.063.033.10.50025.713273177.16430931.86082869.994705100.00.030.07688356.16083373.90000020.00000030.41417053.728.90.86Horsebarn - Rogers765.0723685.211579000505075<Null>BH-261 to 3%3...19.0961.700.0000.0000.0000.0000.0000.0000.00228.5700.0001034.400.0000.0000.0000.0000.0000.0000.0000.000.035.089220.0000.00110.00000.0000.0000.0000.00<Null>Horsebarn - Rogers489989.1423392604.986401489734.056717-0.0350470.0160620.023415-0.0106110.023480-0.0089630.0091210.0203000.0012270.0048430.0029970.0028780.009927-0.0011730.0103980.029699-0.005415-0.001269-0.001177-0.000546-0.0303930.003949-0.0150630.0087060.0035190.019769-0.0173290.005003-0.008240-0.009508-0.024286-0.0020290.006402-0.001713-0.0025550.0028570.001643
4BQ-252020-08-17BQ-252020-08-23 23:59:008965.00NaN2020-08-17BQ-25_85BQ-25_852020-08-23 23:59:00BQ-25_852020-08-17BQ-257444Polygon7444BQ-251 to 3%Foerster Park - Rogers490003.801737ROGBQ-25_ROG1.669559<Null><Null>2172023-02-27 06:00:002023-03-06 05:59:59051004810Ride,EBikeRide1 to 3%2175279023030195528152200151270130077512151470187062597033555098011504405501 to 3%BQ-25_217ROGBQ-25_ROGROG_217ROG_2172172023-02-27 06:00:002023-03-06 05:59:59ROG7530.057.925.02.05009.574210210.68573316.55799962.636715100.00.025.31819550.98758873.90000030.00000039.00000056.630.70.89Foerster Park - Rogers1484.2724324.434122022000245110<Null>BQ-251 to 3%3...26120.4277.824934.5816623.06547.50446.1110.1460.8300.0029137.07779.8111771228447.5424.6586295.4600.0020.33529.5110619.385910.79213.840.037.39519916.21147.95725.861476218.2900.0084259.3400.00<Null>Foerster Park - Rogers490003.8017372604.986401489734.056717-0.024320-0.0235650.041983-0.0129290.014677-0.0209800.0103920.015039-0.0103220.0018680.020474-0.002792-0.002008-0.0098090.009287-0.0024580.007774-0.000434-0.0059060.0079070.0040700.003692-0.015666-0.014223-0.013315-0.0056380.0116270.007847-0.0128790.0228070.022764-0.010053-0.0109540.022039-0.0196980.014580-0.015723
\n", + "

5 rows × 278 columns

\n", + "
" + ], + "text/plain": [ + " GRID_ID.1 STARTweek_stravaDATE_cast GRID_ID ENDweek_stravaDATE_cast \\\n", + "0 BD-17 2022-10-17 BD-17 2022-10-23 23:59:00 \n", + "1 BD-17 2020-10-05 BD-17 2020-10-11 23:59:00 \n", + "2 BE-15 2022-04-11 BE-15 2022-04-17 23:59:00 \n", + "3 BH-26 2020-12-21 BH-26 2020-12-27 23:59:00 \n", + "4 BQ-25 2020-08-17 BQ-25 2020-08-23 23:59:00 \n", + "\n", + " SUM_total_trip_count EcoCntr_weekly_SUM MAX_slopePct STARTweek_Time \\\n", + "0 2765.0 1181 NaN 2022-10-17 \n", + "1 3050.0 1837 NaN 2020-10-05 \n", + "2 20680.0 3440 NaN 2022-04-11 \n", + "3 2820.0 668 NaN 2020-12-21 \n", + "4 8965.0 0 NaN 2020-08-17 \n", + "\n", + " HEXid_weekID HEXid_WeekID ENDweek_Time week_id week_start \\\n", + "0 BD-17_198 BD-17_198 2022-10-23 23:59:00 BD-17_198 2022-10-17 \n", + "1 BD-17_92 BD-17_92 2020-10-11 23:59:00 BD-17_92 2020-10-05 \n", + "2 BE-15_171 BE-15_171 2022-04-17 23:59:00 BE-15_171 2022-04-11 \n", + "3 BH-26_103 BH-26_103 2020-12-27 23:59:00 BH-26_103 2020-12-21 \n", + "4 BQ-25_85 BQ-25_85 2020-08-23 23:59:00 BQ-25_85 2020-08-17 \n", + "\n", + " _GRID_JOIN_ OBJECTID * Shape * SOURCE_ID GRID_ID_cov slopePct \\\n", + "0 BD-17 8134 Polygon 8134 BD-17 12 to 25% \n", + "1 BD-17 8134 Polygon 8134 BD-17 12 to 25% \n", + "2 BE-15 8442 Polygon 8442 BE-15 8 to 12% \n", + "3 BH-26 7367 Polygon 7367 BH-26 1 to 3% \n", + "4 BQ-25 7444 Polygon 7444 BQ-25 1 to 3% \n", + "\n", + " TrailCounter SqMeters ASOSID AR_asosID_hexID \\\n", + "0 N Walton - Bentonville 489982.325556 VBT BD-17_VBT \n", + "1 N Walton - Bentonville 489982.325556 VBT BD-17_VBT \n", + "2 North Trail (RBG) - Bentonville 489983.971087 VBT BE-15_VBT \n", + "3 Horsebarn - Rogers 489989.142339 VBT BH-26_VBT \n", + "4 Foerster Park - Rogers 490003.801737 ROG BQ-25_ROG \n", + "\n", + " Slope_10m_MEAN OBJECTID GRID_ID.1_cov TIMESTEP_stravaDATE_cast \\\n", + "0 16.163683 92 \n", + "1 16.163683 92 \n", + "2 14.190971 0 \n", + "3 2.903954 152 \n", + "4 1.669559 217 \n", + "\n", + " STARTTIME_stravaDATE_cast 
ENDTIME_stravaDATE_cast \\\n", + "0 2020-10-05 06:00:00 2020-10-12 05:59:59 \n", + "1 2020-10-05 06:00:00 2020-10-12 05:59:59 \n", + "2 2018-12-31 06:00:00 2019-01-07 05:59:59 \n", + "3 2021-11-29 06:00:00 2021-12-06 05:59:59 \n", + "4 2023-02-27 06:00:00 2023-03-06 05:59:59 \n", + "\n", + " SUM_EcoCounter_WeeklyAggregation SUM_total_trip_count_cov SUM_ride_count \\\n", + "0 1837 3050 3005 \n", + "1 1837 3050 3005 \n", + "2 3035 5615 5565 \n", + "3 842 2450 2365 \n", + "4 0 5100 4810 \n", + "\n", + " MAX_activity_type MAX_slopePct_cov SUM_forward_people_count \\\n", + "0 Ride,EBikeRide 12 to 25% 1330 \n", + "1 Ride,EBikeRide 12 to 25% 1330 \n", + "2 Ride,EBikeRide 8 to 12% 2285 \n", + "3 Ride,EBikeRide 1 to 3% 1210 \n", + "4 Ride,EBikeRide 1 to 3% 2175 \n", + "\n", + " SUM_reverse_people_count SUM_forward_commute_trip_count \\\n", + "0 1615 0 \n", + "1 1615 0 \n", + "2 3005 75 \n", + "3 1255 95 \n", + "4 2790 230 \n", + "\n", + " SUM_reverse_commute_trip_count SUM_forward_leisure_trip_count \\\n", + "0 0 1405 \n", + "1 0 1405 \n", + "2 105 2380 \n", + "3 50 1160 \n", + "4 30 1955 \n", + "\n", + " SUM_reverse_leisure_trip_count SUM_ebike_ride_count \\\n", + "0 1690 35 \n", + "1 1690 35 \n", + "2 3100 0 \n", + "3 1250 100 \n", + "4 2815 220 \n", + "\n", + " SUM_forward_morning_trip_count SUM_reverse_morning_trip_count \\\n", + "0 195 205 \n", + "1 195 205 \n", + "2 110 150 \n", + "3 100 150 \n", + "4 0 15 \n", + "\n", + " SUM_forward_midday_trip_count SUM_reverse_midday_trip_count \\\n", + "0 665 690 \n", + "1 665 690 \n", + "2 1505 2060 \n", + "3 675 680 \n", + "4 1270 1300 \n", + "\n", + " SUM_forward_evening_trip_count SUM_reverse_evening_trip_count \\\n", + "0 510 775 \n", + "1 510 775 \n", + "2 635 770 \n", + "3 330 370 \n", + "4 775 1215 \n", + "\n", + " SUM_forward_male_people_count SUM_reverse_male_people_count \\\n", + "0 1070 1250 \n", + "1 1070 1250 \n", + "2 1975 2575 \n", + "3 830 850 \n", + "4 1470 1870 \n", + "\n", + " SUM_forward_female_people_count 
SUM_reverse_female_people_count \\\n", + "0 220 275 \n", + "1 220 275 \n", + "2 165 325 \n", + "3 300 400 \n", + "4 625 970 \n", + "\n", + " SUM_forward_18_34_people_count SUM_reverse_18_34_people_count \\\n", + "0 305 355 \n", + "1 305 355 \n", + "2 750 1030 \n", + "3 275 300 \n", + "4 335 550 \n", + "\n", + " SUM_forward_35_54_people_count SUM_reverse_35_54_people_count \\\n", + "0 800 940 \n", + "1 800 940 \n", + "2 1020 1410 \n", + "3 565 600 \n", + "4 980 1150 \n", + "\n", + " SUM_forward_55_64_people_count SUM_reverse_55_64_people_count \\\n", + "0 115 165 \n", + "1 115 165 \n", + "2 110 215 \n", + "3 280 190 \n", + "4 440 550 \n", + "\n", + " MIN_slopePct hexid_weekid_join ASOS_ID HexIDstation ASOS ID WeekID Join \\\n", + "0 12 to 25% BD-17_92 VBT BD-17_VBT VBT_92 \n", + "1 12 to 25% BD-17_92 VBT BD-17_VBT VBT_92 \n", + "2 8 to 12% BE-15_0 VBT BE-15_VBT VBT_0 \n", + "3 1 to 3% BH-26_152 VBT BH-26_VBT VBT_152 \n", + "4 1 to 3% BQ-25_217 ROG BQ-25_ROG ROG_217 \n", + "\n", + " asosID_encodeSTEP TIMESTEP_weekID STARTTIME_Weekday \\\n", + "0 VBT_92 92 2020-10-05 06:00:00 \n", + "1 VBT_92 92 2020-10-05 06:00:00 \n", + "2 VBT_0 0 2018-12-31 06:00:00 \n", + "3 VBT_152 152 2021-11-29 06:00:00 \n", + "4 ROG_217 217 2023-02-27 06:00:00 \n", + "\n", + " ENDTIME_Weekday station MAX_max_temp_f MIN_min_temp_f \\\n", + "0 2020-10-12 05:59:59 VBT 88 43.0 \n", + "1 2020-10-12 05:59:59 VBT 88 43.0 \n", + "2 2019-01-07 05:59:59 VBT 65 27.0 \n", + "3 2021-12-06 05:59:59 VBT 75 34.0 \n", + "4 2023-03-06 05:59:59 ROG 75 30.0 \n", + "\n", + " MAX_max_dewpoint_f MIN_min_dewpoint_f SUM_precip_in \\\n", + "0 66.9 39.0 0.0000 \n", + "1 66.9 39.0 0.0000 \n", + "2 45.0 26.1 1.9401 \n", + "3 63.0 33.1 0.5002 \n", + "4 57.9 25.0 2.0500 \n", + "\n", + " MEAN_avg_wind_speed_kts MEAN_avg_wind_drct MIN_min_rh MEAN_avg_rh \\\n", + "0 4.139276 162.923536 33.996570 74.086262 \n", + "1 4.139276 162.923536 33.996570 74.086262 \n", + "2 5.524672 235.689644 39.356537 81.974665 \n", + "3 5.713273 
177.164309 31.860828 69.994705 \n", + "4 9.574210 210.685733 16.557999 62.636715 \n", + "\n", + " MAX_max_rh SUM_snow_in MIN_min_feel MEAN_avg_feel MAX_max_feel \\\n", + "0 100.0 0.0 41.263560 67.060069 87.308334 \n", + "1 100.0 0.0 41.263560 67.060069 87.308334 \n", + "2 100.0 0.0 19.903376 36.271009 64.900000 \n", + "3 100.0 0.0 30.076883 56.160833 73.900000 \n", + "4 100.0 0.0 25.318195 50.987588 73.900000 \n", + "\n", + " MAX_max_wind_speed_kts MAX_max_wind_gust_kts MAX_climo_high_f \\\n", + "0 15.000000 19.117477 75.4 \n", + "1 15.000000 19.117477 75.4 \n", + "2 21.724405 26.069286 46.3 \n", + "3 20.000000 30.414170 53.7 \n", + "4 30.000000 39.000000 56.6 \n", + "\n", + " MIN_climo_low_f SUM_climo_precip_in Location of Ecocounter \\\n", + "0 47.2 0.63 N Walton - Bentonville \n", + "1 47.2 0.63 N Walton - Bentonville \n", + "2 24.6 0.67 North Trail (RGB) - Bentonville \n", + "3 28.9 0.86 Horsebarn - Rogers \n", + "4 30.7 0.89 Foerster Park - Rogers \n", + "\n", + " COUNT MEAN_forward_average_speed_meters_per_second \\\n", + "0 65 3.154000 \n", + "1 65 3.154000 \n", + "2 202 2.699554 \n", + "3 76 5.072368 \n", + "4 148 4.272432 \n", + "\n", + " MEAN_reverse_average_speed_meters_per_second \\\n", + "0 4.026308 \n", + "1 4.026308 \n", + "2 2.724752 \n", + "3 5.211579 \n", + "4 4.434122 \n", + "\n", + " SUM_forward_overnight_trip_count SUM_reverse_overnight_trip_count \\\n", + "0 0 0 \n", + "1 0 0 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 220 \n", + "\n", + " SUM_forward_unspecified_people_count SUM_reverse_unspecified_people_count \\\n", + "0 0 30 \n", + "1 0 30 \n", + "2 0 0 \n", + "3 0 50 \n", + "4 0 0 \n", + "\n", + " SUM_forward_65_plus_people_count SUM_reverse_65_plus_people_count \\\n", + "0 20 0 \n", + "1 20 0 \n", + "2 0 0 \n", + "3 50 75 \n", + "4 245 110 \n", + "\n", + " OBJECTID.1 HEX GRID_ID Slope Pct Category Slope MAXpct ... \\\n", + "0 BD-17 12 to 25% 25 ... \n", + "1 BD-17 12 to 25% 25 ... \n", + "2 BE-15 8 to 12% 12 ... \n", + "3 BH-26 1 to 3% 3 ... 
\n", + "4 BQ-25 1 to 3% 3 ... \n", + "\n", + " 2029 Other Race Population 2029 Other Race Population: Percent \\\n", + "0 24 1.40 \n", + "1 24 1.40 \n", + "2 7 2.52 \n", + "3 1 9.09 \n", + "4 261 20.42 \n", + "\n", + " 2029 Diversity Index 2022 Pop 25+: HS Diploma (ACS 5-Yr) \\\n", + "0 39.6 90 \n", + "1 39.6 90 \n", + "2 43.0 19 \n", + "3 61.7 0 \n", + "4 77.8 249 \n", + "\n", + " 2022 Pop 25+: HS Diploma (ACS 5-Yr): Percent \\\n", + "0 7.73 \n", + "1 7.73 \n", + "2 14.18 \n", + "3 0.00 \n", + "4 34.58 \n", + "\n", + " 2022 Pop 25+: Some College (ACS 5-Yr) \\\n", + "0 207 \n", + "1 207 \n", + "2 19 \n", + "3 0 \n", + "4 166 \n", + "\n", + " 2022 Pop 25+: Some College (ACS 5-Yr): Percent \\\n", + "0 17.78 \n", + "1 17.78 \n", + "2 14.18 \n", + "3 0.00 \n", + "4 23.06 \n", + "\n", + " 2022 Pop 25+: Assoc Degree (ACS 5-Yr) \\\n", + "0 61 \n", + "1 61 \n", + "2 5 \n", + "3 0 \n", + "4 54 \n", + "\n", + " 2022 Pop 25+: Assoc Degree (ACS 5-Yr): Percent \\\n", + "0 5.24 \n", + "1 5.24 \n", + "2 3.73 \n", + "3 0.00 \n", + "4 7.50 \n", + "\n", + " 2022 Pop 25+: Bach Degree (ACS 5-Yr) \\\n", + "0 551 \n", + "1 551 \n", + "2 38 \n", + "3 0 \n", + "4 44 \n", + "\n", + " 2022 Pop 25+: Bach Degree (ACS 5-Yr): Percent \\\n", + "0 47.34 \n", + "1 47.34 \n", + "2 28.36 \n", + "3 0.00 \n", + "4 6.11 \n", + "\n", + " 2022 Pop 25+: Master`s Deg (ACS 5-Yr) \\\n", + "0 188 \n", + "1 188 \n", + "2 30 \n", + "3 0 \n", + "4 1 \n", + "\n", + " 2022 Pop 25+: Master`s Deg (ACS 5-Yr): Percent \\\n", + "0 16.15 \n", + "1 16.15 \n", + "2 22.39 \n", + "3 0.00 \n", + "4 0.14 \n", + "\n", + " 2022 Pop 25+: Prof Sch Deg (ACS 5-Yr) \\\n", + "0 33 \n", + "1 33 \n", + "2 5 \n", + "3 0 \n", + "4 6 \n", + "\n", + " 2022 Pop 25+: Prof Sch Deg (ACS 5-Yr): Percent \\\n", + "0 2.84 \n", + "1 2.84 \n", + "2 3.73 \n", + "3 0.00 \n", + "4 0.83 \n", + "\n", + " 2022 Pop 25+: Doctorate (ACS 5-Yr) \\\n", + "0 27 \n", + "1 27 \n", + "2 3 \n", + "3 0 \n", + "4 0 \n", + "\n", + " 2022 Pop 25+: Doctorate (ACS 5-Yr): 
Percent \\\n", + "0 2.32 \n", + "1 2.32 \n", + "2 2.24 \n", + "3 0.00 \n", + "4 0.00 \n", + "\n", + " 2024 Pop Age 25+: High School Diploma \\\n", + "0 86 \n", + "1 86 \n", + "2 26 \n", + "3 2 \n", + "4 291 \n", + "\n", + " 2024 Pop Age 25+: High School Diploma: Percent 2024 Pop Age 25+: GED \\\n", + "0 7.90 0 \n", + "1 7.90 0 \n", + "2 14.86 1 \n", + "3 28.57 0 \n", + "4 37.07 77 \n", + "\n", + " 2024 Pop Age 25+: GED: Percent 2020 Total Population \\\n", + "0 0.00 1615 \n", + "1 0.00 1615 \n", + "2 0.57 264 \n", + "3 0.00 0 \n", + "4 9.81 1177 \n", + "\n", + " 2024 Total Population 2024 Population Density \\\n", + "0 1661 1043.1 \n", + "1 1661 1043.1 \n", + "2 270 321.5 \n", + "3 10 34.4 \n", + "4 1228 447.5 \n", + "\n", + " 2022 Civilian Pop 18+: Veteran (ACS 5-Yr) \\\n", + "0 128 \n", + "1 128 \n", + "2 7 \n", + "3 0 \n", + "4 42 \n", + "\n", + " 2022 Civilian Pop 18+: Veteran (ACS 5-Yr): Percent \\\n", + "0 9.60 \n", + "1 9.60 \n", + "2 4.64 \n", + "3 0.00 \n", + "4 4.65 \n", + "\n", + " 2022 Civilian Pop 18+: Nonveteran (ACS 5-Yr) \\\n", + "0 1205 \n", + "1 1205 \n", + "2 145 \n", + "3 0 \n", + "4 862 \n", + "\n", + " 2022 Civilian Pop 18+: Nonveteran (ACS 5-Yr): Percent \\\n", + "0 90.40 \n", + "1 90.40 \n", + "2 96.03 \n", + "3 0.00 \n", + "4 95.46 \n", + "\n", + " 2022 Workers 16+: Bicycle (ACS 5-Yr) \\\n", + "0 10 \n", + "1 10 \n", + "2 1 \n", + "3 0 \n", + "4 0 \n", + "\n", + " 2022 Workers 16+: Bicycle (ACS 5-Yr): Percent \\\n", + "0 1.15 \n", + "1 1.15 \n", + "2 0.91 \n", + "3 0.00 \n", + "4 0.00 \n", + "\n", + " 2022 Workers 16+: Walked (ACS 5-Yr) \\\n", + "0 0 \n", + "1 0 \n", + "2 3 \n", + "3 0 \n", + "4 2 \n", + "\n", + " 2022 Workers 16+: Walked (ACS 5-Yr): Percent \\\n", + "0 0.00 \n", + "1 0.00 \n", + "2 2.73 \n", + "3 0.00 \n", + "4 0.33 \n", + "\n", + " 2022 Commute to Work: 15-19 Min (ACS 5-Yr) \\\n", + "0 139 \n", + "1 139 \n", + "2 31 \n", + "3 0 \n", + "4 52 \n", + "\n", + " 2022 Commute to Work: 15-19 Min (ACS 5-Yr): Percent \\\n", + "0 
18.39 \n", + "1 18.39 \n", + "2 30.69 \n", + "3 0.00 \n", + "4 9.51 \n", + "\n", + " 2022 Commute to Work: 10-14 Min (ACS 5-Yr) \\\n", + "0 210 \n", + "1 210 \n", + "2 14 \n", + "3 0 \n", + "4 106 \n", + "\n", + " 2022 Commute to Work: 10-14 Min (ACS 5-Yr): Percent \\\n", + "0 27.78 \n", + "1 27.78 \n", + "2 13.86 \n", + "3 0.00 \n", + "4 19.38 \n", + "\n", + " 2022 Commute to Work: 5-9 Min (ACS 5-Yr) \\\n", + "0 86 \n", + "1 86 \n", + "2 15 \n", + "3 0 \n", + "4 59 \n", + "\n", + " 2022 Commute to Work: 5-9 Min (ACS 5-Yr): Percent \\\n", + "0 11.38 \n", + "1 11.38 \n", + "2 14.85 \n", + "3 0.00 \n", + "4 10.79 \n", + "\n", + " 2022 Commute to Work: <5 Min (ACS 5-Yr) \\\n", + "0 48 \n", + "1 48 \n", + "2 6 \n", + "3 0 \n", + "4 21 \n", + "\n", + " 2022 Commute to Work: <5 Min (ACS 5-Yr): Percent \\\n", + "0 6.35 \n", + "1 6.35 \n", + "2 5.94 \n", + "3 0.00 \n", + "4 3.84 \n", + "\n", + " 2022 Avg Commute to Work (ACS 5-Yr) 2024 Median Age \\\n", + "0 0.0 44.1 \n", + "1 0.0 44.1 \n", + "2 0.0 34.4 \n", + "3 0.0 35.0 \n", + "4 0.0 37.3 \n", + "\n", + " 2024 Median Age: Index 2024 Senior Population \\\n", + "0 112 261 \n", + "1 112 261 \n", + "2 88 25 \n", + "3 89 2 \n", + "4 95 199 \n", + "\n", + " 2024 Senior Population: Percent 2020 Multiple Races Pop 35-39 \\\n", + "0 15.71 13 \n", + "1 15.71 13 \n", + "2 9.26 1 \n", + "3 20.00 0 \n", + "4 16.21 14 \n", + "\n", + " 2020 Multiple Races Pop 35-39: Percent 2024 Population Age 0-4 \\\n", + "0 9.03 87 \n", + "1 9.03 87 \n", + "2 4.35 19 \n", + "3 0.00 1 \n", + "4 7.95 72 \n", + "\n", + " 2024 Population Age 0-4: Percent 2022 Poverty Index (ACS 5-Yr) \\\n", + "0 5.24 30 \n", + "1 5.24 30 \n", + "2 7.04 91 \n", + "3 10.00 0 \n", + "4 5.86 147 \n", + "\n", + " 2022 HHs: Inc Below Poverty Level (ACS 5-Yr) \\\n", + "0 21 \n", + "1 21 \n", + "2 7 \n", + "3 0 \n", + "4 62 \n", + "\n", + " 2022 HHs: Inc Below Poverty Level (ACS 5-Yr): Percent \\\n", + "0 3.71 \n", + "1 3.71 \n", + "2 11.29 \n", + "3 0.00 \n", + "4 18.29 \n", + 
"\n", + " 2022 HHs w/Public Assist Income (ACS 5-Yr) \\\n", + "0 7 \n", + "1 7 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n", + "\n", + " 2022 HHs w/Public Assist Income (ACS 5-Yr): Percent \\\n", + "0 1.24 \n", + "1 1.24 \n", + "2 0.00 \n", + "3 0.00 \n", + "4 0.00 \n", + "\n", + " 2022 Race: White (ACS 5-Yr) 2022 Race: White (ACS 5-Yr): Percent \\\n", + "0 1763 94.58 \n", + "1 1763 94.58 \n", + "2 177 79.02 \n", + "3 0 0.00 \n", + "4 842 59.34 \n", + "\n", + " 2022 Race: Black (ACS 5-Yr) 2022 Race: Black (ACS 5-Yr): Percent \\\n", + "0 40 2.15 \n", + "1 40 2.15 \n", + "2 6 2.68 \n", + "3 0 0.00 \n", + "4 0 0.00 \n", + "\n", + " GRID_ID.2 Trail Counter Name Sq_MetersAREA Shape_Length \\\n", + "0 N Walton - Bentonville 489982.325556 2604.986301 \n", + "1 N Walton - Bentonville 489982.325556 2604.986301 \n", + "2 North Trail (RBG) - Bentonville 489983.971087 2604.986127 \n", + "3 Horsebarn - Rogers 489989.142339 2604.986401 \n", + "4 Foerster Park - Rogers 490003.801737 2604.986401 \n", + "\n", + " Shape_Area Spatial Component 1 Spatial Component 2 \\\n", + "0 489734.019118 0.015783 0.040855 \n", + "1 489734.019118 0.015783 0.040855 \n", + "2 489733.953993 0.019926 0.032265 \n", + "3 489734.056717 -0.035047 0.016062 \n", + "4 489734.056717 -0.024320 -0.023565 \n", + "\n", + " Spatial Component 3 Spatial Component 4 Spatial Component 5 \\\n", + "0 0.027291 0.017883 -0.003895 \n", + "1 0.027291 0.017883 -0.003895 \n", + "2 0.032738 0.014478 -0.008740 \n", + "3 0.023415 -0.010611 0.023480 \n", + "4 0.041983 -0.012929 0.014677 \n", + "\n", + " Spatial Component 6 Spatial Component 7 Spatial Component 8 \\\n", + "0 0.013502 0.021565 -0.019881 \n", + "1 0.013502 0.021565 -0.019881 \n", + "2 0.008535 0.028416 -0.018112 \n", + "3 -0.008963 0.009121 0.020300 \n", + "4 -0.020980 0.010392 0.015039 \n", + "\n", + " Spatial Component 9 Spatial Component 10 Spatial Component 11 \\\n", + "0 0.026586 -0.008986 -0.001549 \n", + "1 0.026586 -0.008986 -0.001549 \n", + "2 0.023882 -0.019235 
-0.006239 \n", + "3 0.001227 0.004843 0.002997 \n", + "4 -0.010322 0.001868 0.020474 \n", + "\n", + " Spatial Component 12 Spatial Component 13 Spatial Component 14 \\\n", + "0 -0.014623 -0.013211 -0.012938 \n", + "1 -0.014623 -0.013211 -0.012938 \n", + "2 -0.005124 0.004725 -0.008364 \n", + "3 0.002878 0.009927 -0.001173 \n", + "4 -0.002792 -0.002008 -0.009809 \n", + "\n", + " Spatial Component 15 Spatial Component 16 Spatial Component 17 \\\n", + "0 0.017165 -0.017385 -0.010753 \n", + "1 0.017165 -0.017385 -0.010753 \n", + "2 0.018894 -0.011290 -0.014884 \n", + "3 0.010398 0.029699 -0.005415 \n", + "4 0.009287 -0.002458 0.007774 \n", + "\n", + " Spatial Component 18 Spatial Component 19 Spatial Component 20 \\\n", + "0 0.003779 0.007025 -0.001157 \n", + "1 0.003779 0.007025 -0.001157 \n", + "2 0.005878 0.003777 -0.000841 \n", + "3 -0.001269 -0.001177 -0.000546 \n", + "4 -0.000434 -0.005906 0.007907 \n", + "\n", + " Spatial Component 21 Spatial Component 22 Spatial Component 23 \\\n", + "0 0.014234 0.005793 0.016226 \n", + "1 0.014234 0.005793 0.016226 \n", + "2 0.011753 0.000791 0.009333 \n", + "3 -0.030393 0.003949 -0.015063 \n", + "4 0.004070 0.003692 -0.015666 \n", + "\n", + " Spatial Component 24 Spatial Component 25 Spatial Component 26 \\\n", + "0 -0.016456 0.026306 0.007163 \n", + "1 -0.016456 0.026306 0.007163 \n", + "2 -0.019413 0.027540 0.010574 \n", + "3 0.008706 0.003519 0.019769 \n", + "4 -0.014223 -0.013315 -0.005638 \n", + "\n", + " Spatial Component 27 Spatial Component 28 Spatial Component 29 \\\n", + "0 0.017273 0.000970 0.005897 \n", + "1 0.017273 0.000970 0.005897 \n", + "2 0.000171 -0.002385 -0.005621 \n", + "3 -0.017329 0.005003 -0.008240 \n", + "4 0.011627 0.007847 -0.012879 \n", + "\n", + " Spatial Component 30 Spatial Component 31 Spatial Component 32 \\\n", + "0 -0.008076 -0.005118 -0.004417 \n", + "1 -0.008076 -0.005118 -0.004417 \n", + "2 -0.004850 -0.007405 -0.004869 \n", + "3 -0.009508 -0.024286 -0.002029 \n", + "4 0.022807 0.022764 
-0.010053 \n", + "\n", + " Spatial Component 33 Spatial Component 34 Spatial Component 35 \\\n", + "0 -0.019202 -0.001520 -0.004688 \n", + "1 -0.019202 -0.001520 -0.004688 \n", + "2 -0.022160 -0.010040 -0.009015 \n", + "3 0.006402 -0.001713 -0.002555 \n", + "4 -0.010954 0.022039 -0.019698 \n", + "\n", + " Spatial Component 36 Spatial Component 37 \n", + "0 0.012160 -0.010374 \n", + "1 0.012160 -0.010374 \n", + "2 0.006014 0.005778 \n", + "3 0.002857 0.001643 \n", + "4 0.014580 -0.015723 \n", + "\n", + "[5 rows x 278 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Path to merged CSV\n", + "DATA_PATH = \"../processed_data/weekly_with_covariates.csv\"\n", + "data = pd.read_csv(DATA_PATH, parse_dates=[\"week_start\"], low_memory=False)\n", + "print(\"Loaded:\", data.shape)\n", + "\n", + "# Sanity checks\n", + "assert \"EcoCntr_weekly_SUM\" in data.columns, \"EcoCntr_weekly_SUM not found.\"\n", + "assert \"SUM_total_trip_count\" in data.columns, \"SUM_total_trip_count not found.\"\n", + "\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "id": "ed45dea3", + "metadata": {}, + "source": [ + "## 2) Build predictors" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "94819e39", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Rows after minimal filter: 3377\n", + "Predictor count (incl. 
Strava): 263\n" + ] + } + ], + "source": [ + "# Exclude ID/date/geometry columns\n", + "core_cols = [\n", + " \"week_id\", \"week_start\", \"EcoCntr_weekly_SUM\",\n", + " \"SUM_total_trip_count\", \"GRID_ID\", \"GRID_ID.1\",\n", + " \"HEXid_WeekID\", \"HEXid_weekID\", \"STARTweek_Time\", \"ENDweek_Time\",\n", + " \"STARTweek_stravaDATE_cast\", \"ENDweek_stravaDATE_cast\",\n", + " \"_GRID_JOIN_\", \"OBJECTID *\", \"Shape *\", \"OBJECTID\", \"Shape\"\n", + "]\n", + "\n", + "predictors = [\"SUM_total_trip_count\"] + [c for c in data.columns if c not in core_cols]\n", + "\n", + "# Coerce numerics (non-numeric -> NaN)\n", + "for c in predictors:\n", + " data[c] = pd.to_numeric(data[c], errors=\"coerce\")\n", + "\n", + "# y and X (minimal row filter: target + main predictor must exist)\n", + "y = pd.to_numeric(data[\"EcoCntr_weekly_SUM\"], errors=\"coerce\")\n", + "X = data[predictors].copy()\n", + "mask = (~y.isna()) & (~X[\"SUM_total_trip_count\"].isna())\n", + "X = X.loc[mask].copy()\n", + "y = y.loc[mask].copy()\n", + "\n", + "print(\"Rows after minimal filter:\", X.shape[0])\n", + "print(\"Predictor count (incl. 
Strava):\", len(predictors))" + ] + }, + { + "cell_type": "markdown", + "id": "0588a328", + "metadata": {}, + "source": [ + "## 3) Drop very sparse columns (>80% NaN)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9e5b3b76", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predictors retained after sparsity filter: 228\n" + ] + } + ], + "source": [ + "# Compute NaN fraction per column, drop columns with >80% NaN\n", + "nan_frac = X.isna().mean()\n", + "keep_cols = nan_frac[nan_frac <= 0.80].index.tolist()\n", + "X = X[keep_cols].copy()\n", + "print(\"Predictors retained after sparsity filter:\", len(keep_cols))" + ] + }, + { + "cell_type": "markdown", + "id": "861533bd", + "metadata": {}, + "source": [ + "## 4) Train/test split + mean imputation (fit on train)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0c16cf4d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train/Test shapes post-impute: (2701, 228) (676, 228)\n" + ] + } + ], + "source": [ + "# Split\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.2, random_state=42\n", + ")\n", + "\n", + "# Imputer fit on train only (no leakage)\n", + "imputer = SimpleImputer(strategy=\"mean\")\n", + "X_train_imp = pd.DataFrame(imputer.fit_transform(X_train), columns=X_train.columns, index=X_train.index)\n", + "X_test_imp = pd.DataFrame(imputer.transform(X_test), columns=X_test.columns, index=X_test.index)\n", + "\n", + "print(\"Train/Test shapes post-impute:\", X_train_imp.shape, X_test_imp.shape)" + ] + }, + { + "cell_type": "markdown", + "id": "42e1260b", + "metadata": {}, + "source": [ + "## 5) Remove constant and duplicate columns (reduce multicollinearity)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cea1af06", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"Predictors retained after collinearity filter: 222\n" + ] + } + ], + "source": [ + "# Drop constant columns\n", + "nonconst_cols = X_train_imp.columns[X_train_imp.nunique() > 1]\n", + "X_train_imp = X_train_imp[nonconst_cols]\n", + "X_test_imp = X_test_imp[nonconst_cols]\n", + "\n", + "# Drop duplicate columns\n", + "X_train_imp = X_train_imp.loc[:, ~X_train_imp.T.duplicated()]\n", + "# Align test to train columns\n", + "X_test_imp = X_test_imp[X_train_imp.columns]\n", + "\n", + "print(f\"Predictors retained after collinearity filter: {len(X_train_imp.columns)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "2cf54ae9", + "metadata": {}, + "source": [ + "## 6) Fit OLS (HC3) and evaluate" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "52df2448", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: EcoCntr_weekly_SUM R-squared: 0.490\n", + "Model: OLS Adj. R-squared: 0.487\n", + "Method: Least Squares F-statistic: 728.0\n", + "Date: Wed, 06 Aug 2025 Prob (F-statistic): 0.00\n", + "Time: 01:24:50 Log-Likelihood: -23860.\n", + "No. 
Observations: 2701 AIC: 4.775e+04\n", + "Df Residuals: 2687 BIC: 4.783e+04\n", + "Df Model: 13 \n", + "Covariance Type: HC3 \n", + "=========================================================================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "-------------------------------------------------------------------------------------------------------------------------\n", + "const -1.067e-07 2.08e-08 -5.120 0.000 -1.47e-07 -6.58e-08\n", + "SUM_total_trip_count 0.1611 0.007 21.878 0.000 0.147 0.175\n", + "SOURCE_ID 0.0500 0.010 5.182 0.000 0.031 0.069\n", + "SqMeters -0.0522 0.010 -5.123 0.000 -0.072 -0.032\n", + "Slope_10m_MEAN 0.0005 4.53e-05 12.087 0.000 0.000 0.001\n", + "TIMESTEP_stravaDATE_cast -0.0029 0.002 -1.624 0.104 -0.006 0.001\n", + "SUM_EcoCounter_WeeklyAggregation 0.3718 0.024 15.437 0.000 0.325 0.419\n", + "SUM_total_trip_count_cov -0.0464 0.007 -6.723 0.000 -0.060 -0.033\n", + "SUM_ride_count -0.0433 0.008 -5.682 0.000 -0.058 -0.028\n", + "SUM_forward_people_count 0.0029 0.006 0.496 0.620 -0.009 0.015\n", + "SUM_reverse_people_count -0.0662 0.007 -8.985 0.000 -0.081 -0.052\n", + "SUM_forward_commute_trip_count -0.0079 0.000 -25.272 0.000 -0.009 -0.007\n", + "SUM_reverse_commute_trip_count 0.0022 0.000 4.860 0.000 0.001 0.003\n", + "SUM_forward_leisure_trip_count 0.0282 0.009 3.297 0.001 0.011 0.045\n", + "SUM_reverse_leisure_trip_count -0.0674 0.007 -9.160 0.000 -0.082 -0.053\n", + "SUM_ebike_ride_count -0.0054 0.001 -3.631 0.000 -0.008 -0.002\n", + "SUM_forward_morning_trip_count 0.0325 0.007 4.525 0.000 0.018 0.047\n", + "SUM_reverse_morning_trip_count 0.0326 0.007 4.723 0.000 0.019 0.046\n", + "SUM_forward_midday_trip_count -0.0319 0.005 -6.164 0.000 -0.042 -0.022\n", + "SUM_reverse_midday_trip_count -0.0594 0.015 -3.939 0.000 -0.089 -0.030\n", + "SUM_forward_evening_trip_count 0.0128 0.005 2.817 0.005 0.004 0.022\n", + "SUM_reverse_evening_trip_count -0.0372 0.003 -12.670 0.000 -0.043 
-0.031\n", + "SUM_forward_male_people_count 0.0012 0.003 0.420 0.674 -0.004 0.007\n", + "SUM_reverse_male_people_count -0.0474 0.011 -4.500 0.000 -0.068 -0.027\n", + "SUM_forward_female_people_count -0.0008 0.005 -0.142 0.887 -0.011 0.010\n", + "SUM_reverse_female_people_count -0.0254 0.004 -7.155 0.000 -0.032 -0.018\n", + "SUM_forward_18_34_people_count 0.0094 0.002 5.163 0.000 0.006 0.013\n", + "SUM_reverse_18_34_people_count -0.0167 0.006 -2.963 0.003 -0.028 -0.006\n", + "SUM_forward_35_54_people_count 0.0043 0.005 0.861 0.389 -0.005 0.014\n", + "SUM_reverse_35_54_people_count -0.0275 0.002 -11.729 0.000 -0.032 -0.023\n", + "SUM_forward_55_64_people_count -0.0063 0.002 -3.272 0.001 -0.010 -0.003\n", + "SUM_reverse_55_64_people_count -0.0133 0.001 -10.092 0.000 -0.016 -0.011\n", + "MAX_max_temp_f 0.0028 0.001 5.306 0.000 0.002 0.004\n", + "MIN_min_temp_f 0.0044 0.001 7.766 0.000 0.003 0.005\n", + "MAX_max_dewpoint_f 0.0025 0.000 4.929 0.000 0.001 0.003\n", + "MIN_min_dewpoint_f 0.0043 0.001 8.097 0.000 0.003 0.005\n", + "SUM_precip_in -0.0001 2.97e-05 -4.149 0.000 -0.000 -6.49e-05\n", + "MEAN_avg_wind_speed_kts -0.0002 2.04e-05 -10.551 0.000 -0.000 -0.000\n", + "MEAN_avg_wind_drct -0.0049 0.001 -5.282 0.000 -0.007 -0.003\n", + "MIN_min_rh 0.0016 8.54e-05 18.824 0.000 0.001 0.002\n", + "MEAN_avg_rh 0.0002 0.000 1.424 0.154 -9.22e-05 0.001\n", + "MAX_max_rh -0.0003 1.96e-05 -13.128 0.000 -0.000 -0.000\n", + "MIN_min_feel 0.0050 0.001 7.347 0.000 0.004 0.006\n", + "MEAN_avg_feel 0.0043 0.001 5.461 0.000 0.003 0.006\n", + "MAX_max_feel 0.0035 0.001 5.687 0.000 0.002 0.005\n", + "MAX_max_wind_speed_kts -0.0006 0.000 -6.289 0.000 -0.001 -0.000\n", + "MAX_max_wind_gust_kts -0.0008 0.000 -6.353 0.000 -0.001 -0.001\n", + "MAX_climo_high_f 0.0036 0.001 5.599 0.000 0.002 0.005\n", + "MIN_climo_low_f 0.0035 0.001 5.947 0.000 0.002 0.005\n", + "SUM_climo_precip_in 3.829e-05 2.75e-06 13.917 0.000 3.29e-05 4.37e-05\n", + "COUNT -0.0003 0.001 -0.385 0.700 -0.002 0.001\n", + 
"MEAN_forward_average_speed_meters_per_second -0.0002 1.01e-05 -16.697 0.000 -0.000 -0.000\n", + "MEAN_reverse_average_speed_meters_per_second -0.0002 1.62e-05 -10.649 0.000 -0.000 -0.000\n", + "SUM_forward_overnight_trip_count 0.0090 0.002 4.356 0.000 0.005 0.013\n", + "SUM_reverse_overnight_trip_count -0.0040 0.001 -3.465 0.001 -0.006 -0.002\n", + "SUM_forward_unspecified_people_count 0.0016 0.000 10.237 0.000 0.001 0.002\n", + "SUM_reverse_unspecified_people_count 0.0006 0.000 2.945 0.003 0.000 0.001\n", + "SUM_forward_65_plus_people_count -0.0079 0.001 -7.280 0.000 -0.010 -0.006\n", + "SUM_reverse_65_plus_people_count -0.0019 0.000 -4.503 0.000 -0.003 -0.001\n", + "Slope MAXpct 0.0007 0.000 6.148 0.000 0.000 0.001\n", + "HEX Area ACRES -1.289e-05 2.52e-06 -5.123 0.000 -1.78e-05 -7.96e-06\n", + "Count LC Polys 2023 3.505e-05 0.000 0.281 0.779 -0.000 0.000\n", + "Minority LandClass Pct -0.0009 0.000 -2.443 0.015 -0.002 -0.000\n", + "Majority LandClass Pct -0.0015 0.000 -3.325 0.001 -0.002 -0.001\n", + "Replica OSMfeature COUNT 0.0011 0.001 1.149 0.250 -0.001 0.003\n", + "SUM_AADT_replicaOSM -0.0040 0.002 -2.365 0.018 -0.007 -0.001\n", + "MEAN_MPH_replicaOSM -0.0003 0.000 -1.160 0.246 -0.001 0.000\n", + "pMEAN_length 0.0029 0.001 2.290 0.022 0.000 0.005\n", + "MINOR RoadClass Pct 0.0005 0.000 3.940 0.000 0.000 0.001\n", + "MAJOR RoadClass Pct 0.0006 0.000 3.976 0.000 0.000 0.001\n", + "Topo Rough Index MEAN 0.0002 1.44e-05 12.755 0.000 0.000 0.000\n", + "Topo Rough Index MEDIAN 0.0002 1.21e-05 12.434 0.000 0.000 0.000\n", + "latWGS84 1.158e-07 2.3e-08 5.040 0.000 7.08e-08 1.61e-07\n", + "longWGS84 1.038e-05 2.06e-06 5.051 0.000 6.35e-06 1.44e-05\n", + "UTMnad83z15_E -0.0065 0.012 -0.556 0.579 -0.029 0.016\n", + "UTMnad83z15_N 0.0134 0.003 4.989 0.000 0.008 0.019\n", + "AADT 2023 ArDOT sum 0.0355 0.008 4.176 0.000 0.019 0.052\n", + "AADT mean 2023 ArDOT -0.0112 0.008 -1.450 0.147 -0.026 0.004\n", + "ArDOT Sum Lgth 2023 -0.0418 0.006 -7.199 0.000 -0.053 -0.030\n", + 
"Dist2Park Meters 0.0588 0.005 10.875 0.000 0.048 0.069\n", + "Distance to K12 Meters 0.0026 0.019 0.142 0.887 -0.034 0.039\n", + "Meters from Transit Stop -0.0254 0.014 -1.875 0.061 -0.052 0.001\n", + "Sum of Sidewalks Meters -0.0744 0.016 -4.643 0.000 -0.106 -0.043\n", + "Join_Count -0.0002 1.51e-05 -14.565 0.000 -0.000 -0.000\n", + "TAZ_ID 0.0224 0.003 7.463 0.000 0.017 0.028\n", + "COUNTY -0.0031 0.001 -4.777 0.000 -0.004 -0.002\n", + "2024 Median Household Income -0.1123 0.010 -11.597 0.000 -0.131 -0.093\n", + "2024 Median Household Income: Index -0.0001 1.07e-05 -13.247 0.000 -0.000 -0.000\n", + "2029 Median Household Income 0.0685 0.009 7.676 0.000 0.051 0.086\n", + "2029 Median Household Income: Index 4.195e-05 7.79e-06 5.387 0.000 2.67e-05 5.72e-05\n", + "2022 Median HH Income (ACS 5-Yr) 0.0350 0.007 5.234 0.000 0.022 0.048\n", + "2024 Diversity Index -0.0004 0.000 -1.700 0.089 -0.001 6.52e-05\n", + "2024 White Population 0.0200 0.009 2.195 0.028 0.002 0.038\n", + "2024 White Population: Percent 0.0002 0.000 1.181 0.238 -0.000 0.001\n", + "2024 Black Population -0.0005 0.000 -1.631 0.103 -0.001 0.000\n", + "2024 Black Population: Percent -8.206e-05 2.38e-05 -3.442 0.001 -0.000 -3.53e-05\n", + "2024 American Indian Population 0.0002 0.000 1.371 0.170 -7.37e-05 0.000\n", + "2024 American Indian Population: Percent -2.874e-05 9.18e-06 -3.131 0.002 -4.67e-05 -1.08e-05\n", + "2024 Asian Population 0.0045 0.001 7.999 0.000 0.003 0.006\n", + "2024 Asian Population: Percent 0.0005 0.000 1.978 0.048 4.35e-06 0.001\n", + "2024 Pacific Islander Population 0.0005 0.000 3.689 0.000 0.000 0.001\n", + "2024 Pacific Islander Population: Percent -6.206e-05 2.18e-05 -2.845 0.004 -0.000 -1.93e-05\n", + "2024 Other Race Population -0.0006 0.001 -0.403 0.687 -0.003 0.002\n", + "2024 Other Race Population: Percent -0.0003 0.000 -3.114 0.002 -0.001 -0.000\n", + "2024 Population of 2+ Races 0.0009 0.001 1.032 0.302 -0.001 0.002\n", + "2024 Population of 2+ Races: Percent -0.0002 
5.62e-05 -4.139 0.000 -0.000 -0.000\n", + "2010 Diversity Index -0.0007 0.000 -2.833 0.005 -0.001 -0.000\n", + "2022 Race: American Indian (ACS 5-Yr) 0.0009 5.7e-05 15.298 0.000 0.001 0.001\n", + "2022 Race: American Indian (ACS 5-Yr): Percent 5.334e-05 9.72e-06 5.488 0.000 3.43e-05 7.24e-05\n", + "2022 Race: Asian (ACS 5-Yr) 0.0042 0.000 11.331 0.000 0.003 0.005\n", + "2022 Race: Asian (ACS 5-Yr): Percent 0.0006 0.000 4.535 0.000 0.000 0.001\n", + "2022 Race: Native Hawaiian (ACS 5-Yr) -0.0003 3.57e-05 -7.484 0.000 -0.000 -0.000\n", + "2022 Race: Native Hawaiian (ACS 5-Yr): Percent -0.0001 1.57e-05 -7.033 0.000 -0.000 -7.97e-05\n", + "2022 Race: Other (ACS 5-Yr) -0.0094 0.001 -10.669 0.000 -0.011 -0.008\n", + "2022 Race: Other (ACS 5-Yr): Percent -0.0005 5.07e-05 -10.084 0.000 -0.001 -0.000\n", + "2022 Race: Two or More (ACS 5-Yr) -0.0087 0.001 -10.991 0.000 -0.010 -0.007\n", + "2022 Race: Two or More (ACS 5-Yr): Percent -0.0010 0.000 -6.108 0.000 -0.001 -0.001\n", + "2029 White Population 0.0301 0.009 3.277 0.001 0.012 0.048\n", + "2029 White Population: Percent 0.0003 0.000 1.402 0.161 -0.000 0.001\n", + "2029 Black Population -0.0002 0.000 -0.574 0.566 -0.001 0.000\n", + "2029 Black Population: Percent -6.762e-05 2.05e-05 -3.293 0.001 -0.000 -2.74e-05\n", + "2029 American Indian Population 0.0003 0.000 2.142 0.032 2.28e-05 0.001\n", + "2029 American Indian Population: Percent -2.951e-05 9.15e-06 -3.225 0.001 -4.74e-05 -1.16e-05\n", + "2029 Asian Population 0.0060 0.001 9.259 0.000 0.005 0.007\n", + "2029 Asian Population: Percent 0.0005 0.000 1.982 0.047 5.64e-06 0.001\n", + "2029 Pacific Islander Population 0.0006 0.000 3.992 0.000 0.000 0.001\n", + "2029 Pacific Islander Population: Percent -4.424e-05 1.77e-05 -2.492 0.013 -7.9e-05 -9.45e-06\n", + "2029 Other Race Population 9.816e-05 0.002 0.059 0.953 -0.003 0.003\n", + "2029 Other Race Population: Percent -0.0004 0.000 -3.131 0.002 -0.001 -0.000\n", + "2029 Diversity Index -0.0004 0.000 -1.721 0.085 -0.001 
6.06e-05\n", + "2022 Pop 25+: HS Diploma (ACS 5-Yr) -0.0053 0.001 -10.078 0.000 -0.006 -0.004\n", + "2022 Pop 25+: HS Diploma (ACS 5-Yr): Percent -0.0008 0.000 -7.762 0.000 -0.001 -0.001\n", + "2022 Pop 25+: Some College (ACS 5-Yr) -0.0061 0.002 -3.610 0.000 -0.009 -0.003\n", + "2022 Pop 25+: Some College (ACS 5-Yr): Percent -0.0007 3.23e-05 -21.982 0.000 -0.001 -0.001\n", + "2022 Pop 25+: Assoc Degree (ACS 5-Yr) 0.0028 0.001 3.665 0.000 0.001 0.004\n", + "2022 Pop 25+: Assoc Degree (ACS 5-Yr): Percent 0.0003 2.84e-05 10.858 0.000 0.000 0.000\n", + "2022 Pop 25+: Bach Degree (ACS 5-Yr) 0.0100 0.005 2.091 0.037 0.001 0.019\n", + "2022 Pop 25+: Bach Degree (ACS 5-Yr): Percent 0.0010 0.000 5.080 0.000 0.001 0.001\n", + "2022 Pop 25+: Master`s Deg (ACS 5-Yr) 0.0046 0.001 3.271 0.001 0.002 0.007\n", + "2022 Pop 25+: Master`s Deg (ACS 5-Yr): Percent 0.0003 0.000 1.659 0.097 -5.24e-05 0.001\n", + "2022 Pop 25+: Prof Sch Deg (ACS 5-Yr) 0.0013 0.000 3.563 0.000 0.001 0.002\n", + "2022 Pop 25+: Prof Sch Deg (ACS 5-Yr): Percent 0.0001 3.19e-05 3.504 0.000 4.93e-05 0.000\n", + "2022 Pop 25+: Doctorate (ACS 5-Yr) -0.0015 0.000 -3.668 0.000 -0.002 -0.001\n", + "2022 Pop 25+: Doctorate (ACS 5-Yr): Percent -0.0001 3.06e-05 -3.597 0.000 -0.000 -5.01e-05\n", + "2024 Pop Age 25+: High School Diploma -0.0032 0.001 -4.942 0.000 -0.004 -0.002\n", + "2024 Pop Age 25+: High School Diploma: Percent -0.0008 0.000 -7.901 0.000 -0.001 -0.001\n", + "2024 Pop Age 25+: GED -0.0018 0.000 -9.533 0.000 -0.002 -0.001\n", + "2024 Pop Age 25+: GED: Percent -0.0002 3.87e-05 -5.659 0.000 -0.000 -0.000\n", + "2020 Total Population 0.0100 0.010 0.961 0.337 -0.010 0.030\n", + "2024 Total Population 0.0250 0.009 2.653 0.008 0.007 0.043\n", + "2024 Population Density -0.0030 0.014 -0.209 0.835 -0.031 0.025\n", + "2022 Civilian Pop 18+: Veteran (ACS 5-Yr) 0.0008 0.001 0.956 0.339 -0.001 0.002\n", + "2022 Civilian Pop 18+: Veteran (ACS 5-Yr): Percent 0.0001 1.01e-05 11.670 0.000 9.82e-05 0.000\n", + "2022 
Civilian Pop 18+: Nonveteran (ACS 5-Yr) 0.0055 0.009 0.584 0.559 -0.013 0.024\n", + "2022 Civilian Pop 18+: Nonveteran (ACS 5-Yr): Percent -0.0003 0.000 -2.744 0.006 -0.001 -8.6e-05\n", + "2022 Workers 16+: Bicycle (ACS 5-Yr) -0.0004 8.63e-05 -4.275 0.000 -0.001 -0.000\n", + "2022 Workers 16+: Bicycle (ACS 5-Yr): Percent -4.304e-05 7.88e-06 -5.463 0.000 -5.85e-05 -2.76e-05\n", + "2022 Workers 16+: Walked (ACS 5-Yr) -0.0003 0.000 -2.491 0.013 -0.001 -6.1e-05\n", + "2022 Workers 16+: Walked (ACS 5-Yr): Percent 0.0001 6.01e-05 2.303 0.021 2.06e-05 0.000\n", + "2022 Commute to Work: 15-19 Min (ACS 5-Yr) 0.0035 0.001 2.841 0.005 0.001 0.006\n", + "2022 Commute to Work: 15-19 Min (ACS 5-Yr): Percent 0.0019 0.000 5.498 0.000 0.001 0.003\n", + "2022 Commute to Work: 10-14 Min (ACS 5-Yr) -0.0027 0.001 -2.609 0.009 -0.005 -0.001\n", + "2022 Commute to Work: 10-14 Min (ACS 5-Yr): Percent -0.0004 7.02e-05 -6.354 0.000 -0.001 -0.000\n", + "2022 Commute to Work: 5-9 Min (ACS 5-Yr) 0.0032 0.000 7.451 0.000 0.002 0.004\n", + "2022 Commute to Work: 5-9 Min (ACS 5-Yr): Percent 0.0007 5.79e-05 11.582 0.000 0.001 0.001\n", + "2022 Commute to Work: <5 Min (ACS 5-Yr) 0.0018 0.000 3.953 0.000 0.001 0.003\n", + "2022 Commute to Work: <5 Min (ACS 5-Yr): Percent 0.0002 2.42e-05 6.585 0.000 0.000 0.000\n", + "2024 Median Age 0.0002 0.000 1.285 0.199 -8.91e-05 0.000\n", + "2024 Median Age: Index 0.0004 0.000 1.193 0.233 -0.000 0.001\n", + "2024 Senior Population 0.0082 0.003 2.821 0.005 0.003 0.014\n", + "2024 Senior Population: Percent 0.0007 0.000 3.431 0.001 0.000 0.001\n", + "2020 Multiple Races Pop 35-39 -0.0002 9.32e-05 -2.076 0.038 -0.000 -1.08e-05\n", + "2020 Multiple Races Pop 35-39: Percent -9.79e-05 5.74e-05 -1.707 0.088 -0.000 1.45e-05\n", + "2024 Population Age 0-4 0.0006 0.000 2.819 0.005 0.000 0.001\n", + "2024 Population Age 0-4: Percent -3.35e-06 7.67e-06 -0.437 0.662 -1.84e-05 1.17e-05\n", + "2022 Poverty Index (ACS 5-Yr) -0.0063 0.001 -8.417 0.000 -0.008 -0.005\n", + "2022 
HHs: Inc Below Poverty Level (ACS 5-Yr) -0.0023 0.000 -8.973 0.000 -0.003 -0.002\n", + "2022 HHs: Inc Below Poverty Level (ACS 5-Yr): Percent -0.0008 9.41e-05 -8.427 0.000 -0.001 -0.001\n", + "2022 HHs w/Public Assist Income (ACS 5-Yr) 0.0007 7.24e-05 8.984 0.000 0.001 0.001\n", + "2022 HHs w/Public Assist Income (ACS 5-Yr): Percent 6.312e-05 1.06e-05 5.932 0.000 4.23e-05 8.4e-05\n", + "2022 Race: White (ACS 5-Yr) 0.0026 0.013 0.205 0.838 -0.022 0.027\n", + "2022 Race: White (ACS 5-Yr): Percent 0.0008 0.000 6.744 0.000 0.001 0.001\n", + "2022 Race: Black (ACS 5-Yr) 0.0016 0.001 2.950 0.003 0.001 0.003\n", + "2022 Race: Black (ACS 5-Yr): Percent -4.896e-05 2.02e-05 -2.419 0.016 -8.86e-05 -9.29e-06\n", + "Shape_Length -0.0003 5.43e-05 -5.120 0.000 -0.000 -0.000\n", + "Shape_Area -0.0522 0.010 -5.120 0.000 -0.072 -0.032\n", + "Spatial Component 1 -9.315e-07 2.79e-07 -3.339 0.001 -1.48e-06 -3.85e-07\n", + "Spatial Component 2 -1.181e-06 2.97e-07 -3.971 0.000 -1.76e-06 -5.98e-07\n", + "Spatial Component 3 4.284e-07 2.36e-07 1.816 0.069 -3.39e-08 8.91e-07\n", + "Spatial Component 4 -2.077e-07 7.64e-08 -2.719 0.007 -3.57e-07 -5.8e-08\n", + "Spatial Component 5 -7.931e-07 1.33e-07 -5.959 0.000 -1.05e-06 -5.32e-07\n", + "Spatial Component 6 4.589e-07 1.66e-07 2.772 0.006 1.34e-07 7.83e-07\n", + "Spatial Component 7 -6.213e-07 1.75e-07 -3.547 0.000 -9.65e-07 -2.78e-07\n", + "Spatial Component 8 -1.232e-07 1.06e-07 -1.163 0.245 -3.31e-07 8.44e-08\n", + "Spatial Component 9 -6.305e-07 3.04e-07 -2.074 0.038 -1.23e-06 -3.48e-08\n", + "Spatial Component 10 -1.622e-07 1.55e-07 -1.050 0.294 -4.65e-07 1.41e-07\n", + "Spatial Component 11 -9.687e-07 2.44e-07 -3.970 0.000 -1.45e-06 -4.9e-07\n", + "Spatial Component 12 1.875e-07 5.03e-08 3.726 0.000 8.89e-08 2.86e-07\n", + "Spatial Component 13 1.365e-07 1.93e-07 0.706 0.480 -2.43e-07 5.16e-07\n", + "Spatial Component 14 7.848e-07 3.26e-07 2.411 0.016 1.47e-07 1.42e-06\n", + "Spatial Component 15 -4.019e-07 6.54e-08 -6.150 0.000 
-5.3e-07 -2.74e-07\n", + "Spatial Component 16 1.008e-06 1.53e-07 6.591 0.000 7.08e-07 1.31e-06\n", + "Spatial Component 17 -8.008e-07 1.9e-07 -4.204 0.000 -1.17e-06 -4.27e-07\n", + "Spatial Component 18 -3.013e-07 7.6e-08 -3.963 0.000 -4.5e-07 -1.52e-07\n", + "Spatial Component 19 5.633e-07 1.79e-07 3.151 0.002 2.13e-07 9.14e-07\n", + "Spatial Component 20 4.71e-07 1.43e-07 3.289 0.001 1.9e-07 7.52e-07\n", + "Spatial Component 21 -5.812e-07 2.98e-07 -1.952 0.051 -1.16e-06 2.3e-09\n", + "Spatial Component 22 -2.01e-08 1.7e-07 -0.118 0.906 -3.53e-07 3.12e-07\n", + "Spatial Component 23 -7.728e-07 2.29e-07 -3.379 0.001 -1.22e-06 -3.24e-07\n", + "Spatial Component 24 -2.029e-07 5.85e-08 -3.466 0.001 -3.18e-07 -8.82e-08\n", + "Spatial Component 25 1.582e-07 1.16e-07 1.368 0.171 -6.85e-08 3.85e-07\n", + "Spatial Component 26 1.051e-06 7.41e-08 14.177 0.000 9.05e-07 1.2e-06\n", + "Spatial Component 27 -1.277e-06 9.87e-08 -12.936 0.000 -1.47e-06 -1.08e-06\n", + "Spatial Component 28 1.739e-07 4.85e-08 3.584 0.000 7.88e-08 2.69e-07\n", + "Spatial Component 29 9.27e-07 2.5e-07 3.715 0.000 4.38e-07 1.42e-06\n", + "Spatial Component 30 -7.488e-07 1.01e-07 -7.444 0.000 -9.46e-07 -5.52e-07\n", + "Spatial Component 31 -9.83e-07 9.51e-08 -10.336 0.000 -1.17e-06 -7.97e-07\n", + "Spatial Component 32 4.068e-07 8.83e-08 4.609 0.000 2.34e-07 5.8e-07\n", + "Spatial Component 33 -7.583e-07 1.11e-07 -6.836 0.000 -9.76e-07 -5.41e-07\n", + "Spatial Component 34 -9.564e-08 3.7e-07 -0.259 0.796 -8.2e-07 6.29e-07\n", + "Spatial Component 35 1.196e-07 5.4e-08 2.214 0.027 1.37e-08 2.25e-07\n", + "Spatial Component 36 -8.537e-07 6.67e-08 -12.799 0.000 -9.84e-07 -7.23e-07\n", + "Spatial Component 37 3.215e-08 2.07e-07 0.155 0.877 -3.74e-07 4.39e-07\n", + "==============================================================================\n", + "Omnibus: 1973.578 Durbin-Watson: 1.888\n", + "Prob(Omnibus): 0.000 Jarque-Bera (JB): 90362.534\n", + "Skew: 2.957 Prob(JB): 0.00\n", + "Kurtosis: 30.712 Cond. 
# Fit the bias-correction OLS on the imputed training design and score it on
# the held-out split.

# Prepend an explicit intercept column to both splits.
X_train_const, X_test_const = (
    sm.add_constant(frame, has_constant="add")
    for frame in (X_train_imp, X_test_imp)
)

# Ordinary least squares with HC3 heteroscedasticity-robust standard errors.
# NOTE(review): the captured run of this cell reported Cond. No. 1.28e+22 and
# a "covariance of constraints does not have full rank" warning, so the design
# looks (near-)singular; interpret individual coefficients with care.
model = sm.OLS(y_train, X_train_const).fit(cov_type="HC3")

# Held-out predictions; observed and predicted are both cast to float Series.
y_pred = model.predict(X_test_const)
y_true = pd.Series(y_test).astype(float)
y_hat = pd.Series(y_pred).astype(float)

# Score only the rows where neither the observation nor the prediction is NaN.
mask_eval = ~(y_true.isna() | y_hat.isna())
y_true, y_hat = y_true[mask_eval], y_hat[mask_eval]

# RMSE is derived by hand as sqrt(MSE) so it also works on older sklearn.
mse = mean_squared_error(y_true, y_hat)
rmse = np.sqrt(mse)
r2 = r2_score(y_true, y_hat)

print(model.summary())
print(dict(rmse_test=rmse, r2_test=r2))

# --- 7) Predict for all rows and export artifacts ---
from pathlib import Path

# Rebuild the predictor matrix for every row of `data`, coercing each column
# to numeric (unparseable values become NaN) before imputation.
X_all = data[list(X.columns)].apply(pd.to_numeric, errors="coerce")

# Fill gaps with the already-fitted imputer, then keep only the columns that
# survived the train-side non-constant / duplicate filtering, in train order.
X_all_imp = pd.DataFrame(
    imputer.transform(X_all), columns=X_all.columns, index=X_all.index
)
X_all_imp = X_all_imp[X_train_imp.columns]

# Same intercept handling as at fit time, then predict for all rows
# (this includes the rows the model was trained on).
X_all_const = sm.add_constant(X_all_imp, has_constant="add")
pred_all = model.predict(X_all_const)

out = data.copy()
out["predicted_eco_from_model"] = pred_all

# Output locations; parents=True so a missing intermediate folder is created
# instead of raising FileNotFoundError.
out_dir = Path("../processed_data")
out_dir.mkdir(parents=True, exist_ok=True)
pred_path = out_dir / "weekly_predictions_with_covariates.csv"
meta_path = out_dir / "model_metrics.txt"

# Columns written to the predictions CSV. The first five are required
# (selecting a missing one raises KeyError); the ID columns are optional and
# only appended when present in the data.
keep_cols = [
    "week_id",
    "week_start",
    "EcoCntr_weekly_SUM",
    "SUM_total_trip_count",
    "predicted_eco_from_model",
]
keep_cols += [
    k
    for k in ("GRID_ID", "GRID_ID.1", "HEXid_WeekID", "HEXid_weekID")
    if k in out.columns and k not in keep_cols
]

out[keep_cols].to_csv(pred_path, index=False)

# Persist the model summary plus test metrics; the encoding is pinned so the
# file does not depend on the platform's default locale encoding.
with open(meta_path, "w", encoding="utf-8") as f:
    f.write(model.summary().as_text())
    f.write(f"\nRMSE_test: {rmse:.4f}\nR2_test: {r2:.4f}\n")

# Cell output: where the two artifacts were written.
pred_path, meta_path

# --- 8) Quick diagnostic plot ---

# Scatter of held-out observations against predictions (y_true / y_hat are the
# NaN-filtered test series produced by the evaluation cell).
plt.figure()
plt.scatter(y_true, y_hat, alpha=0.4)
plt.xlabel("Observed EcoCounter (test)")
plt.ylabel("Predicted (test)")
plt.title("Observed vs Predicted — test set")

plot_path = Path("../figures/obs_vs_pred_test.png")
plot_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(plot_path, bbox_inches="tight")

# Cell output: where the figure was written.
plot_path