20 changes: 20 additions & 0 deletions rules/build_electricity.smk
@@ -54,6 +54,26 @@ rule build_powerplants:
"../scripts/build_powerplants.py"


rule build_outages:
    input:
        outages="data/entsoe/outages_raw.csv",
        powerplants=resources("powerplants_s_{clusters}.csv"),
    output:
        mapped=resources("outages_mapped_s_{clusters}.csv"),
        unmapped=resources("outages_unmapped_s_{clusters}.csv"),
    log:
        logs("build_outages_s_{clusters}.log"),
    benchmark:
        benchmarks("build_outages_s_{clusters}")
    threads: 1
    resources:
        mem_mb=4000,
    conda:
        "../envs/environment.yaml"
    script:
        "../scripts/build_outages.py"


def input_base_network(w):
    base_network = config_provider("electricity", "base_network")(w)
    osm_prebuilt_version = config_provider("electricity", "osm-prebuilt-version")(w)
20 changes: 20 additions & 0 deletions rules/retrieve.smk
@@ -155,6 +155,26 @@ if config["enable"]["retrieve"]:
"../scripts/retrieve_bidding_zones.py"


if config["enable"]["retrieve"]:

rule retrieve_entsoe_outages:
params:
start_year=2014,
end_year=2024,
entsoe_token=config_provider("secrets", "entsoe_token"),
output:
"data/entsoe/outages_raw.csv",
log:
"logs/retrieve_entsoe_outages.log",
resources:
mem_mb=8000,
retries: 2
conda:
"../envs/environment.yaml"
script:
"../scripts/retrieve_entsoe_outages.py"


if config["enable"]["retrieve"] and config["enable"].get("retrieve_cutout", True):

    rule retrieve_cutout:
132 changes: 132 additions & 0 deletions scripts/build_outages.py
@@ -0,0 +1,132 @@
# SPDX-FileCopyrightText: Contributors to PyPSA-Eur <https://github.com/pypsa/pypsa-eur>
#
# SPDX-License-Identifier: MIT
"""
Map ENTSOE power plant outage data to powerplantmatching units and expand it
into hourly time series of unavailable capacity.
"""

import ast
import logging

import numpy as np
import pandas as pd

from scripts._helpers import configure_logging, set_scenario_config

logger = logging.getLogger(__name__)


def outage_as_dense(df: pd.DataFrame) -> pd.DataFrame:
    """
    Expand outage data to a dense DataFrame with hourly time index and
    production_resource_id columns.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing ENTSOE outage data

    Returns
    -------
    pd.DataFrame
        Hourly unavailable capacity in MW per production_resource_id,
        clipped at each unit's nominal power.
    """

    if df.empty:
        return pd.DataFrame()

    df["start_hour"] = df.start.dt.tz_convert("UTC").dt.floor("h")
    df["end_hour"] = df.end.dt.tz_convert("UTC").dt.floor("h")
    df["avail_qty"] = df["avail_qty"].astype(float)
    df["unavail_mw"] = df["nominal_power"] - df["avail_qty"]

    nominal = (
        df.drop_duplicates("production_resource_id")
        .set_index("production_resource_id")
        .nominal_power
    )

    def _expand_outage(r: pd.Series) -> pd.Series:
        return pd.Series(
            r.unavail_mw,
            index=pd.date_range(r.start_hour, r.end_hour, freq="h", inclusive="both"),
        )

    outages_t = df.apply(_expand_outage, axis=1)
    outages_t.index = df.production_resource_id

    outages_t = (
        outages_t.groupby(level=0).sum(min_count=1).T.clip(upper=nominal, axis=1)
    )

    return outages_t
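
# Illustrative example of the expansion above (values assumed): an outage
# reported from 2020-01-01 00:30 to 2020-01-01 02:45 on a unit with
# nominal_power = 500 MW and avail_qty = 200 MW becomes an hourly series of
# 300 MW unavailable capacity at the stamps 00:00, 01:00 and 02:00 (both
# boundary hours are floored and included); overlapping records of the same
# unit are summed and then clipped at the unit's nominal power.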


def entsoe_to_powerplantmatching_id(c: str, ppl: pd.DataFrame) -> float | int:
    """
    Map ENTSOE IDs (EIC) to Powerplantmatching IDs.

    Parameters
    ----------
    c : str
        ENTSOE ID (EIC)
    ppl : pd.DataFrame
        Powerplantmatching DataFrame

    Returns
    -------
    float | int
        Powerplantmatching ID or NaN if not found
    """
    idx = ppl.loc[ppl.ENTSOE.str.contains(c, na=False)].index
    return idx[0] if len(idx) == 1 else np.nan
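
# Note on the matching above: the EIC passed as column label is matched as a
# substring against ppl["ENTSOE"]; only an unambiguous single hit is mapped,
# otherwise NaN is returned and the corresponding outage series ends up in
# the "unmapped" output below.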


if __name__ == "__main__":
if "snakemake" not in globals():
from scripts._helpers import mock_snakemake

snakemake = mock_snakemake("build_outages")
rootpath = ".."
else:
rootpath = "."
configure_logging(snakemake)
set_scenario_config(snakemake)

units = pd.read_csv(snakemake.input.outages, index_col=0)

ppl = pd.read_csv(snakemake.input.powerplants, index_col=0)
ppl["ENTSOE"] = (
ppl["projectID"]
.map(lambda x: ast.literal_eval(x).get("ENTSOE", np.nan))
.combine_first(ppl["EIC"])
.astype(str)
)

outages = []

planned = units.query(
"businesstype == 'Planned maintenance' and docstatus != 'Cancelled'"
).copy()
outages.append(outage_as_dense(planned))

unplanned = units.query("businesstype == 'Unplanned outage'").copy()
outages.append(outage_as_dense(unplanned))

outages_t = (
pd.concat(outages, axis=1)
.T.groupby(level=0)
.sum()
.T.replace(0, np.nan)
.dropna(how="all")
.dropna(how="all", axis=1)
)
mapped_columns = outages_t.columns.map(
lambda c: entsoe_to_powerplantmatching_id(c, ppl)
)
outages_t_mapped = outages_t.loc[:, mapped_columns.notna()]
outages_t_unmapped = outages_t.loc[:, mapped_columns.isna()]
outages_t_mapped.columns = mapped_columns.dropna()
outages_t_mapped.columns.name = "powerplantmatching_id"

outages_t_mapped.to_csv(snakemake.output.mapped)
outages_t_unmapped.to_csv(snakemake.output.unmapped)
117 changes: 117 additions & 0 deletions scripts/retrieve_entsoe_outages.py
@@ -0,0 +1,117 @@
# SPDX-FileCopyrightText: Contributors to PyPSA-Eur <https://github.com/pypsa/pypsa-eur>
#
# SPDX-License-Identifier: MIT
"""
Retrieve power plant outage data from ENTSOE.
"""

import logging

import pandas as pd
from entsoe import EntsoePandasClient
from entsoe.exceptions import NoMatchingDataError

from scripts._helpers import configure_logging, set_scenario_config

logger = logging.getLogger(__name__)

COUNTRIES = [
"AL",
"AT",
"BE",
"BA",
"BG",
"CH",
"CY",
"CZ",
"DE_LU",
"DE_AT_LU",
"DK_1",
"DK_2",
"EE",
"ES",
"FI",
"FR",
"GB",
"GR",
"HR",
"HU",
"IE",
"IT",
"LT",
"LU",
"LV",
"MD",
"ME",
"MK",
"NL",
"NO",
"PL",
"PT",
"RO",
"RS",
"SE",
"SI",
"SK",
"UA",
"XK",
]

if __name__ == "__main__":
if "snakemake" not in globals():
from scripts._helpers import mock_snakemake

snakemake = mock_snakemake("retrieve_entsoe_outages")
rootpath = ".."
else:
rootpath = "."
configure_logging(snakemake)
set_scenario_config(snakemake)

token = snakemake.params.entsoe_token
start_year = snakemake.params.start_year
end_year = snakemake.params.end_year

if token:
logger.info(
"ENTSOE token provided. Retrieving live data from transparency platform."
)

client = EntsoePandasClient(api_key=token)
units = []
for year in range(start_year, end_year + 1):
start = f"{year}-01-01"
end = f"{year + 1}-01-01"
start = pd.Timestamp(start, tz="UTC")
end = pd.Timestamp(end, tz="UTC")
for c in COUNTRIES:
logger.info(f"Retrieving data for country {c} year {year}")
try:
ugu = client.query_unavailability_of_generation_units(
c, start=start, end=end
)
units.append(ugu)
except NoMatchingDataError:
print(
f"No generation data entries found for country {c} in year {year}."
)
try:
upu = client.query_unavailability_of_production_units(
c, start=start, end=end
)
units.append(upu)
except NoMatchingDataError:
print(
f"No production data entries found for country {c} in year {year}."
)

df = pd.concat(units) if units else pd.DataFrame()

else:
logger.info(
"No ENTSOE token provided. Retrieving pre-built data from data bundle."
)
prebuilt_url = ""
df = pd.read_csv(prebuilt_url)

df.to_csv(snakemake.output[0])