Hub Price Pull Example

Imports

[1]:
import time
import os
import pandas as pd
from tyba_client.client import Client, Market

PAT = os.environ["TYBA_PAT"]
client = Client(PAT)

# Relevant API services
services = client.services
lmp = services.lmp
ancillary = services.ancillary

User inputs

[2]:
iso = 'CAISO'
start_year = 2024
end_year = 2024

Energy Pull - define ISO/RTO

[3]:
services.get_all_isos().json()
[3]:
['PJMISO', 'CAISO', 'NEISO', 'NYISO', 'ERCOT', 'SPPISO', 'MISO']

Pull All Nodes for the ISO/RTO

[4]:
# Get all nodes for RTO
price_nodes = lmp.get_all_nodes(iso=iso).json()
node_directory = pd.DataFrame(price_nodes)
print(node_directory.head())  # Display the first few rows to check the content
   da_end_year  rt_end_year  rt_start_year                   name  \
0       2025.0         2025           2023       CLAP_WWRSR1-APND
1       2025.0         2025           2015  ELCENTRO_2_N001:IVLY2
2       2025.0         2025           2022       KERMAC_LNODEITED
3       2025.0         2025           2021       VIEWLND_LNODE787
4       2025.0         2025           2010           BUTTE_1_N101

            id  da_start_year  zone       type        substation
0  10017280350         2023.0  SDGE  GENERATOR               NaN
1  10003899356         2015.0            SPTIE
2  10016667720         2022.0   PNM       LOAD
3  10016243530         2021.0   SCL       LOAD  VIEWLAND-HOFFMAN
4  20000000353         2010.0   PGE       LOAD             BUTTE
[5]:
node_directory
[5]:
da_end_year rt_end_year rt_start_year name id da_start_year zone type substation
0 2025.0 2025 2023 CLAP_WWRSR1-APND 10017280350 2023.0 SDGE GENERATOR NaN
1 2025.0 2025 2015 ELCENTRO_2_N001:IVLY2 10003899356 2015.0 SPTIE
2 2025.0 2025 2022 KERMAC_LNODEITED 10016667720 2022.0 PNM LOAD
3 2025.0 2025 2021 VIEWLND_LNODE787 10016243530 2021.0 SCL LOAD VIEWLAND-HOFFMAN
4 2025.0 2025 2010 BUTTE_1_N101 20000000353 2010.0 PGE LOAD BUTTE
... ... ... ... ... ... ... ... ... ...
22178 2025.0 2025 2022 HUGHSON_LNODEDR8 10016664459 2022.0 TIDC LOAD HUGHSON
22179 2025.0 2025 2022 COTTONWD_1_COTTONWODLNODE 10016930051 2022.0 AVA LOAD NaN
22180 2025.0 2025 2010 SPICAMIN_1_N001 20000003437 2010.0 PGE LOAD SPICAMIN
22181 2025.0 2025 2023 FREDONIA_LNODELD 10017110123 2023.0 WALC LOAD NaN
22182 2025.0 2025 2016 VENWIND_1_N104 10002898249 2016.0 SCE GENERATOR

22183 rows × 9 columns

[6]:
node_directory.type.unique()
[6]:
array(['GENERATOR', 'SPTIE', 'LOAD', 'N/A', 'INTERTIE', 'AGGREGATE',
       'HUB'], dtype=object)

Filter down to zonal LMPs

[12]:
multiple_nodes = node_directory[node_directory['type'] == 'HUB']
[13]:
multiple_nodes
[13]:
da_end_year rt_end_year rt_start_year name id da_start_year zone type substation
1651 2024.0 2024 2022 TH_NP15_GEN_OFFPEAK-APND 10000802793 2022.0 N/A HUB
2373 2025.0 2025 2010 TH_NP15_GEN-APND 20000004677 2010.0 N/A HUB
3543 2024.0 2024 2022 TH_ZP26_GEN_ONPEAK-APND 10000802780 2022.0 N/A HUB
7511 2024.0 2024 2022 TH_SP15_GEN_OFFPEAK-APND 10000802794 2022.0 N/A HUB
9244 2025.0 2025 2010 TH_ZP26_GEN-APND 20000004670 2010.0 N/A HUB
15438 2025.0 2025 2010 TH_SP15_GEN-APND 20000004682 2010.0 N/A HUB
17254 2024.0 2024 2022 TH_NP15_GEN_ONPEAK-APND 10000802781 2022.0 N/A HUB
17780 2024.0 2024 2022 TH_SP15_GEN_ONPEAK-APND 10000802779 2022.0 N/A HUB
18085 2024.0 2024 2022 TH_ZP26_GEN_OFFPEAK-APND 10000802792 2022.0 N/A HUB

Filter out nodes with insufficient data range

[14]:
# Filter out nodes with insufficient data range
valid_nodes = multiple_nodes[
    (multiple_nodes['rt_start_year'] <= start_year) &
    (multiple_nodes['rt_end_year'] >= end_year) &
    (multiple_nodes['da_start_year'] <= start_year) &
    (multiple_nodes['da_end_year'] >= end_year)
]
print(valid_nodes)
       da_end_year  rt_end_year  rt_start_year                      name  \
1651        2024.0         2024           2022  TH_NP15_GEN_OFFPEAK-APND
2373        2025.0         2025           2010          TH_NP15_GEN-APND
3543        2024.0         2024           2022   TH_ZP26_GEN_ONPEAK-APND
7511        2024.0         2024           2022  TH_SP15_GEN_OFFPEAK-APND
9244        2025.0         2025           2010          TH_ZP26_GEN-APND
15438       2025.0         2025           2010          TH_SP15_GEN-APND
17254       2024.0         2024           2022   TH_NP15_GEN_ONPEAK-APND
17780       2024.0         2024           2022   TH_SP15_GEN_ONPEAK-APND
18085       2024.0         2024           2022  TH_ZP26_GEN_OFFPEAK-APND

                id  da_start_year zone type substation
1651   10000802793         2022.0  N/A  HUB
2373   20000004677         2010.0  N/A  HUB
3543   10000802780         2022.0  N/A  HUB
7511   10000802794         2022.0  N/A  HUB
9244   20000004670         2010.0  N/A  HUB
15438  20000004682         2010.0  N/A  HUB
17254  10000802781         2022.0  N/A  HUB
17780  10000802779         2022.0  N/A  HUB
18085  10000802792         2022.0  N/A  HUB

Helper Functions

[15]:
def reformat_df(result):
    df = pd.concat(
        {node: pd.DataFrame(results['prices'], index=pd.to_datetime(results['datetimes'])) for node, results in result.items()},
        axis=1
    )
    return df.droplevel(axis=1, level=1).tz_localize(None)

def lmp_pull(directory, start_year, end_year, markets):
    import pandas as pd

    nodemap = dict(zip(directory.id, directory.name))  # Converts the nodes DataFrame into a dictionary
    market_options = {'RT': Market.RT, 'DA': Market.DA}  # Maps market keys to their corresponding Market options
    results = {}

    # Split node IDs into batches of 8
    node_ids = list(nodemap.keys())
    batches = [node_ids[i:i + 8] for i in range(0, len(node_ids), 8)]

    for market in markets:
        print(f"Pulling data for market: {market_options[market]}")

        # Temporary list to store batch results
        batch_results = []

        for batch in batches:
            print(f"Processing batch: {batch}")
            prices = lmp.get_prices(
                node_ids=batch,  # Pass batch of 8 node IDs
                market=market_options[market],  # Specify the market
                start_year=int(start_year),
                end_year=int(end_year),
            ).json()

            # Reformat and rename columns with the node names
            batch_df = reformat_df(prices).rename(nodemap, axis=1)
            batch_results.append(batch_df)

        # Concatenate all batch results for this market
        results[market] = pd.concat(batch_results, axis=0)

    # Combine all markets into a single DataFrame with a MultiIndex
    return pd.concat(results, axis=1)

Pull prices for valid HUBs

[16]:
lmps = lmp_pull(valid_nodes, start_year, end_year, ['RT', 'DA'])
Pulling data for market: realtime
Processing batch: ['10000802793', '20000004677', '10000802780', '10000802794', '20000004670', '20000004682', '10000802781', '10000802779']
Processing batch: ['10000802792']
Pulling data for market: dayahead
Processing batch: ['10000802793', '20000004677', '10000802780', '10000802794', '20000004670', '20000004682', '10000802781', '10000802779']
Processing batch: ['10000802792']
[17]:
lmps.head()
[17]:
RT DA
TH_NP15_GEN_OFFPEAK-APND TH_NP15_GEN-APND TH_ZP26_GEN_ONPEAK-APND TH_SP15_GEN_OFFPEAK-APND TH_ZP26_GEN-APND TH_SP15_GEN-APND TH_NP15_GEN_ONPEAK-APND TH_SP15_GEN_ONPEAK-APND TH_ZP26_GEN_OFFPEAK-APND TH_NP15_GEN_OFFPEAK-APND TH_NP15_GEN-APND TH_ZP26_GEN_ONPEAK-APND TH_SP15_GEN_OFFPEAK-APND TH_ZP26_GEN-APND TH_SP15_GEN-APND TH_NP15_GEN_ONPEAK-APND TH_SP15_GEN_ONPEAK-APND TH_ZP26_GEN_OFFPEAK-APND
2024-01-01 00:00:00 46.0780 46.0780 0.0 44.5333 44.9106 44.5333 0.0 0.0 NaN 46.75774 46.75774 0.0 46.08000 45.84287 46.08000 0.0 0.0 NaN
2024-01-01 01:00:00 46.4224 46.4224 0.0 44.1787 44.9486 44.1787 0.0 0.0 NaN 46.04052 46.04052 0.0 45.32571 45.16708 45.32571 0.0 0.0 NaN
2024-01-01 02:00:00 47.3373 47.3373 0.0 45.3447 45.9565 45.3447 0.0 0.0 NaN 45.13062 45.13062 0.0 44.91129 44.51086 44.91129 0.0 0.0 NaN
2024-01-01 03:00:00 46.9617 46.9617 0.0 44.7927 45.5005 44.7927 0.0 0.0 NaN 44.93121 44.93121 0.0 44.56314 44.22495 44.56314 0.0 0.0 NaN
2024-01-01 04:00:00 47.1285 47.1285 0.0 44.9711 45.6992 44.9711 0.0 0.0 NaN 44.55260 44.55260 0.0 44.17126 43.81786 44.17126 0.0 0.0 NaN
[16]:
lmps.to_csv(f"Output/{iso}_hub_energyprices_{start_year}_{end_year}.csv")