Source code for metloom.pointdata.nws_forecast

import logging
from datetime import timedelta, timezone
from typing import List
import pandas as pd
import geopandas as gpd
import requests
from geopandas import GeoDataFrame

from metloom.dataframe_utils import merge_df, resample_whole_df
from metloom.pointdata.base import GenericPoint
from metloom.variables import SensorDescription, NWSForecastVariables


LOG = logging.getLogger(__name__)


class NWSForecastPointData(GenericPoint):
    """
    Implementation for NWS forecast API
    https://www.weather.gov/documentation/services-web-api

    We can call the points api to get the URL for the forecast, i.e.
    https://api.weather.gov/points/42,-119

    In this example, 3 forecast URLs are available
        "forecast":
            "https://api.weather.gov/gridpoints/BOI/28,28/forecast",
        "forecastHourly":
            "https://api.weather.gov/gridpoints/BOI/28,28/forecast/hourly",
        "forecastGridData":
            "https://api.weather.gov/gridpoints/BOI/28,28",

    forecast can be used to return the 12 hour increments
    forecastHourly returns hourly data (hourly in local tz)
    forecastGridData returns the 'raw' grid data (hourly UTC).

    We will use the `forecastGridData` endpoint for this implementation

    The API returns data from a grid, meaning the `geometry` column in the
    returned geodataframes will represent the **CENTER** of the forecast
    grid cell.
    """

    DATASOURCE = "NWS Forecast"
    ALLOWED_VARIABLES = NWSForecastVariables
    URL = "https://api.weather.gov"
    # NOTE(review): 'token_json' points at a frost token file that nothing in
    # this class reads - it looks copied from another datasource. Left as-is
    # for backward compatibility; confirm whether it can be dropped.
    POINTS_FROM_GEOM_DEFAULTS = {
        'within_geometry': True,
        'token_json': "~/.frost_token.json",
        'buffer': 0.0
    }

    def __init__(
        self, station_id, name, initial_metadata=None, metadata=None,
    ):
        """
        Args:
            station_id: id of station
            name: name of station
            initial_metadata: shapely point required to find the forecast
                grid cell
            metadata: optional metadata for the station (shapely point)

        Raises:
            ValueError: if ``initial_metadata`` is not provided
        """
        if initial_metadata is None:
            raise ValueError(
                "Initial metadata is required for the NWS Forecast class"
            )
        super().__init__(station_id, name, metadata=metadata)
        # (sic) attribute name kept as-is so existing users of it keep working
        self._inital_metadata = initial_metadata

        # default UTC time
        self._tzinfo = timezone(timedelta(hours=0))

        # forecast grid parameters, filled in by _get_metadata
        self._gridx = None
        self._gridy = None
        self._office = None
        self._grid_outline = None

    @staticmethod
    def _request_json(url):
        """
        GET ``url`` and return the decoded JSON body.

        Raises:
            requests.HTTPError: if the response status indicates an error
        """
        resp = requests.get(url)
        resp.raise_for_status()
        return resp.json()

    def _forecast_grid_url(self):
        """
        Build the forecastGridData URL from the office and grid indices
        currently stored on the instance.
        """
        return (
            f"{self.URL}/gridpoints/"
            f"{self._office}/{self._gridx},{self._gridy}"
        )

    def _get_initial_metadata(self):
        """
        Get all metadata from the API for one point.

        Returns:
            The ``properties`` entry of the ``/points/{lat},{lon}`` response
            for the initial metadata point.
        """
        # use the initial metadata to find the grid points for the forecast
        point = self._inital_metadata
        data = self._request_json(f"{self.URL}/points/{point.y},{point.x}")
        return data["properties"]

    def _get_metadata(self):
        """
        See docstring for PointData._get_metadata

        This sets the _gridx, _gridy, _office and _grid_outline attributes
        and returns the point used as metadata.

        Returns:
            Point at the centroid of the forecast grid cell, with the cell
            elevation as the z value.
        """
        properties = self._get_initial_metadata()
        self._gridx = properties["gridX"]
        self._gridy = properties["gridY"]
        self._office = properties["gridId"]

        # use the grid points to find the center of the forecast cell
        data = self._request_json(self._forecast_grid_url())
        grid_properties = data["properties"]

        # Parse the polygon into a geodataframe
        df_loc = gpd.GeoDataFrame.from_features([data])
        self._grid_outline = df_loc.geometry.values[0]

        # find the center
        center = df_loc.centroid[0]
        return gpd.points_from_xy(
            [center.x], [center.y],
            # convert to ft (assumes the API reports elevation in metres)
            z=[grid_properties["elevation"]["value"] * 3.28084]
        )[0]

    def _get_observations(self):
        """
        Get the raw grid forecast data for the grid cell.

        Example request:
            https://api.weather.gov/gridpoints/BOI/28,28

        Returns:
            The ``properties`` entry of the forecastGridData response.
        """
        # Ensure we have office, gridx, and gridy set. These are only
        # resolved inside _get_metadata, so test them directly: checking
        # self._metadata alone misses the case where metadata was handed to
        # the constructor and the grid cell was never looked up, which would
        # produce a request for /gridpoints/None/None,None.
        grid_params = (self._office, self._gridx, self._gridy)
        if any(param is None for param in grid_params):
            self._get_metadata()

        return self._request_json(self._forecast_grid_url())["properties"]

    def _sensor_response_to_df(
        self, response_data, sensor, final_columns, resample_duration=None
    ):
        """
        Process the response from the API into a dataframe for 1 sensor

        Args:
            response_data: ``properties`` mapping returned by the API,
                keyed by sensor code
            sensor: single variable object
            final_columns: columns to keep in addition to this sensor's
                value and units columns. The list is not modified.
            resample_duration: if a resample is desired, a duration
                that can be parsed by pandas

        Returns:
            GeoDataFrame of data indexed on datetime
        """
        # Get the list of response data
        sensor_data = response_data[sensor.code]
        unit_str = sensor_data["uom"].split("wmoUnit:")[-1]
        units_column = f"{sensor.name}_units"

        df = pd.DataFrame.from_records(sensor_data["values"])
        df = df.rename(
            columns={"validTime": "datetime", "value": sensor.name}
        )

        # Timestamp each value midway through its validity interval
        # (example datetime is '2024-06-19T04:00:00+00:00/PT1H')
        interval_parts = df["datetime"].str.split("/")
        date_starts = pd.to_datetime(interval_parts.str[0])
        date_durations = pd.to_timedelta(interval_parts.str[1])
        df["datetime"] = date_starts + date_durations / 2.0
        df = df.set_index("datetime")

        # resample to the desired duration
        if resample_duration is not None:
            df = resample_whole_df(df, sensor, interval=resample_duration)

        # add other expected columns
        df[units_column] = unit_str
        df["site"] = self.id

        # Columns to keep. Build a new list rather than extending the
        # caller's list in place, so repeated calls do not leak columns
        # from one sensor into the next.
        keep_columns = [*final_columns, sensor.name, units_column]

        df = GeoDataFrame(df, geometry=[self.metadata] * len(df))
        # double check utc conversion
        df = df.tz_convert(self.desired_tzinfo)
        # only keep the expected columns and drop rows without a value
        df = df.filter(keep_columns)
        df = df.loc[pd.notna(df[sensor.name])]
        return df

    def _get_data(
        self,
        variables: List[SensorDescription],
        desired_duration=None,
    ):
        """
        Fetch the forecast and assemble one dataframe for all variables.

        Args:
            variables: List of metloom.variables.SensorDescription object
                from self.ALLOWED_VARIABLES
            desired_duration: desired resample duration ("D", "h"). If None,
                the data is not resampled.

        Returns:
            GeoDataFrame of data, indexed on datetime, site. None if no
            data was returned.
        """
        df = None
        final_columns = ["geometry", "site"]

        # Get data from the API
        response_data = self._get_observations()
        if response_data:
            # Parse data for each variable
            for sensor in variables:
                # TODO: how does resampling of non-hourly precip work
                sensor_df = self._sensor_response_to_df(
                    response_data, sensor, final_columns,
                    resample_duration=desired_duration
                )
                df = merge_df(df, sensor_df)

        if df is not None:
            if len(df.index) > 0:
                # Set the datasource
                df["datasource"] = [self.DATASOURCE] * len(df.index)
                df = df.reset_index().set_index(["datetime", "site"])
            else:
                # an empty result is reported as no data
                df = None

        self.validate_sensor_df(df)
        return df

    def get_daily_forecast(
        self,
        variables: List[SensorDescription],
    ):
        """
        Get a geopandas dataframe with daily results for a 7 day forecast.
        The geometry column will be the center of the forecast gridcell

        Args:
            variables: list of variables to return
        """
        return self._get_data(variables, desired_duration="D")

    def get_hourly_forecast(
        self,
        variables: List[SensorDescription],
    ):
        """
        Get a geopandas dataframe with hourly results for a 7 day forecast.
        The geometry column will be the center of the forecast gridcell

        Args:
            variables: list of variables to return
        """
        return self._get_data(variables, desired_duration="h")

    def get_forecast(
        self,
        variables: List[SensorDescription],
    ):
        """
        Get a geopandas dataframe with the forecast results as returned by
        the API, without any resampling.
        The geometry column will be the center of the forecast gridcell

        Args:
            variables: list of variables to return
        """
        # Do not resample
        return self._get_data(variables)