KPIs - Tracker Availability¶
KpiTrackerAvailability(baze)
¶
Class used for handling tracker availability.
Source code in echo_baze/baze_root.py
def __init__(self, baze: e_bz.Baze) -> None:
    """Store the top level Baze object used by this handler.

    Parameters
    ----------
    baze : Baze
        Top level object carrying all functionality and the connection handler.

    Raises
    ------
    ValueError
        If ``baze`` is not an instance of ``Baze``.
    """
    # reject anything that is not a Baze instance before keeping a reference to it
    received_type = type(baze)
    if not isinstance(baze, e_bz.Baze):
        raise ValueError(f"baze must be of type Baze, not {received_type}")

    self.baze: e_bz.Baze = baze
get(period, object_names=None, subperiod_size=timedelta(days=1), return_type='Availability', **kwargs)
¶
Gets tracker Availability (misaligned) and not communicating for the specified objects and period. Both values are in percentage of time.
It will only consider values during the day based on the site location defined in Bazefield.
Parameters:
-
(period¶DateTimeRange) –Period of the analysis. It will be forced to start at 00:00:00 and end at 23:59:59.
-
(object_names¶list[str] | None, default:None) –Names of the desired objects (tracker motors). If set to None all will be considered. By default None.
-
(subperiod_size¶timedelta, default:timedelta(days=1)) –Size of the subperiod to get availability. For example, if the period lasts 1 week and subperiod is 1 day, the result will contain one row with the availability of each day inside the period. By default timedelta(days=1)
-
(return_type¶Literal['Availability', 'Amounts'], default:'Availability') –Type of the return. "Availability" will return the percentage of time the tracker was misaligned or not communicating. "Amounts" will return the amount of time (in seconds) the tracker was misaligned or not communicating. By default "Availability".
-
(points_request_interval¶timedelta) –Length of the subperiod to get the points data. By default timedelta(days=365).
Returns:
-
DataFrame–DataFrame containing the availability for each object and subperiod.
Index: - Its index will be the start of each subperiod.
Columns: MultiIndex with the following levels: - object_name: The name of the object. - quantity: The quantity being analyzed. It can be "Misaligned", "NotCommunicating" or "Available". Values: - If return_type == "Availability", returns the percentage of time the tracker was misaligned, not communicating or available during the subperiod. - If return_type == "Amounts", returns the amount of time (in seconds) the tracker was misaligned, not communicating or available during the subperiod.
Source code in echo_baze/kpi_trackeravailability.py
@validate_call
def get(
    self,
    period: DateTimeRange,
    object_names: list[str] | None = None,
    subperiod_size: timedelta = timedelta(days=1),
    return_type: Literal["Availability", "Amounts"] = "Availability",
    **kwargs,
) -> DataFrame:
    """Gets the misaligned, not communicating and available share of time of tracker motors.

    Only timestamps flagged as daytime are considered. Day/night is evaluated per tracker using the
    latitude and longitude attributes of its parent object.

    Parameters
    ----------
    period : DateTimeRange
        Period of the analysis. Internally it is widened to start at 00:00:00 of the first day and end at
        23:59:59 of the last day. The object passed by the caller is not modified.
    object_names : list[str] | None, optional
        Names of the desired objects (tracker motors). If set to None all will be considered. By default None.
    subperiod_size : timedelta, optional
        Size of the subperiod to get availability. For example, if the period lasts 1 week and subperiod is
        1 day, the result will contain one row for each day inside the period.
        By default timedelta(days=1)
    return_type : Literal["Availability", "Amounts"], optional
        Type of the return. "Availability" will return the share of time (fraction between 0 and 1) of each
        quantity. "Amounts" will return the amount of time (in seconds) of each quantity.
        By default "Availability".
    points_request_interval : timedelta, optional
        Passed as a keyword argument. Length of the subperiod to get the points data.
        By default timedelta(days=365).

    Returns
    -------
    DataFrame
        DataFrame containing the availability for each object and subperiod.

        Index:
            - Its index will be the start of each subperiod.

        Columns: MultiIndex with the following levels:
            - object_name: The name of the object. An extra "Total" entry aggregates all objects
              (mean across objects for "Availability", sum across objects for "Amounts").
            - quantity: The quantity being analyzed. It can be "Misaligned", "NotCommunicating" or "Available".

        Values:
            - If return_type == "Availability", returns the fraction of time (0 to 1) the tracker was misaligned,
              not communicating or available during the subperiod.
            - If return_type == "Amounts", returns the amount of time (in seconds) the tracker was misaligned,
              not communicating or available during the subperiod.

    Raises
    ------
    ValueError
        If ``subperiod_size`` is not greater than zero, if no objects are found, if any parent object lacks
        latitude or longitude, or if ``return_type`` is not one of the accepted values.
    """
    # local import keeps this method self-contained without touching the module level imports
    import copy

    if not subperiod_size > timedelta(0):
        raise ValueError("subperiod_size must be greater than 0.")

    # working on a shallow copy so that the caller's DateTimeRange is not mutated by the adjustments below
    period = copy.copy(period)

    # converting period to start at 00:00:00 and end at 23:59:59
    period.start = period.start.replace(hour=0, minute=0, second=0, microsecond=0)
    period.end = period.end.replace(hour=23, minute=59, second=59, microsecond=0)

    # the same request interval is used for both point requests below
    request_interval = kwargs.get("points_request_interval", timedelta(days=365))

    # getting objects
    objects = self.baze.objects.instances.get(object_names=object_names, object_types=["Tracker Motor"], output_type="DataFrame")

    # checking if objects were found
    if len(objects) == 0:
        raise ValueError("No objects found with the specified names.")

    # getting the parent objects, as they are the ones that carry the coordinates
    parent_objs = objects["attributes.parentId"].unique().tolist()
    parents = self.baze.objects.instances.get(object_ids=parent_objs, output_type="DataFrame")

    # checking if any parent objects do not have latitude and longitude defined
    wrong_parents = parents[parents["attributes.latitude"].isna() | parents["attributes.longitude"].isna()]
    if len(wrong_parents) > 0:
        raise ValueError(f"Parent objects {wrong_parents.index.tolist()} do not have latitude or longitude defined.")

    # getting the parent coordinates as a dict to use for each tracker
    latitudes = parents.set_index("objectId")["attributes.latitude"].to_dict()
    longitudes = parents.set_index("objectId")["attributes.longitude"].to_dict()

    # adding the coordinates to the objects
    objects["latitude"] = objects["attributes.parentId"].map(latitudes)
    objects["longitude"] = objects["attributes.parentId"].map(longitudes)

    # getting the needed data
    tags_5min = [
        "TrackerMisaligned_5min.REP",
    ]  # "PositionActual_5min.AVG" is not used as we commented out its usage below for evaluation
    df_5min = self.baze.points.values.series.get(
        points=dict.fromkeys(objects.index.tolist(), tags_5min),
        period=period,
        output_type="DataFrame",
        aggregation="Raw",
        round_timestamps={"freq": timedelta(minutes=5), "tolerance": timedelta(minutes=2)},
        request_interval=request_interval,
    )
    df_5min = df_5min.rename(columns={"TrackerMisaligned_5min.REP": "TrackerMisaligned"})

    tags_realtime = ["CommunicationState"]
    df_realtime = self.baze.points.values.series.get(
        points=dict.fromkeys(objects.index.tolist(), tags_realtime),
        period=period,
        output_type="DataFrame",
        aggregation="TimeAverage",
        aggregation_interval=timedelta(minutes=5),
        reindex="5min",
        request_interval=request_interval,
    )
    # TODO : Use communication state 5min when available instead of the realtime one

    # merging both DataFrames on their timestamps
    df = df_5min.merge(df_realtime, left_index=True, right_index=True)

    # creating DataFrame with same timestamps as df to set a is_day column (must have two levels of columns: object_name, quantity)
    isday_df = DataFrame(index=df.index, columns=MultiIndex.from_product([df.columns.get_level_values(0).unique(), ["is_day"]]))

    # calculating if it is day or night based on each object location
    for obj in objects.index:
        isday_df[obj, "is_day"] = is_day(df.index, objects.loc[obj, "latitude"], objects.loc[obj, "longitude"])

    # merging the is_day column with the df
    df = df.merge(isday_df, left_index=True, right_index=True)
    df.columns.names = ["object_name", "quantity"]
    df.index.name = "time"

    # melting the DataFrame to have index as object and time, columns will be tags and is_day
    df = df.melt(ignore_index=False).reset_index(drop=False).pivot(columns=["quantity"], index=["object_name", "time"], values="value")

    # converting data types
    df = df.astype({"CommunicationState": "Float64", "TrackerMisaligned": "Float64", "is_day": "bool"})

    # dropping when at night
    df = df[df["is_day"]].copy()

    # filling CommunicationState with 0 when missing
    df["CommunicationState"] = df["CommunicationState"].fillna(0.0)

    # clipping CommunicationState values to be between 0 and 1
    df["CommunicationState"] = df["CommunicationState"].clip(0.0, 1.0)

    # logging wrong values in TrackerMisaligned
    wrong_rows = df[~df["TrackerMisaligned"].between(0.0, 1.0)]
    if len(wrong_rows) > 0:
        logger.warning(f"Wrong values in TrackerMisaligned:\n{wrong_rows[['TrackerMisaligned']]}")

    # forcing CommunicationState to be 1 when TrackerMisaligned is lower than 0 or higher than 1
    # this is done to consider wrong values in TrackerMisaligned as a loss of communication
    df["CommunicationState"] = df["CommunicationState"].where((df["TrackerMisaligned"] >= 0.0) & (df["TrackerMisaligned"] <= 1.0), 1.0)

    # defining TrackerMisaligned as 0 when CommunicationState > 0
    # this is done so that the availability is not counted when the tracker has communication problems
    df["TrackerMisaligned"] = df["TrackerMisaligned"].where(df["CommunicationState"] == 0.0, 0.0)

    # creating an "Available" column to calculate the availability
    df["Available"] = 1.0 - df["TrackerMisaligned"] - df["CommunicationState"]

    # forcing CommunicationState to be 0 when PositionActual is lower than -45 or higher than 45 for RBG trackers
    # ? This was commented out to evaluate if it's really needed, as it masks the real communication issues
    # mask = df.index.get_level_values(0).str.startswith("RBG") & ((df["PositionActual"] < -45) | (df["PositionActual"] > 45))
    # df.loc[mask, "CommunicationState"] = 0

    # dropping unwanted columns
    df = df.drop(columns=["is_day"])

    # melting to have columns names as one column called "quantity" and values as "value"
    df = df.reset_index(drop=False).melt(id_vars=["object_name", "time"], var_name="quantity", value_name="value")

    # changing quantity for better naming
    df["quantity"] = df["quantity"].replace({"CommunicationState": "NotCommunicating", "TrackerMisaligned": "Misaligned"})

    # pivoting again to have object_name and quantity as column levels
    df = df.reset_index(drop=False).pivot(columns=["object_name", "quantity"], index="time", values="value")

    if return_type == "Availability":
        # resampling to subperiod_size per object
        df = df.resample(subperiod_size, origin="start_day").mean()
        # site total is the mean of all objects for each quantity
        site_total = df.T.groupby(level=1).mean().T
    elif return_type == "Amounts":
        # converting values to seconds in each interval (each sample represents 5 minutes = 300 seconds)
        df = df * 300.0
        # resampling to subperiod_size per object
        df = df.resample(subperiod_size, origin="start_day").sum()
        # site total is the sum of all objects for each quantity
        site_total = df.T.groupby(level=1).sum().T
    else:
        raise ValueError(f'return_type must be "Availability" or "Amounts". Got "{return_type}".')

    # appending the site aggregate as a pseudo object called "Total"
    site_total.columns = MultiIndex.from_product([["Total"], site_total.columns])
    df = df.join(site_total)

    df.columns.names = ["object_name", "quantity"]

    return df