Skip to content

Commit

Permalink
Add option to unexport unavailable metrics
Browse files Browse the repository at this point in the history
When a sensor goes offline, this component continues to report the
sensor's last value until Home Assistant itself restarts or the sensor
returns.

The `entity_available` metric is used to filter out unavailable
metrics, but this is slow with current versions of prometheus
(see prometheus/prometheus#9577).

Setting the new `export_unavailable_metrics` option to false will
automatically withdraw an entity's metrics when the entity becomes
unavailable or unknown, which matches the behavior on restart and
makes it easier to see missing metrics without using an `unless`.
  • Loading branch information
agoode committed Sep 8, 2024
1 parent 7e7a6e4 commit be744a1
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 7 deletions.
12 changes: 12 additions & 0 deletions homeassistant/components/prometheus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
CONF_COMPONENT_CONFIG_GLOB = "component_config_glob"
CONF_COMPONENT_CONFIG_DOMAIN = "component_config_domain"
CONF_DEFAULT_METRIC = "default_metric"
CONF_EXPORT_UNAVAILABLE_METRICS = "export_unavailable_metrics"
CONF_OVERRIDE_METRIC = "override_metric"
COMPONENT_CONFIG_SCHEMA_ENTRY = vol.Schema(
{vol.Optional(CONF_OVERRIDE_METRIC): cv.string}
Expand All @@ -109,6 +110,7 @@
vol.Optional(CONF_PROM_NAMESPACE, default=DEFAULT_NAMESPACE): cv.string,
vol.Optional(CONF_REQUIRES_AUTH, default=True): cv.boolean,
vol.Optional(CONF_DEFAULT_METRIC): cv.string,
vol.Optional(CONF_EXPORT_UNAVAILABLE_METRICS, default=True): cv.boolean,
vol.Optional(CONF_OVERRIDE_METRIC): cv.string,
vol.Optional(CONF_COMPONENT_CONFIG, default={}): vol.Schema(
{cv.entity_id: COMPONENT_CONFIG_SCHEMA_ENTRY}
Expand Down Expand Up @@ -136,6 +138,7 @@ def setup(hass: HomeAssistant, config: ConfigType) -> bool:
climate_units = hass.config.units.temperature_unit
override_metric: str | None = conf.get(CONF_OVERRIDE_METRIC)
default_metric: str | None = conf.get(CONF_DEFAULT_METRIC)
export_unavailable_metrics: bool = conf[CONF_EXPORT_UNAVAILABLE_METRICS]
component_config = EntityValues(
conf[CONF_COMPONENT_CONFIG],
conf[CONF_COMPONENT_CONFIG_DOMAIN],
Expand All @@ -149,6 +152,7 @@ def setup(hass: HomeAssistant, config: ConfigType) -> bool:
component_config,
override_metric,
default_metric,
export_unavailable_metrics,
)

hass.bus.listen(EVENT_STATE_CHANGED, metrics.handle_state_changed_event)
Expand All @@ -175,11 +179,13 @@ def __init__(
component_config: EntityValues,
override_metric: str | None,
default_metric: str | None,
export_unavailable_metrics: bool,
) -> None:
"""Initialize Prometheus Metrics."""
self._component_config = component_config
self._override_metric = override_metric
self._default_metric = default_metric
self._export_unavailable_metrics = export_unavailable_metrics
self._filter = entity_filter
self._sensor_metric_handlers: list[
Callable[[State, str | None], str | None]
Expand Down Expand Up @@ -223,6 +229,12 @@ def handle_state(self, state: State) -> None:

ignored_states = (STATE_UNAVAILABLE, STATE_UNKNOWN)

if state.state in ignored_states and not self._export_unavailable_metrics:
self._remove_labelsets(
state.entity_id, state.attributes.get(ATTR_FRIENDLY_NAME)
)
return

handler = f"_handle_{domain}"

if hasattr(self, handler) and state.state not in ignored_states:
Expand Down
140 changes: 133 additions & 7 deletions tests/components/prometheus/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
STATE_OPEN,
STATE_OPENING,
STATE_UNAVAILABLE,
STATE_UNKNOWN,
STATE_UNLOCKED,
UnitOfEnergy,
UnitOfTemperature,
Expand Down Expand Up @@ -103,11 +104,7 @@ async def setup_prometheus_client(
namespace: str,
):
"""Initialize an hass_client with Prometheus component."""
# Reset registry
prometheus_client.REGISTRY = prometheus_client.CollectorRegistry(auto_describe=True)
prometheus_client.ProcessCollector(registry=prometheus_client.REGISTRY)
prometheus_client.PlatformCollector(registry=prometheus_client.REGISTRY)
prometheus_client.GCCollector(registry=prometheus_client.REGISTRY)
reset_prometheus_registry()

config = {}
if namespace is not None:
Expand Down Expand Up @@ -1165,13 +1162,15 @@ async def test_disabling_entity(


@pytest.mark.parametrize("namespace", [""])
@pytest.mark.parametrize("unavailable_state", [STATE_UNAVAILABLE, STATE_UNKNOWN])
async def test_entity_becomes_unavailable_with_export(
hass: HomeAssistant,
entity_registry: er.EntityRegistry,
client: ClientSessionGenerator,
sensor_entities: dict[str, er.RegistryEntry],
unavailable_state: str,
) -> None:
"""Test an entity that becomes unavailable is still exported."""
"""Test an entity that becomes unavailable/unknown is still exported."""
data = {**sensor_entities}

await hass.async_block_till_done()
Expand Down Expand Up @@ -1215,7 +1214,7 @@ async def test_entity_becomes_unavailable_with_export(

# Make sensor_1 unavailable.
set_state_with_entry(
hass, data["sensor_1"], STATE_UNAVAILABLE, data["sensor_1_attributes"]
hass, data["sensor_1"], unavailable_state, data["sensor_1_attributes"]
)

await hass.async_block_till_done()
Expand Down Expand Up @@ -1284,6 +1283,125 @@ async def test_entity_becomes_unavailable_with_export(
)


@pytest.mark.parametrize("unavailable_state", [STATE_UNAVAILABLE, STATE_UNKNOWN])
async def test_entity_becomes_unavailable_without_export(
    hass: HomeAssistant,
    hass_client: ClientSessionGenerator,
    entity_registry: er.EntityRegistry,
    sensor_entities: dict[str, er.RegistryEntry],
    unavailable_state: str,
) -> None:
    """Check that metrics are withdrawn while an entity is unavailable/unknown.

    The component is set up with ``export_unavailable_metrics`` disabled.
    The test then walks sensor_1 through three phases: initial value,
    unavailable/unknown state, and a restored value.
    """
    # Exact exposition lines expected for the humidity sensor, which is never
    # touched by this test and must therefore stay the same in every phase.
    humidity_lines = (
        'sensor_humidity_percent{domain="sensor",'
        'entity="sensor.outside_humidity",'
        'friendly_name="Outside Humidity"} 54.0',
        'state_change_total{domain="sensor",'
        'entity="sensor.outside_humidity",'
        'friendly_name="Outside Humidity"} 1.0',
        'entity_available{domain="sensor",'
        'entity="sensor.outside_humidity",'
        'friendly_name="Outside Humidity"} 1.0',
    )
    # Exact exposition lines expected for the temperature sensor before it
    # goes away.
    initial_temperature_lines = (
        'sensor_temperature_celsius{domain="sensor",'
        'entity="sensor.outside_temperature",'
        'friendly_name="Outside Temperature"} 15.6',
        'state_change_total{domain="sensor",'
        'entity="sensor.outside_temperature",'
        'friendly_name="Outside Temperature"} 1.0',
        'entity_available{domain="sensor",'
        'entity="sensor.outside_temperature",'
        'friendly_name="Outside Temperature"} 1.0',
    )
    # Exact exposition lines expected for the temperature sensor once it has
    # come back; state_change is reset in this configuration.
    restored_temperature_lines = (
        'sensor_temperature_celsius{domain="sensor",'
        'entity="sensor.outside_temperature",'
        'friendly_name="Outside Temperature"} 201.0',
        'state_change_total{domain="sensor",'
        'entity="sensor.outside_temperature",'
        'friendly_name="Outside Temperature"} 1.0',
        'entity_available{domain="sensor",'
        'entity="sensor.outside_temperature",'
        'friendly_name="Outside Temperature"} 1.0',
    )

    reset_prometheus_registry()
    setup_ok = await async_setup_component(
        hass,
        prometheus.DOMAIN,
        {
            prometheus.DOMAIN: {
                prometheus.CONF_PROM_NAMESPACE: "",
                prometheus.CONF_EXPORT_UNAVAILABLE_METRICS: False,
            }
        },
    )
    assert setup_ok
    await hass.async_block_till_done()
    client = await hass_client()

    entities = dict(sensor_entities)
    sensor_1 = entities["sensor_1"]
    sensor_1_attributes = entities["sensor_1_attributes"]

    # Phase 1: both sensors are exported with their initial values.
    await hass.async_block_till_done()
    body = await generate_latest_metrics(client)

    for expected in (*initial_temperature_lines, *humidity_lines):
        assert expected in body

    # Phase 2: make sensor_1 unavailable/unknown.
    set_state_with_entry(hass, sensor_1, unavailable_state, sensor_1_attributes)

    await hass.async_block_till_done()
    body = await generate_latest_metrics(client)

    # Every metric labelled with the withdrawn entity must be gone.
    for fragment in (
        'entity="sensor.outside_temperature"',
        'friendly_name="Outside Temperature"',
    ):
        assert all(fragment not in line for line in body)

    # The other sensor is unchanged.
    for expected in humidity_lines:
        assert expected in body

    # Phase 3: bring sensor_1 back and check that it returned.
    set_state_with_entry(hass, sensor_1, 201.0, sensor_1_attributes)

    await hass.async_block_till_done()
    body = await generate_latest_metrics(client)

    for expected in restored_temperature_lines:
        assert expected in body


@pytest.fixture(name="sensor_entities")
async def sensor_fixture(
hass: HomeAssistant, entity_registry: er.EntityRegistry
Expand Down Expand Up @@ -2109,6 +2227,14 @@ def set_state_with_entry(
)


def reset_prometheus_registry() -> None:
    """Install a fresh global prometheus registry.

    A new ``CollectorRegistry`` replaces ``prometheus_client.REGISTRY`` and
    the process, platform and GC collectors are registered on it.
    """
    fresh_registry = prometheus_client.CollectorRegistry(auto_describe=True)
    prometheus_client.REGISTRY = fresh_registry
    for collector_factory in (
        prometheus_client.ProcessCollector,
        prometheus_client.PlatformCollector,
        prometheus_client.GCCollector,
    ):
        collector_factory(registry=fresh_registry)


@pytest.fixture(name="mock_client")
def mock_client_fixture():
"""Mock the prometheus client."""
Expand Down

0 comments on commit be744a1

Please sign in to comment.