Telegraf & InfluxDB2 alerting if number of processes from procstat changes

I am trying to build an alert that fires if the number of instances of a process changes. For instance, if I have two tomcat instances running on a system and one stops, I would like an alert to be sent.

I have this:
from(bucket: "telegraf")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) => r["_measurement"] == "procstat")
|> filter(fn: (r) => r["_field"] == "pid")
|> filter(fn: (r) => r["user"] == "tomcat")
|> unique()
|> count()

And this is returning the number of processes, but how would I track and alert on a change?

Any ideas?

Thank you in advance!

Thank you,
Matt

Hi Matt, and welcome to the Grafana forum.

Can you clarify a few things for me…

  1. You said you have two tomcat instances running. Are these under the tag called “user”, and if so, shouldn’t you have something like this?
    |> filter(fn: (r) => r["user"] == "tomcat1" or r["user"] == "tomcat2")

  2. You said that the query you wrote returns the number of processes. Does the field pid represent the process? Or does your query return the number of users?

It might be easiest if you just shared the output of your query.

I presume you are using Grafana v9 or later. If so, you will need to use its Unified Alerting.

Hi Grant!

Thank you!

I am on v9.2.3.

Both instances are run by the same user, tomcat.

Here is some sample data from executing the query in Grafana.

{
  "state": "Done",
  "series": [
    {
      "name": "procstat",
      "refId": "A",
      "meta": {
        "executedQueryString": "from(bucket: \"telegraf\")\n  |> range(start: 2022-11-10T12:16:10.495Z, stop: 2022-11-10T13:16:10.495Z)\n  |> filter(fn: (r) => r[\"_measurement\"] == \"procstat\")\n  |> filter(fn: (r) => r[\"_field\"] == \"pid\")\n  |> filter(fn: (r) => r[\"user\"] == \"tomcat\")\n  |> unique()\n  |> count()"
      },
      "fields": [
        {
          "name": "_value",
          "type": "number",
          "typeInfo": {
            "frame": "int64",
            "nullable": true
          },
          "labels": {
            "_field": "pid",
            "_start": "2022-11-10 12:16:10.495 +0000 UTC",
            "_stop": "2022-11-10 13:16:10.495 +0000 UTC",
            "host": "[host_1]",
            "process_name": "java",
            "user": "tomcat"
          },
          "config": {},
          "values": [
            2
          ],
          "entities": {},
          "state": {
            "displayName": "_value {_field=\"pid\", _start=\"2022-11-10 12:16:10.495 +0000 UTC\", _stop=\"2022-11-10 13:16:10.495 +0000 UTC\", host=\"[host_1]\", process_name=\"java\", user=\"tomcat\"}",
            "multipleFrames": true
          }
        }
      ],
      "length": 1
    },
    {
      "name": "procstat",
      "refId": "A",
      "fields": [
        {
          "name": "_value",
          "type": "number",
          "typeInfo": {
            "frame": "int64",
            "nullable": true
          },
          "labels": {
            "_field": "pid",
            "_start": "2022-11-10 12:16:10.495 +0000 UTC",
            "_stop": "2022-11-10 13:16:10.495 +0000 UTC",
            "host": "[host_2]",
            "process_name": "java",
            "user": "tomcat"
          },
          "config": {},
          "values": [
            2
          ],
          "entities": {},
          "state": {
            "displayName": "_value {_field=\"pid\", _start=\"2022-11-10 12:16:10.495 +0000 UTC\", _stop=\"2022-11-10 13:16:10.495 +0000 UTC\", host=\"[host_2]\", process_name=\"java\", user=\"tomcat\"}",
            "multipleFrames": true
          }
        }
      ],
      "length": 1
    }
  ],
  "annotations": [],
  "request": {
    "app": "explore",
    "dashboardId": 0,
    "timezone": "browser",
    "startTime": 1668086170503,
    "interval": "2s",
    "intervalMs": 2000,
    "panelId": "Q-c7d53391-d72a-4936-bc7b-15311a7c55ef-0",
    "targets": [
      {
        "refId": "A",
        "key": "Q-c7d53391-d72a-4936-bc7b-15311a7c55ef-0",
        "datasource": {
          "type": "influxdb",
          "uid": "h5nw1XeGz"
        },
        "query": "from(bucket: \"telegraf\")\n  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n  |> filter(fn: (r) => r[\"_measurement\"] == \"procstat\")\n  |> filter(fn: (r) => r[\"_field\"] == \"pid\")\n  |> filter(fn: (r) => r[\"user\"] == \"tomcat\")\n  |> unique()\n  |> count()"
      }
    ],
    "range": {
      "from": "2022-11-10T12:16:10.495Z",
      "to": "2022-11-10T13:16:10.495Z",
      "raw": {
        "from": "now-1h",
        "to": "now"
      }
    },
    "requestId": "explore_left",
    "rangeRaw": {
      "from": "now-1h",
      "to": "now"
    },
    "scopedVars": {
      "__interval": {
        "text": "2s",
        "value": "2s"
      },
      "__interval_ms": {
        "text": 2000,
        "value": 2000
      }
    },
    "maxDataPoints": 1863,
    "liveStreaming": false,
    "endTime": 1668086170686
  },
  "timeRange": {
    "from": "2022-11-10T12:16:10.495Z",
    "to": "2022-11-10T13:16:10.495Z",
    "raw": {
      "from": "now-1h",
      "to": "now"
    }
  },
  "timings": {
    "dataProcessingTime": 0
  },
  "graphFrames": [],
  "tableFrames": [
    {
      "name": "procstat",
      "refId": "A",
      "meta": {
        "executedQueryString": "from(bucket: \"telegraf\")\n  |> range(start: 2022-11-10T12:16:10.495Z, stop: 2022-11-10T13:16:10.495Z)\n  |> filter(fn: (r) => r[\"_measurement\"] == \"procstat\")\n  |> filter(fn: (r) => r[\"_field\"] == \"pid\")\n  |> filter(fn: (r) => r[\"user\"] == \"tomcat\")\n  |> unique()\n  |> count()"
      },
      "fields": [
        {
          "name": "_value",
          "type": "number",
          "typeInfo": {
            "frame": "int64",
            "nullable": true
          },
          "labels": {
            "_field": "pid",
            "_start": "2022-11-10 12:16:10.495 +0000 UTC",
            "_stop": "2022-11-10 13:16:10.495 +0000 UTC",
            "host": "[host_1]",
            "process_name": "java",
            "user": "tomcat"
          },
          "config": {},
          "values": [
            2
          ],
          "entities": {},
          "state": {
            "displayName": "_value {_field=\"pid\", _start=\"2022-11-10 12:16:10.495 +0000 UTC\", _stop=\"2022-11-10 13:16:10.495 +0000 UTC\", host=\"[host_1]\", process_name=\"java\", user=\"tomcat\"}",
            "multipleFrames": true
          }
        }
      ],
      "length": 1
    },
    {
      "name": "procstat",
      "refId": "A",
      "fields": [
        {
          "name": "_value",
          "type": "number",
          "typeInfo": {
            "frame": "int64",
            "nullable": true
          },
          "labels": {
            "_field": "pid",
            "_start": "2022-11-10 12:16:10.495 +0000 UTC",
            "_stop": "2022-11-10 13:16:10.495 +0000 UTC",
            "host": "[host_2]",
            "process_name": "java",
            "user": "tomcat"
          },
          "config": {},
          "values": [
            2
          ],
          "entities": {},
          "state": {
            "displayName": "_value {_field=\"pid\", _start=\"2022-11-10 12:16:10.495 +0000 UTC\", _stop=\"2022-11-10 13:16:10.495 +0000 UTC\", host=\"[host_2]\", process_name=\"java\", user=\"tomcat\"}",
            "multipleFrames": true
          }
        }
      ],
      "length": 1
    }
  ],
  "logsFrames": [],
  "traceFrames": [],
  "nodeGraphFrames": [],
  "graphResult": null,
  "tableResult": {
    "fields": [
      {
        "name": "_value",
        "type": "number",
        "config": {},
        "values": [
          2
        ]
      }
    ]
  },
  "logsResult": null
}

Thank you,
Matt