In an earlier blog post I showed how to customise Operations Insights alarms.
In this post we will look at another code sample, which uses an OCI Function to turn a generic alarm message into a short, crisp message like the one below:
Severity : CRITICAL
Resource : <resourceDisplayName>
Resource ID : ocid1.instance.oc1…..ancd
Alarm Query and Value: {'CpuUtilization[1m].mean()': '70.08'}
import io
import oci
import json
def ons_publish(**kwargs):
    """Publish a condensed alarm message to an OCI Notifications topic.

    Keyword arguments read (each is ``None`` when omitted):
        topic_id: passed as ``topic_id`` to ``publish_message``.
        title: used as the message title.
        severity, resource_display_name, resource_id, metric_value:
            interpolated into the four-line message body.

    This is best-effort: any exception raised while creating the signer or
    client, or while publishing, is printed and swallowed. Returns ``None``.
    """
    try:
        # Sign requests with the resource principals signer; the client is
        # created with an empty config dict and relies on the signer alone.
        rp_signer = oci.auth.signers.get_resource_principals_signer()
        notifications = oci.ons.NotificationDataPlaneClient({}, signer=rp_signer)

        destination_topic = kwargs.get('topic_id')
        display_name = kwargs.get('resource_display_name')
        severity = kwargs.get('severity')
        metric_value = kwargs.get('metric_value')
        title = kwargs.get('title')
        resource_ocid = kwargs.get('resource_id')

        # One line per field, joined with newlines.
        body_lines = [
            f'Severity : {severity}',
            f'Resource : {display_name}',
            f'Resource ID : {resource_ocid}',
            f'Alarm Query and Value: {metric_value}',
        ]
        alarm_body = '\n'.join(body_lines)

        print("Publishing message to ONS topic")
        details = oci.ons.models.MessageDetails(body=alarm_body, title=title)
        notifications.publish_message(
            topic_id=destination_topic,
            message_details=details,
        )
    except Exception as ons_exception:
        print(ons_exception)
def handler(ctx, data: io.BytesIO = None):
    """Function entry point: condense an alarm notification and republish it.

    Parses ``data`` as a JSON alarm payload. When the payload ``type`` is
    ``OK_TO_FIRING`` or ``REPEAT`` and the first ``alarmMetaData`` entry has
    status ``FIRING``, the resource name, resource id, severity, first metric
    value and title are handed to ``ons_publish``. Any other notification
    (for example ``FIRING_TO_OK``) is logged and ignored.

    Args:
        ctx: invocation context; ``ctx.Config()`` must contain ``topic_id``.
        data: request body holding the JSON alarm payload.

    Raises:
        KeyError: if ``topic_id`` is missing from the function configuration.

    All errors raised while parsing or processing the payload are printed
    and swallowed. Returns ``None``.
    """
    # Read the target topic from the function configuration. This lookup is
    # outside the try block, so a missing key is raised rather than printed.
    topic_id = dict(ctx.Config())['topic_id']

    try:
        body = json.loads(data.getvalue())
        alarm_type = body.get("type")
        # Only the first alarmMetaData entry is inspected.
        alarm = body["alarmMetaData"][0]
        alarm_status = alarm["status"]

        # Forward only OK_TO_FIRING and REPEAT notifications that are still
        # firing; FIRING_TO_OK and anything else is ignored.
        if alarm_type not in ("OK_TO_FIRING", "REPEAT") or alarm_status != "FIRING":
            # Report both values: either one can be the reason for skipping.
            print(
                "Ignoring alarm notification: "
                f"type={alarm_type}, status={alarm_status}"
            )
            return

        # Only the first dimensions entry and first metric value are used.
        dimensions = alarm["dimensions"][0]
        ons_publish(
            resource_display_name=dimensions["resourceDisplayName"],
            resource_id=dimensions["resourceId"],
            topic_id=topic_id,
            severity=body.get("severity"),
            metric_value=alarm["metricValues"][0],
            title=body.get("title"),
        )
    except Exception as ex:
        # Top-level boundary: log and swallow. repr() keeps the exception
        # type visible (a bare KeyError prints only the missing key).
        print(f"Failed to process alarm notification: {ex!r}")
Deploy the function using Code Editor or Cloud Shell. Once it is deployed, set topic_id in the function configuration.
To test that the function is working, create a sample alarm.json file in the /tmp directory:
{
"dedupeKey": "5db34-4f299401",
"title": "Testing Alarm",
"type": "REPEAT",
"severity": "CRITICAL",
"timestampEpochMillis": 1705743900000,
"timestamp": "2024-01-20T09:45:00Z",
"alarmMetaData": [
{
"id": "alarm ocid",
"status": "FIRING",
"severity": "CRITICAL",
"namespace": "oci_computeagent",
"query": "CpuUtilization[1m].mean() > 60",
"totalMetricsFiring": 1,
"dimensions": [
{
"instancePoolId": "Default",
"resourceDisplayName": "computevmname",
"faultDomain": "FAULT-DOMAIN-2",
"resourceId": "resource ocid",
"availabilityDomain": "NoEK:EU-FRANKFURT-1-AD-1",
"imageId": "image ocid",
"shape": "VM.Standard.E4.Flex",
"dedicatedVmHostId": "DefaultVmHostId",
"region": "eu-frankfurt-1"
}
],
"alarmUrl": "https://cloud.oracle.com/monitoring/alarms",
"alarmSummary": "Alarm \"Testing Alarm\" is in a \"FIRING\" state; because the resources with dimensions listed below meet the trigger rule: \"CpuUtilization[1m].mean() > 60\", with a trigger delay of 1 minute",
"metricValues": [
{
"CpuUtilization[1m].mean()": "70.08"
}
]
}
],
"notificationType": "Split messages per metric stream",
"version": 1.5
}
In Cloud Shell or Code Editor, invoke the function using the following command:
cat /tmp/alarm.json | fn invoke <application> <functionname>