add/correct a bunch of metrics
This commit is contained in:
parent
ff206c24cb
commit
4b7683e9d9
60
main.py
60
main.py
@ -14,7 +14,7 @@ from targets.fan import *
|
|||||||
from targets.cpu import *
|
from targets.cpu import *
|
||||||
from targets.drive import *
|
from targets.drive import *
|
||||||
from targets.memory import *
|
from targets.memory import *
|
||||||
import targets.power
|
from targets.power import *
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
@ -120,10 +120,45 @@ class BulkCollector(Collector):
|
|||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
|
||||||
def get_power_draw() -> float:
|
class PowerCollector(Collector):
|
||||||
verbose('collecting ilo_server_power_draw')
|
def collect(self) -> float:
|
||||||
val = snmp_get(config, targets.power.POWER_METER_READING)
|
verbose('collecting ilo_server_power_draw')
|
||||||
return val
|
try:
|
||||||
|
reading = snmp_get(config, POWER_METER_READING)
|
||||||
|
support = snmp_get(config, POWER_METER_SUPPORT)
|
||||||
|
status = snmp_get(config, POWER_METER_STATUS)
|
||||||
|
|
||||||
|
if not isinstance(reading, int):
|
||||||
|
print('expected power meter reading to be an int, got', type(reading))
|
||||||
|
print('value in question:', reading)
|
||||||
|
reading = -1
|
||||||
|
if not isinstance(support, int):
|
||||||
|
print('expected power meter support to be an int, got', type(support))
|
||||||
|
print('value in question:', support)
|
||||||
|
support = 1
|
||||||
|
if not isinstance(status, int):
|
||||||
|
print('expected power meter status to be an int, got', type(status))
|
||||||
|
print('value in question:', status)
|
||||||
|
status = 1
|
||||||
|
|
||||||
|
if support not in POWER_METER_SUPPORT_MAP:
|
||||||
|
print('ILO returned a value outside of the expected range for POWER_METER_SUPPORT:', support)
|
||||||
|
support_s = 'unknown'
|
||||||
|
else:
|
||||||
|
support_s = POWER_METER_SUPPORT_MAP[support]
|
||||||
|
if status not in POWER_METER_STATUS_MAP:
|
||||||
|
print('ILO returned a value outside of the expected range for POWER_METER_STATUS:', status)
|
||||||
|
status_s = 'unknown'
|
||||||
|
else:
|
||||||
|
status_s = POWER_METER_STATUS_MAP[status]
|
||||||
|
|
||||||
|
metric = GaugeMetricFamily('ilo_server_power_draw', 'Power draw of the server in watts', labels=['support', 'status'])
|
||||||
|
metric.add_metric([support_s, status_s], reading)
|
||||||
|
yield metric
|
||||||
|
except Exception as e:
|
||||||
|
print('Failed to scan SNMP, aborting collection')
|
||||||
|
SCAN_FAIL_COUNTER.inc()
|
||||||
|
raise e
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
@ -135,8 +170,7 @@ if __name__ == '__main__':
|
|||||||
ContextData(),
|
ContextData(),
|
||||||
)
|
)
|
||||||
|
|
||||||
power = Gauge("ilo_server_power_draw", "Power draw of the server in watts")
|
REGISTRY.register(PowerCollector())
|
||||||
power.set_function(get_power_draw)
|
|
||||||
|
|
||||||
no_value = BulkDummyValue('info')
|
no_value = BulkDummyValue('info')
|
||||||
|
|
||||||
@ -176,12 +210,12 @@ if __name__ == '__main__':
|
|||||||
DRIVE_INDEX,
|
DRIVE_INDEX,
|
||||||
'drive',
|
'drive',
|
||||||
not args.scan_drives_once,
|
not args.scan_drives_once,
|
||||||
('Information about installed drives', no_value, [DRIVE_PORT, DRIVE_BOX, DRIVE_BAY, DRIVE_VENDOR, DRIVE_MODEL, DRIVE_SERIAL, DRIVE_LINK_RATE, DRIVE_STATUS, DRIVE_CONDITION]),
|
('Information about installed drives', no_value, [DRIVE_PORT, DRIVE_BOX, DRIVE_BAY, DRIVE_VENDOR, DRIVE_SERIAL, DRIVE_FIRMWARE, DRIVE_LINK_RATE, DRIVE_SUPPORTS_PREDICTIVE_FAILURE_MONITORING, DRIVE_SMART_STATUS, DRIVE_MEDIA_TYPE, DRIVE_ROTATIONAL_SPEED, DRIVE_STATUS, DRIVE_CONDITION]),
|
||||||
('Sizes of installed drives in megabytes', DRIVE_SIZE, [DRIVE_PORT, DRIVE_BOX, DRIVE_BAY, DRIVE_VENDOR, DRIVE_MODEL, DRIVE_SERIAL]),
|
('Sizes of installed drives in megabytes', DRIVE_SIZE, [DRIVE_PORT, DRIVE_BOX, DRIVE_BAY, DRIVE_VENDOR, DRIVE_SERIAL]),
|
||||||
('Temperatures of installed drives in celsius', DRIVE_TEMP, [DRIVE_PORT, DRIVE_BOX, DRIVE_BAY, DRIVE_VENDOR, DRIVE_MODEL, DRIVE_SERIAL]),
|
('Temperatures of installed drives in celsius', DRIVE_TEMP, [DRIVE_PORT, DRIVE_BOX, DRIVE_BAY, DRIVE_VENDOR, DRIVE_SERIAL]),
|
||||||
('Temperature thresholds of installed drives in celsius', DRIVE_TEMP_THRESHOLD, [DRIVE_PORT, DRIVE_BOX, DRIVE_BAY, DRIVE_VENDOR, DRIVE_MODEL, DRIVE_SERIAL]),
|
('Temperature thresholds of installed drives in celsius', DRIVE_TEMP_THRESHOLD, [DRIVE_PORT, DRIVE_BOX, DRIVE_BAY, DRIVE_VENDOR, DRIVE_SERIAL]),
|
||||||
('Maximum temperatures of installed drives in celsius', DRIVE_TEMP_MAX, [DRIVE_PORT, DRIVE_BOX, DRIVE_BAY, DRIVE_VENDOR, DRIVE_MODEL, DRIVE_SERIAL]),
|
('Maximum temperatures of installed drives in celsius', DRIVE_TEMP_MAX, [DRIVE_PORT, DRIVE_BOX, DRIVE_BAY, DRIVE_VENDOR, DRIVE_SERIAL]),
|
||||||
('Reference time of installed drives in hours', DRIVE_REFERENCE_TIME, [DRIVE_PORT, DRIVE_BOX, DRIVE_BAY, DRIVE_VENDOR, DRIVE_MODEL, DRIVE_SERIAL]),
|
('Reference time of installed drives in hours', DRIVE_REFERENCE_TIME, [DRIVE_PORT, DRIVE_BOX, DRIVE_BAY, DRIVE_VENDOR, DRIVE_SERIAL]),
|
||||||
scan_method=scrape.detect_complex,
|
scan_method=scrape.detect_complex,
|
||||||
))
|
))
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ from snmp_groups import BulkEnums, BulkNumbers, BulkStrings
|
|||||||
|
|
||||||
DRIVE_INDEX = '1.3.6.1.4.1.232.3.2.5.1.1.2'
|
DRIVE_INDEX = '1.3.6.1.4.1.232.3.2.5.1.1.2'
|
||||||
|
|
||||||
# controller index?
|
# controller index
|
||||||
# DRIVE_CONTROLLER = BulkNumbers(
|
# DRIVE_CONTROLLER = BulkNumbers(
|
||||||
# (lambda i: (1, 3, 6, 1, 4, 1, 232, 3, 2, 5, 1, 1, 1) + i),
|
# (lambda i: (1, 3, 6, 1, 4, 1, 232, 3, 2, 5, 1, 1, 1) + i),
|
||||||
# 'controller'
|
# 'controller'
|
||||||
@ -39,9 +39,9 @@ DRIVE_SERIAL = BulkStrings(
|
|||||||
'serial',
|
'serial',
|
||||||
)
|
)
|
||||||
|
|
||||||
DRIVE_MODEL = BulkStrings(
|
DRIVE_FIRMWARE = BulkStrings(
|
||||||
(lambda i: (1, 3, 6, 1, 4, 1, 232, 3, 2, 5, 1, 1, 4) + i),
|
(lambda i: (1, 3, 6, 1, 4, 1, 232, 3, 2, 5, 1, 1, 4) + i),
|
||||||
'model',
|
'firmware',
|
||||||
)
|
)
|
||||||
|
|
||||||
DRIVE_SIZE = BulkNumbers(
|
DRIVE_SIZE = BulkNumbers(
|
||||||
@ -107,3 +107,49 @@ DRIVE_REFERENCE_TIME = BulkNumbers(
|
|||||||
(lambda i: (1, 3, 6, 1, 4, 1, 232, 3, 2, 5, 1, 1, 9) + i),
|
(lambda i: (1, 3, 6, 1, 4, 1, 232, 3, 2, 5, 1, 1, 9) + i),
|
||||||
'reference_time'
|
'reference_time'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
DRIVE_SUPPORTS_PREDICTIVE_FAILURE_MONITORING = BulkEnums(
|
||||||
|
(lambda i: (1, 3, 6, 1, 4, 1, 232, 3, 2, 5, 1, 1, 52) + i),
|
||||||
|
'supports_predictive_failure_monitoring',
|
||||||
|
{
|
||||||
|
1: 'other',
|
||||||
|
2: 'notAvailable',
|
||||||
|
3: 'available'
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
DRIVE_SMART_STATUS = BulkEnums(
|
||||||
|
(lambda i: (1, 3, 6, 1, 4, 1, 232, 3, 2, 5, 1, 1, 57) + i),
|
||||||
|
'smart_status',
|
||||||
|
{
|
||||||
|
1: 'other',
|
||||||
|
2: 'ok',
|
||||||
|
3: 'replaceDrive'
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
DRIVE_ROTATIONAL_SPEED = BulkEnums(
|
||||||
|
(lambda i: (1, 3, 6, 1, 4, 1, 232, 3, 2, 5, 1, 1, 59) + i),
|
||||||
|
'rotational_speed',
|
||||||
|
{
|
||||||
|
1: 'other',
|
||||||
|
2: '7200 rpm',
|
||||||
|
3: '10k rpm',
|
||||||
|
4: '15k rpm',
|
||||||
|
5: 'ssd',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
DRIVE_MEDIA_TYPE = BulkEnums(
|
||||||
|
(lambda i: (1, 3, 6, 1, 4, 1, 232, 3, 2, 5, 1, 1, 69) + i),
|
||||||
|
'media_type',
|
||||||
|
{
|
||||||
|
1: 'other',
|
||||||
|
2: 'rotatingPlatters',
|
||||||
|
3: 'solidState',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Many more drive attributes exist under this OID subtree; they have not been added yet for lack of time.
|
||||||
|
# Reference listing the remaining drive OIDs: https://oidref.com/1.3.6.1.4.1.232.3.2.5.1.1
|
||||||
|
@ -1,7 +1,15 @@
|
|||||||
|
|
||||||
POWER_METER_READING = '1.3.6.1.4.1.232.6.2.15.3.0'
|
POWER_METER_READING = '1.3.6.1.4.1.232.6.2.15.3.0'
|
||||||
|
POWER_METER_SUPPORT = '1.3.6.1.4.1.232.6.2.15.1.0'
|
||||||
|
POWER_METER_SUPPORT_MAP = {
|
||||||
|
1: 'other',
|
||||||
|
2: 'supported',
|
||||||
|
3: 'unsupported',
|
||||||
|
}
|
||||||
|
|
||||||
# I have no idea what these values mean (or map to). any help would be appreciated
|
POWER_METER_STATUS = '1.3.6.1.4.1.232.6.2.15.2.0'
|
||||||
# POWER_METER_SUPPORT = '1.3.6.1.4.1.232.6.2.15.1'
|
POWER_METER_STATUS_MAP = {
|
||||||
# POWER_METER_STATUS = '1.3.6.1.4.1.232.6.2.15.2'
|
1: 'other',
|
||||||
# POWER_METER_PREVIOUS_READING = '1.3.6.1.4.1.232.6.2.15.4'
|
2: 'present',
|
||||||
|
3: 'absent',
|
||||||
|
}
|
||||||
|
@ -49,8 +49,9 @@ TEMP_CONDITION = BulkEnums(
|
|||||||
'condition',
|
'condition',
|
||||||
{
|
{
|
||||||
1: 'other',
|
1: 'other',
|
||||||
2: 'normal',
|
2: 'ok',
|
||||||
3: 'high',
|
3: 'degraded',
|
||||||
|
4: 'failed',
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user