powerapi/power_client.py

159 lines
6.3 KiB
Python

import math
import time
import requests
import socket
from killswitch import KillSwitch
from timber import LumberJack, axe
from config import _CLIENT_CONFIG as _c
# Module-level LumberJack instance writing to 'power_client.log' at level 'd'.
# NOTE(review): presumably this also configures the shared `axe` logger used
# below, and 'd' presumably means debug -- confirm against the timber package.
lj = LumberJack(
    log_file='power_client.log',
    log_level='d',
)
class PowerClient:
    """Poll a power-status server and engage the kill switch when it stays down.

    Settings are seeded from the local ``_CLIENT_CONFIG`` mapping and can be
    overridden at runtime by a JSON document served at ``config_url``. That
    document is read with the same keys as the local one: ``dryrun``,
    ``retry.attempts``, ``retry.seconds.healthy``, ``retry.seconds.unhealthy``,
    ``config_check.cycles``, ``server.address``, ``server.port`` and
    ``api.base_endpoint``.
    """

    # Upper bound, in seconds, for any single HTTP request. Without a timeout
    # `requests` can block forever on a half-dead server, which would stop the
    # failure counter from ever reaching the kill-switch threshold.
    REQUEST_TIMEOUT_SECONDS = 10

    def __init__(self):
        """Load the local configuration, derive the URLs, then ask the server for overrides."""
        self.DRYRUN = _c['dryrun']
        self.RETRY_ATTEMPTS = _c['retry']['attempts']
        self.RETRY_COUNT = 0
        self.config_check_count = 0
        self.HEALTHY_RETRY_SECONDS = _c['retry']['seconds']['healthy']
        self.UNHEALTHY_RETRY_SECONDS = _c['retry']['seconds']['unhealthy']
        self.RETRY_SECONDS = self.HEALTHY_RETRY_SECONDS
        self.config_check_cycles = _c['config_check']['cycles']
        self.power_server = _c['server']['address']
        self.server_port = _c['server']['port']
        self.api_base_endpoint = _c['api']['base_endpoint']
        self.hostname = self.get_hostname()
        self.api_config_endpoint = self.get_api_config_endpoint()
        self.config_url = self.get_config_url()
        self.url = self.get_url()
        # Check for new config at init; a server that is down only logs an error.
        self.update_config()

    def get_config(self):
        """Fetch this host's configuration document from the server.

        Returns:
            The decoded JSON body when the server answers HTTP 200, otherwise
            ``None`` (non-200 status, network error, timeout or invalid JSON).
        """
        try:
            reply = requests.get(self.config_url, timeout=self.REQUEST_TIMEOUT_SECONDS)
            if reply.status_code != 200:
                return None
            return reply.json()
        except (requests.RequestException, ValueError) as e:
            # An unreachable config endpoint must not crash the power monitor.
            axe.error(f'Could not fetch configuration from {self.config_url}: {e}')
            return None

    def update_config(self):
        """Apply any settings from the server-side configuration that differ.

        The whole document is read before anything is applied, so a config
        with missing keys is rejected as a unit instead of being half-applied.
        Each changed setting is logged. URLs are rebuilt when the server
        address, port or base endpoint changes.
        """
        axe.info('Checking for new configuration')
        c = self.get_config()
        if not c:
            axe.error(f'Unable to get to config endpoint: {self.config_url}')
            return

        try:
            retry = c['retry']
            # (attribute name, log label, new value)
            candidates = (
                ('DRYRUN', 'New DRYRUN configuration', c['dryrun']),
                ('RETRY_ATTEMPTS', 'New RETRY_ATTEMPTS configuration',
                 retry['attempts']),
                ('HEALTHY_RETRY_SECONDS', 'New HEALTHY_RETRY_SECONDS configuration',
                 retry['seconds']['healthy']),
                ('UNHEALTHY_RETRY_SECONDS', 'New UNHEALTHY_RETRY_SECONDS configuration',
                 retry['seconds']['unhealthy']),
                ('config_check_cycles', 'New config_check_cycles configuration',
                 c['config_check']['cycles']),
                ('power_server', 'New power_server configuration',
                 c['server']['address']),
                ('server_port', 'New server_port configuration',
                 c['server']['port']),
                ('api_base_endpoint', 'New base api endpoint',
                 c['api']['base_endpoint']),
            )
        except (KeyError, TypeError) as e:
            axe.error(f'Ignoring malformed configuration from {self.config_url}: {e!r}')
            return

        changed = set()
        for attr, label, value in candidates:
            if getattr(self, attr) != value:
                axe.info(f'{label}: {value}')
                setattr(self, attr, value)
                changed.add(attr)

        if 'HEALTHY_RETRY_SECONDS' in changed:
            self.RETRY_SECONDS = self.HEALTHY_RETRY_SECONDS
        if 'api_base_endpoint' in changed:
            # Also refreshes config_url.
            self.set_api_config_endpoint()
        if changed & {'power_server', 'server_port', 'api_base_endpoint'}:
            self.set_config_url()
            self.set_url()

    def get_api_config_endpoint(self):
        """Return the config path for this host, relative to the server root."""
        return f'{self.api_base_endpoint}/_CONFIG/client/{self.hostname}'

    def get_config_url(self):
        """Return the full URL of this host's configuration document."""
        return f'http://{self.power_server}:{self.server_port}/{self.api_config_endpoint}'

    def get_url(self):
        """Return the full URL polled for this host's power status."""
        return f'http://{self.power_server}:{self.server_port}/{self.api_base_endpoint}/{self.hostname}'

    def set_api_config_endpoint(self):
        """Recompute ``api_config_endpoint`` and the ``config_url`` built from it."""
        self.api_config_endpoint = self.get_api_config_endpoint()
        self.set_config_url()

    def set_config_url(self):
        """Recompute ``config_url`` from the current server settings."""
        self.config_url = self.get_config_url()

    def set_url(self):
        """Recompute ``url`` from the current server settings."""
        self.url = self.get_url()

    @staticmethod
    def get_hostname():
        """Return the local hostname, or ``None`` (after logging) if it cannot be read."""
        try:
            hn = socket.gethostname()
        except OSError as e:
            hn = None
            axe.error(f"Could not get hostname because an error occurred:\n{e}")
        return hn

    def get_status_code(self):
        """GET the power-status URL and return the response.

        Despite the name, this returns the whole ``requests`` response object,
        not just its status code. Network errors and timeouts propagate as
        ``requests.RequestException``.
        """
        return requests.get(self.url, timeout=self.REQUEST_TIMEOUT_SECONDS)

    def _poll_server(self):
        """Return the power-status response, or ``None`` if the request itself failed."""
        try:
            return self.get_status_code()
        except requests.RequestException as e:
            # An unreachable server is exactly the outage being watched for,
            # so it counts as a failed attempt instead of crashing the loop.
            axe.warning(f'Request to {self.url} failed: {e}')
            return None

    @staticmethod
    def _is_power_up(response):
        """Return True only for an actual HTTP 200 response."""
        return response is not None and response.status_code == 200

    def check_power(self):
        """Poll the server forever; engage the kill switch after too many failures.

        Each cycle polls ``url`` once. A 200 response resets the failure count
        and uses the healthy polling interval. Anything else counts as a failed
        attempt and switches to the unhealthy interval. Once ``RETRY_COUNT``
        reaches ``RETRY_ATTEMPTS`` a ``KillSwitch`` is created with the current
        ``DRYRUN`` flag, ``power_down()`` is called, and the method returns.
        Every ``config_check_cycles`` cycles the server-side configuration is
        re-read.
        """
        axe.debug(f'Checking {self.url} for power with {self.RETRY_ATTEMPTS} retry attempts, DRYRUN={self.DRYRUN}')
        while True:
            self.config_check_count += 1
            self.RETRY_COUNT += 1
            response = self._poll_server()

            if self.config_check_cycles == self.config_check_count:
                self.update_config()
                self.config_check_count = 0

            if self._is_power_up(response):
                if self.RETRY_COUNT == 1:
                    axe.info('Power is up')
                else:
                    axe.info(f'Power is back up after {self.RETRY_COUNT} checks')
                self.RETRY_COUNT = 0
                self.RETRY_SECONDS = self.HEALTHY_RETRY_SECONDS
            else:
                self.RETRY_SECONDS = self.UNHEALTHY_RETRY_SECONDS
                # Computed after update_config() so a freshly changed
                # RETRY_ATTEMPTS is reflected in the message.
                remaining_tries = self.RETRY_ATTEMPTS - self.RETRY_COUNT
                if self.RETRY_COUNT < self.RETRY_ATTEMPTS // 2:
                    axe.warning(
                        f'Attempt {self.RETRY_COUNT} request failed, power may be down. '
                        f'Will try {remaining_tries} more times before initiating shutdown.'
                    )
                elif self.RETRY_COUNT < self.RETRY_ATTEMPTS:
                    axe.error(
                        f'Attempt {self.RETRY_COUNT} requests failed, power appears down. '
                        f'Will try {remaining_tries} more times before initiating shutdown.'
                    )
                else:
                    axe.critical(
                        f'Attempt {self.RETRY_COUNT} requests failed, power appears down. '
                        'Engaging Killswitch!'
                    )
                    # NOTE(review): presumably power_down() shuts the host down
                    # unless DRYRUN is set -- confirm in the killswitch module.
                    ks = KillSwitch(DRYRUN=self.DRYRUN)
                    ks.power_down()
                    break
            time.sleep(self.RETRY_SECONDS)
# Script entry point: build the client (which loads its configuration) and
# start the polling loop.
if __name__ == "__main__":
    PowerClient().check_power()