import math
import time
import requests
import socket
from killswitch import KillSwitch
from timber import LumberJack, axe
from config import _CLIENT_CONFIG as _c

# NOTE(review): `lj` is not referenced again in this file; presumably creating
# the LumberJack configures the log file used by `axe` -- confirm in timber.
lj = LumberJack(log_file='power_client.log', log_level='d')

# Upper bound (seconds) for any single HTTP request. Without a timeout,
# `requests` can block indefinitely on a stalled connection, which would stop
# the watchdog from ever counting a failed check.
# NOTE(review): 10 seconds is a conservative default -- tune for the deployment.
REQUEST_TIMEOUT_SECONDS = 10


class PowerClient:
    """Poll a power-status HTTP endpoint and engage the KillSwitch on failure.

    The client repeatedly requests ``self.url``. A response with status 200
    counts as "power is up"; anything else (including a network error or a
    timeout) counts as a failed check. Once ``RETRY_ATTEMPTS`` consecutive
    checks have failed, ``KillSwitch.power_down()`` is called and the loop
    ends. Every ``config_check_cycles`` iterations the client also re-reads
    its settings from the server's configuration endpoint.
    """

    def __init__(self):
        """Load settings from the local config, then try a remote refresh."""
        self.DRYRUN = _c['dryrun']
        self.RETRY_ATTEMPTS = _c['retry']['attempts']
        self.RETRY_COUNT = 0
        self.config_check_count = 0
        self.HEALTHY_RETRY_SECONDS = _c['retry']['seconds']['healthy']
        self.UNHEALTHY_RETRY_SECONDS = _c['retry']['seconds']['unhealthy']
        self.RETRY_SECONDS = self.HEALTHY_RETRY_SECONDS
        self.config_check_cycles = _c['config_check']['cycles']
        self.power_server = _c['server']['address']
        self.server_port = _c['server']['port']
        self.api_base_endpoint = _c['api']['base_endpoint']
        # The derived values below depend on the attributes set above, so the
        # assignment order matters.
        self.hostname = self.get_hostname()
        self.api_config_endpoint = self.get_api_config_endpoint()
        self.config_url = self.get_config_url()
        self.url = self.get_url()
        # check for new config at init
        self.update_config()

    def get_config(self):
        """Fetch the remote client configuration.

        Returns:
            The decoded JSON body when the server answers with status 200,
            otherwise ``None`` (non-200 status, network error, timeout, or a
            body that is not valid JSON).
        """
        try:
            reply = requests.get(self.config_url, timeout=REQUEST_TIMEOUT_SECONDS)
            if reply.status_code == 200:
                return reply.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decode errors on older `requests`
            # releases where they are not RequestException subclasses.
            axe.debug(f'Request to {self.config_url} failed: {e!r}')
        return None

    def _apply_setting(self, attr, new_value, message):
        """Set ``self.<attr>`` to ``new_value`` if it differs; return True if changed."""
        if getattr(self, attr) == new_value:
            return False
        axe.info(f'{message}: {new_value}')
        setattr(self, attr, new_value)
        return True

    def update_config(self):
        """Pull the remote configuration and apply any changed settings.

        Logs an error and leaves the current settings untouched when the
        configuration cannot be fetched or is missing an expected key.
        """
        axe.info('Checking for new configuration')
        c = self.get_config()
        if not c:
            axe.error(f'Unable to get to config endpoint: {self.config_url}')
            return

        # Read every value before applying any of them so that a malformed
        # payload cannot leave the client half-updated or kill the poll loop.
        try:
            dryrun = c['dryrun']
            attempts = c['retry']['attempts']
            healthy = c['retry']['seconds']['healthy']
            unhealthy = c['retry']['seconds']['unhealthy']
            cycles = c['config_check']['cycles']
            address = c['server']['address']
            port = c['server']['port']
            base_endpoint = c['api']['base_endpoint']
        except (KeyError, TypeError) as e:
            axe.error(f'Malformed configuration from {self.config_url}: {e!r}')
            return

        self._apply_setting('DRYRUN', dryrun, 'New DRYRUN configuration')
        self._apply_setting('RETRY_ATTEMPTS', attempts, 'New RETRY_ATTEMPTS configuration')
        if self._apply_setting('HEALTHY_RETRY_SECONDS', healthy,
                               'New HEALTHY_RETRY_SECONDS configuration'):
            self.RETRY_SECONDS = self.HEALTHY_RETRY_SECONDS
        self._apply_setting('UNHEALTHY_RETRY_SECONDS', unhealthy,
                            'New UNHEALTHY_RETRY_SECONDS configuration')
        self._apply_setting('config_check_cycles', cycles,
                            'New config_check_cycles configuration')

        server_changed = self._apply_setting('power_server', address,
                                             'New power_server configuration')
        port_changed = self._apply_setting('server_port', port,
                                           'New server_port configuration')
        base_changed = self._apply_setting('api_base_endpoint', base_endpoint,
                                           'New base api endpoint')

        # Rebuild the derived URLs once, after all of their inputs are final.
        if base_changed:
            # set_api_config_endpoint() also refreshes config_url.
            self.set_api_config_endpoint()
        elif server_changed or port_changed:
            self.set_config_url()
        if server_changed or port_changed or base_changed:
            self.set_url()

    def get_api_config_endpoint(self):
        """Return the per-host configuration path under the API base endpoint."""
        return f'{self.api_base_endpoint}/_CONFIG/client/{self.hostname}'

    def get_config_url(self):
        """Return the full URL of the configuration endpoint."""
        return f'http://{self.power_server}:{self.server_port}/{self.api_config_endpoint}'

    def get_url(self):
        """Return the full URL polled for power status."""
        return f'http://{self.power_server}:{self.server_port}/{self.api_base_endpoint}/{self.hostname}'

    def set_api_config_endpoint(self):
        """Recompute ``api_config_endpoint`` and the ``config_url`` built from it."""
        self.api_config_endpoint = self.get_api_config_endpoint()
        self.set_config_url()

    def set_config_url(self):
        """Recompute ``config_url`` from the current server settings."""
        self.config_url = self.get_config_url()

    def set_url(self):
        """Recompute ``url`` from the current server settings."""
        self.url = self.get_url()

    @staticmethod
    def get_hostname():
        """Return this machine's hostname, or ``None`` if the lookup fails."""
        try:
            hn = socket.gethostname()
        except Exception as e:
            # Deliberately broad: a failed lookup is logged, not fatal.
            hn = None
            axe.error(f"Could not get hostname because an error occurred:\n{e}")
        return hn

    def get_status_code(self):
        """Request ``self.url`` and return the response.

        Despite the name, this returns the whole ``requests`` response object,
        not just its status code.

        Raises:
            requests.RequestException: on network errors or timeout.
        """
        return requests.get(self.url, timeout=REQUEST_TIMEOUT_SECONDS)

    def _power_is_up(self):
        """Return True only when the power endpoint answers with status 200."""
        try:
            response = self.get_status_code()
        except requests.RequestException as e:
            # An unreachable or stalled server is a failed check, not a crash.
            axe.debug(f'Request to {self.url} failed: {e!r}')
            return False
        return response.status_code == 200

    def check_power(self):
        """Poll the power endpoint until the retry budget is exhausted.

        Sleeps ``RETRY_SECONDS`` between checks, using the healthy interval
        after a successful check and the unhealthy interval after a failed
        one. After ``RETRY_ATTEMPTS`` consecutive failures the KillSwitch is
        engaged and the method returns.
        """
        axe.debug(
            f'Checking {self.url} for power with {self.RETRY_ATTEMPTS} retry attempts, '
            f'DRYRUN={self.DRYRUN}'
        )
        while True:
            self.config_check_count += 1
            self.RETRY_COUNT += 1
            power_is_up = self._power_is_up()

            if self.config_check_cycles == self.config_check_count:
                self.update_config()
                self.config_check_count = 0

            if power_is_up:
                if self.RETRY_COUNT == 1:
                    axe.info('Power is up')
                else:
                    axe.info(f'Power is back up after {self.RETRY_COUNT} checks')
                self.RETRY_COUNT = 0
                self.RETRY_SECONDS = self.HEALTHY_RETRY_SECONDS
            else:
                self.RETRY_SECONDS = self.UNHEALTHY_RETRY_SECONDS
                # Computed after the config refresh so the logged number
                # reflects the current RETRY_ATTEMPTS.
                remaining_tries = self.RETRY_ATTEMPTS - self.RETRY_COUNT
                if self.RETRY_COUNT < math.floor(self.RETRY_ATTEMPTS / 2):
                    axe.warning(
                        f'Attempt {self.RETRY_COUNT} request failed, power may be down. '
                        f'Will try {remaining_tries} more times before initiating shutdown.'
                    )
                elif self.RETRY_COUNT < self.RETRY_ATTEMPTS:
                    axe.error(
                        f'Attempt {self.RETRY_COUNT} requests failed, power appears down. '
                        f'Will try {remaining_tries} more times before initiating shutdown.'
                    )
                else:
                    axe.critical(
                        f'Attempt {self.RETRY_COUNT} requests failed, power appears down. '
                        'Engaging Killswitch!'
                    )
                    ks = KillSwitch(DRYRUN=self.DRYRUN)
                    ks.power_down()
                    break
            time.sleep(self.RETRY_SECONDS)


if __name__ == "__main__":
    pc = PowerClient()
    pc.check_power()