#!/usr/bin/python26
###
### backmon.commands.check.dssu
###
### Check command for a single disk staging storage unit (DSSU).
###
### Prints one status line with a "perfdata=..." suffix and exits with
### 0 when the unit is up, 2 when it is down and 3 when its state cannot
### be determined or the monitoring data cannot be processed.
###

import sys
import os
import os.path
import glob
import re

from optparse import OptionParser
from guppy import hpy

from ....lib import *

from backup_monitoring.debug import *
from backup_monitoring.math import *

from backup_monitoring.parsing.parsers import bpstulist
from backup_monitoring.parsing.parsers import df
from backup_monitoring.parsing.parsers import dsu_ls_l
from backup_monitoring.parsing.parsers import nbstlutil
from backup_monitoring.parsing.parsers import nbdevquery
from backup_monitoring.parsing.parsers import bpdbjobs

usage = 'usage: %prog -e [environment] check dssu [dssu]'

parser = OptionParser(usage=usage)
parser.add_option('-u', '--updating', action='store_true', default=False, dest='updating', help='only include that are updating monitoring data')

#
# perfdata reported whenever no real figures are available
#
_UNKNOWN_PERFDATA = 'perfdata=100;0;0'

#
# file names in a DSSU directory; group 1 is used as the image id
#
_RE_IMAGE_ID = re.compile(r'^([a-zA-Z0-9\-_.]+_[0-9]+)_([a-zA-Z0-9\-_.]+)\.([a-zA-Z]+)$')


def _lookup_setting(kwargs, name):
    """Return setting *name* from kwargs, falling back to module globals.

    Raises KeyError when the name is defined in neither place.
    """
    if name in kwargs:
        return kwargs[name]
    return globals()[name]


def _df_pct_full(df_by_mount, path):
    """Return pct_full of the filesystem holding *path*, or None.

    *path* is matched either as a mountpoint itself or as a top-level
    subdirectory of a mountpoint; deeper nesting is not matched.
    """
    for mountpoint in (path, os.path.split(path)[0]):
        if mountpoint in df_by_mount:
            return df_by_mount[mountpoint].pct_full
    return None


def _disk_pool_pct_full(disk_pool):
    """Return the percentage of *disk_pool* in use as a float.

    Computed from total and free capacity when the total is positive,
    otherwise taken from the pool's own use_pct value.
    """
    # convert before comparing: a raw, unconverted value must not decide
    # whether the division below is safe
    total_gb = float(disk_pool.total_capacity_gb)
    if total_gb > 0.0:
        free_gb = float(disk_pool.free_space_gb)
        return ((total_gb - free_gb) / total_gb) * 100.0
    return float(disk_pool.use_pct)


def _image_sizes(listing):
    """Return a dict mapping image id to the summed size of its files.

    Files whose names do not match the image file pattern are ignored.
    """
    sizes = {}
    for entry in listing.files:
        match = _RE_IMAGE_ID.match(entry.filename)
        if match:
            image_id = match.group(1)
            sizes[image_id] = sizes.get(image_id, 0) + entry.size
    return sizes


def _pct_staged(image_sizes, lifecycle_images):
    """Return the percentage of image bytes whose id is in *lifecycle_images*.

    An empty directory counts as fully staged (100.0); images that add up
    to zero bytes count as not staged (0.0).
    """
    if not image_sizes:
        return 100.0
    total_bytes = 0
    staged_bytes = 0
    for image_id, image_size in image_sizes.items():
        total_bytes += image_size
        if image_id in lifecycle_images:
            staged_bytes += image_size
    if total_bytes > 0:
        return (float(staged_bytes) / float(total_bytes)) * 100.0
    return 0.0


def _dssu_state(environment, stunit):
    """Return the ExtendedDict of status and percentages for one DSSU."""
    server = environment.resolve_alias(stunit.host_connection)
    server = server.replace('-backup', '')
    label = stunit.label
    path = stunit.path

    #
    # metrics from nbdevquery (basic disk entries are keyed by label)
    #
    disk_pool = None
    if label in environment.disk_pools:
        disk_pool = environment.disk_pools[label]

    #
    # get capacity from df -k, from nbdevquery if df -k cannot provide it,
    # and assume a full unit when neither source has data
    #
    pct_full = None
    if server in environment.df_data:
        pct_full = _df_pct_full(environment.df_data[server], path)
    if pct_full is None and disk_pool:
        pct_full = _disk_pool_pct_full(disk_pool)
    if pct_full is None:
        pct_full = 100.0
    pct_free = 100.0 - pct_full

    #
    # get amount staged from ls -l; without a listing nothing is
    # considered staged
    # NOTE(review): assumes the staging summary only applies when a
    # listing exists for this path - confirm against the intended layout
    #
    pct_staged = 0.0
    if server in environment.dsu_contents:
        if path in environment.dsu_contents[server]:
            listing = environment.dsu_contents[server][path]
            pct_staged = _pct_staged(_image_sizes(listing), environment.lifecycle_images)
    pct_not_staged = 100.0 - pct_staged

    #
    # status from nbdevquery; stays empty when the unit is not listed
    #
    status = ''
    if disk_pool:
        if 'AdminUp' in disk_pool.flags and 'InternalUp' in disk_pool.flags:
            status = 'UP'
        else:
            status = 'DOWN'

    pct_usable = pct_free + pct_staged
    pct_not_usable = 100.0 - pct_usable

    state = ExtendedDict()
    state['status'] = status
    state['pct_full'] = pct_full
    state['pct_free'] = pct_free
    state['pct_staged'] = pct_staged
    state['pct_not_staged'] = pct_not_staged
    state['pct_usable'] = pct_usable
    state['pct_not_usable'] = pct_not_usable
    return state


def run(args, kwargs):
    """Check one disk staging storage unit and exit with its status.

    args   -- command arguments; the first positional one is the label of
              the storage unit to check
    kwargs -- settings passed in by the caller; ENVIRONMENTS is read from
              here (or from module globals when absent)

    Always ends through sys.exit(): 0 when the unit is up, 2 when it is
    down, 3 when no unit was given, the unit is not in the monitoring
    data, its status is unknown or any other error occurs.
    """
    (options, args) = parser.parse_args(args)

    if not args:
        ERROR('No storage unit specified!')
        sys.exit(3)

    stunit_label = args[0]
    states = ExtendedDict()

    master_feeds = ['nbemmcmd_machinealias_getaliases', 'bpstulist', 'nbdevquery_listdv_stype_basicdisk', 'nbdevquery_listdv_stype_advanceddisk', 'nbdevquery_listdv_stype_puredisk', 'df', 'nbstlutil_list']
    media_feeds = ['df', 'dsu_ls_l', ]

    try:

        environments = _lookup_setting(kwargs, 'ENVIRONMENTS').values()

        # NOTE(review): the result is not used in this module; the call is
        # kept in case heapsize() depends on guppy being set up - confirm
        hp = hpy()

        DEBUG('HEAPSIZE=%s' % (heapsize()))

        for environment in environments:
            environment.load_feeds(master=master_feeds, media=media_feeds)
            environment.parse_aliases()
            environment.parse_stunits()
            environment.parse_df_data()
            environment.parse_dsu_contents()
            environment.parse_lifecycle_images()
            environment.parse_disk_pools()

        DEBUG('HEAPSIZE=%s' % (heapsize()))

        for environment in environments:
            for stunit in environment.stunits:
                #
                # DSSU specific: basic disk units with staging enabled
                #
                if stunit.storage_unit_type != 'Disk':
                    continue
                if stunit.media_subtype != 'Basic':
                    continue
                if stunit.stage_data != 'yes':
                    continue
                states[stunit.label] = _dssu_state(environment, stunit)

        if stunit_label not in states:
            ERROR('Could not find storage unit %s in monitoring data | %s' % (stunit_label, _UNKNOWN_PERFDATA))
            sys.exit(3)

        state = states[stunit_label]
        perfdata = 'perfdata=%.0f;%.0f;%.0f' % (state.pct_full, state.pct_staged / 100.0 * state.pct_full, state.pct_staged)

        if state.status == 'UP':
            print('OK: Storage unit is up. | %s' % (perfdata))
            sys.exit(0)

        if state.status == 'DOWN':
            print('Storage unit is DOWN. | %s' % (perfdata))
            sys.exit(2)

        # the status is empty when nbdevquery has no entry for the unit;
        # report that explicitly instead of returning without any output
        print('UNKNOWN: Storage unit status could not be determined. | %s' % (perfdata))
        sys.exit(3)

    except Exception:
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the exits above pass through this handler untouched
        print('DSSU monitoring data unavailable! | %s' % (_UNKNOWN_PERFDATA))
        sys.exit(3)