#!/usr/bin/python26
###
### backmon.commands.status.dssu
###
"""Report status of NetBackup disk staging storage units (DSSUs).

Prints one formatted row per DSSU across all configured environments:
active job count, image/staging counts, backlog, and capacity figures
taken from ``df -k``, ``ls -l`` listings, and ``nbdevquery`` output.

NOTE: Python 2 source (see shebang and ``except Exception, e`` syntax).
"""

import sys
import os
import os.path
import glob
import re

from optparse import OptionParser
from guppy import hpy

from ....lib import *

from backup_monitoring.debug import *
from backup_monitoring.math import *

from backup_monitoring.parsing.parsers import bpstulist
from backup_monitoring.parsing.parsers import df
from backup_monitoring.parsing.parsers import dsu_ls_l
from backup_monitoring.parsing.parsers import nbstlutil
from backup_monitoring.parsing.parsers import nbdevquery
from backup_monitoring.parsing.parsers import bpdbjobs

usage = 'usage: %prog -e environment status dssu'

parser = OptionParser(usage=usage)
# NOTE(review): help text appears to be missing a word — probably meant
# "only include servers that are updating monitoring data".
parser.add_option('-u', '--updating', action='store_true', default=False, dest='updating', help='only include that are updating monitoring data')

def run(args, kwargs):
    """Entry point for ``status dssu``.

    args   -- command-line argument list, parsed with the module-level parser.
    kwargs -- dict of framework globals; every ALL_CAPS key (e.g. ENVIRONMENTS)
              is injected into the local namespace via exec below.

    Raises BackmonError on any failure while gathering or printing status.
    """
    #
    # add kwargs to local namespace
    #
    # NOTE(review): exec-based injection of ALL_CAPS keys into locals is
    # fragile (and would not work under Python 3); ENVIRONMENTS is expected
    # to arrive this way — confirm against the command dispatcher.
    for key in kwargs.keys():
        if re.compile('^[A-Z][A-Z_]+$').match(key):
            exec(key + ' = kwargs[\'' + key + '\']')

    (options, args) = parser.parse_args(args)

    # Feeds to load from the master server vs. each media server before parsing.
    master_feeds = ['nbemmcmd_machinealias_getaliases', 'bpstulist', 'nbdevquery_listdv_stype_basicdisk', 'nbdevquery_listdv_stype_advanceddisk', 'nbdevquery_listdv_stype_puredisk', 'df', 'nbstlutil_list', 'bpdbjobs_most_columns']
    media_feeds = ['df', 'dsu_ls_l', ]

    try:
        environments = ENVIRONMENTS.values()

        # Heap profiler handle; only heapsize() is logged, hp itself is unused.
        hp = hpy()

        DEBUG('HEAPSIZE=%s' % (heapsize()))

        #
        # Load raw feed data and parse it into per-environment structures.
        #
        for environment in environments:
            environment.load_feeds(master=master_feeds, media=media_feeds)
            environment.parse_aliases()
            environment.parse_stunits()
            environment.parse_df_data()
            environment.parse_dsu_contents()
            environment.parse_lifecycle_images()
            environment.parse_jobs()
            environment.parse_disk_pools()

        DEBUG('HEAPSIZE=%s' % (heapsize()))

        # Column header and separator for the fixed-width report.
        print('%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s' % ('ENVIRONMENT'.center(15), 'DSSU'.center(25), 'SERVER'.center(8), 'STATUS'.center(6), 'MOUNTPOINT'.center(35), 'JOBS'.center(4), 'IMAGES'.center(6), 'STAGED'.center(6), '%STG'.center(4), '%FUL'.center(4), 'BACKLOG'.center(10), 'SIZE'.center(10), 'USED'.center(10), 'WRITTEN'.center(10), 'AVAIL'.center(10)))
        print('=============== ========================= ======== ====== =================================== ==== ====== ====== ==== ==== ========== ========== ========== ========== ==========')

        # storage-unit label -> number of active backup jobs
        active_jobs = {}

        for environment in environments:

            #
            # count active backup jobs per storage unit
            #
            for job in environment.jobs:
                if job.stunit is None:
                    continue
                if job.stunit not in active_jobs:
                    active_jobs[job.stunit] = 0
                if job.backup and job.active:
                    active_jobs[job.stunit] += 1

            #
            # summarize other stunit statistics
            #
            for stunit in environment.stunits:

                # Map host connection to canonical server name; the '-backup'
                # suffix is stripped to match environment.updates keys.
                server = environment.resolve_alias(stunit.host_connection)
                server = server.replace('-backup', '')

                # monitored == True when this server is sending update data.
                if server in environment.updates:
                    monitored = True
                else:
                    monitored = False

                label = stunit.label

                #
                # initialize active job counts
                #
                if label not in active_jobs:
                    active_jobs[label] = 0

                #
                # DSSU specific
                #
                # Only Basic-disk storage units with staging enabled are DSSUs.
                if stunit.storage_unit_type == 'Disk' and stunit.media_subtype == 'Basic' and stunit.stage_data == 'yes':

                    path = stunit.path
                    media_subtype = stunit.media_subtype
                    disk_type = ''
                    disk_pool = None

                    #
                    # metrics from nbdevquery
                    #
                    # NOTE(review): the DiskPool branch is unreachable here —
                    # the enclosing if already requires media_subtype == 'Basic'.
                    if( media_subtype == 'Basic' and label in environment.disk_pools):
                        disk_pool = environment.disk_pools[label]

                    if( media_subtype == 'DiskPool' and stunit.disk_pool in environment.disk_pools):
                        disk_pool = environment.disk_pools[stunit.disk_pool]

                    # Report fields default to blank when data is unavailable.
                    status = ''
                    size = ''
                    used = ''
                    avail = ''
                    pct_full = ''
                    written = ''
                    jobs = ''
                    images = ''
                    staged = ''
                    pct_staged = ''
                    backlog = ''

                    #
                    # get dsu size and available capacity from df -k
                    #
                    # The while True loop is a single-pass "first match wins"
                    # construct: each branch breaks out once it has (or cannot
                    # get) filesystem figures. df reports in KB, hence * 1024.0.
                    if server in environment.df_data:

                        while True:

                            #
                            # path matches a mountpoint
                            #
                            if path in environment.df_data[server]:
                                fs = environment.df_data[server][path]
                                fs_bytes_size = fs.kbytes * 1024.0
                                size = pp_bytes(fs_bytes_size)
                                fs_bytes_used = fs.used * 1024.0
                                used = pp_bytes(fs_bytes_used)
                                fs_bytes_avail = fs.avail * 1024.0
                                avail = pp_bytes(fs_bytes_avail)
                                pct_full = '%01.0f%%' % (fs.pct_full)
                                break

                            #
                            # path is a top-level subdirectory of a mountpoint
                            #
                            head, tail = os.path.split(path)
                            if head in environment.df_data[server]:
                                fs = environment.df_data[server][head]
                                fs_bytes_size = fs.kbytes * 1024.0
                                size = pp_bytes(fs_bytes_size)
                                fs_bytes_used = fs.used * 1024.0
                                used = pp_bytes(fs_bytes_used)
                                fs_bytes_avail = fs.avail * 1024.0
                                avail = pp_bytes(fs_bytes_avail)
                                pct_full = '%01.0f%%' % (fs.pct_full)
                                break

                            #
                            # break if we cannot match stunit path to a mountpoint
                            #
                            break

                    #
                    # get size and capacity from nbdevquery if not available from df -k
                    #
                    # nbdevquery reports capacity in GB, hence * 1024.0 ** 3.
                    elif disk_pool:
                        size = pp_bytes(int(float(disk_pool.total_capacity_gb) * 1024.0 * 1024.0 * 1024.0))
                        used = pp_bytes(((float(disk_pool.total_capacity_gb) - float(disk_pool.free_space_gb)) * 1024.0 * 1024.0 * 1024.0))
                        avail = pp_bytes(int(float(disk_pool.free_space_gb) * 1024.0 * 1024.0 * 1024.0))
                        if( disk_pool.total_capacity_gb > 0.0 ):
                            pct_full = '%01.0f%%' % ((((float(disk_pool.total_capacity_gb) - float(disk_pool.free_space_gb)) / float(disk_pool.total_capacity_gb)) * 100.0))
                        else:
                            pct_full = '%01.0f%%' % (disk_pool.use_pct)

                    #
                    # get number of images and amount staged
                    #
                    # image id -> total bytes of all fragment files for that image.
                    dsu_image_sizes = {}
                    # Matches NetBackup image fragment filenames of the form
                    # <client>_<timestamp>_<suffix>.<ext>; group(1) is the image id.
                    re_image_id = re.compile('^([a-zA-Z0-9\-_.]+_[0-9]+)_([a-zA-Z0-9\-_.]+)\.([a-zA-Z]+)$')

                    #
                    # get dsu usage from ls -l
                    #
                    if server in environment.dsu_contents:
                        if path in environment.dsu_contents[server]:
                            # NOTE(review): 'dir' and 'file' shadow builtins.
                            dir = environment.dsu_contents[server][path]
                            ls_bytes_used = 0
                            for file in dir.files:
                                ls_bytes_used += file.size
                                match = re_image_id.match(file.filename)
                                if match:
                                    image_id = match.group(1)
                                    if image_id not in dsu_image_sizes:
                                        dsu_image_sizes[image_id] = 0
                                    dsu_image_sizes[image_id] += file.size
                            written = pp_bytes(ls_bytes_used)

                            # Tally staged vs. backlog images; an image counts as
                            # staged when it appears in nbstlutil lifecycle data.
                            image_count = 0
                            image_bytes = 0
                            staged_count = 0
                            staged_bytes = 0
                            backlog_count = 0
                            backlog_bytes = 0
                            for image_id, image_size in dsu_image_sizes.items():
                                image_count += 1
                                image_bytes += image_size
                                if image_id in environment.lifecycle_images:
                                    staged_count += 1
                                    staged_bytes += image_size
                                else:
                                    backlog_count += 1
                                    backlog_bytes += image_size
                            images = '%d' % (image_count)
                            staged = '%d' % (staged_count)
                            if image_bytes > 0:
                                pct_staged = '%.0f%%' % (((float(staged_bytes) / float(image_bytes)) * 100.0))
                            # Only show a backlog figure when unstaged images exist.
                            if image_count > staged_count:
                                backlog = pp_bytes(backlog_bytes)

                    #
                    # status from nbdevquery
                    #
                    # UP requires both administrative and internal up flags.
                    if disk_pool:
                        if( 'AdminUp' in disk_pool.flags and 'InternalUp' in disk_pool.flags ):
                            status = 'UP'
                        else:
                            status = 'DOWN'

                    #
                    # get active job count
                    #
                    if label in active_jobs:
                        jobs = '%d' % (active_jobs[label])

                    # With -u/--updating, only print rows for monitored servers.
                    if((options.updating and monitored) or (not options.updating)):
                        print('%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s' % (environment.name.center(15), label.rjust(25), server.ljust(8), status.center(6), path.ljust(35), jobs.rjust(4), images.rjust(6), staged.rjust(6), pct_staged.rjust(4), pct_full.rjust(4), backlog.rjust(10), size.rjust(10), used.rjust(10), written.rjust(10), avail.rjust(10)))

    # NOTE(review): wrapping in BackmonError loses the original traceback
    # under Python 2; consider logging before re-raising.
    except Exception, e:
        raise BackmonError, e