[976] | 1 | #!/usr/bin/python26
|
---|
| 2 | ###
|
---|
| 3 | ### backmon.commands.status.dssu
|
---|
| 4 | ###
|
---|
| 5 |
|
---|
| 6 | import sys
|
---|
| 7 | import os
|
---|
| 8 | import os.path
|
---|
| 9 | import glob
|
---|
| 10 | import re
|
---|
| 11 |
|
---|
| 12 | from optparse import OptionParser
|
---|
| 13 | from guppy import hpy
|
---|
| 14 |
|
---|
| 15 | from ....lib import *
|
---|
| 16 |
|
---|
| 17 | from backup_monitoring.debug import *
|
---|
| 18 | from backup_monitoring.math import *
|
---|
| 19 |
|
---|
| 20 | from backup_monitoring.parsing.parsers import bpstulist
|
---|
| 21 | from backup_monitoring.parsing.parsers import df
|
---|
| 22 | from backup_monitoring.parsing.parsers import dsu_ls_l
|
---|
| 23 | from backup_monitoring.parsing.parsers import nbstlutil
|
---|
| 24 | from backup_monitoring.parsing.parsers import nbdevquery
|
---|
| 25 | from backup_monitoring.parsing.parsers import bpdbjobs
|
---|
| 26 |
|
---|
# Command-line interface for the ``status dssu`` subcommand.
usage = 'usage: %prog -e environment status dssu'

parser = OptionParser(usage=usage)
# -u/--updating restricts the report to storage units whose media server is
# currently delivering monitoring updates (see the ``monitored`` flag in run()).
# Fix: original help text was missing its noun ("only include that are ...").
parser.add_option('-u', '--updating', action='store_true', default=False, dest='updating', help='only include storage units on servers that are updating monitoring data')
def run(args, kwargs):
    """Render the ``status dssu`` report.

    Prints one row per disk staging storage unit (DSSU) across all
    configured environments, summarizing capacity (from ``df -k`` or
    ``nbdevquery``), on-disk image/staging counts (from ``ls -l`` of the
    DSU path) and active backup job counts (from ``bpdbjobs``).

    args   -- subcommand argument list, parsed with the module-level parser
    kwargs -- shared framework state; ALL_CAPS keys (e.g. ENVIRONMENTS)
              are injected into the local namespace below
    """

    #
    # add kwargs to local namespace
    #
    # NOTE(review): this exec-based injection is load-bearing — it is how
    # ENVIRONMENTS (used in the try block below) enters scope. Only keys
    # matching ^[A-Z][A-Z_]+$ are imported.
    for key in kwargs.keys():

        if re.compile('^[A-Z][A-Z_]+$').match(key):
            exec(key + ' = kwargs[\'' + key + '\']')

    (options, args) = parser.parse_args(args)

    # feeds (pre-collected command output) to load from the master and
    # media servers before parsing
    master_feeds = ['nbemmcmd_machinealias_getaliases', 'bpstulist', 'nbdevquery_listdv_stype_basicdisk', 'nbdevquery_listdv_stype_advanceddisk', 'nbdevquery_listdv_stype_puredisk', 'df', 'nbstlutil_list', 'bpdbjobs_most_columns']
    media_feeds = ['df', 'dsu_ls_l', ]

    try:

        environments = ENVIRONMENTS.values()

        # guppy heap-profiler handle; heap size is logged via DEBUG below
        hp = hpy()

        DEBUG('HEAPSIZE=%s' % (heapsize()))

        #
        # load and parse every required feed for every environment
        #
        for environment in environments:
            environment.load_feeds(master=master_feeds, media=media_feeds)
            environment.parse_aliases()
            environment.parse_stunits()
            environment.parse_df_data()
            environment.parse_dsu_contents()
            environment.parse_lifecycle_images()
            environment.parse_jobs()
            environment.parse_disk_pools()

        DEBUG('HEAPSIZE=%s' % (heapsize()))

        # report header and underline
        print('%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s' % ('ENVIRONMENT'.center(15), 'DSSU'.center(25), 'SERVER'.center(8), 'STATUS'.center(6), 'MOUNTPOINT'.center(35), 'JOBS'.center(4), 'IMAGES'.center(6), 'STAGED'.center(6), '%STG'.center(4), '%FUL'.center(4), 'BACKLOG'.center(10), 'SIZE'.center(10), 'USED'.center(10), 'WRITTEN'.center(10), 'AVAIL'.center(10)))
        print('=============== ========================= ======== ====== =================================== ==== ====== ====== ==== ==== ========== ========== ========== ========== ==========')

        # storage-unit label -> count of active backup jobs on it
        active_jobs = {}

        for environment in environments:

            #
            # count active backup jobs per storage unit
            #
            for job in environment.jobs:

                if job.stunit is None:
                    continue

                if job.stunit not in active_jobs:
                    active_jobs[job.stunit] = 0

                if job.backup and job.active:
                    active_jobs[job.stunit] += 1

            #
            # summarize other stunit statistics
            #
            for stunit in environment.stunits:

                # canonical server name; stripping '-backup' makes the name
                # match the keys used by the df/dsu feed dictionaries
                server = environment.resolve_alias(stunit.host_connection)
                server = server.replace('-backup', '')

                # "monitored" = server appears in the updates feed; used by
                # the -u/--updating row filter at the bottom
                if server in environment.updates:
                    monitored = True
                else:
                    monitored = False

                label = stunit.label

                #
                # initialize active job counts
                #
                if label not in active_jobs:
                    active_jobs[label] = 0

                #
                # DSSU specific
                #
                # only Basic disk storage units with staging enabled are
                # reported; all other stunit types fall through silently
                if stunit.storage_unit_type == 'Disk' and stunit.media_subtype == 'Basic' and stunit.stage_data == 'yes':

                    path = stunit.path

                    media_subtype = stunit.media_subtype
                    disk_type = ''
                    disk_pool = None

                    #
                    # metrics from nbdevquery
                    #
                    if( media_subtype == 'Basic' and label in environment.disk_pools):

                        disk_pool = environment.disk_pools[label]

                    # NOTE(review): unreachable here — media_subtype is always
                    # 'Basic' inside this branch; preserved as written
                    if( media_subtype == 'DiskPool' and stunit.disk_pool in environment.disk_pools):

                        disk_pool = environment.disk_pools[stunit.disk_pool]

                    # report columns default to blank when no data source
                    # can populate them
                    status = ''
                    size = ''
                    used = ''
                    avail = ''
                    pct_full = ''
                    written = ''
                    jobs = ''
                    images = ''
                    staged = ''
                    pct_staged = ''
                    backlog = ''

                    #
                    # get dsu size and available capacity from df -k
                    #
                    if server in environment.df_data:

                        # single-pass loop: exists only so break can exit
                        # after the first successful mountpoint match
                        while True:

                            #
                            # path matches a mountpoint
                            #
                            if path in environment.df_data[server]:

                                fs = environment.df_data[server][path]

                                # df figures are kilobytes; convert to bytes
                                # before pretty-printing
                                fs_bytes_size = fs.kbytes * 1024.0
                                size = pp_bytes(fs_bytes_size)

                                fs_bytes_used = fs.used * 1024.0
                                used = pp_bytes(fs_bytes_used)

                                fs_bytes_avail = fs.avail * 1024.0
                                avail = pp_bytes(fs_bytes_avail)

                                pct_full = '%01.0f%%' % (fs.pct_full)

                                break

                            #
                            # path is a top-level subdirectory of a mountpoint
                            #
                            head, tail = os.path.split(path)
                            if head in environment.df_data[server]:

                                fs = environment.df_data[server][head]

                                fs_bytes_size = fs.kbytes * 1024.0
                                size = pp_bytes(fs_bytes_size)

                                fs_bytes_used = fs.used * 1024.0
                                used = pp_bytes(fs_bytes_used)

                                fs_bytes_avail = fs.avail * 1024.0
                                avail = pp_bytes(fs_bytes_avail)

                                pct_full = '%01.0f%%' % (fs.pct_full)

                                break

                            #
                            # break if we cannot match stunit path to a mountpoint
                            #
                            break

                    #
                    # get size and capacity from nbdevquery if not available from df -k
                    #
                    elif disk_pool:

                        # nbdevquery capacities are gigabytes; convert to bytes
                        size = pp_bytes(int(float(disk_pool.total_capacity_gb) * 1024.0 * 1024.0 * 1024.0))
                        used = pp_bytes(((float(disk_pool.total_capacity_gb) - float(disk_pool.free_space_gb)) * 1024.0 * 1024.0 * 1024.0))
                        avail = pp_bytes(int(float(disk_pool.free_space_gb) * 1024.0 * 1024.0 * 1024.0))

                        if( disk_pool.total_capacity_gb > 0.0 ):
                            pct_full = '%01.0f%%' % ((((float(disk_pool.total_capacity_gb) - float(disk_pool.free_space_gb)) / float(disk_pool.total_capacity_gb)) * 100.0))
                        else:
                            # zero capacity: fall back to the pool's own
                            # reported utilisation percentage
                            pct_full = '%01.0f%%' % (disk_pool.use_pct)

                    #
                    # get number of images and amount staged
                    #
                    # image id -> total bytes across that image's fragment files
                    dsu_image_sizes = {}
                    # presumably matches NetBackup image fragment filenames of
                    # the form <client>_<timestamp>_<suffix>.<ext>; group(1)
                    # is taken as the image id — TODO confirm against feed data
                    re_image_id = re.compile('^([a-zA-Z0-9\-_.]+_[0-9]+)_([a-zA-Z0-9\-_.]+)\.([a-zA-Z]+)$')

                    #
                    # get dsu usage from ls -l
                    #
                    if server in environment.dsu_contents:

                        if path in environment.dsu_contents[server]:

                            dir = environment.dsu_contents[server][path]

                            ls_bytes_used = 0

                            for file in dir.files:

                                # WRITTEN column counts every file, even ones
                                # that do not look like image fragments
                                ls_bytes_used += file.size

                                match = re_image_id.match(file.filename)

                                if match:

                                    image_id = match.group(1)

                                    if image_id not in dsu_image_sizes:
                                        dsu_image_sizes[image_id] = 0

                                    dsu_image_sizes[image_id] += file.size

                            written = pp_bytes(ls_bytes_used)

                    # split the on-disk images into staged (listed in the
                    # lifecycle image feed) vs. backlog (not yet staged)
                    image_count = 0
                    image_bytes = 0

                    staged_count = 0
                    staged_bytes = 0

                    backlog_count = 0
                    backlog_bytes = 0

                    for image_id, image_size in dsu_image_sizes.items():

                        image_count += 1
                        image_bytes += image_size

                        if image_id in environment.lifecycle_images:

                            staged_count += 1
                            staged_bytes += image_size

                        else:

                            backlog_count += 1
                            backlog_bytes += image_size

                    images = '%d' % (image_count)
                    staged = '%d' % (staged_count)

                    if image_bytes > 0:
                        pct_staged = '%.0f%%' % (((float(staged_bytes) / float(image_bytes)) * 100.0))

                    if image_count > staged_count:
                        backlog = pp_bytes(backlog_bytes)

                    #
                    # status from nbdevquery
                    #
                    if disk_pool:

                        # pool counts as UP only when both the admin and
                        # internal state flags say so
                        if( 'AdminUp' in disk_pool.flags and 'InternalUp' in disk_pool.flags ):
                            status = 'UP'
                        else:
                            status = 'DOWN'

                    #
                    # get active job count
                    #
                    if label in active_jobs:
                        jobs = '%d' % (active_jobs[label])

                    # emit the row unless filtered out by -u/--updating
                    if((options.updating and monitored) or (not options.updating)):
                        print('%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s' % (environment.name.center(15), label.rjust(25), server.ljust(8), status.center(6), path.ljust(35), jobs.rjust(4), images.rjust(6), staged.rjust(6), pct_staged.rjust(4), pct_full.rjust(4), backlog.rjust(10), size.rjust(10), used.rjust(10), written.rjust(10), avail.rjust(10)))

    # Python 2-only syntax below; wraps any failure in the tool's own
    # BackmonError (imported via the star imports at the top of the file)
    except Exception, e:

        raise BackmonError, e