#!/usr/bin/python26
###
### backmon.commands.status.dssu
###

import sys
import os
import os.path
import glob
import re

from optparse import OptionParser
from guppy import hpy

from ....lib import *

from backup_monitoring.debug import *
from backup_monitoring.math import *

from backup_monitoring.parsing.parsers import bpstulist
from backup_monitoring.parsing.parsers import df
from backup_monitoring.parsing.parsers import dsu_ls_l
from backup_monitoring.parsing.parsers import nbstlutil
from backup_monitoring.parsing.parsers import nbdevquery
from backup_monitoring.parsing.parsers import bpdbjobs

usage = 'usage: %prog -e environment status dssu'

parser = OptionParser(usage=usage)
parser.add_option('-u', '--updating', action='store_true', default=False, dest='updating', help='only include storage units that are updating monitoring data')

def run(args, kwargs):
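    """
    Print a status table for the disk staging storage units (DSSUs) in each
    configured environment: active job counts, image and staged image counts,
    staging backlog, and capacity figures drawn from the df, ls -l and
    nbdevquery feeds.
    """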

    #
    # add kwargs to local namespace
    #
    for key in kwargs.keys():

        if re.compile('^[A-Z][A-Z_]+$').match(key):
            exec(key + ' = kwargs[\'' + key + '\']')

    (options, args) = parser.parse_args(args)

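    #
    # data feeds loaded for the master servers and the media servers (see load_feeds below)
    #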
    master_feeds = ['nbemmcmd_machinealias_getaliases', 'bpstulist', 'nbdevquery_listdv_stype_basicdisk', 'nbdevquery_listdv_stype_advanceddisk', 'nbdevquery_listdv_stype_puredisk', 'df', 'nbstlutil_list', 'bpdbjobs_most_columns']
    media_feeds = ['df', 'dsu_ls_l', ]

    try:

        environments = ENVIRONMENTS.values()

        hp = hpy()

        DEBUG('HEAPSIZE=%s' % (heapsize()))

        for environment in environments:
            environment.load_feeds(master=master_feeds, media=media_feeds)
            environment.parse_aliases()
            environment.parse_stunits()
            environment.parse_df_data()
            environment.parse_dsu_contents()
            environment.parse_lifecycle_images()
            environment.parse_jobs()
            environment.parse_disk_pools()

        DEBUG('HEAPSIZE=%s' % (heapsize()))

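        #
        # column widths in the header must line up with the ruler and the per-row format further down
        #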
        print('%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s' % ('ENVIRONMENT'.center(15), 'DSSU'.center(25), 'SERVER'.center(8), 'STATUS'.center(6), 'MOUNTPOINT'.center(35), 'JOBS'.center(4), 'IMAGES'.center(6), 'STAGED'.center(6), '%STG'.center(4), '%FUL'.center(4), 'BACKLOG'.center(10), 'SIZE'.center(10), 'USED'.center(10), 'WRITTEN'.center(10), 'AVAIL'.center(10)))
        print('=============== ========================= ======== ====== =================================== ==== ====== ====== ==== ==== ========== ========== ========== ========== ==========')

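        #
        # active_jobs maps a storage unit label to its count of active backup jobs
        #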
        active_jobs = {}

        for environment in environments:

            #
            # count active backup jobs per storage unit
            #
            for job in environment.jobs:

                if job.stunit is None:
                    continue

                if job.stunit not in active_jobs:
                    active_jobs[job.stunit] = 0

                if job.backup and job.active:
                    active_jobs[job.stunit] += 1

            #
            # summarize other stunit statistics
            #
            for stunit in environment.stunits:

                server = environment.resolve_alias(stunit.host_connection)
                server = server.replace('-backup', '')

                if server in environment.updates:
                    monitored = True
                else:
                    monitored = False

                label = stunit.label

                #
                # initialize active job counts
                #
                if label not in active_jobs:
                    active_jobs[label] = 0

                #
                # DSSU specific
                #
                if stunit.storage_unit_type == 'Disk' and stunit.media_subtype == 'Basic' and stunit.stage_data == 'yes':

                    path = stunit.path

                    media_subtype = stunit.media_subtype
                    disk_type = ''
                    disk_pool = None

                    #
                    # metrics from nbdevquery
                    #
                    if( media_subtype == 'Basic' and label in environment.disk_pools):

                        disk_pool = environment.disk_pools[label]

                    if( media_subtype == 'DiskPool' and stunit.disk_pool in environment.disk_pools):

                        disk_pool = environment.disk_pools[stunit.disk_pool]

                    status = ''
                    size = ''
                    used = ''
                    avail = ''
                    pct_full = ''
                    written = ''
                    jobs = ''
                    images = ''
                    staged = ''
                    pct_staged = ''
                    backlog = ''

                    #
                    # get dsu size and available capacity from df -k
                    #
                    if server in environment.df_data:

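                        #
                        # single pass: break out as soon as the stunit path is matched to a df entry
                        #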
                        while True:

                            #
                            # path matches a mountpoint
                            #
                            if path in environment.df_data[server]:

                                fs = environment.df_data[server][path]

                                fs_bytes_size = fs.kbytes * 1024.0
                                size = pp_bytes(fs_bytes_size)

                                fs_bytes_used = fs.used * 1024.0
                                used = pp_bytes(fs_bytes_used)

                                fs_bytes_avail = fs.avail * 1024.0
                                avail = pp_bytes(fs_bytes_avail)

                                pct_full = '%01.0f%%' % (fs.pct_full)

                                break

                            #
                            # path is a top-level subdirectory of a mountpoint
                            #
                            head, tail = os.path.split(path)
                            if head in environment.df_data[server]:

                                fs = environment.df_data[server][head]

                                fs_bytes_size = fs.kbytes * 1024.0
                                size = pp_bytes(fs_bytes_size)

                                fs_bytes_used = fs.used * 1024.0
                                used = pp_bytes(fs_bytes_used)

                                fs_bytes_avail = fs.avail * 1024.0
                                avail = pp_bytes(fs_bytes_avail)

                                pct_full = '%01.0f%%' % (fs.pct_full)

                                break

                            #
                            # break if we cannot match stunit path to a mountpoint
                            #
                            break

                    #
                    # get size and capacity from nbdevquery if not available from df -k
                    #
                    elif disk_pool:

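                        #
                        # nbdevquery reports capacity in gigabytes; convert to bytes before pretty-printing with pp_bytes()
                        #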
                        size = pp_bytes(int(float(disk_pool.total_capacity_gb) * 1024.0 * 1024.0 * 1024.0))
                        used = pp_bytes(((float(disk_pool.total_capacity_gb) - float(disk_pool.free_space_gb)) * 1024.0 * 1024.0 * 1024.0))
                        avail = pp_bytes(int(float(disk_pool.free_space_gb) * 1024.0 * 1024.0 * 1024.0))

                        if( disk_pool.total_capacity_gb > 0.0 ):
                            pct_full = '%01.0f%%' % ((((float(disk_pool.total_capacity_gb) - float(disk_pool.free_space_gb)) / float(disk_pool.total_capacity_gb)) * 100.0))
                        else:
                            pct_full = '%01.0f%%' % (disk_pool.use_pct)

                    #
                    # get number of images and amount staged
                    #
                    dsu_image_sizes = {}
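                    #
                    # fragment file names are expected to look like <name>_<ctime>_<suffix>.<ext>;
                    # group(1), the <name>_<ctime> prefix, identifies the backup image a fragment belongs to
                    #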
                    re_image_id = re.compile('^([a-zA-Z0-9\-_.]+_[0-9]+)_([a-zA-Z0-9\-_.]+)\.([a-zA-Z]+)$')

                    #
                    # get dsu usage from ls -l
                    #
                    if server in environment.dsu_contents:

                        if path in environment.dsu_contents[server]:

                            dir = environment.dsu_contents[server][path]

                            ls_bytes_used = 0

                            for file in dir.files:

                                ls_bytes_used += file.size

                                match = re_image_id.match(file.filename)

                                if match:

                                    image_id = match.group(1)

                                    if image_id not in dsu_image_sizes:
                                        dsu_image_sizes[image_id] = 0

                                    dsu_image_sizes[image_id] += file.size

                            written = pp_bytes(ls_bytes_used)

                            image_count = 0
                            image_bytes = 0

                            staged_count = 0
                            staged_bytes = 0

                            backlog_count = 0
                            backlog_bytes = 0

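                            #
                            # images still present in the lifecycle (nbstlutil) data count as staged;
                            # anything else is treated as backlog
                            #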
                            for image_id, image_size in dsu_image_sizes.items():

                                image_count += 1
                                image_bytes += image_size

                                if image_id in environment.lifecycle_images:

                                    staged_count += 1
                                    staged_bytes += image_size

                                else:

                                    backlog_count += 1
                                    backlog_bytes += image_size

                            images = '%d' % (image_count)
                            staged = '%d' % (staged_count)

                            if image_bytes > 0:
                                pct_staged = '%.0f%%' % (((float(staged_bytes) / float(image_bytes)) * 100.0))

                            if image_count > staged_count:
                                backlog = pp_bytes(backlog_bytes)

                    #
                    # status from nbdevquery
                    #
                    if disk_pool:

                        if( 'AdminUp' in disk_pool.flags and 'InternalUp' in disk_pool.flags ):
                            status = 'UP'
                        else:
                            status = 'DOWN'

                    #
                    # get active job count
                    #
                    if label in active_jobs:
                        jobs = '%d' % (active_jobs[label])

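                    #
                    # honor the --updating option: only print rows for servers that are
                    # sending monitoring updates
                    #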
                    if((options.updating and monitored) or (not options.updating)):
                        print('%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s' % (environment.name.center(15), label.rjust(25), server.ljust(8), status.center(6), path.ljust(35), jobs.rjust(4), images.rjust(6), staged.rjust(6), pct_staged.rjust(4), pct_full.rjust(4), backlog.rjust(10), size.rjust(10), used.rjust(10), written.rjust(10), avail.rjust(10)))

    except Exception as e:

        raise BackmonError(e)