source: people/peter.buschman/backup_monitoring/backmon/commands/check/subcommands/dssu.py@ 999

Last change on this file since 999 was 976, checked in by peter, on Dec 6, 2011 at 10:19:33 AM

Raw checkin of current NetBackup / TSM parsing code.

File size: 11.4 KB
Line 
1#!/usr/bin/python26
2###
3### backmon.commands.check.dssu
4###
5
6import sys
7import os
8import os.path
9import glob
10import re
11
12from optparse import OptionParser
13from guppy import hpy
14
15from ....lib import *
16
17from backup_monitoring.debug import *
18from backup_monitoring.math import *
19
20from backup_monitoring.parsing.parsers import bpstulist
21from backup_monitoring.parsing.parsers import df
22from backup_monitoring.parsing.parsers import dsu_ls_l
23from backup_monitoring.parsing.parsers import nbstlutil
24from backup_monitoring.parsing.parsers import nbdevquery
25from backup_monitoring.parsing.parsers import bpdbjobs
26
# Usage banner shown by optparse for the "check dssu" subcommand.
usage = 'usage: %prog -e [environment] check dssu [dssu]'

# Command-line parser for this subcommand.
parser = OptionParser(usage=usage)
# -u/--updating is a boolean flag stored as options.updating (default False).
# NOTE(review): the help text looks like it is missing a noun after "include"
# (servers? storage units?) -- left untouched because it is user-facing output.
parser.add_option('-u', '--updating', action='store_true', default=False, dest='updating', help='only include that are updating monitoring data')
31
# Keys of ``kwargs`` that run() injects into its local namespace.
_KWARG_NAME_RE = re.compile(r'^[A-Z][A-Z_]+$')

# File names in a DSSU directory; group 1 is used as the image id.
_IMAGE_FILE_RE = re.compile(r'^([a-zA-Z0-9\-_.]+_[0-9]+)_([a-zA-Z0-9\-_.]+)\.([a-zA-Z]+)$')


def _find_filesystem(mounts, path):
    """Return the df entry for ``path`` or for its parent directory, else None.

    ``mounts`` maps mountpoint -> df record.  The storage unit path is matched
    either as a mountpoint itself or as a top-level subdirectory of one.
    """
    for candidate in (path, os.path.split(path)[0]):
        if candidate in mounts:
            return mounts[candidate]
    return None


def _disk_pool_pct_full(disk_pool):
    """Return the percentage of a disk pool that is in use.

    Computed from total_capacity_gb / free_space_gb; when the total capacity
    is not positive, falls back to the pool's own ``use_pct`` value.
    """
    total_gb = float(disk_pool.total_capacity_gb)
    free_gb = float(disk_pool.free_space_gb)
    # Compare the converted value: the raw attribute is passed through float()
    # everywhere else, so it may be a string, and on Python 2 a string always
    # compares greater than a float (the guard would never trigger).
    if total_gb > 0.0:
        return ((total_gb - free_gb) / total_gb) * 100.0
    # NOTE(review): use_pct is used as-is and must be numeric -- confirm.
    return disk_pool.use_pct


def _staging_percentages(environment, server, path):
    """Return ``(pct_staged, pct_not_staged)`` for one DSSU directory.

    File sizes from ``environment.dsu_contents[server][path]`` are summed per
    image id; an image counts as staged when its id is present in
    ``environment.lifecycle_images``.  Percentages are by bytes.
    """
    image_sizes = {}

    if server in environment.dsu_contents and path in environment.dsu_contents[server]:
        for entry in environment.dsu_contents[server][path].files:
            match = _IMAGE_FILE_RE.match(entry.filename)
            if match:
                image_id = match.group(1)
                image_sizes[image_id] = image_sizes.get(image_id, 0) + entry.size

    # No images on disk at all: nothing is waiting to be staged.
    if not image_sizes:
        return 100.0, 0.0

    image_bytes = 0
    staged_bytes = 0
    for image_id, image_size in image_sizes.items():
        image_bytes += image_size
        if image_id in environment.lifecycle_images:
            staged_bytes += image_size

    if image_bytes > 0:
        pct_staged = (float(staged_bytes) / float(image_bytes)) * 100.0
        return pct_staged, 100.0 - pct_staged

    # Images exist but all are zero bytes: keep the pessimistic defaults.
    return 0.0, 100.0


# NOTE(review): the nesting of this function was reconstructed from a listing
# whose indentation had been flattened -- verify block boundaries against the
# repository copy before relying on edge-case behaviour.
def run(args, kwargs):
    """Check one disk staging storage unit (DSSU) and exit with its status.

    args   -- argument list for this subcommand; after option parsing the
              first positional argument is the storage unit label to check.
    kwargs -- dict supplied by the caller; every key made only of upper-case
              letters and underscores is injected as a local name.
              ``ENVIRONMENTS`` is read from that namespace.

    The function does not return normally: it prints (or logs through ERROR)
    a one-line result followed by ``| perfdata=...`` and calls sys.exit():

        0 -- storage unit is up
        2 -- storage unit is down
        3 -- no storage unit given, unit not found, status could not be
             determined, or any exception while loading/parsing feeds

    (The message/exit-code layout presumably follows Nagios plugin
    conventions -- confirm against the caller.)
    """

    #
    # add kwargs to local namespace
    #
    # NOTE(review): this relies on Python 2 exec-statement semantics to create
    # function locals; keys are restricted by _KWARG_NAME_RE so only plain
    # identifiers are ever interpolated into the executed string.
    for key in kwargs.keys():

        if _KWARG_NAME_RE.match(key):
            exec(key + ' = kwargs[\'' + key + '\']')

    (options, args) = parser.parse_args(args)

    if not args:
        ERROR('No storage unit specified!')
        sys.exit(3)

    STUNIT = args[0]
    STATUS = ExtendedDict()

    master_feeds = ['nbemmcmd_machinealias_getaliases', 'bpstulist', 'nbdevquery_listdv_stype_basicdisk', 'nbdevquery_listdv_stype_advanceddisk', 'nbdevquery_listdv_stype_puredisk', 'df', 'nbstlutil_list']
    media_feeds = ['df', 'dsu_ls_l', ]

    try:

        environments = ENVIRONMENTS.values()

        # Heapy session is created but never read in this function; kept in
        # case heapsize() depends on it -- TODO confirm and drop if not.
        hp = hpy()

        DEBUG('HEAPSIZE=%s' % (heapsize()))

        for environment in environments:
            environment.load_feeds(master=master_feeds, media=media_feeds)
            environment.parse_aliases()
            environment.parse_stunits()
            environment.parse_df_data()
            environment.parse_dsu_contents()
            environment.parse_lifecycle_images()
            environment.parse_disk_pools()

        DEBUG('HEAPSIZE=%s' % (heapsize()))

        for environment in environments:

            for stunit in environment.stunits:

                server = environment.resolve_alias(stunit.host_connection)
                server = server.replace('-backup', '')

                #
                # DSSU specific: basic disk storage units with staging enabled
                #
                if not (stunit.storage_unit_type == 'Disk' and stunit.media_subtype == 'Basic' and stunit.stage_data == 'yes'):
                    continue

                label = stunit.label
                path = stunit.path

                #
                # metrics from nbdevquery (basic disk pools are keyed by label)
                #
                disk_pool = None
                if label in environment.disk_pools:
                    disk_pool = environment.disk_pools[label]

                # Pessimistic defaults when no capacity data is available.
                pct_full = 100.0
                pct_free = 0.0

                #
                # get dsu capacity from df -k
                #
                if server in environment.df_data:

                    fs = _find_filesystem(environment.df_data[server], path)

                    if fs is not None:
                        pct_full = fs.pct_full
                        pct_free = 100.0 - pct_full

                #
                # get capacity from nbdevquery if the server has no df data
                #
                # NOTE(review): this fallback is skipped when df data exists
                # for the server but no mountpoint matches the path -- confirm
                # whether that is intended.
                #
                elif disk_pool:

                    pct_full = _disk_pool_pct_full(disk_pool)
                    pct_free = 100.0 - pct_full

                #
                # get amount staged from ls -l and lifecycle images
                #
                pct_staged, pct_not_staged = _staging_percentages(environment, server, path)

                #
                # status from nbdevquery
                #
                status = ''

                if disk_pool:

                    if 'AdminUp' in disk_pool.flags and 'InternalUp' in disk_pool.flags:
                        status = 'UP'
                    else:
                        status = 'DOWN'

                pct_usable = pct_free + pct_staged
                pct_not_usable = 100.0 - pct_usable

                state = ExtendedDict()

                state['status'] = status
                state['pct_full'] = pct_full
                state['pct_free'] = pct_free
                state['pct_staged'] = pct_staged
                state['pct_not_staged'] = pct_not_staged
                state['pct_usable'] = pct_usable
                state['pct_not_usable'] = pct_not_usable

                STATUS[label] = state

        if STUNIT not in STATUS:

            perfdata = 'perfdata=100;0;0'
            ERROR('Could not find storage unit %s in monitoring data | %s' % (STUNIT, perfdata))
            sys.exit(3)

        state = STATUS[STUNIT]

        perfdata = 'perfdata=%.0f;%.0f;%.0f' % (state.pct_full, state.pct_staged / 100.0 * state.pct_full, state.pct_staged)

        if state.status == 'UP':
            print('OK: Storage unit is up. | %s' % (perfdata))
            sys.exit(0)

        if state.status == 'DOWN':
            print('Storage unit is DOWN. | %s' % (perfdata))
            sys.exit(2)

        # No disk pool entry was found, so the status is neither UP nor DOWN.
        # Report that explicitly instead of returning silently with no output.
        print('Storage unit status is UNKNOWN. | %s' % (perfdata))
        sys.exit(3)

    # sys.exit() raises SystemExit, which is not an Exception subclass, so the
    # exits above pass through this handler untouched.
    except Exception:

        perfdata = 'perfdata=100;0;0'
        print('DSSU monitoring data unavailable! | %s' % (perfdata))
        sys.exit(3)
328
Note: See TracBrowser for help on using the repository browser.