Step to hook into Ansible
[bootmanager.git] / source / BootManager.py
1 #!/usr/bin/python -u
2 #
3 # Copyright (c) 2003 Intel Corporation
4 # All rights reserved.
5 #
6 # Copyright (c) 2004-2006 The Trustees of Princeton University
7 # All rights reserved.
8
9 import string
10 import sys, os, traceback
11 import time
12 import gzip
13
14 from steps import *
15 from Exceptions import *
16 import notify_messages
17 import BootServerRequest
18 import utils
19
# path of the file that receives everything the boot manager logs
BM_NODE_LOG = "/tmp/bm.log"

# name of the name=value configuration file that the log class reads
# at start-up (a relative name, so it is resolved against the current
# working directory)
VARS_FILE = "configuration"

# directories, in search order, that replace PATH while the boot
# manager is running
BIN_PATH = (
    "/usr/local/bin",
    "/usr/local/sbin",
    "/usr/bin",
    "/usr/sbin",
    "/bin",
    "/sbin",
)
31
def read_configuration_file(filename):
    """
    Parse a name=value configuration file into a dictionary.

    Lines that are blank, or whose first non-blank character is '#',
    are skipped.  Every other line must contain an '='; the text before
    the first '=' is the variable name and everything after it, up to
    the end of the line, is the value.  Whitespace around both is
    stripped and all single and double quote characters are removed
    from the value.

    The returned dictionary additionally maps 'BM_SOURCE_DIR' to the
    current working directory, so that later steps can fetch files out
    of the directory the boot manager is running from.

    Raises Exception if a non-comment line contains no '='.  Errors
    from open() (e.g. missing file) propagate unchanged.
    """
    config = {}

    # the with-block closes the file even when a malformed line aborts
    # the parse part-way through
    with open(filename, 'r') as vars_file:
        for line in vars_file:
            stripped = line.strip()
            if not stripped or stripped.startswith("#"):
                continue

            # split on the first '=' only, so that a value may itself
            # contain '=' (for instance a URL with a query string)
            name, separator, value = stripped.partition("=")
            if not separator:
                raise Exception( "Invalid line in vars file: %s" % line )

            value = value.strip().replace("'", "").replace('"', "")
            config[name.strip()] = value

    # remember which directory we are running in; future steps may need
    # to get files out of the bootmanager directory
    config['BM_SOURCE_DIR'] = os.getcwd()

    return config
67
68 ##############################
class log:
    """
    Log sink for the boot manager.

    Every entry is prefixed with a timestamp and written to the output
    file (when one could be opened) and, optionally, to stdout.  The
    object also offers write(), so it can be handed to code expecting a
    writable file object, and Upload(), which posts the log file to the
    boot server.

    On construction the configuration file named by VARS_FILE is read;
    the result is kept in self.VARS, or None if reading failed.
    """

    # strftime() pattern of the timestamp that prefixes every entry
    format="%H:%M:%S(%Z) "

    def __init__( self, OutputFilePath= None ):
        """
        Open OutputFilePath for writing and load the configuration.

        Neither failure is fatal: if the file cannot be opened the log
        only goes to the screen, and if the configuration cannot be
        read the reason is logged and self.VARS stays None.
        """
        # always record the path, so the attribute exists even when
        # the file could not be opened
        self.OutputFilePath= OutputFilePath
        try:
            self.OutputFile= open( OutputFilePath, "w")
        except Exception:
            print( "bootmanager log : Unable to open output file %r, continuing"%OutputFilePath )
            self.OutputFile= None

        self.VARS = None
        try:
            self.VARS = read_configuration_file(VARS_FILE)
        except Exception as e:
            self.LogEntry( str(e) )

    def LogEntry( self, str, inc_newline= 1, display_screen= 1 ):
        """
        Write one timestamped entry.

        str            -- text of the entry
        inc_newline    -- when true, terminate the entry with a newline
        display_screen -- when true, also echo the entry to stdout

        The output file, if open, is flushed after every entry.
        """
        entry = time.strftime(log.format, time.localtime()) + str
        if inc_newline:
            entry += "\n"

        if self.OutputFile:
            self.OutputFile.write( entry )
            self.OutputFile.flush()
        if display_screen:
            sys.stdout.write( entry )

    def write( self, str ):
        """
        make log behave like a writable file object (for traceback
        prints): the text is logged without a trailing newline and is
        echoed to the screen
        """
        self.LogEntry( str, 0, 1 )

    # bm log uploading is available back again, as of nodeconfig-5.0-2
    def Upload( self, extra_file=None ):
        """
        upload the contents of the log to the server

        The log file is uploaded only if it is currently open; a
        successful attempt closes it, so later entries reach the screen
        only.  If extra_file is given it is additionally handed to the
        bash function 'upload_logs'.
        """
        if self.OutputFile is not None:
            self._UploadLogFile()

        if extra_file is not None:
            # NOTE: for code-reuse, evoke the bash function 'upload_logs';
            # by adding --login, bash reads .bash_profile before execution.
            # Also, never fail, since this is an optional feature.
            utils.sysexec_noerr( """bash --login -c "upload_logs %s" """ % extra_file, self)

    def _UploadLogFile( self ):
        """
        Post the log file to the boot server's upload script.

        Does nothing but log the reason when the configuration needed
        for the upload is not available; in that case the log file is
        left open.
        """
        self.OutputFile.flush()

        # the configuration file may never have been read ...
        if self.VARS is None:
            self.LogEntry( "Unable to upload logs: configuration was not read" )
            return
        # ... or the variables needed here may not have been set yet
        try:
            upload_script = self.VARS['UPLOAD_LOG_SCRIPT']
            settings = self.VARS['INTERFACE_SETTINGS']
            hostname = settings['hostname'] + "." + settings['domainname']
        except KeyError as e:
            self.LogEntry( "Unable to upload logs: missing configuration variable %s" % e )
            return

        self.LogEntry( "Uploading logs to %s" % upload_script )

        # close the file before it is sent; from here on entries only
        # go to the screen
        self.OutputFile.close()
        self.OutputFile= None

        bs_request = BootServerRequest.BootServerRequest(self.VARS)
        try:
            # this was working until f10
            bs_request.MakeRequest(PartialPath = upload_script,
                                   GetVars = None, PostVars = None,
                                   DoSSL = True, DoCertCheck = True,
                                   FormData = ["log=@" + self.OutputFilePath,
                                               "hostname=" + hostname,
                                               "type=bm.log"])
        except Exception:
            # new pycurl: retry with the form fields given as tuples
            import pycurl
            bs_request.MakeRequest(PartialPath = upload_script,
                                   GetVars = None, PostVars = None,
                                   DoSSL = True, DoCertCheck = True,
                                   FormData = [('log',(pycurl.FORM_FILE, self.OutputFilePath)),
                                               ("hostname",hostname),
                                               ("type","bm.log")])
150
151
152 ##############################
class BootManager:
    """
    Drives a node through the boot manager steps that correspond to
    its BOOT_STATE.

    An instance is usable only if CAN_RUN is 1 after construction,
    which requires that the log object passed in managed to read the
    configuration (log.VARS is set).
    """

    # the set of valid node run states; Run() replaces each None with
    # the function implementing that state
    NodeRunStates = {'reinstall':None,
                     'boot':None,
                     'safeboot':None,
                     'disabled':None,
                     }

    def __init__(self, log, forceState):
        """
        log        -- the log object; supplies both the logging point
                      and, through log.VARS, the configuration
        forceState -- a key of NodeRunStates to force as the node's
                      BOOT_STATE, or None to keep the current one
        """
        # override machine's current state from the command line
        self.forceState = forceState

        # the main logging point
        self.LOG= log

        # set to 1 if we can run after initialization
        self.CAN_RUN = 0

        # without configuration there is nothing we can do
        if not log.VARS:
            return

        # this contains a set of information used and updated by each step
        self.VARS= log.VARS

        # not sure what the current PATH is set to, replace it with what
        # we know will work with all the boot cds
        os.environ['PATH']= ":".join(BIN_PATH)

        self.CAN_RUN= 1

    def _LogTraceback(self):
        """
        Print the traceback of the exception being handled to the log
        file (when it is open) and to stderr.
        """
        # print_exc(file=None) falls back to stderr, which would print
        # the same traceback twice
        if self.LOG.OutputFile is not None:
            traceback.print_exc(file=self.LOG.OutputFile)
        traceback.print_exc()

    def Run(self):
        """
        core boot manager logic.

        the way errors are handled is as such: if any particular step
        cannot continue or unexpectedly fails, an exception is thrown.
        in this case, the boot manager cannot continue running.

        these step functions can also return a 0/1 depending on whether
        or not it succeeded. In the case of steps like ConfirmInstallWithUser,
        a 0 is returned and no exception is thrown if the user chose not
        to confirm the install. The same goes with the CheckHardwareRequirements.
        If requirements not met, but tests were successful, return 0.

        for steps that run within the installer, they are expected to either
        complete successfully and return 1, or throw an exception.

        For exact return values and expected operations, see the comments
        at the top of each of the individual step functions.

        Returns 1 if the handler for the node's state ran to completion,
        0 otherwise.  After any failure other than an authentication
        failure the debug logic (_debugRun) is run before returning.
        """

        def _nodeNotInstalled(message='MSG_NODE_NOT_INSTALLED'):
            # called by the _xxxState() functions below upon failure;
            # 'message' names an attribute of notify_messages
            self.VARS['RUN_LEVEL']= 'failboot'
            notify = getattr(notify_messages, message)
            self.VARS['STATE_CHANGE_NOTIFY']= 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= notify
            raise BootManagerException(notify)

        def _startFallbackDebug():
            # starting the fallback/debug ssh daemon for safety:
            # if the node install somehow hangs, or if it simply takes ages,
            # we can still enter and investigate.
            # this is best effort only: a failure here must never stop
            # the boot or install, so it is logged and otherwise ignored
            try:
                StartDebug.Run(self.VARS, self.LOG, last_resort = False)
            except:
                self.LOG.write( "\nUnable to start fallback debug mode, continuing\n" )

        def _bootRun():
            # implements the boot logic, which consists of first
            # double checking that the node was properly installed,
            # checking whether someone added or changed disks, and
            # then finally chain boots.
            _startFallbackDebug()

            InstallInit.Run( self.VARS, self.LOG )
            ret = ValidateNodeInstall.Run( self.VARS, self.LOG )
            if ret == 1:
                # Thierry - feb. 2013 turning off WriteModprobeConfig for now on lxc
                # for one thing this won't work at all with f18, as modules.pcimap
                # has disappeared (Daniel suggested modules.aliases could be used instead)
                # and second, in any case it's been years now that modprobe.conf was deprecated
                # so most likely this code has no actual effect
                if self.VARS['virt'] == 'vs':
                    WriteModprobeConfig.Run( self.VARS, self.LOG )
                WriteNetworkConfig.Run( self.VARS, self.LOG )
                CheckForNewDisks.Run( self.VARS, self.LOG )
                SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
                ChainBootNode.Run( self.VARS, self.LOG )
            elif ret == -1:
                _nodeNotInstalled('MSG_NODE_FILESYSTEM_CORRUPT')
            elif ret == -2:
                _nodeNotInstalled('MSG_NODE_MOUNT_FAILED')
            elif ret == -3:
                _nodeNotInstalled('MSG_NODE_MISSING_KERNEL')
            else:
                _nodeNotInstalled()

        def _reinstallRun():
            # implements the reinstall logic, which will check whether
            # the min. hardware requirements are met, install the
            # software, and upon correct installation will switch to
            # 'boot' state and chainboot into the production system
            _startFallbackDebug()

            if not CheckHardwareRequirements.Run( self.VARS, self.LOG ):
                self.VARS['RUN_LEVEL']= 'failboot'
                raise BootManagerException("Hardware requirements not met.")

            # runinstaller
            InstallInit.Run( self.VARS, self.LOG )
            InstallPartitionDisks.Run( self.VARS, self.LOG )
            InstallBootstrapFS.Run( self.VARS, self.LOG )
            InstallWriteConfig.Run( self.VARS, self.LOG )
            InstallUninitHardware.Run( self.VARS, self.LOG )
            self.VARS['BOOT_STATE']= 'boot'
            self.VARS['STATE_CHANGE_NOTIFY']= 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
                 notify_messages.MSG_INSTALL_FINISHED
            AnsibleHook.Run( self.VARS, self.LOG )
            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
            _bootRun()

        def _installRun():
            # implements the new install logic, which will first check
            # with the user whether it is ok to install on this
            # machine, switch to 'reinstall' state and then invoke the reinstall
            # logic.  See reinstallState logic comments for further
            # details.
            # NOTE: this function is not registered in the state table
            # below, so nothing in this class currently calls it
            if not ConfirmInstallWithUser.Run( self.VARS, self.LOG ):
                return 0
            self.VARS['BOOT_STATE']= 'reinstall'

            AnsibleHook.Run( self.VARS, self.LOG )
            _reinstallRun()

        def _debugRun(state='failboot'):
            # implements debug logic, which starts the sshd and just waits around
            self.VARS['RUN_LEVEL']=state
            StartDebug.Run( self.VARS, self.LOG )
            # fsck/mount fs if present, and ignore return value if it's not.
            ValidateNodeInstall.Run( self.VARS, self.LOG )

        def _badstateRun():
            # should never happen; log event
            self.LOG.write( "\nInvalid BOOT_STATE = %s\n" % self.VARS['BOOT_STATE'])
            _debugRun()

        # setup state -> function hash table
        BootManager.NodeRunStates['reinstall']  = _reinstallRun
        BootManager.NodeRunStates['boot']       = _bootRun
        BootManager.NodeRunStates['safeboot']   = lambda : _debugRun('safeboot')
        BootManager.NodeRunStates['disabled']   = lambda : _debugRun('disabled')

        success = 0
        try:
            InitializeBootManager.Run( self.VARS, self.LOG )
            ReadNodeConfiguration.Run( self.VARS, self.LOG )
            AuthenticateWithPLC.Run( self.VARS, self.LOG )
            UpdateLastBootOnce.Run( self.VARS, self.LOG )
            StartRunlevelAgent.Run( self.VARS, self.LOG )
            GetAndUpdateNodeDetails.Run( self.VARS, self.LOG )

            # override machine's current state from the command line
            if self.forceState is not None:
                self.VARS['BOOT_STATE']= self.forceState
                UpdateBootStateWithPLC.Run( self.VARS, self.LOG )

            stateRun = BootManager.NodeRunStates.get(self.VARS['BOOT_STATE'],_badstateRun)
            stateRun()
            success = 1

        except KeyError as e:
            self.LOG.write( "\n\nKeyError while running: %s\n" % str(e) )
        # NOTE(review): if BootManagerAuthenticationException derives from
        # BootManagerException, the clause below shadows the one after it
        # -- confirm against the Exceptions module
        except BootManagerException as e:
            self.LOG.write( "\n\nException while running: %s\n" % str(e) )
        except BootManagerAuthenticationException as e:
            self.LOG.write( "\n\nFailed to Authenticate Node: %s\n" % str(e) )
            # sets /tmp/CANCEL_BOOT flag
            StartDebug.Run(self.VARS, self.LOG )
            # Return immediately b/c any other calls to API will fail
            return success
        except:
            # deliberately catches everything: whatever went wrong, the
            # node must still end up in debug mode below
            self.LOG.write( "\n\nImplementation Error\n")
            self._LogTraceback()

        if not success:
            try:
                _debugRun()
            except BootManagerException as e:
                self.LOG.write( "\n\nException while running: %s\n" % str(e) )
            except:
                self.LOG.write( "\n\nImplementation Error\n")
                self._LogTraceback()

        return success
359             
360             
def main(argv):
    """
    Entry point of the boot manager.

    argv -- the command line; when it holds exactly one argument after
            the program name, that argument must be a key of
            BootManager.NodeRunStates and is forced as the node's state

    Returns 0 on success and 1 if an error occurred, which includes an
    invalid forced state, a boot manager that could not be initialized,
    an unsuccessful run and a failed log upload.
    """

    utils.prompt_for_breakpoint_mode()

#    utils.breakpoint ("Entering BootManager::main")

    # set to 1 if error occurred
    error= 0

    # all output goes through this class so we can save it and post
    # the data back to PlanetLab central
    LOG= log( BM_NODE_LOG )

    def _logTraceback():
        # print_exc(file=None) falls back to stderr, which would print
        # the same traceback twice
        if LOG.OutputFile is not None:
            traceback.print_exc(file=LOG.OutputFile)
        traceback.print_exc()

    def _finish(error):
        # common epilogue: log the end time, upload the log and hand
        # back the exit status
        LOG.LogEntry( "BootManager finished at: %s" % \
                      time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
        try:
            LOG.Upload()
        except Exception:
            # a failed upload must not hide the exit status behind an
            # uncaught exception; report it and flag the error
            _logTraceback()
            error = 1
        return error

    # NOTE: assume CWD is BM's source directory, but never fail
    utils.sysexec_noerr("./setup_bash_history_scripts.sh", LOG)

    LOG.LogEntry( "BootManager started at: %s" % \
                  time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )

    forceState = None
    try:
        if len(argv) == 2:
            fState = argv[1]
            if fState in BootManager.NodeRunStates:
                forceState = fState
            else:
                LOG.LogEntry("FATAL: cannot force node run state to=%s" % fState)
                error = 1
    except:
        _logTraceback()

    if error:
        return _finish(error)

    try:
        bm= BootManager(LOG,forceState)
        if bm.CAN_RUN == 0:
            LOG.LogEntry( "Unable to initialize BootManager." )
            # not being able to start is an error, report it as one
            error = 1
        else:
            LOG.LogEntry( "Running version %s of BootManager." % bm.VARS['VERSION'] )
            success= bm.Run()
            if success:
                LOG.LogEntry( "\nDone!" )
            else:
                LOG.LogEntry( "\nError occurred!" )
                error = 1
    except:
        # deliberately catches everything, so that the end of the run
        # is still logged and the log uploaded.
        # NOTE(review): the exit status is left unchanged here, as in
        # the original -- confirm whether this should set error = 1
        _logTraceback()

    return _finish(error)
421
422     
if __name__ == "__main__":
    # run the boot manager and hand its status to the calling process
    sys.exit(main(sys.argv))