svn:keywords
[bootmanager.git] / source / BootManager.py
1 #!/usr/bin/python -u
2 #
3 # $Id$
4 # $URL$
5 #
6 # Copyright (c) 2003 Intel Corporation
7 # All rights reserved.
8 #
9 # Copyright (c) 2004-2006 The Trustees of Princeton University
10 # All rights reserved.
11
12 import string
13 import sys, os, traceback
14 import time
15 import gzip
16
17 from steps import *
18 from Exceptions import *
19 import notify_messages
20 import BootServerRequest
21
# every message the boot manager emits is also written to this file
BM_NODE_LOG = "/tmp/bm.log"

# name of the name=value settings file loaded when the logger is created
VARS_FILE = "configuration"

# directories that make up PATH while the boot manager is running,
# listed in lookup order
BIN_PATH = (
    '/usr/local/bin', '/usr/local/sbin',
    '/usr/bin', '/usr/sbin',
    '/bin', '/sbin',
)
33
def read_configuration_file(filename):
    """
    Parse a name=value settings file into a dictionary.

    Lines that are blank, or whose first non-blank character is '#',
    are skipped.  Every other line is split at its first '=': the text
    before it is the name, and everything after it up to the end of the
    line is the value (so a value may itself contain '=').  Whitespace
    around both parts is stripped and every single or double quote
    character is removed from the value.

    The returned dictionary additionally maps 'BM_SOURCE_DIR' to the
    current working directory, so that later steps can locate files
    shipped with the boot manager.

    Raises Exception when a line contains no '=' at all; errors from
    open() propagate unchanged.
    """
    settings = {}
    vars_file = open(filename, 'r')
    try:
        for line in vars_file:
            stripped = line.strip()
            # if its a comment or a whitespace line, ignore
            if not stripped or stripped.startswith("#"):
                continue

            # split once only: the value is everything after the first '='
            parts = line.split("=", 1)
            if len(parts) != 2:
                raise Exception("Invalid line in vars file: %s" % line)

            name = parts[0].strip()
            # drop both kinds of quote characters from the value
            value = parts[1].strip().replace("'", "").replace('"', "")
            settings[name] = value
    finally:
        # release the handle even when an invalid line aborts the parse
        vars_file.close()

    # find out which directory we are running in, and set a variable
    # for that. future steps may need to get files out of the bootmanager
    # directory
    settings['BM_SOURCE_DIR'] = os.getcwd()

    return settings
69
70 ##############################
class log:
    """
    Timestamped logger that mirrors its output to a file and to stdout.

    Creating an instance also loads the settings file named by
    VARS_FILE; the result is kept in self.VARS, which stays None when
    that file cannot be read.
    """

    # strftime pattern prepended to every entry
    format = "%H:%M:%S(%Z) "

    def __init__(self, OutputFilePath=None):
        """
        Open OutputFilePath for writing (truncating it) and load the
        settings file.  Neither failure is fatal: without an output
        file entries only go to stdout, and without settings self.VARS
        is None.
        """
        # recorded unconditionally so the attribute always exists
        self.OutputFilePath = OutputFilePath
        try:
            self.OutputFile = open(OutputFilePath, "w")
        except Exception:
            print("bootmanager log : Unable to open output file %r, continuing" % OutputFilePath)
            self.OutputFile = None

        self.VARS = None
        try:
            self.VARS = read_configuration_file(VARS_FILE)
        except Exception:
            # sys.exc_info() keeps this valid on every Python version
            self.LogEntry(str(sys.exc_info()[1]))

    def LogEntry(self, str, inc_newline=1, display_screen=1):
        """
        Write one timestamped entry.

        str            -- text of the entry (the parameter name shadows
                          the builtin and is kept for existing callers)
        inc_newline    -- when true, terminate the entry with a newline
        display_screen -- when true, echo the entry to stdout as well

        The output file, when there is one, is flushed after each entry.
        """
        text = time.strftime(log.format, time.localtime()) + str
        if inc_newline:
            text = text + "\n"

        if self.OutputFile:
            self.OutputFile.write(text)
            self.OutputFile.flush()
        if display_screen:
            sys.stdout.write(text)

    def write(self, str):
        """
        make log behave like a writable file object (for traceback
        prints)
        """
        self.LogEntry(str, 0, 1)

    # bm log uploading is available back again, as of nodeconfig-5.0-2
    def Upload(self):
        """
        upload the contents of the log to the server

        Does nothing when there is no open output file.  When the
        settings needed for the request (UPLOAD_LOG_SCRIPT and the
        hostname/domainname entries of INTERFACE_SETTINGS) are not
        available - for instance because the settings file could not be
        read and self.VARS is None - the problem is logged and the
        upload is skipped instead of raising.

        On the normal path the output file is closed before the request
        is made, so later entries only reach stdout.
        """
        if self.OutputFile is None:
            return

        # gather everything the request needs while the log is still
        # open, so that a missing setting can be reported in the log
        try:
            upload_script = self.VARS['UPLOAD_LOG_SCRIPT']
            settings = self.VARS['INTERFACE_SETTINGS']
            hostname = settings['hostname'] + "." + settings['domainname']
        except (KeyError, TypeError):
            self.LogEntry("Unable to upload logs, incomplete configuration: %s"
                          % (sys.exc_info()[1],))
            return

        self.LogEntry("Uploading logs to %s" % upload_script)

        # the file has to be complete on disk before it is posted
        self.OutputFile.close()
        self.OutputFile = None

        bs_request = BootServerRequest.BootServerRequest(self.VARS)
        try:
            # this was working until f10
            bs_request.MakeRequest(PartialPath = upload_script,
                                   GetVars = None, PostVars = None,
                                   DoSSL = True, DoCertCheck = True,
                                   FormData = ["log=@" + self.OutputFilePath,
                                               "hostname=" + hostname,
                                               "type=bm.log"])
        except Exception:
            # new pycurl: retry with the tuple based form description
            import pycurl
            bs_request.MakeRequest(PartialPath = upload_script,
                                   GetVars = None, PostVars = None,
                                   DoSSL = True, DoCertCheck = True,
                                   FormData = [('log',(pycurl.FORM_FILE, self.OutputFilePath)),
                                               ("hostname",hostname),
                                               ("type","bm.log")])
147
148
149 ##############################
class BootManager:
    """
    Runs the sequence of boot manager steps that matches the node's
    BOOT_STATE, falling back to debug mode when anything fails.
    """

    # the set of valid node run states; Run() replaces each None with
    # the function implementing that state
    NodeRunStates = {'reinstall':None,
                     'boot':None,
                     'safeboot':None,
                     'disabled':None,
                     }

    def __init__(self, log, forceState):
        """
        log        -- logger object; its VARS attribute supplies the
                      settings shared by all steps
        forceState -- boot state that overrides the node's current one,
                      or None to leave it alone

        CAN_RUN is 1 only when log.VARS holds settings; otherwise it is
        0, VARS is None and PATH is left untouched.
        """
        # override machine's current state from the command line
        self.forceState = forceState

        # the main logging point
        self.LOG = log

        # set to 1 if we can run after initialization
        self.CAN_RUN = 0

        # always defined, so the attribute can be inspected even when
        # initialization stops early
        self.VARS = None

        if not log.VARS:
            return

        # this contains a set of information used and updated by each step
        self.VARS = log.VARS

        # not sure what the current PATH is set to, replace it with what
        # we know will work with all the boot cds
        os.environ['PATH'] = ":".join(BIN_PATH)

        self.CAN_RUN = 1

    def Run(self):
        """
        core boot manager logic.

        the way errors are handled is as such: if any particular step
        cannot continue or unexpectedly fails, an exception is thrown.
        in this case, the boot manager cannot continue running.

        these step functions can also return a 0/1 depending on whether
        or not it succeeded. In the case of steps like ConfirmInstallWithUser,
        a 0 is returned and no exception is thrown if the user chose not
        to confirm the install. The same goes with the CheckHardwareRequirements.
        If requirements are not met, but tests were successful, return 0.

        for steps that run within the installer, they are expected to either
        complete successfully and return 1, or throw an exception.

        For exact return values and expected operations, see the comments
        at the top of each of the individual step functions.

        Returns 1 when the state function completed, 0 otherwise.
        """

        def _startFallbackDebug():
            # starting the fallback/debug ssh daemon for safety:
            # if the node install somehow hangs, or if it simply takes ages,
            # we can still enter and investigate.
            # This is best effort: a failure must never stop the boot,
            # but it is logged so that it does not go unnoticed.
            try:
                StartDebug.Run(self.VARS, self.LOG, last_resort = False)
            except:
                self.LOG.LogEntry("Unable to start fallback debug sshd: %s"
                                  % (sys.exc_info()[1],))

        def _logImplementationError():
            # must be called from inside an except block: reports the
            # exception being handled to the log file and to stderr
            self.LOG.write( "\n\nImplementation Error\n")
            traceback.print_exc(file=self.LOG.OutputFile)
            traceback.print_exc()

        def _nodeNotInstalled(message='MSG_NODE_NOT_INSTALLED'):
            # called by the _xxxRun() functions below upon failure;
            # 'message' names an attribute of notify_messages
            self.VARS['RUN_LEVEL']= 'failboot'
            notify = getattr(notify_messages, message)
            self.VARS['STATE_CHANGE_NOTIFY']= 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= notify
            raise BootManagerException(notify)

        def _bootRun():
            # implements the boot logic, which consists of first
            # double checking that the node was properly installed,
            # checking whether someone added or changed disks, and
            # then finally chain boots.
            _startFallbackDebug()

            InstallInit.Run( self.VARS, self.LOG )
            ret = ValidateNodeInstall.Run( self.VARS, self.LOG )
            if ret == 1:
                WriteModprobeConfig.Run( self.VARS, self.LOG )
                MakeInitrd.Run( self.VARS, self.LOG )
                WriteNetworkConfig.Run( self.VARS, self.LOG )
                CheckForNewDisks.Run( self.VARS, self.LOG )
                SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
                ChainBootNode.Run( self.VARS, self.LOG )
            elif ret == -1:
                _nodeNotInstalled('MSG_NODE_FILESYSTEM_CORRUPT')
            elif ret == -2:
                _nodeNotInstalled('MSG_NODE_MOUNT_FAILED')
            elif ret == -3:
                _nodeNotInstalled('MSG_NODE_MISSING_KERNEL')
            else:
                _nodeNotInstalled()

        def _reinstallRun():
            _startFallbackDebug()

            # implements the reinstall logic, which will check whether
            # the min. hardware requirements are met, install the
            # software, and upon correct installation will switch to
            # 'boot' state and chainboot into the production system
            if not CheckHardwareRequirements.Run( self.VARS, self.LOG ):
                self.VARS['RUN_LEVEL']= 'failboot'
                raise BootManagerException("Hardware requirements not met.")

            # runinstaller
            InstallInit.Run( self.VARS, self.LOG )
            InstallPartitionDisks.Run( self.VARS, self.LOG )
            InstallBootstrapFS.Run( self.VARS, self.LOG )
            InstallWriteConfig.Run( self.VARS, self.LOG )
            InstallUninitHardware.Run( self.VARS, self.LOG )
            self.VARS['BOOT_STATE']= 'boot'
            self.VARS['STATE_CHANGE_NOTIFY']= 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
                 notify_messages.MSG_INSTALL_FINISHED
            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
            _bootRun()

        def _installRun():
            # implements the new install logic, which will first check
            # with the user whether it is ok to install on this
            # machine, switch to 'reinstall' state and then invoke the reinstall
            # logic.  See the _reinstallRun comments for further details.
            # NOTE: this function is not registered in the state table below.
            if not ConfirmInstallWithUser.Run( self.VARS, self.LOG ):
                return 0
            self.VARS['BOOT_STATE']= 'reinstall'
            UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
            _reinstallRun()

        def _debugRun(state='failboot'):
            # implements debug logic, which starts the sshd and just waits around
            self.VARS['RUN_LEVEL']=state
            UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
            StartDebug.Run( self.VARS, self.LOG )
            # fsck/mount fs if present, and ignore return value if it's not.
            ValidateNodeInstall.Run( self.VARS, self.LOG )

        def _badstateRun():
            # should never happen; log event
            self.LOG.write( "\nInvalid BOOT_STATE = %s\n" % self.VARS['BOOT_STATE'])
            _debugRun()

        # setup state -> function hash table
        BootManager.NodeRunStates['reinstall']  = _reinstallRun
        BootManager.NodeRunStates['boot']       = _bootRun
        BootManager.NodeRunStates['safeboot']   = lambda : _debugRun('safeboot')
        BootManager.NodeRunStates['disabled']   = lambda : _debugRun('disabled')

        success = 0
        try:
            InitializeBootManager.Run( self.VARS, self.LOG )
            ReadNodeConfiguration.Run( self.VARS, self.LOG )
            AuthenticateWithPLC.Run( self.VARS, self.LOG )
            StartRunlevelAgent.Run( self.VARS, self.LOG )
            GetAndUpdateNodeDetails.Run( self.VARS, self.LOG )

            # override machine's current state from the command line
            if self.forceState is not None:
                self.VARS['BOOT_STATE']= self.forceState
                UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
                UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )

            stateRun = BootManager.NodeRunStates.get(self.VARS['BOOT_STATE'],_badstateRun)
            stateRun()
            success = 1

        except KeyError:
            self.LOG.write( "\n\nKeyError while running: %s\n" % str(sys.exc_info()[1]) )
        except BootManagerException:
            self.LOG.write( "\n\nException while running: %s\n" % str(sys.exc_info()[1]) )
        except BootManagerAuthenticationException:
            self.LOG.write( "\n\nFailed to Authenticate Node: %s\n" % str(sys.exc_info()[1]) )
            # sets /tmp/CANCEL_BOOT flag
            StartDebug.Run(self.VARS, self.LOG )
            # Return immediately b/c any other calls to API will fail
            return success
        except:
            _logImplementationError()

        if not success:
            # whatever went wrong, leave the node reachable in debug mode
            try:
                _debugRun()
            except BootManagerException:
                self.LOG.write( "\n\nException while running: %s\n" % str(sys.exc_info()[1]) )
            except:
                _logImplementationError()

        return success
350             
351             
def main(argv):
    """
    Command line entry point: run the boot manager once and upload its log.

    argv is laid out like sys.argv.  When it holds exactly one extra
    argument, that argument is the boot state to force and has to be a
    key of BootManager.NodeRunStates.

    Returns 0 when the run succeeded and 1 otherwise: unknown forced
    state, boot manager that could not be initialized, failed run, or
    unexpected exception.
    """

    import utils
    utils.prompt_for_breakpoint_mode()

    utils.breakpoint ("Entering BootManager::main")

    def _timestamp():
        # current UTC time, as used in the start and finish entries
        return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())

    def _finish():
        # write the closing entry and send the log to the server; an
        # upload failure is reported but never escapes main()
        LOG.LogEntry( "BootManager finished at: %s" % _timestamp() )
        try:
            LOG.Upload()
        except Exception:
            LOG.LogEntry( "Unable to upload logs: %s" % (sys.exc_info()[1],) )
            traceback.print_exc()

    # set to 1 if error occurred
    error = 0

    # all output goes through this class so we can save it and post
    # the data back to PlanetLab central
    LOG = log( BM_NODE_LOG )

    LOG.LogEntry( "BootManager started at: %s" % _timestamp() )

    forceState = None
    try:
        if len(argv) == 2:
            fState = argv[1]
            if fState in BootManager.NodeRunStates:
                forceState = fState
            else:
                LOG.LogEntry("FATAL: cannot force node run state to=%s" % fState)
                error = 1
    except:
        traceback.print_exc(file=LOG.OutputFile)
        traceback.print_exc()

    if error:
        _finish()
        return error

    try:
        bm = BootManager(LOG,forceState)
        if bm.CAN_RUN == 0:
            LOG.LogEntry( "Unable to initialize BootManager." )
            # nothing was run, so this is not a successful exit
            error = 1
        else:
            LOG.LogEntry( "Running version %s of BootManager." % bm.VARS['VERSION'] )
            if bm.Run():
                LOG.LogEntry( "\nDone!" )
            else:
                LOG.LogEntry( "\nError occurred!" )
                error = 1
    except:
        # an unexpected exception is an error as well
        error = 1
        traceback.print_exc(file=LOG.OutputFile)
        traceback.print_exc()

    _finish()

    return error
409
410     
if __name__ == "__main__":
    # hand main()'s result to the shell as the process exit status
    sys.exit(main(sys.argv))