Catch and raise the exception when GetSession Fails.
[bootmanager.git] / source / BootManager.py
1 #!/usr/bin/python -u
2
3 # Copyright (c) 2003 Intel Corporation
4 # All rights reserved.
5 #
6 # Copyright (c) 2004-2006 The Trustees of Princeton University
7 # All rights reserved.
8
9 import string
10 import sys, os, traceback
11 import time
12 import gzip
13
14 from steps import *
15 from Exceptions import *
16 import notify_messages
17 import BootServerRequest
18
# log file on the node that collects everything the boot manager prints
BM_NODE_LOG = "/tmp/bm.log"
# request path the collected log is posted to (see log.Upload)
UPLOAD_LOG_SCRIPT = "/boot/upload-bmlog.php"

# directories, in lookup order, that make up PATH while the boot
# manager is running
BIN_PATH = (
    "/usr/local/bin",
    "/usr/local/sbin",
    "/usr/bin",
    "/usr/sbin",
    "/bin",
    "/sbin",
)
30            
31 ##############################
class log:
    """
    timestamped logger for the boot manager. every entry is echoed to
    stdout and appended to a file on the node, so that the file can be
    uploaded to the boot server once the run is over.

    attributes:
      OutputFile      open file object, or None when no file is in use
                      (open failed, or Upload() already ran)
      OutputFilePath  path of the log file, or None if it never opened
    """

    # strftime pattern for the timestamp put in front of every entry
    format="%H:%M:%S(%Z) "

    def __init__( self, OutputFilePath= None ):
        """
        open OutputFilePath for writing, truncating any previous
        content. if the file cannot be opened (this includes the
        default of None) a notice is printed and the logger carries on
        in screen-only mode.
        """
        # define both attributes up front so they exist whether or not
        # the open below succeeds
        self.OutputFile= None
        self.OutputFilePath= None
        try:
            self.OutputFile= open( OutputFilePath, "w")
            self.OutputFilePath= OutputFilePath
        except Exception:
            # logging must never stop the boot manager, so any failure
            # to open is tolerated; unlike a bare except this does not
            # swallow KeyboardInterrupt/SystemExit on python >= 2.5
            print( "bootmanager log : Unable to open output file %r, continuing"%OutputFilePath )

    def LogEntry( self, str, inc_newline= 1, display_screen= 1 ):
        """
        record one timestamped entry.

        str             text of the entry (must be a string)
        inc_newline     when true, terminate the entry with a newline
        display_screen  when true, also echo the entry to stdout

        the log file, if any, is flushed after every entry.
        """
        entry= time.strftime(log.format, time.localtime()) + str
        if inc_newline:
            entry= entry + "\n"

        if self.OutputFile:
            self.OutputFile.write( entry )
            # flush right away so the file is complete even if the
            # boot manager dies unexpectedly
            self.OutputFile.flush()

        if display_screen:
            sys.stdout.write( entry )

    def write( self, str ):
        """
        make log behave like a writable file object (for traceback
        prints). the text is timestamped like any other entry but no
        newline is appended.
        """
        self.LogEntry( str, 0, 1 )

    # bm log uploading is available back again, as of nodeconfig-5.0-2
    def Upload( self ):
        """
        upload the contents of the log to the server.

        does nothing when no log file is open. otherwise the file is
        closed first, so entries logged after this call only reach the
        screen.
        """
        if self.OutputFile is not None:
            self.OutputFile.flush()

            self.LogEntry( "Uploading logs to %s" % UPLOAD_LOG_SCRIPT )

            # close before uploading so the complete file is on disk
            self.OutputFile.close()
            self.OutputFile= None

            bs_request = BootServerRequest.BootServerRequest()
            bs_request.MakeRequest(PartialPath = UPLOAD_LOG_SCRIPT,
                                   GetVars = None, PostVars = None,
                                   FormData = ["log=@" + self.OutputFilePath],
                                   DoSSL = True, DoCertCheck = True)
85
86 ##############################
class BootManager:
    """
    drives one boot manager session: loads the initial variables from
    VARS_FILE, then (in Run) executes the sequence of steps selected
    by the node's BOOT_STATE.

    attributes set by __init__:
      forceState  boot state forced from the command line, or None
      LOG         logger used by this class and handed to every step
      CAN_RUN     1 once initialization succeeded, 0 otherwise
      VARS        dict of variables shared between steps (only set
                  when initialization succeeded)
    """

    # file containing initial variables/constants
    VARS_FILE = "configuration"

    # the set of valid node run states; the values are filled in with
    # the matching handler functions each time Run() is called
    NodeRunStates = {'reinstall':None,
                     'boot':None,
                     'safeboot':None,
                     'disabled':None,
                     }

    def __init__(self, log, forceState):
        """
        read VARS_FILE and prepare the environment.

        log         logger object; LogEntry() is used here
        forceState  boot state overriding the node's current one, or
                    None to leave it alone

        on an invalid configuration line the problem is logged and the
        constructor returns early with CAN_RUN left at 0. a missing or
        unreadable VARS_FILE raises IOError.
        """
        # override machine's current state from the command line
        self.forceState = forceState

        # the main logging point
        self.LOG= log

        # set to 1 if we can run after initialization
        self.CAN_RUN = 0

        # read in and store all variables in VARS_FILE. each line
        # is in the format name=val (any whitespace around the = is
        # removed). everything after the first = to the end of the
        # line is the value, so the value itself may contain '='
        config_vars = {}
        validConfFile = True
        vars_file= open(self.VARS_FILE,'r')
        try:
            for line in vars_file:
                # if its a comment or a whitespace line, ignore
                if line[:1] == "#" or line.strip() == "":
                    continue

                # split on the first '=' only; a line without any '='
                # yields a single part and is rejected
                parts= line.split("=", 1)
                if len(parts) != 2:
                    self.LOG.LogEntry( "Invalid line in vars file: %s" % line )
                    validConfFile = False
                    break

                config_vars[parts[0].strip()]= parts[1].strip()
        finally:
            # close the file even if reading or logging raised
            vars_file.close()

        if not validConfFile:
            self.LOG.LogEntry( "Unable to read configuration vars." )
            return

        # find out which directory we are running in, and set a variable
        # for that. future steps may need to get files out of the bootmanager
        # directory
        config_vars['BM_SOURCE_DIR']= os.getcwd()

        # not sure what the current PATH is set to, replace it with what
        # we know will work with all the boot cds
        os.environ['PATH']= ":".join(BIN_PATH)

        # this contains a set of information used and updated by each step
        self.VARS= config_vars

        self.CAN_RUN= 1

    def Run(self):
        """
        core boot manager logic. returns 1 if the steps selected by
        BOOT_STATE completed, 0 otherwise.

        the way errors are handled is as such: if any particular step
        cannot continue or unexpectedly fails, an exception is thrown.
        in this case, the boot manager cannot continue running.

        these step functions can also return a 0/1 depending on whether
        or not it succeeded. In the case of steps like ConfirmInstallWithUser,
        a 0 is returned and no exception is thrown if the user chose not
        to confirm the install. The same goes with the CheckHardwareRequirements.
        If requirements not met, but tests were successful, return 0.

        for steps that run within the installer, they are expected to either
        complete successfully and return 1, or throw an exception.

        For exact return values and expected operations, see the comments
        at the top of each of the individual step functions.

        exceptions are read back with sys.exc_info() rather than bound
        in the except clause, so that this code parses on every python
        version.
        """

        def _nodeNotInstalled(message='MSG_NODE_NOT_INSTALLED'):
            # called by the _xxxState() functions below upon failure.
            # message is the name of an attribute of notify_messages;
            # always raises BootManagerException
            self.VARS['RUN_LEVEL']= 'failboot'
            notify = getattr(notify_messages, message)
            self.VARS['STATE_CHANGE_NOTIFY']= 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= notify
            raise BootManagerException(notify)

        def _bootRun():
            # implements the boot logic, which consists of first
            # double checking that the node was properly installed,
            # checking whether someone added or changed disks, and
            # then finally chain boots.

            # starting the fallback/debug ssh daemon for safety:
            # if the node install somehow hangs, or if it simply takes ages,
            # we can still enter and investigate
            try:
                StartDebug.Run(self.VARS, self.LOG, last_resort = False)
            except:
                # best effort only: failing to start the debug daemon
                # must not prevent the node from booting
                pass

            InstallInit.Run( self.VARS, self.LOG )
            ret = ValidateNodeInstall.Run( self.VARS, self.LOG )
            if ret == 1:
                WriteModprobeConfig.Run( self.VARS, self.LOG )
                MakeInitrd.Run( self.VARS, self.LOG )
                WriteNetworkConfig.Run( self.VARS, self.LOG )
                CheckForNewDisks.Run( self.VARS, self.LOG )
                SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
                ChainBootNode.Run( self.VARS, self.LOG )
            # any other return value means the install did not validate;
            # the known negative codes select a more specific message
            elif ret == -1:
                _nodeNotInstalled('MSG_NODE_FILESYSTEM_CORRUPT')
            elif ret == -2:
                _nodeNotInstalled('MSG_NODE_MOUNT_FAILED')
            elif ret == -3:
                _nodeNotInstalled('MSG_NODE_MISSING_KERNEL')
            else:
                _nodeNotInstalled()

        def _reinstallRun():

            # starting the fallback/debug ssh daemon for safety:
            # if the node install somehow hangs, or if it simply takes ages,
            # we can still enter and investigate
            try:
                StartDebug.Run(self.VARS, self.LOG, last_resort = False)
            except:
                # best effort only, see _bootRun
                pass

            # implements the reinstall logic, which will check whether
            # the min. hardware requirements are met, install the
            # software, and upon correct installation will switch to
            # 'boot' state and chainboot into the production system
            if not CheckHardwareRequirements.Run( self.VARS, self.LOG ):
                self.VARS['RUN_LEVEL']= 'failboot'
                raise BootManagerException("Hardware requirements not met.")

            # runinstaller
            InstallInit.Run( self.VARS, self.LOG )
            InstallPartitionDisks.Run( self.VARS, self.LOG )
            InstallBootstrapFS.Run( self.VARS, self.LOG )
            InstallWriteConfig.Run( self.VARS, self.LOG )
            InstallUninitHardware.Run( self.VARS, self.LOG )
            self.VARS['BOOT_STATE']= 'boot'
            self.VARS['STATE_CHANGE_NOTIFY']= 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
                 notify_messages.MSG_INSTALL_FINISHED
            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
            _bootRun()

        def _installRun():
            # implements the new install logic, which will first check
            # with the user whether it is ok to install on this
            # machine, switch to 'reinstall' state and then invoke the reinstall
            # logic.  See reinstallState logic comments for further
            # details.
            # NOTE: no entry of the state table below refers to this
            # function, so it is currently never called
            if not ConfirmInstallWithUser.Run( self.VARS, self.LOG ):
                return 0
            self.VARS['BOOT_STATE']= 'reinstall'
            UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
            _reinstallRun()

        def _debugRun(state='failboot'):
            # implements debug logic, which starts the sshd and just waits around
            self.VARS['RUN_LEVEL']=state
            UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
            StartDebug.Run( self.VARS, self.LOG )
            # fsck/mount fs if present, and ignore return value if it's not.
            ValidateNodeInstall.Run( self.VARS, self.LOG )

        def _badstateRun():
            # should never happen; log event
            self.LOG.write( "\nInvalid BOOT_STATE = %s\n" % self.VARS['BOOT_STATE'])
            _debugRun()

        # setup state -> function hash table. note that this stores the
        # handlers (bound to this instance) in the class-level dict
        BootManager.NodeRunStates['reinstall']  = _reinstallRun
        BootManager.NodeRunStates['boot']       = _bootRun
        BootManager.NodeRunStates['safeboot']   = lambda : _debugRun('safeboot')
        BootManager.NodeRunStates['disabled']   = lambda : _debugRun('disabled')

        success = 0
        try:
            InitializeBootManager.Run( self.VARS, self.LOG )
            ReadNodeConfiguration.Run( self.VARS, self.LOG )
            AuthenticateWithPLC.Run( self.VARS, self.LOG )
            StartRunlevelAgent.Run( self.VARS, self.LOG )
            GetAndUpdateNodeDetails.Run( self.VARS, self.LOG )

            # override machine's current state from the command line
            if self.forceState is not None:
                self.VARS['BOOT_STATE']= self.forceState
                UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
                UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )

            # unknown states fall back to _badstateRun
            stateRun = BootManager.NodeRunStates.get(self.VARS['BOOT_STATE'],_badstateRun)
            stateRun()
            success = 1

        except KeyError:
            self.LOG.write( "\n\nKeyError while running: %s\n" % str(sys.exc_info()[1]) )
        except BootManagerException:
            self.LOG.write( "\n\nException while running: %s\n" % str(sys.exc_info()[1]) )
        except BootManagerAuthenticationException:
            self.LOG.write( "\n\nFailed to Authenticate Node: %s\n" % str(sys.exc_info()[1]) )
            # sets /tmp/CANCEL_BOOT flag
            StartDebug.Run(self.VARS, self.LOG )
            # Return immediately b/c any other calls to API will fail
            return success
        except:
            # anything else is a bug in the boot manager itself; record
            # the traceback both in the log file and on the console
            self.LOG.write( "\n\nImplementation Error\n")
            traceback.print_exc(file=self.LOG.OutputFile)
            traceback.print_exc()

        if not success:
            # whatever went wrong, drop into debug mode ('failboot')
            # so that the node can be inspected
            try:
                _debugRun()
            except BootManagerException:
                self.LOG.write( "\n\nException while running: %s\n" % str(sys.exc_info()[1]) )
            except:
                self.LOG.write( "\n\nImplementation Error\n")
                traceback.print_exc(file=self.LOG.OutputFile)
                traceback.print_exc()

        return success
318             
319             
def main(argv):
    """
    command line entry point of the boot manager.

    argv  full argument list, program name included. when exactly one
          extra argument is given it must be one of the keys of
          BootManager.NodeRunStates and forces that boot state.

    returns 0 on success and 1 if anything went wrong: an invalid
    forced state, a failed initialization, an unsuccessful run, or an
    unexpected exception. the log is uploaded before returning in
    every case.
    """

    import utils
    utils.prompt_for_breakpoint_mode()

    utils.breakpoint ("Entering BootManager::main")

    # set to 1 if error occurred
    error= 0

    # all output goes through this class so we can save it and post
    # the data back to PlanetLab central
    LOG= log( BM_NODE_LOG )

    LOG.LogEntry( "BootManager started at: %s" % \
                  time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )

    forceState = None
    try:
        if len(argv) == 2:
            fState = argv[1]
            if fState in BootManager.NodeRunStates:
                forceState = fState
            else:
                LOG.LogEntry("FATAL: cannot force node run state to=%s" % fState)
                error = 1
    except:
        traceback.print_exc(file=LOG.OutputFile)
        traceback.print_exc()

    # an invalid forced state is fatal: skip the run entirely
    if not error:
        try:
            bm= BootManager(LOG,forceState)
            if bm.CAN_RUN == 0:
                LOG.LogEntry( "Unable to initialize BootManager." )
                # a boot manager that cannot start is a failure too
                error = 1
            else:
                LOG.LogEntry( "Running version %s of BootManager." % bm.VARS['VERSION'] )
                success= bm.Run()
                if success:
                    LOG.LogEntry( "\nDone!" )
                else:
                    LOG.LogEntry( "\nError occurred!" )
                    error = 1
        except:
            # last line of defence: record the traceback, and make sure
            # the crash is reflected in the value returned to the caller
            traceback.print_exc(file=LOG.OutputFile)
            traceback.print_exc()
            error = 1

    LOG.LogEntry( "BootManager finished at: %s" % \
                  time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
    LOG.Upload()

    return error
377
378     
if __name__ == "__main__":
    # hand main()'s error flag back to the caller as the exit status
    sys.exit(main(sys.argv))