remove 'failboot' from possible boot states
[bootmanager.git] / source / BootManager.py
1 #!/usr/bin/python -u
2
3 # Copyright (c) 2003 Intel Corporation
4 # All rights reserved.
5 #
6 # Copyright (c) 2004-2006 The Trustees of Princeton University
7 # All rights reserved.
8
9 import string
10 import sys, os, traceback
11 import time
12 import gzip
13
14 from steps import *
15 from Exceptions import *
16 import notify_messages
17 import BootServerRequest
18
# every log entry is also written to this file on the node
BM_NODE_LOG = "/tmp/bm.log"

# path of the script the node log is uploaded to (see log.Upload)
UPLOAD_LOG_SCRIPT = "/boot/upload-bmlog.php"

# the directories that make up PATH while the boot manager is running,
# in lookup order
BIN_PATH = (
    '/usr/local/bin',
    '/usr/local/sbin',
    '/usr/bin',
    '/usr/sbin',
    '/bin',
    '/sbin',
)
30            
31 ##############################
class log:
    """
    timestamped logger: every entry is prefixed with the current local
    time and written to an output file (when one could be opened)
    and/or to the screen. the output file can be uploaded with Upload()
    """

    # strftime() pattern of the prefix prepended to every entry
    format="%H:%M:%S(%Z) "

    def __init__( self, OutputFilePath= None ):
        """
        open OutputFilePath for writing, truncating any previous
        content. if it cannot be opened (this includes the default,
        None), a message is printed and entries only go to the screen
        """
        # define both attributes up front, so that a log whose file
        # could not be opened is still a fully initialized object
        self.OutputFile= None
        self.OutputFilePath= None
        try:
            self.OutputFile= open( OutputFilePath, "w")
            self.OutputFilePath= OutputFilePath
        except Exception:
            # not a bare except: on python >= 2.5 this no longer
            # swallows KeyboardInterrupt and SystemExit
            print( "bootmanager log : Unable to open output file %r, continuing"%OutputFilePath )

    def LogEntry( self, str, inc_newline= 1, display_screen= 1 ):
        """
        write one timestamped entry.

        str:            the text of the entry (the name shadows the
                        builtin, but is kept as callers may pass it by
                        keyword)
        inc_newline:    if true, terminate the entry with a newline
        display_screen: if true, also write the entry to stdout

        the entry always goes to the output file when there is one, and
        the file is flushed after each entry
        """
        entry= time.strftime(log.format, time.localtime()) + str
        if inc_newline:
            entry= entry + "\n"

        if self.OutputFile:
            self.OutputFile.write( entry )
        if display_screen:
            sys.stdout.write( entry )

        if self.OutputFile:
            self.OutputFile.flush()

    def write( self, str ):
        """
        make log behave like a writable file object (for traceback
        prints): same as LogEntry, without a trailing newline
        """
        self.LogEntry( str, 0, 1 )

    # bm log uploading is available back again, as of nodeconfig-5.0-2
    def Upload( self ):
        """
        upload the contents of the log to the server.

        does nothing when there is no open output file. otherwise the
        file is closed first, so entries logged after this call only
        reach the screen, and a second call is a no-op
        """
        if self.OutputFile is None:
            return

        self.OutputFile.flush()
        self.LogEntry( "Uploading logs to %s" % UPLOAD_LOG_SCRIPT )

        # close before sending, so that the whole log is on disk
        self.OutputFile.close()
        self.OutputFile= None

        bs_request = BootServerRequest.BootServerRequest()
        bs_request.MakeRequest(PartialPath = UPLOAD_LOG_SCRIPT,
                               GetVars = None, PostVars = None,
                               FormData = ["log=@" + self.OutputFilePath],
                               DoSSL = True, DoCertCheck = True)
85
86 ##############################
class BootManager:
    """
    loads the boot manager configuration and runs the sequence of
    steps that matches the BOOT_STATE of the node
    """

    # file containing initial variables/constants
    VARS_FILE = "configuration"

    # the set of valid node run states. the values are placeholders
    # until Run() binds each state to the function implementing it
    NodeRunStates = {'reinstall':None,
                     'boot':None,
                     'safeboot':None,
                     'disabled':None,
                     }

    def __init__(self, log, forceState):
        """
        read the variables in VARS_FILE and prepare the environment.

        log:        the logging point, stored as self.LOG
        forceState: a state overriding the machine's current one, or
                    None to keep the current one

        upon success self.VARS holds the variables and self.CAN_RUN is
        1. if VARS_FILE holds an invalid line, self.CAN_RUN is left to
        0 and self.VARS is not set. as a side effect, PATH is replaced
        in the environment of the process
        """
        # override machine's current state from the command line
        self.forceState = forceState

        # the main logging point
        self.LOG= log

        # set to 1 if we can run after initialization
        self.CAN_RUN = 0

        # read in and store all variables in VARS_FILE. each line is in
        # the format name=val (any whitespace around the = is removed).
        # everything after the = to the end of the line is the value
        config_vars = {}
        validConfFile = True
        vars_file= open(self.VARS_FILE,'r')
        try:
            for line in vars_file:
                # if its a comment or a whitespace line, ignore
                if line[:1] == "#" or line.strip() == "":
                    continue

                # split on the first = only, so that a value may itself
                # contain a =
                parts= line.split("=",1)
                if len(parts) != 2:
                    self.LOG.LogEntry( "Invalid line in vars file: %s" % line )
                    validConfFile = False
                    break

                config_vars[parts[0].strip()]= parts[1].strip()
        finally:
            # release the file even if reading or logging failed
            vars_file.close()

        if not validConfFile:
            self.LOG.LogEntry( "Unable to read configuration vars." )
            return

        # find out which directory we are running it, and set a variable
        # for that. future steps may need to get files out of the bootmanager
        # directory
        config_vars['BM_SOURCE_DIR']= os.getcwd()

        # not sure what the current PATH is set to, replace it with what
        # we know will work with all the boot cds
        os.environ['PATH']= ":".join(BIN_PATH)

        # this contains a set of information used and updated by each step
        self.VARS= config_vars

        self.CAN_RUN= 1

    def Run(self):
        """
        core boot manager logic.

        the way errors are handled is as such: if any particular step
        cannot continue or unexpectedly fails, an exception is thrown.
        in this case, the boot manager cannot continue running.

        these step functions can also return a 0/1 depending on whether
        or not it succeeded. In the case of steps like ConfirmInstallWithUser,
        a 0 is returned and no exception is thrown if the user chose not
        to confirm the install. The same goes with the CheckHardwareRequirements.
        If requirements not met, but tests were successful, return 0.

        for steps that run within the installer, they are expected to either
        complete successfully and return 1, or throw an exception.

        For exact return values and expected operations, see the comments
        at the top of each of the individual step functions.

        returns 1 if the steps of the current state ran without raising,
        0 otherwise. in the latter case the debug steps are run before
        returning.
        """

        def _startFallbackDebug():
            # starting the fallback/debug ssh daemon for safety:
            # if the node install somehow hangs, or if it simply takes ages,
            # we can still enter and investigate.
            # this is best effort only: a failure here must never prevent
            # the node from booting, so everything is caught, but it is
            # reported rather than silently dropped
            try:
                StartDebug.Run(self.VARS, self.LOG, last_resort = False)
            except:
                self.LOG.LogEntry( "Unable to start the fallback debug ssh daemon, continuing" )

        def _logImplementationError():
            # reports the exception being handled, that is neither of the
            # expected kinds, in the log and on the screen
            self.LOG.write( "\n\nImplementation Error\n")
            traceback.print_exc(file=self.LOG.OutputFile)
            traceback.print_exc()

        def _nodeNotInstalled():
            # called by the _xxxState() functions below upon failure
            self.VARS['RUN_LEVEL']= 'failboot'
            self.VARS['STATE_CHANGE_NOTIFY']= 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
                      notify_messages.MSG_NODE_NOT_INSTALLED
            raise BootManagerException(notify_messages.MSG_NODE_NOT_INSTALLED)

        def _bootRun():
            # implements the boot logic, which consists of first
            # double checking that the node was properly installed,
            # checking whether someone added or changed disks, and
            # then finally chain boots.
            _startFallbackDebug()

            InstallInit.Run( self.VARS, self.LOG )
            if not ValidateNodeInstall.Run( self.VARS, self.LOG ):
                # always raises
                _nodeNotInstalled()

            WriteModprobeConfig.Run( self.VARS, self.LOG )
            MakeInitrd.Run( self.VARS, self.LOG )
            WriteNetworkConfig.Run( self.VARS, self.LOG )
            CheckForNewDisks.Run( self.VARS, self.LOG )
            SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
            ChainBootNode.Run( self.VARS, self.LOG )

        def _reinstallRun():
            # implements the reinstall logic, which will check whether
            # the min. hardware requirements are met, install the
            # software, and upon correct installation will switch to
            # 'boot' state and chainboot into the production system
            _startFallbackDebug()

            if not CheckHardwareRequirements.Run( self.VARS, self.LOG ):
                self.VARS['RUN_LEVEL']= 'failboot'
                raise BootManagerException("Hardware requirements not met.")

            # runinstaller
            InstallInit.Run( self.VARS, self.LOG )
            InstallPartitionDisks.Run( self.VARS, self.LOG )
            InstallBootstrapFS.Run( self.VARS, self.LOG )
            InstallWriteConfig.Run( self.VARS, self.LOG )
            InstallUninitHardware.Run( self.VARS, self.LOG )
            self.VARS['BOOT_STATE']= 'boot'
            self.VARS['STATE_CHANGE_NOTIFY']= 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
                 notify_messages.MSG_INSTALL_FINISHED
            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
            _bootRun()

        # NOTE(review): _installRun is not bound to any state below, so
        # nothing in this file calls it -- confirm whether it can go
        def _installRun():
            # implements the new install logic, which will first check
            # with the user whether it is ok to install on this
            # machine, switch to 'reinstall' state and then invoke the reinstall
            # logic.  See reinstallState logic comments for further
            # details.
            if not ConfirmInstallWithUser.Run( self.VARS, self.LOG ):
                return 0
            self.VARS['BOOT_STATE']= 'reinstall'
            UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
            _reinstallRun()

        def _debugRun(state='failboot'):
            # implements debug logic, which starts the sshd and just waits around
            self.VARS['RUN_LEVEL']=state
            UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )
            StartDebug.Run( self.VARS, self.LOG )
            # fsck/mount fs if present, and ignore return value if it's not.
            ValidateNodeInstall.Run( self.VARS, self.LOG )

        def _badstateRun():
            # should never happen; log event
            # NOTE(review): when the debug steps then complete, Run()
            # still reports success for this invalid state -- confirm
            # this is intended
            self.LOG.write( "\nInvalid BOOT_STATE = %s\n" % self.VARS['BOOT_STATE'])
            _debugRun()

        # setup state -> function hash table
        BootManager.NodeRunStates['reinstall']  = _reinstallRun
        BootManager.NodeRunStates['boot']       = _bootRun
        BootManager.NodeRunStates['safeboot']   = lambda : _debugRun('safeboot')
        BootManager.NodeRunStates['disabled']   = lambda : _debugRun('disabled')

        # the exceptions below are fetched with sys.exc_info() rather than
        # bound in the except clause, as the syntax for binding them is
        # not the same across python versions
        success = 0
        try:
            InitializeBootManager.Run( self.VARS, self.LOG )
            ReadNodeConfiguration.Run( self.VARS, self.LOG )
            AuthenticateWithPLC.Run( self.VARS, self.LOG )
            StartRunlevelAgent.Run( self.VARS, self.LOG )
            GetAndUpdateNodeDetails.Run( self.VARS, self.LOG )

            # override machine's current state from the command line
            if self.forceState is not None:
                self.VARS['BOOT_STATE']= self.forceState
                UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
                UpdateRunLevelWithPLC.Run( self.VARS, self.LOG )

            stateRun = BootManager.NodeRunStates.get(self.VARS['BOOT_STATE'],_badstateRun)
            stateRun()
            success = 1

        except KeyError:
            self.LOG.write( "\n\nKeyError while running: %s\n" % str(sys.exc_info()[1]) )
        except BootManagerException:
            self.LOG.write( "\n\nException while running: %s\n" % str(sys.exc_info()[1]) )
        except:
            _logImplementationError()

        if not success:
            # something went wrong: leave the node in debug mode, so that
            # it can be investigated
            try:
                _debugRun()
            except BootManagerException:
                self.LOG.write( "\n\nException while running: %s\n" % str(sys.exc_info()[1]) )
            except:
                _logImplementationError()

        return success
306             
307             
def main(argv):
    """
    entry point of the boot manager.

    argv: the command line; when it holds exactly one argument, this
          is the node run state to force, and must be one of the keys
          of BootManager.NodeRunStates

    returns 1 if the state to force is not valid or if
    BootManager.Run() reports a failure, 0 otherwise.

    NOTE(review): 0 is also returned when the BootManager cannot be
    initialized, or when an exception escapes from it -- this is the
    historical behaviour, confirm that callers expect it.
    """

    import utils
    utils.prompt_for_breakpoint_mode()

    utils.breakpoint ("Entering BootManager::main")

    def _now():
        # the current UTC time, as written in the start/finish entries
        return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())

    def _finish(status):
        # logs the final entry, uploads the log and hands status back
        LOG.LogEntry( "BootManager finished at: %s" % _now() )
        LOG.Upload()
        return status

    # set to 1 if error occurred
    error= 0

    # all output goes through this class so we can save it and post
    # the data back to PlanetLab central
    LOG= log( BM_NODE_LOG )

    LOG.LogEntry( "BootManager started at: %s" % _now() )

    forceState = None
    try:
        if len(argv) == 2:
            fState = argv[1]
            if fState in BootManager.NodeRunStates:
                forceState = fState
            else:
                LOG.LogEntry("FATAL: cannot force node run state to=%s" % fState)
                error = 1
    except:
        traceback.print_exc(file=LOG.OutputFile)
        traceback.print_exc()

    if error:
        return _finish(error)

    try:
        bm= BootManager(LOG,forceState)
        if bm.CAN_RUN == 0:
            LOG.LogEntry( "Unable to initialize BootManager." )
        else:
            LOG.LogEntry( "Running version %s of BootManager." % bm.VARS['VERSION'] )
            if bm.Run():
                LOG.LogEntry( "\nDone!" )
            else:
                LOG.LogEntry( "\nError occurred!" )
                error = 1
    except:
        traceback.print_exc(file=LOG.OutputFile)
        traceback.print_exc()

    return _finish(error)
365
366     
if __name__ == "__main__":
    # run the boot manager, and hand its status back as the exit code
    sys.exit(main(sys.argv))