cosmetic
[bootmanager.git] / source / BootManager.py
1 #!/usr/bin/python -u
2
3 # Copyright (c) 2003 Intel Corporation
4 # All rights reserved.
5 #
6 # Copyright (c) 2004-2006 The Trustees of Princeton University
7 # All rights reserved.
8
9 import string
10 import sys, os, traceback
11 import time
12 import gzip
13
14 from steps import *
15 from Exceptions import *
16 import notify_messages
17 import BootServerRequest
18
# every log entry produced by the boot manager is written to this file
LOG_FILE = "/tmp/bm.log"
# path on the boot server that the log file is posted to
UPLOAD_LOG_PATH = "/alpina-logs/upload.php"

# directories that make up PATH while the boot manager is running
BIN_PATH = (
    '/usr/local/bin',
    '/usr/local/sbin',
    '/bin',
    '/sbin',
    '/usr/bin',
    '/usr/sbin',
    '/usr/local/planetlab/bin',
)


# the set of valid node run states (filled in at run time)
NodeRunStates = dict()
35
class log:
    """
    Timestamped logger.

    Every entry is prefixed with the current local time and written to
    the screen and/or to a gzip-compressed output file.  The instance
    also offers a write() method so that it can be handed to code that
    expects a writable file object.
    """

    # strftime() pattern prepended to every entry
    format="%H:%M:%S(%Z) "

    def __init__( self, OutputFilePath= None ):
        """
        Open OutputFilePath (if given) as a gzip file for writing.

        If no path is given, or the file cannot be opened, logging
        continues on the screen only.
        """
        # always define both attributes, so that LogEntry() and Upload()
        # work even when no file was requested or it could not be opened
        self.OutputFilePath= OutputFilePath
        self.OutputFile= None

        if OutputFilePath:
            try:
                self.OutputFile= gzip.GzipFile( OutputFilePath, "w", 9 )
            except Exception:
                # best effort: a missing log file must not stop the boot
                print( "Unable to open output file for log, continuing" )


    def LogEntry( self, str, inc_newline= 1, display_screen= 1 ):
        """
        Write one timestamped entry.

        str            -- text of the entry (the parameter name shadows
                          the builtin; kept for keyword callers)
        inc_newline    -- if true, terminate the entry with a newline
        display_screen -- if true, also write the entry to stdout
        """
        entry= time.strftime( log.format, time.localtime() ) + str
        if inc_newline:
            entry= entry + "\n"

        if self.OutputFile is not None:
            self.OutputFile.write( entry )
            # flush on every entry so the file is usable after a crash
            self.OutputFile.flush()

        if display_screen:
            sys.stdout.write( entry )


    def write( self, str ):
        """
        make log behave like a writable file object (for traceback
        prints)
        """
        self.LogEntry( str, 0, 1 )


    def Upload( self ):
        """
        upload the contents of the log to the server

        Closes the output file first; entries logged afterwards only go
        to the screen.  Does nothing when there is no open output file.
        """
        if self.OutputFile is None:
            return

        self.LogEntry( "NOTE: upload logs is known to be broken (beg)")
        self.LogEntry( "Uploading logs to %s" % UPLOAD_LOG_PATH )

        self.OutputFile.close()
        self.OutputFile= None

        bs_request = BootServerRequest.BootServerRequest()
        bs_request.MakeRequest(PartialPath = UPLOAD_LOG_PATH,
                               GetVars = None, PostVars = None,
                               FormData = ["log=@" + self.OutputFilePath],
                               DoSSL = True, DoCertCheck = True)
        self.LogEntry( "NOTE: upload logs is known to be broken (end)")
95         
96     
97
98         
99
100
class BootManager:
    """
    Reads the boot manager configuration and runs the sequence of steps
    that corresponds to the node's boot state.
    """

    # file containing initial variables/constants
    VARS_FILE = "configuration"


    def __init__(self, log, forceState):
        """
        Load the variables in VARS_FILE and prepare the environment.

        log        -- logging object; its LogEntry() and write() methods
                      and its OutputFile attribute are used
        forceState -- boot state overriding the machine's current one,
                      or None

        On success self.VARS holds the variables and self.CAN_RUN is 1.
        If the configuration file contains an invalid line, CAN_RUN
        stays 0 and self.VARS is not set.
        """
        # override machine's current state from the command line
        self.forceState = forceState

        # the main logging point
        self.LOG= log

        # set to 1 if we can run after initialization
        self.CAN_RUN = 0

        # read in and store all variables in VARS_FILE. each line
        # is in the format name=val (any whitespace around the = is
        # removed). everything after the first = to the end of the line
        # is the value, so values may themselves contain a =
        config_vars = {}
        validConfFile = True
        vars_file= open(self.VARS_FILE,'r')
        try:
            for line in vars_file:
                # if its a comment or a whitespace line, ignore
                if line[:1] == "#" or line.strip() == "":
                    continue

                # split on the first = only
                parts= line.split("=", 1)
                if len(parts) != 2:
                    self.LOG.LogEntry( "Invalid line in vars file: %s" % line )
                    validConfFile = False
                    break

                config_vars[parts[0].strip()]= parts[1].strip()
        finally:
            # close the file even if logging the bad line fails
            vars_file.close()

        if not validConfFile:
            self.LOG.LogEntry( "Unable to read configuration vars." )
            return

        # find out which directory we are running it, and set a variable
        # for that. future steps may need to get files out of the bootmanager
        # directory
        config_vars['BM_SOURCE_DIR']= os.getcwd()

        # not sure what the current PATH is set to, replace it with what
        # we know will work with all the boot cds
        os.environ['PATH']= ":".join(BIN_PATH)

        # this contains a set of information used and updated
        # by each step
        self.VARS= config_vars

        self.CAN_RUN= 1

    def Run(self):
        """
        core boot manager logic.

        the way errors are handled is as such: if any particular step
        cannot continue or unexpectedly fails, an exception is thrown.
        in this case, the boot manager cannot continue running.

        these step functions can also return a 0/1 depending on whether
        or not it succeeded. In the case of steps like ConfirmInstallWithUser,
        a 0 is returned and no exception is thrown if the user chose not
        to confirm the install. The same goes with the CheckHardwareRequirements.
        If requirements not met, but tests were successful, return 0.

        for steps that run within the installer, they are expected to either
        complete successfully and return 1, or throw an exception.

        For exact return values and expected operations, see the comments
        at the top of each of the individual step functions.

        Returns 1 if the selected state function finished without raising,
        0 otherwise.  In the latter case the debug logic is run before
        returning.
        """

        def _nodeNotInstalled():
            # called by the _xxxState() functions below upon failure
            self.VARS['BOOT_STATE']= 'failboot'
            self.VARS['STATE_CHANGE_NOTIFY']= 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
                      notify_messages.MSG_NODE_NOT_INSTALLED
            raise BootManagerException(notify_messages.MSG_NODE_NOT_INSTALLED)

        def _bootRun():
            # implements the boot logic, which consists of first
            # double checking that the node was properly installed,
            # checking whether someone added or changed disks, and
            # then finally chain boots.

            InstallInit.Run( self.VARS, self.LOG )
            if not ValidateNodeInstall.Run( self.VARS, self.LOG ):
                _nodeNotInstalled()

            WriteModprobeConfig.Run( self.VARS, self.LOG )
            MakeInitrd.Run( self.VARS, self.LOG )
            WriteNetworkConfig.Run( self.VARS, self.LOG )
            CheckForNewDisks.Run( self.VARS, self.LOG )
            SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
            ChainBootNode.Run( self.VARS, self.LOG )

        def _reinstallRun():
            # implements the reinstall logic, which will check whether
            # the min. hardware requirements are met, install the
            # software, and upon correct installation will switch to
            # 'boot' state and chainboot into the production system
            if not CheckHardwareRequirements.Run( self.VARS, self.LOG ):
                self.VARS['BOOT_STATE']= 'failboot'
                raise BootManagerException("Hardware requirements not met.")

            # runinstaller
            InstallInit.Run( self.VARS, self.LOG )
            InstallPartitionDisks.Run( self.VARS, self.LOG )
            InstallBootstrapFS.Run( self.VARS, self.LOG )
            InstallWriteConfig.Run( self.VARS, self.LOG )
            InstallUninitHardware.Run( self.VARS, self.LOG )
            self.VARS['BOOT_STATE']= 'boot'
            self.VARS['STATE_CHANGE_NOTIFY']= 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
                 notify_messages.MSG_INSTALL_FINISHED
            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
            _bootRun()

        def _newRun():
            # implements the new install logic, which will first check
            # with the user whether it is ok to install on this
            # machine, switch to 'reinstall' state and then invoke the reinstall
            # logic.  See reinstallState logic comments for further
            # details.
            # NOTE(review): the 0 returned here is not examined by the
            # caller below, so a declined install still counts as success
            if not ConfirmInstallWithUser.Run( self.VARS, self.LOG ):
                return 0
            self.VARS['BOOT_STATE']= 'reinstall'
            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
            _reinstallRun()

        def _debugRun(state='failboot'):
            # implements debug logic: record the given state with PLC
            # and then run the StartDebug step
            self.VARS['BOOT_STATE']=state
            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
            StartDebug.Run( self.VARS, self.LOG )

        def _badRun():
            # should never happen; log event
            self.LOG.write( "\nInvalid BOOT_STATE = %s\n" % self.VARS['BOOT_STATE'])
            _debugRun()

        global NodeRunStates
        # setup state -> function hash table
        NodeRunStates['install'] = _newRun
        NodeRunStates['reinstall'] = _reinstallRun
        NodeRunStates['boot'] = _bootRun
        NodeRunStates['failboot']  = _bootRun   # should always try to boot.
        NodeRunStates['safeboot']  = lambda : _debugRun('safeboot')
        NodeRunStates['disabled']  = lambda : _debugRun('disabled')

        success = 0
        try:
            InitializeBootManager.Run( self.VARS, self.LOG )
            ReadNodeConfiguration.Run( self.VARS, self.LOG )
            AuthenticateWithPLC.Run( self.VARS, self.LOG )
            GetAndUpdateNodeDetails.Run( self.VARS, self.LOG )

            # override machine's current state from the command line
            if self.forceState is not None:
                self.VARS['BOOT_STATE']= self.forceState
                UpdateBootStateWithPLC.Run( self.VARS, self.LOG )

            # unknown states fall back to _badRun
            stateRun = NodeRunStates.get(self.VARS['BOOT_STATE'],_badRun)
            stateRun()
            success = 1

        # sys.exc_info() is used instead of binding the exception in the
        # except clause so that this parses on both old and new pythons
        except KeyError:
            self.LOG.write( "\n\nKeyError while running: %s\n"
                            % str(sys.exc_info()[1]) )
        except BootManagerException:
            self.LOG.write( "\n\nException while running: %s\n"
                            % str(sys.exc_info()[1]) )
        except:
            # deliberate catch-all: whatever went wrong, log it and fall
            # through to the debug logic below
            self.LOG.write( "\n\nImplementation Error\n")
            traceback.print_exc(file=self.LOG.OutputFile)
            traceback.print_exc()

        if not success:
            try:
                _debugRun()
            except BootManagerException:
                self.LOG.write( "\n\nException while running: %s\n"
                                % str(sys.exc_info()[1]) )
            except:
                self.LOG.write( "\n\nImplementation Error\n")
                traceback.print_exc(file=self.LOG.OutputFile)
                traceback.print_exc()

        return success
298             
299             
def main(argv):
    """
    Entry point of the boot manager.

    argv -- command line; when it has exactly two elements, argv[1] is
            the node run state to force and must be one of the keys of
            NodeRunStates.

    Opens the log, creates and runs a BootManager, then uploads the log.

    Returns 0 on success and 1 if an error occurred: an invalid forced
    state, a BootManager that could not be initialized, an unexpected
    exception, or a failed run.
    """

    import utils
    utils.prompt_for_breakpoint_mode()

    #utils.breakpoint ("Entering BootManager::main")

    global NodeRunStates
    NodeRunStates = {'install':None,
                     'reinstall':None,
                     'boot':None,
                     'safeboot':None,
                     'failboot':None,
                     'disabled':None, }

    # set to 1 if error occurred
    error= 0

    # all output goes through this class so we can save it and post
    # the data back to PlanetLab central
    LOG= log( LOG_FILE )

    LOG.LogEntry( "BootManager started at: %s" % \
                  time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )

    forceState = None
    try:
        if len(argv) == 2:
            fState = argv[1]
            if fState in NodeRunStates:
                forceState = fState
            else:
                LOG.LogEntry("FATAL: cannot force node run state to=%s" % fState)
                error = 1
    except:
        # deliberate catch-all: report the problem and carry on without
        # a forced state
        traceback.print_exc(file=LOG.OutputFile)
        traceback.print_exc()

    # an invalid forced state is fatal: skip the run, but still write the
    # closing entry and upload the log below
    if not error:
        try:
            bm= BootManager(LOG,forceState)
            if bm.CAN_RUN == 0:
                LOG.LogEntry( "Unable to initialize BootManager." )
                # report the failure through the exit status as well
                error = 1
            else:
                LOG.LogEntry( "Running version %s of BootManager." %
                              bm.VARS['VERSION'] )
                success= bm.Run()
                if success:
                    LOG.LogEntry( "\nDone!" )
                else:
                    LOG.LogEntry( "\nError occurred!" )
                    error = 1
        except:
            # deliberate catch-all so that the log is always finished and
            # uploaded; the failure is reported through the exit status
            traceback.print_exc(file=LOG.OutputFile)
            traceback.print_exc()
            error = 1

    LOG.LogEntry( "BootManager finished at: %s" % \
                  time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
    LOG.Upload()

    return error
366
367     
if __name__ == "__main__":
    # hand main()'s error flag to the caller as the process exit status
    sys.exit(main(sys.argv))