attempt to display timestamps during boot manager steps
[bootmanager.git] / source / BootManager.py
1 #!/usr/bin/python2 -u
2
3 # Copyright (c) 2003 Intel Corporation
4 # All rights reserved.
5 #
6 # Copyright (c) 2004-2006 The Trustees of Princeton University
7 # All rights reserved.
8
9 import string
10 import sys, os, traceback
11 from time import gmtime, strftime
12 from gzip import GzipFile
13
14 from steps import *
15 from Exceptions import *
16 import notify_messages
17 import BootServerRequest
18
# Every log entry is appended to this (gzip-compressed) file.
LOG_FILE = "/tmp/bm.log"

# Path handed to the boot server request when the log is uploaded.
UPLOAD_LOG_PATH = "/alpina-logs/upload.php"

# Directories that make up PATH for the lifetime of the boot manager.
BIN_PATH = (
    '/usr/local/bin',
    '/usr/local/sbin',
    '/bin',
    '/sbin',
    '/usr/bin',
    '/usr/sbin',
    '/usr/local/planetlab/bin',
)

# Valid node run states, keyed by state name; filled in by main() and
# by BootManager.Run().
NodeRunStates = dict()
35
class log:
    """
    Log sink of the boot manager.

    Each entry is prefixed with the local time and written to the
    screen and/or to a gzip-compressed file, which Upload() can later
    post to the boot server.
    """

    # strftime() pattern of the timestamp prepended to every entry
    format="%H:%M:%S(%Z) "

    def __init__( self, OutputFilePath= None ):
        """
        OutputFilePath: path of the gzip log file to create. When it is
        None/empty, or the file cannot be opened, entries are only
        written to the screen.
        """
        # always define both attributes, so LogEntry() and Upload() can
        # test them even when no output file was requested
        self.OutputFilePath= OutputFilePath
        self.OutputFile= None

        if OutputFilePath:
            try:
                self.OutputFile= GzipFile( OutputFilePath, "w", 9 )
            except Exception:
                # best effort: keep logging to the screen only
                print( "Unable to open output file for log, continuing" )


    def LogEntry( self, str, inc_newline= 1, display_screen= 1 ):
        """
        Write one timestamped entry.

        str:            text of the entry (the parameter name shadows
                        the builtin, kept for existing callers)
        inc_newline:    when true, terminate the entry with a newline
        display_screen: when true, also write the entry to stdout
        """
        # imported locally: using time.strftime()/time.localtime()
        # requires the module object itself to be bound to 'time'
        import time

        entry= time.strftime( log.format, time.localtime() ) + str
        if inc_newline:
            entry= entry + "\n"

        if display_screen:
            sys.stdout.write( entry )

        if self.OutputFile is not None:
            self.OutputFile.write( entry )
            # flush after every entry so the file stays current
            self.OutputFile.flush()


    def write( self, str ):
        """
        make log behave like a writable file object (for traceback
        prints); the text is logged without a trailing newline
        """
        self.LogEntry( str, 0, 1 )


    def Upload( self ):
        """
        upload the contents of the log to the server

        The log file is closed first, so later entries only reach the
        screen. Does nothing when no log file is open.
        """

        if self.OutputFile is not None:
            self.LogEntry( "Uploading logs to %s" % UPLOAD_LOG_PATH )

            self.OutputFile.close()
            self.OutputFile= None

            bs_request = BootServerRequest.BootServerRequest()
            bs_request.MakeRequest(PartialPath = UPLOAD_LOG_PATH,
                                   GetVars = None, PostVars = None,
                                   FormData = ["log=@" + self.OutputFilePath],
                                   DoSSL = True, DoCertCheck = True)
93         
94     
95
96         
97
98
class BootManager:
    """
    Top-level driver of the boot manager.

    __init__() loads the name=value pairs of VARS_FILE into self.VARS
    and sets CAN_RUN; Run() then executes the sequence of steps
    selected by VARS['BOOT_STATE'].
    """

    # file containing initial variables/constants
    VARS_FILE = "configuration"


    def __init__(self, log, forceState):
        """
        log:        logging object; its LogEntry() and write() methods
                    and its OutputFile attribute are used
        forceState: run state that overrides the node's current state,
                    or None to keep the current state

        On success self.VARS holds the configuration and CAN_RUN is 1;
        if the vars file contains an invalid line CAN_RUN stays 0.
        """
        # override machine's current state from the command line
        self.forceState = forceState

        # the main logging point
        self.LOG= log

        # set to 1 if we can run after initialization
        self.CAN_RUN = 0

        config_vars= self._readVarsFile()
        if config_vars is None:
            self.LOG.LogEntry( "Unable to read configuration vars." )
            return

        # find out which directory we are running it, and set a variable
        # for that. future steps may need to get files out of the bootmanager
        # directory
        config_vars['BM_SOURCE_DIR']= os.getcwd()

        # not sure what the current PATH is set to, replace it with what
        # we know will work with all the boot cds
        os.environ['PATH']= ":".join(BIN_PATH)

        # this contains a set of information used and updated
        # by each step
        self.VARS= config_vars

        self.CAN_RUN= 1


    def _readVarsFile(self):
        """
        read all variables in VARS_FILE into a dictionary. each line is
        in the format name=val; whitespace around the first = is
        removed and everything after it, to the end of the line, is the
        value.

        returns the dictionary, or None (after logging the offending
        line) if a line contains no = at all.
        """
        parsed= {}
        vars_file= open(self.VARS_FILE,'r')
        try:
            for line in vars_file:
                # if its a comment or a whitespace line, ignore
                if line[:1] == "#" or line.strip() == "":
                    continue

                # split on the first = only, so that a value may itself
                # contain = characters
                parts= line.split("=", 1)
                if len(parts) != 2:
                    self.LOG.LogEntry( "Invalid line in vars file: %s" % line )
                    return None

                parsed[parts[0].strip()]= parts[1].strip()
        finally:
            # close the file even if reading or logging raised
            vars_file.close()

        return parsed


    def _logImplementationError(self):
        """
        log the exception currently being handled as an implementation
        error, with its traceback sent to the log file and to the
        default traceback output
        """
        self.LOG.write( "\n\nImplementation Error\n")
        traceback.print_exc(file=self.LOG.OutputFile)
        traceback.print_exc()


    def Run(self):
        """
        core boot manager logic.

        the way errors are handled is as such: if any particular step
        cannot continue or unexpectedly fails, an exception is thrown.
        in this case, the boot manager cannot continue running.

        these step functions can also return a 0/1 depending on whether
        or not it succeeded. In the case of steps like ConfirmInstallWithUser,
        a 0 is returned and no exception is thrown if the user chose not
        to confirm the install. The same goes with the CheckHardwareRequirements.
        If requirements not met, but tests were successful, return 0.

        for steps that run within the installer, they are expected to either
        complete successfully and return 1, or throw an exception.

        For exact return values and expected operations, see the comments
        at the top of each of the individual step functions.

        Returns 1 if the selected state function completed without
        raising, 0 otherwise (in which case the debug state is entered).
        """

        def _nodeNotInstalled():
            # called by the _xxxState() functions below upon failure
            self.VARS['BOOT_STATE']= 'failboot'
            self.VARS['STATE_CHANGE_NOTIFY']= 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
                      notify_messages.MSG_NODE_NOT_INSTALLED
            raise BootManagerException(
                  notify_messages.MSG_NODE_NOT_INSTALLED )

        def _bootRun():
            # implements the boot logic, which consists of first
            # double checking that the node was properly installed,
            # checking whether someone added or changed disks, and
            # then finally chain boots.

            InstallInit.Run( self.VARS, self.LOG )
            if ValidateNodeInstall.Run( self.VARS, self.LOG ):
                WriteModprobeConfig.Run( self.VARS, self.LOG )
                MakeInitrd.Run( self.VARS, self.LOG )
                WriteNetworkConfig.Run( self.VARS, self.LOG )
                CheckForNewDisks.Run( self.VARS, self.LOG )
                SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
                ChainBootNode.Run( self.VARS, self.LOG )
            else:
                _nodeNotInstalled()

        def _reinstallRun():
            # implements the reinstall logic, which will check whether
            # the min. hardware requirements are met, install the
            # software, and upon correct installation will switch to
            # 'boot' state and chainboot into the production system
            if not CheckHardwareRequirements.Run( self.VARS, self.LOG ):
                self.VARS['BOOT_STATE']= 'failboot'
                raise BootManagerException( "Hardware requirements not met." )

            # runinstaller
            InstallInit.Run( self.VARS, self.LOG )
            InstallPartitionDisks.Run( self.VARS, self.LOG )
            InstallBootstrapFS.Run( self.VARS, self.LOG )
            InstallWriteConfig.Run( self.VARS, self.LOG )
            InstallUninitHardware.Run( self.VARS, self.LOG )
            self.VARS['BOOT_STATE']= 'boot'
            self.VARS['STATE_CHANGE_NOTIFY']= 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
                 notify_messages.MSG_INSTALL_FINISHED
            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
            _bootRun()

        def _newRun():
            # implements the new install logic, which will first check
            # with the user whether it is ok to install on this
            # machine, switch to 'reinstall' state and then invoke the reinstall
            # logic.  See reinstallState logic comments for further
            # details.
            if not ConfirmInstallWithUser.Run( self.VARS, self.LOG ):
                return 0
            self.VARS['BOOT_STATE']= 'reinstall'
            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
            _reinstallRun()

        def _debugRun(state='failboot'):
            # implements debug logic, which just starts the sshd
            # and just waits around
            self.VARS['BOOT_STATE']=state
            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
            StartDebug.Run( self.VARS, self.LOG )

        def _badRun():
            # should never happen; log event
            self.LOG.write( "\nInvalid BOOT_STATE = %s\n" % self.VARS['BOOT_STATE'])
            _debugRun()

        global NodeRunStates
        # setup state -> function hash table
        NodeRunStates['install'] = _newRun
        NodeRunStates['reinstall'] = _reinstallRun
        NodeRunStates['boot'] = _bootRun
        NodeRunStates['failboot']  = _bootRun   # should always try to boot.
        NodeRunStates['safeboot']  = lambda : _debugRun('safeboot')
        NodeRunStates['disabled']  = lambda : _debugRun('disabled')

        # note: the handlers below fetch the exception through
        # sys.exc_info(), which is accepted by every python release,
        # unlike either spelling of the 'except X, e' / 'except X as e'
        # clause
        success = 0
        try:
            InitializeBootManager.Run( self.VARS, self.LOG )
            ReadNodeConfiguration.Run( self.VARS, self.LOG )
            AuthenticateWithPLC.Run( self.VARS, self.LOG )
            GetAndUpdateNodeDetails.Run( self.VARS, self.LOG )

            # override machine's current state from the command line
            if self.forceState is not None:
                self.VARS['BOOT_STATE']= self.forceState
                UpdateBootStateWithPLC.Run( self.VARS, self.LOG )

            stateRun = NodeRunStates.get(self.VARS['BOOT_STATE'],_badRun)
            # NOTE(review): the return value is ignored, so _newRun()
            # returning 0 (install not confirmed) still counts as
            # success -- confirm this is intended
            stateRun()
            success = 1

        except KeyError:
            self.LOG.write( "\n\nKeyError while running: %s\n" %
                            str(sys.exc_info()[1]) )
        except BootManagerException:
            self.LOG.write( "\n\nException while running: %s\n" %
                            str(sys.exc_info()[1]) )
        except:
            self._logImplementationError()

        if not success:
            # whatever went wrong, fall back to the debug state
            try:
                _debugRun()
            except BootManagerException:
                self.LOG.write( "\n\nException while running: %s\n" %
                                str(sys.exc_info()[1]) )
            except:
                self._logImplementationError()

        return success
296             
297             
def main(argv):
    """
    Entry point of the boot manager.

    argv: command line; an optional single argument (argv[1]) forces
          the node run state and must be one of the known states.

    Opens the log, runs the BootManager, uploads the log, and returns
    0 on success or 1 if anything went wrong (unknown forced state,
    failed initialization, failed run, or unexpected exception).
    """

    import utils
    utils.prompt_for_breakpoint_mode()

    #utils.breakpoint ("Entering BootManager::main")

    def _utcNow():
        # current UTC time, as shown in the started/finished entries
        return strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())

    global NodeRunStates
    NodeRunStates = {'install':None,
                     'reinstall':None,
                     'boot':None,
                     'safeboot':None,
                     'failboot':None,
                     'disabled':None, }

    # set to 1 if error occurred
    error= 0

    # all output goes through this class so we can save it and post
    # the data back to PlanetLab central
    LOG= log( LOG_FILE )

    LOG.LogEntry( "BootManager started at: %s" % _utcNow() )

    forceState = None
    try:
        if len(argv) == 2:
            fState = argv[1]
            if fState in NodeRunStates:
                forceState = fState
            else:
                LOG.LogEntry("FATAL: cannot force node run state to=%s" % fState)
                error = 1
    except:
        traceback.print_exc(file=LOG.OutputFile)
        traceback.print_exc()

    if not error:
        # top-level boundary: catch everything so that the log is
        # always finished and uploaded below
        try:
            bm= BootManager(LOG,forceState)
            if bm.CAN_RUN == 0:
                LOG.LogEntry( "Unable to initialize BootManager." )
                # a boot manager that cannot run is a failure
                error = 1
            else:
                LOG.LogEntry( "Running version %s of BootManager." %
                              bm.VARS['VERSION'] )
                success= bm.Run()
                if success:
                    LOG.LogEntry( "\nDone!" )
                else:
                    LOG.LogEntry( "\nError occurred!" )
                    error = 1
        except:
            traceback.print_exc(file=LOG.OutputFile)
            traceback.print_exc()
            # an unexpected exception must not be reported as success
            error = 1

    LOG.LogEntry( "BootManager finished at: %s" % _utcNow() )
    LOG.Upload()

    return error
364
365     
if __name__ == "__main__":
    # the value returned by main() becomes the process exit status
    sys.exit(main(sys.argv))