merge from trunk : upload log & early sshd start
[bootmanager.git] / source / BootManager.py
1 #!/usr/bin/python2 -u
2
3 # Copyright (c) 2003 Intel Corporation
4 # All rights reserved.
5 #
6 # Copyright (c) 2004-2006 The Trustees of Princeton University
7 # All rights reserved.
8
9 import string
10 import sys, os, traceback
11 from time import gmtime, strftime
12 from gzip import GzipFile
13
14 from steps import *
15 from Exceptions import *
16 import notify_messages
17 import BootServerRequest
18
# file on the node that receives everything the boot manager logs
BM_NODE_LOG = "/tmp/bm.log"

# request path used when the log file is uploaded (see log.Upload)
UPLOAD_LOG_SCRIPT = "/boot/upload-bmlog.php"

# directories, in search order, that replace PATH while the boot
# manager is running
BIN_PATH = (
    '/usr/local/bin',
    '/usr/local/sbin',
    '/bin',
    '/sbin',
    '/usr/bin',
    '/usr/sbin',
    '/usr/local/planetlab/bin',
)

# table keyed by the valid node run states; starts out empty and is
# filled in by main() and BootManager.Run()
NodeRunStates = dict()

35
class log:
    """
    Logger used by the boot manager.

    Every entry is written to an output file and, optionally, echoed
    to stdout. The class also provides write() so that an instance can
    be passed wherever a writable file-like object is expected.

    Attributes:
      OutputFilePath -- the path given to the constructor (may be None)
      OutputFile     -- the open file object, or None when the file
                        could not be opened or has been uploaded
    """

    def __init__( self, OutputFilePath= None ):
        """
        Open OutputFilePath for writing. If that fails, print a
        message and carry on logging to the screen only.
        """
        # define both attributes up front so they exist even when the
        # open() below fails
        self.OutputFilePath= OutputFilePath
        self.OutputFile= None
        try:
            self.OutputFile= open( OutputFilePath, "w")
        except (IOError, OSError, TypeError):
            # TypeError is what open() raises for the default
            # OutputFilePath of None
            print( "bootmanager log : Unable to open output file %r, continuing"%OutputFilePath )

    def LogEntry( self, str, inc_newline= 1, display_screen= 1 ):
        """
        Record one entry.

        str            -- the text to log
        inc_newline    -- when true, a newline is appended to the text
        display_screen -- when true, the text is also written to stdout

        The output file, if open, is flushed after every entry.
        """
        text= str
        if inc_newline:
            text= text + "\n"

        if self.OutputFile:
            self.OutputFile.write( text )
            self.OutputFile.flush()

        if display_screen:
            sys.stdout.write( text )

    def write( self, str ):
        """
        make log behave like a writable file object (for traceback
        prints): the text is logged verbatim, without an added
        newline, and echoed to the screen
        """
        self.LogEntry( str, 0, 1 )

    # bm log uploading is available back again, as of nodeconfig-5.0-2
    def Upload( self ):
        """
        upload the contents of the log to the server

        Does nothing if the output file is not open. Otherwise the file
        is closed before the request is made, so any entries logged
        after this call only reach the screen.
        """
        if self.OutputFile is None:
            return

        self.OutputFile.flush()
        self.LogEntry( "Uploading logs to %s" % UPLOAD_LOG_SCRIPT )

        # close the file before it gets posted
        self.OutputFile.close()
        self.OutputFile= None

        # NOTE(review): the "log=@<path>" form entry presumably makes
        # the request attach the file contents -- confirm against
        # BootServerRequest.MakeRequest
        bs_request = BootServerRequest.BootServerRequest()
        bs_request.MakeRequest(PartialPath = UPLOAD_LOG_SCRIPT,
                               GetVars = None, PostVars = None,
                               FormData = ["log=@" + self.OutputFilePath],
                               DoSSL = True, DoCertCheck = True)

90         
91     
92
93         
94
95
class BootManager:
    """
    Drives a node through the boot manager steps.

    The constructor loads the initial variables from VARS_FILE; Run()
    then executes the steps that match the node's boot state.

    Attributes:
      forceState -- boot state forced from the command line, or None
      LOG        -- the log object every step writes to
      CAN_RUN    -- 1 once initialization succeeded, 0 otherwise
      VARS       -- dictionary of variables shared by all the steps
                    (only set when initialization succeeded)
    """

    # file containing initial variables/constants
    VARS_FILE = "configuration"

    def __init__(self, log, forceState):
        """
        log        -- object providing LogEntry() and write()
        forceState -- boot state that overrides the node's current
                      one, or None to leave it alone

        On an invalid configuration file the constructor returns with
        CAN_RUN left at 0 and VARS unset. Failure to open the file is
        not handled here and propagates to the caller.
        """
        # override machine's current state from the command line
        self.forceState = forceState

        # the main logging point
        self.LOG= log

        # set to 1 if we can run after initialization
        self.CAN_RUN = 0

        # read in and store all variables in VARS_FILE. each line is
        # in the format name=val (any whitespace around the = is
        # removed). everything after the first = to the end of the
        # line is the value, so a value may itself contain '='
        config = {}
        validConfFile = True
        vars_file= open(self.VARS_FILE,'r')
        try:
            for line in vars_file:
                # if its a comment or a whitespace line, ignore
                if line[:1] == "#" or line.strip() == "":
                    continue

                # split on the first '=' only
                parts= line.split("=", 1)
                if len(parts) != 2:
                    self.LOG.LogEntry( "Invalid line in vars file: %s" % line )
                    validConfFile = False
                    break

                config[parts[0].strip()]= parts[1].strip()
        finally:
            # close the file even if reading it raised
            vars_file.close()

        if not validConfFile:
            self.LOG.LogEntry( "Unable to read configuration vars." )
            return

        # find out which directory we are running it, and set a variable
        # for that. future steps may need to get files out of the bootmanager
        # directory
        config['BM_SOURCE_DIR']= os.getcwd()

        # not sure what the current PATH is set to, replace it with what
        # we know will work with all the boot cds
        os.environ['PATH']= ":".join(BIN_PATH)

        # this contains a set of information used and updated
        # by each step
        self.VARS= config

        self.CAN_RUN= 1

    def Run(self):
        """
        core boot manager logic.

        the way errors are handled is as such: if any particular step
        cannot continue or unexpectedly fails, an exception is thrown.
        in this case, the boot manager cannot continue running.

        these step functions can also return a 0/1 depending on whether
        or not it succeeded. In the case of steps like ConfirmInstallWithUser,
        a 0 is returned and no exception is thrown if the user chose not
        to confirm the install. The same goes with the CheckHardwareRequirements.
        If requirements not met, but tests were successful, return 0.

        for steps that run within the installer, they are expected to either
        complete successfully and return 1, or throw an exception.

        For exact return values and expected operations, see the comments
        at the top of each of the individual step functions.

        Returns 1 when the handler for the node's boot state finished
        without raising, 0 otherwise. In the latter case the debug
        logic has been run before returning.

        NOTE(review): the value returned by the state handler itself is
        ignored, so a declined install confirmation (_newRun returning
        0) is still reported as success -- confirm this is intended.
        """

        def _startEarlyDebug():
            # starting the fallback/debug ssh daemon for safety:
            # if the node install somehow hangs, or if it simply takes ages,
            # we can still enter and investigate.
            # this is best effort only: whatever goes wrong here must
            # not prevent the node from booting, hence the catch-all
            try:
                StartDebug.Run(self.VARS, self.LOG, last_resort = False)
            except:
                self.LOG.LogEntry( "Could not start the early debug sshd, continuing" )

        def _logImplementationError():
            # report the exception currently being handled, both in
            # the log and on stderr
            self.LOG.write( "\n\nImplementation Error\n")
            traceback.print_exc(file=self.LOG.OutputFile)
            traceback.print_exc()

        def _nodeNotInstalled():
            # called by the _xxxRun() functions below upon failure
            self.VARS['BOOT_STATE']= 'dbg'
            self.VARS['STATE_CHANGE_NOTIFY']= 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
                      notify_messages.MSG_NODE_NOT_INSTALLED
            raise BootManagerException(notify_messages.MSG_NODE_NOT_INSTALLED)

        def _bootRun():
            # implements the boot logic, which consists of first
            # double checking that the node was properly installed,
            # checking whether someone added or changed disks, and
            # then finally chain boots.
            _startEarlyDebug()

            InstallInit.Run( self.VARS, self.LOG )
            if not ValidateNodeInstall.Run( self.VARS, self.LOG ):
                _nodeNotInstalled()

            WriteModprobeConfig.Run( self.VARS, self.LOG )
            MakeInitrd.Run( self.VARS, self.LOG )
            WriteNetworkConfig.Run( self.VARS, self.LOG )
            CheckForNewDisks.Run( self.VARS, self.LOG )
            SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
            ChainBootNode.Run( self.VARS, self.LOG )

        def _rinsRun():
            # implements the reinstall logic, which will check whether
            # the min. hardware requirements are met, install the
            # software, and upon correct installation will switch to
            # 'boot' state and chainboot into the production system
            _startEarlyDebug()

            if not CheckHardwareRequirements.Run( self.VARS, self.LOG ):
                self.VARS['BOOT_STATE']= 'dbg'
                raise BootManagerException("Hardware requirements not met.")

            # runinstaller
            InstallInit.Run( self.VARS, self.LOG )
            InstallPartitionDisks.Run( self.VARS, self.LOG )
            InstallBootstrapFS.Run( self.VARS, self.LOG )
            InstallWriteConfig.Run( self.VARS, self.LOG )
            InstallUninitHardware.Run( self.VARS, self.LOG )
            self.VARS['BOOT_STATE']= 'boot'
            self.VARS['STATE_CHANGE_NOTIFY']= 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
                 notify_messages.MSG_INSTALL_FINISHED
            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
            _bootRun()

        def _newRun():
            # implements the new install logic, which will first check
            # with the user whether it is ok to install on this
            # machine, switch to 'rins' state and then invoke the rins
            # logic.  See the _rinsRun comments for further details.
            if not ConfirmInstallWithUser.Run( self.VARS, self.LOG ):
                return 0
            self.VARS['BOOT_STATE']= 'rins'
            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
            _rinsRun()

        def _debugRun(state='dbg'):
            # implements debug logic, which just starts the sshd
            # and just waits around
            self.VARS['BOOT_STATE']=state
            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
            StartDebug.Run( self.VARS, self.LOG )

        def _badRun():
            # should never happen; log event
            self.LOG.write( "\nInvalid BOOT_STATE = %s\n" % self.VARS['BOOT_STATE'])
            _debugRun()

        global NodeRunStates
        # setup state -> function hash table
        NodeRunStates['new']  = _newRun
        NodeRunStates['inst'] = _newRun
        NodeRunStates['rins'] = _rinsRun
        NodeRunStates['boot'] = _bootRun
        NodeRunStates['dbg']  = _bootRun   # should always try to boot.
        NodeRunStates['diag']  = lambda : _debugRun('diag')
        NodeRunStates['disable']  = lambda : _debugRun('disable')

        success = 0
        try:
            InitializeBootManager.Run( self.VARS, self.LOG )
            ReadNodeConfiguration.Run( self.VARS, self.LOG )
            AuthenticateWithPLC.Run( self.VARS, self.LOG )
            GetAndUpdateNodeDetails.Run( self.VARS, self.LOG )

            # override machine's current state from the command line
            if self.forceState is not None:
                self.VARS['BOOT_STATE']= self.forceState
                UpdateBootStateWithPLC.Run( self.VARS, self.LOG )

            # unknown states are handled by _badRun
            stateRun = NodeRunStates.get(self.VARS['BOOT_STATE'],_badRun)
            stateRun()
            success = 1

        except KeyError:
            self.LOG.write( "\n\nKeyError while running: %s\n" %
                            str(sys.exc_info()[1]) )
        except BootManagerException:
            self.LOG.write( "\n\nException while running: %s\n" %
                            str(sys.exc_info()[1]) )
        except:
            # last line of defence: anything else must still lead to
            # the debug logic below rather than abort the boot manager
            _logImplementationError()

        if not success:
            # something went wrong: fall back to the debug state
            try:
                _debugRun()
            except BootManagerException:
                self.LOG.write( "\n\nException while running: %s\n" %
                                str(sys.exc_info()[1]) )
            except:
                _logImplementationError()

        return success

311             
312             
def main(argv):
    """
    Entry point of the boot manager.

    argv -- the command line; when it holds exactly one argument, that
            argument is the boot state to force and must be one of the
            known node run states

    Returns 0 when the boot manager ran successfully and 1 when an
    error occurred: an invalid forced state, a failure to initialize
    the BootManager, an unexpected exception, or an unsuccessful run.
    The log is uploaded before returning in every case.
    """

    import utils
    utils.prompt_for_breakpoint_mode()

    #utils.breakpoint ("Entering BootManager::main")

    # only the keys matter here (they validate the forced state);
    # BootManager.Run() fills in the actual handlers
    global NodeRunStates
    NodeRunStates = {'new':None,
                     'inst':None,
                     'rins':None,
                     'boot':None,
                     'diag':None,
                     'disable':None,
                     'dbg':None}

    def _now():
        # current UTC time, formatted for the start/finish log entries
        return strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())

    # set to 1 if error occurred
    error= 0

    # all output goes through this class so we can save it and post
    # the data back to PlanetLab central
    LOG= log( BM_NODE_LOG )

    def _logTraceback():
        # write the exception being handled to the log file (when it is
        # open) and to stderr; skipping the file when it is not open
        # avoids printing the same traceback to stderr twice
        if LOG.OutputFile is not None:
            traceback.print_exc(file=LOG.OutputFile)
        traceback.print_exc()

    LOG.LogEntry( "BootManager started at: %s" % _now() )

    forceState = None
    try:
        if len(argv) == 2:
            fState = argv[1]
            if fState in NodeRunStates:
                forceState = fState
            else:
                LOG.LogEntry("FATAL: cannot force node run state to=%s" % fState)
                error = 1
    except:
        _logTraceback()

    # an invalid forced state is fatal: skip the run altogether
    if not error:
        try:
            bm= BootManager(LOG,forceState)
            if bm.CAN_RUN == 0:
                LOG.LogEntry( "Unable to initialize BootManager." )
                # a boot manager that cannot run is an error too
                error = 1
            else:
                LOG.LogEntry( "Running version %s of BootManager." %
                              bm.VARS['VERSION'] )
                success= bm.Run()
                if success:
                    LOG.LogEntry( "\nDone!" )
                else:
                    LOG.LogEntry( "\nError occurred!" )
                    error = 1
        except:
            _logTraceback()
            # do not report success when the boot manager crashed
            error = 1

    LOG.LogEntry( "BootManager finished at: %s" % _now() )
    LOG.Upload()

    return error

380
381     
if __name__ == "__main__":
    # run the boot manager and hand its status back as the exit code
    sys.exit( main(sys.argv) )