Cross module commit for 'diag' and 'disabled' node states.
[bootmanager.git] / source / BootManager.py
1 #!/usr/bin/python2 -u
2
3 # Copyright (c) 2003 Intel Corporation
4 # All rights reserved.
5 #
6 # Copyright (c) 2004-2006 The Trustees of Princeton University
7 # All rights reserved.
8
9 import string
10 import sys, os, traceback
11 from time import gmtime, strftime
12 from gzip import GzipFile
13
14 from steps import *
15 from Exceptions import *
16 import notify_messages
17 import BootServerRequest
18
# every log entry is written to this file (gzip-compressed by the log class)
LOG_FILE = "/tmp/bm.log"

# partial path handed to the boot server request when the log is uploaded
UPLOAD_LOG_PATH = "/alpina-logs/upload.php"

# directories that make up PATH while the boot manager is running
BIN_PATH = (
    '/usr/local/bin',
    '/usr/local/sbin',
    '/bin',
    '/sbin',
    '/usr/bin',
    '/usr/sbin',
    '/usr/local/planetlab/bin',
)

# valid node run states; keys are state names, values are filled in later
# (main() seeds the keys, BootManager.Run() installs the handlers)
NodeRunStates = {}
35
class log:
    """
    Log sink for the boot manager. Entries are echoed to stdout and,
    when an output file could be opened, also written to a
    gzip-compressed file that Upload() can send to the boot server.
    """

    def __init__( self, OutputFilePath= None ):
        """
        OutputFilePath: path of the gzip-compressed log file to create.
        When it is None/empty, or the file cannot be opened, entries are
        only echoed to the screen.
        """
        # always define both attributes, so LogEntry() and Upload() work
        # even when no log file is in use (previously they raised
        # AttributeError when no path was given)
        self.OutputFilePath= OutputFilePath
        self.OutputFile= None

        if OutputFilePath:
            try:
                self.OutputFile= GzipFile( OutputFilePath, "w", 9 )
            except Exception:
                # logging to a file is best effort; keep going without it
                print( "Unable to open output file for log, continuing" )
                self.OutputFile= None


    def LogEntry( self, str, inc_newline= 1, display_screen= 1 ):
        """
        write one entry to the log file (if any) and optionally to stdout

        str:            text of the entry (the name shadows the builtin,
                        but is kept for callers passing it by keyword)
        inc_newline:    if true, a newline is written after the text
        display_screen: if true, the entry is also written to stdout
        """
        if self.OutputFile:
            self.OutputFile.write( str )
        if display_screen:
            sys.stdout.write( str )

        if inc_newline:
            if display_screen:
                sys.stdout.write( "\n" )
            if self.OutputFile:
                self.OutputFile.write( "\n" )

        # flush after every entry so the file is current if we stop abruptly
        if self.OutputFile:
            self.OutputFile.flush()


    def write( self, str ):
        """
        make log behave like a writable file object (for traceback
        prints); the text is logged as is, without an added newline
        """
        self.LogEntry( str, 0, 1 )


    def Upload( self ):
        """
        upload the contents of the log to the server

        the log file is closed first, so entries logged afterwards only
        reach the screen. does nothing if no log file is open.
        """

        if self.OutputFile is not None:
            self.LogEntry( "Uploading logs to %s" % UPLOAD_LOG_PATH )

            # close the file so the gzip stream is complete before sending
            self.OutputFile.close()
            self.OutputFile= None

            bs_request = BootServerRequest.BootServerRequest()
            bs_request.MakeRequest(PartialPath = UPLOAD_LOG_PATH,
                                   GetVars = None, PostVars = None,
                                   FormData = ["log=@" + self.OutputFilePath],
                                   DoSSL = True, DoCertCheck = True)
90         
91     
92
93         
94
95
class BootManager:
    """
    Reads the boot manager configuration and runs the sequence of steps
    that matches the node's boot state.
    """

    # file containing initial variables/constants
    VARS_FILE = "configuration"


    def __init__(self, log, forceState):
        """
        log:        object used for all logging (its LogEntry()/write()
                    methods and OutputFile attribute are used here)
        forceState: boot state that overrides the node's current one in
                    Run(), or None to leave it alone

        Loads VARS_FILE into self.VARS and replaces PATH with BIN_PATH.
        CAN_RUN is left at 0 (and self.VARS is not set) when the file
        contains an invalid line; it is set to 1 otherwise.
        """
        # override machine's current state from the command line
        self.forceState = forceState

        # the main logging point
        self.LOG= log

        # set to 1 if we can run after initialization
        self.CAN_RUN = 0

        # read in and store all variables in VARS_FILE. each line is in
        # the format name=val (any whitespace around the = is removed).
        # everything after the first = to the end of the line is the
        # value, so a value may itself contain = characters
        conf_vars = {}
        validConfFile = True
        vars_file= open(self.VARS_FILE,'r')
        try:
            for line in vars_file:
                # if its a comment or a whitespace line, ignore
                if line[:1] == "#" or line.strip() == "":
                    continue

                # split on the first = only; no = at all is an error
                parts= line.split("=", 1)
                if len(parts) != 2:
                    self.LOG.LogEntry( "Invalid line in vars file: %s" % line )
                    validConfFile = False
                    break

                conf_vars[parts[0].strip()]= parts[1].strip()
        finally:
            # release the file even if reading or logging fails
            vars_file.close()

        if not validConfFile:
            self.LOG.LogEntry( "Unable to read configuration vars." )
            return

        # find out which directory we are running in, and set a variable
        # for that. future steps may need to get files out of the
        # bootmanager directory
        conf_vars['BM_SOURCE_DIR']= os.getcwd()

        # not sure what the current PATH is set to, replace it with what
        # we know will work with all the boot cds
        os.environ['PATH']= ":".join(BIN_PATH)

        # this contains a set of information used and updated
        # by each step
        self.VARS= conf_vars

        self.CAN_RUN= 1

    def Run(self):
        """
        core boot manager logic. returns 1 if the steps for the node's
        boot state ran without raising, 0 otherwise.

        the way errors are handled is as such: if any particular step
        cannot continue or unexpectedly fails, an exception is thrown.
        in this case, the boot manager cannot continue running; the
        error is logged and the debug steps are run instead.

        these step functions can also return a 0/1 depending on whether
        or not it succeeded. In the case of steps like ConfirmInstallWithUser,
        a 0 is returned and no exception is thrown if the user chose not
        to confirm the install. The same goes with the CheckHardwareRequirements.
        If requirements not met, but tests were successful, return 0.

        for steps that run within the installer, they are expected to either
        complete successfully and return 1, or throw an exception.

        For exact return values and expected operations, see the comments
        at the top of each of the individual step functions.
        """

        def _nodeNotInstalled():
            # called by _bootRun() below when the install does not validate
            self.VARS['BOOT_STATE']= 'dbg'
            self.VARS['STATE_CHANGE_NOTIFY']= 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
                      notify_messages.MSG_NODE_NOT_INSTALLED
            raise BootManagerException( notify_messages.MSG_NODE_NOT_INSTALLED )

        def _bootRun():
            # implements the boot logic, which consists of first
            # double checking that the node was properly installed,
            # checking whether someone added or changed disks, and
            # then finally chain boots.

            InstallInit.Run( self.VARS, self.LOG )
            if ValidateNodeInstall.Run( self.VARS, self.LOG ):
                WriteModprobeConfig.Run( self.VARS, self.LOG )
                MakeInitrd.Run( self.VARS, self.LOG )
                WriteNetworkConfig.Run( self.VARS, self.LOG )
                # the following step should be done by NM
                UpdateNodeConfiguration.Run( self.VARS, self.LOG )
                CheckForNewDisks.Run( self.VARS, self.LOG )
                SendHardwareConfigToPLC.Run( self.VARS, self.LOG )
                ChainBootNode.Run( self.VARS, self.LOG )
            else:
                _nodeNotInstalled()

        def _rinsRun():
            # implements the reinstall logic, which will check whether
            # the min. hardware requirements are met, install the
            # software, and upon correct installation will switch to
            # 'boot' state and chainboot into the production system
            if not CheckHardwareRequirements.Run( self.VARS, self.LOG ):
                self.VARS['BOOT_STATE']= 'dbg'
                raise BootManagerException( "Hardware requirements not met." )

            # runinstaller
            InstallInit.Run( self.VARS, self.LOG )
            InstallPartitionDisks.Run( self.VARS, self.LOG )
            InstallBootstrapRPM.Run( self.VARS, self.LOG )
            InstallWriteConfig.Run( self.VARS, self.LOG )
            InstallUninitHardware.Run( self.VARS, self.LOG )
            self.VARS['BOOT_STATE']= 'boot'
            self.VARS['STATE_CHANGE_NOTIFY']= 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE']= \
                 notify_messages.MSG_INSTALL_FINISHED
            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
            _bootRun()

        def _newRun():
            # implements the new install logic, which will first check
            # with the user whether it is ok to install on this
            # machine, switch to 'rins' state and then invoke the rins
            # logic.  See _rinsRun() comments for further details.
            # returns 0 (without raising) if the user declines.
            if not ConfirmInstallWithUser.Run( self.VARS, self.LOG ):
                return 0
            self.VARS['BOOT_STATE']= 'rins'
            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
            _rinsRun()

        def _debugRun(state='dbg'):
            # implements debug logic: records the given state, reports
            # it through UpdateBootStateWithPLC and runs StartDebug
            self.VARS['BOOT_STATE']=state
            UpdateBootStateWithPLC.Run( self.VARS, self.LOG )
            StartDebug.Run( self.VARS, self.LOG )

        def _badRun():
            # should never happen; log event
            self.LOG.write( "\nInvalid BOOT_STATE = %s\n" % self.VARS['BOOT_STATE'])
            _debugRun()

        def _reportException(what):
            # log the exception currently being handled. sys.exc_info()
            # is used rather than binding the exception in the except
            # clause, so the syntax is accepted by every python version
            self.LOG.write( "\n\n%s while running: %s\n" %
                            (what, str(sys.exc_info()[1])) )

        def _reportImplementationError():
            # log the traceback of the exception currently being
            # handled, both to the log file and to stderr
            self.LOG.write( "\n\nImplementation Error\n")
            traceback.print_exc(file=self.LOG.OutputFile)
            traceback.print_exc()

        global NodeRunStates
        # setup state -> function hash table
        NodeRunStates['new']  = _newRun
        NodeRunStates['inst'] = _newRun
        NodeRunStates['rins'] = _rinsRun
        NodeRunStates['boot'] = _bootRun
        NodeRunStates['dbg']  = _debugRun
        NodeRunStates['diag']  = lambda : _debugRun('diag')
        NodeRunStates['disable']  = lambda : _debugRun('disable')

        success = 0
        try:
            InitializeBootManager.Run( self.VARS, self.LOG )
            ReadNodeConfiguration.Run( self.VARS, self.LOG )
            AuthenticateWithPLC.Run( self.VARS, self.LOG )
            GetAndUpdateNodeDetails.Run( self.VARS, self.LOG )

            # override machine's current state from the command line
            if self.forceState is not None:
                self.VARS['BOOT_STATE']= self.forceState
                UpdateBootStateWithPLC.Run( self.VARS, self.LOG )

            # unknown states fall through to _badRun
            stateRun = NodeRunStates.get(self.VARS['BOOT_STATE'],_badRun)
            stateRun()
            success = 1

        except KeyError:
            _reportException( "KeyError" )
        except BootManagerException:
            _reportException( "Exception" )
        except:
            # deliberately catches everything: whatever went wrong, we
            # still want to fall back to the debug steps below
            _reportImplementationError()

        if not success:
            # something failed above; run the debug steps instead
            try:
                _debugRun()
            except BootManagerException:
                _reportException( "Exception" )
            except:
                _reportImplementationError()

        return success
296             
297             
def main(argv):
    """
    command line entry point: sets up logging, optionally forces the
    node run state named by the single command line argument, runs the
    boot manager and uploads the log.

    argv: the full argument list, program name included; a run state is
          only forced when exactly one argument follows the program name

    returns 0 on success and 1 if an error occurred, which includes an
    unknown forced state, a boot manager that could not be initialized,
    and any exception raised while creating or running it.
    """

    import utils
    utils.prompt_for_breakpoint_mode()

    utils.breakpoint ("Entering BootManager::main")

    def _timestamp():
        # current UTC time, in the format used for the log start/end lines
        return strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())

    # seed the valid state names; BootManager.Run() installs the handlers
    global NodeRunStates
    NodeRunStates = {'new':None,
                     'inst':None,
                     'rins':None,
                     'boot':None,
                     'diag':None,
                     'disable':None,
                     'dbg':None}

    # set to 1 if error occurred
    error= 0

    # all output goes through this class so we can save it and post
    # the data back to PlanetLab central
    LOG= log( LOG_FILE )

    LOG.LogEntry( "BootManager started at: %s" % _timestamp() )

    forceState = None
    try:
        if len(argv) == 2:
            fState = argv[1]
            if fState in NodeRunStates:
                forceState = fState
            else:
                LOG.LogEntry("FATAL: cannot force node run state to=%s" % fState)
                error = 1
    except:
        traceback.print_exc(file=LOG.OutputFile)
        traceback.print_exc()

    # an invalid forced state skips the run but still finishes the log
    if not error:
        try:
            bm= BootManager(LOG,forceState)
            if bm.CAN_RUN == 0:
                LOG.LogEntry( "Unable to initialize BootManager." )
                # report the failure to the caller instead of exiting 0
                error = 1
            else:
                LOG.LogEntry( "Running version %s of BootManager." %
                              bm.VARS['VERSION'] )
                success= bm.Run()
                if success:
                    LOG.LogEntry( "\nDone!" )
                else:
                    LOG.LogEntry( "\nError occurred!" )
                    error = 1
        except:
            # top level boundary: log whatever went wrong, then fall
            # through so the log is still finished and uploaded
            traceback.print_exc(file=LOG.OutputFile)
            traceback.print_exc()
            error = 1

    LOG.LogEntry( "BootManager finished at: %s" % _timestamp() )
    LOG.Upload()

    return error
365
366     
if __name__ == "__main__":
    # run the boot manager and use main()'s result as the exit status
    sys.exit( main(sys.argv) )