uploading bmlog - fits new pycurl
[bootmanager.git] / source / BootManager.py
1 #!/usr/bin/python -u
2
3 # Copyright (c) 2003 Intel Corporation
4 # All rights reserved.
5 #
6 # Copyright (c) 2004-2006 The Trustees of Princeton University
7 # All rights reserved.
8
9 import string
10 import sys, os, traceback
11 import time
12 import gzip
13
14 from steps import *
15 from Exceptions import *
16 import notify_messages
17 import BootServerRequest
18
# Path of the local file that receives every log entry.
BM_NODE_LOG = "/tmp/bm.log"

# Name of the name=value file the initial variables are read from.
VARS_FILE = "configuration"

# Directories that make up PATH while the boot manager is running.
BIN_PATH = (
    '/usr/local/bin',
    '/usr/local/sbin',
    '/usr/bin',
    '/usr/sbin',
    '/bin',
    '/sbin',
)
30
def read_configuration_file(filename):
    """
    Parse a name=value configuration file into a dictionary.

    Every meaningful line has the form ``name = value``. Whitespace around
    the name and the value is removed, and everything after the first '='
    up to the end of the line is the value (so a value may itself contain
    '='). Single and double quote characters are dropped from the value.
    Blank lines and lines whose first non-blank character is '#' are
    ignored.

    filename -- path of the file to read.

    Returns the dictionary of variables, plus a 'BM_SOURCE_DIR' entry
    holding the current working directory.

    Raises Exception when a line has no '=' at all; open() errors propagate
    unchanged.
    """
    settings = {}

    conf_file = open(filename, 'r')
    try:
        for line in conf_file:
            stripped = line.strip()

            # comments and whitespace-only lines carry no variable
            if not stripped or stripped.startswith("#"):
                continue

            # split on the first '=' only, the rest of the line is the value
            parts = stripped.split("=", 1)
            if len(parts) != 2:
                raise Exception("Invalid line in vars file: %s" % line)

            name = parts[0].strip()
            value = parts[1].strip()
            # remove quotes
            value = value.replace("'", "").replace('"', "")
            settings[name] = value
    finally:
        # close the file even when an invalid line aborts the parse
        conf_file.close()

    # record which directory we are running in; future steps may need to
    # get files out of the bootmanager directory
    settings['BM_SOURCE_DIR'] = os.getcwd()

    return settings
66
67 ##############################
class log:
    """
    Timestamped logger that writes every entry to an optional output file
    and (by default) to stdout, and that can upload the file afterwards.

    Creating an instance also reads the configuration file named by
    VARS_FILE; the result is kept in self.VARS, which stays None when the
    file cannot be read.
    """

    # time.strftime() pattern for the prefix put in front of each entry
    format = "%H:%M:%S(%Z) "

    def __init__(self, OutputFilePath=None):
        """
        Open OutputFilePath for writing and load the configuration.

        A file that cannot be opened is not fatal: the problem is printed
        and logging continues on stdout only (self.OutputFile is None).
        """
        # remembered even when the open fails, so the attribute always exists
        self.OutputFilePath = OutputFilePath
        try:
            self.OutputFile = open(OutputFilePath, "w")
        except Exception:
            print("bootmanager log : Unable to open output file %r, continuing" % OutputFilePath)
            self.OutputFile = None

        self.VARS = None
        try:
            self.VARS = read_configuration_file(VARS_FILE)
        except Exception:
            # sys.exc_info() rather than a bound name, so that this parses
            # under both Python 2 and Python 3
            self.LogEntry(str(sys.exc_info()[1]))

    def LogEntry(self, str, inc_newline=1, display_screen=1):
        """
        Write one timestamped entry.

        str            -- text of the entry.
        inc_newline    -- when true, a newline is appended.
        display_screen -- when true, the entry is also written to stdout.
        """
        entry = time.strftime(log.format, time.localtime()) + str
        if inc_newline:
            entry += "\n"

        if display_screen:
            sys.stdout.write(entry)

        if self.OutputFile:
            self.OutputFile.write(entry)
            self.OutputFile.flush()

    def write(self, str):
        """
        make log behave like a writable file object (for traceback
        prints)
        """
        self.LogEntry(str, 0, 1)

    # bm log uploading is available back again, as of nodeconfig-5.0-2
    def Upload(self):
        """
        upload the contents of the log to the server

        Does nothing when there is no output file. When the variables the
        upload needs are not available (configuration never loaded, or
        'UPLOAD_LOG_SCRIPT' / 'INTERFACE_SETTINGS' missing) the problem is
        logged and the log file is left open instead of raising.
        Exceptions from the second upload attempt propagate to the caller.
        """
        if self.OutputFile is None:
            return

        # collect everything the request needs before the file is closed;
        # subscripting a None self.VARS raises TypeError, a missing entry
        # raises KeyError
        try:
            upload_script = self.VARS['UPLOAD_LOG_SCRIPT']
            interface = self.VARS['INTERFACE_SETTINGS']
            hostname = interface['hostname'] + "." + interface['domainname']
        except (KeyError, TypeError):
            self.LogEntry("Unable to upload logs, missing configuration: %s"
                          % sys.exc_info()[1])
            return

        self.OutputFile.flush()

        self.LogEntry("Uploading logs to %s" % upload_script)

        # from here on entries only reach stdout
        self.OutputFile.close()
        self.OutputFile = None

        bs_request = BootServerRequest.BootServerRequest(self.VARS)
        try:
            # this was working until f10
            bs_request.MakeRequest(PartialPath=upload_script,
                                   GetVars=None, PostVars=None,
                                   DoSSL=True, DoCertCheck=True,
                                   FormData=["log=@" + self.OutputFilePath,
                                             "hostname=" + hostname,
                                             "type=bm.log"])
        except Exception:
            # new pycurl: retry with the form given as (name, value) tuples
            import pycurl
            bs_request.MakeRequest(PartialPath=upload_script,
                                   GetVars=None, PostVars=None,
                                   DoSSL=True, DoCertCheck=True,
                                   FormData=[('log', (pycurl.FORM_FILE, self.OutputFilePath)),
                                             ("hostname", hostname),
                                             ("type", "bm.log")])
144
145
146 ##############################
class BootManager:
    """
    Runs the sequence of boot manager steps that matches the node's boot
    state, using the variables loaded by the log object.
    """

    # The set of valid node run states. Only the keys are meaningful: main()
    # validates a state forced from the command line against them. The
    # handler for each state is picked inside Run().
    NodeRunStates = {'reinstall': None,
                     'boot': None,
                     'safeboot': None,
                     'disabled': None,
                     }

    def __init__(self, log, forceState):
        """
        log        -- the main logging point; its VARS attribute supplies the
                      variables used and updated by each step.
        forceState -- boot state that overrides the machine's current state,
                      or None.

        CAN_RUN is left at 0 (and nothing else is set up) when log.VARS is
        empty or None.
        """
        # override machine's current state from the command line
        self.forceState = forceState

        # the main logging point
        self.LOG = log

        # set to 1 if we can run after initialization
        self.CAN_RUN = 0

        if not log.VARS:
            return

        # this contains a set of information used and updated by each step
        self.VARS = log.VARS

        # not sure what the current PATH is set to, replace it with what
        # we know will work with all the boot cds
        os.environ['PATH'] = ":".join(BIN_PATH)

        self.CAN_RUN = 1

    def _reportImplementationError(self):
        """Log the exception being handled as an implementation error."""
        self.LOG.write("\n\nImplementation Error\n")
        # traceback.print_exc(file=None) writes to stderr, which the second
        # call already does; skip it so the traceback is not printed twice
        if self.LOG.OutputFile is not None:
            traceback.print_exc(file=self.LOG.OutputFile)
        traceback.print_exc()

    def Run(self):
        """
        core boot manager logic.

        the way errors are handled is as such: if any particular step
        cannot continue or unexpectedly fails, an exception is thrown.
        in this case, the boot manager cannot continue running.

        these step functions can also return a 0/1 depending on whether
        or not it succeeded. In the case of steps like ConfirmInstallWithUser,
        a 0 is returned and no exception is thrown if the user chose not
        to confirm the install. The same goes with the CheckHardwareRequirements.
        If requirements not met, but tests were successful, return 0.

        for steps that run within the installer, they are expected to either
        complete successfully and return 1, or throw an exception.

        For exact return values and expected operations, see the comments
        at the top of each of the individual step functions.

        Returns 1 when the handler for the boot state ran to completion and
        0 otherwise. After any failure other than an authentication failure
        the debug run level is entered before returning.
        """

        def _currentError():
            # text of the exception being handled; sys.exc_info() keeps the
            # except clauses valid under both Python 2 and Python 3
            return str(sys.exc_info()[1])

        def _nodeNotInstalled(message='MSG_NODE_NOT_INSTALLED'):
            # called by the _xxxRun() functions below upon failure
            self.VARS['RUN_LEVEL'] = 'failboot'
            notify = getattr(notify_messages, message)
            self.VARS['STATE_CHANGE_NOTIFY'] = 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE'] = notify
            raise BootManagerException(notify)

        def _startFallbackDebug():
            # starting the fallback/debug ssh daemon for safety:
            # if the node install somehow hangs, or if it simply takes ages,
            # we can still enter and investigate.
            # This is best effort: a failure is logged and otherwise ignored.
            try:
                StartDebug.Run(self.VARS, self.LOG, last_resort=False)
            except Exception:
                self.LOG.write("\nUnable to start fallback debug sshd: %s\n"
                               % _currentError())

        def _bootRun():
            # implements the boot logic, which consists of first
            # double checking that the node was properly installed,
            # checking whether someone added or changed disks, and
            # then finally chain boots.
            _startFallbackDebug()

            InstallInit.Run(self.VARS, self.LOG)
            ret = ValidateNodeInstall.Run(self.VARS, self.LOG)
            if ret == 1:
                WriteModprobeConfig.Run(self.VARS, self.LOG)
                MakeInitrd.Run(self.VARS, self.LOG)
                WriteNetworkConfig.Run(self.VARS, self.LOG)
                CheckForNewDisks.Run(self.VARS, self.LOG)
                SendHardwareConfigToPLC.Run(self.VARS, self.LOG)
                ChainBootNode.Run(self.VARS, self.LOG)
            elif ret == -1:
                _nodeNotInstalled('MSG_NODE_FILESYSTEM_CORRUPT')
            elif ret == -2:
                _nodeNotInstalled('MSG_NODE_MOUNT_FAILED')
            elif ret == -3:
                _nodeNotInstalled('MSG_NODE_MISSING_KERNEL')
            else:
                _nodeNotInstalled()

        def _reinstallRun():
            _startFallbackDebug()

            # implements the reinstall logic, which will check whether
            # the min. hardware requirements are met, install the
            # software, and upon correct installation will switch to
            # 'boot' state and chainboot into the production system
            if not CheckHardwareRequirements.Run(self.VARS, self.LOG):
                self.VARS['RUN_LEVEL'] = 'failboot'
                raise BootManagerException("Hardware requirements not met.")

            # runinstaller
            InstallInit.Run(self.VARS, self.LOG)
            InstallPartitionDisks.Run(self.VARS, self.LOG)
            InstallBootstrapFS.Run(self.VARS, self.LOG)
            InstallWriteConfig.Run(self.VARS, self.LOG)
            InstallUninitHardware.Run(self.VARS, self.LOG)
            self.VARS['BOOT_STATE'] = 'boot'
            self.VARS['STATE_CHANGE_NOTIFY'] = 1
            self.VARS['STATE_CHANGE_NOTIFY_MESSAGE'] = \
                notify_messages.MSG_INSTALL_FINISHED
            UpdateBootStateWithPLC.Run(self.VARS, self.LOG)
            _bootRun()

        def _installRun():
            # implements the new install logic, which will first check
            # with the user whether it is ok to install on this
            # machine, switch to 'reinstall' state and then invoke the
            # reinstall logic.  See the _reinstallRun comments for further
            # details.
            # NOTE(review): no boot state is mapped to this function below
            # and nothing in Run() calls it.
            if not ConfirmInstallWithUser.Run(self.VARS, self.LOG):
                return 0
            self.VARS['BOOT_STATE'] = 'reinstall'
            UpdateRunLevelWithPLC.Run(self.VARS, self.LOG)
            _reinstallRun()

        def _debugRun(state='failboot'):
            # implements debug logic, which starts the sshd and just waits around
            self.VARS['RUN_LEVEL'] = state
            UpdateRunLevelWithPLC.Run(self.VARS, self.LOG)
            StartDebug.Run(self.VARS, self.LOG)
            # fsck/mount fs if present, and ignore return value if it's not.
            ValidateNodeInstall.Run(self.VARS, self.LOG)

        def _badstateRun():
            # should never happen; log event
            self.LOG.write("\nInvalid BOOT_STATE = %s\n" % self.VARS['BOOT_STATE'])
            _debugRun()

        # state -> function table. It is local to this call so that closures
        # bound to this instance are not stored in the shared class-level
        # NodeRunStates dictionary.
        stateHandlers = {
            'reinstall': _reinstallRun,
            'boot': _bootRun,
            'safeboot': lambda: _debugRun('safeboot'),
            'disabled': lambda: _debugRun('disabled'),
        }

        success = 0
        try:
            InitializeBootManager.Run(self.VARS, self.LOG)
            ReadNodeConfiguration.Run(self.VARS, self.LOG)
            AuthenticateWithPLC.Run(self.VARS, self.LOG)
            StartRunlevelAgent.Run(self.VARS, self.LOG)
            GetAndUpdateNodeDetails.Run(self.VARS, self.LOG)

            # override machine's current state from the command line
            if self.forceState is not None:
                self.VARS['BOOT_STATE'] = self.forceState
                UpdateBootStateWithPLC.Run(self.VARS, self.LOG)
                UpdateRunLevelWithPLC.Run(self.VARS, self.LOG)

            stateRun = stateHandlers.get(self.VARS['BOOT_STATE'], _badstateRun)
            stateRun()
            success = 1

        except KeyError:
            self.LOG.write("\n\nKeyError while running: %s\n" % _currentError())
        # listed before BootManagerException so this handler still runs if
        # the authentication exception derives from it (Exceptions.py is not
        # visible here -- TODO confirm)
        except BootManagerAuthenticationException:
            self.LOG.write("\n\nFailed to Authenticate Node: %s\n" % _currentError())
            # sets /tmp/CANCEL_BOOT flag
            StartDebug.Run(self.VARS, self.LOG)
            # Return immediately b/c any other calls to API will fail
            return success
        except BootManagerException:
            self.LOG.write("\n\nException while running: %s\n" % _currentError())
        except:
            # deliberate catch-all: whatever went wrong, report it and fall
            # through to the debug run level below
            self._reportImplementationError()

        if not success:
            try:
                _debugRun()
            except BootManagerException:
                self.LOG.write("\n\nException while running: %s\n" % _currentError())
            except:
                self._reportImplementationError()

        return success
347             
348             
def main(argv):
    """
    Command line entry point of the boot manager.

    argv -- the argument list; when it has exactly two items, argv[1] is
            the boot state to force and must be one of the keys of
            BootManager.NodeRunStates.

    Returns 0 when the run succeeded and 1 when an error occurred: an
    invalid forced state, a boot manager that could not be initialized, a
    failed or aborted run, or a failed log upload.
    """
    import utils
    utils.prompt_for_breakpoint_mode()

    utils.breakpoint("Entering BootManager::main")

    # set to 1 if error occurred
    error = 0

    # all output goes through this class so we can save it and post
    # the data back to PlanetLab central
    LOG = log(BM_NODE_LOG)

    def _printTraceback():
        # traceback.print_exc(file=None) writes to stderr, which the second
        # call already does; skip it so the traceback is not printed twice
        if LOG.OutputFile is not None:
            traceback.print_exc(file=LOG.OutputFile)
        traceback.print_exc()

    def _finish(status):
        # log the end of the run, upload the log and give back the status
        LOG.LogEntry("BootManager finished at: %s" %
                     time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
        try:
            LOG.Upload()
        except Exception:
            # a failed upload is reported as an error instead of escaping
            # from main(); the log file may already be closed at this
            # point, so the traceback only goes to stderr
            traceback.print_exc()
            status = 1
        return status

    LOG.LogEntry("BootManager started at: %s" %
                 time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))

    forceState = None
    try:
        if len(argv) == 2:
            fState = argv[1]
            if fState in BootManager.NodeRunStates:
                forceState = fState
            else:
                LOG.LogEntry("FATAL: cannot force node run state to=%s" % fState)
                error = 1
    except:
        _printTraceback()

    if error:
        return _finish(error)

    try:
        bm = BootManager(LOG, forceState)
        if bm.CAN_RUN == 0:
            LOG.LogEntry("Unable to initialize BootManager.")
            error = 1
        else:
            LOG.LogEntry("Running version %s of BootManager." % bm.VARS['VERSION'])
            success = bm.Run()
            if success:
                LOG.LogEntry("\nDone!")
            else:
                LOG.LogEntry("\nError occurred!")
                error = 1
    except:
        # deliberate catch-all at the top level: report the problem and
        # still finish (and upload) the log
        _printTraceback()
        error = 1

    return _finish(error)
406
407     
if __name__ == "__main__":
    # the value returned by main() becomes the process exit status
    sys.exit(main(sys.argv))