coming back to a model that works .. most of the time
[bootcd.git] / initscripts / pl_boot
#!/bin/bash
#
# pl_boot - fetch, verify and run the boot script, forever.
#
# Each pass of the main loop:
#   1. exits if the CANCEL_BOOT_FLAG file exists;
#   2. after ATTEMPTS_BEFORE_BACKUP attempts against one server, switches
#      between the primary and the backup boot server;
#   3. writes a fresh nonce to /tmp/nonce and POSTs it, together with the
#      boot CD version, /proc/cmdline, /proc/uptime and /tmp/ifconfig, to the
#      selected server over HTTPS, saving the reply as UNVERIFIED_SCRIPT;
#   4. runs gpg --decrypt on the reply with that server's keyring and, if gpg
#      succeeds, executes the resulting VERIFIED_SCRIPT.
#
# When DISCONNECTED_OPERATION is non-empty it is passed to "mount -U" to
# locate a boot media filesystem: a successful download is cached there, and
# the cached copy is used when the download fails.
# NOTE(review): DISCONNECTED_OPERATION is presumably set by /tmp/planet.cnf;
# it is not assigned anywhere in this script - confirm.

### xxx tmp debug

set -x
echo pl_boot on console > /dev/console
exec > /dev/console
echo pl_boot plain

###
# shared helpers; pl_network_sanity_checks (used below) comes from here
. /etc/init.d/pl_functions

. /tmp/planet.cnf

# Run gpg once to create default options
GNUPGHOME=/root
export GNUPGHOME
/usr/bin/gpg --yes 2>/dev/null </dev/null

# --------------------------
# helpers

# Print the given message on stdout, prefixed with the current HH:MM:SS time.
# Output is identical to the former `echo $(date "+%H:%M:%S") "<message>"`.
pl_boot_log() {
    printf '%s %s\n' "$(date "+%H:%M:%S")" "$*"
}

# Print the first line of file $1 with leading/trailing blanks removed.
# The trimming keeps the tolerance to stray whitespace that the values
# formerly got from being word-split inside an unquoted command string.
# Prints nothing if the file cannot be read.
pl_boot_read_conf() {
    local value=""
    read -r value < "$1"
    printf '%s' "$value"
}

# Point the CONNECT_* variables at the backup server ($1 == backup) or at the
# primary server (anything else), and record the choice in on_backup_server.
pl_boot_use_server() {
    case "$1" in
        backup)
            on_backup_server=1
            CONNECT_BOOT_SERVER=$BACKUP_BOOT_SERVER
            CONNECT_BOOT_SERVER_PORT=$BACKUP_BOOT_SERVER_PORT
            CONNECT_BOOT_SERVER_PATH=$BACKUP_BOOT_SERVER_PATH
            CONNECT_BOOT_SERVER_GPG_KEYRING=$BACKUP_BOOT_SERVER_GPG_KEYRING
            CONNECT_BOOT_SERVER_CACERT=$BACKUP_BOOT_SERVER_CACERT
            ;;
        *)
            on_backup_server=0
            CONNECT_BOOT_SERVER=$BOOT_SERVER
            CONNECT_BOOT_SERVER_PORT=$BOOT_SERVER_PORT
            CONNECT_BOOT_SERVER_PATH=$BOOT_SERVER_PATH
            CONNECT_BOOT_SERVER_GPG_KEYRING=$BOOT_SERVER_GPG_KEYRING
            CONNECT_BOOT_SERVER_CACERT=$BOOT_SERVER_CACERT
            ;;
    esac
}

# Mount the filesystem whose UUID is $DISCONNECTED_OPERATION on
# BOOT_MEDIA_DIR.  Returns non-zero (leaving nothing mounted) on failure, so
# callers never copy to or from the bare, unmounted directory.
pl_boot_mount_media() {
    mkdir -p "$BOOT_MEDIA_DIR" || return 1
    if ! mount -U "$DISCONNECTED_OPERATION" "$BOOT_MEDIA_DIR"; then
        rmdir "$BOOT_MEDIA_DIR"
        return 1
    fi
    return 0
}

# Undo pl_boot_mount_media.
pl_boot_umount_media() {
    umount "$BOOT_MEDIA_DIR"
    rmdir "$BOOT_MEDIA_DIR"
}

# --------------------------
# configuration

# if this file is present, cancel the boot (exit this script)
CANCEL_BOOT_FLAG=/tmp/CANCEL_BOOT

# how many times to try the primary server before falling back to the
# backup server. if the backup is tried this many times too, the process
# is repeated starting with the primary server
ATTEMPTS_BEFORE_BACKUP=3

# where all the configuration files for contacting
# the boot server are stored
BOOT_DIR=/usr/boot/

# get the server we are going to be contacting
BOOT_SERVER=$(pl_boot_read_conf "$BOOT_DIR/boot_server")
BOOT_SERVER_PORT=$(pl_boot_read_conf "$BOOT_DIR/boot_server_port")

# the file to request from the boot server
BOOT_SERVER_PATH=$(pl_boot_read_conf "$BOOT_DIR/boot_server_path")

# location of the cacert for this boot server
BOOT_SERVER_CACERT=$BOOT_DIR/cacert.pem

# location of the gpg key ring to verify scripts
BOOT_SERVER_GPG_KEYRING=$BOOT_DIR/pubring.gpg

# get the backup server we are going to be contacting
BACKUP_BOOT_SERVER=$(pl_boot_read_conf "$BOOT_DIR/backup/boot_server")
BACKUP_BOOT_SERVER_PORT=$(pl_boot_read_conf "$BOOT_DIR/backup/boot_server_port")

# the file to request from the backup boot server
BACKUP_BOOT_SERVER_PATH=$(pl_boot_read_conf "$BOOT_DIR/backup/boot_server_path")

# location of the cacert for the backup boot server
BACKUP_BOOT_SERVER_CACERT=$BOOT_DIR/backup/cacert.pem

# location of the gpg key ring for backup server to verify scripts
BACKUP_BOOT_SERVER_GPG_KEYRING=$BOOT_DIR/backup/pubring.gpg

# location of a file containing this boot cd version
BOOT_VERSION_FILE=/pl_version

# the locations of the downloaded scripts
UNVERIFIED_SCRIPT=/tmp/bootscript.gpg
VERIFIED_SCRIPT=/tmp/bootscript

# mount point used for the boot media in disconnected operation
BOOT_MEDIA_DIR=/tmp/boot-media


# --------------------------


# now, contact the boot server, run the script, and do it over again.
contact_count=0

# start out contacting the primary server
# (on_backup_server is set to 1 while we are trying the backup server)
pl_boot_use_server primary

while : ; do

    if [[ -f "$CANCEL_BOOT_FLAG" ]]; then
        pl_boot_log " pl_boot: got request to cancel boot, exiting"
        exit 0
    fi

    # too many attempts against the current server: switch to the other one
    if (( contact_count >= ATTEMPTS_BEFORE_BACKUP )); then

        contact_count=0

        if [[ $on_backup_server == 1 ]]; then
            pl_boot_log " pl_boot: failed to contact backup server, trying primary $BOOT_SERVER"
            pl_boot_use_server primary
        else
            pl_boot_log " pl_boot: failed to contact primary server, trying backup $BACKUP_BOOT_SERVER"
            pl_boot_use_server backup
        fi
    fi

    # no pause before the first attempt against a newly selected server
    if (( contact_count != 0 )); then
        pl_boot_log " pl_boot: next attempt in 30s, to fetch script from server at $CONNECT_BOOT_SERVER"
        /bin/sleep 30
    fi

    # assemble the curl transaction; an array keeps every argument intact.
    # "--form name=<file" makes curl send the file's contents as the field
    # value, so /tmp/nonce is read when curl runs, after it is regenerated.
    # NOTE(review): --sslv3 forces the obsolete SSLv3 protocol, which current
    # curl / TLS library builds generally refuse.  Left unchanged because the
    # boot server's requirements are not visible from this script.
    curl_cmd=(
        /usr/bin/curl
        --connect-timeout 60
        --max-time 600
        --form "version=<${BOOT_VERSION_FILE}"
        --form "cmdline=</proc/cmdline"
        --form "uptime=</proc/uptime"
        --form "ifconfig=</tmp/ifconfig"
        --form "nonce=</tmp/nonce"
        --location
        --output "$UNVERIFIED_SCRIPT"
        --sslv3
        --silent
        --show-error
        --fail
        --stderr /tmp/curl_errors
        --cacert "$CONNECT_BOOT_SERVER_CACERT"
        "https://${CONNECT_BOOT_SERVER}:${CONNECT_BOOT_SERVER_PORT}/${CONNECT_BOOT_SERVER_PATH}"
    )

    # assemble the gpg command line
    gpg_cmd=(
        /usr/bin/gpg
        --no-default-keyring
        --keyring "$CONNECT_BOOT_SERVER_GPG_KEYRING"
        --output "$VERIFIED_SCRIPT"
        --always-trust
        --decrypt "$UNVERIFIED_SCRIPT"
    )

    # 32 random bytes, written as one line of 64 hex digits
    pl_boot_log " pl_boot: generating new nonce"
    /usr/bin/head --bytes=32 /dev/urandom \
        | /usr/bin/od -tx1 -An --width=32 \
        | /bin/sed 's/ //g' > /tmp/nonce

    pl_boot_log " pl_boot: fetching script from boot server $CONNECT_BOOT_SERVER"
    (( contact_count++ ))
    rm -f "$UNVERIFIED_SCRIPT"
    "${curl_cmd[@]}"
    curl_err=$?
    if (( curl_err != 0 )); then
        pl_boot_log " pl_boot: curl request failed with error $curl_err:"
        cat /tmp/curl_errors
        echo
        if [[ -n "$DISCONNECTED_OPERATION" ]]; then
            # download failed: fall back to the copy cached on the boot media.
            # If the media cannot be mounted nothing is copied, and the gpg
            # step below fails on the missing file and triggers a retry.
            if pl_boot_mount_media; then
                cp "$BOOT_MEDIA_DIR/bootscript.gpg" "$UNVERIFIED_SCRIPT"
                pl_boot_umount_media
            fi
        else
            case "$curl_err" in
                6)
                    pl_boot_log " This error likely indicates a networking configuration error. "
                    pl_boot_log " Please, check whether you can ping this machine.  If you can, "
                    pl_boot_log " we recommend checking your DNS settings.  If you cannot, then "
                    pl_boot_log " please double check your network settings registered at PLC and "
                    pl_boot_log " stored on this Boot Image."
                    ;;
                60)
                    pl_boot_log " This error likely indicates that the hardware clock is likely not set "
                    pl_boot_log " to GMT.  The result is that authentication between the local and "
                    pl_boot_log " remote site fails.  Please double check this machine's system "
                    pl_boot_log " clock, and set it to GMT in the BIOS. If after rebooting the same "
                    pl_boot_log " error occurs, please report the situation to support@planet-lab.org "
                    pl_boot_log " with as much detail as possible."
                    ;;
                *)
                    pl_boot_log " The best-practice approach for handling this error is not yet "
                    pl_boot_log " documented.  Please report this error to support@planet-lab.org "
                    pl_boot_log " with as much detail as possible."
                    ;;
            esac
            # in any case display as much info as we can (see pl_functions)
            pl_network_sanity_checks
            # retry
            continue
        fi
    elif [[ -n "$DISCONNECTED_OPERATION" ]]; then
        # download succeeded: cache it on the boot media for later fallback.
        # Only copy once the media is really mounted, so the file never
        # lands in the bare mount-point directory.
        if pl_boot_mount_media; then
            cp "$UNVERIFIED_SCRIPT" "$BOOT_MEDIA_DIR"
            pl_boot_umount_media
        fi
    fi

    pl_boot_log " pl_boot: verifying downloaded script"
    rm -f "$VERIFIED_SCRIPT"
    if ! "${gpg_cmd[@]}" 2> /tmp/gpg_errors; then
        pl_boot_log " pl_boot: failed to verify file:"
        cat /tmp/gpg_errors
        echo
        continue
    fi
    pl_boot_log " pl_boot: decrypted and verified script succesfully"

    pl_boot_log " pl_boot: handing control to download script"
    rm -f "$UNVERIFIED_SCRIPT"
    chmod +x "$VERIFIED_SCRIPT"
    "$VERIFIED_SCRIPT"

    pl_boot_log " pl_boot: downloaded script has returned"
done

# NOTE(review): unreachable - the loop above has no break and only ends
# through "exit 0" when CANCEL_BOOT_FLAG is found.
pl_boot_log " pl_boot: automatic boot process canceled by user"