#!/bin/bash
#
# pl_boot  ([bootcd.git] / initscripts / pl_boot)
#
# Note carried over from the repository page this file was taken from:
#   "performs various sanity checks at network init and if curl fails"
#
# This first section sources the shared helper and node configuration
# files, initialises gpg, and defines the settings (servers, certificates,
# key rings and file locations) used by the boot loop further down.

. /etc/init.d/pl_functions

. /tmp/planet.cnf

# Run gpg once to create default options
export GNUPGHOME=/root
/usr/bin/gpg --yes 2>/dev/null </dev/null

# if this file is present, cancel the boot (exit this script)
CANCEL_BOOT_FLAG=/tmp/CANCEL_BOOT

# how many attempts are made against one server before switching to the
# other one: after this many attempts on the primary server the backup is
# tried, and after this many attempts on the backup the process starts
# over with the primary server
ATTEMPTS_BEFORE_BACKUP=3

# where all the configuration files for contacting
# the boot server are stored
# (the trailing slash is kept as is; paths built from it contain "//")
BOOT_DIR=/usr/boot/

# get the server we are going to be contacting
BOOT_SERVER=$(cat "$BOOT_DIR/boot_server")
BOOT_SERVER_PORT=$(cat "$BOOT_DIR/boot_server_port")

# the file to request from the boot server
BOOT_SERVER_PATH=$(cat "$BOOT_DIR/boot_server_path")

# location of the cacert for this boot server
BOOT_SERVER_CACERT=$BOOT_DIR/cacert.pem

# location of the gpg key ring to verify scripts
BOOT_SERVER_GPG_KEYRING=$BOOT_DIR/pubring.gpg

# get the backup server we are going to be contacting
BACKUP_BOOT_SERVER=$(cat "$BOOT_DIR/backup/boot_server")
BACKUP_BOOT_SERVER_PORT=$(cat "$BOOT_DIR/backup/boot_server_port")

# the file to request from the backup boot server
BACKUP_BOOT_SERVER_PATH=$(cat "$BOOT_DIR/backup/boot_server_path")

# location of the cacert for the backup boot server
BACKUP_BOOT_SERVER_CACERT=$BOOT_DIR/backup/cacert.pem

# location of the gpg key ring for backup server to verify scripts
BACKUP_BOOT_SERVER_GPG_KEYRING=$BOOT_DIR/backup/pubring.gpg

# location of a file containing this boot cd version
BOOT_VERSION_FILE=/pl_version

# the locations of the downloaded scripts:
# the file as received from the server, and the output of the gpg step
UNVERIFIED_SCRIPT=/tmp/bootscript.gpg
VERIFIED_SCRIPT=/tmp/bootscript


# --------------------------
#
# Main loop: fetch the boot script from the currently selected boot
# server, check it with gpg, run it, and start over once it returns.
# After ATTEMPTS_BEFORE_BACKUP attempts against one server the loop
# switches to the other one (primary <-> backup).  The loop only ends
# when the CANCEL_BOOT_FLAG file is found.

# mount point used for the boot media in disconnected operation
boot_media_dir=/tmp/boot-media

# Print a message prefixed with the current time (HH:MM:SS).
# Arguments: the message text
pl_boot_log() {
    printf '%s  %s\n' "$(date '+%H:%M:%S')" "$*"
}

# Point the CONNECT_* settings at one of the two boot servers and record
# the choice in on_backup_server (1 = backup, 0 = primary).
# Arguments: $1 - "backup" selects the backup server, anything else the
#                 primary server
pl_boot_select_server() {
    if [[ $1 == backup ]]; then
        on_backup_server=1
        CONNECT_BOOT_SERVER=$BACKUP_BOOT_SERVER
        CONNECT_BOOT_SERVER_PORT=$BACKUP_BOOT_SERVER_PORT
        CONNECT_BOOT_SERVER_PATH=$BACKUP_BOOT_SERVER_PATH
        CONNECT_BOOT_SERVER_GPG_KEYRING=$BACKUP_BOOT_SERVER_GPG_KEYRING
        CONNECT_BOOT_SERVER_CACERT=$BACKUP_BOOT_SERVER_CACERT
    else
        on_backup_server=0
        CONNECT_BOOT_SERVER=$BOOT_SERVER
        CONNECT_BOOT_SERVER_PORT=$BOOT_SERVER_PORT
        CONNECT_BOOT_SERVER_PATH=$BOOT_SERVER_PATH
        CONNECT_BOOT_SERVER_GPG_KEYRING=$BOOT_SERVER_GPG_KEYRING
        CONNECT_BOOT_SERVER_CACERT=$BOOT_SERVER_CACERT
    fi
}

# Mount the boot media on $boot_media_dir, run the given command while it
# is mounted, then unmount it and remove the mount point again.
# DISCONNECTED_OPERATION is handed to "mount -U", i.e. it is used as a
# filesystem UUID; it is not set in this file (presumably it comes from
# one of the sourced files -- verify).
# Arguments: the command (and its arguments) to run while mounted
# Returns:   0 if the media was mounted and the command succeeded,
#            non-zero otherwise
pl_boot_with_boot_media() {
    local status=0

    # -p: do not fail when a previous run left the directory behind
    mkdir -p "$boot_media_dir" || return 1
    if mount -U "$DISCONNECTED_OPERATION" "$boot_media_dir"; then
        "$@" || status=$?
        umount "$boot_media_dir"
    else
        # without the media there is nothing to copy from or to; skipping
        # the command avoids touching the bare mount-point directory
        status=1
    fi
    rmdir "$boot_media_dir"
    return "$status"
}

# Print operator hints for a failed curl request.
# Arguments: $1 - curl exit status
pl_boot_explain_curl_error() {
    case "$1" in
        6)
            # curl exit status 6: could not resolve host
            pl_boot_log "This error likely indicates a networking configuration error. "
            pl_boot_log "Please, check whether you can ping this machine.  If you can, "
            pl_boot_log "we recommend checking your DNS settings.  If you cannot, then "
            pl_boot_log "please double check your network settings registered at PLC and "
            pl_boot_log "stored on this Boot Image."
            ;;
        60)
            # curl exit status 60: server certificate could not be verified
            pl_boot_log "This error likely indicates that the hardware clock is likely not set "
            pl_boot_log "to GMT.  The result is that authentication between the local and "
            pl_boot_log "remote site fails.  Please double check this machine's system "
            pl_boot_log "clock, and set it to GMT in the BIOS. If after rebooting the same "
            pl_boot_log "error occurs, please report the situation to support@planet-lab.org "
            pl_boot_log "with as much detail as possible."
            ;;
        *)
            pl_boot_log "The best-practice approach for handling this error is not yet "
            pl_boot_log "documented.  Please report this error to support@planet-lab.org "
            pl_boot_log "with as much detail as possible."
            ;;
    esac
}

# now, contact the boot server, run the script, and do it over again.
# contact_count is the number of attempts made against the current server
contact_count=0

# start out contacting the primary servers
# (this also sets on_backup_server, which is 1 while the backup is in use)
pl_boot_select_server primary

while : ; do

    if [[ -f $CANCEL_BOOT_FLAG ]]; then
        pl_boot_log "pl_boot: got request to cancel boot, exiting"
        exit 0
    fi

    # too many attempts against the current server: switch to the other one
    if (( contact_count >= ATTEMPTS_BEFORE_BACKUP )); then

        contact_count=0

        if [[ $on_backup_server == 1 ]]; then
            pl_boot_log "pl_boot: failed to contact backup server, trying primary $BOOT_SERVER"
            pl_boot_select_server primary
        else
            pl_boot_log "pl_boot: failed to contact primary server, trying backup $BACKUP_BOOT_SERVER"
            pl_boot_select_server backup
        fi
    fi

    # every attempt except the first one against a server is delayed
    if (( contact_count != 0 )); then

        if [[ $on_backup_server == 1 ]]; then
            pl_boot_log "pl_boot: attempting to fetch script from backup server in 30s"
        else
            pl_boot_log "pl_boot: attempting to fetch script from primary server in 30s"
        fi
        /bin/sleep 30
    fi

    # assemble the curl transaction; an array keeps every argument intact
    # (no word-splitting or globbing of paths and of the URL).
    # "name=<file" makes curl send the content of the file as that form field.
    # NOTE(review): --sslv3 is kept as in the original; SSLv3 is considered
    # insecure and newer curl releases ignore or reject this option --
    # confirm what the boot servers require before changing it.
    curl_cmd=(
        /usr/bin/curl
        --connect-timeout 60
        --max-time 600
        --form "version=<$BOOT_VERSION_FILE"
        --form "cmdline=</proc/cmdline"
        --form "uptime=</proc/uptime"
        --form "ifconfig=</tmp/ifconfig"
        --form "nonce=</tmp/nonce"
        --location
        --output "$UNVERIFIED_SCRIPT"
        --sslv3
        --silent
        --show-error
        --fail
        --stderr /tmp/curl_errors
        --cacert "$CONNECT_BOOT_SERVER_CACERT"
        "https://$CONNECT_BOOT_SERVER:$CONNECT_BOOT_SERVER_PORT/$CONNECT_BOOT_SERVER_PATH"
    )

    # assemble the gpg command line
    gpg_cmd=(
        /usr/bin/gpg
        --no-default-keyring
        --keyring "$CONNECT_BOOT_SERVER_GPG_KEYRING"
        --output "$VERIFIED_SCRIPT"
        --always-trust
        --decrypt "$UNVERIFIED_SCRIPT"
    )

    # 32 random bytes written as one line of hex digits; sent with the request
    pl_boot_log "pl_boot: generating new nonce"
    /usr/bin/head --bytes=32 /dev/urandom \
        | /usr/bin/od -tx1 -An --width=32 \
        | /bin/sed 's/ //g' > /tmp/nonce

    pl_boot_log "pl_boot: fetching script from boot server $CONNECT_BOOT_SERVER"
    contact_count=$(( contact_count + 1 ))
    rm -f -- "$UNVERIFIED_SCRIPT"
    "${curl_cmd[@]}"
    curl_err=$?
    if (( curl_err != 0 )); then
        pl_boot_log "pl_boot: curl request failed with error $curl_err:"
        cat /tmp/curl_errors
        echo
        if [[ -n $DISCONNECTED_OPERATION ]]; then
            # use the copy stored on the boot media by an earlier successful
            # download; if that fails too, the gpg step below fails and the
            # loop retries
            pl_boot_with_boot_media \
                cp "$boot_media_dir/bootscript.gpg" "$UNVERIFIED_SCRIPT"
        else
            pl_boot_explain_curl_error "$curl_err"
            # in any case display as much info as we can (see pl_functions)
            pl_network_sanity_checks
            # retry
            continue
        fi
    elif [[ -n $DISCONNECTED_OPERATION ]]; then
        # keep a copy of the freshly downloaded script on the boot media
        pl_boot_with_boot_media cp "$UNVERIFIED_SCRIPT" "$boot_media_dir"
    fi

    pl_boot_log "pl_boot: verifying downloaded script"
    rm -f -- "$VERIFIED_SCRIPT"
    if ! "${gpg_cmd[@]}" 2> /tmp/gpg_errors; then
        pl_boot_log "pl_boot: failed to verify file:"
        cat /tmp/gpg_errors
        echo
        continue
    fi
    pl_boot_log "pl_boot: decrypted and verified script succesfully"

    pl_boot_log "pl_boot: handing control to download script"
    rm -f -- "$UNVERIFIED_SCRIPT"
    chmod +x "$VERIFIED_SCRIPT"
    "$VERIFIED_SCRIPT"

    pl_boot_log "pl_boot: downloaded script has returned"
done

# NOTE: the loop above is only left through "exit", so this line is
# currently never reached.
pl_boot_log "pl_boot: automatic boot process canceled by user"