Merge branch 'master' of ssh://git.onelab.eu/git/monitor
[monitor.git] / docs / model.txt
1 ##
2 #
3 # TODO: Whitelist for sites or machines with scheduled maintenance or
4 #                       predicted downtimes far in the future.
5 #               Scheduled outages for some short period of time.
6 #                       This gives admins a chance to compare expectations with observations.
7
8
9 PLC
10         name
11         fullname
12         xmlrpcserver
13         history->MultipleJoin->PLCHistory.plc
14         sites->MultipleJoin->Site.plc
15         actions->MultipleJoin->PLCAction.plc
16
17   # configuration parameters...
18         monitor_frequency
19         rt_db_stuff
20         rt_user_stuff
21
22
23 PLCHistory
24         plc->
25         date_checked
26         plc_nodes
27         plc_sites
28         ob_sites_up
29         ob_sites_down
30         ob_nodes_up
31         ob_nodes_down
32         ob_nodes_other
33
34 # I'm unsure of this.  There is a transactional pattern that I need to
35 # discover or define with respect to how data is introduced into the db and
36 # whether or not it can be trusted as 'complete'.  This action may be the root
37 # of all the others in a tree.
38
39 PLCAction
40         date_performed
41         plc->ForeignKey->PLC.actions
42         plc_query                                                       # site, node, slice query.
43         nodes_query                                                     # probe nodes
44         nodes_categorize                                        # take raw data and push into finite states
45                                                                                 # also aggregate data for site and plc
46         stats_aggregate                                         # fill out the rest of aggregate fields in db?
47         nodes_diagnose                                          # decide what actions to take based on history
48                                                                                 # write action to db.
49         nodes_act                                                       # read action db and perform the actions to change state at plc or node
50
51 --
52
53 Site
54         loginbase                                                               # from plc
55         active                                                                  # site exists in PLC
56         plc->ForeignKey->PLC.sites
57         history->MultipleJoin->SiteHistory.site
58         nodes->MultipleJoin->Node.site
59         actions->MultipleJoin->SiteAction.site
60
61 SiteHistory
62         site->ForeignKey->Site.history
63         date_checked
64
65   # Statistics
66         plc_nodes                                       # from plc
67         plc_slices_max                                  # from plc
68         plc_slices_used                                 # from plc
69         plc_disabled                                    # from plc
70         plc_suspended                                   # from plc
71         ob_nodes_up                                             # from monitor
72         ob_nodes_down                                   # from monitor
73
74 SiteAction
75         site->ForeignKey->Site.actions
76
77         date_created
78   # Action to take
79         email                                           # from monitor diagnose -> action
80         suspend_slices                          # from monitor diagnose -> action
81         disable_creation                        # from monitor diagnose -> action
82
83   # Are other actions available for either Notify or Findbad scripts?
84         slices_suspend
85         slices_enable
86
87         site_enable
88         site_disable
89
90         email_send
91         message
92
93   # Action taken
94         date_performed                          # from monitor (action)
95         rt_ticket_id                            # from monitor sent mail (action.py)
96
97 --
98
99 Node
100         nodename
101         probe->ForeignKey->NodeProbe.node
102         hardware->OnetoOne->Hardware.node
103         actions->MultipleJoin->NodeAction.node
104
105 NodeCurrent
106         ->node
107         site->
108         ->probe
109         ->action
110         ->downtime
111
112 NodeProbe
113         nodecurrent->
114         date_checked
115         ob_ping
116         ob_ssh
117         ob_kernel
118         ob_bmlog
119         ob_bootstate
120         ob_bootcd
121         plc_bootstate
122         plc_bootcd
123         plc_pcu
124
125 NodeDowntime
126         nodecurrent->
127         date_created
128         date_expires
129         enabled
130         owner_reason
131         owner_comments
132
133 NodeAction
134         nodecurrent->ForeignKey->Node.actions
135         date_created
136
137         inferred_state
138         inf_category
139         
140   # Are there actions on individual nodes?
141         plc_boot_state_reset
142         pcu_test
143         node_reboot
144
145 # TODO: Whitelist for sites or machines with scheduled maintenance or
146 #                       predicted downtimes far in the future.
147 #               Scheduled outages for some short period of time.
148 #                       This gives admins a chance to compare expectations with observations.
149
150 Message
151         date_created
152         title
153         template_arguments
154         message_parts->Join->MessagePart->message.id
155
156 MessagePart
157         date_created
158         title
159         template_message
160         message->ForeignKey->Message.id
161         
162
163 Hardware
164         node->
165         date_checked
166         cpu_model
167         cpu_speed
168         cpu_count
169
170         ram_model
171         ram_size
172         disk_model
173         disk_specs
174         
175   # pci devices
176         network cards
177         daughter cards
178         raid cards
179         usb
180         other?