Skip to content

Commit c9024a2

Browse files
author
Jon Ludlam
committed
Merge pull request #59 from jonludlam/CP-12141-finally
CP-12141: Remove master/slave knowledge from RRDD
2 parents 4128eec + 89be5dc commit c9024a2

File tree

9 files changed: 82 additions and 158 deletions.

lib/pool_role_shared.ml

Lines changed: 0 additions & 70 deletions
This file was deleted.

lib/rrdd_libs.mldylib

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,4 @@
22
# DO NOT EDIT (digest: 0c7354c835dd0cb4479d655ef155db47)
33
Stats
44
Constants
5-
Pool_role_shared
65
# OASIS_STOP

lib/rrdd_libs.mllib

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,4 @@
22
# DO NOT EDIT (digest: 0c7354c835dd0cb4479d655ef155db47)
33
Stats
44
Constants
5-
Pool_role_shared
65
# OASIS_STOP

rrdd/rrdd_http_handler.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ let put_rrd_handler (req : Http.Request.t) (s : Unix.file_descr) _ =
125125
if List.mem_assoc "archive" query then (
126126
debug "Receiving RRD on the master for archiving, type=%s."
127127
(if is_host then "Host" else "VM uuid=" ^ uuid);
128-
archive_rrd ~uuid ~rrd:(Rrd.copy_rrd rrd) ()
128+
archive_rrd_internal ~uuid ~rrd:(Rrd.copy_rrd rrd) ()
129129
) else (
130130
debug "Receiving RRD for resident VM uuid=%s. Replacing in hashtable." uuid;
131131
let domid = int_of_string (List.assoc "domid" query) in

rrdd/rrdd_monitor.ml

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -39,28 +39,18 @@ let create_fresh_rrd use_min_max dss =
3939
* domain has gone and we stream the RRD to the master. We also have a
4040
* list of the currently rebooting VMs to ensure we don't accidentally
4141
* archive the RRD. *)
42-
let update_rrds timestamp dss (uuid_domids : (string * int) list) rebooting_vms paused_vms =
42+
let update_rrds timestamp dss (uuid_domids : (string * int) list) paused_vms =
4343
(* Here we do the synchronising between the dom0 view of the world
4444
and our Hashtbl. By the end of this execute block, the Hashtbl
4545
correctly represents the world *)
46-
let to_send_back = Mutex.execute mutex (fun _ ->
46+
Mutex.execute mutex (fun _ ->
4747
let out_of_date, by_how_much =
4848
match !host_rrd with
4949
| None -> false, 0.
5050
| Some rrdi -> rrdi.rrd.Rrd.last_updated > timestamp, abs_float (timestamp -. rrdi.rrd.Rrd.last_updated)
5151
in
5252
if out_of_date then
5353
error "Clock just went backwards by %.0f seconds: RRD data may now be unreliable" by_how_much;
54-
let registered = Hashtblext.fold_keys vm_rrds in
55-
let gone_vms = List.filter (fun vm -> not (List.mem_assoc vm uuid_domids)) registered in
56-
let to_send_back = List.map (fun uuid -> uuid, Hashtbl.find vm_rrds uuid) gone_vms in
57-
(* Don't send back rebooting VMs! *)
58-
let to_send_back = List.filter (fun (uuid, _) ->
59-
let rebooting = (List.exists (fun uuid' -> uuid = uuid') rebooting_vms) in
60-
if rebooting then debug "Ignoring disappeared VM which is rebooting";
61-
not rebooting
62-
) to_send_back in
63-
List.iter (fun (uuid, _) -> Hashtbl.remove vm_rrds uuid) to_send_back;
6454
let do_vm (vm_uuid, domid) =
6555
try
6656
let dss = List.filter_map (fun (ty, ds) -> match ty with | VM x -> if x = vm_uuid then Some ds else None | _ -> None) dss in
@@ -169,8 +159,4 @@ let update_rrds timestamp dss (uuid_domids : (string * int) list) rebooting_vms
169159
Rrd.ds_update_named rrd timestamp ~new_domid:false
170160
(List.map (fun ds -> (ds.ds_name, (ds.ds_value,ds.ds_pdp_transform_function))) host_dss)
171161
end;
172-
to_send_back
173162
)
174-
in List.iter (fun (uuid, rrdi) ->
175-
debug "Sending back RRD for VM uuid=%s" uuid;
176-
archive_rrd ~uuid ~rrd:rrdi.rrd ()) to_send_back

rrdd/rrdd_server.ml

Lines changed: 49 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ let archive_sr_rrd _ ~(sr_uuid : string) : string =
3434
raise (Archive_failed(msg))
3535
) in
3636
try
37-
archive_rrd ~uuid:sr_uuid ~rrd:sr_rrd.rrd ();
37+
archive_rrd_internal ~uuid:sr_uuid ~rrd:sr_rrd.rrd ();
3838
let archive_path =
3939
Filename.concat Constants.rrd_location (sr_uuid ^ ".gz") in
4040
if not (Unixext.file_exists archive_path) then begin
@@ -62,8 +62,18 @@ let push_sr_rrd _ ~(sr_uuid : string) ~(path : string) : unit =
6262
let has_vm_rrd _ ~(vm_uuid : string) =
6363
Mutex.execute mutex (fun _ -> Hashtbl.mem vm_rrds vm_uuid)
6464

65-
let backup_rrds _ ?(save_stats_locally = true) () : unit =
66-
debug "backup safe_stats_locally=%b" save_stats_locally;
65+
let archive_rrd _ ~vm_uuid ~remote_address : unit =
66+
Mutex.execute mutex (fun () ->
67+
try
68+
let rrd = (Hashtbl.find vm_rrds vm_uuid).rrd in
69+
archive_rrd_internal ~remote_address ~uuid:vm_uuid ~rrd ();
70+
Hashtbl.remove vm_rrds vm_uuid
71+
with Not_found -> ())
72+
73+
let backup_rrds _ ?(remote_address = None) () : unit =
74+
debug "backing up rrds %s" (match remote_address with
75+
| None -> "locally"
76+
| Some x -> Printf.sprintf "remotely at %s" x);
6777
let total_cycles = 5 in
6878
let cycles_tried = ref 0 in
6979
while !cycles_tried < total_cycles do
@@ -81,7 +91,7 @@ let backup_rrds _ ?(save_stats_locally = true) () : unit =
8191
(fun (uuid, rrd) ->
8292
debug "Backup: saving RRD for VM uuid=%s to local disk" uuid;
8393
let rrd = Mutex.execute mutex (fun () -> Rrd.copy_rrd rrd) in
84-
archive_rrd ~save_stats_locally ~uuid ~rrd ()
94+
archive_rrd_internal ~remote_address ~uuid ~rrd ()
8595
) vrrds;
8696
let srrds =
8797
try
@@ -95,13 +105,13 @@ let backup_rrds _ ?(save_stats_locally = true) () : unit =
95105
(fun (uuid, rrd) ->
96106
debug "Backup: saving RRD for SR uuid=%s to local disk" uuid;
97107
let rrd = Mutex.execute mutex (fun () -> Rrd.copy_rrd rrd) in
98-
archive_rrd ~uuid ~rrd ()
108+
archive_rrd_internal ~uuid ~rrd ()
99109
) srrds;
100110
match !host_rrd with
101111
| Some rrdi ->
102112
debug "Backup: saving RRD for host to local disk";
103113
let rrd = Mutex.execute mutex (fun () -> Rrd.copy_rrd rrdi.rrd) in
104-
archive_rrd ~save_stats_locally ~uuid:(Inventory.lookup Inventory._installation_uuid) ~rrd ()
114+
archive_rrd_internal ~remote_address ~uuid:(Inventory.lookup Inventory._installation_uuid) ~rrd ()
105115
| None -> ()
106116
end else begin
107117
cycles_tried := 1 + !cycles_tried;
@@ -120,17 +130,16 @@ let load_rrd_from_local_filesystem uuid =
120130
module Deprecated = struct
121131
(* DEPRECATED *)
122132
(* Fetch an RRD from the master *)
123-
let pull_rrd_from_master ~uuid ~is_host =
133+
let pull_rrd_from_master ~uuid ~master_address =
124134
let pool_secret = get_pool_secret () in
125-
let uri = if is_host then Constants.get_host_rrd_uri else Constants.get_vm_rrd_uri in
135+
let uri = Constants.get_host_rrd_uri in
126136
(* Add in "dbsync = true" to the query to make sure the master
127137
* doesn't try to redirect here! *)
128138
let uri = uri ^ "?uuid=" ^ uuid ^ "&dbsync=true" in
129139
let request =
130140
Http.Request.make ~user_agent:Constants.rrdd_user_agent
131141
~cookie:["pool_secret", pool_secret] Http.Get uri in
132142
let open Xmlrpc_client in
133-
let master_address = Pool_role_shared.get_master_address () in
134143
let transport = SSL(SSL.make (), master_address, !Rrdd_shared.https_port) in
135144
with_transport transport (
136145
with_http request (fun (response, s) ->
@@ -149,60 +158,56 @@ module Deprecated = struct
149158
* 1. For the local host after a xapi restart or host restart.
150159
* 2. For running VMs after a xapi restart.
151160
* It is now only used to load the host's RRD after xapi restart. *)
152-
let load_rrd _ ~(uuid : string) ~(domid : int) ~(is_host : bool)
153-
~(timescale : int) () : unit =
161+
let load_rrd _ ~(uuid : string) ~(timescale : int) ~(master_address : string option) : unit =
154162
try
155163
let rrd =
156164
try
157165
let rrd = load_rrd_from_local_filesystem uuid in
158166
debug "RRD loaded from local filesystem for object uuid=%s" uuid;
159167
rrd
160168
with e ->
161-
if Pool_role_shared.is_master () then begin
169+
match master_address with
170+
| None -> begin
162171
info "Failed to load RRD from local filesystem: metrics not available for uuid=%s" uuid;
163172
raise e
164-
end else begin
173+
end
174+
| Some x -> begin
165175
debug "Failed to load RRD from local filesystem for object uuid=%s; asking master" uuid;
166176
try
167-
let rrd = pull_rrd_from_master ~uuid ~is_host in
177+
let rrd = pull_rrd_from_master ~uuid ~master_address:x in
168178
debug "RRD pulled from master for object uuid=%s" uuid;
169179
rrd
170180
with e ->
171181
info "Failed to fetch RRD from master: metrics not available for uuid=%s" uuid;
172182
raise e
173183
end
174184
in
175-
Mutex.execute mutex (fun () ->
176-
if is_host
177-
then begin
178-
host_rrd := Some {rrd; dss = []; domid}
179-
end else
180-
Hashtbl.replace vm_rrds uuid {rrd; dss = []; domid}
181-
)
185+
Mutex.execute mutex (fun () -> host_rrd := Some {rrd; dss = []; domid = 0} )
182186
with _ -> ()
183187
end
184188

185-
(* Push function to push the archived RRD to the appropriate host
186-
* (which might be us, in which case, pop it into the hashtbl. *)
187-
let push_rrd _ ~(vm_uuid : string) ~(domid : int) ~(is_on_localhost : bool) ()
188-
: unit =
189+
let get_rrd ~vm_uuid =
190+
let path = Filename.concat Constants.rrd_location vm_uuid in
191+
rrd_of_gzip path
192+
193+
let push_rrd_local _ ~vm_uuid ~domid : unit =
189194
try
190-
let path = Constants.rrd_location ^ "/" ^ vm_uuid in
191-
let rrd = rrd_of_gzip path in
192-
debug "Pushing RRD for VM uuid=%s" vm_uuid;
193-
if is_on_localhost then
194-
Mutex.execute mutex (fun _ ->
195-
Hashtbl.replace vm_rrds vm_uuid {rrd; dss=[]; domid}
196-
)
197-
else
198-
(* Host might be OpaqueRef:null, in which case we'll fail silently *)
199-
let address = Pool_role_shared.get_master_address () in
200-
send_rrd ~address ~to_archive:false ~uuid:vm_uuid
201-
~rrd:(Rrd.copy_rrd rrd) ()
195+
let rrd = get_rrd ~vm_uuid in
196+
debug "Pushing RRD for VM uuid=%s locally" vm_uuid;
197+
Mutex.execute mutex (fun _ ->
198+
Hashtbl.replace vm_rrds vm_uuid {rrd; dss=[]; domid}
199+
)
200+
with _ -> ()
201+
202+
let push_rrd_remote _ ~vm_uuid ~remote_address : unit =
203+
try
204+
let rrd = get_rrd ~vm_uuid in
205+
debug "Pushing RRD for VM uuid=%s remotely" vm_uuid;
206+
send_rrd ~address:remote_address ~to_archive:false ~uuid:vm_uuid ~rrd:(Rrd.copy_rrd rrd) ()
202207
with _ -> ()
203208

204209
(** Remove an RRD from the local filesystem, if it exists. *)
205-
let remove_rrd _ ~(uuid : string) () : unit =
210+
let remove_rrd _ ~(uuid : string) : unit =
206211
let path = Constants.rrd_location ^ "/" ^ uuid in
207212
let gz_path = path ^ ".gz" in
208213
(try Unix.unlink path with _ -> ());
@@ -215,7 +220,7 @@ let remove_rrd _ ~(uuid : string) () : unit =
215220
* Remote address is assumed to be valid, since it is set by monitor_master.
216221
*)
217222
let migrate_rrd _ ?(session_id : string option) ~(remote_address : string)
218-
~(vm_uuid : string) ~(host_uuid : string) () : unit =
223+
~(vm_uuid : string) ~(host_uuid : string) : unit =
219224
try
220225
let rrdi = Mutex.execute mutex (fun () ->
221226
let rrdi = Hashtbl.find vm_rrds vm_uuid in
@@ -237,12 +242,12 @@ let migrate_rrd _ ?(session_id : string option) ~(remote_address : string)
237242

238243
(* Called on host shutdown/reboot to send the Host RRD to the master for
239244
* backup. Note all VMs will have been shutdown by now. *)
240-
let send_host_rrd_to_master _ () =
245+
let send_host_rrd_to_master _ ~master_address =
241246
match !host_rrd with
242247
| Some rrdi ->
243248
debug "sending host RRD to master";
244249
let rrd = Mutex.execute mutex (fun () -> Rrd.copy_rrd rrdi.rrd) in
245-
archive_rrd ~save_stats_locally:false ~uuid:(Inventory.lookup Inventory._installation_uuid) ~rrd ()
250+
send_rrd ~address:master_address ~to_archive:true ~uuid:(Inventory.lookup Inventory._installation_uuid) ~rrd ()
246251
| None -> ()
247252

248253
let add_ds ~rrdi ~ds_name =
@@ -346,7 +351,7 @@ let query_sr_ds _ ~(sr_uuid : string) ~(ds_name : string) : float =
346351
Rrd.query_named_ds rrdi.rrd now ds_name Rrd.CF_Average
347352
)
348353

349-
let update_use_min_max _ ~(value : bool) () : unit =
354+
let update_use_min_max _ ~(value : bool) : unit =
350355
debug "Updating use_min_max: New value=%b" value;
351356
use_min_max := value
352357

@@ -357,7 +362,7 @@ let update_vm_memory_target _ ~(domid : int) ~(target : int64) : unit =
357362
Mutex.execute memory_targets_m
358363
(fun _ -> Hashtbl.replace memory_targets domid target)
359364

360-
let set_cache_sr _ ~(sr_uuid : string) () : unit =
365+
let set_cache_sr _ ~(sr_uuid : string) : unit =
361366
Mutex.execute cache_sr_lock (fun () -> cache_sr_uuid := Some sr_uuid)
362367

363368
let unset_cache_sr _ () =
@@ -529,7 +534,7 @@ end
529534

530535
module HA = struct
531536
let enable_and_update _ ~(statefile_latencies : Rrd.Statefile_latency.t list)
532-
~(heartbeat_latency : float) ~(xapi_latency : float) () =
537+
~(heartbeat_latency : float) ~(xapi_latency : float) =
533538
Mutex.execute Rrdd_ha_stats.m (fun _ ->
534539
Rrdd_ha_stats.enabled := true;
535540
Rrdd_ha_stats.Statefile_latency.all := statefile_latencies;

rrdd/rrdd_shared.ml

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,10 @@ let send_rrd ?(session_id : string option) ~(address : string)
120120
);
121121
debug "Sending RRD complete."
122122

123-
let archive_rrd ?(save_stats_locally = Pool_role_shared.is_master ()) ~uuid
124-
~rrd () =
125-
debug "Archiving RRD for object uuid=%s %s" uuid
126-
(if save_stats_locally then "to local disk" else "to remote master");
127-
if save_stats_locally then begin
123+
let archive_rrd_internal ?(remote_address = None) ~uuid ~rrd () =
124+
match remote_address with
125+
| None -> begin
126+
debug "Archiving RRD for object uuid=%s to local disk" uuid;
128127
try
129128
(* Stash away the rrd onto disk. *)
130129
let exists =
@@ -146,9 +145,9 @@ let archive_rrd ?(save_stats_locally = Pool_role_shared.is_master ()) ~uuid
146145
with e ->
147146
(*debug "Caught exception: %s" (ExnHelper.string_of_exn e);*)
148147
log_backtrace();
149-
end else begin
148+
end
149+
| Some x -> begin
150150
(* Stream it to the master to store, or maybe to a host in the migrate case *)
151-
debug "About to send to master.";
152-
let address = Pool_role_shared.get_master_address () in
153-
send_rrd ~address ~to_archive:true ~uuid ~rrd ()
151+
debug "Archiving RRD for object uuid=%s to remote master" uuid;
152+
send_rrd ~address:x ~to_archive:true ~uuid ~rrd ()
154153
end

0 commit comments

Comments
 (0)