Charm: oneiric/hadoop-mapreduce
Revision: 5
Hook: ganglia-relation-changed
#!/bin/bash
set -uex
juju-log "installing ganglia monitor"
DEBIAN_FRONTEND=noninteractive apt-get -y install -qq --no-install-recommends ganglia-monitor=3.1.7-2ubuntu2
service ganglia-monitor status && service ganglia-monitor stop
server=`relation-get server`
if [ -z "$server" ] ; then
juju-log "remote host not set yet."
exit 0
fi
#remote_ip=`dig +short $remote_host|head -n 1`
service_name=`echo $JUJU_UNIT_NAME | cut -d/ -f1`
unit_name=${JUJU_UNIT_NAME/\//-}
gmond_already_configured() {
false
}
configure_gmond() {
mv /etc/ganglia/gmond.conf /etc/ganglia/gmond.conf.dist
# this so sucks, but this should be a standalone hook
# to copy into another service
# remove when colocated services work
cat > /etc/ganglia/gmond.conf <<EOS
globals {
daemonize = yes
setuid = yes
user = ganglia
debug_level = 0
max_udp_msg_len = 1472
mute = no
deaf = no
host_dmax = 3600 /*secs */
cleanup_threshold = 300 /*secs */
gexec = no
send_metadata_interval = 20
}
cluster {
name = "$service_name"
owner = "unspecified"
latlong = "unspecified"
url = "unspecified"
}
udp_send_channel {
host = $server
port = 8649
ttl = 1
}
host {
location = "$unit_name"
}
udp_recv_channel {
port = 8649
}
tcp_accept_channel {
port = 8649
}
modules {
module {
name = "core_metrics"
}
module {
name = "cpu_module"
path = "/usr/lib/ganglia/modcpu.so"
}
module {
name = "disk_module"
path = "/usr/lib/ganglia/moddisk.so"
}
module {
name = "load_module"
path = "/usr/lib/ganglia/modload.so"
}
module {
name = "mem_module"
path = "/usr/lib/ganglia/modmem.so"
}
module {
name = "net_module"
path = "/usr/lib/ganglia/modnet.so"
}
module {
name = "proc_module"
path = "/usr/lib/ganglia/modproc.so"
}
module {
name = "sys_module"
path = "/usr/lib/ganglia/modsys.so"
}
}
collection_group {
collect_once = yes
time_threshold = 20
metric {
name = "heartbeat"
}
}
collection_group {
collect_once = yes
time_threshold = 1200
metric {
name = "cpu_num"
title = "CPU Count"
}
metric {
name = "cpu_speed"
title = "CPU Speed"
}
metric {
name = "mem_total"
title = "Memory Total"
}
/* Should this be here? Swap can be added/removed between reboots. */
metric {
name = "swap_total"
title = "Swap Space Total"
}
metric {
name = "boottime"
title = "Last Boot Time"
}
metric {
name = "machine_type"
title = "Machine Type"
}
metric {
name = "os_name"
title = "Operating System"
}
metric {
name = "os_release"
title = "Operating System Release"
}
metric {
name = "location"
title = "Location"
}
}
collection_group {
collect_once = yes
time_threshold = 300
metric {
name = "gexec"
title = "Gexec Status"
}
}
collection_group {
collect_every = 20
time_threshold = 90
/* CPU status */
metric {
name = "cpu_user"
value_threshold = "1.0"
title = "CPU User"
}
metric {
name = "cpu_system"
value_threshold = "1.0"
title = "CPU System"
}
metric {
name = "cpu_idle"
value_threshold = "5.0"
title = "CPU Idle"
}
metric {
name = "cpu_nice"
value_threshold = "1.0"
title = "CPU Nice"
}
metric {
name = "cpu_aidle"
value_threshold = "5.0"
title = "CPU aidle"
}
metric {
name = "cpu_wio"
value_threshold = "1.0"
title = "CPU wio"
}
}
collection_group {
collect_every = 20
time_threshold = 90
/* Load Averages */
metric {
name = "load_one"
value_threshold = "1.0"
title = "One Minute Load Average"
}
metric {
name = "load_five"
value_threshold = "1.0"
title = "Five Minute Load Average"
}
metric {
name = "load_fifteen"
value_threshold = "1.0"
title = "Fifteen Minute Load Average"
}
}
collection_group {
collect_every = 80
time_threshold = 950
metric {
name = "proc_run"
value_threshold = "1.0"
title = "Total Running Processes"
}
metric {
name = "proc_total"
value_threshold = "1.0"
title = "Total Processes"
}
}
collection_group {
collect_every = 40
time_threshold = 180
metric {
name = "mem_free"
value_threshold = "1024.0"
title = "Free Memory"
}
metric {
name = "mem_shared"
value_threshold = "1024.0"
title = "Shared Memory"
}
metric {
name = "mem_buffers"
value_threshold = "1024.0"
title = "Memory Buffers"
}
metric {
name = "mem_cached"
value_threshold = "1024.0"
title = "Cached Memory"
}
metric {
name = "swap_free"
value_threshold = "1024.0"
title = "Free Swap Space"
}
}
collection_group {
collect_every = 40
time_threshold = 300
metric {
name = "bytes_out"
value_threshold = 4096
title = "Bytes Sent"
}
metric {
name = "bytes_in"
value_threshold = 4096
title = "Bytes Received"
}
metric {
name = "pkts_in"
value_threshold = 256
title = "Packets Received"
}
metric {
name = "pkts_out"
value_threshold = 256
title = "Packets Sent"
}
}
collection_group {
collect_every = 1800
time_threshold = 3600
metric {
name = "disk_total"
value_threshold = 1.0
title = "Total Disk Space"
}
}
collection_group {
collect_every = 40
time_threshold = 180
metric {
name = "disk_free"
value_threshold = 1.0
title = "Disk Space Available"
}
metric {
name = "part_max_used"
value_threshold = 1.0
title = "Maximum Disk Space Used"
}
}
EOS
}
gmond_already_configured || configure_gmond
service ganglia-monitor restart || service ganglia-monitor start