membase_stats.py

#
# Copyright 2009-2011 AppFirst, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# 


#!/usr/bin/env python
# ./membase_stats.py

import subprocess
import getopt
import sys

class MembaseStats():
#mapping a list of resource name we are keeping the state
    resource_name_mapping = {
                             "curr_items": {"description": "Current active items", "unit": "", "type": "current", "name": "current_active_items"},
                             "curr_items_tot": {"description": "Current total items", "unit": "", "type": "current", "name": "current_total_items"},
                             "ep_num_active_non_resident": {"description": "Current non-resident item", "unit": "", "type": "current", "name": "num_active_items_not_in_RAM"},
                             "get_hits": {"description": "Number of fetches", "unit": "", "type": "cumulative", "name": "num_successful_get"},
                             "ep_bg_fetched": {"description": "Number of items fetched from the disk", "unit": "", "type": "cumulative", "name": "num_items_get_from_disk" },
                             "ep_num_non_resident": {"description": "Number of items stored only on disk, not cached in RAM", "unit": "", "type": "current", "name": "num_total_items_not_in_RAM"},
                             "ep_total_enqueued": {"description": "Total number of items queued for storage" , "unit": "", "type": "cumulative", "name": "total_items_enqueued_for_storage"},
                             "ep_total_new_items": {"description": "Total number of persisted new items", "unit": "", "type": "cumulative", "name": "num_persisted_new_items"},
                             "get_misses": {"description": "Number of unsuccessful fetches", "unit": "", "type": "cumulative", "name": "num_unsuccessful_get"},
                             "mem_used": {"description": "Current memory usage", "unit": "B", "type": "current", "name": "memory_usage"},
                             "ep_total_cache_size": {"description": "Cache size", "unit": "B", "type": "current", "name": "total_cache_size"},
                             "bytes_written": {"description": "Number of bytes written", "unit": "B", "type": "cumulative", "name": "bytes_written"},
                             "ep_queue_size": {"description": "Number of items in the disk written queue", "unit": "", "type": "current", "name": "disk_write_queue_size"},
                             "ep_io_num_write": {"description": "Number of io write operations", "unit": "", "type": "cumulative", "name": "num_io_write_operations"},
                             "ep_io_num_read": {"description": "Number of io read operations", "unit": "", "type": "cumulative", "name": "num_io_read_operations"},
                             "ep_total_persisted": {"description": "Total number of persisted items", "unit": "", "type": "cumulative", "name": "num_items_persisted"},
                             "ep_total_del_items": {"description": "Total number of persisted deletions", "unit": "", "type": "cumulative", "name": "num_items_deleted"},
                             "ep_item_commit_failed": {"description": "Number of times a transaction failed to commit due to storage errors", "unit": "", "type": "cumulative", "name": "num_failed_transactions"},
                             "ep_expired": {"description": "Number of times an item was expired", "unit": "", "type": "cumulative", "name": "expired_items"},
                             }
    
    data_file_name = "/usr/share/appfirst/plugins/membase_stats_data"
    prev_stats = {}
    d_stats = {}
    list = False
    # get the previous value from the data we kept
    def before_work(self):
        try:
            file = open(self.data_file_name, 'r')
            for line in file.readlines():# read all the previous value line by line
                items = line.split()
                if len(items) > 1:
                    resource_name = items[0]
                    resource_value = items[1]
                    try:
                        self.prev_stats[resource_name] = int(resource_value)
                    except Exception,e:
                        pass
            file.close()
        except Exception,e:
            pass
        
    def after_work(self):
        try:
            file = open(self.data_file_name, 'w')
            file_content = ""
            for key in self.prev_stats:
                file_content += ("%s %d\n" % (key, self.prev_stats[key]))
            file.write(file_content)
            file.close()
        except:
            pass
                
    def usage(self):
        print "usage:\n\tmembase_stats.py [-l|--list|-h|--help]\n\tmembase_stats.py metric|all\n\t\tWhere metric is one of the metrics shown with a -l|--list"
    
    def get_status(self):
        str_stats = subprocess.Popen("/opt/membase/bin/ep_engine/management/stats 127.0.0.1:11210 all",
                                     shell = True, stdout=subprocess.PIPE).communicate()[0]
        stats = str_stats.split('\n')
        for i in range(len(stats) - 1):
            j = stats[i].index(':')
            k = stats[i].rindex(' ')
            
            try:
                self.d_stats[stats[i][1:j]] = int(stats[i][k+1:])
            except ValueError:
                self.d_stats[stats[i][1:j]] = stats[i][k+1:]
                
    def main(self):
        try:
            opts, args = getopt.getopt(sys.argv[1:], "lh", ["list", "help"])
        except getopt.GetoptError, err:
            # print help information and exit:
            print str(err) # will print something like "option -a not recognized"
            usage()
            sys.exit(2)
            
        self.prev_stats = {}
        self.d_stats = {}
        self.before_work()
        for o, a in opts:
            if (o in ("-l")) or (o in ("--list")):
                self.list = True
            if (o in ("-h")) or o in ("--help"):
                self.usage()
                sys.exit(0)
    
        self.get_status()
        
        if self.list is True:
            for k in self.self.d_stats.iterkeys():
                print k
            sys.exit(0)
        #
        # Nagios developer docs define perfomance data format as:
        #    'label'=value[UOM];[warn];[crit];[min];[max]
        # Example from check_http:
        #    HTTP OK: HTTP/1.1 200 OK - 8683 bytes in 1.196 second response time |time=1.196437s;5.000000;10.000000;0.000000 size=8683B;;;0 ec="0"
        #
        perf_data = "Membase_Stats:OK | " 
    
        if not args:
            args.append("all")
    
        if "all" in args:
            keys = self.d_stats.iterkeys()
        else:
            keys = args
        
        for key in keys:
            perf_data += self.process_data(key)
        perf_data += self.get_resident_ratio()
        perf_data += self.get_cache_miss_ratio()
        perf_data += self.get_replica_resident_ratio()
        print perf_data
        self.after_work()
        
    def process_data(self, key):
        ret = " "
        if self.resource_name_mapping.has_key(key):
            type = "current"
            name = key
            if self.resource_name_mapping[key].has_key('name'):
                name = self.resource_name_mapping[key]['name']
            if self.resource_name_mapping[key].has_key('type'):
                type = self.resource_name_mapping[key]['type']
            if type == "cumulative": 
                tmp = self.d_stats[key]
                if self.prev_stats.has_key(key):
                    self.d_stats[key] = self.d_stats[key] - self.prev_stats[key]
                    if self.d_stats[key] < 0: #somehow membase is reseted
                        tmp = 0
                    else:    
                        ret = "%s=%s%s " % (name, self.d_stats[key], self.resource_name_mapping[key]['unit'])
                self.prev_stats[key] = tmp # update the value we are keeping track of     
            else:
                ret = "%s=%s%s " % (name, self.d_stats[key], self.resource_name_mapping[key]['unit'])
        else:
            ret = "%s=%s " % (key,self.d_stats[key])
        return ret
    
    def get_resident_ratio(self):
        resident_item_ratio = 100
        if self.d_stats.has_key('ep_num_active_non_resident') and self.d_stats.has_key('curr_items') \
            and self.d_stats['curr_items'] > 0:
             resident_item_ratio = 100 - float(self.d_stats['ep_num_active_non_resident']) / float(self.d_stats['curr_items']) * 100
        if resident_item_ratio < 0:
            resident_item_ratio = 0
        return "%s=%s%s " % ('resident_item_ratio', resident_item_ratio, '%')  
    
    def get_cache_miss_ratio(self):
        cache_miss_ratio = 0
        if self.d_stats.has_key('get_hits') and self.d_stats.has_key('ep_bg_fetched') and self.d_stats['get_hits'] > 0:
            cache_miss_ratio = float(self.d_stats['ep_bg_fetched']) / float(self.d_stats['get_hits']) * 100
        return "%s=%s%s " % ('cache_miss_ratio', cache_miss_ratio, '%')
    
    def get_replica_resident_ratio(self):
        replica_resident_ratio = 100
        if self.d_stats.has_key('ep_num_non_resident') and self.d_stats.has_key('curr_items_tot') \
            and self.d_stats['curr_items_tot'] > 0 and self.d_stats.has_key("curr_items") and self.d_stats.has_key('ep_num_active_non_resident'):
            if self.d_stats['curr_items_tot'] > self.d_stats['curr_items']:
                replica_resident_ratio = 100 - float(self.d_stats['ep_num_non_resident'] - self.d_stats['ep_num_active_non_resident']) / float(self.d_stats['curr_items_tot'] - self.d_stats['curr_items']) * 100
                print replica_resident_ratio
            if replica_resident_ratio < 0:
                replica_resident_ratio = 0
        return "%s=%s%s " % ('replica_resident_ratio', replica_resident_ratio, '%')
    
if __name__ == "__main__":
    stat = MembaseStats()
    stat.main()