Skip to content

Commit

Permalink
修复连接池泄露以及增加监控点
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangsong committed Sep 6, 2018
1 parent f3cf302 commit bc6bd7c
Show file tree
Hide file tree
Showing 12 changed files with 287 additions and 75 deletions.
3 changes: 3 additions & 0 deletions backend/backend_conn.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ type Conn struct {
salt []byte

pushTimestamp int64
checkChannel chan int64
pkgErr error
}

Expand Down Expand Up @@ -350,6 +351,8 @@ func (c *Conn) Ping() error {
return err
}

c.pushTimestamp = time.Now().Unix()

return nil
}

Expand Down
41 changes: 35 additions & 6 deletions backend/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ type DB struct {
cacheConns chan *Conn
checkConn *Conn
lastPing int64

pushConnCount int64
popConnCount int64
}

func Open(addr string, user string, password string, dbName string, maxConnNum int) (*DB, error) {
Expand Down Expand Up @@ -84,22 +87,38 @@ func Open(addr string, user string, password string, dbName string, maxConnNum i
for i := 0; i < db.maxConnNum; i++ {
if i < db.InitConnNum {
conn, err := db.newConn()

if err != nil {
db.Close()
return nil, err
}
conn.pushTimestamp = time.Now().Unix()

db.cacheConns <- conn
atomic.AddInt64(&db.pushConnCount, 1)
} else {
conn := new(Conn)
db.idleConns <- conn
atomic.AddInt64(&db.pushConnCount, 1)
}
}
db.SetLastPing()

return db, nil
}

// newCheckConn starts a watchdog goroutine for conn.
//
// The goroutine exits as soon as a value is received from conn.checkChannel.
// If nothing is received within five minutes, it sends a freshly allocated,
// unconnected Conn to db.idleConns and increments db.pushConnCount
// (presumably to give back the pool slot of a connection that was never
// returned; confirm against the callers of PopConn).
//
// NOTE(review): the send on db.idleConns is a blocking send, so this goroutine
// stays parked if that channel is full or nil. Likewise, once the timeout
// branch has run nobody receives from conn.checkChannel any more, so a later
// blocking send on that unbuffered channel would never complete.
func (db *DB) newCheckConn(conn *Conn) {
	const leakTimeout = 5 * time.Minute

	go func() {
		// An explicit timer (instead of time.After) is stopped on exit, so it
		// is released right away when the connection comes back early rather
		// than lingering for the full timeout on every pop.
		timer := time.NewTimer(leakTimeout)
		defer timer.Stop()

		select {
		case <-conn.checkChannel:
			// A value arrived in time; nothing to replenish.
		case <-timer.C:
			db.idleConns <- new(Conn)
			atomic.AddInt64(&db.pushConnCount, 1)
		}
	}()
}

// Addr reports the address string stored in db.addr.
func (db *DB) Addr() string {
	address := db.addr
	return address
}
Expand All @@ -117,10 +136,10 @@ func (db *DB) State() string {
return state
}

func (db *DB) IdleConnCount() int {
func (db *DB) ConnCount() (int,int,int64,int64) {
db.RLock()
defer db.RUnlock()
return len(db.cacheConns)
return len(db.idleConns),len(db.cacheConns),db.pushConnCount,db.popConnCount
}

func (db *DB) Close() error {
Expand Down Expand Up @@ -170,14 +189,14 @@ func (db *DB) Ping() error {
if db.checkConn == nil {
db.checkConn, err = db.newConn()
if err != nil {
db.closeConn(db.checkConn)
db.checkConn.Close()
db.checkConn = nil
return err
}
}
err = db.checkConn.Ping()
if err != nil {
db.closeConn(db.checkConn)
db.checkConn.Close()
db.checkConn = nil
return err
}
Expand All @@ -191,6 +210,9 @@ func (db *DB) newConn() (*Conn, error) {
return nil, err
}

co.pushTimestamp = time.Now().Unix()
co.checkChannel = make(chan int64)

return co, nil
}

Expand All @@ -201,6 +223,8 @@ func (db *DB) closeConn(co *Conn) error {
if conns != nil {
select {
case conns <- co:
co.checkChannel <- co.pushTimestamp
atomic.AddInt64(&db.pushConnCount, 1)
return nil
default:
return nil
Expand Down Expand Up @@ -263,6 +287,9 @@ func (db *DB) PopConn() (*Conn, error) {
return nil, err
}

atomic.AddInt64(&db.popConnCount, 1)
// add check conn
db.newCheckConn(co)
return co, nil
}

Expand Down Expand Up @@ -290,7 +317,7 @@ func (db *DB) GetConnFromIdle(cacheConns, idleConns chan *Conn) (*Conn, error) {
var err error
select {
case co = <-idleConns:
err = co.Connect(db.addr, db.user, db.password, db.db)
co, err := db.newConn()
if err != nil {
db.closeConn(co)
return nil, err
Expand Down Expand Up @@ -327,6 +354,8 @@ func (db *DB) PushConn(co *Conn, err error) {
co.pushTimestamp = time.Now().Unix()
select {
case conns <- co:
co.checkChannel <- co.pushTimestamp
atomic.AddInt64(&db.pushConnCount, 1)
return
default:
db.closeConn(co)
Expand Down
4 changes: 2 additions & 2 deletions core/hack/version.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package hack

const (
Version = "2018-06-11 17:46:38 +0800 @d861c77"
Compile = "2018-09-04 19:53:20 +0800 by go version go1.9 darwin/amd64"
Version = "2018-09-05 09:47:25 +0800 @d56c0ec"
Compile = "2018-09-05 20:38:10 +0800 by go version go1.9 darwin/amd64"
)
12 changes: 6 additions & 6 deletions doc/KingDoc/admin_command_introduce.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,12 @@ SlowLogTotal:kingshard启动以来产生的慢日志个数
#查看node状态
mysql> admin server(opt,k,v) values('show','node','config');
+-------+---------------------+--------+-------+-------------------------------+-------------+----------+
| Node | Address | Type | State | LastPing | MaxIdleConn | IdleConn |
+-------+---------------------+--------+-------+-------------------------------+-------------+----------+
| node1 | 127.0.0.1:3306 | master | up | 2015-08-07 15:54:44 +0800 CST | 16 | 1 |
| node2 | 192.168.59.103:3307 | master | up | 2015-08-07 15:54:44 +0800 CST | 16 | 1 |
+-------+---------------------+--------+-------+-------------------------------+-------------+----------+
+-------+--------------------+--------+-------+-------------------------------+---------+----------+------------+---------------+--------------+
| Node | Address | Type | State | LastPing | MaxConn | IdleConn | CacheConns | PushConnCount | PopConnCount |
+-------+--------------------+--------+-------+-------------------------------+---------+----------+------------+---------------+--------------+
| node1 | 127.0.0.1:3306 | master | up | 2015-08-07 15:54:44 +0800 CST | 512 | 509 | 2 | 6301447 | 6300936 |
| node2 | 192.168.59.103:3307 | master | up | 2015-08-07 15:54:44 +0800 CST | 512 | 509 | 2 | 6301447 | 6300936 |
+-------+--------------------+--------+-------+-------------------------------+---------+----------+------------+---------------+--------------+
2 rows in set (0.00 sec)
#查看schema配置
Expand Down
10 changes: 8 additions & 2 deletions doc/KingDoc/kingshard_admin_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,10 @@ curl -X GET \
"status": "up",
"laste_ping": "2016-09-24 17:17:52 +0800 CST",
"max_conn": 32,
"idle_conn": 8
"idle_conn": 8,
"cache_conn":12,
"push_conn_count":32,
"pop_conn_count":0
},
{
"node": "node2",
Expand All @@ -56,7 +59,10 @@ curl -X GET \
"status": "up",
"laste_ping": "2016-09-24 17:17:52 +0800 CST",
"max_conn": 32,
"idle_conn": 8
"idle_conn": 8,
"cache_conn":12,
"push_conn_count":32,
"pop_conn_count":0
}
]
```
Expand Down
45 changes: 38 additions & 7 deletions doc/KingDoc/kingshard_quick_try.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,23 +82,52 @@
```
# server listen addr
addr : 127.0.0.1:9696
addr : 0.0.0.0:9696
# prometheus server listen addr
prometheus_addr : 0.0.0.0:7080
# server user and password
user : kingshard
password : kingshard
# the web api server
user_list:
-
user : root
password : root
-
user : kingshard
password : kingshard
# the web api server
web_addr : 0.0.0.0:9797
#HTTP Basic Auth
web_user : admin
web_password : admin
# if set log_path, the sql log will write into log_path/sql.log,the system log
# will write into log_path/sys.log
#log_path : /Users/flike/log
# log level[debug|info|warn|error],default error
log_level : debug
# if set log_sql(on|off) off,the sql log will not output
log_sql: on
# only log the query that take more than slow_log_time ms
#slow_log_time : 100
# the path of blacklist sql file
# all the sqls in this file will be forbidden by kingshard
#blacklist_sql_file: /Users/flike/blacklist
# only allow the ips in this list to connect to kingshard
#allow_ips: 127.0.0.1
#allow_ips : 127.0.0.1,192.168.0.14
# the charset of kingshard, if you don't set this item
# the default charset of kingshard is utf8.
#proxy_charset: gbk
# a node represents a real remote mysql server.
nodes :
-
name : node1
Expand Down Expand Up @@ -138,8 +167,10 @@
# 0 will no down
down_after_noalive: 100
# schema defines which db can be used by client and this db's sql will be executed in which nodes
schema :
# schema defines sharding rules, the db is the sharding table database.
schema_list :
-
user: kingshard
nodes: [node1,node2]
default: node1
shard:
Expand Down
96 changes: 96 additions & 0 deletions doc/KingDoc/prometheus.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# prometheus的使用

## 系统架构

prometheus系统架构可参考下面这幅图。
![prometheus系统架构](./prometheus.png)

## 下载并运行

[下载](https://github.com/prometheus/prometheus/releases)Prometheus,然后提取并运行:

```
tar xvfz prometheus-*.tar.gz
cd prometheus-*
./prometheus
```

## 配置yml

根据文档进行yml配置文件的配置,此处不再多解释。

[完整配置文档](https://prometheus.io/docs/prometheus/latest/configuration/configuration/)

kingshard通过配置项prometheus_addr暴露prometheus的抓取地址(示例配置中为0.0.0.0:7080),因此通过http://localhost:7080/metrics可以获取到相关监控项,端口可在配置文件进行变更。

## 启动

根据上述的配置文件启动prometheus

```
./prometheus --config.file=prometheus.yml
```

## 监控项解释

除了golang本身的监控项外,主要提供了以下的监控项。

### idle监控项

标明了当前系统中idle连接的数目,正常为最大连接减去cache连接的数目

```
# HELP idleConn the db idle connection
# TYPE idleConn gauge
idleConn{addr="10.10.60.189:3306",type="master"} 384
idleConn{addr="10.10.60.189:3307",type="slave"} 384
idleConn{addr="10.10.60.189:3308",type="slave"} 384
```

### cache监控项

标明了当前系统中cache连接的数目,正常为最大连接减去idle连接的数目,负载不大的系统该项变化应该不大

```
# HELP cacheConns the db cache connection
# TYPE cacheConns gauge
cacheConns{addr="10.10.60.189:3306",type="master"} 128
cacheConns{addr="10.10.60.189:3307",type="slave"} 128
cacheConns{addr="10.10.60.189:3308",type="slave"} 128
```

### maxConn监控项

标明了当前系统中配置的maxConn数量

```
# HELP maxConn the max connection config
# TYPE maxConn gauge
maxConn{addr="10.10.60.189:3306",type="master"} 512
maxConn{addr="10.10.60.189:3307",type="slave"} 512
maxConn{addr="10.10.60.189:3308",type="slave"} 512
```

### pushConnCount监控项

标明了当前系统中往连接池中推入连接的次数,用于排查异常情况

```
# HELP pushConnCount the db pushConnCount
# TYPE pushConnCount gauge
pushConnCount{addr="10.10.60.189:3306",type="master"} 512
pushConnCount{addr="10.10.60.189:3307",type="slave"} 512
pushConnCount{addr="10.10.60.189:3308",type="slave"} 512
```

### popConnCount监控项

标明了当前系统中从连接池中取出连接的次数,用于排查异常情况

```
# HELP popConnCount the db popConnCount
# TYPE popConnCount gauge
popConnCount{addr="10.10.60.189:3306",type="master"} 0
popConnCount{addr="10.10.60.189:3307",type="slave"} 0
popConnCount{addr="10.10.60.189:3308",type="slave"} 0
```
Binary file added doc/KingDoc/prometheus.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit bc6bd7c

Please sign in to comment.