2012,知乎面试题二(重写)
题目
2. antispam 系统
假设我们可以获得线上的实时请求(按时间顺序)
每个请求包含如下信息:
时间(unix时间戳)
用户名
动作(提问、回答、评论)
内容
依次考虑如何解决以下问题:
1.当发现动作频率较高的用户时,输出报警(1分钟内超过10次评论、回答、提问)
2.当发现一个用户连续发重复的内容时,输出报警(连续发了10个相同的评论、回答、提问)
3.使用你觉得最优美的方法将上面的策略与程序分离开,使得上面的策略可以随时更新而不用修改程序。
要求:
服务监听一个端口,使用测试程序模拟用户行为不断向服务发送请求,
请求格式为json如
{"time":1323333,"user":"xxx","action":"answer","content":"abcde"}
服务输出报警格式如下
xxx,"频繁提问"
说明
1.antispam 运行于 gunicorn 之上,在终端输入:gunicorn --workers=1 antispam:antispam 即可运行;不支持多进程(workers 只能等于 1)
2.testclient在test文件夹里面。请先运行antispam,再运行testclient
3.testclient 支持参数配置:-a 或者 --addr 为要连接的 server,默认 127.0.0.1:8000;-f 或者 --test_file 为测试数据文件,默认 testdata
antispam.py
# -*- coding:utf-8 -*-
# @author: jzb
from urlparse import parse_qs
import json
from strategys import strategys
class Store(object):
    """Append-only list of records with a cursor that walks backwards.

    Records are added with ``push``.  To scan from the newest record to the
    oldest, call ``seek_top`` once and then call ``next`` repeatedly until
    ``is_end`` reports True.
    """

    def __init__(self):
        self._data_list = []
        self._i = 0

    def seek_top(self):
        """Place the cursor just past the newest record."""
        self._i = len(self._data_list)

    def next(self):
        """Move the cursor one record back and return that record."""
        position = self._i - 1
        self._i = position
        return self._data_list[position]

    def top(self):
        """Return the newest record; raises IndexError when nothing is stored."""
        newest = len(self._data_list) - 1
        return self._data_list[max(newest, 0)]

    def push(self, e):
        """Append record ``e`` as the newest entry."""
        self._data_list.append(e)

    def is_end(self):
        """Return True once the cursor has reached the oldest record."""
        return not self._i
class Context(object):
    """Runs a list of strategies against the shared record store.

    Each strategy entry is a dict with two keys: 'strategy', a callable that
    receives the store and returns a truthy value when nothing is wrong, and
    'warning', a format string with one ``%s`` placeholder for the user name.
    """

    # Class attribute on purpose: every Context instance shares this one
    # store, so records pushed through one instance are visible to the next.
    _store = Store()

    def __init__(self, strategys=None):
        """Remember the strategy entries to run.

        ``strategys`` defaults to no strategies.  ``None`` is used as the
        default instead of a list literal so that instances never share one
        mutable default list.
        """
        self._strategys = [] if strategys is None else strategys

    @property
    def store(self):
        """The store shared by all Context instances."""
        return self._store

    def execute(self):
        """Call every strategy and print its warning when it returns falsy."""
        for entry in self._strategys:
            strategy = entry['strategy']
            if not strategy(self._store):
                # A single parenthesised argument prints the same text under
                # both the Python 2 print statement and the print function.
                print(entry['warning'] % (strategy.get_warning_user()))
def _parse_record(raw):
    """Decode one JSON payload into a record dict, or return None if unusable.

    A usable record is a JSON object that has the keys 'time', 'user',
    'action' and 'content', with a 'time' value that ``int()`` accepts.
    """
    if raw is None:
        return None
    try:
        record = json.loads(raw)
    except ValueError:
        return None
    if not isinstance(record, dict):
        return None
    for key in ('time', 'user', 'action', 'content'):
        if key not in record:
            return None
    try:
        int(record['time'])
    except (TypeError, ValueError, OverflowError):
        return None
    return record


def antispam(environ, start_response):
    """WSGI application: store one request record and run the strategies.

    The record is read from the ``data`` query-string parameter as JSON.
    The response is always '200 ok'; the body is 'Error' when ``data`` is
    missing or is not a usable record, and an acknowledgement otherwise.
    """
    params = parse_qs(environ.get('QUERY_STRING', ''))
    raw = params['data'][0].strip() if 'data' in params else None
    record = _parse_record(raw)
    if record is None:
        # Reject before pushing: the store is never pruned, so one bad record
        # would keep raising inside later backward scans.
        body = 'Error'
    else:
        context = Context(strategys)
        context.store.push(record)
        context.execute()
        body = 'Data(%s) successfully received' % (raw)
    start_response('200 ok', [('Content-Type', 'text/html')])
    return [body]
strategys.py
# -*- coding:utf-8 -*-
# @author: jzb
class Strategy(object):
    """Base class for strategies.

    A strategy is a callable that is expected to answer only True or False.
    This base implementation performs no check and returns None.
    """

    def __init__(self):
        # Name of the user the most recent warning refers to.
        self._user = ''

    def __call__(self, data_list):
        """Evaluate the strategy; the base class does nothing."""
        return None

    def get_warning_user(self):
        """Return the user name recorded for the warning."""
        return self._user
class StrategyHighFrequencyAction(Strategy):
    """Flag a user who performs one kind of action too often in a time window.

    Called with the store, it looks at the user of the newest record and
    counts that user's records of the configured action whose time is no
    older than ``seconds`` before the newest record's time.
    """

    def __init__(self, action, times=10, seconds=60):
        """
        action  -- the action name to count (compared with record['action']).
        times   -- alert when the count is greater than this value.
        seconds -- length of the trailing time window.
        """
        # Initialise the base class so that get_warning_user() works even
        # before the first alert has been raised.
        super(StrategyHighFrequencyAction, self).__init__()
        self._action = action
        self._times = times
        self._seconds = seconds

    def __call__(self, store):
        """Return False (and remember the user) on an alert, True otherwise."""
        latest = store.top()
        user = latest['user']
        time_limit = int(latest['time']) - self._seconds
        action_sum = 0
        store.seek_top()
        while not store.is_end():
            e = store.next()
            if int(e['time']) < time_limit:
                # Requests arrive in time order, so every record further
                # back is older still and cannot fall inside the window.
                break
            if e['user'] == user and e['action'] == self._action:
                action_sum += 1
        # The requirement is "more than `times` actions within the window",
        # so the comparison is strict.
        if action_sum > self._times:
            self._user = user
            return False
        return True
class StrategyContinuousRepeatContent(Strategy):
    """Flag a user who keeps posting the same content for one kind of action.

    Called with the store, it takes the newest record as the reference and
    counts how many of that user's most recent records of the configured
    action carry the same content without interruption.
    """

    def __init__(self, action, times=10):
        """
        action -- the action name to inspect (compared with record['action']).
        times  -- alert once this many consecutive identical posts are seen.
        """
        # Initialise the base class so that get_warning_user() works even
        # before the first alert has been raised.
        super(StrategyContinuousRepeatContent, self).__init__()
        self._action = action
        self._times = times

    def __call__(self, store):
        """Return False (and remember the user) on an alert, True otherwise."""
        latest = store.top()
        if latest['action'] != self._action:
            # The reference content must come from a record of this
            # strategy's own action; otherwise, for example, a comment's text
            # would be compared against earlier answers.
            return True
        store.seek_top()
        # The requirement is "`times` identical posts in a row", and the
        # count includes the newest record, so reaching `times` is enough.
        if self._get_repeat_sum(store, self._action) >= self._times:
            self._user = latest['user']
            return False
        return True

    def _get_repeat_sum(self, store, action):
        """Count the newest user's trailing run of identical ``action`` posts.

        The caller must have called ``store.seek_top()`` first.  Records of
        other users or other actions are skipped; the scan stops at the first
        matching record whose content differs from the newest record's.
        """
        latest = store.top()
        content = latest['content']
        user = latest['user']
        action_sum = 0
        while not store.is_end():
            e = store.next()
            if e['user'] != user or e['action'] != action:
                continue
            if e['content'] != content:
                break
            action_sum += 1
        return action_sum
# Strategies to run for every request.  'warning' is the message printed
# when the strategy returns False; its %s is replaced by the name of the
# user being warned about.
strategys = [
    # Fixed: the 'answer' frequency alert previously printed the
    # "frequent question" text instead of "frequent answer".
    {'strategy': StrategyHighFrequencyAction('answer'), 'warning': u'%s, "频繁回答"'},
    {'strategy': StrategyHighFrequencyAction('comment'), 'warning': u'%s, "频繁评论"'},
    {'strategy': StrategyHighFrequencyAction('question'), 'warning': u'%s, "频繁提问"'},
    {'strategy': StrategyContinuousRepeatContent('answer'), 'warning': u'%s, "重复回答"'},
    {'strategy': StrategyContinuousRepeatContent('comment'), 'warning': u'%s, "重复评论"'},
    {'strategy': StrategyContinuousRepeatContent('question'), 'warning': u'%s, "重复提问"'},
]
testclient.py
# -*- coding:utf-8 -*-
# @author: jzb
import httplib
import os
import sys
import urllib
import argparse
class TestClient(object):
    """Replays a test-data file against the server, one request per line."""

    def __init__(self, ip, file_path):
        """
        ip        -- server address in 'host:port' form.
        file_path -- path of the test-data file; each non-blank line is sent
                     as one request payload.
        """
        self._ip = ip
        self._file_path = file_path

    def run(self):
        """Send every non-blank line of the file as the ``data`` parameter.

        Prints a message and exits the process when the server cannot be
        reached or an HTTP-level error occurs.
        """
        # Imported here so the block does not depend on a file-level import.
        import socket

        headers = {
            "Host": self._ip,
            "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5",
            "Accept": "text/plain"
        }
        with open(self._file_path, 'r') as f:
            for line in f:
                if not line.strip():
                    # A blank line carries no record; do not send it.
                    continue
                conn = None
                try:
                    value = urllib.urlencode({'data': line})
                    conn = httplib.HTTPConnection(self._ip)
                    conn.request(method='POST', url='/?%s' % (value), headers=headers)
                    # Wait for the reply so the server has finished with this
                    # record before the connection is closed.
                    conn.getresponse().read()
                except (httplib.NotConnected, socket.error):
                    # A refused or unreachable server surfaces as socket.error,
                    # not as httplib.NotConnected.
                    print('%s can not connect' % self._ip)
                    sys.exit()
                except httplib.HTTPException:
                    print('send data exception')
                    sys.exit()
                finally:
                    if conn is not None:
                        conn.close()
if __name__ == '__main__':
    # Command-line entry point: read the server address and the data file
    # from the arguments, then replay the file against the server.
    arg_parser = argparse.ArgumentParser(description='It is a simple test client.')
    arg_parser.add_argument(
        '-a', '--addr',
        type=str,
        default='127.0.0.1:8000',
        help='standard format: ip:port',
    )
    arg_parser.add_argument(
        '-f', '--test_file',
        type=str,
        default='testdata',
        help='test data file',
    )
    options = arg_parser.parse_args()
    TestClient(options.addr, options.test_file).run()

近期评论