From 25a437e2834f7d117d39a2bb9b122bba42026b46 Mon Sep 17 00:00:00 2001 From: Anatoly Kopyl Date: Sun, 12 Nov 2017 23:18:05 +0300 Subject: [PATCH] Commented and removed some useless code --- anomaly.py | 6 ++---- main.py | 22 ++++++++++------------ 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/anomaly.py b/anomaly.py index c9fdf87..767a2a3 100644 --- a/anomaly.py +++ b/anomaly.py @@ -2,10 +2,8 @@ import numpy as np def is_anomaly(result): - data = [item[1] for name in result for item in result[name]] - - x = np.array(data) + data = np.array([item[1] for name in result for item in result[name]]) for name in result: for item in result[name]: - item[2] = item[1] > x.mean() + 3 * x.std() \ No newline at end of file + item[2] = item[1] > data.mean() + 3 * data.std() \ No newline at end of file diff --git a/main.py b/main.py index ab49527..ad4dedf 100644 --- a/main.py +++ b/main.py @@ -5,33 +5,31 @@ import MySQLdb from anomaly import is_anomaly -TIME_SPAN = 900 +TIME_SPAN = 900 # 15 minutes result = {} -with open('raw_data.csv', 'rt', encoding="UTF-8") as csvfile: +with open('raw_data2.csv', 'rt', encoding="UTF-8") as csvfile: reader = csv.reader(csvfile, quotechar='"') + for row in reader: if row[0] != "ts": - timestamp = time.mktime(datetime.datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S,%f").timetuple()) + timestamp = time.mktime(datetime.datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S,%f").timetuple()) # generating a unix timestamp - name = row[1] + "*" + row[2] + name = row[1] + "*" + row[2] # combining api_name and http_method into a pair name if 'start_time' not in locals(): start_time = timestamp - start_time = int(min(start_time, timestamp)) + start_time = int(min(start_time, timestamp)) # earliest request if 'end_time' not in locals(): end_time = timestamp - end_time = int(max(end_time, timestamp)) + end_time = int(max(end_time, timestamp)) # latest request if name not in result: result[name] = [] -with open('raw_data.csv', 'rt', encoding="UTF-8") as csvfile: - reader = csv.reader(csvfile, quotechar='"') - - name = row[1] + "*" + row[2] - for t in range(start_time, end_time, TIME_SPAN): - result[name].append([t, 0, 0]) + result[name].append([t, 0, 0]) # [timestamp, num of requests, is anomaly] + + csvfile.seek(0) for row in reader: if (row[3])[:1] == "5":