Index: /branches/rel_apv_10_7/usr/click/webui/htdocs/new/src/hive/report.py
===================================================================
--- /branches/rel_apv_10_7/usr/click/webui/htdocs/new/src/hive/report.py	(revision 38398)
+++ /branches/rel_apv_10_7/usr/click/webui/htdocs/new/src/hive/report.py	(working copy)
@@ -780,10 +780,9 @@
                 item_value = []
                 table_data = []
                 time_list = []
-                for each in tcp_status[each_vs]:
-                    # each = ["2024-06-28 15:42:47", {"LISTEN": "0"}]
-                    date = each[0]
-                    value = each[1][column]
+                for date, each in sorted(tcp_status[each_vs].items()):
+                    # {"2024-06-28 15:42:47": {"LISTEN": "0"}}; dict order is arbitrary, so sort by timestamp key to keep rows chronological
+                    value = each[column]
                     item_value.append(value) # ex: 2
                     table_data.append([date, value]) # ex: ["2024-06-27 16:12:00", 2]
                     time_list.append(date) # ex: "2024-06-27 16:12:00"
@@ -1169,10 +1168,9 @@
         item_value = []
         table_data = []
         time_list = []
-        for each in tcp_status[MonitorOutput.C_GLOBAL_INDEX]:
-            # each = ["2024-06-28 15:42:47", {"LISTEN": "0"}]
-            date = each[0]
-            value = each[1][column]
+        for date, each in sorted(tcp_status[MonitorOutput.C_GLOBAL_INDEX].items()):
+            # {"2024-06-28 15:42:47": {"LISTEN": "0"}}; dict order is arbitrary, so sort by timestamp key to keep rows chronological
+            value = each[column]
             item_value.append(value) # ex: 2
             table_data.append([date, value]) # ex: ["2024-06-27 16:12:00", 2]
             time_list.append(date) # ex: "2024-06-27 16:12:00"
@@ -1316,9 +1314,9 @@
             cw.writerow([unicode(_(column))])
             # ex: Time, LISTEN
             cw.writerow([unicode(_('Time')), unicode(_(column))])
-            for each in tcp_status[vs_name]:
-                # each = ["2024-06-28 15:42:47", {"LISTEN": "0"}]
-                cw.writerow([each[0], each[1][column]])
+            for date, each in sorted(tcp_status[vs_name].items()):
+                # {"2024-06-28 15:42:47": {"LISTEN": "0"}}; dict order is arbitrary, so sort by timestamp key to keep CSV rows chronological
+                cw.writerow([date, each[column]])
 
     return cw
 
@@ -1506,17 +1504,19 @@
         cw.writerow([unicode(_(column))])
         # ex: Time, LISTEN
         cw.writerow([unicode(_('Time')), unicode(_(column))])
-        for each in tcp_status[MonitorOutput.C_GLOBAL_INDEX]:
-            # each = ["2024-06-28 15:42:47", {"LISTEN": "0"}]
-            cw.writerow([each[0], each[1][column]])
+        for date, each in sorted(tcp_status[MonitorOutput.C_GLOBAL_INDEX].items()):
+            # {"2024-06-28 15:42:47": {"LISTEN": "0"}}; dict order is arbitrary, so sort by timestamp key to keep CSV rows chronological
+            cw.writerow([date, each[column]])
     return cw
 def generateCSVReport(report_name, from_time, to_time, end_time, start_time, system_list, slb_vs_list, slb_rs_list, slb_vh_list, slb_rh_list, slb_total_list, llb_list, llb_total_list, gslb_list, tcp_list):
     sys_info = get_system_version()
     is_tcp_status = False
+    is_tcp_group = True
     if slb_vs_list:
         for vsname in slb_vs_list:
             if 'vs_tcp' in slb_vs_list[vsname]:
                 is_tcp_status = True
+                is_tcp_group = False
     get_tcp_status = {}
     if is_tcp_status or tcp_list:
         mo = MonitorOutput()
@@ -1524,7 +1524,7 @@
         start_time = tr.ParseFrom()
         end_time = tr.ParseTo()
         # get tcp status of all
-        get_tcp_status = mo.get(datetime.fromtimestamp(start_time), datetime.fromtimestamp(end_time))
+        get_tcp_status = mo.get(datetime.fromtimestamp(start_time), is_tcp_group)
     with codecs.open(report_file_path+report_name, 'w', encoding="utf-8-sig") as csvfile:
         cw = csv.writer(csvfile)
         cw.writerow([unicode(_('Create Time')), time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())])
@@ -1598,10 +1598,12 @@
     sys_info = get_system_version()
 
     is_tcp_status = False
+    is_tcp_group = True
     if slb_vs_list:
         for vsname in slb_vs_list:
             if 'vs_tcp' in slb_vs_list[vsname]:
                 is_tcp_status = True
+                is_tcp_group = False
     get_tcp_status = {}
     if is_tcp_status or tcp_list:
         mo = MonitorOutput()
@@ -1609,7 +1611,7 @@
         start_time = tr.ParseFrom()
         end_time = tr.ParseTo()
         # get tcp status of all
-        get_tcp_status = mo.get(datetime.fromtimestamp(start_time), datetime.fromtimestamp(end_time))
+        get_tcp_status = mo.get(datetime.fromtimestamp(start_time), is_tcp_group)
 
     doc = SimpleDocTemplate(report_file_path+pdf_name,pagesize=letter,
                         rightMargin=60,leftMargin=60,
Index: /branches/rel_apv_10_7/usr/click/webui/htdocs/new/src/hive/tcp_status/statistics.py
===================================================================
--- /branches/rel_apv_10_7/usr/click/webui/htdocs/new/src/hive/tcp_status/statistics.py	(revision 38398)
+++ /branches/rel_apv_10_7/usr/click/webui/htdocs/new/src/hive/tcp_status/statistics.py	(working copy)
@@ -1,26 +1,20 @@
+# -*- coding: utf-8 -*-
 """
 This file is for parsing data from monitor.out*
 and for the part of tcp statistics
 Author: shuinvy@arraynetworks.net
 """
 from datetime import datetime
-import time
 import os
-from hive.utils import andebug
-from django.utils.translation import ugettext_lazy as _
+from hive.monitor_log.MonitorFileController import MonitorFileController
+from hive.monitor_log.DataController import DataController
 
 class MonitorOutput:
     """
     Class for parsing data from monitor.out*
     """
-    C_FILE_LOCATION = "/var/crash/"
-    C_PREFIX = "monitor.out"
-    C_FILE_EXT = ".gz"
-    C_INIT_FILE = "/var/crash/monitor.out0"
-    C_TCP_COMMAND = "show statistics tcp all"
-    C_GLOBAL_PATTERN = "Global Statistics"
+    C_FILE_LOCATION = "/var/crash"
     C_GLOBAL_INDEX = "global"
-    C_VIRTUAL_SERVICE_PATTERN = "virtual service"
     C_EACH_FIELDS = [
         'LISTEN',
         'SYN_SENT',
@@ -45,100 +39,17 @@
         'FIN_WAIT_2': 'fin_wait_2',
         'TIME_WAIT': 'time_wait'
     }
+    directory_to_extract_to = ""
 
     def __init__(self):
-        """
-        :param string type: global or vs(virtual service)
-        """
+        pass
 
-    def get(self, start_time_obj, end_time_obj):
+    def get(self, start_time, is_group):
         """
         Get monitor data(filter by date range)
         It will sort time asc
-        :param datetime start_time_obj: start time
-        :param datetime end_time_obj: end time
-        :return dict
-            global example: {
-                'global': [{
-                    '2024-06-24 07:12:43': {
-                        "ESTABLISHED": "0",
-                        "SYN_RCVD": "0",
-                        "LAST_ACK": "0",
-                        "FIN_WAIT_2": "1",
-                        "FIN_WAIT_1": "0",
-                        "TIME_WAIT": "0",
-                        "CLOSING": "0",
-                        "SYN_SENT": "0",
-                        "CLOSE_WAIT": "1",
-                        "LISTEN": "0"
-                    }
-                }]
-            }
-            vs example: {
-                'v1': [{
-                    '2024-06-24 07:12:43': {
-                        "ESTABLISHED": "0",
-                        "SYN_RCVD": "0",
-                        "LAST_ACK": "0",
-                        "FIN_WAIT_2": "1",
-                        "FIN_WAIT_1": "0",
-                        "TIME_WAIT": "0",
-                        "CLOSING": "0",
-                        "SYN_SENT": "0",
-                        "CLOSE_WAIT": "1",
-                        "LISTEN": "0"
-                    }
-                }]
-            }
-        """
-        # check is original file exist
-        check_init = os.path.isfile(self.C_INIT_FILE)
-        if not check_init:
-            # no statistics data
-            return {}
-        out0 = self.read_file(self.C_INIT_FILE, start_time_obj, end_time_obj)
-        final = out0.copy()
-        """
-        To get the list of logs, we should make new folder
-        and copy file into the new folder
-        then unzip the file in the folder
-        get data from the file
-        and remove the folder(and the file inside it)
-        """
-        for item in os.listdir(self.C_FILE_LOCATION):
-            if os.path.isfile(os.path.join(self.C_FILE_LOCATION, item)) and self.C_PREFIX in item and item.endswith(self.C_FILE_EXT):
-                # get modify time of the file
-                ti_m = os.path.getmtime(self.C_FILE_LOCATION + item)
-                m_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(ti_m))
-                m_time_obj = datetime.strptime(m_time, "%Y-%m-%d %H:%M:%S")
-                if m_time_obj < start_time_obj or m_time_obj > end_time_obj:
-                    # not in the date interval
-                    continue
-                folder_name = item.replace(self.C_FILE_EXT, "")
-                # make folder for the gz files, because it will be removed after you unzip it.
-                os.mkdir(self.C_FILE_LOCATION + folder_name)
-                os.system("cp %s %s" % (self.C_FILE_LOCATION + item, self.C_FILE_LOCATION + folder_name))
-                copyed_gz_path = self.C_FILE_LOCATION + folder_name + "/" + item
-                os.system("gunzip %s" % copyed_gz_path)
-                unzip_file = self.C_FILE_LOCATION + folder_name + "/" + folder_name
-                # get data from unzip file of monitor
-                outN = self.read_file(unzip_file, start_time_obj, end_time_obj)
-                # remove unzip file and folder we created
-                os.remove(unzip_file)
-                os.rmdir(self.C_FILE_LOCATION + folder_name)
-                # merge data with same key(each virtual service name or 'global' in each monitor files)
-                final = self.merge_by_key(final, outN)
-        output = {}
-        for key in final.keys():
-           output[key] = sorted(final[key].items(), key = lambda x:datetime.strptime(x[0], '%Y-%m-%d %H:%M:%S'), reverse=False)
-        return output
-
-    def read_file(self, file_path, start_time_obj, end_time_obj):
-        """
-        Get statistics data from file based on type
-        :param string file_path: the full path of the file
-        :param time start_time_obj: start time (time object)
-        :param time end_time_obj: start time (time object)
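+        :param datetime start_time: lower bound; only a log entry stamped after this time is collected
+        :param bool is_group: True to collect the "Global Statistics" section (stored under 'global'), False to collect each virtual service under its name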
         :return dict
             global example: {
                 'global': {
@@ -173,88 +84,50 @@
                 }
             }
         """
-        try:
-            fp = open(file_path)
-            lines = fp.readlines()
-            is_start_line = False
-            index_list = [] # add line index of file to be passed
-            final_list = {}
-            for ind, line in enumerate(lines):
-                # find the place the commnad line is started
-                if self.C_TCP_COMMAND in line:
-                    is_start_line = True
-                    date_line = self.get_date_str(lines[ind-1]) # date_line always before command line
-                    date_obj = datetime.strptime(date_line, "%a %b %d %H:%M:%S %Z %Y")
-                    date_format = str(date_obj)
-                if is_start_line:
-                    if ind in index_list:
-                        # already be handled because the field is required,
-                        # so skip these idnex
-                        continue
-                    if self.C_GLOBAL_PATTERN in line or self.C_VIRTUAL_SERVICE_PATTERN in line:
-                        section_list = {}
-                        # if length = 10, that is range(1, 11)
-                        for start_ind in range(1, len(self.C_EACH_FIELDS) + 1):
-                            next_index = ind + start_ind
-                            the_pair_of_next = self.get_pair(lines[next_index])
-                            if the_pair_of_next['key'] in self.C_EACH_FIELDS:
-                                section_list[the_pair_of_next['key']] = the_pair_of_next['value']
-                            index_list.append(next_index)
-                        if date_obj < start_time_obj or date_obj > end_time_obj:
-                            # not in the time interval
-                            continue
-                        if self.C_GLOBAL_PATTERN in line:
-                            each_key = self.C_GLOBAL_INDEX
+        self.directory_to_extract_to = os.path.join(os.getcwd(), "tmp", "tcpmonitor")
+        file_controller = MonitorFileController(self.C_FILE_LOCATION, self.directory_to_extract_to)
+        log_path_list = file_controller.get_gz_file_path()
+        data_controller = DataController()
+        data_list = []
+        is_enough_log_file = False
+        pattern = r"\[([^]]*GMT[^]]*)\]\n\/ca\/bin\/backend -c \"show statistics tcp all\"\n(.*?)(?:ü|$)"
+        grp_pattern = r"Global Statistics:\n\s+LISTEN:\s+(\d+)\s+\n\s+SYN_SENT:\s+(\d+)\s+\n\s+SYN_RCVD:\s+(\d+)\s+\n\s+ESTABLISHED:\s+(\d+)\s+\n\s+CLOSE_WAIT:\s+(\d+)\s+\n\s+FIN_WAIT_1:\s+(\d+)\s+\n\s+CLOSING:\s+(\d+)\s+\n\s+LAST_ACK:\s+(\d+)\s+\n\s+FIN_WAIT_2:\s+(\d+)\s+\n\s+TIME_WAIT:\s+(\d+)\s+\n"
+        vs_pattern = r"virtual service: (.*?)\n\s+LISTEN: (.*?)\n\s+SYN_SENT: (.*?)\n\s+SYN_RCVD: (.*?)\n\s+ESTABLISHED: (.*?)\n\s+CLOSE_WAIT: (.*?)\n\s+FIN_WAIT_1: (.*?)\n\s+CLOSING: (.*?)\n\s+LAST_ACK: (.*?)\n\s+FIN_WAIT_2: (.*?)\n\s+TIME_WAIT: (.*?)\n"
+        final = {}
+        while log_path_list and not is_enough_log_file:
+            # Unzip and read the next log file; stop pulling files once the entries reach back before start_time
+            log_file_path = file_controller.unzip_file([log_path_list.pop(0)])[0]
+            content = file_controller.read_file(file_path=log_file_path)
+            data_list = data_controller.get_data_list(content, pattern) + data_list # each entry: [0] timestamp text, [1] command output
+            # Enough data once the first entry predates start_time, or when no log file is left to read
+            cur_first_datetime = datetime.strptime(data_list[0][0], '%a %b %d %H:%M:%S GMT %Y')
+            if cur_first_datetime < start_time or not log_path_list:
+                is_enough_log_file = True
+                # Linear scan for the first entry after start_time (could be replaced by a binary search)
+                for i in range(len(data_list)):
+                    date_obj = datetime.strptime(data_list[i][0], '%a %b %d %H:%M:%S GMT %Y')
+                    if start_time < date_obj:
+                        if is_group:
+                            group_list = data_controller.get_data_list(data_list[i][1], grp_pattern)
+                            if self.C_GLOBAL_INDEX not in final:
+                                final[self.C_GLOBAL_INDEX] = {}
+                            final[self.C_GLOBAL_INDEX][str(date_obj)] = self.convert_index_to_key(group_list[0])
                         else:
-                            each_key = self.get_virtual_service_name(line)
-                        if not each_key in final_list:
-                            final_list[each_key] = {}
-                        #final_list[each_key][date_format] = section_list
-                        final_list[each_key].update({date_format: section_list})
-                if '/Ao=' in line.encode("base64"):
-                    # end of show statistics tcp parts
-                    is_start_line = False
-        except Exception as ex:
-            andebug("hive.debug", "Cannot open monitor file: %s" % ex)
-            return {}
-        finally:
-            fp.close()
-        return final_list
-
-    def get_pair(self, line):
-        # line is "           LISTEN:  0"
-        tmp = line.split(":")
-        return {
-            'key': str(tmp[0]).strip(),
-            'value': str(tmp[1]).strip()
-        }
-
-    def get_date_str(self, line):
-        # line is "[Sun Jun 24 02:12:43 GMT 2024]"
-        tmp = str(line).strip()
-        if not '[' in tmp or not ']' in tmp:
-            return tmp
-        return tmp.split('[')[1].split(']')[0]
-
-    def get_virtual_service_name(self, line):
-        # line is "  virtual service:  tcp1"
-        tmp = str(line).strip()
-        tmp2 = tmp.split(":")
-        return str(tmp2[1]).strip()
-
-    def merge_by_key(self, dict1, dict2):
-        dict1tmp = dict1
-        dict2tmp = dict2
-        # choose dict2 as base if key count of dict2 is much than dict1 
-        if len(dict2) > len(dict1):
-            dict1tmp = dict2
-            dict2tmp = dict1
-        for key in dict1tmp.keys():
-            if dict2tmp and not key in dict2tmp:
-                continue
-            if dict2tmp and dict2tmp[key]:
-                dict1tmp[key].update(dict2tmp[key])
-        return dict1tmp
+                            vs_list = data_controller.get_data_list(data_list[i][1], vs_pattern)
+                            for vs_entry in vs_list:
+                                vs_name = vs_entry[0].strip()
+                                if vs_name not in final:
+                                    final[vs_name] = {}
+                                final[vs_name][str(date_obj)] = self.convert_index_to_key(vs_entry[1:])
+                        break
+        file_controller.delete_tmp_dir()
+        return final
+
+    def convert_index_to_key(self, list):
+        """Return a dict mapping each name in C_EACH_FIELDS to the stripped value at the same index of `list`."""
+        field_names = self.C_EACH_FIELDS
+        stripped = [list[pos].strip() for pos in range(len(field_names))]
+        return dict(zip(field_names, stripped))
 
     def convert_list_to_column(self, list):
         output = []
