Index: /branches/amp_4_0/platform/tools/container/manage_amp.sh
===================================================================
--- /branches/amp_4_0/platform/tools/container/manage_amp.sh	(revision 2915)
+++ /branches/amp_4_0/platform/tools/container/manage_amp.sh	(working copy)
@@ -433,6 +433,28 @@
             echo "✅ Directory already exists: $DIR"
         fi
     done
+    
+    # Create Telegraf config directory for amp-api modifications
+    echo ""
+    echo "--- Initializing Telegraf Config Directory ---"
+    TELEGRAF_CONFIG_DIR="/opt/amp/telegraf.d"
+    if [ ! -d "$TELEGRAF_CONFIG_DIR" ]; then
+        echo "Creating directory: $TELEGRAF_CONFIG_DIR"
+        sudo mkdir -p "$TELEGRAF_CONFIG_DIR"
+        
+        # Copy default SNMP configs from services directory
+        if [ -d "$SERVICES_DIR/telegraf/telegraf.d" ]; then
+            echo "Copying default Telegraf configs..."
+            sudo cp "$SERVICES_DIR/telegraf/telegraf.d"/*.toml "$TELEGRAF_CONFIG_DIR/" 2>/dev/null || true
+            sudo chmod 644 "$TELEGRAF_CONFIG_DIR"/*.toml 2>/dev/null || true
+            echo "✅ Copied default configs to $TELEGRAF_CONFIG_DIR"
+        else
+            echo "⚠️  Source config directory not found: $SERVICES_DIR/telegraf/telegraf.d"
+        fi
+        echo "✅ Created $TELEGRAF_CONFIG_DIR"
+    else
+        echo "✅ Telegraf config directory already exists: $TELEGRAF_CONFIG_DIR"
+    fi
     
     # Configure Firewall Rules (if firewalld is available)
     if command -v firewall-cmd &> /dev/null; then
Index: /branches/amp_4_0/platform/tools/container/services/logstash/pipeline/syslog.conf
===================================================================
--- /branches/amp_4_0/platform/tools/container/services/logstash/pipeline/syslog.conf	(revision 2915)
+++ /branches/amp_4_0/platform/tools/container/services/logstash/pipeline/syslog.conf	(working copy)
@@ -178,8 +178,74 @@
       }
     }
 
+    # Attempt 6: Minimal Syslog (No Hostname) - Format: <pri>YYYY MMM DD HH:mm:ss message
+    if "_grokparsefailure_rfc5424" in [tags] and "_grokparsefailure_an_welf_log" in [tags] and "_grokparsefailure_an_welf_log_no_header" in [tags] and "_grokparsefailure_custom_nonstandard" in [tags] and "_grokparsefailure_bsd" in [tags] {
+      grok {
+        match => {
+          "message" => "^<%{POSINT:syslog_pri}>%{YEAR:syslog_year} %{MONTH:syslog_month} +%{MONTHDAY:syslog_day} %{TIME:syslog_time} %{GREEDYDATA:syslog_message}"
+        }
+        tag_on_failure => ["_grokparsefailure_minimal"]
+        add_tag => ["minimal_syslog_attempt"]
+      }
+      if !("_grokparsefailure_minimal" in [tags]) {
+        mutate {
+          add_field => { "syslog_timestamp_raw" => "%{syslog_year} %{syslog_month} %{syslog_day} %{syslog_time}" }
+          remove_field => ["syslog_year", "syslog_month", "syslog_day", "syslog_time"]
+        }
+        # Parse the raw timestamp and convert to ISO format
+        date {
+          match => ["syslog_timestamp_raw", "yyyy MMM dd HH:mm:ss", "yyyy MMM  d HH:mm:ss"]
+          target => "syslog_timestamp"
+          tag_on_failure => ["_dateparsefailure_minimal"]
+        }
+        # If date parsing failed, just remove the problematic field
+        if "_dateparsefailure_minimal" in [tags] {
+          mutate {
+            remove_field => ["syslog_timestamp", "syslog_timestamp_raw"]
+          }
+        } else {
+          mutate {
+            remove_field => ["syslog_timestamp_raw"]
+          }
+        }
+        # Extract hostname from host field since message doesn't contain it
+        mutate {
+          copy => { "[host]" => "device_ip" }
+        }
+      }
+    }
+
+    # Attempt 7: Cisco-Style Syslog - Format: <pri>sequence: timestamp: %FACILITY-SEVERITY-MNEMONIC: message
+    if "_grokparsefailure_rfc5424" in [tags] and "_grokparsefailure_an_welf_log" in [tags] and "_grokparsefailure_an_welf_log_no_header" in [tags] and "_grokparsefailure_custom_nonstandard" in [tags] and "_grokparsefailure_bsd" in [tags] and "_grokparsefailure_minimal" in [tags] {
+      grok {
+        match => {
+          "message" => "^<%{POSINT:syslog_pri}>(?:%{NUMBER:sequence}:)?\s*(?:%{DATA:syslog_timestamp}:)?\s*%%{DATA:cisco_facility}-%{INT:cisco_severity}-%{DATA:cisco_mnemonic}:\s*%{GREEDYDATA:syslog_message}"
+        }
+        tag_on_failure => ["_grokparsefailure_cisco"]
+        add_tag => ["cisco_syslog_attempt"]
+      }
+    }
+
+    # Attempt 8: Simple Priority + Message (Fallback for any format with just <pri>message)
+    if "_grokparsefailure_rfc5424" in [tags] and "_grokparsefailure_an_welf_log" in [tags] and "_grokparsefailure_an_welf_log_no_header" in [tags] and "_grokparsefailure_custom_nonstandard" in [tags] and "_grokparsefailure_bsd" in [tags] and "_grokparsefailure_minimal" in [tags] and "_grokparsefailure_cisco" in [tags] {
+      grok {
+        match => {
+          "message" => "^<%{POSINT:syslog_pri}>%{GREEDYDATA:syslog_message}"
+        }
+        tag_on_failure => ["_grokparsefailure_simple"]
+        add_tag => ["simple_syslog_fallback"]
+      }
+      if !("_grokparsefailure_simple" in [tags]) {
+        mutate {
+          copy => { "[host]" => "device_ip" }
+        }
+      }
+    }
+
+
     # Stage 3: Common Post-Parsing Processing
-    if !("_grokparsefailure_rfc5424" in [tags] and "_grokparsefailure_an_welf_log" in [tags] and "_grokparsefailure_an_welf_log_no_header" in [tags] and "_grokparsefailure_custom_nonstandard" in [tags] and "_grokparsefailure_bsd" in [tags]) {
+    # Only process if at least one parsing attempt succeeded
+    if !("_grokparsefailure_rfc5424" in [tags] and "_grokparsefailure_an_welf_log" in [tags] and "_grokparsefailure_an_welf_log_no_header" in [tags] and "_grokparsefailure_custom_nonstandard" in [tags] and "_grokparsefailure_bsd" in [tags] and "_grokparsefailure_minimal" in [tags] and "_grokparsefailure_cisco" in [tags] and "_grokparsefailure_simple" in [tags]) {
       # USERAGENT FILTER
       if [user_agent] {
         useragent {
@@ -239,6 +305,38 @@
         "
       }
 
+      # Sub-parser for AN_WELF_LOG content in syslog_message
+      if [syslog_message] =~ /^AN_WELF_LOG:/ {
+        grok {
+          match => {
+            "syslog_message" => "^AN_WELF_LOG:id=%{WORD:log_id} time=\"%{DATA:log_time}\" fw=%{IP:virtual_ip} pri=%{POSINT:welf_priority} proto=%{WORD:protocol} src=%{IP:src_ip} dstname=%{IP:destination_name} arg=%{DATA:arg} op=%{WORD:http_method} agent=\"%{DATA:user_agent}\" result=%{INT:http_status_code} sent=%{INT:bytes_sent} duration=%{NUMBER:duration} msg=\"%{GREEDYDATA:welf_message}\""
+          }
+          tag_on_failure => ["_grokparsefailure_welf_sub"]
+          add_tag => ["an_welf_log_subparsed"]
+        }
+        if !("_grokparsefailure_welf_sub" in [tags]) {
+          # Parse the welf_message for cache and peer info
+          grok {
+            match => {
+              "welf_message" => "^cache:%{WORD:cache_status}\s+peer:%{WORD:peer_type}/%{IP:peer_ip}$"
+            }
+            tag_on_failure => ["_grokparsefailure_welf_msg"]
+          }
+          mutate {
+            remove_field => ["welf_message"]
+          }
+          # Convert numeric fields
+          mutate {
+            convert => {
+              "http_status_code" => "integer"
+              "bytes_sent" => "integer"
+              "duration" => "float"
+              "welf_priority" => "integer"
+            }
+          }
+        }
+      }
+
       # IP RENAMING LOGIC
       mutate {
         copy => { "[host][ip]" => "device_ip" }
@@ -270,9 +368,9 @@
         jdbc_streaming {
           jdbc_driver_library => "/usr/share/logstash/drivers/postgresql.jar"
           jdbc_driver_class => "org.postgresql.Driver"
-          jdbc_connection_string => "jdbc:postgresql://${DB_HOST}/amp_ts"
-          jdbc_user => "amp_ts_user"
-          jdbc_password => "Array@123$"
+          jdbc_connection_string => "jdbc:postgresql://${DB_HOST}/cm"
+          jdbc_user => "${POSTGRES_USER}"
+          jdbc_password => "${POSTGRES_PASSWORD}"
           statement => "SELECT name, type, device_group FROM device WHERE ip_address = :device_ip"
           parameters => { "device_ip" => "device_ip" }
           target => "device_info"
@@ -296,11 +394,32 @@
         }
       }
 
+      # Clean up failure tags and add success tag based on which parser succeeded
       if !("_grokparsefailure_rfc5424" in [tags]) {
         mutate {
-          remove_tag => ["_grokparsefailure_an_welf_log", "_grokparsefailure_an_welf_log_no_header", "_grokparsefailure_custom_nonstandard", "_grokparsefailure_bsd"]
+          remove_tag => ["_grokparsefailure_an_welf_log", "_grokparsefailure_an_welf_log_no_header", "_grokparsefailure_custom_nonstandard", "_grokparsefailure_bsd", "_grokparsefailure_minimal", "_grokparsefailure_cisco", "_grokparsefailure_simple"]
           add_tag => ["syslog_parsed", "rfc5424"]
         }
+      } else if !("_grokparsefailure_minimal" in [tags]) {
+        mutate {
+          remove_tag => ["_grokparsefailure_rfc5424", "_grokparsefailure_an_welf_log", "_grokparsefailure_an_welf_log_no_header", "_grokparsefailure_custom_nonstandard", "_grokparsefailure_bsd", "_grokparsefailure_cisco", "_grokparsefailure_simple"]
+          add_tag => ["syslog_parsed", "minimal_format"]
+        }
+      } else if !("_grokparsefailure_cisco" in [tags]) {
+        mutate {
+          remove_tag => ["_grokparsefailure_rfc5424", "_grokparsefailure_an_welf_log", "_grokparsefailure_an_welf_log_no_header", "_grokparsefailure_custom_nonstandard", "_grokparsefailure_bsd", "_grokparsefailure_minimal", "_grokparsefailure_simple"]
+          add_tag => ["syslog_parsed", "cisco_format"]
+        }
+      } else if !("_grokparsefailure_simple" in [tags]) {
+        mutate {
+          remove_tag => ["_grokparsefailure_rfc5424", "_grokparsefailure_an_welf_log", "_grokparsefailure_an_welf_log_no_header", "_grokparsefailure_custom_nonstandard", "_grokparsefailure_bsd", "_grokparsefailure_minimal", "_grokparsefailure_cisco"]
+          add_tag => ["syslog_parsed", "simple_format"]
+        }
+      } else if !("_grokparsefailure_bsd" in [tags]) {
+        mutate {
+          remove_tag => ["_grokparsefailure_rfc5424", "_grokparsefailure_an_welf_log", "_grokparsefailure_an_welf_log_no_header", "_grokparsefailure_custom_nonstandard", "_grokparsefailure_minimal", "_grokparsefailure_cisco", "_grokparsefailure_simple"]
+          add_tag => ["syslog_parsed", "bsd_format"]
+        }
       }
     }
   }
Index: /branches/amp_4_0/platform/tools/container/services/postgres/initdb.d/02_telegraf_snmp.sql
===================================================================
--- /branches/amp_4_0/platform/tools/container/services/postgres/initdb.d/02_telegraf_snmp.sql	(revision 2915)
+++ /branches/amp_4_0/platform/tools/container/services/postgres/initdb.d/02_telegraf_snmp.sql	(working copy)
@@ -738,7 +738,32 @@
 SELECT add_retention_policy('asf_https_service',            INTERVAL '180 days');
 COMMIT;
 
--- Grant permissions to amp_ts_user so Grafana can see the tables
+-- Grant permissions to amp_ts_user so Telegraf can write and Grafana can read
 GRANT USAGE ON SCHEMA public TO amp_ts_user;
-GRANT SELECT ON ALL TABLES IN SCHEMA public TO amp_ts_user;
-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO amp_ts_user;
+GRANT SELECT, INSERT, UPDATE ON ALL TABLES IN SCHEMA public TO amp_ts_user;
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT, INSERT, UPDATE ON TABLES TO amp_ts_user;
+
+-- Grant table ownership to amp_ts_user so Telegraf can ALTER tables (add columns)
+-- APV tables
+ALTER TABLE IF EXISTS an_device_metrics OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS an_device_performance OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS an_device_storage OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS apv_virtual_stats OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS apv_real_stats OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS apv_llb_stats OWNER TO amp_ts_user;
+-- AG tables
+ALTER TABLE IF EXISTS ag_device_metrics OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS ag_virtual_site_stats OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS ag_vpn_stats OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS ag_web_stats OWNER TO amp_ts_user;
+-- ASF tables
+ALTER TABLE IF EXISTS asf_device_metrics OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS asf_device_storage OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS asf_ssl_statistics OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS asf_ssl_host_statistics OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS asf_vip_group_statistics OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS asf_vip_statistics OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS asf_syslog_history OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS asf_performance_statistics OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS asf_http_service OWNER TO amp_ts_user;
+ALTER TABLE IF EXISTS asf_https_service OWNER TO amp_ts_user;
Index: /branches/amp_4_0/platform/tools/container/stack.yml
===================================================================
--- /branches/amp_4_0/platform/tools/container/stack.yml	(revision 2915)
+++ /branches/amp_4_0/platform/tools/container/stack.yml	(working copy)
@@ -446,16 +446,24 @@
   logstash:
     image: ${REGISTRY:-127.0.0.1:5000}/amp/logstash:latest
     deploy:
-      replicas: 1
+      mode: global  # Run on every node for reliability
     ports:
-      - "514:5514/tcp"
-      - "514:5514/udp"
+      - target: 5514
+        published: 514
+        protocol: tcp
+        mode: host
+      - target: 5514
+        published: 514
+        protocol: udp
+        mode: host
     environment:
       OPENSEARCH_URL: https://${AMP_DOMAIN_OR_IP:-localhost}:9200 # Clean URL for Logstash
       POSTGRES_PASSWORD_FILE: /run/secrets/pg_password
       POSTGRES_USER: postgres
       POSTGRES_DB: cm
       DB_HOST: ${AMP_DOMAIN_OR_IP} # For JDBC Filter
+      AMP_DB_USER: ${AMP_DB_USER:-amp_ts_user}
+      AMP_DB_PASSWORD: ${AMP_DB_PASSWORD:-Array@123$}
     command: >
       bash -c "export OPENSEARCH_INITIAL_ADMIN_PASSWORD=\$$(cat /run/secrets/opensearch_initial_admin_password) && /usr/share/logstash/bin/logstash"
 
Index: /branches/amp_4_0/platform/tools/container/stack.yml.template
===================================================================
--- /branches/amp_4_0/platform/tools/container/stack.yml.template	(revision 2915)
+++ /branches/amp_4_0/platform/tools/container/stack.yml.template	(working copy)
@@ -13,12 +13,6 @@
     file: services/nginx/conf.d/app.conf
   telegraf_main_conf:
     file: services/telegraf/telegraf.conf
-  telegraf_ag_conf:
-    file: services/telegraf/telegraf.d/ag.toml
-  telegraf_apv_conf:
-    file: services/telegraf/telegraf.d/apv.toml
-  telegraf_asf_conf:
-    file: services/telegraf/telegraf.d/asf.toml
   logstash_pipeline_conf:
     file: services/logstash/pipeline/syslog.conf
   logstash_config_yml:
@@ -368,7 +362,7 @@
       - label=disable
     deploy:
       mode: global # Run on EVERY node to monitor docker
-    entrypoint: ["telegraf"]
+    entrypoint: ["telegraf", "--config", "/etc/telegraf/telegraf.conf", "--config", "/etc/telegraf/telegraf.d/apv.toml", "--config", "/etc/telegraf/telegraf.d/ag.toml", "--config", "/etc/telegraf/telegraf.d/asf.toml"]
     environment:
       PG_PASSWORD_FILE: /run/secrets/pg_password
     secrets:
@@ -376,18 +370,13 @@
     configs:
       - source: telegraf_main_conf
         target: /etc/telegraf/telegraf.conf
-      - source: telegraf_ag_conf
-        target: /etc/telegraf/telegraf.d/ag.toml
-      - source: telegraf_apv_conf
-        target: /etc/telegraf/telegraf.d/apv.toml
-      - source: telegraf_asf_conf
-        target: /etc/telegraf/telegraf.d/asf.toml
     volumes:
       - /var/run/docker.sock:/var/run/docker.sock
       - /dev:/dev:ro
       - /proc:/rootfs/proc:ro
       - /sys:/rootfs/sys:ro
       - /etc:/rootfs/etc:ro
+      - /opt/amp/telegraf.d:/etc/telegraf/telegraf.d:rw  # Host-mounted for amp-api modifications
     networks:
       - hostnet
 
@@ -396,16 +385,21 @@
     deploy:
       mode: global  # Run on every node to ensure syslog collection from all hosts
     ports:
-      - "514:5514/tcp"
-      - "514:5514/udp"
+      - target: 5514
+        published: 514
+        protocol: tcp
+        mode: host
+      - target: 5514
+        published: 514
+        protocol: udp
+        mode: host
     environment:
       OPENSEARCH_URL: https://${AMP_DOMAIN_OR_IP:-localhost}:9200 # Clean URL for Logstash
-      POSTGRES_PASSWORD_FILE: /run/secrets/pg_password
       POSTGRES_USER: postgres
       POSTGRES_DB: cm
       DB_HOST: ${AMP_DB_JDBC_HOSTS} # For JDBC Filter (Multi-host string)
     command: >
-      bash -c "export OPENSEARCH_INITIAL_ADMIN_PASSWORD=\$$(cat /run/secrets/opensearch_initial_admin_password) && /usr/share/logstash/bin/logstash"
+      bash -c "export OPENSEARCH_INITIAL_ADMIN_PASSWORD=$$(cat /run/secrets/opensearch_initial_admin_password) && export POSTGRES_PASSWORD=$$(cat /run/secrets/pg_password) && /usr/share/logstash/bin/logstash"
 
     secrets:
       - opensearch_initial_admin_password
@@ -450,6 +444,7 @@
       - DB_NAME=${POSTGRES_DB_CM:-cm}
       - DB_USER=${POSTGRES_USER:-postgres}
       - DB_PASSWORD=${POSTGRES_PASSWORD:-Arr@y2050}
+      - OPENSEARCH_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD:-Arr@y2050}
       - DJANGO_SETTINGS_MODULE=djproject.settings
       - PYTHONPATH=/ca/webui/htdocs/new/src
     volumes:
@@ -482,8 +477,12 @@
         source: /etc/group
         target: /etc/group
         read_only: true
+      # Mount Docker socket for service health checks
+      - /var/run/docker.sock:/var/run/docker.sock:ro
     extra_hosts:
       - "host.docker.internal:host-gateway"
+    secrets:
+      - opensearch_jwt_secret
     networks:
       - hostnet
     ports:
Index: /branches/amp_4_0/src/webui/webui/htdocs/new/src/hive/an_opensearch.py
===================================================================
--- /branches/amp_4_0/src/webui/webui/htdocs/new/src/hive/an_opensearch.py	(revision 2915)
+++ /branches/amp_4_0/src/webui/webui/htdocs/new/src/hive/an_opensearch.py	(working copy)
@@ -5,13 +5,15 @@
 import datetime
 import base64
 import yaml
+import os
 from hive.utils import andebug
 
 CONFIG_YML_PATH = '/etc/opensearch/opensearch-security/config.yml'
 
-HOST = 'localhost'
+HOST = '127.0.0.1'
 PORT = 9200
-AUTH = ('admin', 'Arr@y2050')
+OPENSEARCH_PASSWORD = os.environ.get('OPENSEARCH_PASSWORD', 'admin')
+AUTH = ('admin', OPENSEARCH_PASSWORD)
 
 
 def opensearch_proxy(request):
@@ -28,13 +30,16 @@
 
 
 def get_opensearch_sso_token(request):
-    # Load the signing key from config.yml
-    with open(CONFIG_YML_PATH, 'r') as file:
-        config = yaml.safe_load(file)
+    # Load the signing key from Docker secret
+    JWT_SECRET_PATH = '/run/secrets/opensearch_jwt_secret'
+    
+    try:
+        with open(JWT_SECRET_PATH, 'r') as file:
+            secret_key = file.read().strip().encode('utf-8')
+    except FileNotFoundError:
+        # Fallback to environment variable if secret file not available
+        secret_key = os.environ.get('OPENSEARCH_JWT_SECRET', 'supersecretjwtkey').encode('utf-8')
 
-    base64_key = config['config']['dynamic']['authc']['jwt_auth_domain']['http_authenticator']['config']['signing_key']
-    secret_key = base64.b64decode(base64_key)
-
     # Use timezone-aware UTC datetime objects
     now = datetime.datetime.now(datetime.timezone.utc)
 
Index: /branches/amp_4_0/src/webui/webui/htdocs/new/src/hive/controller/utils.py
===================================================================
--- /branches/amp_4_0/src/webui/webui/htdocs/new/src/hive/controller/utils.py	(revision 2915)
+++ /branches/amp_4_0/src/webui/webui/htdocs/new/src/hive/controller/utils.py	(working copy)
@@ -1,6 +1,7 @@
 import json
 
 from django.http import HttpResponse, JsonResponse
+from django.views.decorators.csrf import csrf_exempt
 from cm.lib.libbasic_operation import oper_log
 from hive.custom_exceptions import generic_exception as ge
 from hive.services.utils import get_observability_services_status, perform_observability_services_restart
@@ -25,6 +26,7 @@
         raise ge.GenericError(500, message)
 
 
+@csrf_exempt
 def handle_observability_restart_req(request, path=None):
     try:
         if request.method == 'POST':
Index: /branches/amp_4_0/src/webui/webui/htdocs/new/src/hive/services/utils.py
===================================================================
--- /branches/amp_4_0/src/webui/webui/htdocs/new/src/hive/services/utils.py	(revision 2915)
+++ /branches/amp_4_0/src/webui/webui/htdocs/new/src/hive/services/utils.py	(working copy)
@@ -10,6 +10,150 @@
 from cm.lib.libbasic_operation import oper_log
 
 
+# Service health check configuration
+# amp-core runs on host network mode, so services are accessible via localhost
+# OpenSearch password is read from environment variable
+OPENSEARCH_PASSWORD = os.environ.get('OPENSEARCH_PASSWORD', 'admin')
+
+SERVICE_HEALTH_CONFIG = {
+    'opensearch': {
+        'host': '127.0.0.1',
+        'port': 9200,
+        'check_type': 'https',
+        'path': '/_cluster/health',
+        'use_ssl': True,
+        'auth': {'user': 'admin', 'pass': OPENSEARCH_PASSWORD}
+    },
+    'opensearch-dashboards': {
+        'host': '127.0.0.1',
+        'port': 5601,
+        'check_type': 'https',
+        'path': '/api/status',
+        'use_ssl': True
+    },
+    'logstash': {
+        'host': '127.0.0.1',
+        'port': 514,  # Syslog port (9600 monitoring port not published)
+        'check_type': 'tcp'
+    },
+    'telegraf': {
+        'check_type': 'docker',
+        'docker_service': 'amp_telegraf'
+    },
+}
+
+# Mapping of service names to their Docker Swarm service names (for restart)
+DOCKER_SERVICE_MAP = {
+    'opensearch': 'amp_opensearch',
+    'opensearch-dashboards': 'amp_opensearch-dashboards',
+    'logstash': 'amp_logstash',
+    'telegraf': 'amp_telegraf',
+}
+
+
+def check_service_health(service_name):
+    """Check if a service is healthy by connecting to it."""
+    import socket
+    try:
+        config = SERVICE_HEALTH_CONFIG.get(service_name)
+        if not config:
+            return False
+
+        check_type = config.get('check_type', 'tcp')
+
+        if check_type == 'docker':
+            # Docker service check - use Docker socket API directly
+            try:
+                import http.client
+                import urllib.parse
+
+                docker_service = config.get('docker_service', 'amp_' + service_name)
+                
+                # Connect to Docker socket
+                class UnixHTTPConnection(http.client.HTTPConnection):
+                    def __init__(self, socket_path, timeout=5):
+                        super().__init__('localhost', timeout=timeout)
+                        self.socket_path = socket_path
+
+                    def connect(self):
+                        import socket as sock_module
+                        self.sock = sock_module.socket(sock_module.AF_UNIX, sock_module.SOCK_STREAM)
+                        self.sock.settimeout(self.timeout)
+                        self.sock.connect(self.socket_path)
+
+                conn = UnixHTTPConnection('/var/run/docker.sock', timeout=5)
+                conn.request('GET', '/v1.44/services?filters=' + urllib.parse.quote('{"name":["' + docker_service + '"]}'))
+                response = conn.getresponse()
+                
+                if response.status == 200:
+                    import json as json_lib
+                    data = json_lib.loads(response.read().decode('utf-8'))
+                    if data and len(data) > 0:
+                        service_data = data[0]
+                        # Check replicas from service spec and status
+                        mode = service_data.get('Spec', {}).get('Mode', {})
+                        if 'Replicated' in mode:
+                            desired = mode['Replicated'].get('Replicas', 0)
+                        else:
+                            # Global mode - count running tasks
+                            desired = 1  # At least 1 expected
+                        
+                        # For now, service exists = running (detailed task check would be more complex)
+                        return True
+                conn.close()
+                return False
+            except Exception as e:
+                oper_log('warning', 'system', 'Docker socket check failed for {}: {}'.format(service_name, str(e)))
+                return False
+
+        host = config['host']
+        port = config['port']
+
+        if check_type == 'http' or check_type == 'https':
+            # HTTP/HTTPS health check
+            try:
+                import urllib.request
+                import ssl
+
+                protocol = 'https' if config.get('use_ssl', False) else 'http'
+                url = "{}://{}:{}{}".format(protocol, host, port, config.get('path', '/'))
+
+                # Create request with optional basic auth
+                request = urllib.request.Request(url)
+                if config.get('auth'):
+                    import base64
+                    credentials = base64.b64encode(
+                        "{}:{}".format(config['auth']['user'], config['auth']['pass']).encode()
+                    ).decode()
+                    request.add_header('Authorization', 'Basic {}'.format(credentials))
+
+                # Create SSL context that doesn't verify certificates (for self-signed certs)
+                ssl_context = None
+                if protocol == 'https':
+                    ssl_context = ssl.create_default_context()
+                    ssl_context.check_hostname = False
+                    ssl_context.verify_mode = ssl.CERT_NONE
+
+                response = urllib.request.urlopen(request, timeout=5, context=ssl_context)
+                # Consider 2xx as "running"
+                return response.status < 300
+            except urllib.error.HTTPError as e:
+                # 401/403 means service is running but auth failed - still counts as running
+                return e.code < 500
+            except Exception:
+                return False
+        else:
+            # TCP socket check
+            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            sock.settimeout(3)
+            result = sock.connect_ex((host, port))
+            sock.close()
+            return result == 0
+    except Exception as e:
+        oper_log('warning', 'system', 'Error checking service {}: {}'.format(service_name, str(e)))
+        return False
+
+
 def get_observability_services_status():
     try:
         services_list = [
@@ -20,13 +164,12 @@
         ]
         result = []
         for service in services_list:
-            process = len(os.popen('ps aux | grep "' + service[
-                'value'] + '" | grep -v grep | grep -v tail | grep -v keepH5ssAlive').readlines())
-            result.append({'value': process >= 1, 'label': service['label'], 'service': service['value']})
+            is_running = check_service_health(service['value'])
+            result.append({'value': is_running, 'label': service['label'], 'service': service['value']})
         return result
     except Exception as e:
         oper_log('error', 'system', "Exception while fetching observability services status.")
-        message = str(e.message).replace("'", "")
+        message = str(e).replace("'", "")
         message = 'Error while fetching observability services status, details: {}'.format(message)
         return HttpResponse(json.dumps({
             "message": "while fetching observability services status",
@@ -35,35 +178,90 @@
 
 
 def perform_observability_services_restart(service_name):
-    command = ["/bin/systemctl", "restart", service_name]
-    try:
-        process = subprocess.Popen(
-            command,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE
-        )
-        stdout, stderr = process.communicate()
-        exit_code = process.returncode
+    # Map the service name to Docker Swarm service name
+    docker_service_name = DOCKER_SERVICE_MAP.get(service_name)
+    if not docker_service_name:
+        return {
+            "status": False,
+            "message": "Unknown service: {}".format(service_name)
+        }
 
-        if exit_code == 0:
+    # Use direct HTTP requests to Docker socket (no external module needed)
+    import socket
+    import http.client
+    
+    class DockerSocketConnection(http.client.HTTPConnection):
+        """HTTP connection via Unix socket"""
+        def __init__(self, socket_path='/var/run/docker.sock'):
+            super().__init__('localhost')
+            self.socket_path = socket_path
+            
+        def connect(self):
+            self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+            self.sock.connect(self.socket_path)
+    
+    try:
+        conn = DockerSocketConnection()
+        
+        # Get service details first
+        conn.request('GET', '/services/{}'.format(docker_service_name))
+        response = conn.getresponse()
+        
+        if response.status == 404:
+            conn.close()
             return {
+                "status": False,
+                "message": "Service '{}' not found.".format(docker_service_name)
+            }
+        
+        if response.status != 200:
+            error_body = response.read().decode('utf-8')
+            conn.close()
+            return {
+                "status": False,
+                "message": "Failed to get service info: {}".format(error_body)
+            }
+        
+        service_data = json.loads(response.read().decode('utf-8'))
+        version = service_data.get('Version', {}).get('Index', 0)
+        spec = service_data.get('Spec', {})
+        
+        # Increment ForceUpdate to trigger restart
+        task_template = spec.get('TaskTemplate', {})
+        force_update = task_template.get('ForceUpdate', 0) + 1
+        task_template['ForceUpdate'] = force_update
+        spec['TaskTemplate'] = task_template
+        
+        # Update the service
+        update_body = json.dumps(spec)
+        headers = {'Content-Type': 'application/json'}
+        conn.request('POST', '/services/{}/update?version={}'.format(docker_service_name, version), 
+                     body=update_body, headers=headers)
+        update_response = conn.getresponse()
+        
+        if update_response.status == 200:
+            conn.close()
+            return {
                 "status": True,
-                "message": "The service has been restarted successfully."
+                "message": "The service '{}' has been restarted successfully.".format(service_name)
             }
         else:
+            error_body = update_response.read().decode('utf-8')
+            conn.close()
             return {
                 "status": False,
-                "message": "Failed to restart the service. Exit code: {}".format(exit_code)
+                "message": "Failed to restart service: {}".format(error_body)
             }
 
     except Exception as e:
         oper_log('error', 'system', "Exception while restarting the observability services.")
         message = str(e).replace("'", "")
         message = 'Exception while restarting the observability services, details: {}'.format(message)
-        return HttpResponse(json.dumps({
+        return {
+            "status": False,
             "message": "Exception while restarting the observability services",
             "details": "{}".format(message)
-        }), content_type="application/json", status=500)
+        }
 
 
 def construct_json_response(metrics):
