Skip to content

Commit

Permalink
feat(mis-web): 管理系统新增集群监控功能 (#1044)
Browse files Browse the repository at this point in the history
管理系统新增集群监控功能
1. 新增资源信息页面,通过嵌入 grafana 查看集群信息
支持直接嵌入 grafana 或通过代理的方式将请求转发到 grafana

![image](https://github.com/PKUHPC/SCOW/assets/140392039/70dbd318-5dc8-4f04-a9d6-d6fc90b5b89c)

2. 新增告警日志页面,通过 grafana api 获取告警日志信息

![image](https://github.com/PKUHPC/SCOW/assets/140392039/25228d35-82a4-4731-993a-63f9edb828f0)
  • Loading branch information
Miracle575 authored Jan 2, 2024
1 parent d1c2e74 commit abb7e84
Show file tree
Hide file tree
Showing 27 changed files with 2,320 additions and 4 deletions.
7 changes: 7 additions & 0 deletions .changeset/young-pants-occur.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"@scow/mis-web": minor
"@scow/config": minor
"@scow/cli": minor
---

管理系统新增集群监控功能
16 changes: 16 additions & 0 deletions apps/cli/assets/config/mis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,22 @@ createUser:
# 外置页面URL
# url: http://your-create-user-page.com

# # 集群监控配置
# clusterMonitor:
# # 协议 + ip/域名 + 端口,默认为 http://127.0.0.1:4000
# grafanaUrl: "http://172.16.20.125:4000"
# # 资源状态相关配置
# resourceStatus:
# # 是否开启资源状态,默认不开启
# enabled: false
# # 是否使用代理的方式
# proxy: false
# # 默认面板 id,默认设置为 shZOtO4Sk
# dashboardUid: "shZOtO4Sk"
# # 告警日志配置
# alarmLogs:
# # 是否开启告警日志,默认不开启
# enabled: false

# # 新增导航链接相关配置
# navLinks:
Expand Down
12 changes: 12 additions & 0 deletions apps/mis-web/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,18 @@ const buildRuntimeConfig = async (phase, basePath) => {

SYSTEM_LANGUAGE_CONFIG: systemLanguageConfig,

CLUSTER_MONITOR: {
grafanaUrl: misConfig.clusterMonitor?.grafanaUrl,
resourceStatus: {
enabled: misConfig.clusterMonitor?.resourceStatus?.enabled,
proxy: misConfig.clusterMonitor?.resourceStatus?.proxy,
dashboardUid: misConfig.clusterMonitor?.resourceStatus?.dashboardUid,
},
alarmLogs: {
enabled: misConfig.clusterMonitor?.alarmLogs?.enabled,
},
},

UI_EXTENSION: misConfig.uiExtension,

};
Expand Down
1 change: 1 addition & 0 deletions apps/mis-web/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
"styled-components": "6.1.1",
"tslib": "2.6.2",
"typescript": "5.2.2",
"http-proxy": "1.18.1",
"@ant-design/cssinjs": "1.17.2",
"react-typed-i18n": "2.3.0",
"csv-stringify": "6.4.4"
Expand Down
17 changes: 17 additions & 0 deletions apps/mis-web/src/apis/api.mock.ts
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,23 @@ export const mockApi: MockApi<typeof api> = {
operationTime: "2020-04-23T23:49:50.000Z",
operationEvent: { $case: "login", login: {} },
}], totalCount: 1 }),

getAlarmDbId: async () => ({
id: 13,
uid: "kfcfkxq4",
name: "alertdb",
type: "mysql",
}),
getAlarmLogs: async () => ({ results: [{
id: 13,
status: "resolved",
severity: "Warning",
fingerprint: "38cc18aad8e553f6",
description: "hpc01 partition: normal - CPU usage above 80% (current value: 1)",
startsAt: 1702886670000,
endsAt: 1702889670000,
}]}),
getAlarmLogsCount: async () => ({ totalCount: 1 }),
};

export const MOCK_USER_INFO = {
Expand Down
8 changes: 7 additions & 1 deletion apps/mis-web/src/apis/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ import type { GetTopChargeAccountSchema } from "src/pages/api/admin/getTopCharge
import type { GetTopPayAccountSchema } from "src/pages/api/admin/getTopPayAccount";
import type { GetTopSubmitJobUserSchema } from "src/pages/api/admin/getTopSubmitJobUser";
import type { ImportUsersSchema } from "src/pages/api/admin/importUsers";
import type { GetAlarmDbIdSchema } from "src/pages/api/admin/monitor/getAlarmDbId";
import type { GetAlarmLogsSchema } from "src/pages/api/admin/monitor/getAlarmLogs";
import type { GetAlarmLogsCountSchema } from "src/pages/api/admin/monitor/getAlarmLogsCount";
import type { QueryStorageQuotaSchema } from "src/pages/api/admin/queryStorageQuota";
import type { SetPlatformRoleSchema } from "src/pages/api/admin/setPlatformRole";
import type { SetTenantRoleSchema } from "src/pages/api/admin/setTenantRole";
Expand Down Expand Up @@ -101,7 +104,6 @@ import type { UnblockUserInAccountSchema } from "src/pages/api/users/unblockInAc
import type { UnsetAdminSchema } from "src/pages/api/users/unsetAdmin"; ;

export const api = {
cancelJob: apiClient.fromTypeboxRoute<typeof CancelJobSchema>("DELETE", "/api/job/cancelJob"),
changeJobPrice: apiClient.fromTypeboxRoute<typeof ChangeJobPriceSchema>("PATCH", "/api/admin/changeJobPrice"),
changePasswordAsPlatformAdmin: apiClient.fromTypeboxRoute<typeof ChangePasswordAsPlatformAdminSchema>("PATCH", "/api/admin/changePassword"),
changeStorageQuota: apiClient.fromTypeboxRoute<typeof ChangeStorageQuotaSchema>("PUT", "/api/admin/changeStorage"),
Expand All @@ -117,6 +119,9 @@ export const api = {
getPlatformUsersCounts: apiClient.fromTypeboxRoute<typeof GetPlatformUsersCountsSchema>("GET", "/api/admin/getPlatformUsersCounts"),
getTenantUsers: apiClient.fromTypeboxRoute<typeof GetTenantUsersSchema>("GET", "/api/admin/getTenantUsers"),
importUsers: apiClient.fromTypeboxRoute<typeof ImportUsersSchema>("POST", "/api/admin/importUsers"),
getAlarmDbId: apiClient.fromTypeboxRoute<typeof GetAlarmDbIdSchema>("GET", "/api/admin/monitor/getAlarmDbId"),
getAlarmLogs: apiClient.fromTypeboxRoute<typeof GetAlarmLogsSchema>("GET", "/api/admin/monitor/getAlarmLogs"),
getAlarmLogsCount: apiClient.fromTypeboxRoute<typeof GetAlarmLogsCountSchema>("GET", "/api/admin/monitor/getAlarmLogsCount"),
queryStorageQuota: apiClient.fromTypeboxRoute<typeof QueryStorageQuotaSchema>("GET", "/api/admin/queryStorageQuota"),
setPlatformRole: apiClient.fromTypeboxRoute<typeof SetPlatformRoleSchema>("PUT", "/api/admin/setPlatformRole"),
setTenantRole: apiClient.fromTypeboxRoute<typeof SetTenantRoleSchema>("PUT", "/api/admin/setTenantRole"),
Expand Down Expand Up @@ -152,6 +157,7 @@ export const api = {
unsetInitAdmin: apiClient.fromTypeboxRoute<typeof UnsetInitAdminSchema>("DELETE", "/api/init/unsetInitAdmin"),
userExists: apiClient.fromTypeboxRoute<typeof UserExistsSchema>("POST", "/api/init/userExists"),
addBillingItem: apiClient.fromTypeboxRoute<typeof AddBillingItemSchema>("POST", "/api/job/addBillingItem"),
cancelJob: apiClient.fromTypeboxRoute<typeof CancelJobSchema>("DELETE", "/api/job/cancelJob"),
changeJobTimeLimit: apiClient.fromTypeboxRoute<typeof ChangeJobTimeLimitSchema>("PATCH", "/api/job/changeJobTimeLimit"),
getAvailableBillingTable: apiClient.fromTypeboxRoute<typeof GetAvailableBillingTableSchema>("GET", "/api/job/getAvailableBillingTable"),
getBillingItems: apiClient.fromTypeboxRoute<typeof GetBillingItemsSchema>("GET", "/api/job/getBillingItems"),
Expand Down
30 changes: 30 additions & 0 deletions apps/mis-web/src/i18n/en.ts
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ export default {
statusSynchronization: "Block Status Synchronization",
jobSynchronization: "Job Information Synchronization",
accountList: "Account List",
clusterMonitor: "Cluster Monitor",
resourceStatus: "Resource Status",
alarmLog: "Alarm Log",
},
tenantManagement: {
firstNav: "Tenant Management",
Expand Down Expand Up @@ -241,6 +244,17 @@ export default {
},
},
admin: {
allAlarmLogsTable: {
firing: "Triggering",
resolved: "Resolved",
serialNumber: "Serial Number",
fingerPrint: "Finger Print",
status: "Status",
alarmLevel: "Alarm Level",
description: "Description",
firingTime: "Trigger Time",
resolvedTime: "Resolved Time",
},
allTenantsTable: {
tenantName: "Tenant Name",
accountCount: "Account Count",
Expand Down Expand Up @@ -834,6 +848,22 @@ export default {
unableReinitialize: "The system has already been initialized and cannot be reinitialized!",
},
admin: {
monitor: {
alarmLog: {
alarmLog: "Alarm Log",
firingTime: "Trigger Time",
firingTimePrompt: "Trigger time of the alarm",
status: "Status",
selectAll: "Select All",
firing: "Triggering",
resolved: "Resolved",
search: "Search",
refresh: "Refresh",
},
resourceStatus: {
resourceStatus: "Resource Status",
},
},
operationLogs: {
platformOperationLog: "Platform Operation Log",
},
Expand Down
30 changes: 30 additions & 0 deletions apps/mis-web/src/i18n/zh_cn.ts
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ export default {
statusSynchronization: "封锁状态同步",
jobSynchronization: "作业信息同步",
accountList: "账户列表",
clusterMonitor: "集群监控",
resourceStatus: "资源状态",
alarmLog: "告警日志",
},
tenantManagement: {
firstNav: "租户管理",
Expand Down Expand Up @@ -241,6 +244,17 @@ export default {
},
},
admin:{
allAlarmLogsTable: {
firing: "触发中",
resolved: "已解决",
serialNumber: "序号",
fingerPrint: "指纹",
status: "状态",
alarmLevel: "告警级别",
description: "描述",
firingTime: "触发时间",
resolvedTime: "处理时间",
},
allTenantsTable:{
tenantName:"租户名称",
accountCount:"账户数量",
Expand Down Expand Up @@ -834,6 +848,22 @@ export default {
unableReinitialize: "系统已经初始化完成,无法重新初始化!",
},
admin: {
monitor: {
alarmLog: {
alarmLog: "告警日志",
firingTime: "触发时间",
firingTimePrompt: "告警的触发时间",
status: "状态",
selectAll: "全选",
firing: "触发中",
resolved: "已解决",
search: "搜索",
refresh: "刷新",
},
resourceStatus: {
resourceStatus: "资源状态",
},
},
operationLogs: {
platformOperationLog: "平台操作日志",
},
Expand Down
25 changes: 22 additions & 3 deletions apps/mis-web/src/layouts/routes.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
*/

import {
AccountBookOutlined, BookOutlined, CloudServerOutlined, DashboardOutlined,
InfoOutlined, LineChartOutlined,
LinkOutlined, LockOutlined, MoneyCollectOutlined, PartitionOutlined,
AccountBookOutlined, AlertOutlined, BookOutlined, CloudServerOutlined,
DashboardOutlined,
InfoOutlined, LineChartOutlined, LinkOutlined, LockOutlined, MoneyCollectOutlined, MonitorOutlined, PartitionOutlined,
PlusOutlined, PlusSquareOutlined, ProfileOutlined,
StarOutlined, ToolOutlined, UserAddOutlined,
UserOutlined } from "@ant-design/icons";
Expand Down Expand Up @@ -131,6 +131,25 @@ export const platformAdminRoutes: (platformRoles: PlatformRole[], t: TransType)
},
],
},
...(platformRoles.includes(PlatformRole.PLATFORM_ADMIN) &&
(publicConfig.CLUSTER_MONITOR.resourceStatus.enabled || publicConfig.CLUSTER_MONITOR.alarmLogs.enabled) ? [{
Icon: MonitorOutlined,
text: t(pPlatform("clusterMonitor")),
path: "/admin/monitor",
clickable: false,
children: [
...(publicConfig.CLUSTER_MONITOR.resourceStatus.enabled) ? [{
Icon: LineChartOutlined,
text: t(pPlatform("resourceStatus")),
path: "/admin/monitor/resourceStatus",
}] : [],
...(publicConfig.CLUSTER_MONITOR.alarmLogs.enabled) ? [{
Icon: AlertOutlined,
text: t(pPlatform("alarmLog")),
path: "/admin/monitor/alarmLog",
}] : [],
],
}] : []),
...(publicConfig.AUDIT_DEPLOYED && platformRoles.includes(PlatformRole.PLATFORM_ADMIN) ?
[{
Icon: BookOutlined,
Expand Down
100 changes: 100 additions & 0 deletions apps/mis-web/src/pageComponents/admin/AllAlarmLogsTable.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/**
* Copyright (c) 2022 Peking University and Peking University Institute for Computing and Digital Economy
* SCOW is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
*/

import { Static } from "@sinclair/typebox";
import { Table, Tag } from "antd";
import dayjs from "dayjs";
import { prefix, useI18nTranslateToString } from "src/i18n";
import { GetAlarmLogsSchema } from "src/pages/api/admin/monitor/getAlarmLogs";

/**
 * Paging state handed to the antd table and reused by the serial-number
 * column to compute a row's position across pages.
 */
interface Pagination {
  /** Page currently displayed; the serial-number column treats it as 1-based. */
  current: number;
  /** Total number of rows across all pages, or undefined when not known. */
  total: number | undefined;

  /** Rows per page currently selected, or undefined when none has been chosen. */
  pageSize: number | undefined;
  /** Rows per page to use before the user picks a size. */
  defaultPageSize: number;
  /** Whether the page-size selector is shown. */
  showSizeChanger: boolean;

  /** Called with the new page number and page size when paging changes. */
  onChange: (page: number, pageSize: number) => void;
}

/** Props accepted by the alarm logs table. */
interface Props {
  /** Paging state; forwarded to the table and used for serial numbers. */
  pagination: Pagination;
  /** Forwarded to the table's `loading` flag. */
  isLoading: boolean;
  /**
   * 200 response of the getAlarmLogs API; its `results` become the table rows.
   * Undefined while no response is available.
   */
  data: Static<typeof GetAlarmLogsSchema["responses"]["200"]> | undefined;
}

/** Shape of one row rendered by the alarm logs table. */
interface AlarmLog {
  /** Row identifier; used as the table's row key. */
  id: number;
  /** Alarm fingerprint, shown as-is. */
  fingerprint: string;
  /** Human-readable description, shown as-is. */
  description: string;

  /** Alarm status; "firing" and "resolved" have dedicated tags. */
  status: string;
  /** Alarm level; "Warning" and "Error" have dedicated tags. */
  severity: string;

  /** Trigger time as a numeric timestamp, formatted with dayjs. */
  startsAt: number;
  /** Resolution time as a numeric timestamp, or null; a falsy value is shown as "-". */
  endsAt: number | null;
}

// Translation-key helper for this table; every label below is looked up as t(p("<key>")).
const p = prefix("pageComp.admin.allAlarmLogsTable.");

/** dayjs format shared by the firing-time and resolved-time columns. */
const ALARM_TIME_FORMAT = "YYYY-MM-DD HH:mm:ss";

/**
 * Table of alarm logs.
 *
 * Columns: serial number, fingerprint, status, alarm level, description,
 * firing time and resolved time.
 *
 * @param data - getAlarmLogs response; `data.results` supplies the rows.
 * @param isLoading - forwarded to the table's loading indicator.
 * @param pagination - forwarded to the table and used to number rows
 *   continuously across pages.
 */
export const AllAlarmLogsTable: React.FC<Props> = ({ data, isLoading, pagination }) => {

  const t = useI18nTranslateToString();

  /**
   * Tag for an alarm status. Statuses without a dedicated tag are shown
   * verbatim in a neutral tag instead of leaving the cell blank.
   */
  const renderStatus = (status: string) => {
    switch (status) {
      case "firing":
        return <Tag color="error">{t(p("firing"))}</Tag>;
      case "resolved":
        return <Tag color="success">{t(p("resolved"))}</Tag>;
      default:
        return status ? <Tag>{status}</Tag> : null;
    }
  };

  /**
   * Tag for an alarm level. Levels without a dedicated tag are shown
   * verbatim in a neutral tag so that no alarm level is silently hidden.
   */
  const renderSeverity = (severity: string) => {
    switch (severity) {
      case "Warning":
        return <Tag color="warning">{"Warning"}</Tag>;
      case "Error":
        return <Tag color="error">{"Error"}</Tag>;
      default:
        return severity ? <Tag>{severity}</Tag> : null;
    }
  };

  return (
    <Table
      dataSource={data?.results}
      loading={isLoading}
      rowKey="id"
      scroll={{ x: true }}
      pagination={pagination}
    >
      <Table.Column<AlarmLog>
        dataIndex="id"
        title={t(p("serialNumber"))}
        render={(_text, _record, index) => {
          const { current, pageSize, defaultPageSize } = pagination;
          // pageSize may be undefined (or 0) before a size is chosen; fall back to
          // the default size so numbering continues across pages instead of
          // restarting at 1 on every page.
          const rowsPerPage = pageSize || defaultPageSize;
          return (current - 1) * rowsPerPage + index + 1;
        }}
      />
      <Table.Column<AlarmLog> dataIndex="fingerprint" title={t(p("fingerPrint"))} />
      <Table.Column<AlarmLog>
        dataIndex="status"
        title={t(p("status"))}
        render={(status: string) => renderStatus(status)}
      />
      <Table.Column<AlarmLog>
        dataIndex="severity"
        title={t(p("alarmLevel"))}
        render={(severity: string) => renderSeverity(severity)}
      />
      <Table.Column<AlarmLog> dataIndex="description" title={t(p("description"))} />
      <Table.Column<AlarmLog>
        dataIndex="startsAt"
        title={t(p("firingTime"))}
        render={(startsAt: number) => dayjs(startsAt).format(ALARM_TIME_FORMAT)}
      />
      <Table.Column<AlarmLog>
        dataIndex="endsAt"
        title={t(p("resolvedTime"))}
        // A missing (falsy) end time means the alarm has no resolution time to show.
        render={(endsAt: number | null) => endsAt ? dayjs(endsAt).format(ALARM_TIME_FORMAT) : "-"}
      />
    </Table>
  );
};
Loading

0 comments on commit abb7e84

Please sign in to comment.