Skip to content

Commit 969904b

Browse files
committed
modified
1 parent f36b9a2 commit 969904b

File tree

7 files changed

+55
-475
lines changed

7 files changed

+55
-475
lines changed

slurm.spec

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ Summary: Slurm Workload Manager
8585

8686
License: GPL
8787
Group: System Environment/Base
88-
Source: slurm-17.02.9_edit_v16.tar.bz2
88+
Source: slurm-17.02.9_edit_final.tar.bz2
8989
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}
9090
URL: https://slurm.schedmd.com/
9191

@@ -374,7 +374,7 @@ according to the Slurm
374374
#############################################################################
375375

376376
%prep
377-
%setup -n slurm-17.02.9_edit_v16
377+
%setup -n slurm-17.02.9_edit_final
378378

379379
%build
380380
%configure \

src/api/node_info.c

Lines changed: 5 additions & 295 deletions
Original file line numberDiff line numberDiff line change
@@ -791,207 +791,17 @@ extern int slurm_get_node_power(char *host, uint16_t *socket_cnt, power_current_
791791

792792

793793
/*
794-
* slurm_get_node_power.c - issue RPC to get the power of node
795-
* IN host - name of node to query, NULL if localhost
796-
* OUT energy - pointer of power_current_data_t structures
797-
* on success or NULL other wise
798-
* RET 0 on success or a slurm error code
799-
* NOTE: free the response using xfree
800-
*/
801-
extern int slurm_get_node_power2(char *host, uint16_t socket_cnt, power_capping_data_t *set_info)
802-
{
803-
int rc;
804-
int k;
805-
slurm_msg_t req_msg;
806-
slurm_msg_t resp_msg;
807-
//acct_gather_energy_req_msg_t req;
808-
//power_knob_get_info_req_msg_t req;
809-
power_knob_set_req_msg_t req;
810-
uint32_t cluster_flags = slurmdb_setup_cluster_flags();
811-
char *this_addr;
812-
813-
814-
req.cap_info = set_info;
815-
// req.socket_cnt = socket_cnt;
816-
// req.socket_cnt = 1;
817-
818-
slurm_msg_t_init(&req_msg);
819-
slurm_msg_t_init(&resp_msg);
820-
821-
for(k=0;k<req.socket_cnt;k++){
822-
debug("Cap inside node_info.c %d is %d", k, req.cap_info[k].cpu_cap_watts);
823-
}
824-
825-
debug("req.socket_cnt %d", req.socket_cnt);
826-
if (host)
827-
slurm_conf_get_addr(host, &req_msg.address);
828-
else if (cluster_flags & CLUSTER_FLAG_MULTSD) {
829-
if ((this_addr = getenv("SLURMD_NODENAME"))) {
830-
slurm_conf_get_addr(this_addr, &req_msg.address);
831-
} else {
832-
this_addr = "localhost";
833-
slurm_set_addr(&req_msg.address,
834-
(uint16_t)slurm_get_slurmd_port(),
835-
this_addr);
836-
}
837-
} else {
838-
char this_host[256];
839-
/*
840-
* Set request message address to slurmd on localhost
841-
*/
842-
gethostname_short(this_host, sizeof(this_host));
843-
this_addr = slurm_conf_get_nodeaddr(this_host);
844-
if (this_addr == NULL)
845-
this_addr = xstrdup("localhost");
846-
slurm_set_addr(&req_msg.address,
847-
(uint16_t)slurm_get_slurmd_port(),
848-
this_addr);
849-
xfree(this_addr);
850-
}
851-
req_msg.msg_type = REQUEST_POWER_KNOB_SET;
852-
// req_msg.msg_type = REQUEST_POWER_KNOB_GET_INFO;
853-
req_msg.data = &req;
854-
855-
debug3("CAO: do slurm_send_recv_node_msg. *host = '%s'", host);
856-
rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0);
857-
858-
859-
if (rc != 0 || !resp_msg.auth_cred) {
860-
error("slurm_get_node_energy: %m");
861-
if (resp_msg.auth_cred)
862-
g_slurm_auth_destroy(resp_msg.auth_cred);
863-
return SLURM_ERROR;
864-
}
865-
if (resp_msg.auth_cred)
866-
g_slurm_auth_destroy(resp_msg.auth_cred);
867-
switch (resp_msg.msg_type) {
868-
case RESPONSE_POWER_KNOB_GET_INFO:
869-
socket_cnt = ((power_knob_get_info_node_resp_msg_t *)
870-
resp_msg.data)->socket_cnt;
871-
// set_info = ((power_knob_get_info_node_resp_msg_t *)
872-
// resp_msg.data)->power_info;
873-
((power_knob_get_info_node_resp_msg_t *) resp_msg.data)->power_info = NULL;
874-
slurm_free_power_knob_get_info_node_resp_msg(resp_msg.data);
875-
break;
876-
case RESPONSE_SLURM_RC:
877-
rc = ((return_code_msg_t *) resp_msg.data)->return_code;
878-
slurm_free_return_code_msg(resp_msg.data);
879-
if (rc)
880-
slurm_seterrno_ret(rc);
881-
break;
882-
default:
883-
slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
884-
break;
885-
}
886-
887-
return SLURM_PROTOCOL_SUCCESS;
888-
}
889-
890-
891-
/*
892-
* slurm_get_node_power.c - issue RPC to get the power of node
893-
* IN host - name of node to query, NULL if localhost
894-
* OUT energy - pointer of power_current_data_t structures
895-
* on success or NULL other wise
896-
* RET 0 on success or a slurm error code
897-
* NOTE: free the response using xfree
898-
*/
899-
extern int slurm_get_node_power3(char *host, int set_info)
900-
{
901-
int rc;
902-
int k;
903-
slurm_msg_t req_msg;
904-
slurm_msg_t resp_msg;
905-
power_knob_cap_req_msg_t req;
906-
uint32_t cluster_flags = slurmdb_setup_cluster_flags();
907-
char *this_addr;
908-
909-
int socket_cnt =2;
910-
911-
req.cap_info = set_info;
912-
913-
slurm_msg_t_init(&req_msg);
914-
slurm_msg_t_init(&resp_msg);
915-
916-
for(k=0;k<socket_cnt;k++){
917-
debug("Cap inside node_info.c %d is %d", k, req.cap_info);
918-
}
919-
920-
if (host)
921-
slurm_conf_get_addr(host, &req_msg.address);
922-
else if (cluster_flags & CLUSTER_FLAG_MULTSD) {
923-
if ((this_addr = getenv("SLURMD_NODENAME"))) {
924-
slurm_conf_get_addr(this_addr, &req_msg.address);
925-
} else {
926-
this_addr = "localhost";
927-
slurm_set_addr(&req_msg.address,
928-
(uint16_t)slurm_get_slurmd_port(),
929-
this_addr);
930-
}
931-
} else {
932-
char this_host[256];
933-
/*
934-
* Set request message address to slurmd on localhost
935-
*/
936-
gethostname_short(this_host, sizeof(this_host));
937-
this_addr = slurm_conf_get_nodeaddr(this_host);
938-
if (this_addr == NULL)
939-
this_addr = xstrdup("localhost");
940-
slurm_set_addr(&req_msg.address,
941-
(uint16_t)slurm_get_slurmd_port(),
942-
this_addr);
943-
xfree(this_addr);
944-
}
945-
req_msg.msg_type = REQUEST_POWER_CAP_SET;
946-
req_msg.data = &req;
947-
948-
debug3("CAO: do slurm_send_recv_node_msg. *host = '%s'", host);
949-
rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0);
950-
951-
if (rc != 0 || !resp_msg.auth_cred) {
952-
error("slurm_get_node_energy: %m");
953-
if (resp_msg.auth_cred)
954-
g_slurm_auth_destroy(resp_msg.auth_cred);
955-
return SLURM_ERROR;
956-
}
957-
if (resp_msg.auth_cred)
958-
g_slurm_auth_destroy(resp_msg.auth_cred);
959-
switch (resp_msg.msg_type) {
960-
case RESPONSE_POWER_KNOB_GET_INFO:
961-
socket_cnt = ((power_knob_get_info_node_resp_msg_t *)
962-
resp_msg.data)->socket_cnt;
963-
// set_info = ((power_knob_get_info_node_resp_msg_t *)
964-
// resp_msg.data)->power_info;
965-
((power_knob_get_info_node_resp_msg_t *) resp_msg.data)->power_info = NULL;
966-
slurm_free_power_knob_get_info_node_resp_msg(resp_msg.data);
967-
break;
968-
case RESPONSE_SLURM_RC:
969-
rc = ((return_code_msg_t *) resp_msg.data)->return_code;
970-
slurm_free_return_code_msg(resp_msg.data);
971-
if (rc)
972-
slurm_seterrno_ret(rc);
973-
break;
974-
default:
975-
slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
976-
break;
977-
}
978-
979-
return SLURM_PROTOCOL_SUCCESS;
980-
}
981-
982-
983-
/*
984-
* slurm_get_node_power.c - issue RPC to get the power of node
794+
* slurm_set_node_power4 - issue RPC to set the power cap of a node
985795
* IN host - name of node to query, NULL if localhost
986-
* OUT energy - pointer of power_current_data_t structures
796+
* IN set_info, set_info2 - power cap values, one per socket (two sockets)
797+
* OUT none
987798
* on success or NULL otherwise
988799
* RET 0 on success or a slurm error code
989800
* NOTE: free the response using xfree
990801
*/
991802
extern int slurm_set_node_power4(char *host, int set_info, int set_info2)
992803
{
993804
int rc;
994-
int k;
995805
slurm_msg_t req_msg;
996806
slurm_msg_t resp_msg;
997807
power_knob_cap_req_msg_t req;
@@ -1006,10 +816,8 @@ extern int slurm_set_node_power4(char *host, int set_info, int set_info2)
1006816
slurm_msg_t_init(&req_msg);
1007817
slurm_msg_t_init(&resp_msg);
1008818

1009-
//for(k=0;k<socket_cnt;k++){
1010-
debug("Cap inside node_info.c 0 is %d", req.cap_info);
1011-
debug("Cap inside node_info.c 1 is %d", req.cap_info2);
1012-
//}
819+
debug("Cap inside node_info.c 0 is %d", req.cap_info);
820+
debug("Cap inside node_info.c 1 is %d", req.cap_info2);
1013821

1014822
if (host)
1015823
slurm_conf_get_addr(host, &req_msg.address);
@@ -1072,104 +880,6 @@ extern int slurm_set_node_power4(char *host, int set_info, int set_info2)
1072880

1073881
return SLURM_PROTOCOL_SUCCESS;
1074882
}
1075-
/*
1076-
* slurm_set_node_power.c - issue RPC to set the power cap of node
1077-
* IN host - name of node to query, NULL if localhost
1078-
* OUT energy - pointer of power_current_data_t structures
1079-
* on success or NULL other wise
1080-
* RET 0 on success or a slurm error code
1081-
* NOTE: free the response using xfree
1082-
*/
1083-
// Linh changed extern int slurm_set_node_power(char *host, uint16_t socket_cnt, power_capping_data_t *set_info)
1084-
extern int slurm_set_node_power(char *host, uint16_t socket_cnt, power_capping_data_t *set_info)
1085-
{
1086-
1087-
int rc;
1088-
slurm_msg_t req_msg;
1089-
slurm_msg_t resp_msg;
1090-
//power_knob_get_info_req_msg_t req;
1091-
power_knob_set_req_msg_t req;
1092-
1093-
uint32_t cluster_flags = slurmdb_setup_cluster_flags();
1094-
char *this_addr;
1095-
1096-
req.cap_info = set_info;
1097-
req.socket_cnt = socket_cnt;
1098-
1099-
slurm_msg_t_init(&req_msg);
1100-
slurm_msg_t_init(&resp_msg);
1101-
int k;
1102-
1103-
for(k=0;k<socket_cnt;k++){
1104-
debug("Cap inside node_info.c %d is %d", k, req.cap_info[k].cpu_cap_watts);
1105-
}
1106-
1107-
if (host)
1108-
slurm_conf_get_addr(host, &req_msg.address);
1109-
else if (cluster_flags & CLUSTER_FLAG_MULTSD) {
1110-
if ((this_addr = getenv("SLURMD_NODENAME"))) {
1111-
slurm_conf_get_addr(this_addr, &req_msg.address);
1112-
} else {
1113-
this_addr = "localhost";
1114-
slurm_set_addr(&req_msg.address,
1115-
(uint16_t)slurm_get_slurmd_port(),
1116-
this_addr);
1117-
}
1118-
} else {
1119-
char this_host[256];
1120-
/*
1121-
* Set request message address to slurmd on localhost
1122-
*/
1123-
gethostname_short(this_host, sizeof(this_host));
1124-
this_addr = slurm_conf_get_nodeaddr(this_host);
1125-
if (this_addr == NULL)
1126-
this_addr = xstrdup("localhost");
1127-
slurm_set_addr(&req_msg.address,
1128-
(uint16_t)slurm_get_slurmd_port(),
1129-
this_addr);
1130-
xfree(this_addr);
1131-
}
1132-
1133-
req_msg.msg_type = REQUEST_POWER_KNOB_SET;
1134-
// req_msg.msg_type = REQUEST_POWER_KNOB_GET_INFO;
1135-
req_msg.data = &req;
1136-
1137-
debug3(" slurm_send_recv_node_msg(&req_msg, &resp_msg, 0);");
1138-
rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0);
1139-
debug3(" FINISH slurm_send_recv_node_msg(&req_msg, &resp_msg, 0);");
1140-
if (rc != 0 || !resp_msg.auth_cred) {
1141-
error("slurm_get_node_energy: %m");
1142-
if (resp_msg.auth_cred)
1143-
g_slurm_auth_destroy(resp_msg.auth_cred);
1144-
return SLURM_ERROR;
1145-
}
1146-
if (resp_msg.auth_cred)
1147-
g_slurm_auth_destroy(resp_msg.auth_cred);
1148-
switch (resp_msg.msg_type) {
1149-
case RESPONSE_POWER_KNOB_GET_INFO:
1150-
// TODO
1151-
// add a function of updating node power value using below data
1152-
socket_cnt = ((power_knob_get_info_node_resp_msg_t *)
1153-
resp_msg.data)->socket_cnt;
1154-
//*get_info = ((power_knob_get_info_node_resp_msg_t *)
1155-
// resp_msg.data)->power_info;
1156-
((power_knob_get_info_node_resp_msg_t *) resp_msg.data)->power_info = NULL;
1157-
slurm_free_power_knob_get_info_node_resp_msg(resp_msg.data);
1158-
break;
1159-
case RESPONSE_SLURM_RC:
1160-
rc = ((return_code_msg_t *) resp_msg.data)->return_code;
1161-
slurm_free_return_code_msg(resp_msg.data);
1162-
if (rc)
1163-
slurm_seterrno_ret(rc);
1164-
break;
1165-
default:
1166-
slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
1167-
break;
1168-
}
1169-
1170-
return SLURM_PROTOCOL_SUCCESS;
1171-
}
1172-
1173883

1174884
/*
1175885
* slurm_get_cache - issue RPC to get the cache of node

src/plugins/power_allocator/dynamic/power_allocator_dynamic.c

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ static void node_power_schedule(void)
301301
cap_values[1] = (int) perf_function;
302302

303303
slurm_set_power_cap2(cap_values, node_name,number_of_node);
304-
304+
debug ("Finish node_power_schedule" );
305305
}
306306

307307
list_iterator_destroy(job_iterator);
@@ -375,11 +375,11 @@ int power_allocator_p_do_power_safe(){
375375
if (sum1 >slurmctld_conf.power_alert){
376376
percentage_dif = ((float)sum1 /slurmctld_conf.power_alert) *100;
377377
printf ("Alert, power is %f%% of power budget \n", percentage_dif);
378-
}
379-
percentage_dif = ((float)sum1 /slurmctld_conf.power_alert) *100;
380-
381-
printf ("NO Alert, power is %f%% of power budget \n", percentage_dif);
382-
378+
}
379+
else {
380+
percentage_dif = ((float)sum1 /slurmctld_conf.power_alert) *100;
381+
printf ("NO Alert, power is %f%% of power budget \n", percentage_dif);
382+
}
383383
return SLURM_SUCCESS;
384384
}
385385

@@ -553,11 +553,14 @@ static void slurm_set_power_cap2(int *power_cap_value[], char *node_name[], int
553553
debug("_get_node_power_task: can't get info from slurmd(NODE : %s)", node_ptr->name);
554554
debug("After assign of set power");
555555
}
556+
debug("Outside if");
556557
}
557558

558559
debug("Socket is %d", socket_cnt);
559560
lock_slurmctld(node_write_lock);
560-
memcpy(node_ptr->power_info->power_cap, powers_cap, sizeof(power_capping_data_t) * socket_cnt);
561+
debug("before memcpy");
562+
//memcpy(node_ptr->power_info->power_cap, powers_cap, sizeof(power_capping_data_t) * socket_cnt);
563+
debug("after memcpy");
561564
unlock_slurmctld(node_write_lock);
562565

563566
xfree(powers);

0 commit comments

Comments
 (0)