Project 18237 crashes on Xeon E5-2640 v3 and RTX 3080
Posted: Mon Sep 23, 2024 12:39 pm
Hi!
I've got this configuration:
Core 0x24 fails to run:
Core 0x23 runs stably.
Maybe someone knows where the problem could be?
I've got this configuration:
Code: Select all
# free
total used free shared buff/cache available
Mem: 16239932 4990348 9094824 597800 2154760 10176476
Swap: 16777212 0 16777212
#
> info
PyON 1 info
[
[
"FAHClient",
["Version", "7.6.13"],
["Author", "Joseph Coffland <[email protected]>"],
["Copyright", "2020 foldingathome.org"],
["Homepage", "https://foldingathome.org/"],
["Date", "Apr 28 2020"],
["Time", "04:20:16"],
["Revision", "5a652817f46116b6e135503af97f18e094414e3b"],
["Branch", "master"],
["Compiler", "GNU 8.3.0"],
["Options", "-std=c++11 -ffunction-sections -fdata-sections -O3 -funroll-loops -fno-pie"],
["Platform", "linux2 4.19.0-5-amd64"],
["Bits", "64"],
["Mode", "Release"],
["Config", "/var/lib/fahclient/config.xml"]
],
[
"CBang",
["Date", "Apr 25 2020"],
["Time", "00:07:53"],
["Revision", "ea081a3b3b0f4a37c4d0440b4f1bc184197c7797"],
["Branch", "master"],
["Compiler", "GNU 8.3.0"],
["Options", "-std=c++11 -ffunction-sections -fdata-sections -O3 -funroll-loops -fno-pie -fPIC"],
["Platform", "linux2 4.19.0-5-amd64"],
["Bits", "64"],
["Mode", "Release"]
],
[
"System",
["CPU", "Intel(R) Xeon(R) CPU E5-2640 v3 @ 2.60GHz"],
["CPU ID", "GenuineIntel Family 6 Model 63 Stepping 2"],
["CPUs", "16"],
["Memory", "15.49GiB"],
["Free Memory", "10.11GiB"],
["Threads", "POSIX_THREADS"],
["OS Version", "5.4"],
["Has Battery", "false"],
["On Battery", "false"],
["UTC Offset", "3"],
["PID", "27247"],
["CWD", "/var/lib/fahclient"],
["OS", "Linux 5.4.53 x86_64"],
["OS Arch", "AMD64"],
["GPUs", "3"],
["GPU 0", "Bus:1 Slot:0 Func:0 NVIDIA:8 GA102 [GeForce RTX 3080]"],
["GPU 1", "Bus:3 Slot:0 Func:0 NVIDIA:8 GA102 [GeForce RTX 3080]"],
["GPU 2", "Bus:5 Slot:0 Func:0 NVIDIA:8 GA102 [GeForce RTX 3080]"],
["CUDA Device 0", "Platform:0 Device:0 Bus:1 Slot:0 Compute:8.6 Driver:12.0"],
["CUDA Device 1", "Platform:0 Device:1 Bus:3 Slot:0 Compute:8.6 Driver:12.0"],
["CUDA Device 2", "Platform:0 Device:2 Bus:5 Slot:0 Compute:8.6 Driver:12.0"],
["OpenCL Device 0", "Platform:0 Device:0 Bus:1 Slot:0 Compute:3.0 Driver:525.60"],
["OpenCL Device 1", "Platform:0 Device:1 Bus:3 Slot:0 Compute:3.0 Driver:525.60"],
["OpenCL Device 2", "Platform:0 Device:2 Bus:5 Slot:0 Compute:3.0 Driver:525.60"]
],
[
"libFAH",
["Date", "Apr 15 2020"],
["Time", "21:43:24"],
["Revision", "216968bc7025029c841ed6e36e81a03a316890d3"],
["Branch", "master"],
["Compiler", "GNU 8.3.0"],
["Options", "-std=c++11 -ffunction-sections -fdata-sections -O3 -funroll-loops -fno-pie"],
["Platform", "linux2 4.19.0-5-amd64"],
["Bits", "64"],
["Mode", "Release"]
]
]
---
>
Code: Select all
12:20:11:WU00:FS01:Connecting to assign1.foldingathome.org:80
12:20:12:WU00:FS01:Assigned to work server 158.130.118.23
12:20:12:WU00:FS01:Requesting new work unit for slot 01: READY gpu:0:GA102 [GeForce RTX 3080] from 158.130.118.23
12:20:12:WU00:FS01:Connecting to 158.130.118.23:8080
12:20:13:FS01:Finishing
12:20:13:WU00:FS01:Downloading 10.83MiB
12:20:19:WU00:FS01:Download 15.58%
12:20:25:WU00:FS01:Download 82.51%
12:20:25:WU00:FS01:Download complete
12:20:25:WU00:FS01:Received Unit: id:00 state:DOWNLOAD error:NO_ERROR project:18237 run:246 clone:0 gen:4 core:0x24 unit:0x00000000000000040000473d000000f6
12:20:25:WU00:FS01:Starting
12:20:25:WU00:FS01:Running FahCore: /usr/bin/FAHCoreWrapper /var/lib/fahclient/cores/cores.foldingathome.org/openmm-core-24/centos-7.9.2009-64bit/release/0x24-8.1.4/Core_24.fah/FahCore_24 -dir 00 -suffix 01 -version 706 -lifeline 24014 -checkpoint 6 -gpu-vendor nvidia -opencl-platform 0 -opencl-device 0 -cuda-device 0 -gpu 0
12:20:25:WU00:FS01:Started FahCore on PID 28625
12:20:25:WU00:FS01:Core PID:28629
12:20:25:WU00:FS01:FahCore 0x24 started
12:20:26:WARNING:WU00:FS01:FahCore returned: FAILED_2 (1 = 0x1)
12:20:26:WU00:FS01:Starting
12:20:26:WU00:FS01:Running FahCore: /usr/bin/FAHCoreWrapper /var/lib/fahclient/cores/cores.foldingathome.org/openmm-core-24/centos-7.9.2009-64bit/release/0x24-8.1.4/Core_24.fah/FahCore_24 -dir 00 -suffix 01 -version 706 -lifeline 24014 -checkpoint 6 -gpu-vendor nvidia -opencl-platform 0 -opencl-device 0 -cuda-device 0 -gpu 0
12:20:26:WU00:FS01:Started FahCore on PID 28658
12:20:26:WU00:FS01:Core PID:28662
12:20:26:WU00:FS01:FahCore 0x24 started
12:20:27:WARNING:WU00:FS01:FahCore returned: FAILED_2 (1 = 0x1)
12:21:28:WU00:FS01:Starting
12:21:28:WU00:FS01:Running FahCore: /usr/bin/FAHCoreWrapper /var/lib/fahclient/cores/cores.foldingathome.org/openmm-core-24/centos-7.9.2009-64bit/release/0x24-8.1.4/Core_24.fah/FahCore_24 -dir 00 -suffix 01 -version 706 -lifeline 24014 -checkpoint 6 -gpu-vendor nvidia -opencl-platform 0 -opencl-device 0 -cuda-device 0 -gpu 0
12:21:28:WU00:FS01:Started FahCore on PID 29786
12:21:28:WU00:FS01:Core PID:29790
12:21:28:WU00:FS01:FahCore 0x24 started
12:21:28:WARNING:WU00:FS01:FahCore returned: FAILED_2 (1 = 0x1)
12:22:28:WU00:FS01:Starting
12:22:28:WU00:FS01:Running FahCore: /usr/bin/FAHCoreWrapper /var/lib/fahclient/cores/cores.foldingathome.org/openmm-core-24/centos-7.9.2009-64bit/release/0x24-8.1.4/Core_24.fah/FahCore_24 -dir 00 -suffix 01 -version 706 -lifeline 24014 -checkpoint 6 -gpu-vendor nvidia -opencl-platform 0 -opencl-device 0 -cuda-device 0 -gpu 0
12:22:28:WU00:FS01:Started FahCore on PID 30903
12:22:28:WU00:FS01:Core PID:30907
12:22:28:WU00:FS01:FahCore 0x24 started
12:22:28:WARNING:WU00:FS01:FahCore returned: FAILED_2 (1 = 0x1)
12:23:28:WU00:FS01:Starting
12:23:28:WU00:FS01:Running FahCore: /usr/bin/FAHCoreWrapper /var/lib/fahclient/cores/cores.foldingathome.org/openmm-core-24/centos-7.9.2009-64bit/release/0x24-8.1.4/Core_24.fah/FahCore_24 -dir 00 -suffix 01 -version 706 -lifeline 24014 -checkpoint 6 -gpu-vendor nvidia -opencl-platform 0 -opencl-device 0 -cuda-device 0 -gpu 0
12:23:28:WU00:FS01:Started FahCore on PID 32001
12:23:28:WU00:FS01:Core PID:32005
12:23:28:WU00:FS01:FahCore 0x24 started
12:23:28:WARNING:WU00:FS01:FahCore returned: FAILED_2 (1 = 0x1)
12:23:28:WARNING:WU00:FS01:Too many errors, failing
12:23:28:WU00:FS01:Sending unit results: id:00 state:SEND error:FAILED project:18237 run:246 clone:0 gen:4 core:0x24 unit:0x00000000000000040000473d000000f6
12:23:28:WU00:FS01:Connecting to 158.130.118.23:8080
12:23:30:WU00:FS01:Server responded WORK_ACK (400)
12:23:30:WU00:FS01:Cleaning up
12:24:26:WU00:FS01:Connecting to assign1.foldingathome.org:80
12:24:27:WU00:FS01:Assigned to work server 158.130.118.26
12:24:27:WU00:FS01:Requesting new work unit for slot 01: READY gpu:0:GA102 [GeForce RTX 3080] from 158.130.118.26
12:24:27:WU00:FS01:Connecting to 158.130.118.26:8080
12:24:28:WU00:FS01:Downloading 9.20MiB
12:24:30:FS01:Finishing
12:24:34:WU00:FS01:Download 25.83%
12:24:40:WU00:FS01:Download 73.40%
12:24:42:WU00:FS01:Download complete
12:24:42:WU00:FS01:Received Unit: id:00 state:DOWNLOAD error:NO_ERROR project:18235 run:458 clone:0 gen:2 core:0x24 unit:0x00000000000000020000473b000001ca
12:24:42:WU00:FS01:Starting
12:24:42:WU00:FS01:Running FahCore: /usr/bin/FAHCoreWrapper /var/lib/fahclient/cores/cores.foldingathome.org/openmm-core-24/centos-7.9.2009-64bit/release/0x24-8.1.4/Core_24.fah/FahCore_24 -dir 00 -suffix 01 -version 706 -lifeline 24014 -checkpoint 6 -gpu-vendor nvidia -opencl-platform 0 -opencl-device 0 -cuda-device 0 -gpu 0
12:24:42:WU00:FS01:Started FahCore on PID 929
12:24:42:WU00:FS01:Core PID:933
12:24:42:WU00:FS01:FahCore 0x24 started
12:24:42:WARNING:WU00:FS01:FahCore returned: FAILED_2 (1 = 0x1)
12:24:43:WU00:FS01:Starting
12:24:43:WU00:FS01:Running FahCore: /usr/bin/FAHCoreWrapper /var/lib/fahclient/cores/cores.foldingathome.org/openmm-core-24/centos-7.9.2009-64bit/release/0x24-8.1.4/Core_24.fah/FahCore_24 -dir 00 -suffix 01 -version 706 -lifeline 24014 -checkpoint 6 -gpu-vendor nvidia -opencl-platform 0 -opencl-device 0 -cuda-device 0 -gpu 0
12:24:43:WU00:FS01:Started FahCore on PID 962
12:24:43:WU00:FS01:Core PID:966
12:24:43:WU00:FS01:FahCore 0x24 started
12:24:43:WARNING:WU00:FS01:FahCore returned: FAILED_2 (1 = 0x1)
12:25:43:WU00:FS01:Starting
12:25:43:WU00:FS01:Running FahCore: /usr/bin/FAHCoreWrapper /var/lib/fahclient/cores/cores.foldingathome.org/openmm-core-24/centos-7.9.2009-64bit/release/0x24-8.1.4/Core_24.fah/FahCore_24 -dir 00 -suffix 01 -version 706 -lifeline 24014 -checkpoint 6 -gpu-vendor nvidia -opencl-platform 0 -opencl-device 0 -cuda-device 0 -gpu 0
12:25:43:WU00:FS01:Started FahCore on PID 2082
12:25:43:WU00:FS01:Core PID:2086
12:25:43:WU00:FS01:FahCore 0x24 started
12:25:44:WARNING:WU00:FS01:FahCore returned: FAILED_2 (1 = 0x1)
12:26:43:WU00:FS01:Starting
12:26:43:WU00:FS01:Running FahCore: /usr/bin/FAHCoreWrapper /var/lib/fahclient/cores/cores.foldingathome.org/openmm-core-24/centos-7.9.2009-64bit/release/0x24-8.1.4/Core_24.fah/FahCore_24 -dir 00 -suffix 01 -version 706 -lifeline 24014 -checkpoint 6 -gpu-vendor nvidia -opencl-platform 0 -opencl-device 0 -cuda-device 0 -gpu 0
12:26:43:WU00:FS01:Started FahCore on PID 3212
12:26:43:WU00:FS01:Core PID:3216
12:26:43:WU00:FS01:FahCore 0x24 started
12:26:44:WARNING:WU00:FS01:FahCore returned: FAILED_2 (1 = 0x1)
12:27:44:WU00:FS01:Starting
12:27:44:WU00:FS01:Running FahCore: /usr/bin/FAHCoreWrapper /var/lib/fahclient/cores/cores.foldingathome.org/openmm-core-24/centos-7.9.2009-64bit/release/0x24-8.1.4/Core_24.fah/FahCore_24 -dir 00 -suffix 01 -version 706 -lifeline 24014 -checkpoint 6 -gpu-vendor nvidia -opencl-platform 0 -opencl-device 0 -cuda-device 0 -gpu 0
12:27:44:WU00:FS01:Started FahCore on PID 4312
12:27:44:WU00:FS01:Core PID:4316
12:27:44:WU00:FS01:FahCore 0x24 started
12:27:44:WARNING:WU00:FS01:FahCore returned: FAILED_2 (1 = 0x1)
12:27:44:WARNING:WU00:FS01:Too many errors, failing
12:27:44:WU00:FS01:Sending unit results: id:00 state:SEND error:FAILED project:18235 run:458 clone:0 gen:2 core:0x24 unit:0x00000000000000020000473b000001ca
12:27:44:WU00:FS01:Connecting to 158.130.118.26:8080
12:27:46:WU00:FS01:Server responded WORK_ACK (400)
12:27:46:WU00:FS01:Cleaning up
Maybe someone knows where the problem could be?