| /* Copyright 2015-2022 Rivoreo |
| |
| Permission is hereby granted, free of charge, to any person obtaining |
| a copy of this software and associated documentation files (the |
| "Software"), to deal in the Software without restriction, including |
| without limitation the rights to use, copy, modify, merge, publish, |
| distribute, sublicense, and/or sell copies of the Software, and to |
| permit persons to whom the Software is furnished to do so, subject to |
| the following conditions: |
| |
| The above copyright notice and this permission notice shall be |
| included in all copies or substantial portions of the Software. |
| |
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE |
| FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF |
| CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
| WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| */ |
| |
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
| |
/* Opaque NVML device handle; this file only ever passes pointers to it around. */
struct nvml_device;

/*
 * Function table handed out by nvmlInternalGetExportTable().
 * The entries have differing signatures, which is why the element type is an
 * unprototyped function pointer; import_nvml_internal_functions() converts
 * each entry to its real type on assignment.
 */
struct nvml_functions {
	/* NOTE(review): presumably the size of the table as reported by the library; never read in this file. */
	size_t size;
	/* C99 flexible array member (was a GNU zero-length array). */
	int (*func[])();
};

/*
 * Memory statistics filled in by nvmlDeviceGetMemoryInfo().
 * The values are divided by 1048576 before being printed as MiB, i.e. they are byte counts.
 */
struct nvml_memory {
	unsigned long long int total, free, used;
};
| |
/*
 * Hand-written prototypes for the NVML entry points used by this tool.
 * Status codes are handled as plain int (0 means success) and device handles
 * as pointers to the opaque struct nvml_device.
 */

/* Library life cycle and error reporting. */
int nvmlInit_v2(void);
int nvmlInitWithFlags(unsigned int flags);
int nvmlShutdown(void);
char *nvmlErrorString(int status);

/* Device enumeration. */
int nvmlDeviceGetCount_v2(unsigned int *count);
int nvmlDeviceGetHandleByIndex_v2(unsigned int index, struct nvml_device **device);

/* Identification; each call writes a string into a caller supplied buffer of the given length. */
int nvmlDeviceGetName(struct nvml_device *device, char *buffer, unsigned int length);
int nvmlDeviceGetSerial(struct nvml_device *device, char *buffer, unsigned int length);
int nvmlDeviceGetUUID(struct nvml_device *device, char *buffer, unsigned int length);
int nvmlDeviceGetMinorNumber(struct nvml_device *device, unsigned int *minor_number);

/* Sensors and limits (power is printed as mW, energy as mJ / 1000 = J by this tool). */
int nvmlDeviceGetPowerUsage(struct nvml_device *device, unsigned int *power);
int nvmlDeviceGetTotalEnergyConsumption(struct nvml_device *device, unsigned long long int *energy);
int nvmlDeviceGetMemoryInfo(struct nvml_device *device, struct nvml_memory *memory);
int nvmlDeviceGetEccMode(struct nvml_device *device, int *current, int *pending);
int nvmlDeviceGetPowerManagementLimit(struct nvml_device *device, unsigned int *limit);
int nvmlDeviceGetFanSpeed(struct nvml_device *device, unsigned int *speed);
int nvmlDeviceGetTemperature(struct nvml_device *device, unsigned int sensor, unsigned int *temperature);

/* Undocumented entry point: returns the internal function table selected by a 16-byte identifier. */
int nvmlInternalGetExportTable(struct nvml_functions **table, const unsigned char *id);
| |
/*
 * 16-byte identifiers accepted by nvmlInternalGetExportTable(); each one
 * selects a different internal function table. Only the first identifier is
 * requested by the code visible in this file.
 */
static const unsigned char magic[4][16] = {
	{ 0xC4, 0xFE, 0x3E, 0x6C, 0xC9, 0x8F, 0x6C, 0x4E,
	  0xA3, 0x27, 0xEE, 0x69, 0x6E, 0x12, 0xF7, 0xC4 },
	{ 0xB8, 0x1D, 0xD7, 0x30, 0xE4, 0xD2, 0x81, 0x44,
	  0x9C, 0xD0, 0xE1, 0xDE, 0x35, 0x04, 0xFA, 0x27 },
	{ 0x9E, 0x8C, 0x7A, 0x58, 0x12, 0x3A, 0xB5, 0x4F,
	  0xB9, 0xE3, 0xF4, 0xBA, 0x39, 0x45, 0xDD, 0x5F },
	{ 0x4E, 0x3B, 0x47, 0x52, 0x76, 0x17, 0xE4, 0x11,
	  0xAA, 0x00, 0xB2, 0x22, 0x7C, 0xCE, 0x2B, 0x54 }
};

/* Export tables returned by the library, one slot per identifier above. */
static struct nvml_functions *func[sizeof magic / sizeof magic[0]];
| |
/*
 * Result buffer of the internal "get raw clock" call.
 * The frequencies are divided by 1000 before being printed as MHz, so they
 * are handled as kHz values in this file.
 */
struct nvml_clock_rate {
	unsigned int current_freq;	/* frequency currently reported */
	unsigned int expected_freq;	/* frequency the device is expected to run at */
	unsigned int source;		/* not interpreted by this file */
	unsigned int flags;		/* not interpreted by this file */
};
| |
/*
 * Performance state table exchanged with the internal get/set pstates calls.
 * The table has room for 16 pstates, each carrying up to 32 clock entries and
 * 16 voltage entries; the *_count members tell how many are in use.
 * The member order and the bit-field declarations define the binary layout
 * and must not be rearranged.
 */
struct nvml_gpu_perf_pstates {
	unsigned int version;
	unsigned int is_enabled:1;
	unsigned int is_dyn_pstates_available:1;
	unsigned int is_dyn_pstates_enabled:1;
	unsigned int pstate_count;	/* used entries in pstates[] */
	unsigned int clock_count;	/* used entries in each clocks[] */
	unsigned int voltage_count;	/* used entries in each voltages[] */
	struct {
		unsigned int pstate;	/* performance state number */
		unsigned int is_pcie_limit_gen1:1;
		unsigned int is_overclocked:1;
		unsigned int is_overclockable:1;
		struct {
			unsigned int domain;	/* clock domain number (0 and 4 are used by this file) */
			unsigned int is_force_pll:1;
			unsigned int is_force_bypass:1;
			unsigned int apply_ratio:1;
			unsigned int freq;	/* kHz */
		} clocks[32];
		struct {
			unsigned int domain;
			unsigned int is_vdt:1;
			unsigned int voltage;	/* mV */
		} voltages[16];
	} pstates[16];
};
| |
/*
 * Entry points taken from the library's internal export table.
 * All of them are NULL until import_nvml_internal_functions() has run; each
 * returns an NVML status code (0 on success).
 */

/* Performance state control. */
static int (*nvml_get_performance_state)(struct nvml_device *dev, unsigned int *pstate);
static int (*nvml_set_performance_state)(struct nvml_device *dev, unsigned int pstate);
static int (*nvml_clear_performance_state)(struct nvml_device *dev);

/* Clock queries and control. */
static int (*nvml_get_clock_domains)(struct nvml_device *dev, unsigned int arg, unsigned int *domain_mask);
static int (*nvml_device_get_raw_clock)(struct nvml_device *dev, unsigned int domain_mask, struct nvml_clock_rate *rate);
static int (*nvml_device_set_raw_clock)(struct nvml_device *dev, unsigned int domain_mask, unsigned int freq);

/* Performance state limits; this file queries limit type 0 as "hard" and 1 as "soft". */
static int (*nvml_device_set_pstate_limit)(struct nvml_device *dev, unsigned int type, unsigned int limit);
static int (*nvml_device_get_pstate_limit)(struct nvml_device *dev, unsigned int type, unsigned int *limit);

/* Generic resource manager control call: command number, parameter buffer and its size. */
static int (*nvml_device_rm_call)(struct nvml_device *dev, unsigned int command, void *params, unsigned int size);

/* Performance state tables. */
static int (*nvml_device_get_pstates)(struct nvml_device *dev, struct nvml_gpu_perf_pstates *pstates, unsigned int arg);
static int (*nvml_device_set_pstates)(struct nvml_device *dev, struct nvml_gpu_perf_pstates *pstates, unsigned int arg);

/* Sensors, power and fan. */
static int (*nvml_device_get_temperature)(struct nvml_device *dev, unsigned int sensor, unsigned int *temperature);
static int (*nvml_device_get_default_pm_limit)(struct nvml_device *dev, unsigned int *limit);
static int (*nvml_device_set_pm_limit)(struct nvml_device *dev, unsigned int limit);
static int (*nvml_device_set_fan_speed)(struct nvml_device *dev, unsigned int speed);
static int (*nvml_device_reset_fan_speed)(struct nvml_device *dev);
static int (*nvml_device_get_voltage)(struct nvml_device *dev, unsigned int *voltage);

/* Frequency offset for one clock domain and pstate; callers pass the offset in kHz. */
static int (*nvml_device_adjust_frequency)(struct nvml_device *dev, unsigned int domain, unsigned int pstate, int offset);
| |
| static void import_nvml_internal_functions() { |
| int e = nvmlInternalGetExportTable(func, magic[0]); |
| if(e) { |
| fprintf(stderr, "nvmlInternalGetExportTable failed: %s\n", nvmlErrorString(e)); |
| exit(1); |
| } |
| nvml_device_get_temperature = func[0]->func[6]; |
| nvml_device_set_fan_speed = func[0]->func[7]; |
| nvml_device_reset_fan_speed = func[0]->func[8]; |
| nvml_get_performance_state = func[0]->func[13]; |
| nvml_set_performance_state = func[0]->func[14]; |
| nvml_get_clock_domains = func[0]->func[15]; |
| nvml_device_get_raw_clock = func[0]->func[16]; |
| nvml_device_set_raw_clock = func[0]->func[17]; |
| nvml_device_set_pstate_limit = func[0]->func[20]; |
| nvml_device_get_pstate_limit = func[0]->func[21]; |
| nvml_clear_performance_state = func[0]->func[24]; |
| nvml_device_rm_call = func[0]->func[31]; |
| nvml_device_get_pstates = func[0]->func[41]; |
| nvml_device_set_pstates = func[0]->func[42]; |
| nvml_device_get_default_pm_limit = func[0]->func[46]; |
| nvml_device_set_pm_limit = func[0]->func[47]; |
| nvml_device_get_voltage = func[0]->func[62]; |
| nvml_device_adjust_frequency = func[0]->func[65]; |
| } |
| |
/*
 * Signed adjustment together with the range it may take, as embedded in the
 * pstates 2.0 clock and voltage entries. Frequency deltas are written as
 * MHz * 1000 by this file, i.e. they are kHz values.
 */
struct nv2080_gpu_perf_pstates_data_2_delta_entry {
	int32_t value;		/* adjustment currently applied or requested */
	int32_t min_value;	/* lower bound; not read by this file */
	int32_t max_value;	/* upper bound; not read by this file */
};
| |
| struct nv2080_gpu_perf_pstates_data_2 { |
| uint32_t flags; |
| uint32_t pstate_count; |
| uint32_t clock_count; |
| uint32_t voltage_count; |
| struct { |
| uint32_t pstate; |
| uint32_t flags; |
| union { |
| uint64_t storage; |
| struct nv2080_gpu_perf_pstates_data_2_clock_entry { |
| uint32_t domain; |
| uint32_t flags; |
| uint8_t type; |
| union { |
| uint32_t single_freq; |
| struct { |
| uint32_t min_freq; |
| uint32_t max_freq; |
| uint32_t voltage_domain; |
| uint32_t min_voltage; |
| uint32_t max_voltage; |
| } range; |
| } data; |
| struct nv2080_gpu_perf_pstates_data_2_delta_entry freq_delta; |
| } *p; |
| } clocks; |
| union { |
| uint64_t storage; |
| struct nv2080_gpu_perf_pstates_data_2_voltage_entry { |
| uint32_t domain; |
| uint32_t flags; |
| uint8_t voltage_domain; |
| uint8_t type; |
| union { |
| uint32_t logical_voltage; |
| uint8_t vdt_index; |
| uint8_t vfe_equ_index; |
| struct { |
| uint8_t pstate_index; |
| uint8_t freq_type; |
| } pstate; |
| uint8_t vpstate_index; |
| struct { |
| uint32_t clock_domain; |
| uint32_t freq; |
| } freq; |
| } data; |
| struct nv2080_gpu_perf_pstates_data_2_delta_entry voltage_delta; |
| uint32_t current_target_voltage; |
| } *p; |
| } voltages; |
| } pstates[16]; |
| struct { |
| uint32_t flags; |
| uint32_t voltage_count; |
| union { |
| uint64_t storage; |
| struct nv2080_gpu_perf_pstates_data_2_voltage_entry *p; |
| } voltages; |
| } ov; |
| }; |
| |
/* Number of clock entries (core and RAM) requested per pstate when reading the pstates 2.0 data. */
#define PSTATES_DATA_2_CLOCK_COUNT 2
/* Number of voltage entries requested per pstate; voltages are currently not queried. */
#define PSTATES_DATA_2_VOLTAGE_COUNT 0
| |
/* Display names of the temperature sensors, indexed by the sensor number passed to the temperature query. */
static const char *const temperature_sensor_names[] = {
	[0] = "core",
	[1] = "RAM",
	[2] = "board",
	[3] = "voltage regulator 1",
	[4] = "voltage regulator 2",
	[5] = "voltage regulator 3",
	[6] = "voltage regulator 4"
};
| |
/*
 * Writes the command line synopsis to standard error.
 * name: program name to show in the synopsis (argv[0]).
 */
static void print_usage(const char *name) {
	fputs("Usage:\n", stderr);
	fprintf(stderr,
		" %s { -a | -u <unit> } [-H [<pstate>.]{core|ram}{+|-}<freq-offset>]\n"
		" [-f <fan-speed> | -F] [-p {-1|<pstate>}]\n",
		name);
	fprintf(stderr, " %s -l\n", name);
	fputs("Frequency offset values are in MHz.\n", stderr);
}
| |
/*
 * Parses a complete decimal integer and returns it in newly allocated storage.
 *
 * s: NUL-terminated text; an optional sign followed by digits (leading white
 *    space is accepted as by strtol).
 * Returns a pointer to the parsed value, to be released with free(), or NULL
 * after printing a diagnostic to standard error when the text is empty, has
 * trailing characters, does not fit into an int, or memory is exhausted.
 */
static int *parse_and_create_number(const char *s) {
	char *end_p;
	errno = 0;
	long int value = strtol(s, &end_p, 10);
	/* end_p == s catches the empty string, which would otherwise be read as 0 */
	if(end_p == s || *end_p) {
		fprintf(stderr, "Invalid number '%s'\n", s);
		return NULL;
	}
	if(errno == ERANGE || value < INT_MIN || value > INT_MAX) {
		fprintf(stderr, "Number '%s' is out of range\n", s);
		return NULL;
	}
	int *n = malloc(sizeof *n);
	if(!n) {
		fputs("Out of memory when parsing options\n", stderr);
		return NULL;
	}
	*n = (int)value;
	return n;
}
| |
/*
 * Parses one '-H' argument of the form [<pstate>.]{core|ram|memory}{+|-}<offset>
 * and stores the offset in the table of the selected clock domain.
 *
 * s: option argument.
 * core_freq_offsets, ram_freq_offsets: tables of 16 pointers indexed by
 *    pstate; the selected slot receives a newly allocated int. A value stored
 *    there by an earlier option is released first.
 * Returns 0 on success; -1 after printing a diagnostic to standard error, in
 * which case the tables are left untouched.
 */
static int parse_freq_offset_setting(const char *s, int **core_freq_offsets, int **ram_freq_offsets) {
	unsigned int pstate = 0;
	const char *dot = strchr(s, '.');
	if(dot && dot > s) {
		/* The pstate prefix is at most two characters and must be a number in 0..15 */
		char *end_p = NULL;
		unsigned long int n = 16;
		if(dot - s <= 2) n = strtoul(s, &end_p, 10);
		if(end_p != dot || n > 15) {
			fputs("Invalid pstate specification for option '-H'\n", stderr);
			return -1;
		}
		pstate = n;
		s = dot + 1;
	}

	int **freq_offsets;
	if(strncmp(s, "core", 4) == 0) {
		freq_offsets = core_freq_offsets;
		s += 4;
	} else if(strncmp(s, "ram", 3) == 0) {
		freq_offsets = ram_freq_offsets;
		s += 3;
	} else if(strncmp(s, "memory", 6) == 0) {
		freq_offsets = ram_freq_offsets;
		s += 6;
	} else {
		fputs("Invalid domain specification for option '-H'\n", stderr);
		return -1;
	}

	if(*s != '+' && *s != '-') {
		fputs("Offset value must have '+' or '-' prefix for option '-H'\n", stderr);
		return -1;
	}
	int *offset = parse_and_create_number(s);
	if(!offset) return -1;
	/* A repeated option for the same slot replaces the earlier value instead of leaking it */
	free(freq_offsets[pstate]);
	freq_offsets[pstate] = offset;
	return 0;
}
| |
/*
 * Reports an NVML failure on standard error as "<prefix>: <description>".
 * e: NVML status code; prefix: usually the name of the failed call.
 */
static void nvml_perror(int e, const char *prefix) {
	const char *description = nvmlErrorString(e);
	fprintf(stderr, "%s: %s\n", prefix, description);
}
| |
| static void get_frequency(struct nvml_device *dev, const struct nvml_gpu_perf_pstates *pstates_data_1, const struct nv2080_gpu_perf_pstates_data_2 *pstates_data_2, unsigned int domain, unsigned int pstate, struct nvml_clock_rate *output) { |
| if(domain == 0 && nvml_device_get_raw_clock(dev, 1 << 12, output) == 0) { |
| output->current_freq /= 2; |
| output->expected_freq /= 2; |
| return; |
| } |
| if(nvml_device_get_raw_clock(dev, 1 << domain, output) == 0) return; |
| |
| unsigned int i, j, freq = 0; |
| if(pstates_data_1 && pstates_data_1->is_enabled) { |
| for(i = 0; i < pstates_data_1->pstate_count; i++) { |
| if(pstates_data_1->pstates[i].pstate != pstate) continue; |
| for(j = 0; j < pstates_data_1->clock_count; j++) { |
| if(pstates_data_1->pstates[i].clocks[j].domain != domain) continue; |
| freq = pstates_data_1->pstates[i].clocks[j].freq; |
| break; |
| } |
| break; |
| } |
| } |
| if(!freq && pstates_data_2) { |
| for(i = 0; i < pstates_data_2->pstate_count; i++) { |
| unsigned int entry_pstate = pstates_data_2->pstates[i].pstate; |
| int is_pstate_match = (entry_pstate >> pstate) | 1; |
| if(entry_pstate != 1 && !is_pstate_match) continue; |
| for(j = 0; j < pstates_data_2->clock_count; j++) { |
| struct nv2080_gpu_perf_pstates_data_2_clock_entry *clock_entry = pstates_data_2->pstates[i].clocks.p + j; |
| if(clock_entry->type > 1) continue; |
| if(clock_entry->type == 1 && !is_pstate_match) continue; |
| if(domain == 0 && ((clock_entry->domain >> 12) | 1)) { |
| freq = clock_entry->data.single_freq / 2; |
| break; |
| } |
| if(!((clock_entry->domain >> domain) | 1)) continue; |
| freq = clock_entry->data.single_freq; |
| break; |
| } |
| if(is_pstate_match || freq) break; |
| } |
| } |
| output->current_freq = freq; |
| output->expected_freq = freq; |
| } |
| |
| static int print_gpu_information_by_index(unsigned int unit) { |
| static const char *const separators[] = { " ", ", " }; |
| |
| struct nvml_device *dev; |
| int e = nvmlDeviceGetHandleByIndex_v2(unit, &dev); |
| if(e) { |
| nvml_perror(e, "nvmlDeviceGetHandleByIndex_v2"); |
| return -1; |
| } |
| |
| int r = -1; |
| |
| struct nvml_gpu_perf_pstates *pstates_data_1 = malloc(sizeof(struct nvml_gpu_perf_pstates)); |
| if(pstates_data_1) { |
| e = nvml_device_get_pstates(dev, pstates_data_1, 0); |
| if(e) { |
| //nvml_perror(e, "nvml_device_get_pstates"); |
| free(pstates_data_1); |
| pstates_data_1 = NULL; |
| } |
| } |
| struct nv2080_gpu_perf_pstates_data_2 *pstates_data_2 = malloc(sizeof(struct nv2080_gpu_perf_pstates_data_2)); |
| if(pstates_data_2) { |
| memset(pstates_data_2, 0, sizeof(struct nv2080_gpu_perf_pstates_data_2)); |
| { |
| unsigned int i = 0; |
| struct nv2080_gpu_perf_pstates_data_2_clock_entry *clock_entry = malloc(sizeof(struct nv2080_gpu_perf_pstates_data_2_clock_entry) * PSTATES_DATA_2_CLOCK_COUNT); |
| if(!clock_entry) goto pstates_data_2_fail; |
| memset(clock_entry, 0, sizeof(struct nv2080_gpu_perf_pstates_data_2_clock_entry) * PSTATES_DATA_2_CLOCK_COUNT); |
| #if 0 |
| for(j = 0; j < PSTATES_DATA_2_CLOCK_COUNT; j++) { |
| clock_entry[j].domain = 1 << j; |
| } |
| #else |
| clock_entry[0].domain = 1 << 12; |
| clock_entry[1].domain = 1 << 4; |
| #endif |
| pstates_data_2->pstates[i].clocks.p = clock_entry; |
| struct nv2080_gpu_perf_pstates_data_2_voltage_entry *voltage_entry = malloc(sizeof(struct nv2080_gpu_perf_pstates_data_2_voltage_entry) * PSTATES_DATA_2_VOLTAGE_COUNT); |
| if(!voltage_entry) goto pstates_data_2_fail; |
| memset(voltage_entry, 0, sizeof(struct nv2080_gpu_perf_pstates_data_2_voltage_entry) * PSTATES_DATA_2_VOLTAGE_COUNT); |
| #if 0 |
| for(j = 0; j < PSTATES_DATA_2_VOLTAGE_COUNT; j++) { |
| voltage_entry[j].domain = 1 << j; |
| } |
| #else |
| //voltage_entry[0].domain = 1; |
| #endif |
| pstates_data_2->pstates[i].voltages.p = voltage_entry; |
| pstates_data_2->pstates[i].pstate = 1 << i; |
| } |
| pstates_data_2->pstate_count = 1; |
| pstates_data_2->clock_count = PSTATES_DATA_2_CLOCK_COUNT; |
| pstates_data_2->voltage_count = PSTATES_DATA_2_VOLTAGE_COUNT; |
| e = nvml_device_rm_call(dev, 0x2080206a, pstates_data_2, sizeof(struct nv2080_gpu_perf_pstates_data_2)); |
| if(e) { |
| //nvml_perror(e, "nvml_device_rm_call"); |
| pstates_data_2->pstates->clocks.p[0].domain = 1; |
| e = nvml_device_rm_call(dev, 0x2080206a, pstates_data_2, sizeof(struct nv2080_gpu_perf_pstates_data_2)); |
| if(e) { |
| //nvml_perror(e, "nvml_device_rm_call"); |
| pstates_data_2_fail: |
| free(pstates_data_2->pstates[0].clocks.p); |
| free(pstates_data_2->pstates[0].voltages.p); |
| free(pstates_data_2); |
| pstates_data_2 = NULL; |
| } |
| } |
| } |
| |
| char name[64]; |
| e = nvmlDeviceGetName(dev, name, sizeof name); |
| if(e) { |
| nvml_perror(e, "nvmlDeviceGetName"); |
| goto fail; |
| } |
| char serial[30]; |
| e = nvmlDeviceGetSerial(dev, serial, sizeof serial); |
| if(e) { |
| //nvml_perror(e, "nvmlDeviceGetSerial"); |
| *serial = 0; |
| } |
| char uuid[80]; |
| e = nvmlDeviceGetUUID(dev, uuid, sizeof uuid); |
| if(e) { |
| nvml_perror(e, "nvmlDeviceGetUUID"); |
| goto fail; |
| } |
| unsigned int ctl_dev_unit; |
| e = nvmlDeviceGetMinorNumber(dev, &ctl_dev_unit); |
| if(e) ctl_dev_unit = -1; |
| struct nvml_memory memory_info; |
| e = nvmlDeviceGetMemoryInfo(dev, &memory_info); |
| if(e) { |
| nvml_perror(e, "nvmlDeviceGetMemoryInfo"); |
| goto fail; |
| } |
| int ecc_state, pending_ecc_state; |
| e = nvmlDeviceGetEccMode(dev, &ecc_state, &pending_ecc_state); |
| if(e) { |
| //nvml_perror(e, "nvmlDeviceGetEccMode"); |
| ecc_state = -1; |
| pending_ecc_state = -1; |
| } |
| unsigned int pstate; |
| e = nvml_get_performance_state(dev, &pstate); |
| if(e) { |
| nvml_perror(e, "nvml_get_performance_state"); |
| goto fail; |
| } |
| unsigned int clock_domain_mask; |
| e = nvml_get_clock_domains(dev, 0, &clock_domain_mask); |
| if(e) { |
| nvml_perror(e, "nvml_get_clock_domains"); |
| goto fail; |
| } |
| struct nvml_clock_rate core_freq, ram_freq; |
| get_frequency(dev, pstates_data_1, pstates_data_2, 0, pstate, &core_freq); |
| get_frequency(dev, pstates_data_1, pstates_data_2, 4, pstate, &ram_freq); |
| unsigned int pstate_hard_limit, pstate_soft_limit; |
| e = nvml_device_get_pstate_limit(dev, 0, &pstate_hard_limit); |
| if(e) { |
| nvml_perror(e, "nvml_device_get_pstate_limit"); |
| goto fail; |
| } |
| e = nvml_device_get_pstate_limit(dev, 1, &pstate_soft_limit); |
| if(e) { |
| nvml_perror(e, "nvml_device_get_pstate_limit"); |
| goto fail; |
| } |
| unsigned int pm_limit; |
| e = nvmlDeviceGetPowerManagementLimit(dev, &pm_limit); |
| if(e) { |
| nvml_perror(e, "nvmlDeviceGetPowerManagementLimit"); |
| goto fail; |
| } |
| unsigned int default_pm_limit; |
| e = nvml_device_get_default_pm_limit(dev, &default_pm_limit); |
| if(e) { |
| nvml_perror(e, "nvml_device_get_default_pm_limit"); |
| goto fail; |
| } |
| unsigned int fan_speed; |
| #if 0 |
| for(j = 0; !(e = nvmlDeviceGetFanSpeed_v2(dev, j, &fan_speed)); j++) { |
| fprintf(stderr, "fan %u speed %u%%\n", j, fan_speed); |
| } |
| if(e != 2) { |
| nvml_perror(e, "nvmlDeviceGetFanSpeed_v2"); |
| goto fail; |
| } |
| #else |
| e = nvmlDeviceGetFanSpeed(dev, &fan_speed); |
| if(e) { |
| nvml_perror(e, "nvmlDeviceGetFanSpeed"); |
| fan_speed = -1; |
| } |
| #endif |
| if(!nvml_device_get_temperature) nvml_device_get_temperature = nvmlDeviceGetTemperature; |
| unsigned int i, temp[3]; |
| for(i = 0; i < sizeof temp / sizeof(unsigned int); i++) { |
| e = nvml_device_get_temperature(dev, i, temp + i); |
| if(e) { |
| if(e != 3) nvml_perror(e, "nvml_device_get_temperature"); |
| temp[i] = 0; |
| } |
| } |
| unsigned int power_rate; |
| e = nvmlDeviceGetPowerUsage(dev, &power_rate); |
| if(e) power_rate = 0; |
| unsigned long long total_power_consumption; |
| e = nvmlDeviceGetTotalEnergyConsumption(dev, &total_power_consumption); |
| if(e) total_power_consumption = 0; |
| unsigned int voltage; |
| e = nvml_device_get_voltage(dev, &voltage); |
| if(e) voltage = 0; |
| |
| if(ctl_dev_unit == (unsigned int)-1) printf("%u: %s\n", unit, name); |
| else printf("nvidia%u:%s, index %u\n", ctl_dev_unit, name, unit); |
| if(*serial) printf(" serial %s\n", serial); |
| printf(" UUID %s\n" |
| " RAM size %llu MiB, used %llu MiB, ECC %s%s\n" |
| " clock domain mask 0x%x\n", |
| uuid, |
| memory_info.total / 1048576, memory_info.used / 1048576, |
| ecc_state < 0 ? "unavailable" : (ecc_state ? "on" : "off"), |
| pending_ecc_state != ecc_state ? " (change pending)" : "", |
| clock_domain_mask); |
| if(pstates_data_2 && pstates_data_2->pstates->pstate == 1 && |
| (pstates_data_2->pstates->clocks.p[0].freq_delta.value || |
| pstates_data_2->pstates->clocks.p[1].freq_delta.value)) { |
| printf(" core frequency %u MHz", core_freq.current_freq / 1000); |
| if(pstates_data_2->pstates->clocks.p[0].freq_delta.value) { |
| printf(", offset %d", pstates_data_2->pstates->clocks.p[0].freq_delta.value / 1000); |
| } |
| printf("\n RAM frequency %u MHz", ram_freq.current_freq / 1000); |
| if(pstates_data_2->pstates->clocks.p[1].freq_delta.value) { |
| printf(", offset %d", pstates_data_2->pstates->clocks.p[1].freq_delta.value / 1000); |
| } |
| } else { |
| printf(" core frequency %u MHz, RAM frequency %u MHz", |
| core_freq.current_freq / 1000, ram_freq.current_freq / 1000); |
| } |
| printf("\n performance state %u (hard limit %u, soft limit %u)\n" |
| " power management limit %u (default %u)\n", |
| pstate, pstate_hard_limit, pstate_soft_limit, |
| pm_limit, default_pm_limit); |
| if(fan_speed != (unsigned int)-1) printf(" fan speed %u%%\n", fan_speed); |
| for(i = 0; i < sizeof temp / sizeof(unsigned int); i++) { |
| if(!temp[i]) continue; |
| printf(" %s temperature %u C\n", temperature_sensor_names[i], temp[i]); |
| } |
| if(power_rate || total_power_consumption) { |
| i = 0; |
| if(power_rate) printf("%spower %u mW", separators[i++], power_rate); |
| if(total_power_consumption) printf("%senergy consumed %llu J", separators[i], total_power_consumption / 1000); |
| putchar('\n'); |
| } |
| if(voltage) printf(" voltage %u mV\n", voltage); |
| putchar('\n'); |
| r = 0; |
| fail: |
| free(pstates_data_1); |
| if(pstates_data_2) { |
| for(i = 0; i < 16; i++) { |
| free(pstates_data_2->pstates[i].clocks.p); |
| free(pstates_data_2->pstates[i].voltages.p); |
| } |
| free(pstates_data_2); |
| } |
| return r; |
| } |
| |
| static int set_clock_rate(struct nvml_device *dev, unsigned int domain, unsigned int freq) { |
| struct nvml_gpu_perf_pstates pstates = { |
| .is_enabled = 1, |
| .pstate_count = 1, |
| .clock_count = 1, |
| .pstates[0] = { |
| .pstate = 0, |
| //.is_overclocked = 1, |
| .clocks[0] = { |
| .domain = domain, |
| //.is_force_bypass = 1, |
| //.apply_ratio = 1, |
| .freq = freq * 1000 |
| } |
| } |
| }; |
| int e = nvml_device_set_pstates(dev, &pstates, 1); |
| if(e) { |
| nvml_perror(e, "nvml_device_set_pstates"); |
| return -1; |
| } |
| return 0; |
| } |
| |
| static int set_freq_offset_via_pstates20(struct nvml_device *dev, unsigned int domain, int **offsets) { |
| struct nv2080_gpu_perf_pstates_data_2 pstates_data = { .flags = 0x1, .clock_count = 1 }; |
| unsigned int i; |
| for(i = 0; i < 16; i++) { |
| int *offset = offsets[i]; |
| if(!offset) continue; |
| struct nv2080_gpu_perf_pstates_data_2_clock_entry *clock_entry = malloc(sizeof(struct nv2080_gpu_perf_pstates_data_2_clock_entry)); |
| if(!clock_entry) { |
| fputs("Out of memory\n", stderr); |
| return -1; |
| } |
| memset(clock_entry, 0, sizeof(struct nv2080_gpu_perf_pstates_data_2_clock_entry)); |
| clock_entry->domain = 1 << domain; |
| //clock_entry->flags = 0; |
| clock_entry->type = 0; |
| clock_entry->freq_delta.value = *offset * 1000; |
| pstates_data.pstates[pstates_data.pstate_count].pstate = 1 << i; |
| pstates_data.pstates[pstates_data.pstate_count].flags = 0x1; |
| pstates_data.pstates[pstates_data.pstate_count].clocks.p = clock_entry; |
| pstates_data.pstate_count++; |
| } |
| int e = nvml_device_rm_call(dev, 0x2080206b, &pstates_data, sizeof(struct nv2080_gpu_perf_pstates_data_2)); |
| if(e) { |
| nvml_perror(e, "nvml_device_rm_call"); |
| return -1; |
| } |
| return 0; |
| } |
| |
/*
 * Applies the requested frequency offsets of one clock domain.
 *
 * Every offset is validated before anything is sent to the device. The
 * offsets are then applied one pstate at a time; when the device rejects
 * that call, all offsets are submitted at once through the pstates 2.0
 * interface instead.
 *
 * dev: device handle.
 * domain: clock domain number (callers use 0 for core and 4 for RAM).
 * offsets: table of 16 pointers indexed by pstate; NULL slots are skipped,
 *          the others hold the offset in MHz.
 * Returns 0 on success, -1 after printing a diagnostic to standard error.
 */
static int set_freq_offset(struct nvml_device *dev, unsigned int domain, int **offsets) {
	/* Largest accepted magnitude of an offset, in MHz */
	enum { MAX_ABS_FREQ_OFFSET = 10000 };
	unsigned int i;

	for(i = 0; i < 16; i++) {
		const int *offset = offsets[i];
		if(!offset) continue;
		if(*offset < -MAX_ABS_FREQ_OFFSET) {
			fprintf(stderr, "Frequency offset %d for domain %u is too low\n",
				*offset, domain);
			return -1;
		}
		if(*offset > MAX_ABS_FREQ_OFFSET) {
			fprintf(stderr, "Frequency offset %d for domain %u is too high\n",
				*offset, domain);
			return -1;
		}
	}

	for(i = 0; i < 16; i++) {
		const int *offset = offsets[i];
		if(!offset) continue;
		int e = nvml_device_adjust_frequency(dev, domain, i, *offset * 1000);
		if(e) {
			/* The fallback handles the whole table, so nothing is left to do afterwards */
			if(set_freq_offset_via_pstates20(dev, domain, offsets) == 0) return 0;
			nvml_perror(e, "nvml_device_adjust_frequency");
			return -1;
		}
	}
	return 0;
}
| |
| int main(int argc, char **argv) { |
| int list_gpu = 0; |
| int unit = -2; |
| int *core_freq = NULL; |
| int *ram_freq = NULL; |
| int is_offset_value_specified = 0; |
| int *core_freq_offset_by_pstate[16] = { NULL }; |
| int *ram_freq_offset_by_pstate[16] = { NULL }; |
| int *fan_speed = NULL; |
| int should_reset_fan_speed = 0; |
| int *pstate = NULL; |
| while(1) { |
| int c = getopt(argc, argv, "au:H:c:m:f:Fp:lh"); |
| if(c == -1) break; |
| switch(c) { |
| case 'a': |
| unit = -1; |
| break; |
| case 'u': |
| unit = atoi(optarg); |
| if(unit < 0) { |
| fprintf(stderr, "%s: Invalid unit number '%s'\n", |
| argv[0], optarg); |
| return -1; |
| } |
| break; |
| case 'H': |
| if(parse_freq_offset_setting(optarg, core_freq_offset_by_pstate, ram_freq_offset_by_pstate) < 0) return -1; |
| is_offset_value_specified = 1; |
| break; |
| case 'c': |
| core_freq = parse_and_create_number(optarg); |
| if(!core_freq) return 1; |
| break; |
| case 'm': |
| ram_freq = parse_and_create_number(optarg); |
| if(!ram_freq) return 1; |
| break; |
| case 'f': |
| fan_speed = parse_and_create_number(optarg); |
| if(!fan_speed) return 1; |
| should_reset_fan_speed = 0; |
| break; |
| case 'F': |
| should_reset_fan_speed = 1; |
| if(fan_speed) { |
| free(fan_speed); |
| fan_speed = NULL; |
| } |
| break; |
| case 'p': |
| pstate = parse_and_create_number(optarg); |
| if(!pstate) return 1; |
| break; |
| case 'l': |
| list_gpu = 1; |
| break; |
| case 'h': |
| print_usage(argv[0]); |
| return 0; |
| case '?': |
| return -1; |
| } |
| } |
| if(unit == -2 && !list_gpu) { |
| print_usage(argv[0]); |
| return -1; |
| } |
| |
| int e = nvmlInit_v2(); |
| //int e = nvmlInitWithFlags(0); |
| if(e) { |
| nvml_perror(e, "nvmlInit_v2"); |
| return 1; |
| } |
| import_nvml_internal_functions(); |
| |
| unsigned int device_count; |
| e = nvmlDeviceGetCount_v2(&device_count); |
| if(e) { |
| nvml_perror(e, "nvmlDeviceGetCount_v2"); |
| return 1; |
| } |
| |
| unsigned int i = 0; |
| |
| if(list_gpu) { |
| char name[64]; |
| while(i < device_count) { |
| struct nvml_device *dev; |
| e = nvmlDeviceGetHandleByIndex_v2(i, &dev); |
| if(e) nvml_perror(e, "nvmlDeviceGetHandleByIndex_v2"); |
| else { |
| e = nvmlDeviceGetName(dev, name, sizeof name); |
| if(e) nvml_perror(e, "nvmlDeviceGetName"); |
| else printf("%d %s\n", i, name); |
| } |
| i++; |
| } |
| return 0; |
| } |
| |
| if(!core_freq && !ram_freq && !is_offset_value_specified && !fan_speed && !should_reset_fan_speed && !pstate) { |
| if(unit < 0) while(i < device_count) { |
| //printf("%d:\n", i); |
| print_gpu_information_by_index(i); |
| i++; |
| } else if(unit < device_count) { |
| print_gpu_information_by_index(unit); |
| } else { |
| fprintf(stderr, "%s: Unit number %u out of range\n", argv[0], unit); |
| return 1; |
| } |
| return 0; |
| } |
| |
| if(fan_speed && (*fan_speed < 1 || *fan_speed > 100)) { |
| fprintf(stderr, "%s: Fan speed %u is out of valid range\n", argv[0], *fan_speed); |
| } |
| if(pstate && (*pstate < -1 || *pstate > 15)) { |
| fprintf(stderr, "%s: Performance state %u is out of valid range\n", |
| argv[0], *pstate); |
| } |
| |
| if(unit >= 0) { |
| if(unit < device_count) { |
| i = unit; |
| device_count = 1; |
| } else { |
| fprintf(stderr, "%s: Unit number %u out of range\n", argv[0], unit); |
| return 1; |
| } |
| } else if(!device_count) { |
| fputs("No supported device found\n", stderr); |
| return 1; |
| } |
| int r = 0; |
| do { |
| struct nvml_device *dev; |
| e = nvmlDeviceGetHandleByIndex_v2(i, &dev); |
| if(e) nvml_perror(e, "nvmlDeviceGetHandleByIndex_v2"); |
| else { |
| if(core_freq) { |
| e = nvml_device_set_raw_clock(dev, 1, *core_freq * 1000); |
| if(e && set_clock_rate(dev, 0, *core_freq) < 0) { |
| nvml_perror(e, "nvml_device_set_raw_clock"); |
| r = 1; |
| } |
| } |
| if(ram_freq) { |
| e = nvml_device_set_raw_clock(dev, 1 << 4, *ram_freq * 1000); |
| if(e && set_clock_rate(dev, 4, *ram_freq) < 0) { |
| nvml_perror(e, "nvml_device_set_raw_clock"); |
| r = 1; |
| } |
| } |
| if(is_offset_value_specified) { |
| if(set_freq_offset(dev, 0, core_freq_offset_by_pstate) < 0) r = 1; |
| if(set_freq_offset(dev, 4, ram_freq_offset_by_pstate) < 0) r = 1; |
| } |
| if(fan_speed) { |
| e = nvml_device_set_fan_speed(dev, *fan_speed); |
| if(e) { |
| nvml_perror(e, "nvml_device_set_fan_speed"); |
| r = 1; |
| } |
| } else if(should_reset_fan_speed) { |
| e = nvml_device_reset_fan_speed(dev); |
| if(e) { |
| nvml_perror(e, "nvml_device_reset_fan_speed"); |
| r = 1; |
| } |
| } |
| if(pstate) { |
| if(*pstate < 0) { |
| e = nvml_clear_performance_state(dev); |
| if(e) { |
| nvml_perror(e, "nvml_clear_performance_state"); |
| r = 1; |
| } |
| } else { |
| e = nvml_set_performance_state(dev, *pstate); |
| if(e) { |
| nvml_perror(e, "nvml_set_performance_state"); |
| r = 1; |
| } |
| } |
| } |
| } |
| } while(++i < device_count); |
| return r; |
| } |