blob: dacf5bb4924edbdea0ff63ec7d0c129b93e4da14 [file] [log] [blame] [raw]
/* Copyright 2015-2022 Rivoreo
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE
FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* Opaque device handle type; this file only ever holds pointers to it. */
struct nvml_device;
/*
 * Export table handed back by nvmlInternalGetExportTable().
 *
 * size: header field preceding the entries; it is not read anywhere in this
 *       file (presumably the table size -- TODO confirm units).
 * func: the entries themselves, indexed by fixed slot numbers in
 *       import_nvml_internal_functions().  Declared as a C99 flexible array
 *       member; the pointers are deliberately unprototyped because each slot
 *       has a different signature.
 */
struct nvml_functions {
	size_t size;
	int (*func[])();
};
/*
 * Memory figures filled in by nvmlDeviceGetMemoryInfo().
 * The report code divides total and used by 1048576 and labels the result
 * MiB, i.e. it treats these values as byte counts.
 */
struct nvml_memory {
unsigned long long int total, free, used;
};
/*
 * Hand-written prototypes for the NVML library entry points used by this
 * program (no NVML header is included).
 * Apart from nvmlErrorString(), each returns an int status; callers in this
 * file treat zero as success and pass any other value to nvmlErrorString()
 * to obtain a printable message.
 * nvmlInitWithFlags() and nvmlShutdown() are declared but not called by the
 * code in this file.
 */
extern int nvmlInit_v2(void);
extern int nvmlInitWithFlags(unsigned int);
extern char *nvmlErrorString(int);
extern int nvmlDeviceGetCount_v2(unsigned int *);
extern int nvmlDeviceGetHandleByIndex_v2(unsigned int, struct nvml_device **);
extern int nvmlDeviceGetName(struct nvml_device *, char *, unsigned int);
extern int nvmlDeviceGetSerial(struct nvml_device *, char *, unsigned int);
extern int nvmlDeviceGetUUID(struct nvml_device *, char *, unsigned int);
extern int nvmlDeviceGetMinorNumber(struct nvml_device *, unsigned int *);
extern int nvmlDeviceGetPowerUsage(struct nvml_device *, unsigned int *);
extern int nvmlDeviceGetTotalEnergyConsumption(struct nvml_device *, unsigned long long int *);
extern int nvmlDeviceGetMemoryInfo(struct nvml_device *, struct nvml_memory *);
extern int nvmlDeviceGetEccMode(struct nvml_device *, int *, int *);
extern int nvmlDeviceGetPowerManagementLimit(struct nvml_device *, unsigned int *);
extern int nvmlDeviceGetFanSpeed(struct nvml_device *, unsigned int *);
extern int nvmlDeviceGetTemperature(struct nvml_device *, unsigned int, unsigned int *);
/* Looks up an internal function table identified by a 16-byte key (see magic[]). */
extern int nvmlInternalGetExportTable(struct nvml_functions **, const unsigned char *);
extern int nvmlShutdown(void);
/*
 * 16-byte keys accepted by nvmlInternalGetExportTable(); each one selects a
 * different export table.  Only magic[0] is used by the code in this file.
 */
static const unsigned char magic[][16] = {
{ 0xC4, 0xFE, 0x3E, 0x6C, 0xC9, 0x8F, 0x6C, 0x4E, 0xA3, 0x27, 0xEE, 0x69, 0x6E, 0x12, 0xF7, 0xC4 },
{ 0xB8, 0x1D, 0xD7, 0x30, 0xE4, 0xD2, 0x81, 0x44, 0x9C, 0xD0, 0xE1, 0xDE, 0x35, 0x04, 0xFA, 0x27 },
{ 0x9E, 0x8C, 0x7A, 0x58, 0x12, 0x3A, 0xB5, 0x4F, 0xB9, 0xE3, 0xF4, 0xBA, 0x39, 0x45, 0xDD, 0x5F },
{ 0x4E, 0x3B, 0x47, 0x52, 0x76, 0x17, 0xE4, 0x11, 0xAA, 0x00, 0xB2, 0x22, 0x7C, 0xCE, 0x2B, 0x54 }
};
/*
 * Export table pointers, one slot per magic[] key.  func[0] receives the table
 * requested with magic[0] in import_nvml_internal_functions(); the other
 * slots are not used in this file.
 */
static struct nvml_functions *func[4];
/*
 * Result of nvml_device_get_raw_clock(); also filled in by get_frequency()
 * when it falls back to the pstates tables.
 * The report code divides current_freq by 1000 and labels it MHz, so the
 * frequencies are handled as kHz here.  source and flags are not read in
 * this file.
 */
struct nvml_clock_rate {
unsigned int current_freq;
unsigned int expected_freq;
unsigned int source;
unsigned int flags;
};
/*
 * Performance-state table exchanged with nvml_device_get_pstates() and
 * nvml_device_set_pstates().
 * pstate_count / clock_count / voltage_count give the number of valid
 * entries in pstates[], pstates[].clocks[] and pstates[].voltages[]
 * respectively (that is how get_frequency() and set_clock_rate() use them).
 * The layout must match what the library expects, so fields must not be
 * reordered or resized.
 */
struct nvml_gpu_perf_pstates {
unsigned int version;
unsigned int is_enabled:1;
unsigned int is_dyn_pstates_available:1;
unsigned int is_dyn_pstates_enabled:1;
unsigned int pstate_count;
unsigned int clock_count;
unsigned int voltage_count;
struct {
/* Compared directly against the current performance state number in get_frequency(). */
unsigned int pstate;
unsigned int is_pcie_limit_gen1:1;
unsigned int is_overclocked:1;
unsigned int is_overclockable:1;
struct {
/* Clock domain number (0 and 4 are the values used in this file). */
unsigned int domain;
unsigned int is_force_pll:1;
unsigned int is_force_bypass:1;
unsigned int apply_ratio:1;
unsigned int freq; // kHz
} clocks[32];
struct {
unsigned int domain;
unsigned int is_vdt:1;
unsigned int voltage; // mV
} voltages[16];
} pstates[16];
};
/*
 * Entry points taken from the internal export table.  All of them are
 * assigned in import_nvml_internal_functions() and, like the public NVML
 * calls, return zero on success as far as the callers in this file are
 * concerned.
 * nvml_device_get_temperature may be null after import; the report code then
 * substitutes the public nvmlDeviceGetTemperature().
 */
static int (*nvml_get_performance_state)(struct nvml_device *, unsigned int *);
static int (*nvml_set_performance_state)(struct nvml_device *, unsigned int);
static int (*nvml_clear_performance_state)(struct nvml_device *);
static int (*nvml_get_clock_domains)(struct nvml_device *, unsigned int, unsigned int *);
static int (*nvml_device_get_raw_clock)(struct nvml_device *, unsigned int, struct nvml_clock_rate *);
static int (*nvml_device_set_raw_clock)(struct nvml_device *, unsigned int, unsigned int);
static int (*nvml_device_set_pstate_limit)(struct nvml_device *, unsigned int, unsigned int);
static int (*nvml_device_get_pstate_limit)(struct nvml_device *, unsigned int, unsigned int *);
/* Generic control call: (device, command number, parameter block, parameter block size). */
static int (*nvml_device_rm_call)(struct nvml_device *, unsigned int, void *, unsigned int);
static int (*nvml_device_get_pstates)(struct nvml_device *, struct nvml_gpu_perf_pstates *, unsigned int);
static int (*nvml_device_set_pstates)(struct nvml_device *, struct nvml_gpu_perf_pstates *, unsigned int);
static int (*nvml_device_get_temperature)(struct nvml_device *, unsigned int, unsigned int *);
static int (*nvml_device_get_default_pm_limit)(struct nvml_device *, unsigned int *);
static int (*nvml_device_set_pm_limit)(struct nvml_device *, unsigned int);
static int (*nvml_device_set_fan_speed)(struct nvml_device *, unsigned int);
static int (*nvml_device_reset_fan_speed)(struct nvml_device *);
static int (*nvml_device_get_voltage)(struct nvml_device *, unsigned int *);
static int (*nvml_device_adjust_frequency)(struct nvml_device *, unsigned int, unsigned int, int);
/*
 * Resolve the internal NVML entry points used by this program.
 *
 * Requests the export table identified by magic[0] and copies the needed
 * slots into the file-level nvml_* function pointers.  If the table cannot
 * be obtained, an error is printed to stderr and the process exits with
 * status 1.
 */
static void import_nvml_internal_functions() {
	int status = nvmlInternalGetExportTable(func, magic[0]);
	if(status != 0) {
		fprintf(stderr, "nvmlInternalGetExportTable failed: %s\n", nvmlErrorString(status));
		exit(1);
	}

	/* Slot numbers below are fixed positions inside the export table. */
	int (**slot)() = func[0]->func;

	/* Sensors and fan control */
	nvml_device_get_temperature = slot[6];
	nvml_device_set_fan_speed = slot[7];
	nvml_device_reset_fan_speed = slot[8];

	/* Performance state and clocks */
	nvml_get_performance_state = slot[13];
	nvml_set_performance_state = slot[14];
	nvml_get_clock_domains = slot[15];
	nvml_device_get_raw_clock = slot[16];
	nvml_device_set_raw_clock = slot[17];
	nvml_device_set_pstate_limit = slot[20];
	nvml_device_get_pstate_limit = slot[21];
	nvml_clear_performance_state = slot[24];

	/* Generic control call and pstates tables */
	nvml_device_rm_call = slot[31];
	nvml_device_get_pstates = slot[41];
	nvml_device_set_pstates = slot[42];

	/* Power limit, voltage and frequency offset */
	nvml_device_get_default_pm_limit = slot[46];
	nvml_device_set_pm_limit = slot[47];
	nvml_device_get_voltage = slot[62];
	nvml_device_adjust_frequency = slot[65];
}
/*
 * Signed adjustment embedded in the clock and voltage entries of
 * struct nv2080_gpu_perf_pstates_data_2.  This file reads and writes only
 * value (frequency offsets are stored as MHz * 1000); min_value and max_value
 * are presumably the permitted range -- TODO confirm.
 */
struct nv2080_gpu_perf_pstates_data_2_delta_entry {
int32_t value;
int32_t min_value;
int32_t max_value;
};
/*
 * Parameter block passed to nvml_device_rm_call(): with command 0x2080206a
 * when producing the report and with command 0x2080206b when applying
 * frequency offsets.
 * The caller allocates the per-pstate clock and voltage entry arrays and
 * stores their addresses in clocks.p / voltages.p.  Each pointer shares a
 * union with a uint64_t, so the field is at least 64 bits wide whatever the
 * pointer size.
 * The layout must match what the driver expects, so fields must not be
 * reordered or resized.
 */
struct nv2080_gpu_perf_pstates_data_2 {
uint32_t flags;
/* Number of valid elements in pstates[]. */
uint32_t pstate_count;
/* Number of entries in every pstates[].clocks.p array. */
uint32_t clock_count;
/* Number of entries in every pstates[].voltages.p array. */
uint32_t voltage_count;
struct {
/* Bit mask: the code in this file stores 1 << <pstate number> here. */
uint32_t pstate;
uint32_t flags;
union {
uint64_t storage;
struct nv2080_gpu_perf_pstates_data_2_clock_entry {
/* Bit mask of clock domains (1 << <domain number>). */
uint32_t domain;
uint32_t flags;
/* get_frequency() only uses entries whose type is 0 or 1. */
uint8_t type;
union {
uint32_t single_freq;
struct {
uint32_t min_freq;
uint32_t max_freq;
uint32_t voltage_domain;
uint32_t min_voltage;
uint32_t max_voltage;
} range;
} data;
struct nv2080_gpu_perf_pstates_data_2_delta_entry freq_delta;
} *p;
} clocks;
union {
uint64_t storage;
struct nv2080_gpu_perf_pstates_data_2_voltage_entry {
uint32_t domain;
uint32_t flags;
uint8_t voltage_domain;
uint8_t type;
union {
uint32_t logical_voltage;
uint8_t vdt_index;
uint8_t vfe_equ_index;
struct {
uint8_t pstate_index;
uint8_t freq_type;
} pstate;
uint8_t vpstate_index;
struct {
uint32_t clock_domain;
uint32_t freq;
} freq;
} data;
struct nv2080_gpu_perf_pstates_data_2_delta_entry voltage_delta;
uint32_t current_target_voltage;
} *p;
} voltages;
} pstates[16];
/* Not touched by the code in this file beyond zero-initialisation. */
struct {
uint32_t flags;
uint32_t voltage_count;
union {
uint64_t storage;
struct nv2080_gpu_perf_pstates_data_2_voltage_entry *p;
} voltages;
} ov;
};
/*
 * Number of clock and voltage entries allocated per pstate when
 * print_gpu_information_by_index() queries struct nv2080_gpu_perf_pstates_data_2.
 */
#define PSTATES_DATA_2_CLOCK_COUNT 2
#define PSTATES_DATA_2_VOLTAGE_COUNT 0
/*
 * Display names indexed by the sensor number passed to
 * nvml_device_get_temperature().  print_gpu_information_by_index() currently
 * queries only the first three sensors.
 */
static const char *const temperature_sensor_names[] = {
"core",
"RAM",
"board",
"voltage regulator 1",
"voltage regulator 2",
"voltage regulator 3",
"voltage regulator 4"
};
/*
 * Write the command-line synopsis to stderr.
 *
 * name: program name to show in the synopsis (normally argv[0]).
 */
static void print_usage(const char *name) {
	fputs("Usage:\n", stderr);
	/* Query / modify form */
	fprintf(stderr, " %s { -a | -u <unit> } [-H [<pstate>.]{core|ram}{+|-}<freq-offset>]\n", name);
	fputs(" [-f <fan-speed> | -F] [-p {-1|<pstate>}]\n", stderr);
	/* Listing form */
	fprintf(stderr, " %s -l\n", name);
	fputs("Frequency offset values are in MHz.\n", stderr);
}
/*
 * Parse a complete base-10 integer and return it in freshly allocated storage.
 *
 * s: NUL-terminated text.  The whole string must form one number; an optional
 *    leading '+' or '-' is accepted (as by strtol()).
 *
 * Returns a pointer obtained from malloc() that the caller owns and must
 * free(), or NULL after printing a diagnostic to stderr when
 *   - the text is empty or contains no digits,
 *   - anything follows the number,
 *   - the value does not fit in an int, or
 *   - memory cannot be allocated.
 */
static int *parse_and_create_number(const char *s) {
	char *end_p;
	errno = 0;
	long int value = strtol(s, &end_p, 10);
	/* end_p == s means nothing was converted (e.g. "" or a lone sign). */
	if(end_p == s || *end_p || errno == ERANGE || value < INT_MIN || value > INT_MAX) {
		fprintf(stderr, "Invalid number '%s'\n", s);
		return NULL;
	}
	int *n = malloc(sizeof *n);
	if(!n) {
		fputs("Out of memory when parsing options\n", stderr);
		return NULL;
	}
	*n = (int)value;
	return n;
}
/*
 * Parse one '-H' argument of the form [<pstate>.]{core|ram|memory}{+|-}<offset>
 * and record the offset in the matching table.
 *
 * s:                 the option argument.
 * core_freq_offsets: 16-element table, indexed by pstate, for the core domain.
 * ram_freq_offsets:  16-element table, indexed by pstate, for the RAM domain.
 *
 * When the "<pstate>." prefix is absent (or the string starts with '.'),
 * pstate 0 is used.  The stored value is allocated by
 * parse_and_create_number(); a value already present in the slot is freed
 * and replaced.
 *
 * Returns 0 on success, -1 after printing a diagnostic to stderr on any
 * syntax error; on failure the tables are left unchanged.
 */
static int parse_freq_offset_setting(const char *s, int **core_freq_offsets, int **ram_freq_offsets) {
	unsigned int pstate = 0;
	const char *dot = strchr(s, '.');
	if(dot && dot > s) {
		char *end_p = NULL;
		unsigned long int value = 0;
		/* At most two characters are allowed in front of the dot. */
		int is_valid = dot - s <= 2;
		if(is_valid) {
			value = strtoul(s, &end_p, 10);
			is_valid = end_p == dot && value <= 15;
		}
		if(!is_valid) {
			fputs("Invalid pstate specification for option '-H'\n", stderr);
			return -1;
		}
		pstate = (unsigned int)value;
		s = dot + 1;
	}

	int **freq_offsets;
	if(strncmp(s, "core", 4) == 0) {
		freq_offsets = core_freq_offsets;
		s += 4;
	} else if(strncmp(s, "ram", 3) == 0) {
		freq_offsets = ram_freq_offsets;
		s += 3;
	} else if(strncmp(s, "memory", 6) == 0) {
		freq_offsets = ram_freq_offsets;
		s += 6;
	} else {
		fputs("Invalid domain specification for option '-H'\n", stderr);
		return -1;
	}

	if(*s != '+' && *s != '-') {
		fputs("Offset value must have '+' or '-' prefix for option '-H'\n", stderr);
		return -1;
	}

	int *offset = parse_and_create_number(s);
	if(!offset) return -1;
	/* A repeated -H for the same slot replaces the earlier value. */
	free(freq_offsets[pstate]);
	freq_offsets[pstate] = offset;
	return 0;
}
/*
 * Report an NVML failure on stderr as "<prefix>: <message>".
 *
 * e:      status code to translate with nvmlErrorString().
 * prefix: text naming the call that failed.
 */
static void nvml_perror(int e, const char *prefix) {
	const char *message = nvmlErrorString(e);
	fprintf(stderr, "%s: %s\n", prefix, message);
}
static void get_frequency(struct nvml_device *dev, const struct nvml_gpu_perf_pstates *pstates_data_1, const struct nv2080_gpu_perf_pstates_data_2 *pstates_data_2, unsigned int domain, unsigned int pstate, struct nvml_clock_rate *output) {
if(domain == 0 && nvml_device_get_raw_clock(dev, 1 << 12, output) == 0) {
output->current_freq /= 2;
output->expected_freq /= 2;
return;
}
if(nvml_device_get_raw_clock(dev, 1 << domain, output) == 0) return;
unsigned int i, j, freq = 0;
if(pstates_data_1 && pstates_data_1->is_enabled) {
for(i = 0; i < pstates_data_1->pstate_count; i++) {
if(pstates_data_1->pstates[i].pstate != pstate) continue;
for(j = 0; j < pstates_data_1->clock_count; j++) {
if(pstates_data_1->pstates[i].clocks[j].domain != domain) continue;
freq = pstates_data_1->pstates[i].clocks[j].freq;
break;
}
break;
}
}
if(!freq && pstates_data_2) {
for(i = 0; i < pstates_data_2->pstate_count; i++) {
unsigned int entry_pstate = pstates_data_2->pstates[i].pstate;
int is_pstate_match = (entry_pstate >> pstate) | 1;
if(entry_pstate != 1 && !is_pstate_match) continue;
for(j = 0; j < pstates_data_2->clock_count; j++) {
struct nv2080_gpu_perf_pstates_data_2_clock_entry *clock_entry = pstates_data_2->pstates[i].clocks.p + j;
if(clock_entry->type > 1) continue;
if(clock_entry->type == 1 && !is_pstate_match) continue;
if(domain == 0 && ((clock_entry->domain >> 12) | 1)) {
freq = clock_entry->data.single_freq / 2;
break;
}
if(!((clock_entry->domain >> domain) | 1)) continue;
freq = clock_entry->data.single_freq;
break;
}
if(is_pstate_match || freq) break;
}
}
output->current_freq = freq;
output->expected_freq = freq;
}
/*
 * Query one GPU and print a human-readable report about it to stdout.
 *
 * unit: device index passed to nvmlDeviceGetHandleByIndex_v2().
 *
 * Returns 0 once the report has been printed, or -1 if the handle lookup or
 * one of the mandatory queries (name, UUID, memory info, performance state,
 * clock domains, pstate limits, power management limits) fails; those
 * failures are reported on stderr through nvml_perror().
 * Failure of the remaining queries does not abort the report: the affected
 * value is replaced by a placeholder or left out.
 */
static int print_gpu_information_by_index(unsigned int unit) {
/* Separator before the first and before any further item on the power line. */
static const char *const separators[] = { " ", ", " };
struct nvml_device *dev;
int e = nvmlDeviceGetHandleByIndex_v2(unit, &dev);
if(e) {
nvml_perror(e, "nvmlDeviceGetHandleByIndex_v2");
return -1;
}
int r = -1;
/* First pstates source; left NULL if allocation or the query fails. */
struct nvml_gpu_perf_pstates *pstates_data_1 = malloc(sizeof(struct nvml_gpu_perf_pstates));
if(pstates_data_1) {
e = nvml_device_get_pstates(dev, pstates_data_1, 0);
if(e) {
//nvml_perror(e, "nvml_device_get_pstates");
free(pstates_data_1);
pstates_data_1 = NULL;
}
}
/* Second pstates source, fetched through the generic RM call; NULL if unavailable. */
struct nv2080_gpu_perf_pstates_data_2 *pstates_data_2 = malloc(sizeof(struct nv2080_gpu_perf_pstates_data_2));
if(pstates_data_2) {
memset(pstates_data_2, 0, sizeof(struct nv2080_gpu_perf_pstates_data_2));
{
/* Only a single pstate entry (index 0, mask 1 << 0) is requested. */
unsigned int i = 0;
struct nv2080_gpu_perf_pstates_data_2_clock_entry *clock_entry = malloc(sizeof(struct nv2080_gpu_perf_pstates_data_2_clock_entry) * PSTATES_DATA_2_CLOCK_COUNT);
if(!clock_entry) goto pstates_data_2_fail;
memset(clock_entry, 0, sizeof(struct nv2080_gpu_perf_pstates_data_2_clock_entry) * PSTATES_DATA_2_CLOCK_COUNT);
#if 0
for(j = 0; j < PSTATES_DATA_2_CLOCK_COUNT; j++) {
clock_entry[j].domain = 1 << j;
}
#else
/* Entry 0 asks for domain 12 (reported as the core clock after halving), entry 1 for domain 4 (RAM). */
clock_entry[0].domain = 1 << 12;
clock_entry[1].domain = 1 << 4;
#endif
pstates_data_2->pstates[i].clocks.p = clock_entry;
/* NOTE(review): PSTATES_DATA_2_VOLTAGE_COUNT is 0, so this is malloc(0); an implementation that returns NULL for a zero-size request would take the failure path below. */
struct nv2080_gpu_perf_pstates_data_2_voltage_entry *voltage_entry = malloc(sizeof(struct nv2080_gpu_perf_pstates_data_2_voltage_entry) * PSTATES_DATA_2_VOLTAGE_COUNT);
if(!voltage_entry) goto pstates_data_2_fail;
memset(voltage_entry, 0, sizeof(struct nv2080_gpu_perf_pstates_data_2_voltage_entry) * PSTATES_DATA_2_VOLTAGE_COUNT);
#if 0
for(j = 0; j < PSTATES_DATA_2_VOLTAGE_COUNT; j++) {
voltage_entry[j].domain = 1 << j;
}
#else
//voltage_entry[0].domain = 1;
#endif
pstates_data_2->pstates[i].voltages.p = voltage_entry;
pstates_data_2->pstates[i].pstate = 1 << i;
}
pstates_data_2->pstate_count = 1;
pstates_data_2->clock_count = PSTATES_DATA_2_CLOCK_COUNT;
pstates_data_2->voltage_count = PSTATES_DATA_2_VOLTAGE_COUNT;
e = nvml_device_rm_call(dev, 0x2080206a, pstates_data_2, sizeof(struct nvml_gpu_perf_pstates) ? sizeof(struct nv2080_gpu_perf_pstates_data_2) : sizeof(struct nv2080_gpu_perf_pstates_data_2));
if(e) {
//nvml_perror(e, "nvml_device_rm_call");
/* Retry with domain 0 in place of domain 12 for the first clock entry. */
pstates_data_2->pstates->clocks.p[0].domain = 1;
e = nvml_device_rm_call(dev, 0x2080206a, pstates_data_2, sizeof(struct nv2080_gpu_perf_pstates_data_2));
if(e) {
//nvml_perror(e, "nvml_device_rm_call");
/* Also the target of the allocation-failure gotos above: drop the whole block. */
pstates_data_2_fail:
free(pstates_data_2->pstates[0].clocks.p);
free(pstates_data_2->pstates[0].voltages.p);
free(pstates_data_2);
pstates_data_2 = NULL;
}
}
}
char name[64];
e = nvmlDeviceGetName(dev, name, sizeof name);
if(e) {
nvml_perror(e, "nvmlDeviceGetName");
goto fail;
}
/* Serial number is optional: an empty string suppresses the line. */
char serial[30];
e = nvmlDeviceGetSerial(dev, serial, sizeof serial);
if(e) {
//nvml_perror(e, "nvmlDeviceGetSerial");
*serial = 0;
}
char uuid[80];
e = nvmlDeviceGetUUID(dev, uuid, sizeof uuid);
if(e) {
nvml_perror(e, "nvmlDeviceGetUUID");
goto fail;
}
/* Minor number is optional: (unsigned int)-1 marks it as unknown. */
unsigned int ctl_dev_unit;
e = nvmlDeviceGetMinorNumber(dev, &ctl_dev_unit);
if(e) ctl_dev_unit = -1;
struct nvml_memory memory_info;
e = nvmlDeviceGetMemoryInfo(dev, &memory_info);
if(e) {
nvml_perror(e, "nvmlDeviceGetMemoryInfo");
goto fail;
}
/* ECC mode is optional: -1 in both fields is printed as "unavailable". */
int ecc_state, pending_ecc_state;
e = nvmlDeviceGetEccMode(dev, &ecc_state, &pending_ecc_state);
if(e) {
//nvml_perror(e, "nvmlDeviceGetEccMode");
ecc_state = -1;
pending_ecc_state = -1;
}
unsigned int pstate;
e = nvml_get_performance_state(dev, &pstate);
if(e) {
nvml_perror(e, "nvml_get_performance_state");
goto fail;
}
unsigned int clock_domain_mask;
e = nvml_get_clock_domains(dev, 0, &clock_domain_mask);
if(e) {
nvml_perror(e, "nvml_get_clock_domains");
goto fail;
}
/* Domain 0 is reported as the core clock, domain 4 as the RAM clock. */
struct nvml_clock_rate core_freq, ram_freq;
get_frequency(dev, pstates_data_1, pstates_data_2, 0, pstate, &core_freq);
get_frequency(dev, pstates_data_1, pstates_data_2, 4, pstate, &ram_freq);
/* Limit type 0 is printed as the hard limit, type 1 as the soft limit. */
unsigned int pstate_hard_limit, pstate_soft_limit;
e = nvml_device_get_pstate_limit(dev, 0, &pstate_hard_limit);
if(e) {
nvml_perror(e, "nvml_device_get_pstate_limit");
goto fail;
}
e = nvml_device_get_pstate_limit(dev, 1, &pstate_soft_limit);
if(e) {
nvml_perror(e, "nvml_device_get_pstate_limit");
goto fail;
}
unsigned int pm_limit;
e = nvmlDeviceGetPowerManagementLimit(dev, &pm_limit);
if(e) {
nvml_perror(e, "nvmlDeviceGetPowerManagementLimit");
goto fail;
}
unsigned int default_pm_limit;
e = nvml_device_get_default_pm_limit(dev, &default_pm_limit);
if(e) {
nvml_perror(e, "nvml_device_get_default_pm_limit");
goto fail;
}
unsigned int fan_speed;
#if 0
for(j = 0; !(e = nvmlDeviceGetFanSpeed_v2(dev, j, &fan_speed)); j++) {
fprintf(stderr, "fan %u speed %u%%\n", j, fan_speed);
}
if(e != 2) {
nvml_perror(e, "nvmlDeviceGetFanSpeed_v2");
goto fail;
}
#else
/* Fan speed failure is reported but not fatal; (unsigned int)-1 suppresses the line. */
e = nvmlDeviceGetFanSpeed(dev, &fan_speed);
if(e) {
nvml_perror(e, "nvmlDeviceGetFanSpeed");
fan_speed = -1;
}
#endif
/* Fall back to the public call when the export table did not provide one. */
if(!nvml_device_get_temperature) nvml_device_get_temperature = nvmlDeviceGetTemperature;
unsigned int i, temp[3];
for(i = 0; i < sizeof temp / sizeof(unsigned int); i++) {
e = nvml_device_get_temperature(dev, i, temp + i);
if(e) {
/* Status 3 is skipped silently (presumably "not supported" -- TODO confirm); 0 hides the sensor. */
if(e != 3) nvml_perror(e, "nvml_device_get_temperature");
temp[i] = 0;
}
}
/* Power, energy and voltage are optional: 0 means "do not print". */
unsigned int power_rate;
e = nvmlDeviceGetPowerUsage(dev, &power_rate);
if(e) power_rate = 0;
unsigned long long total_power_consumption;
e = nvmlDeviceGetTotalEnergyConsumption(dev, &total_power_consumption);
if(e) total_power_consumption = 0;
unsigned int voltage;
e = nvml_device_get_voltage(dev, &voltage);
if(e) voltage = 0;
/* All data gathered; everything below only prints. */
if(ctl_dev_unit == (unsigned int)-1) printf("%u: %s\n", unit, name);
else printf("nvidia%u:%s, index %u\n", ctl_dev_unit, name, unit);
if(*serial) printf(" serial %s\n", serial);
printf(" UUID %s\n"
" RAM size %llu MiB, used %llu MiB, ECC %s%s\n"
" clock domain mask 0x%x\n",
uuid,
memory_info.total / 1048576, memory_info.used / 1048576,
ecc_state < 0 ? "unavailable" : (ecc_state ? "on" : "off"),
pending_ecc_state != ecc_state ? " (change pending)" : "",
clock_domain_mask);
/* Use the two-line layout only when a non-zero frequency offset is in effect. */
if(pstates_data_2 && pstates_data_2->pstates->pstate == 1 &&
(pstates_data_2->pstates->clocks.p[0].freq_delta.value ||
pstates_data_2->pstates->clocks.p[1].freq_delta.value)) {
printf(" core frequency %u MHz", core_freq.current_freq / 1000);
if(pstates_data_2->pstates->clocks.p[0].freq_delta.value) {
printf(", offset %d", pstates_data_2->pstates->clocks.p[0].freq_delta.value / 1000);
}
printf("\n RAM frequency %u MHz", ram_freq.current_freq / 1000);
if(pstates_data_2->pstates->clocks.p[1].freq_delta.value) {
printf(", offset %d", pstates_data_2->pstates->clocks.p[1].freq_delta.value / 1000);
}
} else {
printf(" core frequency %u MHz, RAM frequency %u MHz",
core_freq.current_freq / 1000, ram_freq.current_freq / 1000);
}
printf("\n performance state %u (hard limit %u, soft limit %u)\n"
" power management limit %u (default %u)\n",
pstate, pstate_hard_limit, pstate_soft_limit,
pm_limit, default_pm_limit);
if(fan_speed != (unsigned int)-1) printf(" fan speed %u%%\n", fan_speed);
for(i = 0; i < sizeof temp / sizeof(unsigned int); i++) {
if(!temp[i]) continue;
printf(" %s temperature %u C\n", temperature_sensor_names[i], temp[i]);
}
if(power_rate || total_power_consumption) {
/* i selects the separator: " " for the first item, ", " for the second. */
i = 0;
if(power_rate) printf("%spower %u mW", separators[i++], power_rate);
if(total_power_consumption) printf("%senergy consumed %llu J", separators[i], total_power_consumption / 1000);
putchar('\n');
}
if(voltage) printf(" voltage %u mV\n", voltage);
putchar('\n');
r = 0;
/* Common exit: release both pstates blocks (free(NULL) is harmless). */
fail:
free(pstates_data_1);
if(pstates_data_2) {
/* Only slot 0 was allocated here; the other slots are NULL from the memset unless the call changed them. */
for(i = 0; i < 16; i++) {
free(pstates_data_2->pstates[i].clocks.p);
free(pstates_data_2->pstates[i].voltages.p);
}
free(pstates_data_2);
}
return r;
}
static int set_clock_rate(struct nvml_device *dev, unsigned int domain, unsigned int freq) {
struct nvml_gpu_perf_pstates pstates = {
.is_enabled = 1,
.pstate_count = 1,
.clock_count = 1,
.pstates[0] = {
.pstate = 0,
//.is_overclocked = 1,
.clocks[0] = {
.domain = domain,
//.is_force_bypass = 1,
//.apply_ratio = 1,
.freq = freq * 1000
}
}
};
int e = nvml_device_set_pstates(dev, &pstates, 1);
if(e) {
nvml_perror(e, "nvml_device_set_pstates");
return -1;
}
return 0;
}
/*
 * Apply per-pstate frequency offsets for one clock domain with RM command
 * 0x2080206b.
 *
 * dev:     device to change.
 * domain:  clock domain number; sent as the bit mask 1 << domain.
 * offsets: 16-element table indexed by pstate; NULL slots are skipped, other
 *          slots point to an offset in MHz (multiplied by 1000 for the
 *          request).  The table is not modified.
 *
 * One clock entry is allocated per non-NULL slot; all of them are released
 * before returning, on success and on failure alike.
 *
 * Returns 0 on success, -1 after printing a diagnostic to stderr.
 */
static int set_freq_offset_via_pstates20(struct nvml_device *dev, unsigned int domain, int **offsets) {
	struct nv2080_gpu_perf_pstates_data_2 pstates_data = { .flags = 0x1, .clock_count = 1 };
	/* Private record of the allocations, so cleanup does not depend on the
	 * request block still holding the same pointers after the call. */
	struct nv2080_gpu_perf_pstates_data_2_clock_entry *entries[16] = { NULL };
	unsigned int entry_count = 0;
	int r = -1;
	int e;
	unsigned int i;
	for(i = 0; i < 16; i++) {
		const int *offset = offsets[i];
		if(!offset) continue;
		struct nv2080_gpu_perf_pstates_data_2_clock_entry *clock_entry = calloc(1, sizeof *clock_entry);
		if(!clock_entry) {
			fputs("Out of memory\n", stderr);
			goto cleanup;
		}
		entries[entry_count++] = clock_entry;
		clock_entry->domain = 1 << domain;
		clock_entry->type = 0;
		clock_entry->freq_delta.value = *offset * 1000;
		pstates_data.pstates[pstates_data.pstate_count].pstate = 1 << i;
		pstates_data.pstates[pstates_data.pstate_count].flags = 0x1;
		pstates_data.pstates[pstates_data.pstate_count].clocks.p = clock_entry;
		pstates_data.pstate_count++;
	}
	e = nvml_device_rm_call(dev, 0x2080206b, &pstates_data, sizeof(struct nv2080_gpu_perf_pstates_data_2));
	if(e) nvml_perror(e, "nvml_device_rm_call");
	else r = 0;
cleanup:
	for(i = 0; i < entry_count; i++) free(entries[i]);
	return r;
}
/*
 * Apply per-pstate frequency offsets for one clock domain.
 *
 * dev:     device to change.
 * domain:  clock domain number.
 * offsets: 16-element table indexed by pstate; NULL slots are skipped, other
 *          slots point to an offset in MHz.
 *
 * Every offset must lie within -10000..10000 MHz.  All offsets are checked
 * before anything is applied, so a bad value never leaves the device
 * half-configured and the fallback path only ever sees validated values
 * (which also keeps "* 1000" from overflowing).
 *
 * Offsets are applied one by one with nvml_device_adjust_frequency(); if that
 * call fails, the whole table is applied once through
 * set_freq_offset_via_pstates20() instead.
 *
 * Returns 0 on success, -1 after printing a diagnostic to stderr.
 */
static int set_freq_offset(struct nvml_device *dev, unsigned int domain, int **offsets) {
	unsigned int i;
	for(i = 0; i < 16; i++) {
		const int *offset = offsets[i];
		if(!offset) continue;
		if(*offset < -10000) {
			fprintf(stderr, "Frequency offset %d for domain %u is too low\n",
				*offset, domain);
			return -1;
		}
		if(*offset > 10000) {
			fprintf(stderr, "Frequency offset %d for domain %u is too high\n",
				*offset, domain);
			return -1;
		}
	}
	for(i = 0; i < 16; i++) {
		const int *offset = offsets[i];
		if(!offset) continue;
		int e = nvml_device_adjust_frequency(dev, domain, i, *offset * 1000);
		if(e) {
			/* The fallback handles the complete table, so stop iterating here. */
			if(set_freq_offset_via_pstates20(dev, domain, offsets) == 0) return 0;
			nvml_perror(e, "nvml_device_adjust_frequency");
			return -1;
		}
	}
	return 0;
}
/*
 * Program entry point.
 *
 * Options:
 *   -a            operate on all devices
 *   -u <unit>     operate on the device with this index
 *   -H <spec>     frequency offset, see parse_freq_offset_setting()
 *   -c <MHz>      set the core clock
 *   -m <MHz>      set the RAM clock
 *   -f <percent>  set the fan speed (1..100)
 *   -F            reset the fan speed
 *   -p <pstate>   force a performance state (0..15), or -1 to clear it
 *   -l            list devices
 *   -h            print usage
 *
 * With -a or -u and no modifying option, a report is printed for the selected
 * device(s).
 *
 * Exit status: 0 on success; 1 when NVML fails, a value is out of range or a
 * numeric argument is malformed; -1 for a bad -u / -H argument, an unknown
 * option or a missing target selection.
 */
int main(int argc, char **argv) {
	int list_gpu = 0;
	/* -2: no target selected, -1: all devices, >= 0: a single device index */
	int unit = -2;
	int *core_freq = NULL;
	int *ram_freq = NULL;
	int is_offset_value_specified = 0;
	int *core_freq_offset_by_pstate[16] = { NULL };
	int *ram_freq_offset_by_pstate[16] = { NULL };
	int *fan_speed = NULL;
	int should_reset_fan_speed = 0;
	int *pstate = NULL;
	while(1) {
		int c = getopt(argc, argv, "au:H:c:m:f:Fp:lh");
		if(c == -1) break;
		switch(c) {
			case 'a':
				unit = -1;
				break;
			case 'u': {
				/* strtol() instead of atoi() so that garbage is rejected rather than read as 0. */
				char *end_p;
				errno = 0;
				long int value = strtol(optarg, &end_p, 10);
				if(end_p == optarg || *end_p || errno == ERANGE || value < 0 || value > INT_MAX) {
					fprintf(stderr, "%s: Invalid unit number '%s'\n",
						argv[0], optarg);
					return -1;
				}
				unit = (int)value;
				break;
			}
			case 'H':
				if(parse_freq_offset_setting(optarg, core_freq_offset_by_pstate, ram_freq_offset_by_pstate) < 0) return -1;
				is_offset_value_specified = 1;
				break;
			case 'c':
				core_freq = parse_and_create_number(optarg);
				if(!core_freq) return 1;
				break;
			case 'm':
				ram_freq = parse_and_create_number(optarg);
				if(!ram_freq) return 1;
				break;
			case 'f':
				fan_speed = parse_and_create_number(optarg);
				if(!fan_speed) return 1;
				should_reset_fan_speed = 0;
				break;
			case 'F':
				should_reset_fan_speed = 1;
				free(fan_speed);
				fan_speed = NULL;
				break;
			case 'p':
				pstate = parse_and_create_number(optarg);
				if(!pstate) return 1;
				break;
			case 'l':
				list_gpu = 1;
				break;
			case 'h':
				print_usage(argv[0]);
				return 0;
			case '?':
			default:
				return -1;
		}
	}
	if(unit == -2 && !list_gpu) {
		print_usage(argv[0]);
		return -1;
	}
	int e = nvmlInit_v2();
	//int e = nvmlInitWithFlags(0);
	if(e) {
		nvml_perror(e, "nvmlInit_v2");
		return 1;
	}
	import_nvml_internal_functions();
	unsigned int device_count;
	e = nvmlDeviceGetCount_v2(&device_count);
	if(e) {
		nvml_perror(e, "nvmlDeviceGetCount_v2");
		return 1;
	}
	unsigned int i = 0;
	if(list_gpu) {
		char name[64];
		while(i < device_count) {
			struct nvml_device *dev;
			e = nvmlDeviceGetHandleByIndex_v2(i, &dev);
			if(e) nvml_perror(e, "nvmlDeviceGetHandleByIndex_v2");
			else {
				e = nvmlDeviceGetName(dev, name, sizeof name);
				if(e) nvml_perror(e, "nvmlDeviceGetName");
				else printf("%u %s\n", i, name);
			}
			i++;
		}
		return 0;
	}
	/* No modifying option given: just report. */
	if(!core_freq && !ram_freq && !is_offset_value_specified && !fan_speed && !should_reset_fan_speed && !pstate) {
		if(unit < 0) while(i < device_count) {
			//printf("%d:\n", i);
			print_gpu_information_by_index(i);
			i++;
		} else if((unsigned int)unit < device_count) {
			print_gpu_information_by_index(unit);
		} else {
			fprintf(stderr, "%s: Unit number %d out of range\n", argv[0], unit);
			return 1;
		}
		return 0;
	}
	/* Reject out-of-range values before touching any device. */
	if(fan_speed && (*fan_speed < 1 || *fan_speed > 100)) {
		fprintf(stderr, "%s: Fan speed %d is out of valid range\n", argv[0], *fan_speed);
		return 1;
	}
	if(pstate && (*pstate < -1 || *pstate > 15)) {
		fprintf(stderr, "%s: Performance state %d is out of valid range\n",
			argv[0], *pstate);
		return 1;
	}
	if(unit >= 0) {
		if((unsigned int)unit < device_count) {
			/* Make the loop below run exactly once, for this device. */
			i = unit;
			device_count = 1;
		} else {
			fprintf(stderr, "%s: Unit number %d out of range\n", argv[0], unit);
			return 1;
		}
	} else if(!device_count) {
		fputs("No supported device found\n", stderr);
		return 1;
	}
	int r = 0;
	do {
		struct nvml_device *dev;
		e = nvmlDeviceGetHandleByIndex_v2(i, &dev);
		if(e) nvml_perror(e, "nvmlDeviceGetHandleByIndex_v2");
		else {
			/* Each clock is first set directly; the pstates table is the fallback. */
			if(core_freq) {
				e = nvml_device_set_raw_clock(dev, 1, *core_freq * 1000);
				if(e && set_clock_rate(dev, 0, *core_freq) < 0) {
					nvml_perror(e, "nvml_device_set_raw_clock");
					r = 1;
				}
			}
			if(ram_freq) {
				e = nvml_device_set_raw_clock(dev, 1 << 4, *ram_freq * 1000);
				if(e && set_clock_rate(dev, 4, *ram_freq) < 0) {
					nvml_perror(e, "nvml_device_set_raw_clock");
					r = 1;
				}
			}
			if(is_offset_value_specified) {
				if(set_freq_offset(dev, 0, core_freq_offset_by_pstate) < 0) r = 1;
				if(set_freq_offset(dev, 4, ram_freq_offset_by_pstate) < 0) r = 1;
			}
			if(fan_speed) {
				e = nvml_device_set_fan_speed(dev, *fan_speed);
				if(e) {
					nvml_perror(e, "nvml_device_set_fan_speed");
					r = 1;
				}
			} else if(should_reset_fan_speed) {
				e = nvml_device_reset_fan_speed(dev);
				if(e) {
					nvml_perror(e, "nvml_device_reset_fan_speed");
					r = 1;
				}
			}
			if(pstate) {
				if(*pstate < 0) {
					e = nvml_clear_performance_state(dev);
					if(e) {
						nvml_perror(e, "nvml_clear_performance_state");
						r = 1;
					}
				} else {
					e = nvml_set_performance_state(dev, *pstate);
					if(e) {
						nvml_perror(e, "nvml_set_performance_state");
						r = 1;
					}
				}
			}
		}
	} while(++i < device_count);
	return r;
}