gpu/darwin: pure go implementation (#57)

This commit is contained in:
UUBulb 2024-08-26 22:57:07 +08:00 committed by GitHub
parent f03671686f
commit 42187f2bf2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 230 additions and 340 deletions

1
go.mod
View File

@ -11,6 +11,7 @@ require (
github.com/creack/pty v1.1.21
github.com/dean2021/goss v0.0.0-20230129073947-df90431348f1
github.com/ebi-yade/altsvc-go v0.1.1
github.com/ebitengine/purego v0.7.1
github.com/go-ping/ping v1.1.0
github.com/iamacarpet/go-winpty v1.0.4
github.com/jaypipes/ghw v0.12.0

2
go.sum
View File

@ -37,6 +37,8 @@ github.com/dean2021/goss v0.0.0-20230129073947-df90431348f1 h1:5UiJ324LiCdOF/3w/
github.com/dean2021/goss v0.0.0-20230129073947-df90431348f1/go.mod h1:NiLueuVb3hYcdF4ta+2ezcKJh6BEjhrBz9Hts6XJ5Sc=
github.com/ebi-yade/altsvc-go v0.1.1 h1:HmZDNb5ZOPlkyXhi34LnRckawFCux7yPYw+dtInIixo=
github.com/ebi-yade/altsvc-go v0.1.1/go.mod h1:K/U20bLcsOVrbTeDhqRjp+e3tgNT5iAqSiQzPoU0/Q0=
github.com/ebitengine/purego v0.7.1 h1:6/55d26lG3o9VCZX8lping+bZcmShseiqlh2bnUDiPA=
github.com/ebitengine/purego v0.7.1/go.mod h1:ah1In8AOtksoNK6yk5z1HTJeUkC1Ez4Wk2idgGslMwQ=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=

View File

@ -1,53 +1,225 @@
//go:build darwin && !cgo
//go:build darwin
package gpu
import (
"os/exec"
"regexp"
"strings"
"fmt"
"unsafe"
"github.com/ebitengine/purego"
)
func extractGPUInfo(cmd *exec.Cmd) ([]string, error) {
gi, err := cmd.CombinedOutput()
if err != nil {
return nil, err
}
type (
CFStringEncoding = uint32
CFIndex = int32
CFTypeID = int32
CFNumberType = CFIndex
CFTypeRef = unsafe.Pointer
CFStringRef = unsafe.Pointer
CFDictionaryRef = unsafe.Pointer
re := regexp.MustCompile(`"model"\s*=\s*["<]?"([^">]+)"[">]?`)
matches := re.FindAllSubmatch(gi, -1)
var modelNames []string
for _, match := range matches {
if len(match) > 1 {
modelNames = append(modelNames, string(match[1]))
}
}
return modelNames, nil
machPort = uint32
ioIterator = uint32
ioObject = uint32
ioRegistryEntry = uint32
ioService = uint32
IOOptionBits = uint32
)
// Go-side signatures for the CoreFoundation and IOKit entry points this file
// calls. Each type is the static type of one of the package-level function
// variables that init binds with purego.RegisterFunc.
//
// CoreFoundation object references are passed in as uintptr and returned as
// unsafe.Pointer (CFStringRef, CFTypeRef and CFDictionaryRef are all aliases
// of unsafe.Pointer in this file).
//
// NOTE(review): CFIndex and CFTypeID are aliased to int32 in this file; on
// 64-bit Darwin the C types are long / unsigned long — confirm the narrower
// Go types are intentional.
// NOTE(review): IOServiceGetMatchingServicesFunc is declared as returning
// ioService, but callers compare the result with KERN_SUCCESS, i.e. treat it
// as a status code.
type (
CFStringCreateWithCStringFunc = func(alloc uintptr, cStr string, encoding CFStringEncoding) CFStringRef
CFGetTypeIDFunc = func(cf uintptr) CFTypeID
CFStringGetTypeIDFunc = func() CFTypeID
CFStringGetCStringFunc = func(cfStr uintptr, buffer *byte, size CFIndex, encoding CFStringEncoding) bool
CFDictionaryGetTypeIDFunc = func() CFTypeID
CFDictionaryGetValueFunc = func(dict, key uintptr) unsafe.Pointer
CFNumberGetValueFunc = func(number uintptr, theType CFNumberType, valuePtr uintptr) bool
CFReleaseFunc = func(cf uintptr)
IOServiceGetMatchingServicesFunc = func(mainPort machPort, matching uintptr, existing *ioIterator) ioService
IOIteratorNextFunc = func(iterator ioIterator) ioObject
IOServiceMatchingFunc = func(name string) CFDictionaryRef
IORegistryEntrySearchCFPropertyFunc = func(entry ioRegistryEntry, plane string, key, allocator uintptr, options IOOptionBits) CFTypeRef
IOObjectReleaseFunc = func(object ioObject) int
)
// Constants passed to, or compared against results of, the IOKit and
// CoreFoundation calls in this file. The names follow the C headers rather
// than Go naming conventions.
const (
// Status value the matching-services call is compared against on success.
KERN_SUCCESS = 0
// Value IOIteratorNext results are compared against to detect "no more entries".
MACH_PORT_NULL = 0
// IOKit class names used to build matching dictionaries.
IOSERVICE_GPU = "IOAccelerator"
IOSERVICE_PCI = "IOPCIDevice"
// Registry plane and option bits given to IORegistryEntrySearchCFProperty.
kIOServicePlane = "IOService"
kIORegistryIterateRecursively = 1
// Encoding used for every CFString created or read in this file.
kCFStringEncodingUTF8 = 0x08000100
// Number type requested from CFNumberGetValue.
kCFNumberIntType = 9
)
// Zero-valued stand-ins for the default allocator and default main port that
// are passed to the CoreFoundation / IOKit calls.
var (
kCFAllocatorDefault uintptr = 0
kIOMainPortDefault machPort = 0
)
// Library handles and raw symbol addresses, resolved once at package
// initialization with purego.Dlopen / purego.Dlsym.
//
// NOTE(review): every error result is discarded here. If a framework or a
// symbol cannot be resolved the corresponding value stays zero and is then
// handed to purego.RegisterFunc in init — confirm how purego reacts to a zero
// address and consider reporting the error instead.
var (
coreFoundation, _ = purego.Dlopen("/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation", purego.RTLD_LAZY|purego.RTLD_GLOBAL)
ioKit, _ = purego.Dlopen("/System/Library/Frameworks/IOKit.framework/IOKit", purego.RTLD_LAZY|purego.RTLD_GLOBAL)
cfStringCreateWithCString, _ = purego.Dlsym(coreFoundation, "CFStringCreateWithCString")
cfGetTypeID, _ = purego.Dlsym(coreFoundation, "CFGetTypeID")
cfStringGetTypeID, _ = purego.Dlsym(coreFoundation, "CFStringGetTypeID")
cfStringGetCString, _ = purego.Dlsym(coreFoundation, "CFStringGetCString")
cfDictionaryGetTypeID, _ = purego.Dlsym(coreFoundation, "CFDictionaryGetTypeID")
cfDictionaryGetValue, _ = purego.Dlsym(coreFoundation, "CFDictionaryGetValue")
cfNumberGetValue, _ = purego.Dlsym(coreFoundation, "CFNumberGetValue")
cfRelease, _ = purego.Dlsym(coreFoundation, "CFRelease")
ioServiceGetMatchingServices, _ = purego.Dlsym(ioKit, "IOServiceGetMatchingServices")
ioIteratorNext, _ = purego.Dlsym(ioKit, "IOIteratorNext")
ioServiceMatching, _ = purego.Dlsym(ioKit, "IOServiceMatching")
ioRegistryEntrySearchCFProperty, _ = purego.Dlsym(ioKit, "IORegistryEntrySearchCFProperty")
ioObjectRelease, _ = purego.Dlsym(ioKit, "IOObjectRelease")
)
// Callable Go function variables, one per symbol above. They are nil until
// init binds them with purego.RegisterFunc.
var (
CFStringCreateWithCString CFStringCreateWithCStringFunc
CFGetTypeID CFGetTypeIDFunc
CFStringGetTypeID CFStringGetTypeIDFunc
CFStringGetCString CFStringGetCStringFunc
CFDictionaryGetTypeID CFDictionaryGetTypeIDFunc
CFDictionaryGetValue CFDictionaryGetValueFunc
CFNumberGetValue CFNumberGetValueFunc
CFRelease CFReleaseFunc
IOServiceGetMatchingServices IOServiceGetMatchingServicesFunc
IOIteratorNext IOIteratorNextFunc
IOServiceMatching IOServiceMatchingFunc
IORegistryEntrySearchCFProperty IORegistryEntrySearchCFPropertyFunc
IOObjectRelease IOObjectReleaseFunc
)
func init() {
purego.RegisterFunc(&CFStringCreateWithCString, cfStringCreateWithCString)
purego.RegisterFunc(&CFGetTypeID, cfGetTypeID)
purego.RegisterFunc(&CFStringGetTypeID, cfStringGetTypeID)
purego.RegisterFunc(&CFStringGetCString, cfStringGetCString)
purego.RegisterFunc(&CFDictionaryGetTypeID, cfDictionaryGetTypeID)
purego.RegisterFunc(&CFDictionaryGetValue, cfDictionaryGetValue)
purego.RegisterFunc(&CFNumberGetValue, cfNumberGetValue)
purego.RegisterFunc(&CFRelease, cfRelease)
purego.RegisterFunc(&IOServiceGetMatchingServices, ioServiceGetMatchingServices)
purego.RegisterFunc(&IOIteratorNext, ioIteratorNext)
purego.RegisterFunc(&IOServiceMatching, ioServiceMatching)
purego.RegisterFunc(&IORegistryEntrySearchCFProperty, ioRegistryEntrySearchCFProperty)
purego.RegisterFunc(&IOObjectRelease, ioObjectRelease)
}
func GetGPUModel() ([]string, error) {
vendorNames := []string{
"AMD", "Intel", "Nvidia", "Apple",
return findDevices("model")
}
// FindUtilization is the exported entry point for findUtilization: key and
// dictKey are forwarded unchanged and its results are returned as-is.
func FindUtilization(key, dictKey string) (int, error) {
	usage, err := findUtilization(key, dictKey)
	return usage, err
}
func findDevices(key string) ([]string, error) {
var iterator ioIterator
var results []string
iv := IOServiceGetMatchingServices(kIOMainPortDefault, uintptr(IOServiceMatching(IOSERVICE_GPU)), &iterator)
if iv != KERN_SUCCESS {
return nil, fmt.Errorf("error retrieving GPU entry")
}
ioreg := exec.Command("ioreg", "-rd1", "-c", "IOAccelerator")
gi, err := extractGPUInfo(ioreg)
if err != nil || len(gi) == 0 {
ioreg = exec.Command("ioreg", "-rd1", "-c", "IOPCIDevice")
gi, err = extractGPUInfo(ioreg)
if err != nil {
return nil, err
var service ioObject
index := 0
for {
service = IOIteratorNext(iterator)
if service == MACH_PORT_NULL {
break
}
}
var gpuModel []string
for _, model := range gi {
for _, vendor := range vendorNames {
if strings.Contains(model, vendor) {
gpuModel = append(gpuModel, model)
break
cfStr := CFStringCreateWithCString(kCFAllocatorDefault, key, kCFStringEncodingUTF8)
result, _, _ := findProperties(service, uintptr(cfStr), 0)
IOObjectRelease(service)
if result != nil {
results = append(results, string(result))
index++
} else if key == "model" {
IOObjectRelease(iterator)
iv = IOServiceGetMatchingServices(kIOMainPortDefault, uintptr(IOServiceMatching(IOSERVICE_PCI)), &iterator)
if iv != KERN_SUCCESS {
return nil, fmt.Errorf("error retrieving GPU entry")
}
}
}
return gpuModel, nil
IOObjectRelease(iterator)
return results, nil
}
// findUtilization reads the registry property named key from the first
// IOAccelerator service and returns the integer stored under dictKey in that
// property (see findProperties for how the property is decoded).
//
// Only the first service yielded by the iterator is inspected. An error is
// returned when the matching call fails, when no service is found, or when
// the property cannot be read. The iterator and the service handle are
// released on every return path.
func findUtilization(key, dictKey string) (int, error) {
	var iterator ioIterator

	iv := IOServiceGetMatchingServices(kIOMainPortDefault, uintptr(IOServiceMatching(IOSERVICE_GPU)), &iterator)
	if iv != KERN_SUCCESS {
		return 0, fmt.Errorf("error retrieving GPU entry")
	}
	// Deferred so the error returns below cannot leak the iterator.
	defer IOObjectRelease(iterator)

	// Only retrieving the utilization of first GPU here
	service := IOIteratorNext(iterator)
	if service == MACH_PORT_NULL {
		// A null port is not an object handle, so there is nothing to release.
		return 0, fmt.Errorf("no GPU utilization entry found")
	}
	defer IOObjectRelease(service)

	cfStr := CFStringCreateWithCString(kCFAllocatorDefault, key, CFStringEncoding(kCFStringEncodingUTF8))
	cfDictStr := CFStringCreateWithCString(kCFAllocatorDefault, dictKey, CFStringEncoding(kCFStringEncodingUTF8))
	_, result, err := findProperties(service, uintptr(cfStr), uintptr(cfDictStr))
	CFRelease(uintptr(cfStr))
	CFRelease(uintptr(cfDictStr))
	if err != nil {
		return 0, fmt.Errorf("failed retrieving GPU utilization: %w", err)
	}

	return result, nil
}
// findProperties searches the IOService plane, starting at service and
// recursing through the registry, for the property named by key (a
// CFStringRef passed as uintptr) and decodes it by CoreFoundation type:
//
//   - CFString: returns the string's UTF-8 bytes, trimmed at the terminating
//     NUL, with a zero int.
//   - CFDictionary: returns the number stored under dictKey (also a
//     CFStringRef passed as uintptr) as an int, with a nil byte slice.
//
// A missing property, an unsupported property type, or a failing
// CoreFoundation call yields a non-nil error. The property object returned by
// the search is released before this function returns.
func findProperties(service ioRegistryEntry, key, dictKey uintptr) ([]byte, int, error) {
	properties := IORegistryEntrySearchCFProperty(service, kIOServicePlane, key, kCFAllocatorDefault, kIORegistryIterateRecursively)
	if properties == nil {
		return nil, 0, fmt.Errorf("failed to exec IORegistryEntrySearchCFProperty")
	}
	ptrValue := uintptr(properties)
	// The search hands back an owned reference; release it on every path,
	// not only in the string case.
	defer CFRelease(ptrValue)

	switch CFGetTypeID(ptrValue) {
	// model
	case CFStringGetTypeID():
		buf := make([]byte, 1024)
		// Pass len(buf): unsafe.Sizeof(buf) is the size of the slice header,
		// not of the 1024-byte backing array.
		if !CFStringGetCString(ptrValue, &buf[0], int32(len(buf)), uint32(kCFStringEncodingUTF8)) {
			return nil, 0, fmt.Errorf("failed to exec CFStringGetCString")
		}
		// Drop the NUL terminator and the unused tail so callers converting
		// to string do not get embedded zero bytes.
		for i, b := range buf {
			if b == 0 {
				buf = buf[:i]
				break
			}
		}
		return buf, 0, nil
	// PerformanceStatistics
	case CFDictionaryGetTypeID():
		cfValue := CFDictionaryGetValue(ptrValue, dictKey)
		if cfValue == nil {
			return nil, 0, fmt.Errorf("failed to exec CFDictionaryGetValue")
		}
		// kCFNumberIntType is a 32-bit C int, so receive it in an int32
		// rather than a wider Go int.
		var value int32
		if !CFNumberGetValue(uintptr(cfValue), kCFNumberIntType, uintptr(unsafe.Pointer(&value))) {
			return nil, 0, fmt.Errorf("failed to exec CFNumberGetValue")
		}
		return nil, int(value), nil
	}

	return nil, 0, fmt.Errorf("failed to exec IORegistryEntrySearchCFProperty")
}

View File

@ -1,65 +0,0 @@
//go:build darwin && cgo
package gpu
// #cgo LDFLAGS: -framework IOKit -framework CoreFoundation
// #include "stat/gpu_darwin.h"
import "C"
import (
"errors"
"strings"
"unsafe"
)
// GoStrings converts the argc C strings starting at argv into a freshly
// allocated slice of Go strings. The C memory is only read, never freed.
func GoStrings(argc C.int, argv **C.char) []string {
	n := int(argc)
	cstrs := unsafe.Slice(argv, n)
	out := make([]string, 0, n)
	for idx := 0; idx < len(cstrs); idx++ {
		out = append(out, C.GoString(cstrs[idx]))
	}
	return out
}
// extractGPUInfo asks the C helper find_devices for every value of the
// registry property named key and returns them as Go strings.
//
// find_devices returns a NULL-terminated, malloc'd array of strdup'd strings.
// Both the array and each element are freed here once the contents have been
// copied into Go memory. An error is returned when find_devices yields NULL.
func extractGPUInfo(key *C.char) ([]string, error) {
	devices := C.find_devices(key)
	if devices == nil {
		return nil, errors.New("cannot find key")
	}
	defer C.free(unsafe.Pointer(devices))

	// Count entries up to the NULL terminator.
	length := 0
	for {
		device := *(**C.char)(unsafe.Pointer(uintptr(unsafe.Pointer(devices)) + uintptr(length)*unsafe.Sizeof(*devices)))
		if device == nil {
			break
		}
		length++
	}

	gpu := GoStrings(C.int(length), devices)

	// GoStrings copies the bytes, so the individual C strings can be released
	// now; freeing only the array would leak every element.
	for _, s := range unsafe.Slice(devices, length) {
		C.free(unsafe.Pointer(s))
	}
	return gpu, nil
}
// GetGPUModel returns the "model" strings reported by extractGPUInfo that
// contain one of the vendor names AMD, Intel, Nvidia or Apple. Entries that
// match no vendor are dropped; lookup errors are passed through.
func GetGPUModel() ([]string, error) {
	key := C.CString("model")
	defer C.free(unsafe.Pointer(key))

	models, err := extractGPUInfo(key)
	if err != nil {
		return nil, err
	}

	knownVendors := [...]string{"AMD", "Intel", "Nvidia", "Apple"}
	hasKnownVendor := func(name string) bool {
		for i := range knownVendors {
			if strings.Contains(name, knownVendors[i]) {
				return true
			}
		}
		return false
	}

	var matched []string
	for _, name := range models {
		if hasKnownVendor(name) {
			matched = append(matched, name)
		}
	}
	return matched, nil
}

View File

@ -26,7 +26,7 @@ func (rsmi *ROCmSMI) Start() error {
if _, err := os.Stat(rsmi.BinPath); os.IsNotExist(err) {
binPath, err := exec.LookPath("rocm-smi")
if err != nil {
return errors.New("Didn't find the adequate tool to query GPU utilization")
return errors.New("didn't find the adequate tool to query GPU utilization")
}
rsmi.BinPath = binPath
}

View File

@ -1,144 +0,0 @@
#include "gpu_darwin.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define IOSERVICE_GPU "IOAccelerator"
#define IOSERVICE_PCI "IOPCIDevice"
/*
 * Searches the IOService plane, recursively from `service`, for the registry
 * property named `key` and decodes it by CoreFoundation type:
 *
 *   - CFString:     returns a strdup'd UTF-8 copy (caller must free()).
 *   - CFDictionary: returns the int stored under `dict_key`, cast to a
 *                   pointer via intptr_t (a stored value of 0 is therefore
 *                   indistinguishable from failure).
 *
 * Returns NULL when the property is missing, has another type, or cannot be
 * converted. `depth` is unused. The property object obtained from IOKit is
 * released on every path.
 */
void *find_properties(io_registry_entry_t service, int depth, CFStringRef key,
                      CFStringRef dict_key) {
  (void)depth;

  CFTypeRef properties = IORegistryEntrySearchCFProperty(
      service, kIOServicePlane, key, kCFAllocatorDefault,
      kIORegistryIterateRecursively);
  if (!properties) {
    return NULL;
  }

  void *result = NULL;
  if (CFGetTypeID(properties) == CFStringGetTypeID()) {
    char buffer[1024];
    /* Only duplicate the buffer when the conversion actually filled it. */
    if (CFStringGetCString((CFStringRef)properties, buffer, sizeof(buffer),
                           kCFStringEncodingUTF8)) {
      result = strdup(buffer);
    }
  } else if (CFGetTypeID(properties) == CFDictionaryGetTypeID()) {
    CFNumberRef cfValue = (CFNumberRef)CFDictionaryGetValue(
        (CFDictionaryRef)properties, dict_key);
    int value;
    if (cfValue != NULL &&
        CFNumberGetValue(cfValue, kCFNumberIntType, &value)) {
      result = (void *)(intptr_t)value;
    }
  }

  /* The search returns an owned reference; release it for every type. */
  CFRelease(properties);
  return result;
}
/* Frees the first `count` strings stored in `cards`, then the array itself. */
static void free_cards(char **cards, int count) {
  for (int i = 0; i < count; i++) {
    free(cards[i]);
  }
  free(cards);
}

/*
 * Collects the value of registry property `key` from every IOAccelerator
 * service and returns them as a NULL-terminated, malloc'd array of strdup'd
 * strings. The caller owns the array and every element.
 *
 * When a service has no such property and `key` is "model", iteration is
 * restarted over IOPCIDevice services instead.
 * NOTE(review): that restart happens for every service without the property,
 * including PCI ones — confirm this cannot loop on a PCI device lacking
 * "model".
 *
 * Returns NULL on allocation failure or when IOKit matching fails; everything
 * collected so far is freed in that case.
 */
char **find_devices(char *key) {
  io_service_t io_reg_err;
  io_iterator_t iterator;

  int capacity = 10;
  char **cards = malloc(capacity * sizeof(char *));
  if (!cards) {
    fprintf(stderr, "Memory allocation failed\n");
    return NULL;
  }

  io_reg_err = IOServiceGetMatchingServices(
      kIOMainPortDefault, IOServiceMatching(IOSERVICE_GPU), &iterator);
  if (io_reg_err != KERN_SUCCESS) {
    printf("Error getting GPU entry\n");
    free(cards); /* nothing collected yet, but the array itself must go */
    return NULL;
  }

  io_object_t service;
  int index = 0;

  while ((service = IOIteratorNext(iterator)) != MACH_PORT_NULL) {
    CFStringRef cfStr = CFStringCreateWithCString(kCFAllocatorDefault, key,
                                                  kCFStringEncodingUTF8);
    char *result = find_properties(service, 0, cfStr, CFSTR(""));
    CFRelease(cfStr);
    IOObjectRelease(service);

    if (result != NULL) {
      if (index >= capacity) {
        /* Grow geometrically instead of one slot per realloc. */
        capacity *= 2;
        char **new_cards = (char **)realloc(cards, capacity * sizeof(char *));
        if (!new_cards) {
          fprintf(stderr, "Memory reallocation failed\n");
          free_cards(cards, index);
          free(result);
          IOObjectRelease(iterator);
          return NULL;
        }
        cards = new_cards;
      }
      cards[index] = result;
      index++;
    }

    if (result == NULL && strcmp(key, "model") == 0) {
      IOObjectRelease(iterator);
      io_reg_err = IOServiceGetMatchingServices(
          kIOMainPortDefault, IOServiceMatching(IOSERVICE_PCI), &iterator);
      if (io_reg_err != KERN_SUCCESS) {
        printf("Error getting PCI entry\n");
        free_cards(cards, index); /* do not leak what was gathered so far */
        return NULL;
      }
    }
  }

  IOObjectRelease(iterator);

  /* Shrink (or grow by one) so there is exactly one slot for the terminator. */
  char **result_cards = (char **)realloc(cards, sizeof(char *) * (index + 1));
  if (!result_cards) {
    fprintf(stderr, "Memory reallocation failed\n");
    free_cards(cards, index);
    return NULL;
  }

  result_cards[index] = NULL;
  return result_cards;
}
/*
 * Reads registry property `key` from the first IOAccelerator service and
 * returns the int stored under `dict_key` in that property.
 *
 * Returns 0 when IOKit matching fails, when no service exists, or when the
 * value cannot be read (find_properties returned NULL).
 */
int find_utilization(char *key, char *dict_key) {
  /* Must start as NULL: it is read below even when no service was found. */
  void *result_ptr = NULL;
  io_service_t io_reg_err;
  io_iterator_t iterator;

  io_reg_err = IOServiceGetMatchingServices(
      kIOMainPortDefault, IOServiceMatching(IOSERVICE_GPU), &iterator);
  if (io_reg_err != KERN_SUCCESS) {
    printf("Error getting GPU entry\n");
    return 0;
  }

  io_object_t service = IOIteratorNext(iterator);
  if (service != MACH_PORT_NULL) {
    CFStringRef cfStr = CFStringCreateWithCString(kCFAllocatorDefault, key,
                                                  kCFStringEncodingUTF8);
    CFStringRef cfDictStr = CFStringCreateWithCString(
        kCFAllocatorDefault, dict_key, kCFStringEncodingUTF8);
    result_ptr = find_properties(service, 0, cfStr, cfDictStr);
    CFRelease(cfStr);
    CFRelease(cfDictStr);
    /* Release only a real handle; a null port is not an object. */
    IOObjectRelease(service);
  }

  IOObjectRelease(iterator);

  if (result_ptr == NULL) {
    return 0;
  }
  return (int)(intptr_t)result_ptr;
}

View File

@ -1,15 +0,0 @@
/*
 * Declarations for the IOKit-based GPU helpers implemented in gpu_darwin.c.
 * NOTE(review): the include guard is named __SMC_H__ although this header is
 * gpu_darwin.h — presumably copied from another header; confirm and rename.
 */
#ifndef __SMC_H__
#define __SMC_H__ 1
#include <IOKit/IOKitLib.h>
#include <CoreFoundation/CoreFoundation.h>
/* When the minimum deployment target is below 12.0, map the newer
 * kIOMainPortDefault name onto kIOMasterPortDefault. */
#if (defined __MAC_OS_X_VERSION_MIN_REQUIRED) && (__MAC_OS_X_VERSION_MIN_REQUIRED < 120000)
#define kIOMainPortDefault kIOMasterPortDefault
#endif
/* Looks up a registry property; returns a strdup'd string, an int cast to a
 * pointer, or NULL. */
void *find_properties(io_registry_entry_t, int, CFStringRef, CFStringRef);
/* Returns a NULL-terminated malloc'd array of property strings, or NULL. */
char **find_devices(char *);
/* Returns the int stored under the given dictionary key, or 0 on failure. */
int find_utilization(char *, char *);
#endif

View File

@ -26,7 +26,7 @@ func (smi *NvidiaSMI) Start() error {
if _, err := os.Stat(smi.BinPath); os.IsNotExist(err) {
binPath, err := exec.LookPath("nvidia-smi")
if err != nil {
return errors.New("Didn't find the adequate tool to query GPU utilization")
return errors.New("didn't find the adequate tool to query GPU utilization")
}
smi.BinPath = binPath
}

View File

@ -1,36 +1,12 @@
//go:build darwin && !cgo
//go:build darwin
package stat
import (
"os/exec"
"regexp"
"strconv"
"github.com/nezhahq/agent/pkg/gpu"
)
func extractGPUStat(cmd *exec.Cmd) ([]float64, error) {
gs, err := cmd.CombinedOutput()
if err != nil {
return nil, err
}
re := regexp.MustCompile(`"Device Utilization %"\s*=\s*(\d+)`)
matches := re.FindAllSubmatch(gs, -1)
var u []float64
for _, match := range matches {
if len(match) > 1 {
p, _ := strconv.ParseFloat(string(match[1]), 64)
u = append(u, p)
}
}
return u, nil
}
func GetGPUStat() (float64, error) {
ioreg := exec.Command("ioreg", "-rd1", "-c", "IOAccelerator")
gs, err := extractGPUStat(ioreg)
if err != nil || len(gs) == 0 {
return 0, err
}
return gs[0], nil
usage, err := gpu.FindUtilization("PerformanceStatistics", "Device Utilization %")
return float64(usage), err
}

View File

@ -1,25 +0,0 @@
//go:build darwin && cgo
package stat
// #cgo LDFLAGS: -framework IOKit -framework CoreFoundation
// #include "gpu_darwin.h"
import "C"
import (
"unsafe"
)
// extractGPUStat calls the C helper find_utilization with key and dict_key
// and returns its result converted to int. The error result is always nil.
func extractGPUStat(key *C.char, dict_key *C.char) (int, error) {
	return int(C.find_utilization(key, dict_key)), nil
}
// GetGPUStat returns the "Device Utilization %" entry of the
// "PerformanceStatistics" property as a float64. The error result is always
// nil; whatever extractGPUStat reports as its error is discarded.
func GetGPUStat() (float64, error) {
	cKey := C.CString("PerformanceStatistics")
	defer C.free(unsafe.Pointer(cKey))

	cDictKey := C.CString("Device Utilization %")
	defer C.free(unsafe.Pointer(cDictKey))

	usage, _ := extractGPUStat(cKey, cDictKey)
	return float64(usage), nil
}

View File

@ -2,7 +2,6 @@ package monitor
import (
"fmt"
"math"
"os/exec"
"runtime"
"strconv"
@ -42,7 +41,6 @@ var (
var (
netInSpeed, netOutSpeed, netInTransfer, netOutTransfer, lastUpdateNetStats uint64
cachedBootTime time.Time
gpuStat uint64
temperatureStat []model.SensorTemperature
)
@ -64,7 +62,6 @@ var statDataFetchAttempts = map[string]int{
}
var (
updateGPUStatus int32
updateTempStatus int32
)
@ -219,10 +216,7 @@ func GetState(skipConnectionCount bool, skipProcsCount bool) *model.HostState {
ret.Temperatures = temperatureStat
}
if agentConfig.GPU {
go updateGPUStat(&gpuStat)
ret.GPU = math.Float64frombits(gpuStat)
}
ret.GPU = updateGPUStat()
ret.NetInTransfer, ret.NetOutTransfer = netInTransfer, netOutTransfer
ret.NetInSpeed, ret.NetOutSpeed = netInSpeed, netOutSpeed
@ -350,23 +344,21 @@ func getConns(skipConnectionCount bool) (tcpConnCount, udpConnCount uint64) {
return tcpConnCount, udpConnCount
}
func updateGPUStat(gpuStat *uint64) {
if !atomic.CompareAndSwapInt32(&updateGPUStatus, 0, 1) {
return
}
defer atomic.StoreInt32(&updateGPUStatus, 0)
if statDataFetchAttempts["GPU"] < maxDeviceDataFetchAttempts {
gs, err := gpustat.GetGPUStat()
if err != nil {
statDataFetchAttempts["GPU"]++
printf("gpustat.GetGPUStat error: %v, attempt: %d", err, statDataFetchAttempts["GPU"])
atomicStoreFloat64(gpuStat, gs)
} else {
statDataFetchAttempts["GPU"] = 0
atomicStoreFloat64(gpuStat, gs)
// updateGPUStat returns the current GPU utilization, or 0 when GPU reporting
// is disabled in agentConfig, when the "GPU" fetch-attempt counter has
// reached maxDeviceDataFetchAttempts, or when the query fails.
//
// A failed query increments the "GPU" attempt counter and is reported through
// the package's printf helper; a successful query resets the counter.
func updateGPUStat() float64 {
	if !agentConfig.GPU {
		return 0
	}
	if statDataFetchAttempts["GPU"] >= maxDeviceDataFetchAttempts {
		return 0
	}

	gs, err := gpustat.GetGPUStat()
	if err != nil {
		statDataFetchAttempts["GPU"]++
		// Use the package logger rather than the builtin println, which
		// writes to stderr unconditionally and does not format error values.
		printf("gpustat.GetGPUStat error: %v, attempt: %d", err, statDataFetchAttempts["GPU"])
		return 0
	}

	statDataFetchAttempts["GPU"] = 0
	return gs
}
func updateTemperatureStat() {
@ -406,10 +398,6 @@ func isListContainsStr(list []string, str string) bool {
return false
}
// atomicStoreFloat64 atomically stores the IEEE 754 bit pattern of v into the
// uint64 that x points to.
func atomicStoreFloat64(x *uint64, v float64) {
	bits := math.Float64bits(v)
	atomic.StoreUint64(x, bits)
}
// printf forwards format and v to util.Printf, passing agentConfig.Debug as
// the first argument.
// NOTE(review): presumably util.Printf only emits output when that flag is
// set — confirm against the util package.
func printf(format string, v ...interface{}) {
util.Printf(agentConfig.Debug, format, v...)
}