9 Commits

Author SHA1 Message Date
zmy
7c304c688b fix another bug 2020-11-19 10:32:37 +08:00
zmy
356d201499 fix error when multi process on a single gpu 2020-11-19 10:22:51 +08:00
zmy
9e3bae50ba update readme 2020-11-17 21:15:07 +08:00
zmy
a047c0fd69 update readme 2020-11-17 21:14:08 +08:00
zmy
432dec365e update to 0.0.2 2020-11-17 21:09:11 +08:00
zmy
a74f51475e update display style, add customlize support 2020-11-17 21:07:40 +08:00
zmy
8d44d98e82 update 2020-11-17 16:53:52 +08:00
zmy
14cea4a287 update readme 2020-11-02 19:50:35 +08:00
zmy
bb320ac2e7 restrict platform to Linux only since it use ps command 2020-11-02 19:41:44 +08:00
4 changed files with 235 additions and 42 deletions

View File

@@ -5,24 +5,40 @@ A naive tool for observing gpu status and auto set visible gpu in python code.
## How to use ## How to use
1. install the package. 1. install the package.
``` ```shell
pip install https://git.zmy.pub/zmyme/gpuutil/archive/v0.0.1.tar.gz pip install https://git.zmy.pub/zmyme/gpuutil/archive/v0.0.2.tar.gz
``` ```
2. for observing gpu status, just input 2. for observing gpu status, just input
``` ```shell
python -m gpuutil <options> python -m gpuutil <options>
``` ```
where options can either be "brief" or "detail", and you will get something like when directly running ```python -m gpuutil```, you would probably get:
```text
+---+------+------+---------+---------+------+---------------+
|ID | Fan | Temp | Pwr | Freq | Util | Vmem |
+---+------+------+---------+---------+------+---------------+
| 0 | 22 % | 33 C | 4.47 W | 300 MHz | 0 % | 1569/11019 MiB|
| 1 | 22 % | 35 C | 3.87 W | 300 MHz | 0 % | 3/11019 MiB|
| 2 | 22 % | 36 C | 8.22 W | 300 MHz | 0 % | 3/11019 MiB|
| 3 | 22 % | 36 C | 21.82 W | 300 MHz | 0 % | 3/11019 MiB|
+---+------+------+---------+---------+------+---------------+
[34860|0] user1(783 MiB) python train.py --some -args
[38694|0] user2(783 MiB) python train.py --some --other -args
``` ```
================== GPU INFO ================== To get more information, run ```python -m gpuutil -h```, you would get:
[0] Utils: 94 % | Mem: 10166/11019 MiB(853MiB free) user1(10163MiB,pid=14018) ```text
[1] Utils: 89 % | Mem: 6690/11019 MiB(4329MiB free) user2(6687MiB,pid=19855) python __main__.py -h
[2] Utils: 0 % | Mem: 1/11019 MiB(11018MiB free) usage: __main__.py [-h] [--profile PROFILE] [--cols COLS] [--show-process SHOW_PROCESS] [--save]
[3] Utils: 0 % | Mem: 1/11019 MiB(11018MiB free)
================ PROCESS INFO ================ optional arguments:
[14018] user1(10163 MiB) python train.py --some -args -h, --help show this help message and exit
[19855] user2(6687 MiB) python train.py --some --different --args --profile PROFILE, -p PROFILE
profile keyword, corresponding configuration are saved in ~/.gpuutil.conf
--cols COLS, -c COLS colums to show
--show-process SHOW_PROCESS, -sp SHOW_PROCESS
whether show process or not
--save save config to profile
``` ```
3. To auto set visible gpu in your python code, just use the following python code. 3. To auto set visible gpu in your python code, just use the following python code.
@@ -43,4 +59,5 @@ def auto_set(num, allow_nonfree=True, ask=True, blacklist=[], show=True):
``` ```
## ps: ## ps:
you can get more detailed via accessing gpuutil.GPUStat class, for more information, just look the code. 1. you can get more detailed gpu info via accessing gpuutil.GPUStat class, for more information, just look the code.
2. Since it uses the ps command to get detailed process info, it can only be used on Linux.

View File

@@ -1,18 +1,67 @@
from gpuutil import GPUStat from gpuutil import GPUStat
import sys import sys
import json
import argparse
import os
def csv2list(csv):
    """Split a comma-separated string into a list of stripped, non-empty items."""
    stripped = (part.strip() for part in csv.split(','))
    return [part for part in stripped if part]
def str2bool(s):
    """Parse *s* as a boolean flag (case-insensitive).

    Returns True for any of 't', 'yes', 'y', 'aye', 'positive', 'true';
    every other value (including the empty string) is False.
    """
    # Membership test replaces the if/else-return-True/False anti-pattern.
    return s.lower() in {'t', 'yes', 'y', 'aye', 'positive', 'true'}
def load_config():
    """Load the JSON profile config from ~/.gpuutil.conf.

    Returns an empty dict when the config file does not exist.
    """
    configpath = os.path.join(os.path.expanduser('~'), '.gpuutil.conf')
    if not os.path.isfile(configpath):
        return {}
    with open(configpath, 'r', encoding='utf-8') as f:
        return json.load(f)
def save_config(config):
    """Write *config* as pretty-printed JSON to ~/.gpuutil.conf (overwriting it).

    Counterpart of load_config(); keys are profile names, values their params.
    """
    # Renamed from 'configdir': the target is a file path, not a directory
    # (consistent with load_config's 'configpath'). Plain 'w' suffices —
    # nothing is read back, so 'w+' was unnecessary.
    configpath = os.path.join(os.path.expanduser('~'), '.gpuutil.conf')
    with open(configpath, 'w', encoding='utf-8') as f:
        json.dump(config, f, ensure_ascii=False, indent=4)
if __name__ == '__main__': if __name__ == '__main__':
stat = GPUStat() stat = GPUStat()
show_types = ['brief', 'detail']
default_type = 'brief'
show_type = default_type
if len(sys.argv) > 1:
show_type = str(sys.argv[1])
if show_type in show_types:
stat.show(disp_type=show_type)
else:
print('The given type is \"{0}\" not understood, and it should be choosen from {1}\nUsing default type \"{2}\".'.format(show_type, show_types, default_type))
show_type = default_type
stat.show(disp_type=show_type)
# auto_set(1, ask=True, blacklist=[], show=True) avaliable_cols = ['ID', 'Fan', 'Temp', 'TempMax', 'Pwr', 'PwrMax', 'Freq', 'FreqMax', 'Util', 'Vmem', 'UsedMem', 'TotalMem', 'FreeMem', 'Users']
recommended_cols = ['ID', 'Fan', 'Temp', 'Pwr', 'Freq', 'Util', 'Vmem']
parser = argparse.ArgumentParser()
parser.add_argument('--profile', '-p', default=None, type=str, help='profile keyword, corresponding configuration are saved in ~/.gpuutil.conf')
parser.add_argument('--cols', '-c', type=csv2list, help='colums to show')
parser.add_argument('--show-process', '-sp', default=True, type=str2bool, help='whether show process or not')
parser.add_argument('--save', default=False, action="store_true", help='save config to profile')
args = parser.parse_args()
cols = args.cols if args.cols is not None else recommended_cols
show_process = args.show_process
unexpected_cols = []
for col in cols:
if col not in avaliable_cols:
unexpected_cols.append(col)
if len(unexpected_cols) > 0:
raise ValueError('Unexpected cols {0} occured. Cols must be chosen from {1}'.format(unexpected_cols, ','.join(avaliable_cols)))
if args.save:
params = {
"cols": cols,
"show-process": show_process
}
profile = args.profile if args.profile is not None else input('Please input your profile name:\n>>> ')
config = load_config()
config[profile] = params
save_config(config)
elif args.profile is not None:
config = load_config()
if args.profile in config:
params = config[args.profile]
cols = params["cols"]
show_process = params["show-process"]
else:
raise ValueError('Profile do not exist.\nAvaliable Profiles:{0}'.format(','.join(list(config.keys()))))
stat.show(enabled_cols = cols, show_command=show_process)

View File

@@ -1,8 +1,14 @@
from io import StringIO
from sys import platform
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import os import os
import json import json
import random import random
import sys import sys
import csv
import platform
osname = platform.system()
def xml2dict(node): def xml2dict(node):
@@ -121,13 +127,14 @@ def short_gpu_info(stat, disp_type='brief'):
util=stat_disp['util'], util=stat_disp['util'],
mem=stat_disp['mem'] mem=stat_disp['mem']
) )
if len(process_info) > 0: if len(process_info) > 0:
info += ' ' info += ' '
info += process_info info += process_info
return info return info
def get_basic_process_info():
def get_basic_process_info_linux():
pipe = os.popen('ps axo user:20,pid,args:1024') pipe = os.popen('ps axo user:20,pid,args:1024')
output = pipe.read() output = pipe.read()
lines = output.split('\n')[1:] lines = output.split('\n')[1:]
@@ -145,6 +152,67 @@ def get_basic_process_info():
} }
return processes return processes
def get_basic_process_info_windows():
    """Collect a {pid: {'user': None, 'command': name}} mapping via `tasklist`.

    The CSV form of tasklist used here exposes no owner column, so 'user'
    is always None on Windows.
    """
    raw = os.popen("tasklist /FO CSV").read()
    rows = [list(row) for row in csv.reader(StringIO(raw), delimiter=',', quotechar='"')]
    # First row is the CSV header; each remaining row has five fields, of
    # which only the image name and the pid are kept.
    return {
        pid: {"user": None, "command": name}
        for name, pid, _, _, _ in rows[1:]
    }
def draw_table(table, header_line = 0, c_align = 'r', h_align='c', delemeter = ' | ', joint_delemeter = '-+-'):
    """Render *table* (a list of rows of strings) as an ASCII box table.

    Args:
        table: rows of equal-length string lists; rows 0..header_line are headers.
        header_line: index of the last header row; a rule is drawn after it.
        c_align / h_align: alignment for body / header cells — a single flag
            ('l', 'c', 'r') applied to all columns, a string of one flag per
            column, or a pre-built list of flags.
        delemeter: string placed between cells.
        joint_delemeter: string joining column segments inside horizontal rules.

    Returns:
        The table as a single string, ending with a newline.

    Raises:
        ValueError: when an alignment string length matches neither 1 nor
            the number of columns.
    """
    num_columns = len(table[0])

    def cvt_align(align, num_columns):
        # Normalize an alignment spec into a list of one flag per column.
        if isinstance(align, str):
            if len(align) == 1:
                return [align] * num_columns
            elif len(align) == num_columns:
                return list(align)
            else:
                raise ValueError('align flag length mismatch')
        else:
            return align
    c_align = cvt_align(c_align, num_columns)
    h_align = cvt_align(h_align, num_columns)
    # Column widths: the longest cell seen in each column.
    max_lengths = [0] * num_columns
    for row in table:
        for i, col in enumerate(row):
            if len(col) > max_lengths[i]:
                max_lengths[i] = len(col)
    # (An unused 'width' total was removed here — it was dead computation.)
    hline = '+'
    hline += joint_delemeter.join(['-' * length for length in max_lengths])
    hline += '+\n'
    info = hline
    for i, row in enumerate(table):
        info += '|'
        row_just = []
        # Rows up to and including header_line use the header alignment.
        align = h_align if i <= header_line else c_align
        for w, col, a in zip(max_lengths, row, align):
            if a == 'c':
                row_just.append(col.center(w))
            elif a == 'l':
                row_just.append(col.ljust(w))
            elif a == 'r':
                row_just.append(col.rjust(w))
        info += delemeter.join(row_just)
        info += '|\n'
        if i == header_line:
            info += hline
    info += hline
    return info
class GPUStat(): class GPUStat():
def __init__(self): def __init__(self):
self.gpus = [] self.gpus = []
@@ -155,6 +223,11 @@ class GPUStat():
self.cuda_version = '' self.cuda_version = ''
self.attached_gpus = '' self.attached_gpus = ''
self.driver_version = '' self.driver_version = ''
def get_process_info(self):
    """Dispatch to the platform-specific process lister.

    NOTE(review): any platform other than Linux/Windows falls through to an
    implicit None — presumably unsupported; confirm callers tolerate that.
    """
    if osname == 'Linux':
        return get_basic_process_info_linux()
    elif osname == 'Windows':
        return get_basic_process_info_windows()
def parse(self): def parse(self):
self.raw_info = parse_nvsmi_info('nvidia-smi -q -x') self.raw_info = parse_nvsmi_info('nvidia-smi -q -x')
self.detailed_info = {} self.detailed_info = {}
@@ -165,7 +238,7 @@ class GPUStat():
if type(value) is not list: if type(value) is not list:
value = [value] value = [value]
self.detailed_info[key] = [parse_gpu_info(info) for info in value] self.detailed_info[key] = [parse_gpu_info(info) for info in value]
self.process_info = get_basic_process_info() self.process_info = self.get_process_info()
self.simplified_info = { self.simplified_info = {
"driver_version": self.detailed_info["driver_version"], "driver_version": self.detailed_info["driver_version"],
"cuda_version": self.detailed_info["cuda_version"], "cuda_version": self.detailed_info["cuda_version"],
@@ -182,31 +255,83 @@ class GPUStat():
gpu['id'] = i gpu['id'] = i
self.gpus.append(gpu) self.gpus.append(gpu)
def show(self, disp_type='brief', command=True): def show(self, enabled_cols = ['ID', 'Fan', 'Temp', 'Pwr', 'Freq', 'Util', 'Vmem', 'Users'], show_command=True):
self.parse() self.parse()
lines = [short_gpu_info(info, disp_type=disp_type) for info in self.gpus] gpu_infos = []
print('================== GPU INFO ==================') # stats = {
print('\n'.join(lines)) # "id": stat['id'],
if command: # "fan": stat['fan_speed'].split(' ')[0].strip(),
print('================ PROCESS INFO ================') # "temp_cur": stat['temperature']['current'].split(' ')[0].strip(),
# "temp_max": stat['temperature']['max'].split(' ')[0].strip(),
# "power_cur": stat['power']['current'].split(' ')[0].strip(),
# "power_max": stat['power']['max'].split(' ')[0].strip(),
# "clock_cur": stat['clocks']['current'].split(' ')[0].strip(),
# "clock_max": stat['clocks']['max'].split(' ')[0].strip(),
# "util": stat['utilization'],
# "mem_used": stat['memory']['used'].split(' ')[0].strip(),
# "mem_total": stat['memory']['total'].split(' ')[0].strip(),
# "mem_free": stat['memory']['free'].split(' ')[0].strip()
# }
for gpu in self.gpus:
process_fmt = '{user}({pid})'
process_info = ','.join([process_fmt.format(
user = proc['user'],
pid = proc['pid']
) for proc in gpu['processes']])
info_gpu = {
'ID': '{0}'.format(str(gpu['id'])),
'Fan': '{0} %'.format(gpu['fan_speed'].split(' ')[0].strip()),
'Temp': '{0} C'.format(gpu['temperature']['current'].split(' ')[0].strip()),
'TempMax': '{0} C'.format(gpu['temperature']['max'].split(' ')[0].strip()),
'Pwr': '{0} W'.format(gpu['power']['current'].split(' ')[0].strip()),
'PwrMax': '{0} W'.format(gpu['power']['max'].split(' ')[0].strip()),
'Freq': '{0} MHz'.format(gpu['clocks']['current'].split(' ')[0].strip()),
'FreqMax': '{0} MHz'.format(gpu['clocks']['max'].split(' ')[0].strip()),
'Util': '{0} %'.format(gpu['utilization'].split(' ')[0]),
'Vmem': '{0}/{1} MiB'.format(
gpu['memory']['used'].split(' ')[0].strip(),
gpu['memory']['total'].split(' ')[0].strip(),
),
'UsedMem': '{0} MiB'.format(gpu['memory']['used'].split(' ')[0].strip()),
'TotalMem': '{0} MiB'.format(gpu['memory']['total'].split(' ')[0].strip()),
'FreeMem': '{0} MiB'.format(gpu['memory']['free'].split(' ')[0].strip()),
'Users': process_info
}
gpu_infos.append(info_gpu)
align_methods = {key:'r' for key in gpu_infos[0]}
align_methods['Users'] = 'l'
if enabled_cols is None:
enabled_cols = list(align_methods.keys())
c_align = [align_methods[col] for col in enabled_cols]
info_table = [enabled_cols]
for info in gpu_infos:
this_row = [info[key] for key in enabled_cols]
info_table.append(this_row)
info = draw_table(info_table, header_line=0, delemeter=' | ', joint_delemeter='-+-', c_align=c_align)
if show_command:
procs = {} procs = {}
for gpu in self.gpus: for gpu in self.gpus:
for proc in gpu['processes']: for proc in gpu['processes']:
pid = proc['pid'] pid = proc['pid']
proc['gpu'] = [str(gpu['id'])]
if pid not in procs: if pid not in procs:
procs[pid] = proc procs[pid] = proc
proc_fmt = '[{pid}] {user}({vmem} MiB) {cmd}' else:
procs[pid]['gpu'].append(str(gpu['id']))
proc_fmt = '[{pid}|{gpus}] {user}({vmem} MiB) {cmd}'
proc_strs = [] proc_strs = []
for pid in procs: for pid in procs:
this_proc_str = proc_fmt.format( this_proc_str = proc_fmt.format(
user = procs[pid]['user'], user = procs[pid]['user'],
vmem = procs[pid]['vmem'].split(' ')[0], vmem = procs[pid]['vmem'].split(' ')[0],
pid = procs[pid]['pid'], pid = procs[pid]['pid'].rjust(5),
cmd = procs[pid]['command'] cmd = procs[pid]['command'],
gpus = ','.join(procs[pid]['gpu'])
) )
proc_strs.append(this_proc_str) proc_strs.append(this_proc_str)
proc_info = '\n'.join(proc_strs) proc_info = '\n'.join(proc_strs)
print(proc_info) info += proc_info
print(info)
class MoreGPUNeededError(Exception): class MoreGPUNeededError(Exception):
def __init__(self): def __init__(self):
@@ -284,4 +409,6 @@ def auto_set(num, allow_nonfree=True, ask=True, blacklist=[], show=True):
else: else:
raise MoreGPUNeededError raise MoreGPUNeededError
set_gpu(selected_gpu, show=show) set_gpu(selected_gpu, show=show)
if __name__ == '__main__':
print(get_basic_process_info_windows())

View File

@@ -2,15 +2,15 @@ from setuptools import setup, find_packages
setup( setup(
name = 'gpuutil', name = 'gpuutil',
version = '0.0.1', version = '0.0.2',
keywords='gpu utils', keywords='gpu utils',
description = 'A tool for observing gpu stat and auto set visible gpu in python code.', description = 'A tool for observing gpu stat and auto set visible gpu in python code.',
license = 'MIT License', license = 'MIT License',
url = '', url = 'https://git.zmy.pub/zmyme/gpuutil',
author = 'zmy', author = 'zmy',
author_email = 'izmy@qq.com', author_email = 'izmy@qq.com',
packages = find_packages(), packages = find_packages(),
include_package_data = True, include_package_data = True,
platforms = 'any', platforms = 'All',
install_requires = [], install_requires = [],
) )