import ctypes, subprocess, functools, pathlib, tempfile
from typing import Any
from tinygrad.device import Compiled, MallocAllocator
from tinygrad.helpers import diskcache, cpu_time_execution
from tinygrad.codegen.kernel import LinearizerOptions
from tinygrad.renderer.cstyle import uops_to_cstyle, CStyleLanguage
# Default C preamble that compile_clang places in front of the program text
# before handing it to clang. One source line per literal for readability;
# adjacent literals are concatenated into a single string.
CLANG_PROGRAM_HEADER = (
    "#include <math.h>\n"
    "#define max(x,y) ((x>y)?x:y)\n"
    "#define int64 long\n"
    "#define half __fp16\n"
    "#define uchar unsigned char\n"
    "#include <stdbool.h>\n"
)
@diskcache
def compile_clang(prg: str, header: str = CLANG_PROGRAM_HEADER) -> bytes:
    """
    Compile a C program into a shared library with clang and return its bytes.

    The text ``header + prg`` is encoded as UTF-8 and fed to clang on stdin.
    clang writes the shared object to a temporary file, which is read back
    before the temporary file is cleaned up.

    Parameters:
        prg (str): The C program to be compiled.
        header (str): Text placed in front of ``prg``. Default is
            CLANG_PROGRAM_HEADER.

    Returns:
        bytes: The contents of the shared library clang produced.

    Raises:
        subprocess.CalledProcessError: If clang exits with a non-zero status
            (warnings count, because of ``-Wall -Werror``).
        OSError: If the ``clang`` executable cannot be started.

    Note:
        TODO: remove the file write; it exists because clang does not accept
        /dev/stdout as the output here.
        NOTE(review): the ``diskcache`` decorator presumably caches results
        on disk keyed by the arguments -- confirm against tinygrad.helpers.
    """
    with tempfile.NamedTemporaryFile(delete=True) as output_file:
        # Pass argv as an explicit list instead of splitting a command string:
        # a temp directory containing whitespace would otherwise break the
        # output path into several arguments.
        clang_args = [
            "clang",
            "-shared",
            "-O2",
            "-Wall",
            "-Werror",
            "-x",
            "c",
            "-lm",
            "-fPIC",
            "--rtlib=compiler-rt",
            "-",  # read the source from stdin
            "-o",
            str(output_file.name),
        ]
        subprocess.check_output(
            args=clang_args,
            input=(header + prg).encode("utf-8"),
        )
        return pathlib.Path(output_file.name).read_bytes()
# (stray "[docs]" link marker from the HTML source listing removed; it was not Python code)
class ClangProgram:
    """
    Callable handle to one function exported by a compiled shared library.

    Attributes:
        name (str): The symbol name looked up in the library.
        lib (bytes): The shared library contents, as passed to the constructor.
        fxn (Any): The ctypes function object obtained for ``name``.

    Note:
        The library bytes are written to a temporary file because the library
        is loaded from a filesystem path via ``ctypes.CDLL``.
    """

    def __init__(self, name: str, lib: bytes):
        """
        Load ``lib`` and look up the function called ``name``.

        Parameters:
            name (str): Symbol to fetch from the loaded library.
            lib (bytes): Shared library contents.
        """
        self.name = name
        self.lib = lib
        # The load must happen inside the ``with`` block, while the temporary
        # file still exists on disk.
        with tempfile.NamedTemporaryFile(delete=True) as lib_file:
            lib_path = str(lib_file.name)
            pathlib.Path(lib_path).write_bytes(lib)
            loaded = ctypes.CDLL(lib_path)
            self.fxn: Any = loaded[name]

    def __call__(self, *bufs, vals=(), wait=False):
        """
        Invoke the loaded function with the buffers followed by the values.

        Parameters:
            bufs (*): Positional buffer arguments, passed first.
            vals (tuple): Extra arguments passed after the buffers. Default is
                an empty tuple.
            wait (bool): Forwarded to ``cpu_time_execution`` as ``enable``.
                Default is False.

        Returns:
            Any: Whatever ``cpu_time_execution`` returns.
            NOTE(review): presumably an elapsed time when ``wait`` is true --
            confirm against tinygrad.helpers.
        """

        def invoke():
            return self.fxn(*bufs, *vals)

        return cpu_time_execution(invoke, enable=wait)
# Renderer passed to Compiled(...) below: uops_to_cstyle with its first
# positional argument pre-bound to this CStyleLanguage configuration.
# NOTE(review): presumably buffer_suffix marks buffer parameters as `restrict`
# and arg_int_prefix declares integer arguments as `const int` in the emitted
# C -- confirm against CStyleLanguage.
renderer = functools.partial(
    uops_to_cstyle,
    CStyleLanguage(buffer_suffix=" restrict", arg_int_prefix="const int"),
)
# Device object for the clang backend, assembled from the pieces in this module.
# NOTE(review): the role of each positional argument is inferred from its name;
# confirm the order against the Compiled signature in tinygrad.device.
ClangDevice = Compiled(
    MallocAllocator,  # allocator imported from tinygrad.device
    # Linearizer settings: float4 and local memory are both switched off.
    LinearizerOptions(supports_float4=False, has_local=False),
    renderer,  # uops_to_cstyle partial defined above
    compile_clang,  # C source text -> shared library bytes
    ClangProgram,  # loads those bytes and exposes the function as a callable
)