1. 준비사항
∙ CUDA 사용이 가능한 GPU (CUDA 사용 가능 하드웨어 목록)
∙ Mac OS X 10.8 이상
∙ gcc 또는 Clang 컴파일러와 툴체인이 Xcode에 설치되어 있을 것
∙ Command Line Tools 패키지 설치 필요
∙ NVIDIA CUDA Toolkit을 설치할 것 (CUDA Download page)
∙ 환경변수 설정 (매우 중요)
export PATH=/Developer/NVIDIA/CUDA-6.0/bin:$PATH
export DYLD_LIBRARY_PATH=/Developer/NVIDIA/CUDA-6.0/lib:$DYLD_LIBRARY_PATH
2. 설치 확인
∙ 드라이버 설치 확인
kextstat | grep -i cuda
∙ 컴파일러 설치 확인
nvcc -V
∙ 예제프로그램 컴파일 후 실행
make -C 1_Utilities/deviceQuery
cd bin/x86_64/darwin/release
./deviceQuery
3. Qt Creator에서 설정하기
∙ .pro (맨 아래 네 줄이 가장 중요한 포인트임)
# qmake project for a Qt console application whose .cu sources are compiled
# by nvcc through a custom "extra compiler" and linked against the CUDA runtime.
QT += core
QT -= gui
TARGET = cudatest # depending on the project
CONFIG += console
CONFIG -= app_bundle
TEMPLATE = app
# Basic .pro configuration
SOURCES += \
main.cpp
# This makes the .cu files appear in your project
OTHER_FILES += main.cu
# Cuda sources
CUDA_SOURCES += main.cu
# CUDA settings (depending on the system)
CUDA_DIR = /usr/local/cuda # Path to cuda toolkit install
# nvcc flags (ptxas option verbose is always useful)
NVCCFLAGS = --compiler-options -fno-strict-aliasing -use_fast_math --ptxas-options=-v
# Target GPU architectures, kept in one place so the list is not duplicated.
# NOTE(review): the accepted compute_XX/sm_XX values depend on the installed
# toolkit version - trim this list to what your nvcc supports.
CUDA_ARCH = -gencode arch=compute_10,code=sm_10 \
            -gencode arch=compute_20,code=sm_20 \
            -gencode arch=compute_30,code=sm_30 \
            -gencode arch=compute_32,code=sm_32 \
            -gencode arch=compute_35,code=sm_35 \
            -gencode arch=compute_50,code=sm_50 \
            -gencode arch=compute_50,code=compute_50
# include paths
INCLUDEPATH += $$CUDA_DIR/include
# lib dirs
QMAKE_LIBDIR += $$CUDA_DIR/lib
# libs
LIBS += -lcudart
# join the includes in a line
CUDA_INC = $$join(INCLUDEPATH,' -I','-I',' ')
# Prepare the extra compiler configuration
cuda.input = CUDA_SOURCES
cuda.output = ${OBJECTS_DIR}${QMAKE_FILE_BASE}_cuda.o
# Compile-only step (-c): link libraries are not passed here; the final link
# performed by qmake picks up LIBS.
cuda.commands = $$CUDA_DIR/bin/nvcc $$CUDA_ARCH -c $$NVCCFLAGS $$CUDA_INC ${QMAKE_FILE_NAME} -o ${QMAKE_FILE_OUT}
cuda.dependency_type = TYPE_C
# -M makes nvcc print the dependency list instead of compiling, which is the
# output qmake expects from a depend_command.
cuda.depend_command = $$CUDA_DIR/bin/nvcc -M $$CUDA_INC $$NVCCFLAGS ${QMAKE_FILE_NAME}
# Tell Qt that we want add more stuff to the Makefile
QMAKE_EXTRA_COMPILERS += cuda
QMAKE_MACOSX_DEPLOYMENT_TARGET = 10.9
QMAKE_CXXFLAGS += -stdlib=libstdc++
QMAKE_LFLAGS += -stdlib=libstdc++ -rpath $$CUDA_DIR/lib
∙ main.cpp
#include <QtCore/QCoreApplication>
#include <cuda.h>
using namespace std;
void run();
int main(int argc, char *argv[])
{
QCoreApplication a(argc, argv);
run();
return a.exec();
}
∙ main.cu
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#define SIZE 1024
using namespace std;
// Element-wise integer vector addition: c[i] = a[i] + b[i] for every i in [0, n).
//
// Launch layout: 1D grid of 1D blocks. Each thread starts at its global index
// and advances by the total number of launched threads, so every element is
// covered for any grid/block size (including a single block smaller than n).
//
// a, b, c are dereferenced on the device and must each hold at least n ints.
// n <= 0 results in no work.
__global__ void VectorAdd(int *a, int *b, int *c, int n)
{
    // 64-bit index arithmetic so blockIdx.x * blockDim.x and the stride
    // increment cannot overflow for large n.
    const long long stride = (long long)gridDim.x * blockDim.x;
    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
        c[i] = a[i] + b[i];
}
// Prints a diagnostic to stderr when a CUDA runtime call did not succeed.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Demo driver: fills two host arrays of SIZE ints with a[i] = b[i] = i, adds
// them on the GPU with VectorAdd, and prints the first 10 sums to stdout.
//
// Every allocation, copy and the kernel launch are checked. On the first
// failure a message is written to stderr, the remaining steps are skipped,
// and all host and device buffers are still released before returning.
void run()
{
    const size_t bytes = SIZE * sizeof(int);

    int *a = (int *)malloc(bytes);
    int *b = (int *)malloc(bytes);
    int *c = (int *)malloc(bytes);
    int *d_a = NULL, *d_b = NULL, *d_c = NULL;

    bool ok = (a != NULL && b != NULL && c != NULL);
    if (!ok)
        fprintf(stderr, "run: host allocation of %lu bytes failed\n", (unsigned long)bytes);

    ok = ok && cudaSucceeded(cudaMalloc(&d_a, bytes), "cudaMalloc(d_a)");
    ok = ok && cudaSucceeded(cudaMalloc(&d_b, bytes), "cudaMalloc(d_b)");
    ok = ok && cudaSucceeded(cudaMalloc(&d_c, bytes), "cudaMalloc(d_c)");

    if (ok)
    {
        for (int i = 0; i < SIZE; ++i)
        {
            a[i] = i;
            b[i] = i;
            c[i] = 0;
        }
    }

    // Only the inputs are uploaded: the kernel below overwrites all SIZE
    // elements of d_c, so sending the zeroed host copy of c is unnecessary.
    ok = ok && cudaSucceeded(cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_a <- a)");
    ok = ok && cudaSucceeded(cudaMemcpy(d_b, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_b <- b)");

    if (ok)
    {
        // One block of SIZE threads, one element per thread.
        // NOTE(review): SIZE (1024) threads per block exceeds the per-block
        // limit of some older devices; the check below reports that case
        // instead of silently printing zeros.
        VectorAdd<<< 1, SIZE >>>(d_a, d_b, d_c, SIZE);
        ok = cudaSucceeded(cudaGetLastError(), "VectorAdd launch");
    }

    // Only the result is downloaded; a and b are not modified by the kernel.
    // This blocking copy also reports errors raised while the kernel ran.
    ok = ok && cudaSucceeded(cudaMemcpy(c, d_c, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(c <- d_c)");

    if (ok)
    {
        for (int i = 0; i < 10; ++i)
            printf( "c[%d] = %d\n", i, c[i] );
    }

    // free(NULL) and cudaFree(NULL) are no-ops, so cleanup is unconditional.
    free(a);
    free(b);
    free(c);
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);
}
4. 테스트 결과
SPECIAL THANKS TO 문상환
출처 : http://docs.nvidia.com/cuda/cuda-getting-started-guide-for-mac-os-x
'[ 프로그래밍 ]' 카테고리의 다른 글
Visual C++ 2010 SP1 Compiler Update for the Windows SDK 7.1 (0) | 2018.12.05 |
---|---|
초보를 위한 도커 안내서 (0) | 2018.10.22 |
Using NVIDIA/DIGITS with Docker on Ubuntu 16.04 (0) | 2018.09.11 |