| 查看: 858 | 回复: 0 | ||
[资源]
【原创】并行平台构建与管理[6]:并行测试 GotoBLAS+hpl测试之安装(1)
|
|
前面讲了atlas+hpl的安装,测试了一下,效率不是很理想。传闻使用Goto blas库测得的效率略理想一些,于是今天试了一下,安装成功。将过程贴出来供大家讨论。 GotoBLAS之编译: 1、 到 http://www.tacc.utexas.edu/resources/software/ 网站上下载 Source Code v.1.00或者其他版本,我用的是Source Code v.1.00,网站需要注册,不过是免费的,很快就可以搞定。 2、用 tar -zxvf 解压之,可以看到GotoBLAS的文件夹 3、编辑Makefile.rule,详细情况见附件一;更改getarch.c里面的architecture,使之符合自己的情况 4、make 5、cd exports 执行make so 6、新编辑xerbla.c,内容见附件二,执行gcc -c xerbla.c -o xerbla.o 在GotoBLAS下可以看到libgoto.a,libgoto_opteronp-r1.00.a,libgoto_opteronp-r1.00.so,xerbla.o几个新东西,表明编译成功。 附件一:Makefile.rule # # Beginning of user configuration # # This library's version REVISION = -r1.00 # Which do you prefer to use for C compiler? Default is gcc. # I recommend you to use GCC because inline assembler is required. C_COMPILER = GNU # C_COMPILER = INTEL # Which do you prefer to use for Fortran compiler? Default is GNU G77. # F_COMPILER = G77 # F_COMPILER = G95 # F_COMPILER = GFORTRAN # F_COMPILER = INTEL # F_COMPILER = PGI # F_COMPILER = PATHSCALE # F_COMPILER = IBM # F_COMPILER = COMPAQ # F_COMPILER = SUN # F_COMPILER = F2C # If you want to build threaded version. # You can specify number of threads by environment value # "OMP_NUM_THREADS", otherwise, it's automatically detected. SMP = 2 # You may specify Maximum number of threads. It should be minimum. MAX_THREADS = 2 # If you need 64bit binary; some architecture can accept both 32bit and # 64bit binary(EM64T, Opteron, SPARC and Power/PowerPC). BINARY64 = 1 # If you need 64bit integer interface. INTERFACE64 = 1 # If you need Special memory management; # Using HugeTLB file system(Linux / AIX / Solaris) CCOMMON_OPT += -DALLOC_HUGETLB # Using static allocation instead of dynamic allocation # CCOMMON_OPT += -DALLOC_STATIC # If you want to use CPU affinity CCOMMON_OPT += -DUSE_CPU_AFFINITY # If you want to use memory affinity (for NUMA) # CCOMMON_OPT += -DUSE_MEMORY_AFFINITY # If you have special compiler to run script to determine architecture. 
GETARCH_CC = GETARCH_FLAGS = # # End of user configuration # MACHINE := $(shell uname -m | sed -e s/i.86/i386/ ) OSNAME := $(shell uname -s) ifeq ($(MACHINE), i386) BINARY64 = NATIVEARCH = YES endif ifeq ($(MACHINE), ia64) BINARY64 = YES NATIVEARCH = YES endif ifeq ($(MACHINE), alpha) BINARY64 = YES NATIVEARCH = YES endif ifeq ($(OSNAME), AIX) NATIVEARCH = YES endif ifeq ($(OSNAME), Darwin) ifndef BINARY64 NATIVEARCH = YES endif endif # If you need to access over 4GB chunk on 64bit system. ifdef BINARY64 CCOMMON_OPT += -D__64BIT__ ifdef INTERFACE64 CCOMMON_OPT += -DUSE64BITINT endif endif # If you need modified GEMV/GEMM to find best parameters; # CCOMMON_OPT += -DPARAMTEST # CCOMMON_OPT += -DPREFETCHTEST # Common Optimization Flag COMMON_OPT += -O2 # Optimization Flag for C compiler CCOMMON_OPT += # Optimization Flag for Fortran Compiler FCOMMON_OPT += # Profiling flags COMMON_PROF = -pg ################## End of Main Configuration ##################### # TO suppress recursive includes INCLUDED = 1 ifndef C_COMPILER C_COMPILER = GNU endif ifndef F_COMPILER F_COMPILER = G77 endif ifeq ($(C_COMPILER), GNU) COMPILER = gcc CCOMMON_OPT += -Wall ifneq ($(OSNAME), CYGWIN_NT-5.1) CCOMMON_OPT += -fPIC endif ifndef NATIVEARCH ifdef BINARY64 CCOMMON_OPT += -m64 GETARCH_FLAGS = -m64 else CCOMMON_OPT += -m32 GETARCH_FLAGS = -m32 endif endif COMMON_PROF += -fno-inline endif ifeq ($(C_COMPILER), INTEL) COMPILER = icc CCOMMON_OPT += -fPIC endif ifeq ($(F_COMPILER), G77) COMPILER_F77 = g77 BU = _ CCOMMON_OPT += -DF_INTERFACE_F2C -DNEED_F2CCONV FCOMMON_OPT += -Wall ifneq ($(OSNAME), CYGWIN_NT-5.1) FCOMMON_OPT += -fPIC endif ifndef NATIVEARCH ifdef BINARY64 FCOMMON_OPT += -m64 else FCOMMON_OPT += -m32 endif endif endif ifeq ($(F_COMPILER), G95) COMPILER_F77 = g95 BU = _ CCOMMON_OPT += -DF_INTERFACE_F2C FCOMMON_OPT += -Wall ifneq ($(OSNAME), CYGWIN_NT-5.1) FCOMMON_OPT += -fPIC endif ifndef NATIVEARCH ifdef BINARY64 FCOMMON_OPT += -m64 else FCOMMON_OPT += -m32 endif endif endif ifeq 
($(F_COMPILER), GFORTRAN) COMPILER_F77 = gfortran BU = _ CCOMMON_OPT += -DF_INTERFACE_GFORT FCOMMON_OPT += -Wall ifneq ($(OSNAME), CYGWIN_NT-5.1) FCOMMON_OPT += -fPIC endif ifndef NATIVEARCH ifdef BINARY64 FCOMMON_OPT += -m64 else FCOMMON_OPT += -m32 endif endif endif ifeq ($(F_COMPILER), INTEL) COMPILER_F77 = ifort BU = _ CCOMMON_OPT += -DF_INTERFACE_F2C FCOMMON_OPT += -fPIC ifdef INTERFACE64 FCOMMON_OPT += -i8 endif endif ifeq ($(F_COMPILER), IBM) COMPILER_F77 = xlf BU = # FCOMMON_OPT += -qarch=440 ifdef BINARY64 FCOMMON_OPT += -q64 else FCOMMON_OPT += -q32 endif endif ifeq ($(F_COMPILER), COMPAQ) ifeq ($(OSNAME), Linux) COMPILER_F77 = fort FCOMMON_OPT += -fPIC else COMPILER_F77 = f77 endif BU = _ endif ifeq ($(F_COMPILER), PGI) COMPILER_F77 = pgf77 BU = _ CCOMMON_OPT += -DF_INTERFACE_F2C FCOMMON_OPT += -fPIC COMMON_PROF += -DPGICOMPILER ifndef BINARY64 # FCOMMON_OPT += -tp k8-32 FCOMMON_OPT += -tp p7 EXTRALIB += -L/opt/pgi/linux86/6.0/lib -lpgc else FCOMMON_OPT += -tp k8-64 EXTRALIB += -L/opt/pgi/linux86-64/6.0/lib -lpgc -lpgf90rtl ifdef INTERFACE64 FCOMMON_OPT += -i8 endif endif endif ifdef SMP EXTRALIB += -lpthread endif ifeq ($(F_COMPILER), PATHSCALE) COMPILER_F77 = pathf90 BU = _ CCOMMON_OPT += -DAMD_ABI -DF_PATHSCALE FCOMMON_OPT += -fPIC ifndef BINARY64 CCOMMON_OPT += -DF_INTERFACE_F2C FCOMMON_OPT += -m32 else FCOMMON_OPT += -m64 ifdef INTERFACE64 FCOMMON_OPT += -i8 endif endif endif ifeq ($(F_COMPILER), SUN) COMPILER_F77 = f90 BU = _ CCOMMON_OPT += -DF_SUN FCOMMON_OPT += -pic ifndef BINARY64 CCOMMON_OPT += -DF_INTERFACE_F2C endif endif ifeq ($(F_COMPILER), F2C) COMPILER_F77 = f2cf77 BU = _ CCOMMON_OPT += -DF_INTERFACE_F2C -DNEED_F2CCONV FCOMMON_OPT += -Wall -fPIC endif # Currently Windows version doesn't support threads ifeq ($(OSNAME), CYGWIN_NT-5.1) SMP = endif ifdef SMP CCOMMON_OPT += -DSMP_SERVER ifeq ($(C_COMPILER), GNU) ifeq ($(OSNAME), Linux) CCOMMON_OPT += -pthread endif endif endif ifndef GETARCH_CC GETARCH_CC = gcc endif ARCH := $(shell (cd 
$(TOPDIR); ./getarch 0 $(GETARCH_CC) $(GETARCH_FLAGS))) SUBARCH := $(shell (cd $(TOPDIR); ./getarch 1 $(GETARCH_CC) $(GETARCH_FLAGS))) ARCHSUBDIR := $(shell (cd $(TOPDIR); ./getarch 2 $(GETARCH_CC) $(GETARCH_FLAGS))) CONFIG := $(shell (cd $(TOPDIR); ./getarch 3 $(GETARCH_CC) $(GETARCH_FLAGS))) FU := $(shell (cd $(TOPDIR); ./getarch 4 $(GETARCH_CC) $(GETARCH_FLAGS))) LIBSUBARCH := $(shell (cd $(TOPDIR); ./getarch 5 $(GETARCH_CC) $(GETARCH_FLAGS))) CORE := $(shell (cd $(TOPDIR); ./getarch 6 $(GETARCH_CC) $(GETARCH_FLAGS))) ifndef MAX_THREADS MAX_THREADS := $(shell (cd $(TOPDIR); ./getarch 7 $(GETARCH_CC) $(GETARCH_FLAGS))) endif CCOMMON_OPT += -DMAX_CPU_NUMBER=$(MAX_THREADS) LIBPREFIX = libgoto ARFLAGS = CPP = $(CC) -E AR = $(COMPILER_PREFIX)ar AS = $(COMPILER_PREFIX)as LD = $(COMPILER_PREFIX)ld RANLIB = $(COMPILER_PREFIX)ranlib include $(TOPDIR)/Makefile.$(ARCHSUBDIR) CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) ifeq ($(CORE), PPC440) CCOMMON_OPT += -DALLOC_QALLOC endif ifeq ($(CORE), PPC440FP2) CCOMMON_OPT += -DALLOC_STATIC endif ifeq ($(FU), _) CCOMMON_OPT += -DFUNDERSCORE=$(FU) -DNEEDFUNDERSCORE endif ifeq ($(BU), _) CCOMMON_OPT += -DBUNDERSCORE=$(BU) -DNEEDBUNDERSCORE endif CFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) $(CONFIG) PFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) $(CONFIG) -DPROFILE $(COMMON_PROF) # FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(CONFIG) FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) FPFLAGS = $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF) ifndef SMP LIBNAME = $(LIBPREFIX)_$(LIBSUBARCH)$(REVISION).a LIBNAME_P = $(LIBPREFIX)_$(LIBSUBARCH)$(REVISION)_p.a else LIBNAME = $(LIBPREFIX)_$(LIBSUBARCH)p$(REVISION).a LIBNAME_P = $(LIBPREFIX)_$(LIBSUBARCH)p$(REVISION)_p.a endif LIBSONAME = $(LIBNAME:.a=.so) LIBDLLNAME = $(LIBNAME:.a=.dll) LIBDYNNAME = $(LIBNAME:.a=.dylib) LIBWIN2KNAME = $(LIBNAME:.a=.lib) LIBDEFNAME = $(LIBNAME:.a=.def) LIBEXPNAME = $(LIBNAME:.a=.exp) LIBZIPNAME = $(LIBNAME:.a=.zip) LIBS = 
$(TOPDIR)/$(LIBNAME) LIBS_P = $(TOPDIR)/$(LIBNAME_P) ifndef SMP LIBPTHREAD = else LIBPTHREAD = -lpthread endif CC = $(COMPILER_PREFIX)$(COMPILER) FC = $(COMPILER_PREFIX)$(COMPILER_F77) .SUFFIXES: .po .o .f .f.o: $(FC) $(FFLAGS) -c $< .f.po: $(FC) $(FPFLAGS) -pg -c $<
附件二:xerbla.c
#include <stdio.h>
#include <stdlib.h>
int xerbla_(char *message, int *info, long length){
  fprintf(stderr, " ** On entry to %6s, parameter number %2d had an illegal value\n", message, *info);
  exit(1);
}
|
» 猜你喜欢
第一性原理计算方向2026级博士申请 PRB*1,四级484
已经有1人回复
求助VISSIM破解版软件
已经有0人回复
物理学I论文润色/翻译怎么收费?
已经有267人回复
求2026年在台湾举行的物理和材料领域国际学术会议信息
已经有0人回复
求国际会议网站
已经有1人回复
求取一些关于纳米材料和纳米技术相关的英文PPT。
已经有0人回复
【复旦大学】二维材料方向招收2026年博士研究生1名
已经有0人回复
北京纳米能源与系统研究所 王中林院士/曹南颖研究员课题组2026级硕/博/博后招生
已经有10人回复
荷兰Utrecht University超快太赫兹光谱王海教授课题招收2026 CSC博士生
已经有16人回复
反铁磁体中的磁性切换:两种不同的机制已成功可视化
已经有0人回复
找到一些相关的精华帖子,希望有用哦~
【求助】“脉冲交流直流并联叠加测试的方法”怎么翻译
已经有2人回复
刚做了原子力显微镜测试(AFM)不知道怎么作图啊
已经有14人回复
FLUENT读取并行case自动删除文件
已经有3人回复
关于PN结组装和IV曲线测试的问题
已经有4人回复
vasp并行测试时出错。
已经有7人回复
cp2k并行编译出错
已经有3人回复
请问MS-linux-cluster安装之后无法并行的问题?
已经有5人回复
【ghcacj个人文集】单机环境下安装openmpi使用Orca并行运算的方法介绍
已经有12人回复
【原创】并行平台构建与管理汇总
已经有4人回复
【原创】并行平台构建与管理[10]:强行删除pbs作业方式
已经有5人回复
【原创】并行平台构建与管理[1]:资金预算与配置
已经有3人回复
【资源】上传一本工业界及INL电池寿命测试手册
已经有4人回复
【求助/交流】微生物测试原子力显微镜如何制样好?
已经有5人回复
科研从小木虫开始,人人为我,我为人人













回复此楼
点击这里搜索更多相关资源