24小时热门版块排行榜    

CyRhmU.jpeg
查看: 858  |  回复: 0

[资源] 【原创】并行平台构建与管理[6]:并行测试 GotoBLAS+hpl测试之安装(1)

前面讲了atlas+hpl的安装,测试了一下,效率不是很理想。据说使用GotoBLAS库测得的效率会更理想一些,于是今天试了一下,安装成功。现将过程贴出来供大家讨论。
GotoBLAS之编译:
1、 到 http://www.tacc.utexas.edu/resources/software/ 网站上下载 Source Code v.1.00或者其他版本,我用的是Source Code v.1.00,网站需要注册,不过是免费的,很快就可以搞定。
2、用 tar -zxvf 解压之,可以看到GotoBLAS的文件夹
3、编辑Makefile.rule,详细情况见附件一;更改getarch.c里面的architecture,使之符合自己的情况
4、make
5、cd exports 执行make so
6、新编辑xerbla.c,内容见附件二,执行gcc -c xerbla.c -o xerbla.o
在GotoBLAS下可以看到libgoto.a,libgoto_opteronp-r1.00.a,libgoto_opteronp-r1.00.so,xerbla.o几个新东西,表明编译成功。
附件一:Makefile.rule
#
#  Beginning of user configuration
#

# Revision tag of this library; it becomes part of the library file names.
REVISION := -r1.00

# C compiler family: GNU or INTEL.  GNU is recommended because the
# sources require inline assembler.
# C_COMPILER = INTEL
C_COMPILER := GNU

# Fortran compiler family.  Uncomment one line to choose it; when none is
# selected, G77 is used.
# F_COMPILER = G77
# F_COMPILER = G95
# F_COMPILER = GFORTRAN
# F_COMPILER = INTEL
# F_COMPILER = PGI
# F_COMPILER = PATHSCALE
# F_COMPILER = IBM
# F_COMPILER = COMPAQ
# F_COMPILER = SUN
# F_COMPILER = F2C

# Threaded build switch: any non-empty value enables it.  At run time the
# number of threads can be given through the "OMP_NUM_THREADS" environment
# variable; otherwise it is detected automatically.
SMP := 2

# Maximum number of threads the library is compiled for; keep it as small
# as possible.
MAX_THREADS := 2

# Request a 64bit binary.  Some architectures accept both 32bit and 64bit
# binaries (EM64T, Opteron, SPARC and Power/PowerPC).
BINARY64 := 1

# Request the 64bit integer interface.
INTERFACE64 := 1

# Special memory management: allocate through the HugeTLB file system
# (Linux / AIX / Solaris).
CCOMMON_OPT += -DALLOC_HUGETLB

# Alternative: static allocation instead of dynamic allocation.
# CCOMMON_OPT += -DALLOC_STATIC

# Enable CPU affinity.
CCOMMON_OPT += -DUSE_CPU_AFFINITY

# Enable memory affinity (for NUMA).
# CCOMMON_OPT += -DUSE_MEMORY_AFFINITY

# Special compiler, and its flags, for running the script that determines
# the architecture.  Leave both empty to get the defaults.
GETARCH_FLAGS =
GETARCH_CC =

#
#  End of user configuration
#

# Host CPU type as printed by uname, with every i?86 variant folded into
# "i386".
MACHINE := $(shell uname -m | sed -e 's/i.86/i386/')

# Host operating system name as printed by uname.
OSNAME := $(shell uname -s)

# Per-platform adjustments.  NATIVEARCH = YES marks platforms for which the
# GNU compiler sections of this file add no -m32/-m64 switch.

# 32bit x86: a 64bit binary is never built.
ifeq ($(MACHINE),i386)
  NATIVEARCH = YES
  BINARY64 =
endif

# Itanium: always 64bit.
ifeq ($(MACHINE),ia64)
  NATIVEARCH = YES
  BINARY64 = YES
endif

# Alpha: always 64bit.
ifeq ($(MACHINE),alpha)
  NATIVEARCH = YES
  BINARY64 = YES
endif

ifeq ($(OSNAME),AIX)
  NATIVEARCH = YES
endif

# Darwin counts as native only when no 64bit binary was requested.
ifeq ($(OSNAME),Darwin)
  ifndef BINARY64
    NATIVEARCH = YES
  endif
endif

# Needed to access chunks larger than 4GB on a 64bit system.  The 64bit
# integer interface is only enabled together with a 64bit binary.
ifdef BINARY64
  CCOMMON_OPT += -D__64BIT__
  ifdef INTERFACE64
    CCOMMON_OPT += -DUSE64BITINT
  endif
endif

# Modified GEMV/GEMM for finding the best parameters (disabled):
# CCOMMON_OPT += -DPARAMTEST
# CCOMMON_OPT += -DPREFETCHTEST

# Optimization flags shared by the C and the Fortran compiler
COMMON_OPT += -O2

# Additional optimization flags for the C compiler only
CCOMMON_OPT +=

# Additional optimization flags for the Fortran compiler only
FCOMMON_OPT +=

# Flags added to profiling builds
COMMON_PROF = -pg

################## End of Main Configuration #####################

# Marker used to suppress recursive includes
INCLUDED := 1

# Fall back to the GNU C compiler when none is selected
ifndef C_COMPILER
  C_COMPILER = GNU
endif

# Fall back to G77 when no Fortran compiler is selected
ifndef F_COMPILER
  F_COMPILER = G77
endif

# GNU C compiler (gcc)
ifeq ($(C_COMPILER),GNU)
  COMPILER = gcc
  CCOMMON_OPT += -Wall
  # -fPIC is added on every OS except CYGWIN_NT-5.1
  ifneq ($(OSNAME),CYGWIN_NT-5.1)
    CCOMMON_OPT += -fPIC
  endif
  # Unless the platform is marked NATIVEARCH, select the word size
  # explicitly, both for the library sources and for the getarch probe
  ifndef NATIVEARCH
    ifndef BINARY64
      GETARCH_FLAGS = -m32
      CCOMMON_OPT += -m32
    else
      GETARCH_FLAGS = -m64
      CCOMMON_OPT += -m64
    endif
  endif
  # Profiling builds are compiled without inlining
  COMMON_PROF += -fno-inline
endif

# Intel C compiler (icc)
ifeq ($(C_COMPILER),INTEL)
  CCOMMON_OPT += -fPIC
  COMPILER = icc
endif

# GNU g77.  BU is the suffix appended to Fortran-visible symbol names; the
# C sources are compiled with F_INTERFACE_F2C and NEED_F2CCONV defined.
ifeq ($(F_COMPILER),G77)
  COMPILER_F77 = g77
  BU = _
  CCOMMON_OPT += -DF_INTERFACE_F2C -DNEED_F2CCONV
  FCOMMON_OPT += -Wall
  ifneq ($(OSNAME),CYGWIN_NT-5.1)
    FCOMMON_OPT += -fPIC
  endif
  # Explicit word size unless the platform is marked NATIVEARCH
  ifndef NATIVEARCH
    ifndef BINARY64
      FCOMMON_OPT += -m32
    else
      FCOMMON_OPT += -m64
    endif
  endif
endif

# g95.  BU is the suffix appended to Fortran-visible symbol names; the C
# sources are compiled with F_INTERFACE_F2C defined.
ifeq ($(F_COMPILER),G95)
  COMPILER_F77 = g95
  BU = _
  CCOMMON_OPT += -DF_INTERFACE_F2C
  FCOMMON_OPT += -Wall
  ifneq ($(OSNAME),CYGWIN_NT-5.1)
    FCOMMON_OPT += -fPIC
  endif
  # Explicit word size unless the platform is marked NATIVEARCH
  ifndef NATIVEARCH
    ifndef BINARY64
      FCOMMON_OPT += -m32
    else
      FCOMMON_OPT += -m64
    endif
  endif
endif

# GNU gfortran.  BU is the suffix appended to Fortran-visible symbol names;
# the C sources are compiled with F_INTERFACE_GFORT defined.
ifeq ($(F_COMPILER),GFORTRAN)
  COMPILER_F77 = gfortran
  BU = _
  CCOMMON_OPT += -DF_INTERFACE_GFORT
  FCOMMON_OPT += -Wall
  ifneq ($(OSNAME),CYGWIN_NT-5.1)
    FCOMMON_OPT += -fPIC
  endif
  # Explicit word size unless the platform is marked NATIVEARCH
  ifndef NATIVEARCH
    ifndef BINARY64
      FCOMMON_OPT += -m32
    else
      FCOMMON_OPT += -m64
    endif
  endif
endif

# Intel Fortran (ifort); -i8 is passed when the 64bit integer interface is
# requested.
ifeq ($(F_COMPILER),INTEL)
  COMPILER_F77 = ifort
  BU = _
  CCOMMON_OPT += -DF_INTERFACE_F2C
  FCOMMON_OPT += -fPIC
  ifdef INTERFACE64
    FCOMMON_OPT += -i8
  endif
endif

# IBM XL Fortran (xlf); no suffix is appended to symbol names (BU is empty).
ifeq ($(F_COMPILER),IBM)
  COMPILER_F77 = xlf
  BU =
  # FCOMMON_OPT += -qarch=440
  ifndef BINARY64
    FCOMMON_OPT += -q32
  else
    FCOMMON_OPT += -q64
  endif
endif

# Compaq Fortran: "fort" with -fPIC on Linux, plain "f77" on any other OS.
ifeq ($(F_COMPILER),COMPAQ)
  BU = _
  ifeq ($(OSNAME),Linux)
    COMPILER_F77 = fort
    FCOMMON_OPT += -fPIC
  else
    COMPILER_F77 = f77
  endif
endif

# Portland Group Fortran (pgf77).  The target processor switch and the PGI
# runtime libraries depend on the word size; the library directories are
# hard-coded to a PGI 6.0 installation under /opt/pgi.
ifeq ($(F_COMPILER),PGI)
  COMPILER_F77 = pgf77
  BU = _
  CCOMMON_OPT += -DF_INTERFACE_F2C
  COMMON_PROF += -DPGICOMPILER
  FCOMMON_OPT += -fPIC

  ifdef BINARY64
    FCOMMON_OPT += -tp k8-64
    EXTRALIB += -L/opt/pgi/linux86-64/6.0/lib -lpgc -lpgf90rtl
    ifdef INTERFACE64
      FCOMMON_OPT += -i8
    endif
  else
    # FCOMMON_OPT += -tp k8-32
    FCOMMON_OPT += -tp p7
    EXTRALIB += -L/opt/pgi/linux86/6.0/lib -lpgc
  endif
endif

# Threaded (SMP) builds link against the POSIX threads library.
# This file clears SMP for CYGWIN_NT-5.1 only further down, after this
# point, so the OS is checked here too; otherwise -lpthread would be added
# to a build that is later treated as non-threaded.
ifdef SMP
ifneq ($(OSNAME), CYGWIN_NT-5.1)
EXTRALIB    += -lpthread
endif
endif

# PathScale Fortran (pathf90).  F_INTERFACE_F2C is defined for 32bit builds
# only; -i8 is passed for 64bit builds with the 64bit integer interface.
ifeq ($(F_COMPILER),PATHSCALE)
  COMPILER_F77 = pathf90
  BU = _
  CCOMMON_OPT += -DAMD_ABI -DF_PATHSCALE
  FCOMMON_OPT += -fPIC
  ifdef BINARY64
    FCOMMON_OPT += -m64
    ifdef INTERFACE64
      FCOMMON_OPT += -i8
    endif
  else
    CCOMMON_OPT += -DF_INTERFACE_F2C
    FCOMMON_OPT += -m32
  endif
endif

# Sun Fortran (f90).  F_INTERFACE_F2C is defined for 32bit builds only.
ifeq ($(F_COMPILER),SUN)
  COMPILER_F77 = f90
  BU = _
  FCOMMON_OPT += -pic
  CCOMMON_OPT += -DF_SUN
  ifndef BINARY64
    CCOMMON_OPT += -DF_INTERFACE_F2C
  endif
endif

# f2c driven through the f2cf77 command
ifeq ($(F_COMPILER),F2C)
  COMPILER_F77 = f2cf77
  BU = _
  FCOMMON_OPT += -Wall -fPIC
  CCOMMON_OPT += -DF_INTERFACE_F2C -DNEED_F2CCONV
endif

# Threads are currently not supported by the Windows version
ifeq ($(OSNAME),CYGWIN_NT-5.1)
  SMP =
endif

# Threaded build: the C sources are compiled with -DSMP_SERVER, and with
# -pthread as well when the GNU compiler is used on Linux
ifdef SMP
  CCOMMON_OPT += -DSMP_SERVER
  ifeq ($(OSNAME),Linux)
    ifeq ($(C_COMPILER),GNU)
      CCOMMON_OPT += -pthread
    endif
  endif
endif

# Compiler handed to the architecture probe; gcc when none is configured
ifndef GETARCH_CC
  GETARCH_CC = gcc
endif

# Run ./getarch inside $(TOPDIR) with query number $(1), followed by the
# probe compiler and its flags, and return whatever it prints.
getarch_query = $(shell (cd $(TOPDIR); ./getarch $(1) $(GETARCH_CC) $(GETARCH_FLAGS)))

# Every value is expanded immediately, so the probe runs once per query
ARCH       := $(call getarch_query,0)
SUBARCH    := $(call getarch_query,1)
ARCHSUBDIR := $(call getarch_query,2)
CONFIG     := $(call getarch_query,3)
FU         := $(call getarch_query,4)
LIBSUBARCH := $(call getarch_query,5)
CORE       := $(call getarch_query,6)

# The thread limit is taken from the probe only when not configured
ifndef MAX_THREADS
  MAX_THREADS := $(call getarch_query,7)
endif

# Hand the thread limit to the C sources
CCOMMON_OPT += -DMAX_CPU_NUMBER=$(MAX_THREADS)


# Base name of the generated libraries
LIBPREFIX := libgoto

# Tool names.  $(COMPILER_PREFIX) is not set in this file.
# NOTE(review): presumably a cross-toolchain prefix supplied by the caller
# or by the included architecture makefile - confirm.
AR = $(COMPILER_PREFIX)ar
ARFLAGS =
AS = $(COMPILER_PREFIX)as
LD = $(COMPILER_PREFIX)ld
RANLIB = $(COMPILER_PREFIX)ranlib
# The C preprocessor is the C compiler run with -E
CPP = $(CC) -E

# Pull in the makefile for the architecture sub-directory reported by getarch
include $(TOPDIR)/Makefile.$(ARCHSUBDIR)

# Symbol-name macros for the C sources.  $(*F) is an automatic variable, so
# these defines only get their value when CCOMMON_OPT is expanded inside a
# recipe; CCOMMON_OPT therefore has to stay recursively expanded.
# FU is put in front of the name, BU behind it.
CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F)

# Allocator selection for the PPC440 cores
ifeq ($(CORE),PPC440)
  CCOMMON_OPT += -DALLOC_QALLOC
endif

ifeq ($(CORE),PPC440FP2)
  CCOMMON_OPT += -DALLOC_STATIC
endif

# Tell the C sources when a front underscore is in use
ifeq ($(FU),_)
  CCOMMON_OPT += -DFUNDERSCORE=$(FU) -DNEEDFUNDERSCORE
endif

# Tell the C sources when a back underscore is in use
ifeq ($(BU),_)
  CCOMMON_OPT += -DBUNDERSCORE=$(BU) -DNEEDBUNDERSCORE
endif

# C compiler flags for regular (CFLAGS) and profiling (PFLAGS) objects
CFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) $(CONFIG)
PFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) $(CONFIG) -DPROFILE $(COMMON_PROF)

# Fortran compiler flags for profiling objects
FPFLAGS = $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)

# Fortran compiler flags for regular objects, appended to any existing
# FFLAGS.  $(CONFIG) is deliberately not passed to the Fortran compiler:
# FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(CONFIG)
FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)

# Library file names: <prefix>_<subarch>[p]<revision>[_p].a
# The "p" after the sub-architecture marks a threaded (SMP) build, the
# "_p" suffix marks the profiling library.
ifdef SMP
  LIBNAME = $(LIBPREFIX)_$(LIBSUBARCH)p$(REVISION).a
  LIBNAME_P = $(LIBPREFIX)_$(LIBSUBARCH)p$(REVISION)_p.a
else
  LIBNAME = $(LIBPREFIX)_$(LIBSUBARCH)$(REVISION).a
  LIBNAME_P = $(LIBPREFIX)_$(LIBSUBARCH)$(REVISION)_p.a
endif

# The same base name with the extension of each other output format
LIBSONAME = $(LIBNAME:.a=.so)
LIBDLLNAME = $(LIBNAME:.a=.dll)
LIBDYNNAME = $(LIBNAME:.a=.dylib)
LIBWIN2KNAME = $(LIBNAME:.a=.lib)
LIBDEFNAME = $(LIBNAME:.a=.def)
LIBEXPNAME = $(LIBNAME:.a=.exp)
LIBZIPNAME = $(LIBNAME:.a=.zip)

# Full paths of the regular and the profiling library
LIBS = $(TOPDIR)/$(LIBNAME)
LIBS_P = $(TOPDIR)/$(LIBNAME_P)

# pthread library switch: set only for threaded (SMP) builds
ifdef SMP
  LIBPTHREAD = -lpthread
else
  LIBPTHREAD =
endif

# Compiler commands: the selected C and Fortran compilers, each prefixed
# with $(COMPILER_PREFIX)
FC = $(COMPILER_PREFIX)$(COMPILER_F77)
CC = $(COMPILER_PREFIX)$(COMPILER)

# Suffixes used by the Fortran suffix rules below
# (.po is the object file of a profiling build).
.SUFFIXES: .po .o .f

# Compile a Fortran source into a regular object.
# Recipe lines must start with a TAB character.
.f.o:
	$(FC) $(FFLAGS) -c $< -o $@

# Compile a Fortran source into a profiling object.  The explicit -o is
# required: without it the compiler writes <stem>.o, which overwrites the
# regular object and never creates the .po target.
.f.po:
	$(FC) $(FPFLAGS) -pg -c $< -o $@
附件二:xerbla.c
#include <stdio.h>
#include <stdlib.h>

/*
 * Error handler with the XERBLA name: reports an illegal argument and
 * terminates the program.
 *
 *   message - name of the routine that detected the error.
 *             NOTE(review): presumably passed from Fortran code, in which
 *             case it is not guaranteed to be NUL-terminated - confirm.
 *   info    - pointer to the number of the offending parameter.
 *             NOTE(review): Makefile.rule above enables the 64bit integer
 *             interface (USE64BITINT); confirm that `int` matches the
 *             integer width the callers actually pass.
 *   length  - length of `message` (hidden string-length argument).
 *
 * Writes the diagnostic to stderr and exits with status 1; it never
 * returns to the caller.
 */
int xerbla_(char *message, int *info, long length){
  /* Limit how many bytes of `message` are read instead of relying on a
     terminating NUL.  An implausible length falls back to 6, the minimum
     field width the message format already uses. */
  int name_len = (length > 0 && length < 64) ? (int)length : 6;

  fprintf(stderr, " ** On entry to  %6.*s, parameter number %2d had an illegal value\n",
          name_len, message, *info);
  exit(1);
}
回复此楼
已阅   回复此楼   关注TA 给TA发消息 送TA红花 TA的回帖

智能机器人

Robot (super robot)

我们都爱小木虫

相关版块跳转 我要订阅楼主 onesupeng 的主题更新
☆ 无星级 ★ 一星级 ★★★ 三星级 ★★★★★ 五星级
普通表情 高级回复(可上传附件)
信息提示
请填处理意见