| ################################################################################################### |
| # # |
| # This file is part of BLASFEO. # |
| # # |
| # BLASFEO -- BLAS For Embedded Optimization. # |
| # Copyright (C) 2016-2017 by Gianluca Frison. # |
| # Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. # |
| # All rights reserved. # |
| # # |
| # BLASFEO is free software; you can redistribute it and/or # |
| # modify it under the terms of the GNU Lesser General Public # |
| # License as published by the Free Software Foundation; either # |
| # version 2.1 of the License, or (at your option) any later version. # |
| # # |
| # BLASFEO is distributed in the hope that it will be useful, # |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of # |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # |
| # See the GNU Lesser General Public License for more details. # |
| # # |
| # You should have received a copy of the GNU Lesser General Public # |
| # License along with BLASFEO; if not, write to the Free Software # |
| # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # |
| # # |
| # Author: Gianluca Frison, giaf (at) dtu.dk # |
| # gianluca.frison (at) imtek.uni-freiburg.de # |
| # # |
| ################################################################################################### |
| |
| # Target architecture: picks the ISA compiler flags and the -DTARGET_<name> define added in the architecture-specific section of this file. |
| # X64_INTEL_HASWELL : x86_64 architecture with AVX2 and FMA ISA (64 bit OS), code optimized for Intel Haswell and Intel Skylake architectures. |
| # X64_INTEL_SANDY_BRIDGE : x86_64 architecture with AVX ISA (64 bit OS), code optimized for Intel Sandy-Bridge architecture. |
| # X64_INTEL_CORE : x86_64 architecture with SSE3 ISA (64 bit OS), code optimized for Intel Core architecture. |
| # X64_AMD_BULLDOZER : x86_64 architecture with AVX and FMA ISA (64 bit OS), code optimized for AMD Bulldozer. |
| # ARMV8A_ARM_CORTEX_A57 : ARMv8A architecture, code for ARM Cortex A57. |
| # ARMV7A_ARM_CORTEX_A15 : ARMv7A architecture with NEON-VFPv4 ISA (32 bit OS), code optimized for ARM Cortex A15. |
| # GENERIC : generic c99 code. |
| # Leave exactly one of the TARGET lines below uncommented. |
| TARGET = X64_INTEL_HASWELL |
| #TARGET = X64_INTEL_SANDY_BRIDGE |
| #TARGET = X64_INTEL_CORE |
| #TARGET = X64_AMD_BULLDOZER |
| #TARGET = ARMV8A_ARM_CORTEX_A57 |
| #TARGET = ARMV7A_ARM_CORTEX_A15 |
| #TARGET = GENERIC |
| |
| # Linear algebra back-end (HIGH_PERFORMANCE, REFERENCE or BLAS). |
| # The value is translated into the matching -DLA_<value> define for the C compiler further down. |
| LA = HIGH_PERFORMANCE |
| #LA = REFERENCE |
| #LA = BLAS |
| |
| # External BLAS and LAPACK implementation (for LA=BLAS). |
| # The value 0 adds no REF_BLAS define or include path to CFLAGS. |
| REF_BLAS = 0 |
| #REF_BLAS = OPENBLAS |
| #REF_BLAS = NETLIB |
| #REF_BLAS = MKL |
| #REF_BLAS = BLIS |
| #REF_BLAS = ATLAS |
| |
| # Auxiliary functions with external dependencies (memory allocation and printing): 1 compiles them (adds -DEXT_DEP), 0 leaves them out. |
| #EXT_DEP = 0 |
| EXT_DEP = 1 |
| |
| # On-line checks of matrix and vector dimensions: 1 enables them (adds -DDIM_CHECK), 0 disables them. |
| RUNTIME_CHECKS = 0 |
| #RUNTIME_CHECKS = 1 |
| |
| # Operating system, derived from the output of `uname -s`: Linux -> LINUX, Darwin -> MAC. |
| # For any other output OS is not assigned here; uncomment one of the manual settings at the end of this section instead. |
| UNAME_S := $(shell uname -s) |
| ifeq ($(UNAME_S),Linux) |
|   OS = LINUX |
| else ifeq ($(UNAME_S),Darwin) |
|   OS = MAC |
| endif |
| #OS = LINUX |
| #OS = MAC |
| #OS = WINDOWS |
| |
| # C compiler used for the build (alternatives kept below for convenience) |
| CC = gcc |
| #CC = clang |
| #CC = x86_64-w64-mingw32-gcc |
| |
| # Directory the library is installed into |
| PREFIX = /opt |
| |
| # Macro level for the assembly kernels (trade-off between code size and performance): |
| # 0 : no macro |
| # 1 : all macro but gemm kernel |
| # 2 : all macro |
| MACRO_LEVEL = 0 |
| |
| # Compiler / assembler / linker flags start out empty; the sections below append to them. |
| CFLAGS = |
| ASFLAGS = |
| LDFLAGS = |
| |
| # Optimization level and position-independent code |
| CFLAGS += -O2 -fPIC |
| |
| # Debugging flags (disabled by default; uncomment to enable) |
| #CFLAGS += -g #-Wall -pedantic -Wfloat-equal #-pg |
| #ASFLAGS += -g |
| |
| # Definitions: turn the option values chosen above into preprocessor defines. |
| |
| # Linear algebra back-end -> -DLA_<value> |
| ifeq ($(LA),HIGH_PERFORMANCE) |
|   CFLAGS += -DLA_HIGH_PERFORMANCE |
| else ifeq ($(LA),REFERENCE) |
|   CFLAGS += -DLA_REFERENCE |
| else ifeq ($(LA),BLAS) |
|   CFLAGS += -DLA_BLAS |
| endif |
| |
| # Run-time dimension checks |
| ifeq ($(RUNTIME_CHECKS),1) |
|   CFLAGS += -DDIM_CHECK |
| endif |
| |
| # Auxiliary functions with external dependencies |
| ifeq ($(EXT_DEP),1) |
|   CFLAGS += -DEXT_DEP |
| endif |
| |
| # Macro level for the assembly kernels; level 0 adds no define |
| ifeq ($(MACRO_LEVEL),1) |
|   ASFLAGS += -DMACRO_LEVEL=1 |
| else ifeq ($(MACRO_LEVEL),2) |
|   ASFLAGS += -DMACRO_LEVEL=2 |
| endif |
| |
| # Operating system define, passed to both the C compiler and the assembler |
| ifeq ($(OS),LINUX) |
|   CFLAGS += -DOS_LINUX |
|   ASFLAGS += -DOS_LINUX |
| else ifeq ($(OS),MAC) |
|   CFLAGS += -DOS_MAC |
|   ASFLAGS += -DOS_MAC |
| else ifeq ($(OS),WINDOWS) |
|   CFLAGS += -DOS_WINDOWS |
|   ASFLAGS += -DOS_WINDOWS |
| endif |
| |
| # External BLAS selection: -DREF_BLAS_<name> plus any extra compiler options or include path for that library |
| ifeq ($(REF_BLAS),0) |
|   CFLAGS += |
| else ifeq ($(REF_BLAS),OPENBLAS) |
|   CFLAGS += -DREF_BLAS_OPENBLAS -I/opt/openblas/include |
| else ifeq ($(REF_BLAS),BLIS) |
|   CFLAGS += -DREF_BLAS_BLIS -std=c99 |
| else ifeq ($(REF_BLAS),NETLIB) |
|   CFLAGS += -DREF_BLAS_NETLIB |
| else ifeq ($(REF_BLAS),MKL) |
|   CFLAGS += -DREF_BLAS_MKL -m64 -I/opt/intel/mkl/include |
| else ifeq ($(REF_BLAS),ATLAS) |
|   CFLAGS += -DREF_BLAS_ATLAS |
| endif |
| |
| # Architecture-specific flags |
| # At most one branch matches the selected TARGET. Each branch appends the ISA options and a |
| # -DTARGET_<name> define to CFLAGS; the ARM branches also append to ASFLAGS. |
| ifeq ($(TARGET), X64_INTEL_HASWELL) |
| CFLAGS += -m64 -mavx2 -mfma -DTARGET_X64_INTEL_HASWELL |
| endif |
| ifeq ($(TARGET), X64_INTEL_SANDY_BRIDGE) |
| CFLAGS += -m64 -mavx -DTARGET_X64_INTEL_SANDY_BRIDGE |
| endif |
| ifeq ($(TARGET), X64_INTEL_CORE) |
| CFLAGS += -m64 -msse3 -DTARGET_X64_INTEL_CORE |
| endif |
| ifeq ($(TARGET), X64_AMD_BULLDOZER) |
| CFLAGS += -m64 -mavx -mfma -DTARGET_X64_AMD_BULLDOZER |
| endif |
| ifeq ($(TARGET), ARMV8A_ARM_CORTEX_A57) |
| CFLAGS += -march=armv8-a+crc+crypto+fp+simd -DTARGET_ARMV8A_ARM_CORTEX_A57 |
| # The assembler gets the same target define as the C compiler (not the ARMv7A one). |
| ASFLAGS += -DTARGET_ARMV8A_ARM_CORTEX_A57 |
| endif |
| ifeq ($(TARGET), ARMV7A_ARM_CORTEX_A15) |
| CFLAGS += -marm -mfloat-abi=hard -mfpu=neon-vfpv4 -mcpu=cortex-a15 -DTARGET_ARMV7A_ARM_CORTEX_A15 |
| ASFLAGS += -mfpu=neon-vfpv4 -DTARGET_ARMV7A_ARM_CORTEX_A15 |
| endif |
| ifeq ($(TARGET), GENERIC) |
| CFLAGS += -DTARGET_GENERIC |
| endif |
| |
| |