Austin Schuh | 9a24b37 | 2018-01-28 16:12:29 -0800 | [diff] [blame] | 1 | ################################################################################################### |
| 2 | # # |
| 3 | # This file is part of BLASFEO. # |
| 4 | # # |
| 5 | # BLASFEO -- BLAS For Embedded Optimization. # |
| 6 | # Copyright (C) 2016-2017 by Gianluca Frison. # |
| 7 | # Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. # |
| 8 | # All rights reserved. # |
| 9 | # # |
| 10 | # HPMPC is free software; you can redistribute it and/or # |
| 11 | # modify it under the terms of the GNU Lesser General Public # |
| 12 | # License as published by the Free Software Foundation; either # |
| 13 | # version 2.1 of the License, or (at your option) any later version. # |
| 14 | # # |
| 15 | # HPMPC is distributed in the hope that it will be useful, # |
| 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # |
| 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # |
| 18 | # See the GNU Lesser General Public License for more details. # |
| 19 | # # |
| 20 | # You should have received a copy of the GNU Lesser General Public # |
| 21 | # License along with HPMPC; if not, write to the Free Software # |
| 22 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # |
| 23 | # # |
| 24 | # Author: Gianluca Frison, giaf (at) dtu.dk # |
| 25 | # gianluca.frison (at) imtek.uni-freiburg.de # |
| 26 | # # |
| 27 | ################################################################################################### |
| 28 | |
# Target architecture.
# Keep exactly one TARGET line active; the architecture-specific section of this
# file turns the choice into compiler/assembler flags and a -DTARGET_* macro.
#   X64_INTEL_HASWELL      : x86_64 architecture with AVX2 and FMA ISA (64 bit OS), code optimized for Intel Haswell and Intel Skylake architectures.
#   X64_INTEL_SANDY_BRIDGE : x86_64 architecture with AVX ISA (64 bit OS), code optimized for Intel Sandy-Bridge architecture.
#   X64_INTEL_CORE         : x86_64 architecture with SSE3 (64 bit OS), code optimized for Intel Core architecture.
#   X64_AMD_BULLDOZER      : x86_64 architecture with AVX and FMA ISA (64 bit OS), code optimized for AMD Bulldozer.
#   ARMV8A_ARM_CORTEX_A57  : ARMv8A architecture (compiled with -march=armv8-a+crc+crypto+fp+simd), code for ARM Cortex A57.
#   ARMV7A_ARM_CORTEX_A15  : ARMv7A architecture with NEON-VFPv4 ISA (32 bit OS), code optimized for ARM Cortex A15.
#   GENERIC                : generic c99 code
TARGET = X64_INTEL_HASWELL
# TARGET = X64_INTEL_SANDY_BRIDGE
# TARGET = X64_INTEL_CORE
# TARGET = X64_AMD_BULLDOZER
# TARGET = ARMV8A_ARM_CORTEX_A57
# TARGET = ARMV7A_ARM_CORTEX_A15
# TARGET = GENERIC
| 43 | |
# Linear algebra implementation to build.
# The value is translated further down into one of the -DLA_* macros.
LA = HIGH_PERFORMANCE
# LA = REFERENCE
# LA = BLAS

# BLAS and LAPACK version; only relevant when LA = BLAS.
# The default value 0 adds no BLAS-specific compiler flags.
REF_BLAS = 0
# REF_BLAS = OPENBLAS
# REF_BLAS = NETLIB
# REF_BLAS = MKL
# REF_BLAS = BLIS
# REF_BLAS = ATLAS
| 56 | |
# Auxiliary functions with external dependencies (memory allocation and
# printing): 1 compiles them in (adds -DEXT_DEP), 0 leaves them out.
EXT_DEP = 1
# EXT_DEP = 0

# On-line checks for matrix and vector dimensions:
# 1 enables them (adds -DDIM_CHECK), 0 disables them.
RUNTIME_CHECKS = 0
# RUNTIME_CHECKS = 1
| 64 | |
# Operating system.
# OS is derived from the host the build runs on and later selects the
# -DOS_LINUX / -DOS_MAC / -DOS_WINDOWS macro for both C and assembly sources.
# `uname -s` is evaluated once (simple assignment) at parse time; if the
# command is unavailable UNAME_S is simply empty.
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
OS = LINUX
else ifeq ($(UNAME_S),Darwin)
OS = MAC
else ifeq ($(OS),Windows_NT)
# Windows exports OS=Windows_NT in the environment, which make imports as a
# variable; map it to the spelling the OS_* checks in this file expect.
OS = WINDOWS
else ifneq ($(findstring _NT,$(UNAME_S)),)
# MinGW / MSYS / Cygwin shells are expected to report a kernel name
# containing "_NT" (e.g. MINGW64_NT-10.0) -- verify on unusual environments.
OS = WINDOWS
endif
# Manual override: uncomment one line to force the value (needed, for
# instance, when cross-compiling for a different operating system).
#OS = LINUX
#OS = MAC
#OS = WINDOWS
| 76 | |
# C compiler used for the build.
CC = gcc
# CC = clang
# CC = x86_64-w64-mingw32-gcc

# Installation directory.
PREFIX = /opt

# Macro level: trades code size against performance in the assembly kernels.
#   0 : no macro
#   1 : all macro but gemm kernel
#   2 : all macro
MACRO_LEVEL = 0
| 87 | |
# Compiler / assembler / linker flags.
# All three start out empty; later sections of this file append to them.
LDFLAGS =
ASFLAGS =
CFLAGS =

# Optimization flags: -O2, position-independent code.
CFLAGS += -O2 -fPIC

# Debugging flags (disabled by default; uncomment to enable).
#CFLAGS += -g #-Wall -pedantic -Wfloat-equal #-pg
#ASFLAGS += -g
| 99 | |
# Definitions: preprocessor macros derived from the configuration options.

# Linear algebra implementation -> exactly one -DLA_* macro
# (none if LA holds an unrecognized value).
ifeq ($(LA),HIGH_PERFORMANCE)
  CFLAGS += -DLA_HIGH_PERFORMANCE
else ifeq ($(LA),REFERENCE)
  CFLAGS += -DLA_REFERENCE
else ifeq ($(LA),BLAS)
  CFLAGS += -DLA_BLAS
endif

# Run-time dimension checks.
ifeq ($(RUNTIME_CHECKS),1)
  CFLAGS += -DDIM_CHECK
endif

# Auxiliary functions with external dependencies.
ifeq ($(EXT_DEP),1)
  CFLAGS += -DEXT_DEP
endif
| 118 | |
# Forward the macro level to the assembler; level 0 (or any other value)
# defines nothing.
ifeq ($(MACRO_LEVEL),1)
  ASFLAGS += -DMACRO_LEVEL=1
else ifeq ($(MACRO_LEVEL),2)
  ASFLAGS += -DMACRO_LEVEL=2
endif
| 125 | |
# Expose the operating system to both the C compiler and the assembler.
# An unrecognized OS value defines no OS_* macro at all.
ifeq ($(OS),LINUX)
  CFLAGS  += -DOS_LINUX
  ASFLAGS += -DOS_LINUX
else ifeq ($(OS),MAC)
  CFLAGS  += -DOS_MAC
  ASFLAGS += -DOS_MAC
else ifeq ($(OS),WINDOWS)
  CFLAGS  += -DOS_WINDOWS
  ASFLAGS += -DOS_WINDOWS
endif
| 138 | |
# Compiler flags for the selected BLAS/LAPACK version.
# Each branch defines the matching -DREF_BLAS_* macro; OPENBLAS and MKL also
# add their (hard-coded) include directories.
ifeq ($(REF_BLAS),0)
  # no reference BLAS: nothing to add
  CFLAGS +=
else ifeq ($(REF_BLAS),OPENBLAS)
  CFLAGS += -DREF_BLAS_OPENBLAS -I/opt/openblas/include
else ifeq ($(REF_BLAS),BLIS)
  CFLAGS += -DREF_BLAS_BLIS -std=c99
else ifeq ($(REF_BLAS),NETLIB)
  CFLAGS += -DREF_BLAS_NETLIB
else ifeq ($(REF_BLAS),MKL)
  CFLAGS += -DREF_BLAS_MKL -m64 -I/opt/intel/mkl/include
else ifeq ($(REF_BLAS),ATLAS)
  CFLAGS += -DREF_BLAS_ATLAS
endif
| 157 | |
# Architecture-specific flags.
# Each branch enables the instruction-set options for the selected TARGET and
# defines the matching -DTARGET_* macro. An unrecognized TARGET adds nothing.
# For the ARM targets the macro is passed to the assembler as well, and it must
# be the same macro the C compiler receives.
ifeq ($(TARGET), X64_INTEL_HASWELL)
CFLAGS += -m64 -mavx2 -mfma -DTARGET_X64_INTEL_HASWELL
endif
ifeq ($(TARGET), X64_INTEL_SANDY_BRIDGE)
CFLAGS += -m64 -mavx -DTARGET_X64_INTEL_SANDY_BRIDGE
endif
ifeq ($(TARGET), X64_INTEL_CORE)
CFLAGS += -m64 -msse3 -DTARGET_X64_INTEL_CORE
endif
ifeq ($(TARGET), X64_AMD_BULLDOZER)
CFLAGS += -m64 -mavx -mfma -DTARGET_X64_AMD_BULLDOZER
endif
ifeq ($(TARGET), ARMV8A_ARM_CORTEX_A57)
CFLAGS += -march=armv8-a+crc+crypto+fp+simd -DTARGET_ARMV8A_ARM_CORTEX_A57
# Previously this passed -DTARGET_ARMV7A_ARM_CORTEX_A15 (copied from the A15
# branch), so assembly sources saw a different target than the C sources.
ASFLAGS += -DTARGET_ARMV8A_ARM_CORTEX_A57
endif
ifeq ($(TARGET), ARMV7A_ARM_CORTEX_A15)
CFLAGS += -marm -mfloat-abi=hard -mfpu=neon-vfpv4 -mcpu=cortex-a15 -DTARGET_ARMV7A_ARM_CORTEX_A15
ASFLAGS += -mfpu=neon-vfpv4 -DTARGET_ARMV7A_ARM_CORTEX_A15
endif
ifeq ($(TARGET), GENERIC)
CFLAGS += -DTARGET_GENERIC
endif
| 182 | |
| 183 | |