blob: 200721ec01158c972ac7d1d514881bf500d4343b [file] [log] [blame]
Austin Schuh9a24b372018-01-28 16:12:29 -08001###################################################################################################
2# #
3# This file is part of BLASFEO. #
4# #
5# BLASFEO -- BLAS For Embedded Optimization. #
6# Copyright (C) 2016-2017 by Gianluca Frison. #
7# Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. #
8# All rights reserved. #
9# #
10# HPMPC is free software; you can redistribute it and/or #
11# modify it under the terms of the GNU Lesser General Public #
12# License as published by the Free Software Foundation; either #
13# version 2.1 of the License, or (at your option) any later version. #
14# #
15# HPMPC is distributed in the hope that it will be useful, #
16# but WITHOUT ANY WARRANTY; without even the implied warranty of #
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. #
18# See the GNU Lesser General Public License for more details. #
19# #
20# You should have received a copy of the GNU Lesser General Public #
21# License along with HPMPC; if not, write to the Free Software #
22# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #
23# #
24# Author: Gianluca Frison, giaf (at) dtu.dk #
25# gianluca.frison (at) imtek.uni-freiburg.de #
26# #
27###################################################################################################
28
# Target architecture -- uncomment the TARGET line matching the build machine.
#   X64_INTEL_HASWELL      : x86_64 with AVX2 and FMA ISA (64 bit OS); code optimized for Intel Haswell and Intel Skylake architectures.
#   X64_INTEL_SANDY_BRIDGE : x86_64 with AVX ISA (64 bit OS); code optimized for Intel Sandy-Bridge architecture.
#   X64_INTEL_CORE         : x86_64 with SSE3 ISA (64 bit OS); code optimized for Intel Core architecture.
#   X64_AMD_BULLDOZER      : x86_64 with AVX and FMA ISA (64 bit OS); code optimized for AMD Bulldozer.
#   ARMV8A_ARM_CORTEX_A57  : ARMv8A architecture; going by the name, code optimized for ARM Cortex A57.
#   ARMV7A_ARM_CORTEX_A15  : ARMv7A with NEON-VFPv4 ISA (32 bit OS); code optimized for ARM Cortex A15.
#   GENERIC                : generic c99 code.
# The selected value picks the architecture-specific compiler flags set later in this file.
TARGET = X64_INTEL_HASWELL
#TARGET = X64_INTEL_SANDY_BRIDGE
#TARGET = X64_INTEL_CORE
#TARGET = X64_AMD_BULLDOZER
#TARGET = ARMV8A_ARM_CORTEX_A57
#TARGET = ARMV7A_ARM_CORTEX_A15
#TARGET = GENERIC
43
# Linear algebra backend: one of HIGH_PERFORMANCE, REFERENCE, BLAS.
# The choice is turned into a -DLA_<name> preprocessor definition further down.
LA = HIGH_PERFORMANCE
#LA = REFERENCE
#LA = BLAS

# External BLAS / LAPACK implementation (only meaningful together with LA = BLAS).
# The value 0 adds no extra definition; every other value adds -DREF_BLAS_<name>.
REF_BLAS = 0
#REF_BLAS = OPENBLAS
#REF_BLAS = NETLIB
#REF_BLAS = MKL
#REF_BLAS = BLIS
#REF_BLAS = ATLAS
56
# Auxiliary functions with external dependencies (memory allocation and printing):
# 1 compiles them in (adds -DEXT_DEP), 0 leaves them out.
EXT_DEP = 1
#EXT_DEP = 0

# On-line checks of matrix and vector dimensions:
# 1 enables them (adds -DDIM_CHECK), 0 disables them.
RUNTIME_CHECKS = 0
#RUNTIME_CHECKS = 1
64
# Operating system, derived from the kernel name printed by `uname -s`.
# Linux maps to LINUX and Darwin maps to MAC; for any other kernel name this
# section does not assign OS at all.
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
OS = LINUX
else ifeq ($(UNAME_S),Darwin)
OS = MAC
endif
# Manual selection: uncomment one of the lines below to force a value.
#OS = LINUX
#OS = MAC
#OS = WINDOWS
76
# C compiler used for the build (alternatives kept for quick switching).
CC = gcc
#CC = clang
#CC = x86_64-w64-mingw32-gcc

# Root directory for installation.
PREFIX = /opt

# Macro level in the assembly kernels (trade-off between code size and performance):
#   0 : no macro
#   1 : all macro but the gemm kernel
#   2 : all macro
MACRO_LEVEL = 0
87
# Start the compiler, assembler and linker flag lists from empty values;
# everything after this point appends with +=.
# Recursive (=) assignment is kept on purpose so later appends stay deferred.
CFLAGS =
ASFLAGS =
LDFLAGS =

# Optimization level and position-independent code.
CFLAGS += -O2 -fPIC

# Debugging flags (disabled by default; uncomment to enable).
#CFLAGS += -g #-Wall -pedantic -Wfloat-equal #-pg
#ASFLAGS += -g
99
# Definitions: translate the configuration options into preprocessor macros.

# Linear algebra backend -> -DLA_<name> (the three values are mutually exclusive).
ifeq ($(LA), HIGH_PERFORMANCE)
CFLAGS += -DLA_HIGH_PERFORMANCE
else ifeq ($(LA), REFERENCE)
CFLAGS += -DLA_REFERENCE
else ifeq ($(LA), BLAS)
CFLAGS += -DLA_BLAS
endif

# Dimension checks at run time.
ifeq ($(RUNTIME_CHECKS), 1)
CFLAGS += -DDIM_CHECK
endif

# Auxiliary functions with external dependencies.
ifeq ($(EXT_DEP), 1)
CFLAGS += -DEXT_DEP
endif

# Macro level is passed to the assembler only; level 0 adds no definition.
ifeq ($(MACRO_LEVEL), 1)
ASFLAGS += -DMACRO_LEVEL=1
else ifeq ($(MACRO_LEVEL), 2)
ASFLAGS += -DMACRO_LEVEL=2
endif
125
# Operating system macro, handed to both the C compiler and the assembler.
# No definition is added when OS is none of LINUX, MAC or WINDOWS.
ifeq ($(OS), LINUX)
CFLAGS += -DOS_LINUX
ASFLAGS += -DOS_LINUX
else ifeq ($(OS), MAC)
CFLAGS += -DOS_MAC
ASFLAGS += -DOS_MAC
else ifeq ($(OS), WINDOWS)
CFLAGS += -DOS_WINDOWS
ASFLAGS += -DOS_WINDOWS
endif
138
# External BLAS selection -> -DREF_BLAS_<name>, plus the include paths and
# extra compiler switches that particular implementations are given here.
ifeq ($(REF_BLAS), 0)
# No external BLAS: the append below is intentionally empty.
CFLAGS +=
else ifeq ($(REF_BLAS), OPENBLAS)
CFLAGS += -DREF_BLAS_OPENBLAS -I/opt/openblas/include
else ifeq ($(REF_BLAS), BLIS)
CFLAGS += -DREF_BLAS_BLIS -std=c99
else ifeq ($(REF_BLAS), NETLIB)
CFLAGS += -DREF_BLAS_NETLIB
else ifeq ($(REF_BLAS), MKL)
CFLAGS += -DREF_BLAS_MKL -m64 -I/opt/intel/mkl/include
else ifeq ($(REF_BLAS), ATLAS)
CFLAGS += -DREF_BLAS_ATLAS
endif
157
# Architecture-specific flags.
# Each TARGET value adds its ISA switches and a -DTARGET_<name> macro to CFLAGS.
# The ARM targets also pass -DTARGET_<name> to the assembler through ASFLAGS;
# that macro must name the same target as the one given to the C compiler.
ifeq ($(TARGET), X64_INTEL_HASWELL)
CFLAGS += -m64 -mavx2 -mfma -DTARGET_X64_INTEL_HASWELL
endif
ifeq ($(TARGET), X64_INTEL_SANDY_BRIDGE)
CFLAGS += -m64 -mavx -DTARGET_X64_INTEL_SANDY_BRIDGE
endif
ifeq ($(TARGET), X64_INTEL_CORE)
CFLAGS += -m64 -msse3 -DTARGET_X64_INTEL_CORE
endif
ifeq ($(TARGET), X64_AMD_BULLDOZER)
CFLAGS += -m64 -mavx -mfma -DTARGET_X64_AMD_BULLDOZER
endif
ifeq ($(TARGET), ARMV8A_ARM_CORTEX_A57)
CFLAGS += -march=armv8-a+crc+crypto+fp+simd -DTARGET_ARMV8A_ARM_CORTEX_A57
# Fixed: this used to define TARGET_ARMV7A_ARM_CORTEX_A15 (copy-paste from the
# ARMv7 branch), so assembly sources saw a different target than the C sources.
ASFLAGS += -DTARGET_ARMV8A_ARM_CORTEX_A57
endif
ifeq ($(TARGET), ARMV7A_ARM_CORTEX_A15)
CFLAGS += -marm -mfloat-abi=hard -mfpu=neon-vfpv4 -mcpu=cortex-a15 -DTARGET_ARMV7A_ARM_CORTEX_A15
ASFLAGS += -mfpu=neon-vfpv4 -DTARGET_ARMV7A_ARM_CORTEX_A15
endif
ifeq ($(TARGET), GENERIC)
CFLAGS += -DTARGET_GENERIC
endif
182
183