blob: 200721ec01158c972ac7d1d514881bf500d4343b [file] [log] [blame]
###################################################################################################
# #
# This file is part of BLASFEO. #
# #
# BLASFEO -- BLAS For Embedded Optimization. #
# Copyright (C) 2016-2017 by Gianluca Frison. #
# Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. #
# All rights reserved. #
# #
# HPMPC is free software; you can redistribute it and/or #
# modify it under the terms of the GNU Lesser General Public #
# License as published by the Free Software Foundation; either #
# version 2.1 of the License, or (at your option) any later version. #
# #
# HPMPC is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. #
# See the GNU Lesser General Public License for more details. #
# #
# You should have received a copy of the GNU Lesser General Public #
# License along with HPMPC; if not, write to the Free Software #
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #
# #
# Author: Gianluca Frison, giaf (at) dtu.dk #
# gianluca.frison (at) imtek.uni-freiburg.de #
# #
###################################################################################################
# Target architecture
# Selects which -DTARGET_* macro and ISA-specific compiler/assembler options are
# appended in the architecture-specific section further down in this file.
# Keep exactly one TARGET line uncommented.
# X64_INTEL_HASWELL : x86_64 architecture with AVX2 and FMA ISA (64 bit OS) code optimized for Intel Haswell and Intel Skylake architectures.
# X64_INTEL_SANDY_BRIDGE : x86_64 architecture with AVX ISA (64 bit OS) code optimized for Intel Sandy-Bridge architecture.
# X64_INTEL_CORE : x86_64 architecture with SSE3 (64 bit OS) code optimized for Intel Core architecture.
# X64_AMD_BULLDOZER : x86_64 architecture with AVX and FMA ISA (64 bit OS) code optimized for AMD Bulldozer.
# ARMV8A_ARM_CORTEX_A57 : ARMv8A architecture (compiled with -march=armv8-a+crc+crypto+fp+simd); presumably code optimized for ARM Cortex A57 -- TODO confirm wording, this option was not listed originally.
# ARMV7A_ARM_CORTEX_A15 : ARMv7A architecture with NEON-VFPv4 ISA (32 bit OS) code optimized for ARM Cortex A15.
# GENERIC : generic c99 code
TARGET = X64_INTEL_HASWELL
#TARGET = X64_INTEL_SANDY_BRIDGE
#TARGET = X64_INTEL_CORE
#TARGET = X64_AMD_BULLDOZER
#TARGET = ARMV8A_ARM_CORTEX_A57
#TARGET = ARMV7A_ARM_CORTEX_A15
#TARGET = GENERIC
# Linear Algebra library
# The chosen value is forwarded to the C compiler as -DLA_<value>
# (LA_HIGH_PERFORMANCE, LA_REFERENCE or LA_BLAS) further down in this file.
# With LA = BLAS the external library is picked through REF_BLAS.
LA = HIGH_PERFORMANCE
#LA = REFERENCE
#LA = BLAS
# BLAS and LAPACK version (for LA=BLAS)
# 0 adds no flags; every other value adds the matching -DREF_BLAS_<value> macro.
# OPENBLAS and MKL additionally add the hard-coded include paths
# /opt/openblas/include and /opt/intel/mkl/include respectively.
REF_BLAS = 0
#REF_BLAS = OPENBLAS
#REF_BLAS = NETLIB
#REF_BLAS = MKL
#REF_BLAS = BLIS
#REF_BLAS = ATLAS
# Compile auxiliary functions with external dependencies (for memory allocation and printing)
# A value of 1 adds -DEXT_DEP to CFLAGS; any other value adds nothing.
#EXT_DEP = 0
EXT_DEP = 1
# Enable on-line checks for matrix and vector dimensions
# A value of 1 adds -DDIM_CHECK to CFLAGS; any other value adds nothing.
RUNTIME_CHECKS = 0
#RUNTIME_CHECKS = 1
# Operating system
# Auto-detected from the kernel name reported by `uname -s`; the query runs once
# at parse time. Only Linux and Darwin are recognised: on any other host OS is
# left as it was, and one of the manual overrides below must be uncommented.
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
  OS = LINUX
else ifeq ($(UNAME_S),Darwin)
  OS = MAC
endif
# Manual overrides (uncomment exactly one to bypass the detection above)
#OS = LINUX
#OS = MAC
#OS = WINDOWS
# C Compiler
# Compiler driver; the flags assembled in this file use GCC-style option
# spelling (-m64, -mavx2, -march=...).
CC = gcc
#CC = clang
#CC = x86_64-w64-mingw32-gcc
# Installation directory
PREFIX = /opt
# Macro level (code size vs performance in assembly kernels): 0 (no macro), 1 (all macro but gemm kernel), 2 (all macro)
# Levels 1 and 2 are forwarded to the assembler as -DMACRO_LEVEL=<n>; level 0 adds no define.
MACRO_LEVEL = 0
# compiler / assembler / linker flags
# Start from empty values; every option below is accumulated with '+='.
# NOTE(review): the plain '=' discards CFLAGS/ASFLAGS/LDFLAGS inherited from the
# environment, while values given on the make command line replace everything
# assembled in this file (including the mandatory -DTARGET_* macros).
CFLAGS =
ASFLAGS =
LDFLAGS =
# Optimization flags
# -O2 optimisation and position-independent code for every C object.
CFLAGS += -O2 -fPIC
# Debugging flags
# Uncomment to add debug information to C objects / assembly objects.
#CFLAGS += -g #-Wall -pedantic -Wfloat-equal #-pg
#ASFLAGS += -g
# Definitions
# Translate the option variables set above into preprocessor macros.
# Linear algebra back end: exactly one -DLA_* macro is added for a recognised
# LA value; an unrecognised value adds nothing.
ifeq ($(LA), HIGH_PERFORMANCE)
  CFLAGS += -DLA_HIGH_PERFORMANCE
else ifeq ($(LA), REFERENCE)
  CFLAGS += -DLA_REFERENCE
else ifeq ($(LA), BLAS)
  CFLAGS += -DLA_BLAS
endif
# Optional features, each enabled only when its switch is exactly 1.
# Dimension checking of matrix/vector arguments.
ifeq (1,$(RUNTIME_CHECKS))
  CFLAGS += -DDIM_CHECK
endif
# Auxiliary routines with external dependencies.
ifeq (1,$(EXT_DEP))
  CFLAGS += -DEXT_DEP
endif
# Macro level for the assembly kernels: levels 1 and 2 are passed to the
# assembler as -DMACRO_LEVEL=<n>; level 0 (or any other value) adds no define.
ifeq ($(MACRO_LEVEL), 1)
  ASFLAGS += -DMACRO_LEVEL=1
else ifeq ($(MACRO_LEVEL), 2)
  ASFLAGS += -DMACRO_LEVEL=2
endif
# Operating system macro: the same -DOS_* define goes to both the C compiler
# and the assembler. An OS value other than LINUX, MAC or WINDOWS adds nothing.
ifeq ($(OS), LINUX)
  CFLAGS += -DOS_LINUX
  ASFLAGS += -DOS_LINUX
else ifeq ($(OS), MAC)
  CFLAGS += -DOS_MAC
  ASFLAGS += -DOS_MAC
else ifeq ($(OS), WINDOWS)
  CFLAGS += -DOS_WINDOWS
  ASFLAGS += -DOS_WINDOWS
endif
# External BLAS/LAPACK implementation: add the matching -DREF_BLAS_* macro plus
# any implementation-specific option or (hard-coded) include path.
ifeq ($(REF_BLAS), 0)
  # no external BLAS selected: nothing to add
  CFLAGS +=
else ifeq ($(REF_BLAS), OPENBLAS)
  CFLAGS += -DREF_BLAS_OPENBLAS -I/opt/openblas/include
else ifeq ($(REF_BLAS), BLIS)
  CFLAGS += -DREF_BLAS_BLIS -std=c99
else ifeq ($(REF_BLAS), NETLIB)
  CFLAGS += -DREF_BLAS_NETLIB
else ifeq ($(REF_BLAS), MKL)
  CFLAGS += -DREF_BLAS_MKL -m64 -I/opt/intel/mkl/include
else ifeq ($(REF_BLAS), ATLAS)
  CFLAGS += -DREF_BLAS_ATLAS
endif
# Architecture-specific flags
# For the selected TARGET, add the ISA options and the -DTARGET_<name> macro.
# The ARM targets also pass the macro to the assembler through ASFLAGS; it must
# name the same target as the one given to the C compiler.
# An unrecognised TARGET value adds nothing.
ifeq ($(TARGET), X64_INTEL_HASWELL)
CFLAGS += -m64 -mavx2 -mfma -DTARGET_X64_INTEL_HASWELL
endif
ifeq ($(TARGET), X64_INTEL_SANDY_BRIDGE)
CFLAGS += -m64 -mavx -DTARGET_X64_INTEL_SANDY_BRIDGE
endif
ifeq ($(TARGET), X64_INTEL_CORE)
CFLAGS += -m64 -msse3 -DTARGET_X64_INTEL_CORE
endif
ifeq ($(TARGET), X64_AMD_BULLDOZER)
CFLAGS += -m64 -mavx -mfma -DTARGET_X64_AMD_BULLDOZER
endif
ifeq ($(TARGET), ARMV8A_ARM_CORTEX_A57)
CFLAGS += -march=armv8-a+crc+crypto+fp+simd -DTARGET_ARMV8A_ARM_CORTEX_A57
# Fixed: this used to pass -DTARGET_ARMV7A_ARM_CORTEX_A15 (copied from the ARMv7
# branch), so assembly sources saw a different target than the C sources.
ASFLAGS += -DTARGET_ARMV8A_ARM_CORTEX_A57
endif
ifeq ($(TARGET), ARMV7A_ARM_CORTEX_A15)
CFLAGS += -marm -mfloat-abi=hard -mfpu=neon-vfpv4 -mcpu=cortex-a15 -DTARGET_ARMV7A_ARM_CORTEX_A15
ASFLAGS += -mfpu=neon-vfpv4 -DTARGET_ARMV7A_ARM_CORTEX_A15
endif
ifeq ($(TARGET), GENERIC)
CFLAGS += -DTARGET_GENERIC
endif