diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml
index 899b2cc5015..31aaed6f693 100644
--- a/.github/workflows/regression.yml
+++ b/.github/workflows/regression.yml
@@ -26,25 +26,25 @@ jobs:
         config_set: [BaseMPI, ReverseMPI, ForwardMPI, BaseNoMPI, ReverseNoMPI, ForwardNoMPI, ReverseTagNoMPI, BaseOMP, ReverseOMP, ForwardOMP]
         include:
           - config_set: BaseMPI
-            flags: '-Denable-pywrapper=true -Denable-coolprop=true -Denable-mpp=true -Dinstall-mpp=true -Denable-mlpcpp=true -Denable-tests=true --warnlevel=2'
+            flags: '-Dcpu-arch=skylake -Denable-pywrapper=true -Denable-coolprop=true -Denable-mpp=true -Dinstall-mpp=true -Denable-mlpcpp=true -Denable-tests=true --warnlevel=2'
           - config_set: ReverseMPI
-            flags: '-Denable-autodiff=true -Denable-normal=false -Denable-pywrapper=true -Denable-tests=true -Denable-mlpcpp=true --warnlevel=3 --werror'
+            flags: '-Dcpu-arch=skylake -Denable-autodiff=true -Denable-normal=false -Denable-pywrapper=true -Denable-tests=true -Denable-mlpcpp=true --warnlevel=3 --werror'
           - config_set: ForwardMPI
-            flags: '-Denable-directdiff=true -Denable-normal=false -Denable-tests=true -Denable-mlpcpp=true --warnlevel=3 --werror'
+            flags: '-Dcpu-arch=skylake -Denable-directdiff=true -Denable-normal=false -Denable-tests=true -Denable-mlpcpp=true --warnlevel=3 --werror'
           - config_set: BaseNoMPI
-            flags: '-Denable-pywrapper=true -Denable-openblas=true -Dwith-mpi=disabled -Denable-mlpcpp=true -Denable-tests=true --warnlevel=3 --werror'
+            flags: '-Dcpu-arch=skylake -Denable-pywrapper=true -Denable-openblas=true -Dwith-mpi=disabled -Denable-mlpcpp=true -Denable-tests=true --warnlevel=3 --werror'
           - config_set: ReverseNoMPI
-            flags: '-Denable-autodiff=true -Denable-normal=false -Dwith-mpi=disabled -Denable-pywrapper=true -Denable-tests=true --warnlevel=3 --werror'
+            flags: '-Dcpu-arch=skylake -Denable-autodiff=true -Denable-normal=false -Dwith-mpi=disabled -Denable-pywrapper=true -Denable-tests=true --warnlevel=3 --werror'
           - config_set: ForwardNoMPI
-            flags: '-Denable-directdiff=true -Denable-normal=false -Dwith-mpi=disabled -Denable-tests=true --warnlevel=3 --werror'
+            flags: '-Dcpu-arch=skylake -Denable-directdiff=true -Denable-normal=false -Dwith-mpi=disabled -Denable-tests=true --warnlevel=3 --werror'
           - config_set: ReverseTagNoMPI
-            flags: '-Denable-autodiff=true -Denable-normal=false -Dwith-mpi=disabled -Denable-pywrapper=true -Denable-tests=true --warnlevel=3 --werror -Dcodi-tape=Tag'
+            flags: '-Dcpu-arch=skylake -Denable-autodiff=true -Denable-normal=false -Dwith-mpi=disabled -Denable-pywrapper=true -Denable-tests=true --warnlevel=3 --werror -Dcodi-tape=Tag'
           - config_set: BaseOMP
-            flags: '-Dwith-omp=true -Denable-mixedprec=true -Denable-pywrapper=true -Denable-tecio=false --warnlevel=3 --werror'
+            flags: '-Dcpu-arch=skylake -Dwith-omp=true -Denable-mixedprec=true -Denable-pywrapper=true -Denable-tecio=false --warnlevel=3 --werror'
           - config_set: ReverseOMP
-            flags: '-Denable-autodiff=true -Denable-normal=false -Dwith-omp=true -Denable-mixedprec=true -Denable-pywrapper=true -Denable-tecio=false --warnlevel=3 --werror'
+            flags: '-Dcpu-arch=skylake -Denable-autodiff=true -Denable-normal=false -Dwith-omp=true -Denable-mixedprec=true -Denable-pywrapper=true -Denable-tecio=false --warnlevel=3 --werror'
           - config_set: ForwardOMP
-            flags: '-Denable-directdiff=true -Denable-normal=false -Dwith-omp=true -Denable-mixedprec=true -Denable-pywrapper=true -Denable-tecio=false --warnlevel=3 --werror'
+            flags: '-Dcpu-arch=skylake -Denable-directdiff=true -Denable-normal=false -Dwith-omp=true -Denable-mixedprec=true -Denable-pywrapper=true -Denable-tecio=false --warnlevel=3 --werror'
     runs-on: ${{ inputs.runner || 'ubuntu-latest' }}
     steps:
       - name: Cache Object Files
@@ -83,11 +83,11 @@ jobs:
         config_set: [BaseOMP-tsan, ReverseOMP-tsan] #ForwardOMP-tsan
         include:
           - config_set: BaseOMP-tsan
-            flags: '--buildtype=debugoptimized -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3'
+            flags: '--buildtype=debugoptimized -Dcpu-arch=skylake -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3'
           - config_set: ReverseOMP-tsan
-            flags: '--buildtype=debugoptimized -Denable-autodiff=true -Denable-normal=false -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3'
+            flags: '--buildtype=debugoptimized -Dcpu-arch=skylake -Denable-autodiff=true -Denable-normal=false -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3'
           #- config_set: ForwardOMP-tsan
-          #  flags: '--buildtype=debug -Denable-directdiff=true -Denable-normal=false -Dwith-omp=true -Denable-mixedprec=true -Denable-pywrapper=true -Denable-tecio=false --warnlevel=3 --werror'
+          #  flags: '--buildtype=debug -Dcpu-arch=skylake -Denable-directdiff=true -Denable-normal=false -Dwith-omp=true -Denable-mixedprec=true -Denable-pywrapper=true -Denable-tecio=false --warnlevel=3 --werror'
     runs-on: ${{ inputs.runner || 'ubuntu-latest' }}
     steps:
       - name: Cache Object Files
@@ -125,9 +125,9 @@ jobs:
         config_set: [BaseNoMPI-asan, ReverseNoMPI-asan]
         include:
           - config_set: BaseNoMPI-asan
-            flags: '--buildtype=debugoptimized -Denable-openblas=true -Dwith-mpi=disabled -Denable-mlpcpp=true --warnlevel=3 --werror'
+            flags: '--buildtype=debugoptimized -Dcpu-arch=skylake -Denable-openblas=true -Dwith-mpi=disabled -Denable-mlpcpp=true --warnlevel=3 --werror'
           - config_set: ReverseNoMPI-asan
-            flags: '--buildtype=debugoptimized --optimization=1 -Denable-autodiff=true -Denable-normal=false -Dwith-mpi=disabled --warnlevel=3 --werror'
+            flags: '--buildtype=debugoptimized -Dcpu-arch=skylake --optimization=1 -Denable-autodiff=true -Denable-normal=false -Dwith-mpi=disabled --warnlevel=3 --werror'
     runs-on: ${{ inputs.runner || 'ubuntu-latest' }}
     steps:
       - name: Cache Object Files
diff --git a/.github/workflows/release-management.yml b/.github/workflows/release-management.yml
index b2e393d5824..cb9dd60b98e 100644
--- a/.github/workflows/release-management.yml
+++ b/.github/workflows/release-management.yml
@@ -12,20 +12,20 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os_bin: [macos64-omp, macos64-mpi, linux64-omp, linux64-mpi, win64-omp, win64-mpi]
+        os_bin: [macos64, macos64-mpi, linux64-omp, linux64-mpi, win64-omp, win64-mpi]
         include:
           - os_bin: win64-omp
-            flags: '-Dwith-omp=true -Dwith-mpi=disabled --cross-file=/hostfiles/hostfile_windows'
+            flags: '-Dcpu-arch=haswell -Dwith-omp=true -Dwith-mpi=disabled --cross-file=/hostfiles/hostfile_windows'
           - os_bin: win64-mpi
-            flags: '-Dcustom-mpi=true --cross-file=/hostfiles/hostfile_windows_mpi'
-          - os_bin: macos64-omp
-            flags: '-Dwith-omp=true -Dwith-mpi=disabled --cross-file=/hostfiles/hostfile_darwin'
+            flags: '-Dcpu-arch=haswell -Dcustom-mpi=true --cross-file=/hostfiles/hostfile_windows_mpi'
+          - os_bin: macos64
+            flags: '-Dcpu-arch= -Dwith-mpi=disabled --cross-file=/hostfiles/hostfile_darwin'
           - os_bin: macos64-mpi
-            flags: '-Dcustom-mpi=true --cross-file=/hostfiles/hostfile_darwin_mpi'
+            flags: '-Dcpu-arch= -Dcustom-mpi=true --cross-file=/hostfiles/hostfile_darwin_mpi'
           - os_bin: linux64-omp
-            flags: '-Dwith-omp=true -Dwith-mpi=disabled -Dstatic-cgns-deps=true --cross-file=/hostfiles/hostfile_linux'
+            flags: '-Dcpu-arch=haswell -Dwith-omp=true -Dwith-mpi=disabled -Dstatic-cgns-deps=true --cross-file=/hostfiles/hostfile_linux'
           - os_bin: linux64-mpi
-            flags: '-Dcustom-mpi=true --cross-file=/hostfiles/hostfile_linux_mpi'
+            flags: '-Dcpu-arch=haswell -Dcustom-mpi=true --cross-file=/hostfiles/hostfile_linux_mpi'
     runs-on: ubuntu-latest
     steps:
       - name: Cache Object Files
diff --git a/SU2_CFD/include/solvers/CHeatSolver.hpp b/SU2_CFD/include/solvers/CHeatSolver.hpp
index 7607114dff4..077d058b949 100644
--- a/SU2_CFD/include/solvers/CHeatSolver.hpp
+++ b/SU2_CFD/include/solvers/CHeatSolver.hpp
@@ -88,8 +88,8 @@ class CHeatSolver final : public CScalarSolver<CHeatVariable> {
     LinSysRes(iPoint, 0) -= thermal_diffusivity * dTdn * Area;
 
     if (implicit) {
-      su2double Jacobian_i[] = {-thermal_diffusivity / dist_ij * Area};
-      Jacobian.SubtractBlock2Diag(iPoint, &Jacobian_i);
+      su2double Jacobian_i[1][1] = {{-thermal_diffusivity / dist_ij * Area}};
+      Jacobian.SubtractBlock2Diag(iPoint, Jacobian_i);
     }
   }
 
diff --git a/SU2_CFD/src/solvers/CGradientSmoothingSolver.cpp b/SU2_CFD/src/solvers/CGradientSmoothingSolver.cpp
index 44f3bbb5981..644cd029d6e 100644
--- a/SU2_CFD/src/solvers/CGradientSmoothingSolver.cpp
+++ b/SU2_CFD/src/solvers/CGradientSmoothingSolver.cpp
@@ -550,8 +550,6 @@ void CGradientSmoothingSolver::Compute_Surface_Residual(CGeometry* geometry, con
   int EL_KIND = 0;
   std::array<unsigned long, MAXNNODE_2D> indexNode;
   std::array<unsigned long, MAXNNODE_2D> indexVertex;
-  su2double Weight, Jac_X, norm, val_Coord, normalSens = 0.0, Residual=0.0;
-  su2double normal[MAXNDIM];
 
   for (iElem = 0; iElem < geometry->GetnElem_Bound(val_marker); iElem++) {
     /*--- Identify the kind of boundary element ---*/
@@ -562,7 +560,7 @@ void CGradientSmoothingSolver::Compute_Surface_Residual(CGeometry* geometry, con
       indexNode[iNode] = geometry->bound[val_marker][iElem]->GetNode(iNode);
 
       for (iDim = 0; iDim < nDim; iDim++) {
-        val_Coord = Get_ValCoord(geometry, indexNode[iNode], iDim);
+        const su2double val_Coord = Get_ValCoord(geometry, indexNode[iNode], iDim);
         element_container[GRAD_TERM][EL_KIND]->SetRef_Coord(iNode, iDim, val_Coord);
       }
     }
@@ -575,31 +573,27 @@ void CGradientSmoothingSolver::Compute_Surface_Residual(CGeometry* geometry, con
       }
     }
 
-    element_container[GRAD_TERM][EL_KIND]
-        ->ClearElement(); /*--- Restarts the element: avoids adding over previous results in other elements --*/
+    /*--- Restart the element: avoids adding on top of previous results from other elements. ---*/
+    element_container[GRAD_TERM][EL_KIND]->ClearElement();
     element_container[GRAD_TERM][EL_KIND]->ComputeGrad_SurfaceEmbedded();
     unsigned int nGauss = element_container[GRAD_TERM][EL_KIND]->GetnGaussPoints();
 
     for (unsigned int iGauss = 0; iGauss < nGauss; iGauss++) {
-      Weight = element_container[GRAD_TERM][EL_KIND]->GetWeight(iGauss);
-      Jac_X = element_container[GRAD_TERM][EL_KIND]->GetJ_X(iGauss);
+      const su2double Weight = element_container[GRAD_TERM][EL_KIND]->GetWeight(iGauss);
+      const su2double Jac_X = element_container[GRAD_TERM][EL_KIND]->GetJ_X(iGauss);
 
       for (unsigned int iNode = 0; iNode < nNodes; iNode++) {
+        su2double normal[MAXNDIM] = {};
         geometry->vertex[val_marker][indexVertex[iNode]]->GetNormal(normal);
-        norm = GeometryToolbox::Norm(nDim, normal);
+        const su2double norm = GeometryToolbox::Norm(nDim, normal);
         for (iDim = 0; iDim < nDim; iDim++) {
           normal[iDim] = normal[iDim] / norm;
         }
-
+        su2double normalSens = 0;
         for (iDim = 0; iDim < nDim; iDim++) {
           normalSens += normal[iDim] * nodes->GetSensitivity(indexNode[iNode], iDim);
         }
-
-        Residual += Weight * Jac_X * element_container[GRAD_TERM][EL_KIND]->GetNi(iNode, iGauss) * normalSens;
-        LinSysRes.AddBlock(indexNode[iNode], &Residual);
-
-        Residual = 0;
-        normalSens = 0;
+        LinSysRes(indexNode[iNode], 0) += Weight * Jac_X * element_container[GRAD_TERM][EL_KIND]->GetNi(iNode, iGauss) * normalSens;
       }
     }
   }
diff --git a/SU2_CFD/src/solvers/CHeatSolver.cpp b/SU2_CFD/src/solvers/CHeatSolver.cpp
index c4c19244fc7..6e127a98448 100644
--- a/SU2_CFD/src/solvers/CHeatSolver.cpp
+++ b/SU2_CFD/src/solvers/CHeatSolver.cpp
@@ -594,8 +594,8 @@ void CHeatSolver::BC_ConjugateHeat_Interface(CGeometry *geometry, CSolver **solv
           HeatFlux = HeatFluxDensity * Area;
 
           if (implicit) {
-            su2double Jacobian_i[] = {-thermal_diffusivity*Area};
-            Jacobian.SubtractBlock2Diag(iPoint, &Jacobian_i);
+            su2double Jacobian_i[1][1] = {{-thermal_diffusivity*Area}};
+            Jacobian.SubtractBlock2Diag(iPoint, Jacobian_i);
           }
         }
         else {
diff --git a/SU2_CFD/src/solvers/CTurbSASolver.cpp b/SU2_CFD/src/solvers/CTurbSASolver.cpp
index 7e0ebaed9d8..00d3009faa7 100644
--- a/SU2_CFD/src/solvers/CTurbSASolver.cpp
+++ b/SU2_CFD/src/solvers/CTurbSASolver.cpp
@@ -503,9 +503,8 @@ void CTurbSASolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_conta
          su2double coeff = (nu_total/sigma);
          su2double RoughWallBC = nodes->GetSolution(iPoint,0)/(0.03*Roughness_Height);
 
-         su2double Res_Wall;// = new su2double [nVar];
-         Res_Wall = coeff*RoughWallBC*Area;
-         LinSysRes.SubtractBlock(iPoint, &Res_Wall);
+         su2double Res_Wall = coeff*RoughWallBC*Area;
+         LinSysRes(iPoint, 0) -= Res_Wall;
 
          su2double Jacobian_i = (laminar_viscosity /density *Area)/(0.03*Roughness_Height*sigma);
          Jacobian_i += 2.0*RoughWallBC*Area/sigma;
diff --git a/SU2_PY/pySU2/meson.build b/SU2_PY/pySU2/meson.build
index 3d9abc747ec..436eb4b0d56 100644
--- a/SU2_PY/pySU2/meson.build
+++ b/SU2_PY/pySU2/meson.build
@@ -19,10 +19,17 @@ else
     mpi4py_include = ''
 endif
 
+su2_swig_args = []  # su2_cpp_args without the options that start with '-f' or '-m'
+foreach arg : su2_cpp_args
+    if not (arg.startswith('-f') or arg.startswith('-m'))
+        su2_swig_args += [arg]
+    endif
+endforeach
+
 swig_gen = generator(
     swig,
     output: ['@BASENAME@.cxx'],
-    arguments: su2_cpp_args +
+    arguments: su2_swig_args +
     [ '-c++', '-python', '-I'+mpi4py_include, '-outdir', meson.current_build_dir(), '-o', './@OUTPUT@', '@INPUT@'],
     depfile: '@BASENAME@.d',
 )
@@ -43,7 +50,7 @@ if get_option('enable-normal')
       ],
       install: true,
       include_directories : mpi4py_include,
-      cpp_args : [default_warning_flags,su2_cpp_args],
+      cpp_args : [default_warning_flags, su2_swig_args],
       name_prefix : '',
       install_dir: 'bin'
   )
@@ -62,7 +69,7 @@ if get_option('enable-autodiff')
       ],
       install: true,
       include_directories : mpi4py_include,
-      cpp_args : [default_warning_flags, su2_cpp_args, codi_rev_args],
+      cpp_args : [default_warning_flags, su2_swig_args, codi_rev_args],
       name_prefix : '',
       install_dir: 'bin'
   )
diff --git a/meson.build b/meson.build
index 16edb96eba7..468c4ec1e16 100644
--- a/meson.build
+++ b/meson.build
@@ -27,10 +27,8 @@ su2_deps     = [declare_dependency(include_directories: 'externals/CLI11')]
 
 default_warning_flags = []
 if build_machine.system() != 'windows'
-  if meson.get_compiler('cpp').get_id() != 'intel'
-    default_warning_flags += ['-Wno-empty-body']
-  endif
-  desired_warnings = ['-Wno-ignored-qualifiers',
+  desired_warnings = ['-Wno-empty-body',
+                      '-Wno-ignored-qualifiers',
                       '-Wno-unused-parameter',
                       '-Wno-deprecated-declarations',
                       '-Wno-error=cast-function-type',
@@ -55,6 +53,15 @@ if build_machine.system() != 'windows'
   endif
 endif
 
+if get_option('cpu-arch') != ''  # an empty cpu-arch option means: do not pass any -march flag
+  su2_cpp_args += ['-march=' + get_option('cpu-arch')]
+endif
+
+# Fast math allows e.g. pow(x, 2) -> x * x; both flags are probed together since both are added.
+if meson.get_compiler('cpp').has_multi_arguments('-ffast-math', '-fno-finite-math-only')
+  su2_cpp_args += ['-ffast-math', '-fno-finite-math-only']
+endif
+
 # Handle assertions: default is b_ndebug=true (assertions disabled)
 # but for debug builds, we want to enable assertions
 if get_option('buildtype') == 'debug'
diff --git a/meson_options.txt b/meson_options.txt
index 66cd441cccd..f460f6b1e13 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -1,11 +1,12 @@
-option('with-mpi',   type : 'feature', value : 'auto', description: 'enable MPI support')
-option('with-omp',   type : 'boolean', value : false, description: 'enable OpenMP support')
+option('with-mpi', type : 'feature', value : 'auto', description: 'enable MPI support')
+option('with-omp', type : 'boolean', value : false, description: 'enable OpenMP support')
+option('cpu-arch', type : 'string', value : 'native', description: 'CPU architecture optimization, important for vectorization')
 option('enable-tecio', type : 'boolean', value : true, description: 'enable TECIO support')
-option('enable-cgns',  type : 'boolean', value : true, description: 'enable CGNS support')
-option('enable-autodiff',  type : 'boolean', value : false, description: 'enable AD (reverse) support')
-option('enable-directdiff',  type : 'boolean', value : false, description: 'enable AD (forward) support')
-option('enable-pywrapper',  type : 'boolean', value : false, description: 'enable Python wrapper support')
-option('enable-normal',  type : 'boolean', value : true, description: 'enable normal build')
+option('enable-cgns', type : 'boolean', value : true, description: 'enable CGNS support')
+option('enable-autodiff', type : 'boolean', value : false, description: 'enable AD (reverse) support')
+option('enable-directdiff', type : 'boolean', value : false, description: 'enable AD (forward) support')
+option('enable-pywrapper', type : 'boolean', value : false, description: 'enable Python wrapper support')
+option('enable-normal', type : 'boolean', value : true, description: 'enable normal build')
 option('enable-mkl', type : 'boolean', value : false, description: 'enable Intel-MKL support')
 option('mkl_root', type : 'string', value : '/opt/intel/mkl', description: 'root of Intel-MKL installation (only for non-intel compilers)')
 option('enable-openblas', type : 'boolean', value : false, description: 'enable BLAS and LAPACK support via OpenBLAS')
@@ -16,9 +17,9 @@ option('custom-mpi',  type : 'boolean', value : false, description: 'enable MPI
 option('enable-tests',  type : 'boolean', value : false, description: 'compile Unit Tests')
 option('enable-mixedprec', type : 'boolean', value : false, description: 'use single precision floating point arithmetic for sparse algebra')
 option('extra-deps', type : 'string', value : '', description: 'comma-separated list of extra (custom) dependencies to add for compilation')
-option('enable-mpp',  type : 'boolean', value : false, description: 'enable Mutation++ support')
+option('enable-mpp', type : 'boolean', value : false, description: 'enable Mutation++ support')
 option('install-mpp', type : 'boolean', value : false, description: 'install Mutation++ in the directory defined with --prefix')
-option('enable-coolprop',  type : 'boolean', value : false, description: 'enable CoolProp support')
+option('enable-coolprop', type : 'boolean', value : false, description: 'enable CoolProp support')
 option('enable-mlpcpp', type : 'boolean', value : false, description: 'enable MLPCpp support')
 option('enable-gprof', type : 'boolean', value : false, description: 'enable profiling through gprof')
 option('opdi-backend', type : 'combo', choices : ['auto', 'macro', 'ompt'], value : 'auto', description: 'OpDiLib backend choice')