Updated EigenLib to the latest stable version 3.3.2

2017-01-25 20:05:31 +01:00 · 2017-01-25 20:05:31 +01:00 · 3a6c2879cd
parent f82d3e63fe
commit 3a6c2879cd
519 changed files with 58858 additions and 55073 deletions
--- a/eigenlib/Eigen/Array
+++ b/eigenlib/Eigen/Array
@ -1,11 +0,0 @@
 #ifndef EIGEN_ARRAY_MODULE_H
 #define EIGEN_ARRAY_MODULE_H
 // include Core first to handle Eigen2 support macros
 #include "Core"
 #ifndef EIGEN2_SUPPORT
  #error The Eigen/Array header does no longer exist in Eigen3. All that functionality has moved to Eigen/Core.
 #endif
 #endif // EIGEN_ARRAY_MODULE_H
--- a/eigenlib/Eigen/CMakeLists.txt
+++ b/eigenlib/Eigen/CMakeLists.txt
@ -16,4 +16,4 @@ install(FILES
  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel
  )
-add_subdirectory(src)
+install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h")
--- a/eigenlib/Eigen/Cholesky
+++ b/eigenlib/Eigen/Cholesky
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_CHOLESKY_MODULE_H
 #define EIGEN_CHOLESKY_MODULE_H
@ -10,20 +17,22 @@
  *
  *
  * This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices.
-  * Those decompositions are accessible via the following MatrixBase methods:
+  * Those decompositions are also accessible via the following methods:
-  *  - MatrixBase::llt(),
+  *  - MatrixBase::llt()
  *  - MatrixBase::ldlt()
  *  - SelfAdjointView::llt()
  *  - SelfAdjointView::ldlt()
  *
  * \code
  * #include <Eigen/Cholesky>
  * \endcode
  */
 #include "src/misc/Solve.h"
 #include "src/Cholesky/LLT.h"
 #include "src/Cholesky/LDLT.h"
 #ifdef EIGEN_USE_LAPACKE
-#include "src/Cholesky/LLT_MKL.h"
+#include "src/misc/lapacke.h"
 #include "src/Cholesky/LLT_LAPACKE.h"
 #endif
 #include "src/Core/util/ReenableStupidWarnings.h"
--- a/eigenlib/Eigen/CholmodSupport
+++ b/eigenlib/Eigen/CholmodSupport
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_CHOLMODSUPPORT_MODULE_H
 #define EIGEN_CHOLMODSUPPORT_MODULE_H
@ -12,7 +19,7 @@ extern "C" {
 /** \ingroup Support_modules
  * \defgroup CholmodSupport_Module CholmodSupport module
  *
-  * This module provides an interface to the Cholmod library which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
+  * This module provides an interface to the Cholmod library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
  * It provides the two following main factorization classes:
  * - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization.
  * - class CholmodDecomposiiton: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial).
@ -33,12 +40,8 @@ extern "C" {
  *
  */
 #include "src/misc/Solve.h"
 #include "src/misc/SparseSolve.h"
 #include "src/CholmodSupport/CholmodSupport.h"
 #include "src/Core/util/ReenableStupidWarnings.h"
 #endif // EIGEN_CHOLMODSUPPORT_MODULE_H
--- a/eigenlib/Eigen/Core
+++ b/eigenlib/Eigen/Core
@ -14,6 +14,58 @@
 // first thing Eigen does: stop the compiler from committing suicide
 #include "src/Core/util/DisableStupidWarnings.h"
 // Handle NVCC/CUDA/SYCL
 #if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
  // Do not try asserts on CUDA and SYCL!
  #ifndef EIGEN_NO_DEBUG
  #define EIGEN_NO_DEBUG
  #endif
  #ifdef EIGEN_INTERNAL_DEBUGGING
  #undef EIGEN_INTERNAL_DEBUGGING
  #endif
  #ifdef EIGEN_EXCEPTIONS
  #undef EIGEN_EXCEPTIONS
  #endif
  // All functions callable from CUDA code must be qualified with __device__
  #ifdef __CUDACC__
    // Do not try to vectorize on CUDA and SYCL!
    #ifndef EIGEN_DONT_VECTORIZE
    #define EIGEN_DONT_VECTORIZE
    #endif
    #define EIGEN_DEVICE_FUNC __host__ __device__
    // We need math_functions.hpp to ensure that that EIGEN_USING_STD_MATH macro
    // works properly on the device side
    #include <math_functions.hpp>
  #else
    #define EIGEN_DEVICE_FUNC
  #endif
 #else
  #define EIGEN_DEVICE_FUNC
 #endif
 // When compiling CUDA device code with NVCC, pull in math functions from the
 // global namespace.  In host mode, and when device doee with clang, use the
 // std versions.
 #if defined(__CUDA_ARCH__) && defined(__NVCC__)
  #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
 #else
  #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
 #endif
 #if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL)
  #define EIGEN_EXCEPTIONS
 #endif
 #ifdef EIGEN_EXCEPTIONS
  #include <new>
 #endif
 // then include this file where all our macros are defined. It's really important to do it first because
 // it's where we do all the alignment settings (platform detection and honoring the user's will if he
 // defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
@ -21,7 +73,7 @@
 // Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3)
 // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details.
-#if defined(__MINGW32__) && EIGEN_GNUC_AT_LEAST(4,6)
+#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6)
  #pragma GCC optimize ("-fno-ipa-cp-clone")
 #endif
@ -31,26 +83,26 @@
 // and inclusion of their respective header files
 #include "src/Core/util/MKL_support.h"
-// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into
+// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
-// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks
+// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
-#if !EIGEN_ALIGN
+#if EIGEN_MAX_ALIGN_BYTES==0
  #ifndef EIGEN_DONT_VECTORIZE
    #define EIGEN_DONT_VECTORIZE
  #endif
 #endif
-#ifdef _MSC_VER
+#if EIGEN_COMP_MSVC
  #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
-  #if (_MSC_VER >= 1500) // 2008 or later
+  #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later
    // Remember that usage of defined() in a #define is undefined by the standard.
    // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
-    #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(_M_X64)
+    #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
      #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
    #endif
  #endif
 #else
  // Remember that usage of defined() in a #define is undefined by the standard
-  #if (defined __SSE2__) && ( (!defined __GNUC__) || (defined __INTEL_COMPILER) || EIGEN_GNUC_AT_LEAST(4,2) )
+  #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
    #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
  #endif
 #endif
@ -82,6 +134,28 @@
    #ifdef __SSE4_2__
      #define EIGEN_VECTORIZE_SSE4_2
    #endif
    #ifdef __AVX__
      #define EIGEN_VECTORIZE_AVX
      #define EIGEN_VECTORIZE_SSE3
      #define EIGEN_VECTORIZE_SSSE3
      #define EIGEN_VECTORIZE_SSE4_1
      #define EIGEN_VECTORIZE_SSE4_2
    #endif
    #ifdef __AVX2__
      #define EIGEN_VECTORIZE_AVX2
    #endif
    #ifdef __FMA__
      #define EIGEN_VECTORIZE_FMA
    #endif
    #if defined(__AVX512F__) && defined(EIGEN_ENABLE_AVX512)
      #define EIGEN_VECTORIZE_AVX512
      #define EIGEN_VECTORIZE_AVX2
      #define EIGEN_VECTORIZE_AVX
      #define EIGEN_VECTORIZE_FMA
      #ifdef __AVX512DQ__
        #define EIGEN_VECTORIZE_AVX512DQ
      #endif
    #endif
    // include files
@ -95,9 +169,10 @@
    extern "C" {
      // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
      // Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus:
-      #if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1110
+      #if EIGEN_COMP_ICC >= 1110
        #include <immintrin.h>
      #else
        #include <mmintrin.h>
        #include <emmintrin.h>
        #include <xmmintrin.h>
        #ifdef  EIGEN_VECTORIZE_SSE3
@ -112,8 +187,20 @@
        #ifdef EIGEN_VECTORIZE_SSE4_2
        #include <nmmintrin.h>
        #endif
        #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
        #include <immintrin.h>
        #endif
      #endif
    } // end extern "C"
  #elif defined __VSX__
    #define EIGEN_VECTORIZE
    #define EIGEN_VECTORIZE_VSX
    #include <altivec.h>
    // We need to #undef all these ugly tokens defined in <altivec.h>
    // => use __vector instead of vector
    #undef bool
    #undef vector
    #undef pixel
  #elif defined __ALTIVEC__
    #define EIGEN_VECTORIZE
    #define EIGEN_VECTORIZE_ALTIVEC
@ -123,13 +210,35 @@
    #undef bool
    #undef vector
    #undef pixel
-  #elif defined  __ARM_NEON
+  #elif (defined  __ARM_NEON) || (defined __ARM_NEON__)
    #define EIGEN_VECTORIZE
    #define EIGEN_VECTORIZE_NEON
    #include <arm_neon.h>
  #elif (defined __s390x__ && defined __VEC__)
    #define EIGEN_VECTORIZE
    #define EIGEN_VECTORIZE_ZVECTOR
    #include <vecintrin.h>
  #endif
 #endif
 #if defined(__F16C__) && !defined(EIGEN_COMP_CLANG)
  // We can use the optimized fp16 to float and float to fp16 conversion routines
  #define EIGEN_HAS_FP16_C
 #endif
 #if defined __CUDACC__
  #define EIGEN_VECTORIZE_CUDA
  #include <vector_types.h>
  #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
    #define EIGEN_HAS_CUDA_FP16
  #endif
 #endif
 #if defined EIGEN_HAS_CUDA_FP16
  #include <host_defines.h>
  #include <cuda_fp16.h>
 #endif
 #if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)
  #define EIGEN_HAS_OPENMP
 #endif
@ -139,7 +248,7 @@
 #endif
 // MSVC for windows mobile does not have the errno.h file
-#if !(defined(_MSC_VER) && defined(_WIN32_WCE)) && !defined(__ARMCC_VERSION)
+#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM
 #define EIGEN_HAS_ERRNO
 #endif
@ -159,29 +268,30 @@
 // for min/max:
 #include <algorithm>
 // for std::is_nothrow_move_assignable
 #ifdef EIGEN_INCLUDE_TYPE_TRAITS
 #include <type_traits>
 #endif
 // for outputting debug info
 #ifdef EIGEN_DEBUG_ASSIGN
 #include <iostream>
 #endif
 // required for __cpuid, needs to be included after cmath
-#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64)) && (!defined(_WIN32_WCE))
+#if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE
  #include <intrin.h>
 #endif
 #if defined(_CPPUNWIND) || defined(__EXCEPTIONS)
  #define EIGEN_EXCEPTIONS
 #endif
 #ifdef EIGEN_EXCEPTIONS
  #include <new>
 #endif
 /** \brief Namespace containing all symbols from the %Eigen library. */
 namespace Eigen {
 inline static const char *SimdInstructionSetsInUse(void) {
-#if defined(EIGEN_VECTORIZE_SSE4_2)
+#if defined(EIGEN_VECTORIZE_AVX512)
  return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
 #elif defined(EIGEN_VECTORIZE_AVX)
  return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
 #elif defined(EIGEN_VECTORIZE_SSE4_2)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
 #elif defined(EIGEN_VECTORIZE_SSE4_1)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
@ -193,8 +303,12 @@ inline static const char *SimdInstructionSetsInUse(void) {
  return "SSE, SSE2";
 #elif defined(EIGEN_VECTORIZE_ALTIVEC)
  return "AltiVec";
 #elif defined(EIGEN_VECTORIZE_VSX)
  return "VSX";
 #elif defined(EIGEN_VECTORIZE_NEON)
  return "ARM NEON";
 #elif defined(EIGEN_VECTORIZE_ZVECTOR)
  return "S390X ZVECTOR";
 #else
  return "None";
 #endif
@ -202,40 +316,15 @@ inline static const char *SimdInstructionSetsInUse(void) {
 } // end namespace Eigen
-#define STAGE10_FULL_EIGEN2_API             10
+#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT
-#define STAGE20_RESOLVE_API_CONFLICTS       20
+// This will generate an error message:
-#define STAGE30_FULL_EIGEN3_API             30
+#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information
 #define STAGE40_FULL_EIGEN3_STRICTNESS      40
 #define STAGE99_NO_EIGEN2_SUPPORT           99
 #if   defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS
  #define EIGEN2_SUPPORT
  #define EIGEN2_SUPPORT_STAGE STAGE40_FULL_EIGEN3_STRICTNESS
 #elif defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API
  #define EIGEN2_SUPPORT
  #define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API
 #elif defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS
  #define EIGEN2_SUPPORT
  #define EIGEN2_SUPPORT_STAGE STAGE20_RESOLVE_API_CONFLICTS
 #elif defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API
  #define EIGEN2_SUPPORT
  #define EIGEN2_SUPPORT_STAGE STAGE10_FULL_EIGEN2_API
 #elif defined EIGEN2_SUPPORT
  // default to stage 3, that's what it's always meant
  #define EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API
  #define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API
 #else
  #define EIGEN2_SUPPORT_STAGE STAGE99_NO_EIGEN2_SUPPORT
 #endif
 #ifdef EIGEN2_SUPPORT
 #undef minor
 #endif
 // we use size_t frequently and we'll never remember to prepend it with std:: everytime just to
 // ensure QNX/QCC support
 using std::size_t;
-// gcc 4.6.0 wants std:: for ptrdiff_t 
+// gcc 4.6.0 wants std:: for ptrdiff_t
 using std::ptrdiff_t;
 /** \defgroup Core_Module Core module
@ -249,8 +338,8 @@ using std::ptrdiff_t;
  */
 #include "src/Core/util/Constants.h"
 #include "src/Core/util/ForwardDeclarations.h"
 #include "src/Core/util/Meta.h"
 #include "src/Core/util/ForwardDeclarations.h"
 #include "src/Core/util/StaticAssert.h"
 #include "src/Core/util/XprHelper.h"
 #include "src/Core/util/Memory.h"
@ -258,41 +347,91 @@ using std::ptrdiff_t;
 #include "src/Core/NumTraits.h"
 #include "src/Core/MathFunctions.h"
 #include "src/Core/GenericPacketMath.h"
 #include "src/Core/MathFunctionsImpl.h"
-#if defined EIGEN_VECTORIZE_SSE
+#if defined EIGEN_VECTORIZE_AVX512
  #include "src/Core/arch/SSE/PacketMath.h"
  #include "src/Core/arch/AVX/PacketMath.h"
  #include "src/Core/arch/AVX512/PacketMath.h"
  #include "src/Core/arch/AVX512/MathFunctions.h"
 #elif defined EIGEN_VECTORIZE_AVX
  // Use AVX for floats and doubles, SSE for integers
  #include "src/Core/arch/SSE/PacketMath.h"
  #include "src/Core/arch/SSE/Complex.h"
  #include "src/Core/arch/SSE/MathFunctions.h"
  #include "src/Core/arch/AVX/PacketMath.h"
  #include "src/Core/arch/AVX/MathFunctions.h"
  #include "src/Core/arch/AVX/Complex.h"
  #include "src/Core/arch/AVX/TypeCasting.h"
 #elif defined EIGEN_VECTORIZE_SSE
  #include "src/Core/arch/SSE/PacketMath.h"
  #include "src/Core/arch/SSE/MathFunctions.h"
  #include "src/Core/arch/SSE/Complex.h"
-#elif defined EIGEN_VECTORIZE_ALTIVEC
+  #include "src/Core/arch/SSE/TypeCasting.h"
 #elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
  #include "src/Core/arch/AltiVec/PacketMath.h"
  #include "src/Core/arch/AltiVec/MathFunctions.h"
  #include "src/Core/arch/AltiVec/Complex.h"
 #elif defined EIGEN_VECTORIZE_NEON
  #include "src/Core/arch/NEON/PacketMath.h"
  #include "src/Core/arch/NEON/MathFunctions.h"
  #include "src/Core/arch/NEON/Complex.h"
 #elif defined EIGEN_VECTORIZE_ZVECTOR
  #include "src/Core/arch/ZVector/PacketMath.h"
  #include "src/Core/arch/ZVector/MathFunctions.h"
  #include "src/Core/arch/ZVector/Complex.h"
 #endif
 // Half float support
 #include "src/Core/arch/CUDA/Half.h"
 #include "src/Core/arch/CUDA/PacketMathHalf.h"
 #include "src/Core/arch/CUDA/TypeCasting.h"
 #if defined EIGEN_VECTORIZE_CUDA
  #include "src/Core/arch/CUDA/PacketMath.h"
  #include "src/Core/arch/CUDA/MathFunctions.h"
 #endif
 #include "src/Core/arch/Default/Settings.h"
-#include "src/Core/Functors.h"
+#include "src/Core/functors/TernaryFunctors.h"
 #include "src/Core/functors/BinaryFunctors.h"
 #include "src/Core/functors/UnaryFunctors.h"
 #include "src/Core/functors/NullaryFunctors.h"
 #include "src/Core/functors/StlFunctors.h"
 #include "src/Core/functors/AssignmentFunctors.h"
 // Specialized functors to enable the processing of complex numbers
 // on CUDA devices
 #include "src/Core/arch/CUDA/Complex.h"
 #include "src/Core/DenseCoeffsBase.h"
 #include "src/Core/DenseBase.h"
 #include "src/Core/MatrixBase.h"
 #include "src/Core/EigenBase.h"
 #include "src/Core/Product.h"
 #include "src/Core/CoreEvaluators.h"
 #include "src/Core/AssignEvaluator.h"
 #ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874
                                // at least confirmed with Doxygen 1.5.5 and 1.5.6
  #include "src/Core/Assign.h"
 #endif
 #include "src/Core/ArrayBase.h"
 #include "src/Core/util/BlasUtil.h"
 #include "src/Core/DenseStorage.h"
 #include "src/Core/NestByValue.h"
-#include "src/Core/ForceAlignedAccess.h"
+
 // #include "src/Core/ForceAlignedAccess.h"
 #include "src/Core/ReturnByValue.h"
 #include "src/Core/NoAlias.h"
 #include "src/Core/PlainObjectBase.h"
 #include "src/Core/Matrix.h"
 #include "src/Core/Array.h"
 #include "src/Core/CwiseTernaryOp.h"
 #include "src/Core/CwiseBinaryOp.h"
 #include "src/Core/CwiseUnaryOp.h"
 #include "src/Core/CwiseNullaryOp.h"
@ -300,32 +439,33 @@ using std::ptrdiff_t;
 #include "src/Core/SelfCwiseBinaryOp.h"
 #include "src/Core/Dot.h"
 #include "src/Core/StableNorm.h"
 #include "src/Core/MapBase.h"
 #include "src/Core/Stride.h"
 #include "src/Core/MapBase.h"
 #include "src/Core/Map.h"
 #include "src/Core/Ref.h"
 #include "src/Core/Block.h"
 #include "src/Core/VectorBlock.h"
 #include "src/Core/Ref.h"
 #include "src/Core/Transpose.h"
 #include "src/Core/DiagonalMatrix.h"
 #include "src/Core/Diagonal.h"
 #include "src/Core/DiagonalProduct.h"
 #include "src/Core/PermutationMatrix.h"
 #include "src/Core/Transpositions.h"
 #include "src/Core/Redux.h"
 #include "src/Core/Visitor.h"
 #include "src/Core/Fuzzy.h"
 #include "src/Core/IO.h"
 #include "src/Core/Swap.h"
 #include "src/Core/CommaInitializer.h"
 #include "src/Core/Flagged.h"
 #include "src/Core/ProductBase.h"
 #include "src/Core/GeneralProduct.h"
 #include "src/Core/Solve.h"
 #include "src/Core/Inverse.h"
 #include "src/Core/SolverBase.h"
 #include "src/Core/PermutationMatrix.h"
 #include "src/Core/Transpositions.h"
 #include "src/Core/TriangularMatrix.h"
 #include "src/Core/SelfAdjointView.h"
 #include "src/Core/products/GeneralBlockPanelKernel.h"
 #include "src/Core/products/Parallelizer.h"
-#include "src/Core/products/CoeffBasedProduct.h"
+#include "src/Core/ProductEvaluators.h"
 #include "src/Core/products/GeneralMatrixVector.h"
 #include "src/Core/products/GeneralMatrixMatrix.h"
 #include "src/Core/SolveTriangular.h"
@ -340,6 +480,7 @@ using std::ptrdiff_t;
 #include "src/Core/products/TriangularSolverVector.h"
 #include "src/Core/BandMatrix.h"
 #include "src/Core/CoreIterators.h"
 #include "src/Core/ConditionEstimator.h"
 #include "src/Core/BooleanRedux.h"
 #include "src/Core/Select.h"
@ -347,18 +488,17 @@ using std::ptrdiff_t;
 #include "src/Core/Random.h"
 #include "src/Core/Replicate.h"
 #include "src/Core/Reverse.h"
 #include "src/Core/ArrayBase.h"
 #include "src/Core/ArrayWrapper.h"
 #ifdef EIGEN_USE_BLAS
-#include "src/Core/products/GeneralMatrixMatrix_MKL.h"
+#include "src/Core/products/GeneralMatrixMatrix_BLAS.h"
-#include "src/Core/products/GeneralMatrixVector_MKL.h"
+#include "src/Core/products/GeneralMatrixVector_BLAS.h"
-#include "src/Core/products/GeneralMatrixMatrixTriangular_MKL.h"
+#include "src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h"
-#include "src/Core/products/SelfadjointMatrixMatrix_MKL.h"
+#include "src/Core/products/SelfadjointMatrixMatrix_BLAS.h"
-#include "src/Core/products/SelfadjointMatrixVector_MKL.h"
+#include "src/Core/products/SelfadjointMatrixVector_BLAS.h"
-#include "src/Core/products/TriangularMatrixMatrix_MKL.h"
+#include "src/Core/products/TriangularMatrixMatrix_BLAS.h"
-#include "src/Core/products/TriangularMatrixVector_MKL.h"
+#include "src/Core/products/TriangularMatrixVector_BLAS.h"
-#include "src/Core/products/TriangularSolverMatrix_MKL.h"
+#include "src/Core/products/TriangularSolverMatrix_BLAS.h"
 #endif // EIGEN_USE_BLAS
 #ifdef EIGEN_USE_MKL_VML
@ -369,8 +509,4 @@ using std::ptrdiff_t;
 #include "src/Core/util/ReenableStupidWarnings.h"
 #ifdef EIGEN2_SUPPORT
 #include "Eigen2Support"
 #endif
 #endif // EIGEN_CORE_H
--- a/eigenlib/Eigen/Eigen
+++ b/eigenlib/Eigen/Eigen
@ -1,2 +1,2 @@
 #include "Dense"
-//#include "Sparse"
+#include "Sparse"
--- a/eigenlib/Eigen/Eigen2Support
+++ b/eigenlib/Eigen/Eigen2Support
@ -1,95 +0,0 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN2SUPPORT_H
 #define EIGEN2SUPPORT_H
 #if (!defined(EIGEN2_SUPPORT)) || (!defined(EIGEN_CORE_H))
 #error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header
 #endif
 #ifndef EIGEN_NO_EIGEN2_DEPRECATED_WARNING
 #if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
 #warning "Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. (Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING to disable this warning)"
 #else
 #pragma message ("Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. (Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING to disable this warning)")
 #endif
 #endif // EIGEN_NO_EIGEN2_DEPRECATED_WARNING
 #include "src/Core/util/DisableStupidWarnings.h"
 /** \ingroup Support_modules
  * \defgroup Eigen2Support_Module Eigen2 support module
  *
  * \warning Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3.
  *
  * This module provides a couple of deprecated functions improving the compatibility with Eigen2.
  * 
  * To use it, define EIGEN2_SUPPORT before including any Eigen header
  * \code
  * #define EIGEN2_SUPPORT
  * \endcode
  *
  */
 #include "src/Eigen2Support/Macros.h"
 #include "src/Eigen2Support/Memory.h"
 #include "src/Eigen2Support/Meta.h"
 #include "src/Eigen2Support/Lazy.h"
 #include "src/Eigen2Support/Cwise.h"
 #include "src/Eigen2Support/CwiseOperators.h"
 #include "src/Eigen2Support/TriangularSolver.h"
 #include "src/Eigen2Support/Block.h"
 #include "src/Eigen2Support/VectorBlock.h"
 #include "src/Eigen2Support/Minor.h"
 #include "src/Eigen2Support/MathFunctions.h"
 #include "src/Core/util/ReenableStupidWarnings.h"
 // Eigen2 used to include iostream
 #include<iostream>
 #define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
 using Eigen::Matrix##SizeSuffix##TypeSuffix; \
 using Eigen::Vector##SizeSuffix##TypeSuffix; \
 using Eigen::RowVector##SizeSuffix##TypeSuffix;
 #define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
 EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
 EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
 EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
 EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
 #define EIGEN_USING_MATRIX_TYPEDEFS \
 EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
 EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
 EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
 EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
 EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)
 #define USING_PART_OF_NAMESPACE_EIGEN \
 EIGEN_USING_MATRIX_TYPEDEFS \
 using Eigen::Matrix; \
 using Eigen::MatrixBase; \
 using Eigen::ei_random; \
 using Eigen::ei_real; \
 using Eigen::ei_imag; \
 using Eigen::ei_conj; \
 using Eigen::ei_abs; \
 using Eigen::ei_abs2; \
 using Eigen::ei_sqrt; \
 using Eigen::ei_exp; \
 using Eigen::ei_log; \
 using Eigen::ei_sin; \
 using Eigen::ei_cos;
 #endif // EIGEN2SUPPORT_H
--- a/eigenlib/Eigen/Eigenvalues
+++ b/eigenlib/Eigen/Eigenvalues
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_EIGENVALUES_MODULE_H
 #define EIGEN_EIGENVALUES_MODULE_H
@ -25,6 +32,7 @@
  * \endcode
  */
 #include "src/misc/RealSvd2x2.h"
 #include "src/Eigenvalues/Tridiagonalization.h"
 #include "src/Eigenvalues/RealSchur.h"
 #include "src/Eigenvalues/EigenSolver.h"
@ -37,9 +45,10 @@
 #include "src/Eigenvalues/GeneralizedEigenSolver.h"
 #include "src/Eigenvalues/MatrixBaseEigenvalues.h"
 #ifdef EIGEN_USE_LAPACKE
-#include "src/Eigenvalues/RealSchur_MKL.h"
+#include "src/misc/lapacke.h"
-#include "src/Eigenvalues/ComplexSchur_MKL.h"
+#include "src/Eigenvalues/RealSchur_LAPACKE.h"
-#include "src/Eigenvalues/SelfAdjointEigenSolver_MKL.h"
+#include "src/Eigenvalues/ComplexSchur_LAPACKE.h"
 #include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h"
 #endif
 #include "src/Core/util/ReenableStupidWarnings.h"
--- a/eigenlib/Eigen/Geometry
+++ b/eigenlib/Eigen/Geometry
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_GEOMETRY_MODULE_H
 #define EIGEN_GEOMETRY_MODULE_H
@ -9,21 +16,17 @@
 #include "LU"
 #include <limits>
 #ifndef M_PI
 #define M_PI 3.14159265358979323846
 #endif
 /** \defgroup Geometry_Module Geometry module
  *
  *
  *
  * This module provides support for:
  *  - fixed-size homogeneous transformations
  *  - translation, scaling, 2D and 3D rotations
-  *  - quaternions
+  *  - \link Quaternion quaternions \endlink
-  *  - \ref MatrixBase::cross() "cross product"
+  *  - cross products (\ref MatrixBase::cross, \ref MatrixBase::cross3)
-  *  - \ref MatrixBase::unitOrthogonal() "orthognal vector generation"
+  *  - orthognal vector generation (\ref MatrixBase::unitOrthogonal)
-  *  - some linear components: parametrized-lines and hyperplanes
+  *  - some linear components: \link ParametrizedLine parametrized-lines \endlink and \link Hyperplane hyperplanes \endlink
  *  - \link AlignedBox axis aligned bounding boxes \endlink
  *  - \link umeyama least-square transformation fitting \endlink
  *
  * \code
  * #include <Eigen/Geometry>
@ -33,27 +36,23 @@
 #include "src/Geometry/OrthoMethods.h"
 #include "src/Geometry/EulerAngles.h"
-#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
+#include "src/Geometry/Homogeneous.h"
-  #include "src/Geometry/Homogeneous.h"
+#include "src/Geometry/RotationBase.h"
-  #include "src/Geometry/RotationBase.h"
+#include "src/Geometry/Rotation2D.h"
-  #include "src/Geometry/Rotation2D.h"
+#include "src/Geometry/Quaternion.h"
-  #include "src/Geometry/Quaternion.h"
+#include "src/Geometry/AngleAxis.h"
-  #include "src/Geometry/AngleAxis.h"
+#include "src/Geometry/Transform.h"
-  #include "src/Geometry/Transform.h"
+#include "src/Geometry/Translation.h"
-  #include "src/Geometry/Translation.h"
+#include "src/Geometry/Scaling.h"
-  #include "src/Geometry/Scaling.h"
+#include "src/Geometry/Hyperplane.h"
-  #include "src/Geometry/Hyperplane.h"
+#include "src/Geometry/ParametrizedLine.h"
-  #include "src/Geometry/ParametrizedLine.h"
+#include "src/Geometry/AlignedBox.h"
-  #include "src/Geometry/AlignedBox.h"
+#include "src/Geometry/Umeyama.h"
  #include "src/Geometry/Umeyama.h"
-  #if defined EIGEN_VECTORIZE_SSE
+// Use the SSE optimized version whenever possible. At the moment the
-    #include "src/Geometry/arch/Geometry_SSE.h"
+// SSE version doesn't compile when AVX is enabled
-  #endif
+#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX
-#endif
+#include "src/Geometry/arch/Geometry_SSE.h"
 #ifdef EIGEN2_SUPPORT
 #include "src/Eigen2Support/Geometry/All.h"
 #endif
 #include "src/Core/util/ReenableStupidWarnings.h"
--- a/eigenlib/Eigen/Householder
+++ b/eigenlib/Eigen/Householder
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_HOUSEHOLDER_MODULE_H
 #define EIGEN_HOUSEHOLDER_MODULE_H
--- a/eigenlib/Eigen/IterativeLinearSolvers
+++ b/eigenlib/Eigen/IterativeLinearSolvers
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
 #define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
@ -12,28 +19,29 @@
  * This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a squared matrix, usually very large and sparse.
  * Those solvers are accessible via the following classes:
  *  - ConjugateGradient for selfadjoint (hermitian) matrices,
  *  - LeastSquaresConjugateGradient for rectangular least-square problems,
  *  - BiCGSTAB for general square matrices.
  *
  * These iterative solvers are associated with some preconditioners:
  *  - IdentityPreconditioner - not really useful
-  *  - DiagonalPreconditioner - also called JAcobi preconditioner, work very well on diagonal dominant matrices.
+  *  - DiagonalPreconditioner - also called Jacobi preconditioner, work very well on diagonal dominant matrices.
-  *  - IncompleteILUT - incomplete LU factorization with dual thresholding
+  *  - IncompleteLUT - incomplete LU factorization with dual thresholding
  *
  * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport.
  *
-  * \code
+    \code
-  * #include <Eigen/IterativeLinearSolvers>
+    #include <Eigen/IterativeLinearSolvers>
-  * \endcode
+    \endcode
  */
-#include "src/misc/Solve.h"
+#include "src/IterativeLinearSolvers/SolveWithGuess.h"
 #include "src/misc/SparseSolve.h"
 #include "src/IterativeLinearSolvers/IterativeSolverBase.h"
 #include "src/IterativeLinearSolvers/BasicPreconditioners.h"
 #include "src/IterativeLinearSolvers/ConjugateGradient.h"
 #include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h"
 #include "src/IterativeLinearSolvers/BiCGSTAB.h"
 #include "src/IterativeLinearSolvers/IncompleteLUT.h"
 #include "src/IterativeLinearSolvers/IncompleteCholesky.h"
 #include "src/Core/util/ReenableStupidWarnings.h"
--- a/eigenlib/Eigen/Jacobi
+++ b/eigenlib/Eigen/Jacobi
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_JACOBI_MODULE_H
 #define EIGEN_JACOBI_MODULE_H
--- a/eigenlib/Eigen/LU
+++ b/eigenlib/Eigen/LU
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_LU_MODULE_H
 #define EIGEN_LU_MODULE_H
@ -16,25 +23,23 @@
  * \endcode
  */
 #include "src/misc/Solve.h"
 #include "src/misc/Kernel.h"
 #include "src/misc/Image.h"
 #include "src/LU/FullPivLU.h"
 #include "src/LU/PartialPivLU.h"
 #ifdef EIGEN_USE_LAPACKE
-#include "src/LU/PartialPivLU_MKL.h"
+#include "src/misc/lapacke.h"
 #include "src/LU/PartialPivLU_LAPACKE.h"
 #endif
 #include "src/LU/Determinant.h"
-#include "src/LU/Inverse.h"
+#include "src/LU/InverseImpl.h"
-#if defined EIGEN_VECTORIZE_SSE
+// Use the SSE optimized version whenever possible. At the moment the
 // SSE version doesn't compile when AVX is enabled
 #if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX
  #include "src/LU/arch/Inverse_SSE.h"
 #endif
 #ifdef EIGEN2_SUPPORT
  #include "src/Eigen2Support/LU.h"
 #endif
 #include "src/Core/util/ReenableStupidWarnings.h"
 #endif // EIGEN_LU_MODULE_H
--- a/eigenlib/Eigen/LeastSquares
+++ b/eigenlib/Eigen/LeastSquares
@ -1,32 +0,0 @@
 #ifndef EIGEN_REGRESSION_MODULE_H
 #define EIGEN_REGRESSION_MODULE_H
 #ifndef EIGEN2_SUPPORT
 #error LeastSquares is only available in Eigen2 support mode (define EIGEN2_SUPPORT)
 #endif
 // exclude from normal eigen3-only documentation
 #ifdef EIGEN2_SUPPORT
 #include "Core"
 #include "src/Core/util/DisableStupidWarnings.h"
 #include "Eigenvalues"
 #include "Geometry"
 /** \defgroup LeastSquares_Module LeastSquares module
  * This module provides linear regression and related features.
  *
  * \code
  * #include <Eigen/LeastSquares>
  * \endcode
  */
 #include "src/Eigen2Support/LeastSquares.h"
 #include "src/Core/util/ReenableStupidWarnings.h"
 #endif // EIGEN2_SUPPORT
 #endif // EIGEN_REGRESSION_MODULE_H
--- a/eigenlib/Eigen/MetisSupport
+++ b/eigenlib/Eigen/MetisSupport
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_METISSUPPORT_MODULE_H
 #define EIGEN_METISSUPPORT_MODULE_H
--- a/eigenlib/Eigen/OrderingMethods
+++ b/eigenlib/Eigen/OrderingMethods
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_ORDERINGMETHODS_MODULE_H
 #define EIGEN_ORDERINGMETHODS_MODULE_H
--- a/eigenlib/Eigen/PaStiXSupport
+++ b/eigenlib/Eigen/PaStiXSupport
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_PASTIXSUPPORT_MODULE_H
 #define EIGEN_PASTIXSUPPORT_MODULE_H
@ -5,7 +12,6 @@
 #include "src/Core/util/DisableStupidWarnings.h"
 #include <complex.h>
 extern "C" {
 #include <pastix_nompi.h>
 #include <pastix.h>
@ -35,12 +41,8 @@ extern "C" {
  *
  */
 #include "src/misc/Solve.h"
 #include "src/misc/SparseSolve.h"
 #include "src/PaStiXSupport/PaStiXSupport.h"
 #include "src/Core/util/ReenableStupidWarnings.h"
 #endif // EIGEN_PASTIXSUPPORT_MODULE_H
--- a/eigenlib/Eigen/PardisoSupport
+++ b/eigenlib/Eigen/PardisoSupport
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_PARDISOSUPPORT_MODULE_H
 #define EIGEN_PARDISOSUPPORT_MODULE_H
@ -7,8 +14,6 @@
 #include <mkl_pardiso.h>
 #include <unsupported/Eigen/SparseExtra>
 /** \ingroup Support_modules
  * \defgroup PardisoSupport_Module PardisoSupport module
  *
--- a/eigenlib/Eigen/QR
+++ b/eigenlib/Eigen/QR
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_QR_MODULE_H
 #define EIGEN_QR_MODULE_H
@ -15,31 +22,26 @@
  *
  * This module provides various QR decompositions
  * This module also provides some MatrixBase methods, including:
-  *  - MatrixBase::qr(),
+  *  - MatrixBase::householderQr()
  *  - MatrixBase::colPivHouseholderQr()
  *  - MatrixBase::fullPivHouseholderQr()
  *
  * \code
  * #include <Eigen/QR>
  * \endcode
  */
 #include "src/misc/Solve.h"
 #include "src/QR/HouseholderQR.h"
 #include "src/QR/FullPivHouseholderQR.h"
 #include "src/QR/ColPivHouseholderQR.h"
 #include "src/QR/CompleteOrthogonalDecomposition.h"
 #ifdef EIGEN_USE_LAPACKE
-#include "src/QR/HouseholderQR_MKL.h"
+#include "src/misc/lapacke.h"
-#include "src/QR/ColPivHouseholderQR_MKL.h"
+#include "src/QR/HouseholderQR_LAPACKE.h"
-#endif
+#include "src/QR/ColPivHouseholderQR_LAPACKE.h"
 #ifdef EIGEN2_SUPPORT
 #include "src/Eigen2Support/QR.h"
 #endif
 #include "src/Core/util/ReenableStupidWarnings.h"
 #ifdef EIGEN2_SUPPORT
 #include "Eigenvalues"
 #endif
 #endif // EIGEN_QR_MODULE_H
 /* vim: set filetype=cpp et sw=2 ts=2 ai: */
--- a/eigenlib/Eigen/QtAlignedMalloc
+++ b/eigenlib/Eigen/QtAlignedMalloc
@ -1,3 +1,9 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_QTMALLOC_MODULE_H
 #define EIGEN_QTMALLOC_MODULE_H
--- a/eigenlib/Eigen/SPQRSupport
+++ b/eigenlib/Eigen/SPQRSupport
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_SPQRSUPPORT_MODULE_H
 #define EIGEN_SPQRSUPPORT_MODULE_H
@ -10,7 +17,7 @@
 /** \ingroup Support_modules
  * \defgroup SPQRSupport_Module SuiteSparseQR module
  * 
-  * This module provides an interface to the SPQR library, which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
+  * This module provides an interface to the SPQR library, which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
  *
  * \code
  * #include <Eigen/SPQRSupport>
@ -21,8 +28,6 @@
  *
  */
 #include "src/misc/Solve.h"
 #include "src/misc/SparseSolve.h"
 #include "src/CholmodSupport/CholmodSupport.h"
 #include "src/SPQRSupport/SuiteSparseQRSupport.h"
--- a/eigenlib/Eigen/SVD
+++ b/eigenlib/Eigen/SVD
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_SVD_MODULE_H
 #define EIGEN_SVD_MODULE_H
@ -12,23 +19,26 @@
  *
  *
  * This module provides SVD decomposition for matrices (both real and complex).
-  * This decomposition is accessible via the following MatrixBase method:
+  * Two decomposition algorithms are provided:
  *  - JacobiSVD implementing two-sided Jacobi iterations is numerically very accurate, fast for small matrices, but very slow for larger ones.
  *  - BDCSVD implementing a recursive divide & conquer strategy on top of an upper-bidiagonalization which remains fast for large problems.
  * These decompositions are accessible via the respective classes and following MatrixBase methods:
  *  - MatrixBase::jacobiSvd()
  *  - MatrixBase::bdcSvd()
  *
  * \code
  * #include <Eigen/SVD>
  * \endcode
  */
-#include "src/misc/Solve.h"
+#include "src/misc/RealSvd2x2.h"
 #include "src/SVD/JacobiSVD.h"
 #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
 #include "src/SVD/JacobiSVD_MKL.h"
 #endif
 #include "src/SVD/UpperBidiagonalization.h"
-
+#include "src/SVD/SVDBase.h"
-#ifdef EIGEN2_SUPPORT
+#include "src/SVD/JacobiSVD.h"
-#include "src/Eigen2Support/SVD.h"
+#include "src/SVD/BDCSVD.h"
 #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
 #include "src/misc/lapacke.h"
 #include "src/SVD/JacobiSVD_LAPACKE.h"
 #endif
 #include "src/Core/util/ReenableStupidWarnings.h"
--- a/eigenlib/Eigen/Sparse
+++ b/eigenlib/Eigen/Sparse
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_SPARSE_MODULE_H
 #define EIGEN_SPARSE_MODULE_H
@ -11,9 +18,9 @@
  * - \ref SparseQR_Module
  * - \ref IterativeLinearSolvers_Module
  *
-  * \code
+    \code
-  * #include <Eigen/Sparse>
+    #include <Eigen/Sparse>
-  * \endcode
+    \endcode
  */
 #include "SparseCore"
--- a/eigenlib/Eigen/SparseCholesky
+++ b/eigenlib/Eigen/SparseCholesky
@ -34,8 +34,6 @@
 #error The SparseCholesky module has nothing to offer in MPL2 only mode
 #endif
 #include "src/misc/Solve.h"
 #include "src/misc/SparseSolve.h"
 #include "src/SparseCholesky/SimplicialCholesky.h"
 #ifndef EIGEN_MPL2_ONLY
--- a/eigenlib/Eigen/SparseCore
+++ b/eigenlib/Eigen/SparseCore
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_SPARSECORE_MODULE_H
 #define EIGEN_SPARSECORE_MODULE_H
@ -14,7 +21,7 @@
 /** 
  * \defgroup SparseCore_Module SparseCore module
  *
-  * This module provides a sparse matrix representation, and basic associatd matrix manipulations
+  * This module provides a sparse matrix representation, and basic associated matrix manipulations
  * and operations.
  *
  * See the \ref TutorialSparse "Sparse tutorial"
@ -26,37 +33,35 @@
  * This module depends on: Core.
  */
 namespace Eigen {
 /** The type used to identify a general sparse storage. */
 struct Sparse {};
 }
 #include "src/SparseCore/SparseUtil.h"
 #include "src/SparseCore/SparseMatrixBase.h"
 #include "src/SparseCore/SparseAssign.h"
 #include "src/SparseCore/CompressedStorage.h"
 #include "src/SparseCore/AmbiVector.h"
 #include "src/SparseCore/SparseCompressedBase.h"
 #include "src/SparseCore/SparseMatrix.h"
 #include "src/SparseCore/SparseMap.h"
 #include "src/SparseCore/MappedSparseMatrix.h"
 #include "src/SparseCore/SparseVector.h"
-#include "src/SparseCore/SparseBlock.h"
+#include "src/SparseCore/SparseRef.h"
 #include "src/SparseCore/SparseTranspose.h"
 #include "src/SparseCore/SparseCwiseUnaryOp.h"
 #include "src/SparseCore/SparseCwiseBinaryOp.h"
 #include "src/SparseCore/SparseTranspose.h"
 #include "src/SparseCore/SparseBlock.h"
 #include "src/SparseCore/SparseDot.h"
 #include "src/SparseCore/SparsePermutation.h"
 #include "src/SparseCore/SparseRedux.h"
-#include "src/SparseCore/SparseFuzzy.h"
+#include "src/SparseCore/SparseView.h"
 #include "src/SparseCore/SparseDiagonalProduct.h"
 #include "src/SparseCore/ConservativeSparseSparseProduct.h"
 #include "src/SparseCore/SparseSparseProductWithPruning.h"
 #include "src/SparseCore/SparseProduct.h"
 #include "src/SparseCore/SparseDenseProduct.h"
 #include "src/SparseCore/SparseDiagonalProduct.h"
 #include "src/SparseCore/SparseTriangularView.h"
 #include "src/SparseCore/SparseSelfAdjointView.h"
 #include "src/SparseCore/SparseTriangularView.h"
 #include "src/SparseCore/TriangularSolver.h"
-#include "src/SparseCore/SparseView.h"
+#include "src/SparseCore/SparsePermutation.h"
 #include "src/SparseCore/SparseFuzzy.h"
 #include "src/SparseCore/SparseSolverBase.h"
 #include "src/Core/util/ReenableStupidWarnings.h"
--- a/eigenlib/Eigen/SparseLU
+++ b/eigenlib/Eigen/SparseLU
@ -20,9 +20,6 @@
  * Please, see the documentation of the SparseLU class for more details.
  */
 #include "src/misc/Solve.h"
 #include "src/misc/SparseSolve.h"
 // Ordering interface
 #include "OrderingMethods"
--- a/eigenlib/Eigen/SparseQR
+++ b/eigenlib/Eigen/SparseQR
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_SPARSEQR_MODULE_H
 #define EIGEN_SPARSEQR_MODULE_H
@ -21,9 +28,6 @@
  * 
  */
 #include "src/misc/Solve.h"
 #include "src/misc/SparseSolve.h"
 #include "OrderingMethods"
 #include "src/SparseCore/SparseColEtree.h"
 #include "src/SparseQR/SparseQR.h"
--- a/eigenlib/Eigen/StdDeque
+++ b/eigenlib/Eigen/StdDeque
@ -14,7 +14,7 @@
 #include "Core"
 #include <deque>
-#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */
+#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */
 #define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...)
--- a/eigenlib/Eigen/StdList
+++ b/eigenlib/Eigen/StdList
@ -13,7 +13,7 @@
 #include "Core"
 #include <list>
-#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */    
+#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */    
 #define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...)
--- a/eigenlib/Eigen/StdVector
+++ b/eigenlib/Eigen/StdVector
@ -14,7 +14,7 @@
 #include "Core"
 #include <vector>
-#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */
+#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */
 #define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...)
--- a/eigenlib/Eigen/SuperLUSupport
+++ b/eigenlib/Eigen/SuperLUSupport
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_SUPERLUSUPPORT_MODULE_H
 #define EIGEN_SUPERLUSUPPORT_MODULE_H
@ -36,6 +43,8 @@ namespace Eigen { struct SluMatrix; }
  * - class SuperLU: a supernodal sequential LU factorization.
  * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods).
  *
  * \warning This wrapper requires at least versions 4.0 of SuperLU. The 3.x versions are not supported.
  *
  * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting.
  *
  * \code
@ -48,12 +57,8 @@ namespace Eigen { struct SluMatrix; }
  *
  */
 #include "src/misc/Solve.h"
 #include "src/misc/SparseSolve.h"
 #include "src/SuperLUSupport/SuperLUSupport.h"
 #include "src/Core/util/ReenableStupidWarnings.h"
 #endif // EIGEN_SUPERLUSUPPORT_MODULE_H
--- a/eigenlib/Eigen/UmfPackSupport
+++ b/eigenlib/Eigen/UmfPackSupport
@ -1,3 +1,10 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_UMFPACKSUPPORT_MODULE_H
 #define EIGEN_UMFPACKSUPPORT_MODULE_H
@ -12,7 +19,7 @@ extern "C" {
 /** \ingroup Support_modules
  * \defgroup UmfPackSupport_Module UmfPackSupport module
  *
-  * This module provides an interface to the UmfPack library which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
+  * This module provides an interface to the UmfPack library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
  * It provides the following factorization class:
  * - class UmfPackLU: a multifrontal sequential LU factorization.
  *
@ -26,9 +33,6 @@ extern "C" {
  *
  */
 #include "src/misc/Solve.h"
 #include "src/misc/SparseSolve.h"
 #include "src/UmfPackSupport/UmfPackSupport.h"
 #include "src/Core/util/ReenableStupidWarnings.h"
--- a/eigenlib/Eigen/src/CMakeLists.txt
+++ b/eigenlib/Eigen/src/CMakeLists.txt
@ -1,7 +0,0 @@
 file(GLOB Eigen_src_subdirectories "*")
 escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
 foreach(f ${Eigen_src_subdirectories})
  if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" )
    add_subdirectory(${f})
  endif()
 endforeach()
--- a/eigenlib/Eigen/src/Cholesky/CMakeLists.txt
+++ b/eigenlib/Eigen/src/Cholesky/CMakeLists.txt
@ -1,6 +0,0 @@
 FILE(GLOB Eigen_Cholesky_SRCS "*.h")
 INSTALL(FILES
  ${Eigen_Cholesky_SRCS}
  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Cholesky COMPONENT Devel
  )
--- a/eigenlib/Eigen/src/Cholesky/LDLT.h
+++ b/eigenlib/Eigen/src/Cholesky/LDLT.h
@ -13,7 +13,7 @@
 #ifndef EIGEN_LDLT_H
 #define EIGEN_LDLT_H
-namespace Eigen { 
+namespace Eigen {
 namespace internal {
  template<typename MatrixType, int UpLo> struct LDLT_Traits;
@ -28,8 +28,8 @@ namespace internal {
  *
  * \brief Robust Cholesky decomposition of a matrix with pivoting
  *
-  * \param MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
+  * \tparam _MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
-  * \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
+  * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
  *             The other triangular part won't be read.
  *
  * Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite
@ -43,7 +43,9 @@ namespace internal {
  * Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky
  * decomposition to determine whether a system of equations has a solution.
  *
-  * \sa MatrixBase::ldlt(), class LLT
+  * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
  * 
  * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT
  */
 template<typename _MatrixType, int _UpLo> class LDLT
 {
@ -52,15 +54,15 @@ template<typename _MatrixType, int _UpLo> class LDLT
    enum {
      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
      Options = MatrixType::Options & ~RowMajorBit, // these are the options for the TmpMatrixType, we need a ColMajor matrix here!
      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
      UpLo = _UpLo
    };
    typedef typename MatrixType::Scalar Scalar;
    typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
-    typedef typename MatrixType::Index Index;
+    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
-    typedef Matrix<Scalar, RowsAtCompileTime, 1, Options, MaxRowsAtCompileTime, 1> TmpMatrixType;
+    typedef typename MatrixType::StorageIndex StorageIndex;
    typedef Matrix<Scalar, RowsAtCompileTime, 1, 0, MaxRowsAtCompileTime, 1> TmpMatrixType;
    typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
    typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
@ -72,11 +74,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
      * The default constructor is useful in cases in which the user intends to
      * perform decompositions via LDLT::compute(const MatrixType&).
      */
-    LDLT() 
+    LDLT()
-      : m_matrix(), 
+      : m_matrix(),
-        m_transpositions(), 
+        m_transpositions(),
        m_sign(internal::ZeroSign),
-        m_isInitialized(false) 
+        m_isInitialized(false)
    {}
    /** \brief Default Constructor with memory preallocation
@ -85,7 +87,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
      * according to the specified problem \a size.
      * \sa LDLT()
      */
-    LDLT(Index size)
+    explicit LDLT(Index size)
      : m_matrix(size, size),
        m_transpositions(size),
        m_temporary(size),
@ -96,16 +98,35 @@ template<typename _MatrixType, int _UpLo> class LDLT
    /** \brief Constructor with decomposition
      *
      * This calculates the decomposition for the input \a matrix.
      *
      * \sa LDLT(Index size)
      */
-    LDLT(const MatrixType& matrix)
+    template<typename InputType>
    explicit LDLT(const EigenBase<InputType>& matrix)
      : m_matrix(matrix.rows(), matrix.cols()),
        m_transpositions(matrix.rows()),
        m_temporary(matrix.rows()),
        m_sign(internal::ZeroSign),
        m_isInitialized(false)
    {
-      compute(matrix);
+      compute(matrix.derived());
    }
    /** \brief Constructs a LDLT factorization from a given matrix
      *
      * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref.
      *
      * \sa LDLT(const EigenBase&)
      */
    template<typename InputType>
    explicit LDLT(EigenBase<InputType>& matrix)
      : m_matrix(matrix.derived()),
        m_transpositions(matrix.rows()),
        m_temporary(matrix.rows()),
        m_sign(internal::ZeroSign),
        m_isInitialized(false)
    {
      compute(matrix.derived());
    }
    /** Clear any existing decomposition
@ -151,13 +172,6 @@ template<typename _MatrixType, int _UpLo> class LDLT
      eigen_assert(m_isInitialized && "LDLT is not initialized.");
      return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign;
    }
    #ifdef EIGEN2_SUPPORT
    inline bool isPositiveDefinite() const
    {
      return isPositive();
    }
    #endif
    /** \returns true if the matrix is negative (semidefinite) */
    inline bool isNegative(void) const
@ -173,37 +187,38 @@ template<typename _MatrixType, int _UpLo> class LDLT
      * \note_about_checking_solutions
      *
      * More precisely, this method solves \f$ A x = b \f$ using the decomposition \f$ A = P^T L D L^* P \f$
-      * by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$, 
+      * by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$,
      * \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then
      * \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the
      * least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function
      * computes the least-square solution of \f$ A x = b \f$ is \f$ A \f$ is singular.
      *
-      * \sa MatrixBase::ldlt()
+      * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt()
      */
    template<typename Rhs>
-    inline const internal::solve_retval<LDLT, Rhs>
+    inline const Solve<LDLT, Rhs>
    solve(const MatrixBase<Rhs>& b) const
    {
      eigen_assert(m_isInitialized && "LDLT is not initialized.");
      eigen_assert(m_matrix.rows()==b.rows()
                && "LDLT::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::solve_retval<LDLT, Rhs>(*this, b.derived());
+      return Solve<LDLT, Rhs>(*this, b.derived());
    }
    #ifdef EIGEN2_SUPPORT
    template<typename OtherDerived, typename ResultType>
    bool solve(const MatrixBase<OtherDerived>& b, ResultType *result) const
    {
      *result = this->solve(b);
      return true;
    }
    #endif
    template<typename Derived>
    bool solveInPlace(MatrixBase<Derived> &bAndX) const;
-    LDLT& compute(const MatrixType& matrix);
+    template<typename InputType>
    LDLT& compute(const EigenBase<InputType>& matrix);
    /** \returns an estimate of the reciprocal condition number of the matrix of
     *  which \c *this is the LDLT decomposition.
     */
    RealScalar rcond() const
    {
      eigen_assert(m_isInitialized && "LDLT is not initialized.");
      return internal::rcond_estimate_helper(m_l1_norm, *this);
    }
    template <typename Derived>
    LDLT& rankUpdate(const MatrixBase<Derived>& w, const RealScalar& alpha=1);
@ -220,6 +235,13 @@ template<typename _MatrixType, int _UpLo> class LDLT
    MatrixType reconstructedMatrix() const;
    /** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint.
      *
      * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as:
      * \code x = decomposition.adjoint().solve(b) \endcode
      */
    const LDLT& adjoint() const { return *this; };
    inline Index rows() const { return m_matrix.rows(); }
    inline Index cols() const { return m_matrix.cols(); }
@ -231,11 +253,17 @@ template<typename _MatrixType, int _UpLo> class LDLT
    ComputationInfo info() const
    {
      eigen_assert(m_isInitialized && "LDLT is not initialized.");
-      return Success;
+      return m_info;
    }
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    template<typename RhsType, typename DstType>
    EIGEN_DEVICE_FUNC
    void _solve_impl(const RhsType &rhs, DstType &dst) const;
    #endif
  protected:
-    
+
    static void check_template_parameters()
    {
      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
@ -248,10 +276,12 @@ template<typename _MatrixType, int _UpLo> class LDLT
      * is not stored), and the diagonal entries correspond to D.
      */
    MatrixType m_matrix;
    RealScalar m_l1_norm;
    TranspositionType m_transpositions;
    TmpMatrixType m_temporary;
    internal::SignMatrix m_sign;
    bool m_isInitialized;
    ComputationInfo m_info;
 };
 namespace internal {
@ -266,15 +296,17 @@ template<> struct ldlt_inplace<Lower>
    using std::abs;
    typedef typename MatrixType::Scalar Scalar;
    typedef typename MatrixType::RealScalar RealScalar;
-    typedef typename MatrixType::Index Index;
+    typedef typename TranspositionType::StorageIndex IndexType;
    eigen_assert(mat.rows()==mat.cols());
    const Index size = mat.rows();
    bool found_zero_pivot = false;
    bool ret = true;
    if (size <= 1)
    {
      transpositions.setIdentity();
-      if (numext::real(mat.coeff(0,0)) > 0) sign = PositiveSemiDef;
+      if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef;
-      else if (numext::real(mat.coeff(0,0)) < 0) sign = NegativeSemiDef;
+      else if (numext::real(mat.coeff(0,0)) < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
      else sign = ZeroSign;
      return true;
    }
@ -286,7 +318,7 @@ template<> struct ldlt_inplace<Lower>
      mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner);
      index_of_biggest_in_corner += k;
-      transpositions.coeffRef(k) = index_of_biggest_in_corner;
+      transpositions.coeffRef(k) = IndexType(index_of_biggest_in_corner);
      if(k != index_of_biggest_in_corner)
      {
        // apply the transposition while taking care to consider only
@ -295,7 +327,7 @@ template<> struct ldlt_inplace<Lower>
        mat.row(k).head(k).swap(mat.row(index_of_biggest_in_corner).head(k));
        mat.col(k).tail(s).swap(mat.col(index_of_biggest_in_corner).tail(s));
        std::swap(mat.coeffRef(k,k),mat.coeffRef(index_of_biggest_in_corner,index_of_biggest_in_corner));
-        for(int i=k+1;i<index_of_biggest_in_corner;++i)
+        for(Index i=k+1;i<index_of_biggest_in_corner;++i)
        {
          Scalar tmp = mat.coeffRef(i,k);
          mat.coeffRef(i,k) = numext::conj(mat.coeffRef(index_of_biggest_in_corner,i));
@ -321,26 +353,44 @@ template<> struct ldlt_inplace<Lower>
        if(rs>0)
          A21.noalias() -= A20 * temp.head(k);
      }
-      
+
      // In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot
-      // was smaller than the cutoff value. However, soince LDLT is not rank-revealing
+      // was smaller than the cutoff value. However, since LDLT is not rank-revealing
-      // we should only make sure we do not introduce INF or NaN values.
+      // we should only make sure that we do not introduce INF or NaN values.
-      // LAPACK also uses 0 as the cutoff value.
+      // Remark that LAPACK also uses 0 as the cutoff value.
      RealScalar realAkk = numext::real(mat.coeffRef(k,k));
-      if((rs>0) && (abs(realAkk) > RealScalar(0)))
+      bool pivot_is_valid = (abs(realAkk) > RealScalar(0));
      if(k==0 && !pivot_is_valid)
      {
        // The entire diagonal is zero, there is nothing more to do
        // except filling the transpositions, and checking whether the matrix is zero.
        sign = ZeroSign;
        for(Index j = 0; j<size; ++j)
        {
          transpositions.coeffRef(j) = IndexType(j);
          ret = ret && (mat.col(j).tail(size-j-1).array()==Scalar(0)).all();
        }
        return ret;
      }
      if((rs>0) && pivot_is_valid)
        A21 /= realAkk;
      if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed
      else if(!pivot_is_valid) found_zero_pivot = true;
      if (sign == PositiveSemiDef) {
-        if (realAkk < 0) sign = Indefinite;
+        if (realAkk < static_cast<RealScalar>(0)) sign = Indefinite;
      } else if (sign == NegativeSemiDef) {
-        if (realAkk > 0) sign = Indefinite;
+        if (realAkk > static_cast<RealScalar>(0)) sign = Indefinite;
      } else if (sign == ZeroSign) {
-        if (realAkk > 0) sign = PositiveSemiDef;
+        if (realAkk > static_cast<RealScalar>(0)) sign = PositiveSemiDef;
-        else if (realAkk < 0) sign = NegativeSemiDef;
+        else if (realAkk < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
      }
    }
-    return true;
+    return ret;
  }
  // Reference for the algorithm: Davis and Hager, "Multiple Rank
@ -356,7 +406,6 @@ template<> struct ldlt_inplace<Lower>
    using numext::isfinite;
    typedef typename MatrixType::Scalar Scalar;
    typedef typename MatrixType::RealScalar RealScalar;
    typedef typename MatrixType::Index Index;
    const Index size = mat.rows();
    eigen_assert(mat.cols() == size && w.size()==size);
@ -420,16 +469,16 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Lower>
 {
  typedef const TriangularView<const MatrixType, UnitLower> MatrixL;
  typedef const TriangularView<const typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
-  static inline MatrixL getL(const MatrixType& m) { return m; }
+  static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); }
-  static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
+  static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); }
 };
 template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
 {
  typedef const TriangularView<const typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
  typedef const TriangularView<const MatrixType, UnitUpper> MatrixU;
-  static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); }
+  static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); }
-  static inline MatrixU getU(const MatrixType& m) { return m; }
+  static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); }
 };
 } // end namespace internal
@ -437,21 +486,35 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
 /** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a matrix
  */
 template<typename MatrixType, int _UpLo>
-LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
+template<typename InputType>
 LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>& a)
 {
  check_template_parameters();
-  
+
  eigen_assert(a.rows()==a.cols());
  const Index size = a.rows();
-  m_matrix = a;
+  m_matrix = a.derived();
  // Compute matrix L1 norm = max abs column sum.
  m_l1_norm = RealScalar(0);
  // TODO move this code to SelfAdjointView
  for (Index col = 0; col < size; ++col) {
    RealScalar abs_col_sum;
    if (_UpLo == Lower)
      abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();
    else
      abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();
    if (abs_col_sum > m_l1_norm)
      m_l1_norm = abs_col_sum;
  }
  m_transpositions.resize(size);
  m_isInitialized = false;
  m_temporary.resize(size);
  m_sign = internal::ZeroSign;
-  internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign);
+  m_info = internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign) ? Success : NumericalIssue;
  m_isInitialized = true;
  return *this;
@ -464,20 +527,21 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
  */
 template<typename MatrixType, int _UpLo>
 template<typename Derived>
-LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename NumTraits<typename MatrixType::Scalar>::Real& sigma)
+LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename LDLT<MatrixType,_UpLo>::RealScalar& sigma)
 {
  typedef typename TranspositionType::StorageIndex IndexType;
  const Index size = w.rows();
  if (m_isInitialized)
  {
    eigen_assert(m_matrix.rows()==size);
  }
  else
-  {    
+  {
    m_matrix.resize(size,size);
    m_matrix.setZero();
    m_transpositions.resize(size);
    for (Index i = 0; i < size; i++)
-      m_transpositions.coeffRef(i) = i;
+      m_transpositions.coeffRef(i) = IndexType(i);
    m_temporary.resize(size);
    m_sign = sigma>=0 ? internal::PositiveSemiDef : internal::NegativeSemiDef;
    m_isInitialized = true;
@ -488,53 +552,45 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Deri
  return *this;
 }
-namespace internal {
+#ifndef EIGEN_PARSED_BY_DOXYGEN
-template<typename _MatrixType, int _UpLo, typename Rhs>
+template<typename _MatrixType, int _UpLo>
-struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
+template<typename RhsType, typename DstType>
-  : solve_retval_base<LDLT<_MatrixType,_UpLo>, Rhs>
+void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
 {
-  typedef LDLT<_MatrixType,_UpLo> LDLTType;
+  eigen_assert(rhs.rows() == rows());
-  EIGEN_MAKE_SOLVE_HELPERS(LDLTType,Rhs)
+  // dst = P b
  dst = m_transpositions * rhs;
-  template<typename Dest> void evalTo(Dest& dst) const
+  // dst = L^-1 (P b)
  matrixL().solveInPlace(dst);
  // dst = D^-1 (L^-1 P b)
  // more precisely, use pseudo-inverse of D (see bug 241)
  using std::abs;
  const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD());
  // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon
  // as motivated by LAPACK's xGELSS:
  // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
  // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
  // diagonal element is not well justified and leads to numerical issues in some cases.
  // Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
  RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
  for (Index i = 0; i < vecD.size(); ++i)
  {
-    eigen_assert(rhs().rows() == dec().matrixLDLT().rows());
+    if(abs(vecD(i)) > tolerance)
-    // dst = P b
+      dst.row(i) /= vecD(i);
-    dst = dec().transpositionsP() * rhs();
+    else
-
+      dst.row(i).setZero();
    // dst = L^-1 (P b)
    dec().matrixL().solveInPlace(dst);
    // dst = D^-1 (L^-1 P b)
    // more precisely, use pseudo-inverse of D (see bug 241)
    using std::abs;
    using std::max;
    typedef typename LDLTType::MatrixType MatrixType;
    typedef typename LDLTType::RealScalar RealScalar;
    const typename Diagonal<const MatrixType>::RealReturnType vectorD(dec().vectorD());
    // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon
    // as motivated by LAPACK's xGELSS:
    // RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() *NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
    // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
    // diagonal element is not well justified and to numerical issues in some cases.
    // Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
    RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
    for (Index i = 0; i < vectorD.size(); ++i) {
      if(abs(vectorD(i)) > tolerance)
        dst.row(i) /= vectorD(i);
      else
        dst.row(i).setZero();
    }
    // dst = L^-T (D^-1 L^-1 P b)
    dec().matrixU().solveInPlace(dst);
    // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b
    dst = dec().transpositionsP().transpose() * dst;
  }
-};
+
  // dst = L^-T (D^-1 L^-1 P b)
  matrixU().solveInPlace(dst);
  // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b
  dst = m_transpositions.transpose() * dst;
 }
 #endif
 /** \internal use x = ldlt_object.solve(x);
  *
@ -588,6 +644,7 @@ MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
 /** \cholesky_module
  * \returns the Cholesky decomposition with full pivoting without square root of \c *this
  * \sa MatrixBase::ldlt()
  */
 template<typename MatrixType, unsigned int UpLo>
 inline const LDLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>
@ -598,6 +655,7 @@ SelfAdjointView<MatrixType, UpLo>::ldlt() const
 /** \cholesky_module
  * \returns the Cholesky decomposition with full pivoting without square root of \c *this
  * \sa SelfAdjointView::ldlt()
  */
 template<typename Derived>
 inline const LDLT<typename MatrixBase<Derived>::PlainObject>
--- a/eigenlib/Eigen/src/Cholesky/LLT.h
+++ b/eigenlib/Eigen/src/Cholesky/LLT.h
@ -10,7 +10,7 @@
 #ifndef EIGEN_LLT_H
 #define EIGEN_LLT_H
-namespace Eigen { 
+namespace Eigen {
 namespace internal{
 template<typename MatrixType, int UpLo> struct LLT_Traits;
@ -22,8 +22,8 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
  *
  * \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features
  *
-  * \param MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
+  * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
-  * \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
+  * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
  *             The other triangular part won't be read.
  *
  * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
@ -40,8 +40,10 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
  *
  * Example: \include LLT_example.cpp
  * Output: \verbinclude LLT_example.out
-  *    
+  *
-  * \sa MatrixBase::llt(), class LDLT
+  * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
  *
  * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
  */
 /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
  * Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
@ -54,12 +56,12 @@ template<typename _MatrixType, int _UpLo> class LLT
    enum {
      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
      Options = MatrixType::Options,
      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
    };
    typedef typename MatrixType::Scalar Scalar;
    typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
-    typedef typename MatrixType::Index Index;
+    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
    typedef typename MatrixType::StorageIndex StorageIndex;
    enum {
      PacketSize = internal::packet_traits<Scalar>::size,
@ -83,14 +85,30 @@ template<typename _MatrixType, int _UpLo> class LLT
      * according to the specified problem \a size.
      * \sa LLT()
      */
-    LLT(Index size) : m_matrix(size, size),
+    explicit LLT(Index size) : m_matrix(size, size),
                    m_isInitialized(false) {}
-    LLT(const MatrixType& matrix)
+    template<typename InputType>
    explicit LLT(const EigenBase<InputType>& matrix)
      : m_matrix(matrix.rows(), matrix.cols()),
        m_isInitialized(false)
    {
-      compute(matrix);
+      compute(matrix.derived());
    }
    /** \brief Constructs a LDLT factorization from a given matrix
      *
      * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when
      * \c MatrixType is a Eigen::Ref.
      *
      * \sa LLT(const EigenBase&)
      */
    template<typename InputType>
    explicit LLT(EigenBase<InputType>& matrix)
      : m_matrix(matrix.derived()),
        m_isInitialized(false)
    {
      compute(matrix.derived());
    }
    /** \returns a view of the upper triangular matrix U */
@ -115,33 +133,33 @@ template<typename _MatrixType, int _UpLo> class LLT
      * Example: \include LLT_solve.cpp
      * Output: \verbinclude LLT_solve.out
      *
-      * \sa solveInPlace(), MatrixBase::llt()
+      * \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt()
      */
    template<typename Rhs>
-    inline const internal::solve_retval<LLT, Rhs>
+    inline const Solve<LLT, Rhs>
    solve(const MatrixBase<Rhs>& b) const
    {
      eigen_assert(m_isInitialized && "LLT is not initialized.");
      eigen_assert(m_matrix.rows()==b.rows()
                && "LLT::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::solve_retval<LLT, Rhs>(*this, b.derived());
+      return Solve<LLT, Rhs>(*this, b.derived());
    }
    #ifdef EIGEN2_SUPPORT
    template<typename OtherDerived, typename ResultType>
    bool solve(const MatrixBase<OtherDerived>& b, ResultType *result) const
    {
      *result = this->solve(b);
      return true;
    }
    bool isPositiveDefinite() const { return true; }
    #endif
    template<typename Derived>
    void solveInPlace(MatrixBase<Derived> &bAndX) const;
-    LLT& compute(const MatrixType& matrix);
+    template<typename InputType>
    LLT& compute(const EigenBase<InputType>& matrix);
    /** \returns an estimate of the reciprocal condition number of the matrix of
      *  which \c *this is the Cholesky decomposition.
      */
    RealScalar rcond() const
    {
      eigen_assert(m_isInitialized && "LLT is not initialized.");
      eigen_assert(m_info == Success && "LLT failed because matrix appears to be negative");
      return internal::rcond_estimate_helper(m_l1_norm, *this);
    }
    /** \returns the LLT decomposition matrix
      *
@ -167,24 +185,38 @@ template<typename _MatrixType, int _UpLo> class LLT
      return m_info;
    }
    /** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint.
      *
      * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as:
      * \code x = decomposition.adjoint().solve(b) \endcode
      */
    const LLT& adjoint() const { return *this; };
    inline Index rows() const { return m_matrix.rows(); }
    inline Index cols() const { return m_matrix.cols(); }
    template<typename VectorType>
    LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    template<typename RhsType, typename DstType>
    EIGEN_DEVICE_FUNC
    void _solve_impl(const RhsType &rhs, DstType &dst) const;
    #endif
  protected:
-    
+
    static void check_template_parameters()
    {
      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
    }
-    
+
    /** \internal
      * Used to compute and store L
      * The strict upper part is not used and even not initialized.
      */
    MatrixType m_matrix;
    RealScalar m_l1_norm;
    bool m_isInitialized;
    ComputationInfo m_info;
 };
@ -194,12 +226,11 @@ namespace internal {
 template<typename Scalar, int UpLo> struct llt_inplace;
 template<typename MatrixType, typename VectorType>
-static typename MatrixType::Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma)
+static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma)
 {
  using std::sqrt;
  typedef typename MatrixType::Scalar Scalar;
  typedef typename MatrixType::RealScalar RealScalar;
  typedef typename MatrixType::Index Index;
  typedef typename MatrixType::ColXpr ColXpr;
  typedef typename internal::remove_all<ColXpr>::type ColXprCleaned;
  typedef typename ColXprCleaned::SegmentReturnType ColXprSegment;
@ -268,11 +299,10 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
 {
  typedef typename NumTraits<Scalar>::Real RealScalar;
  template<typename MatrixType>
-  static typename MatrixType::Index unblocked(MatrixType& mat)
+  static Index unblocked(MatrixType& mat)
  {
    using std::sqrt;
-    typedef typename MatrixType::Index Index;
+
    eigen_assert(mat.rows()==mat.cols());
    const Index size = mat.rows();
    for(Index k = 0; k < size; ++k)
@ -289,15 +319,14 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
        return k;
      mat.coeffRef(k,k) = x = sqrt(x);
      if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint();
-      if (rs>0) A21 *= RealScalar(1)/x;
+      if (rs>0) A21 /= x;
    }
    return -1;
  }
  template<typename MatrixType>
-  static typename MatrixType::Index blocked(MatrixType& m)
+  static Index blocked(MatrixType& m)
  {
    typedef typename MatrixType::Index Index;
    eigen_assert(m.rows()==m.cols());
    Index size = m.rows();
    if(size<32)
@ -322,36 +351,36 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
      Index ret;
      if((ret=unblocked(A11))>=0) return k+ret;
      if(rs>0) A11.adjoint().template triangularView<Upper>().template solveInPlace<OnTheRight>(A21);
-      if(rs>0) A22.template selfadjointView<Lower>().rankUpdate(A21,-1); // bottleneck
+      if(rs>0) A22.template selfadjointView<Lower>().rankUpdate(A21,typename NumTraits<RealScalar>::Literal(-1)); // bottleneck
    }
    return -1;
  }
  template<typename MatrixType, typename VectorType>
-  static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
+  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
  {
    return Eigen::internal::llt_rank_update_lower(mat, vec, sigma);
  }
 };
-  
+
 template<typename Scalar> struct llt_inplace<Scalar, Upper>
 {
  typedef typename NumTraits<Scalar>::Real RealScalar;
  template<typename MatrixType>
-  static EIGEN_STRONG_INLINE typename MatrixType::Index unblocked(MatrixType& mat)
+  static EIGEN_STRONG_INLINE Index unblocked(MatrixType& mat)
  {
    Transpose<MatrixType> matt(mat);
    return llt_inplace<Scalar, Lower>::unblocked(matt);
  }
  template<typename MatrixType>
-  static EIGEN_STRONG_INLINE typename MatrixType::Index blocked(MatrixType& mat)
+  static EIGEN_STRONG_INLINE Index blocked(MatrixType& mat)
  {
    Transpose<MatrixType> matt(mat);
    return llt_inplace<Scalar, Lower>::blocked(matt);
  }
  template<typename MatrixType, typename VectorType>
-  static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
+  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
  {
    Transpose<MatrixType> matt(mat);
    return llt_inplace<Scalar, Lower>::rankUpdate(matt, vec.conjugate(), sigma);
@ -362,8 +391,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Lower>
 {
  typedef const TriangularView<const MatrixType, Lower> MatrixL;
  typedef const TriangularView<const typename MatrixType::AdjointReturnType, Upper> MatrixU;
-  static inline MatrixL getL(const MatrixType& m) { return m; }
+  static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); }
-  static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
+  static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); }
  static bool inplace_decomposition(MatrixType& m)
  { return llt_inplace<typename MatrixType::Scalar, Lower>::blocked(m)==-1; }
 };
@ -372,8 +401,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
 {
  typedef const TriangularView<const typename MatrixType::AdjointReturnType, Lower> MatrixL;
  typedef const TriangularView<const MatrixType, Upper> MatrixU;
-  static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); }
+  static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); }
-  static inline MatrixU getU(const MatrixType& m) { return m; }
+  static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); }
  static bool inplace_decomposition(MatrixType& m)
  { return llt_inplace<typename MatrixType::Scalar, Upper>::blocked(m)==-1; }
 };
@ -388,14 +417,28 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
  * Output: \verbinclude TutorialLinAlgComputeTwice.out
  */
 template<typename MatrixType, int _UpLo>
-LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
+template<typename InputType>
 LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>& a)
 {
  check_template_parameters();
-  
+
  eigen_assert(a.rows()==a.cols());
  const Index size = a.rows();
  m_matrix.resize(size, size);
-  m_matrix = a;
+  m_matrix = a.derived();
  // Compute matrix L1 norm = max abs column sum.
  m_l1_norm = RealScalar(0);
  // TODO move this code to SelfAdjointView
  for (Index col = 0; col < size; ++col) {
    RealScalar abs_col_sum;
    if (_UpLo == Lower)
      abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();
    else
      abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();
    if (abs_col_sum > m_l1_norm)
      m_l1_norm = abs_col_sum;
  }
  m_isInitialized = true;
  bool ok = Traits::inplace_decomposition(m_matrix);
@ -423,33 +466,24 @@ LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, c
  return *this;
 }
 namespace internal {
 template<typename _MatrixType, int UpLo, typename Rhs>
 struct solve_retval<LLT<_MatrixType, UpLo>, Rhs>
  : solve_retval_base<LLT<_MatrixType, UpLo>, Rhs>
 {
  typedef LLT<_MatrixType,UpLo> LLTType;
  EIGEN_MAKE_SOLVE_HELPERS(LLTType,Rhs)
-  template<typename Dest> void evalTo(Dest& dst) const
+#ifndef EIGEN_PARSED_BY_DOXYGEN
-  {
+template<typename _MatrixType,int _UpLo>
-    dst = rhs();
+template<typename RhsType, typename DstType>
-    dec().solveInPlace(dst);
+void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
-  }
+{
-};
+  dst = rhs;
  solveInPlace(dst);
 }
 #endif
 /** \internal use x = llt_object.solve(x);
-  * 
+  *
  * This is the \em in-place version of solve().
  *
  * \param bAndX represents both the right-hand side matrix b and result x.
  *
-  * \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.
+  * This version avoids a copy when the right hand side matrix b is not needed anymore.
  *
  * This version avoids a copy when the right hand side matrix b is not
  * needed anymore.
  *
  * \sa LLT::solve(), MatrixBase::llt()
  */
@ -475,6 +509,7 @@ MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
 /** \cholesky_module
  * \returns the LLT decomposition of \c *this
  * \sa SelfAdjointView::llt()
  */
 template<typename Derived>
 inline const LLT<typename MatrixBase<Derived>::PlainObject>
@ -485,6 +520,7 @@ MatrixBase<Derived>::llt() const
 /** \cholesky_module
  * \returns the LLT decomposition of \c *this
  * \sa SelfAdjointView::llt()
  */
 template<typename MatrixType, unsigned int UpLo>
 inline const LLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>
--- a/eigenlib/Eigen/src/Cholesky/LLT_LAPACKE.h
+++ b/eigenlib/Eigen/src/Cholesky/LLT_LAPACKE.h
@ -25,41 +25,38 @@
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ********************************************************************************
- *   Content : Eigen bindings to Intel(R) MKL
+ *   Content : Eigen bindings to LAPACKe
 *     LLt decomposition based on LAPACKE_?potrf function.
 ********************************************************************************
 */
-#ifndef EIGEN_LLT_MKL_H
+#ifndef EIGEN_LLT_LAPACKE_H
-#define EIGEN_LLT_MKL_H
+#define EIGEN_LLT_LAPACKE_H
 #include "Eigen/src/Core/util/MKL_support.h"
 #include <iostream>
 namespace Eigen { 
 namespace internal {
-template<typename Scalar> struct mkl_llt;
+template<typename Scalar> struct lapacke_llt;
-#define EIGEN_MKL_LLT(EIGTYPE, MKLTYPE, MKLPREFIX) \
+#define EIGEN_LAPACKE_LLT(EIGTYPE, BLASTYPE, LAPACKE_PREFIX) \
-template<> struct mkl_llt<EIGTYPE> \
+template<> struct lapacke_llt<EIGTYPE> \
 { \
  template<typename MatrixType> \
-  static inline typename MatrixType::Index potrf(MatrixType& m, char uplo) \
+  static inline Index potrf(MatrixType& m, char uplo) \
  { \
    lapack_int matrix_order; \
    lapack_int size, lda, info, StorageOrder; \
    EIGTYPE* a; \
    eigen_assert(m.rows()==m.cols()); \
    /* Set up parameters for ?potrf */ \
-    size = m.rows(); \
+    size = convert_index<lapack_int>(m.rows()); \
    StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \
    matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
    a = &(m.coeffRef(0,0)); \
-    lda = m.outerStride(); \
+    lda = convert_index<lapack_int>(m.outerStride()); \
 \
-    info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \
+    info = LAPACKE_##LAPACKE_PREFIX##potrf( matrix_order, uplo, size, (BLASTYPE*)a, lda ); \
    info = (info==0) ? -1 : info>0 ? info-1 : size; \
    return info; \
  } \
@ -67,36 +64,36 @@ template<> struct mkl_llt<EIGTYPE> \
 template<> struct llt_inplace<EIGTYPE, Lower> \
 { \
  template<typename MatrixType> \
-  static typename MatrixType::Index blocked(MatrixType& m) \
+  static Index blocked(MatrixType& m) \
  { \
-    return mkl_llt<EIGTYPE>::potrf(m, 'L'); \
+    return lapacke_llt<EIGTYPE>::potrf(m, 'L'); \
  } \
  template<typename MatrixType, typename VectorType> \
-  static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
+  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
  { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \
 }; \
 template<> struct llt_inplace<EIGTYPE, Upper> \
 { \
  template<typename MatrixType> \
-  static typename MatrixType::Index blocked(MatrixType& m) \
+  static Index blocked(MatrixType& m) \
  { \
-    return mkl_llt<EIGTYPE>::potrf(m, 'U'); \
+    return lapacke_llt<EIGTYPE>::potrf(m, 'U'); \
  } \
  template<typename MatrixType, typename VectorType> \
-  static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
+  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
  { \
    Transpose<MatrixType> matt(mat); \
    return llt_inplace<EIGTYPE, Lower>::rankUpdate(matt, vec.conjugate(), sigma); \
  } \
 };
-EIGEN_MKL_LLT(double, double, d)
+EIGEN_LAPACKE_LLT(double, double, d)
-EIGEN_MKL_LLT(float, float, s)
+EIGEN_LAPACKE_LLT(float, float, s)
-EIGEN_MKL_LLT(dcomplex, MKL_Complex16, z)
+EIGEN_LAPACKE_LLT(dcomplex, lapack_complex_double, z)
-EIGEN_MKL_LLT(scomplex, MKL_Complex8, c)
+EIGEN_LAPACKE_LLT(scomplex, lapack_complex_float, c)
 } // end namespace internal
 } // end namespace Eigen
-#endif // EIGEN_LLT_MKL_H
+#endif // EIGEN_LLT_LAPACKE_H
--- a/eigenlib/Eigen/src/CholmodSupport/CMakeLists.txt
+++ b/eigenlib/Eigen/src/CholmodSupport/CMakeLists.txt
@ -1,6 +0,0 @@
 FILE(GLOB Eigen_CholmodSupport_SRCS "*.h")
 INSTALL(FILES 
  ${Eigen_CholmodSupport_SRCS}
  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/CholmodSupport COMPONENT Devel
  )
--- a/eigenlib/Eigen/src/CholmodSupport/CholmodSupport.h
+++ b/eigenlib/Eigen/src/CholmodSupport/CholmodSupport.h
@ -14,46 +14,52 @@ namespace Eigen {
 namespace internal {
-template<typename Scalar, typename CholmodType>
+template<typename Scalar> struct cholmod_configure_matrix;
-void cholmod_configure_matrix(CholmodType& mat)
+
-{
+template<> struct cholmod_configure_matrix<double> {
-  if (internal::is_same<Scalar,float>::value)
+  template<typename CholmodType>
-  {
+  static void run(CholmodType& mat) {
    mat.xtype = CHOLMOD_REAL;
    mat.dtype = CHOLMOD_SINGLE;
  }
  else if (internal::is_same<Scalar,double>::value)
  {
    mat.xtype = CHOLMOD_REAL;
    mat.dtype = CHOLMOD_DOUBLE;
  }
-  else if (internal::is_same<Scalar,std::complex<float> >::value)
+};
-  {
+
-    mat.xtype = CHOLMOD_COMPLEX;
+template<> struct cholmod_configure_matrix<std::complex<double> > {
-    mat.dtype = CHOLMOD_SINGLE;
+  template<typename CholmodType>
-  }
+  static void run(CholmodType& mat) {
  else if (internal::is_same<Scalar,std::complex<double> >::value)
  {
    mat.xtype = CHOLMOD_COMPLEX;
    mat.dtype = CHOLMOD_DOUBLE;
  }
-  else
+};
-  {
+
-    eigen_assert(false && "Scalar type not supported by CHOLMOD");
+// Other scalar types are not yet suppotred by Cholmod
-  }
+// template<> struct cholmod_configure_matrix<float> {
-}
+//   template<typename CholmodType>
 //   static void run(CholmodType& mat) {
 //     mat.xtype = CHOLMOD_REAL;
 //     mat.dtype = CHOLMOD_SINGLE;
 //   }
 // };
 //
 // template<> struct cholmod_configure_matrix<std::complex<float> > {
 //   template<typename CholmodType>
 //   static void run(CholmodType& mat) {
 //     mat.xtype = CHOLMOD_COMPLEX;
 //     mat.dtype = CHOLMOD_SINGLE;
 //   }
 // };
 } // namespace internal
 /** Wraps the Eigen sparse matrix \a mat into a Cholmod sparse matrix object.
  * Note that the data are shared.
  */
-template<typename _Scalar, int _Options, typename _Index>
+template<typename _Scalar, int _Options, typename _StorageIndex>
-cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
+cholmod_sparse viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_StorageIndex> > mat)
 {
  cholmod_sparse res;
  res.nzmax   = mat.nonZeros();
-  res.nrow    = mat.rows();;
+  res.nrow    = mat.rows();
  res.ncol    = mat.cols();
  res.p       = mat.outerIndexPtr();
  res.i       = mat.innerIndexPtr();
@ -74,11 +80,11 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
  res.dtype   = 0;
  res.stype   = -1;
-  if (internal::is_same<_Index,int>::value)
+  if (internal::is_same<_StorageIndex,int>::value)
  {
    res.itype = CHOLMOD_INT;
  }
-  else if (internal::is_same<_Index,UF_long>::value)
+  else if (internal::is_same<_StorageIndex,long>::value)
  {
    res.itype = CHOLMOD_LONG;
  }
@ -88,7 +94,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
  }
  // setup res.xtype
-  internal::cholmod_configure_matrix<_Scalar>(res);
+  internal::cholmod_configure_matrix<_Scalar>::run(res);
  res.stype = 0;
@ -98,16 +104,23 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
 template<typename _Scalar, int _Options, typename _Index>
 const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& mat)
 {
-  cholmod_sparse res = viewAsCholmod(mat.const_cast_derived());
+  cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.const_cast_derived()));
  return res;
 }
 template<typename _Scalar, int _Options, typename _Index>
 const cholmod_sparse viewAsCholmod(const SparseVector<_Scalar,_Options,_Index>& mat)
 {
  cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.const_cast_derived()));
  return res;
 }
 /** Returns a view of the Eigen sparse matrix \a mat as Cholmod sparse matrix.
  * The data are not copied but shared. */
 template<typename _Scalar, int _Options, typename _Index, unsigned int UpLo>
-cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat)
+cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat)
 {
-  cholmod_sparse res = viewAsCholmod(mat.matrix().const_cast_derived());
+  cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.matrix().const_cast_derived()));
  if(UpLo==Upper) res.stype =  1;
  if(UpLo==Lower) res.stype = -1;
@ -131,19 +144,19 @@ cholmod_dense viewAsCholmod(MatrixBase<Derived>& mat)
  res.x      = (void*)(mat.derived().data());
  res.z      = 0;
-  internal::cholmod_configure_matrix<Scalar>(res);
+  internal::cholmod_configure_matrix<Scalar>::run(res);
  return res;
 }
 /** Returns a view of the Cholmod sparse matrix \a cm as an Eigen sparse matrix.
  * The data are not copied but shared. */
-template<typename Scalar, int Flags, typename Index>
+template<typename Scalar, int Flags, typename StorageIndex>
-MappedSparseMatrix<Scalar,Flags,Index> viewAsEigen(cholmod_sparse& cm)
+MappedSparseMatrix<Scalar,Flags,StorageIndex> viewAsEigen(cholmod_sparse& cm)
 {
-  return MappedSparseMatrix<Scalar,Flags,Index>
+  return MappedSparseMatrix<Scalar,Flags,StorageIndex>
-         (cm.nrow, cm.ncol, static_cast<Index*>(cm.p)[cm.ncol],
+         (cm.nrow, cm.ncol, static_cast<StorageIndex*>(cm.p)[cm.ncol],
-          static_cast<Index*>(cm.p), static_cast<Index*>(cm.i),static_cast<Scalar*>(cm.x) );
+          static_cast<StorageIndex*>(cm.p), static_cast<StorageIndex*>(cm.i),static_cast<Scalar*>(cm.x) );
 }
 enum CholmodMode {
@ -157,29 +170,39 @@ enum CholmodMode {
  * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT
  */
 template<typename _MatrixType, int _UpLo, typename Derived>
-class CholmodBase : internal::noncopyable
+class CholmodBase : public SparseSolverBase<Derived>
 {
  protected:
    typedef SparseSolverBase<Derived> Base;
    using Base::derived;
    using Base::m_isInitialized;
  public:
    typedef _MatrixType MatrixType;
    enum { UpLo = _UpLo };
    typedef typename MatrixType::Scalar Scalar;
    typedef typename MatrixType::RealScalar RealScalar;
    typedef MatrixType CholMatrixType;
-    typedef typename MatrixType::Index Index;
+    typedef typename MatrixType::StorageIndex StorageIndex;
    enum {
      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
    };
  public:
    CholmodBase()
-      : m_cholmodFactor(0), m_info(Success), m_isInitialized(false)
+      : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
    {
-      m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
+      EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);
      m_shiftOffset[0] = m_shiftOffset[1] = 0.0;
      cholmod_start(&m_cholmod);
    }
-    CholmodBase(const MatrixType& matrix)
+    explicit CholmodBase(const MatrixType& matrix)
-      : m_cholmodFactor(0), m_info(Success), m_isInitialized(false)
+      : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
    {
-      m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
+      EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);
      m_shiftOffset[0] = m_shiftOffset[1] = 0.0;
      cholmod_start(&m_cholmod);
      compute(matrix);
    }
@ -191,11 +214,8 @@ class CholmodBase : internal::noncopyable
      cholmod_finish(&m_cholmod);
    }
-    inline Index cols() const { return m_cholmodFactor->n; }
+    inline StorageIndex cols() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); }
-    inline Index rows() const { return m_cholmodFactor->n; }
+    inline StorageIndex rows() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); }
    Derived& derived() { return *static_cast<Derived*>(this); }
    const Derived& derived() const { return *static_cast<const Derived*>(this); }
    /** \brief Reports whether previous computation was successful.
      *
@ -216,34 +236,6 @@ class CholmodBase : internal::noncopyable
      return derived();
    }
    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
      *
      * \sa compute()
      */
    template<typename Rhs>
    inline const internal::solve_retval<CholmodBase, Rhs>
    solve(const MatrixBase<Rhs>& b) const
    {
      eigen_assert(m_isInitialized && "LLT is not initialized.");
      eigen_assert(rows()==b.rows()
                && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b");
      return internal::solve_retval<CholmodBase, Rhs>(*this, b.derived());
    }
    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
      *
      * \sa compute()
      */
    template<typename Rhs>
    inline const internal::sparse_solve_retval<CholmodBase, Rhs>
    solve(const SparseMatrixBase<Rhs>& b) const
    {
      eigen_assert(m_isInitialized && "LLT is not initialized.");
      eigen_assert(rows()==b.rows()
                && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b");
      return internal::sparse_solve_retval<CholmodBase, Rhs>(*this, b.derived());
    }
    /** Performs a symbolic decomposition on the sparsity pattern of \a matrix.
      *
      * This function is particularly useful when solving for several problems having the same structure.
@ -277,7 +269,7 @@ class CholmodBase : internal::noncopyable
      eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
      cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>());
      cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod);
-      
+
      // If the factorization failed, minor is the column at which it did. On success minor == n.
      this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue);
      m_factorizationIsOk = true;
@ -290,20 +282,22 @@ class CholmodBase : internal::noncopyable
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** \internal */
    template<typename Rhs,typename Dest>
-    void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
+    void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
    {
      eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
      const Index size = m_cholmodFactor->n;
      EIGEN_UNUSED_VARIABLE(size);
      eigen_assert(size==b.rows());
      // Cholmod needs column-major stoarge without inner-stride, which corresponds to the default behavior of Ref.
      Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b.derived());
      // note: cd stands for Cholmod Dense
      Rhs& b_ref(b.const_cast_derived());
      cholmod_dense b_cd = viewAsCholmod(b_ref);
      cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod);
      if(!x_cd)
      {
        this->m_info = NumericalIssue;
        return;
      }
      // TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
      dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols());
@ -311,8 +305,8 @@ class CholmodBase : internal::noncopyable
    }
    /** \internal */
-    template<typename RhsScalar, int RhsOptions, typename RhsIndex, typename DestScalar, int DestOptions, typename DestIndex>
+    template<typename RhsDerived, typename DestDerived>
-    void _solve(const SparseMatrix<RhsScalar,RhsOptions,RhsIndex> &b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
+    void _solve_impl(const SparseMatrixBase<RhsDerived> &b, SparseMatrixBase<DestDerived> &dest) const
    {
      eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
      const Index size = m_cholmodFactor->n;
@ -320,14 +314,16 @@ class CholmodBase : internal::noncopyable
      eigen_assert(size==b.rows());
      // note: cs stands for Cholmod Sparse
-      cholmod_sparse b_cs = viewAsCholmod(b);
+      Ref<SparseMatrix<typename RhsDerived::Scalar,ColMajor,typename RhsDerived::StorageIndex> > b_ref(b.const_cast_derived());
      cholmod_sparse b_cs = viewAsCholmod(b_ref);
      cholmod_sparse* x_cs = cholmod_spsolve(CHOLMOD_A, m_cholmodFactor, &b_cs, &m_cholmod);
      if(!x_cs)
      {
        this->m_info = NumericalIssue;
        return;
      }
      // TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
-      dest = viewAsEigen<DestScalar,DestOptions,DestIndex>(*x_cs);
+      dest.derived() = viewAsEigen<typename DestDerived::Scalar,ColMajor,typename DestDerived::StorageIndex>(*x_cs);
      cholmod_free_sparse(&x_cs, &m_cholmod);
    }
    #endif // EIGEN_PARSED_BY_DOXYGEN
@ -344,10 +340,61 @@ class CholmodBase : internal::noncopyable
      */
    Derived& setShift(const RealScalar& offset)
    {
-      m_shiftOffset[0] = offset;
+      m_shiftOffset[0] = double(offset);
      return derived();
    }
    /** \returns the determinant of the underlying matrix from the current factorization */
    Scalar determinant() const
    {
      using std::exp;
      return exp(logDeterminant());
    }
    /** \returns the log determinant of the underlying matrix from the current factorization */
    Scalar logDeterminant() const
    {
      using std::log;
      using numext::real;
      eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
      RealScalar logDet = 0;
      Scalar *x = static_cast<Scalar*>(m_cholmodFactor->x);
      if (m_cholmodFactor->is_super)
      {
        // Supernodal factorization stored as a packed list of dense column-major blocs,
        // as described by the following structure:
        // super[k] == index of the first column of the j-th super node
        StorageIndex *super = static_cast<StorageIndex*>(m_cholmodFactor->super);
        // pi[k] == offset to the description of row indices
        StorageIndex *pi = static_cast<StorageIndex*>(m_cholmodFactor->pi);
        // px[k] == offset to the respective dense block
        StorageIndex *px = static_cast<StorageIndex*>(m_cholmodFactor->px);
        Index nb_super_nodes = m_cholmodFactor->nsuper;
        for (Index k=0; k < nb_super_nodes; ++k)
        {
          StorageIndex ncols = super[k + 1] - super[k];
          StorageIndex nrows = pi[k + 1] - pi[k];
          Map<const Array<Scalar,1,Dynamic>, 0, InnerStride<> > sk(x + px[k], ncols, InnerStride<>(nrows+1));
          logDet += sk.real().log().sum();
        }
      }
      else
      {
        // Simplicial factorization stored as standard CSC matrix.
        StorageIndex *p = static_cast<StorageIndex*>(m_cholmodFactor->p);
        Index size = m_cholmodFactor->n;
        for (Index k=0; k<size; ++k)
          logDet += log(real( x[p[k]] ));
      }
      if (m_cholmodFactor->is_ll)
        logDet *= 2.0;
      return logDet;
    };
    template<typename Stream>
    void dumpMemory(Stream& /*s*/)
    {}
@ -355,9 +402,8 @@ class CholmodBase : internal::noncopyable
  protected:
    mutable cholmod_common m_cholmod;
    cholmod_factor* m_cholmodFactor;
-    RealScalar m_shiftOffset[2];
+    double m_shiftOffset[2];
    mutable ComputationInfo m_info;
    bool m_isInitialized;
    int m_factorizationIsOk;
    int m_analysisIsOk;
 };
@ -376,9 +422,13 @@ class CholmodBase : internal::noncopyable
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
  *               or Upper. Default is Lower.
  *
  * \implsparsesolverconcept
  *
  * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
  *
-  * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLLT
+  * \warning Only double precision real and complex scalar types are supported by Cholmod.
  *
  * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT
  */
 template<typename _MatrixType, int _UpLo = Lower>
 class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT<_MatrixType, _UpLo> >
@ -395,7 +445,7 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
    CholmodSimplicialLLT(const MatrixType& matrix) : Base()
    {
      init();
-      compute(matrix);
+      this->compute(matrix);
    }
    ~CholmodSimplicialLLT() {}
@ -423,9 +473,13 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
  *               or Upper. Default is Lower.
  *
  * \implsparsesolverconcept
  *
  * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
  *
-  * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLDLT
+  * \warning Only double precision real and complex scalar types are supported by Cholmod.
  *
  * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT
  */
 template<typename _MatrixType, int _UpLo = Lower>
 class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT<_MatrixType, _UpLo> >
@ -442,7 +496,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
    CholmodSimplicialLDLT(const MatrixType& matrix) : Base()
    {
      init();
-      compute(matrix);
+      this->compute(matrix);
    }
    ~CholmodSimplicialLDLT() {}
@ -468,9 +522,13 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
  *               or Upper. Default is Lower.
  *
  * \implsparsesolverconcept
  *
  * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
  *
-  * \sa \ref TutorialSparseDirectSolvers
+  * \warning Only double precision real and complex scalar types are supported by Cholmod.
  *
  * \sa \ref TutorialSparseSolverConcept
  */
 template<typename _MatrixType, int _UpLo = Lower>
 class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT<_MatrixType, _UpLo> >
@ -487,7 +545,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
    CholmodSupernodalLLT(const MatrixType& matrix) : Base()
    {
      init();
-      compute(matrix);
+      this->compute(matrix);
    }
    ~CholmodSupernodalLLT() {}
@ -515,9 +573,13 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
  *               or Upper. Default is Lower.
  *
  * \implsparsesolverconcept
  *
  * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
  *
-  * \sa \ref TutorialSparseDirectSolvers
+  * \warning Only double precision real and complex scalar types are supported by Cholmod.
  *
  * \sa \ref TutorialSparseSolverConcept
  */
 template<typename _MatrixType, int _UpLo = Lower>
 class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecomposition<_MatrixType, _UpLo> >
@ -534,7 +596,7 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom
    CholmodDecomposition(const MatrixType& matrix) : Base()
    {
      init();
-      compute(matrix);
+      this->compute(matrix);
    }
    ~CholmodDecomposition() {}
@ -572,36 +634,6 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom
    }
 };
 namespace internal {
 template<typename _MatrixType, int _UpLo, typename Derived, typename Rhs>
 struct solve_retval<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
  : solve_retval_base<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
 {
  typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec;
  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
  template<typename Dest> void evalTo(Dest& dst) const
  {
    dec()._solve(rhs(),dst);
  }
 };
 template<typename _MatrixType, int _UpLo, typename Derived, typename Rhs>
 struct sparse_solve_retval<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
  : sparse_solve_retval_base<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
 {
  typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec;
  EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
  template<typename Dest> void evalTo(Dest& dst) const
  {
    dec()._solve(rhs(),dst);
  }
 };
 } // end namespace internal
 } // end namespace Eigen
 #endif // EIGEN_CHOLMODSUPPORT_H
--- a/eigenlib/Eigen/src/Core/Array.h
+++ b/eigenlib/Eigen/src/Core/Array.h
@ -12,7 +12,16 @@
 namespace Eigen {
-/** \class Array 
+namespace internal {
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
 {
  typedef ArrayXpr XprKind;
  typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
 };
 }
 /** \class Array
  * \ingroup Core_Module
  *
  * \brief General-purpose arrays with easy API for coefficient-wise operations
@ -24,20 +33,14 @@ namespace Eigen {
  * API for the %Matrix class provides easy access to linear-algebra
  * operations.
  *
-  * This class can be extended with the help of the plugin mechanism described on the page
+  * See documentation of class Matrix for detailed information on the template parameters
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
+  * storage layout.
  *
-  * \sa \ref TutorialArrayClass, \ref TopicClassHierarchy
+  * This class can be extended with the help of the plugin mechanism described on the page
  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
  *
  * \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy
  */
 namespace internal {
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
 {
  typedef ArrayXpr XprKind;
  typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
 };
 }
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 class Array
  : public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
@ -69,11 +72,27 @@ class Array
      * the usage of 'using'. This should be done only for operator=.
      */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Array& operator=(const EigenBase<OtherDerived> &other)
    {
      return Base::operator=(other);
    }
    /** Set all the entries to \a value.
      * \sa DenseBase::setConstant(), DenseBase::fill()
      */
    /* This overload is needed because the usage of
      *   using Base::operator=;
      * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped
      * the usage of 'using'. This should be done only for operator=.
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Array& operator=(const Scalar &value)
    {
      Base::setConstant(value);
      return *this;
    }
    /** Copies the value of the expression \a other into \c *this with automatic resizing.
      *
      * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized),
@ -84,7 +103,8 @@ class Array
      * remain row-vectors and vectors remain vectors.
      */
    template<typename OtherDerived>
-    EIGEN_STRONG_INLINE Array& operator=(const ArrayBase<OtherDerived>& other)
+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Array& operator=(const DenseBase<OtherDerived>& other)
    {
      return Base::_set(other);
    }
@ -92,11 +112,12 @@ class Array
    /** This is a special case of the templated operator=. Its purpose is to
      * prevent a default operator= from hiding the templated operator=.
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Array& operator=(const Array& other)
    {
      return Base::_set(other);
    }
-
+    
    /** Default constructor.
      *
      * For fixed-size matrices, does nothing.
@ -107,6 +128,7 @@ class Array
      *
      * \sa resize(Index,Index)
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Array() : Base()
    {
      Base::_check_template_params();
@ -116,6 +138,7 @@ class Array
 #ifndef EIGEN_PARSED_BY_DOXYGEN
    // FIXME is it still needed ??
    /** \internal */
    EIGEN_DEVICE_FUNC
    Array(internal::constructor_without_unaligned_array_assert)
      : Base(internal::constructor_without_unaligned_array_assert())
    {
@ -124,41 +147,64 @@ class Array
    }
 #endif
-    /** Constructs a vector or row-vector with given dimension. \only_for_vectors
+#if EIGEN_HAS_RVALUE_REFERENCES
-      *
+    EIGEN_DEVICE_FUNC
-      * Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
+    Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
-      * it is redundant to pass the dimension here, so it makes more sense to use the default
+      : Base(std::move(other))
      * constructor Matrix() instead.
      */
    EIGEN_STRONG_INLINE explicit Array(Index dim)
      : Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim)
    {
      Base::_check_template_params();
-      EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array)
+      if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
-      eigen_assert(dim >= 0);
+        Base::_set_noalias(other);
      eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
    }
    EIGEN_DEVICE_FUNC
    Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
    {
      other.swap(*this);
      return *this;
    }
 #endif
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    template<typename T>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE explicit Array(const T& x)
    {
      Base::_check_template_params();
      Base::template _init1<T>(x);
    }
    template<typename T0, typename T1>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1)
    {
      Base::_check_template_params();
      this->template _init2<T0,T1>(val0, val1);
    }
    #else
-    /** constructs an uninitialized matrix with \a rows rows and \a cols columns.
+    /** \brief Constructs a fixed-sized array initialized with coefficients starting at \a data */
    EIGEN_DEVICE_FUNC explicit Array(const Scalar *data);
    /** Constructs a vector or row-vector with given dimension. \only_for_vectors
      *
-      * This is useful for dynamic-size matrices. For fixed-size matrices,
+      * Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
      * it is redundant to pass the dimension here, so it makes more sense to use the default
      * constructor Array() instead.
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE explicit Array(Index dim);
    /** constructs an initialized 1x1 Array with the given coefficient */
    Array(const Scalar& value);
    /** constructs an uninitialized array with \a rows rows and \a cols columns.
      *
      * This is useful for dynamic-size arrays. For fixed-size arrays,
      * it is redundant to pass these parameters, so one should use the default constructor
-      * Matrix() instead. */
+      * Array() instead. */
    Array(Index rows, Index cols);
    /** constructs an initialized 2D vector with given coefficients */
    Array(const Scalar& val0, const Scalar& val1);
    #endif
    /** constructs an initialized 3D vector with given coefficients */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2)
    {
      Base::_check_template_params();
@ -168,6 +214,7 @@ class Array
      m_storage.data()[2] = val2;
    }
    /** constructs an initialized 4D vector with given coefficients */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3)
    {
      Base::_check_template_params();
@ -178,51 +225,21 @@ class Array
      m_storage.data()[3] = val3;
    }
    explicit Array(const Scalar *data);
    /** Constructor copying the value of the expression \a other */
    template<typename OtherDerived>
    EIGEN_STRONG_INLINE Array(const ArrayBase<OtherDerived>& other)
             : Base(other.rows() * other.cols(), other.rows(), other.cols())
    {
      Base::_check_template_params();
      Base::_set_noalias(other);
    }
    /** Copy constructor */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Array(const Array& other)
-            : Base(other.rows() * other.cols(), other.rows(), other.cols())
+            : Base(other)
-    {
+    { }
      Base::_check_template_params();
      Base::_set_noalias(other);
    }
    /** Copy constructor with in-place evaluation */
    template<typename OtherDerived>
    EIGEN_STRONG_INLINE Array(const ReturnByValue<OtherDerived>& other)
    {
      Base::_check_template_params();
      Base::resize(other.rows(), other.cols());
      other.evalTo(*this);
    }
    /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
-      : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
+      : Base(other.derived())
-    {
+    { }
      Base::_check_template_params();
      Base::_resize_to_match(other);
      *this = other;
    }
-    /** Override MatrixBase::swap() since for dynamic-sized matrices of same type it is enough to swap the
+    EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
-      * data pointers.
+    EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
      */
    template<typename OtherDerived>
    void swap(ArrayBase<OtherDerived> const & other)
    { this->_swap(other.derived()); }
    inline Index innerStride() const { return 1; }
    inline Index outerStride() const { return this->innerSize(); }
    #ifdef EIGEN_ARRAY_PLUGIN
    #include EIGEN_ARRAY_PLUGIN
--- a/eigenlib/Eigen/src/Core/ArrayBase.h
+++ b/eigenlib/Eigen/src/Core/ArrayBase.h
@ -32,7 +32,7 @@ template<typename ExpressionType> class MatrixWrapper;
  * \tparam Derived is the derived type, e.g., an array or an expression type.
  *
  * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN.
  *
  * \sa class MatrixBase, \ref TopicClassHierarchy
  */
@ -46,11 +46,7 @@ template<typename Derived> class ArrayBase
    typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;
    using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
                typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
    typedef typename internal::traits<Derived>::StorageKind StorageKind;
    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;
@ -64,8 +60,7 @@ template<typename Derived> class ArrayBase
    using Base::MaxSizeAtCompileTime;
    using Base::IsVectorAtCompileTime;
    using Base::Flags;
-    using Base::CoeffReadCost;
+    
    using Base::derived;
    using Base::const_cast_derived;
    using Base::rows;
@ -85,25 +80,14 @@ template<typename Derived> class ArrayBase
 #endif // not EIGEN_PARSED_BY_DOXYGEN
 #ifndef EIGEN_PARSED_BY_DOXYGEN
-    /** \internal the plain matrix type corresponding to this expression. Note that is not necessarily
+    typedef typename Base::PlainObject PlainObject;
      * exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const
      * reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either
      * PlainObject or const PlainObject&.
      */
    typedef Array<typename internal::traits<Derived>::Scalar,
                internal::traits<Derived>::RowsAtCompileTime,
                internal::traits<Derived>::ColsAtCompileTime,
                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
                internal::traits<Derived>::MaxRowsAtCompileTime,
                internal::traits<Derived>::MaxColsAtCompileTime
          > PlainObject;
    /** \internal Represents a matrix with all coefficients equal to one another*/
-    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
+    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
 #endif // not EIGEN_PARSED_BY_DOXYGEN
 #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
 #define EIGEN_DOC_UNARY_ADDONS(X,Y)
 #   include "../plugins/CommonCwiseUnaryOps.h"
 #   include "../plugins/MatrixCwiseUnaryOps.h"
 #   include "../plugins/ArrayCwiseUnaryOps.h"
@ -114,44 +98,62 @@ template<typename Derived> class ArrayBase
 #     include EIGEN_ARRAYBASE_PLUGIN
 #   endif
 #undef EIGEN_CURRENT_STORAGE_BASE_CLASS
 #undef EIGEN_DOC_UNARY_ADDONS
    /** Special case of the template operator=, in order to prevent the compiler
      * from generating a default operator= (issue hit with g++ 4.1)
      */
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator=(const ArrayBase& other)
    {
-      return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
+      internal::call_assignment(derived(), other.derived());
      return derived();
    }
    /** Set all the entries to \a value.
      * \sa DenseBase::setConstant(), DenseBase::fill() */
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator=(const Scalar &value)
    { Base::setConstant(value); return derived(); }
-    Derived& operator+=(const Scalar& scalar)
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    { return *this = derived() + scalar; }
+    Derived& operator+=(const Scalar& scalar);
-    Derived& operator-=(const Scalar& scalar)
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    { return *this = derived() - scalar; }
+    Derived& operator-=(const Scalar& scalar);
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator+=(const ArrayBase<OtherDerived>& other);
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator-=(const ArrayBase<OtherDerived>& other);
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator*=(const ArrayBase<OtherDerived>& other);
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator/=(const ArrayBase<OtherDerived>& other);
  public:
    EIGEN_DEVICE_FUNC
    ArrayBase<Derived>& array() { return *this; }
    EIGEN_DEVICE_FUNC
    const ArrayBase<Derived>& array() const { return *this; }
    /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array
      * \sa MatrixBase::array() */
-    MatrixWrapper<Derived> matrix() { return derived(); }
+    EIGEN_DEVICE_FUNC
-    const MatrixWrapper<const Derived> matrix() const { return derived(); }
+    MatrixWrapper<Derived> matrix() { return MatrixWrapper<Derived>(derived()); }
    EIGEN_DEVICE_FUNC
    const MatrixWrapper<const Derived> matrix() const { return MatrixWrapper<const Derived>(derived()); }
 //     template<typename Dest>
 //     inline void evalTo(Dest& dst) const { dst = matrix(); }
  protected:
    EIGEN_DEVICE_FUNC
    ArrayBase() : Base() {}
  private:
@ -176,8 +178,7 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
 {
-  SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
+  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
  tmp = other.derived();
  return derived();
 }
@ -190,8 +191,7 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
 {
-  SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
+  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
  tmp = other.derived();
  return derived();
 }
@ -204,8 +204,7 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
 {
-  SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, OtherDerived> tmp(derived());
+  call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());
  tmp = other.derived();
  return derived();
 }
@ -218,8 +217,7 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
 {
-  SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, Derived, OtherDerived> tmp(derived());
+  call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>());
  tmp = other.derived();
  return derived();
 }
--- a/eigenlib/Eigen/src/Core/ArrayWrapper.h
+++ b/eigenlib/Eigen/src/Core/ArrayWrapper.h
@ -44,6 +44,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
    typedef ArrayBase<ArrayWrapper> Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)
    typedef typename internal::remove_all<ExpressionType>::type NestedExpression;
    typedef typename internal::conditional<
                       internal::is_lvalue<ExpressionType>::value,
@ -51,76 +52,45 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
                       const Scalar
                     >::type ScalarWithConstIfNotLvalue;
-    typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
+    typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
-    inline ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
+    using Base::coeffRef;
    EIGEN_DEVICE_FUNC
    explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
    EIGEN_DEVICE_FUNC
    inline Index rows() const { return m_expression.rows(); }
    EIGEN_DEVICE_FUNC
    inline Index cols() const { return m_expression.cols(); }
    EIGEN_DEVICE_FUNC
    inline Index outerStride() const { return m_expression.outerStride(); }
    EIGEN_DEVICE_FUNC
    inline Index innerStride() const { return m_expression.innerStride(); }
-    inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
+    EIGEN_DEVICE_FUNC
    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
    EIGEN_DEVICE_FUNC
    inline const Scalar* data() const { return m_expression.data(); }
-    inline CoeffReturnType coeff(Index rowId, Index colId) const
+    EIGEN_DEVICE_FUNC
    {
      return m_expression.coeff(rowId, colId);
    }
    inline Scalar& coeffRef(Index rowId, Index colId)
    {
      return m_expression.const_cast_derived().coeffRef(rowId, colId);
    }
    inline const Scalar& coeffRef(Index rowId, Index colId) const
    {
-      return m_expression.const_cast_derived().coeffRef(rowId, colId);
+      return m_expression.coeffRef(rowId, colId);
    }
    inline CoeffReturnType coeff(Index index) const
    {
      return m_expression.coeff(index);
    }
    inline Scalar& coeffRef(Index index)
    {
      return m_expression.const_cast_derived().coeffRef(index);
    }
    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index index) const
    {
-      return m_expression.const_cast_derived().coeffRef(index);
+      return m_expression.coeffRef(index);
    }
    template<int LoadMode>
    inline const PacketScalar packet(Index rowId, Index colId) const
    {
      return m_expression.template packet<LoadMode>(rowId, colId);
    }
    template<int LoadMode>
    inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
    {
      m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
    }
    template<int LoadMode>
    inline const PacketScalar packet(Index index) const
    {
      return m_expression.template packet<LoadMode>(index);
    }
    template<int LoadMode>
    inline void writePacket(Index index, const PacketScalar& val)
    {
      m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
    }
    template<typename Dest>
    EIGEN_DEVICE_FUNC
    inline void evalTo(Dest& dst) const { dst = m_expression; }
    const typename internal::remove_all<NestedExpressionType>::type& 
    EIGEN_DEVICE_FUNC
    nestedExpression() const 
    {
      return m_expression;
@ -128,10 +98,12 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
    /** Forwards the resizing request to the nested expression
      * \sa DenseBase::resize(Index)  */
-    void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
+    EIGEN_DEVICE_FUNC
    void resize(Index newSize) { m_expression.resize(newSize); }
    /** Forwards the resizing request to the nested expression
      * \sa DenseBase::resize(Index,Index)*/
-    void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }
+    EIGEN_DEVICE_FUNC
    void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
  protected:
    NestedExpressionType m_expression;
@ -169,6 +141,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
    typedef MatrixBase<MatrixWrapper<ExpressionType> > Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper)
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper)
    typedef typename internal::remove_all<ExpressionType>::type NestedExpression;
    typedef typename internal::conditional<
                       internal::is_lvalue<ExpressionType>::value,
@ -176,72 +149,40 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
                       const Scalar
                     >::type ScalarWithConstIfNotLvalue;
-    typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
+    typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
-    inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {}
+    using Base::coeffRef;
    EIGEN_DEVICE_FUNC
    explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
    EIGEN_DEVICE_FUNC
    inline Index rows() const { return m_expression.rows(); }
    EIGEN_DEVICE_FUNC
    inline Index cols() const { return m_expression.cols(); }
    EIGEN_DEVICE_FUNC
    inline Index outerStride() const { return m_expression.outerStride(); }
    EIGEN_DEVICE_FUNC
    inline Index innerStride() const { return m_expression.innerStride(); }
-    inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
+    EIGEN_DEVICE_FUNC
    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
    EIGEN_DEVICE_FUNC
    inline const Scalar* data() const { return m_expression.data(); }
-    inline CoeffReturnType coeff(Index rowId, Index colId) const
+    EIGEN_DEVICE_FUNC
    {
      return m_expression.coeff(rowId, colId);
    }
    inline Scalar& coeffRef(Index rowId, Index colId)
    {
      return m_expression.const_cast_derived().coeffRef(rowId, colId);
    }
    inline const Scalar& coeffRef(Index rowId, Index colId) const
    {
      return m_expression.derived().coeffRef(rowId, colId);
    }
-    inline CoeffReturnType coeff(Index index) const
+    EIGEN_DEVICE_FUNC
    {
      return m_expression.coeff(index);
    }
    inline Scalar& coeffRef(Index index)
    {
      return m_expression.const_cast_derived().coeffRef(index);
    }
    inline const Scalar& coeffRef(Index index) const
    {
-      return m_expression.const_cast_derived().coeffRef(index);
+      return m_expression.coeffRef(index);
    }
    template<int LoadMode>
    inline const PacketScalar packet(Index rowId, Index colId) const
    {
      return m_expression.template packet<LoadMode>(rowId, colId);
    }
    template<int LoadMode>
    inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
    {
      m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
    }
    template<int LoadMode>
    inline const PacketScalar packet(Index index) const
    {
      return m_expression.template packet<LoadMode>(index);
    }
    template<int LoadMode>
    inline void writePacket(Index index, const PacketScalar& val)
    {
      m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
    }
    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<NestedExpressionType>::type& 
    nestedExpression() const 
    {
@ -250,10 +191,12 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
    /** Forwards the resizing request to the nested expression
      * \sa DenseBase::resize(Index)  */
-    void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
+    EIGEN_DEVICE_FUNC
    void resize(Index newSize) { m_expression.resize(newSize); }
    /** Forwards the resizing request to the nested expression
      * \sa DenseBase::resize(Index,Index)*/
-    void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }
+    EIGEN_DEVICE_FUNC
    void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
  protected:
    NestedExpressionType m_expression;
--- a/eigenlib/Eigen/src/Core/Assign.h
+++ b/eigenlib/Eigen/src/Core/Assign.h
@ -14,478 +14,6 @@
 namespace Eigen {
 namespace internal {
 /***************************************************************************
 * Part 1 : the logic deciding a strategy for traversal and unrolling       *
 ***************************************************************************/
 template <typename Derived, typename OtherDerived>
 struct assign_traits
 {
 public:
  enum {
    DstIsAligned = Derived::Flags & AlignedBit,
    DstHasDirectAccess = Derived::Flags & DirectAccessBit,
    SrcIsAligned = OtherDerived::Flags & AlignedBit,
    JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
  };
 private:
  enum {
    InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
              : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
              : int(Derived::RowsAtCompileTime),
    InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
              : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
              : int(Derived::MaxRowsAtCompileTime),
    MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
    PacketSize = packet_traits<typename Derived::Scalar>::size
  };
  enum {
    StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
    MightVectorize = StorageOrdersAgree
                  && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
    MayInnerVectorize  = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
                       && int(DstIsAligned) && int(SrcIsAligned),
    MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
    MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
                       && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
         so it's only good for large enough sizes. */
    MaySliceVectorize  = MightVectorize && DstHasDirectAccess
                       && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
      /* slice vectorization can be slow, so we only want it if the slices are big, which is
         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
         in a fixed-size matrix */
  };
 public:
  enum {
    Traversal = int(MayInnerVectorize)  ? int(InnerVectorizedTraversal)
              : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
              : int(MayLinearize)       ? int(LinearTraversal)
                                        : int(DefaultTraversal),
    Vectorized = int(Traversal) == InnerVectorizedTraversal
              || int(Traversal) == LinearVectorizedTraversal
              || int(Traversal) == SliceVectorizedTraversal
  };
 private:
  enum {
    UnrollingLimit      = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
    MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic
                       && int(OtherDerived::CoeffReadCost) != Dynamic
                       && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
    MayUnrollInner      = int(InnerSize) != Dynamic
                       && int(OtherDerived::CoeffReadCost) != Dynamic
                       && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
  };
 public:
  enum {
    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
                ? (
                    int(MayUnrollCompletely) ? int(CompleteUnrolling)
                  : int(MayUnrollInner)      ? int(InnerUnrolling)
                                             : int(NoUnrolling)
                  )
              : int(Traversal) == int(LinearVectorizedTraversal)
                ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
              : int(Traversal) == int(LinearTraversal)
                ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
              : int(NoUnrolling)
  };
 #ifdef EIGEN_DEBUG_ASSIGN
  static void debug()
  {
    EIGEN_DEBUG_VAR(DstIsAligned)
    EIGEN_DEBUG_VAR(SrcIsAligned)
    EIGEN_DEBUG_VAR(JointAlignment)
    EIGEN_DEBUG_VAR(InnerSize)
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(StorageOrdersAgree)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearize)
    EIGEN_DEBUG_VAR(MayInnerVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    EIGEN_DEBUG_VAR(Traversal)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(MayUnrollCompletely)
    EIGEN_DEBUG_VAR(MayUnrollInner)
    EIGEN_DEBUG_VAR(Unrolling)
  }
 #endif
 };
 /***************************************************************************
 * Part 2 : meta-unrollers
 ***************************************************************************/
 /************************
 *** Default traversal ***
 ************************/
 template<typename Derived1, typename Derived2, int Index, int Stop>
 struct assign_DefaultTraversal_CompleteUnrolling
 {
  enum {
    outer = Index / Derived1::InnerSizeAtCompileTime,
    inner = Index % Derived1::InnerSizeAtCompileTime
  };
  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
  {
    dst.copyCoeffByOuterInner(outer, inner, src);
    assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
  }
 };
 template<typename Derived1, typename Derived2, int Stop>
 struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
 {
  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
 };
 template<typename Derived1, typename Derived2, int Index, int Stop>
 struct assign_DefaultTraversal_InnerUnrolling
 {
  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
  {
    dst.copyCoeffByOuterInner(outer, Index, src);
    assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
  }
 };
 template<typename Derived1, typename Derived2, int Stop>
 struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
 {
  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
 };
 /***********************
 *** Linear traversal ***
 ***********************/
 template<typename Derived1, typename Derived2, int Index, int Stop>
 struct assign_LinearTraversal_CompleteUnrolling
 {
  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
  {
    dst.copyCoeff(Index, src);
    assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
  }
 };
 template<typename Derived1, typename Derived2, int Stop>
 struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
 {
  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
 };
 /**************************
 *** Inner vectorization ***
 **************************/
 template<typename Derived1, typename Derived2, int Index, int Stop>
 struct assign_innervec_CompleteUnrolling
 {
  enum {
    outer = Index / Derived1::InnerSizeAtCompileTime,
    inner = Index % Derived1::InnerSizeAtCompileTime,
    JointAlignment = assign_traits<Derived1,Derived2>::JointAlignment
  };
  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
  {
    dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
    assign_innervec_CompleteUnrolling<Derived1, Derived2,
      Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
  }
 };
 template<typename Derived1, typename Derived2, int Stop>
 struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
 {
  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
 };
 template<typename Derived1, typename Derived2, int Index, int Stop>
 struct assign_innervec_InnerUnrolling
 {
  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
  {
    dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
    assign_innervec_InnerUnrolling<Derived1, Derived2,
      Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, outer);
  }
 };
 template<typename Derived1, typename Derived2, int Stop>
 struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
 {
  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
 };
 /***************************************************************************
 * Part 3 : implementation of all cases
 ***************************************************************************/
 template<typename Derived1, typename Derived2,
         int Traversal = assign_traits<Derived1, Derived2>::Traversal,
         int Unrolling = assign_traits<Derived1, Derived2>::Unrolling,
         int Version = Specialized>
 struct assign_impl;
 /************************
 *** Default traversal ***
 ************************/
 template<typename Derived1, typename Derived2, int Unrolling, int Version>
 struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
 {
  static inline void run(Derived1 &, const Derived2 &) { }
 };
 template<typename Derived1, typename Derived2, int Version>
 struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
 {
  typedef typename Derived1::Index Index;
  static inline void run(Derived1 &dst, const Derived2 &src)
  {
    const Index innerSize = dst.innerSize();
    const Index outerSize = dst.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      for(Index inner = 0; inner < innerSize; ++inner)
        dst.copyCoeffByOuterInner(outer, inner, src);
  }
 };
 template<typename Derived1, typename Derived2, int Version>
 struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
 {
  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
  {
    assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
      ::run(dst, src);
  }
 };
 template<typename Derived1, typename Derived2, int Version>
 struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
 {
  typedef typename Derived1::Index Index;
  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
  {
    const Index outerSize = dst.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
        ::run(dst, src, outer);
  }
 };
 /***********************
 *** Linear traversal ***
 ***********************/
 template<typename Derived1, typename Derived2, int Version>
 struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
 {
  typedef typename Derived1::Index Index;
  static inline void run(Derived1 &dst, const Derived2 &src)
  {
    const Index size = dst.size();
    for(Index i = 0; i < size; ++i)
      dst.copyCoeff(i, src);
  }
 };
 template<typename Derived1, typename Derived2, int Version>
 struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
 {
  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
  {
    assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
      ::run(dst, src);
  }
 };
 /**************************
 *** Inner vectorization ***
 **************************/
 template<typename Derived1, typename Derived2, int Version>
 struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling, Version>
 {
  typedef typename Derived1::Index Index;
  static inline void run(Derived1 &dst, const Derived2 &src)
  {
    const Index innerSize = dst.innerSize();
    const Index outerSize = dst.outerSize();
    const Index packetSize = packet_traits<typename Derived1::Scalar>::size;
    for(Index outer = 0; outer < outerSize; ++outer)
      for(Index inner = 0; inner < innerSize; inner+=packetSize)
        dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, inner, src);
  }
 };
 template<typename Derived1, typename Derived2, int Version>
 struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling, Version>
 {
  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
  {
    assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
      ::run(dst, src);
  }
 };
 template<typename Derived1, typename Derived2, int Version>
 struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling, Version>
 {
  typedef typename Derived1::Index Index;
  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
  {
    const Index outerSize = dst.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      assign_innervec_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
        ::run(dst, src, outer);
  }
 };
 /***************************
 *** Linear vectorization ***
 ***************************/
 template <bool IsAligned = false>
 struct unaligned_assign_impl
 {
  template <typename Derived, typename OtherDerived>
  static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, typename Derived::Index, typename Derived::Index) {}
 };
 template <>
 struct unaligned_assign_impl<false>
 {
  // MSVC must not inline this functions. If it does, it fails to optimize the
  // packet access path.
 #ifdef _MSC_VER
  template <typename Derived, typename OtherDerived>
  static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end)
 #else
  template <typename Derived, typename OtherDerived>
  static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end)
 #endif
  {
    for (typename Derived::Index index = start; index < end; ++index)
      dst.copyCoeff(index, src);
  }
 };
 template<typename Derived1, typename Derived2, int Version>
 struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, Version>
 {
  typedef typename Derived1::Index Index;
  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
  {
    const Index size = dst.size();
    typedef packet_traits<typename Derived1::Scalar> PacketTraits;
    enum {
      packetSize = PacketTraits::size,
      dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
      srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
    };
    const Index alignedStart = assign_traits<Derived1,Derived2>::DstIsAligned ? 0
                             : internal::first_aligned(&dst.coeffRef(0), size);
    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
    unaligned_assign_impl<assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
    for(Index index = alignedStart; index < alignedEnd; index += packetSize)
    {
      dst.template copyPacket<Derived2, dstAlignment, srcAlignment>(index, src);
    }
    unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
  }
 };
 template<typename Derived1, typename Derived2, int Version>
 struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling, Version>
 {
  typedef typename Derived1::Index Index;
  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
  {
    enum { size = Derived1::SizeAtCompileTime,
           packetSize = packet_traits<typename Derived1::Scalar>::size,
           alignedSize = (size/packetSize)*packetSize };
    assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src);
    assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
  }
 };
 /**************************
 *** Slice vectorization ***
 ***************************/
 template<typename Derived1, typename Derived2, int Version>
 struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Version>
 {
  typedef typename Derived1::Index Index;
  static inline void run(Derived1 &dst, const Derived2 &src)
  {
    typedef typename Derived1::Scalar Scalar;
    typedef packet_traits<Scalar> PacketTraits;
    enum {
      packetSize = PacketTraits::size,
      alignable = PacketTraits::AlignedOnScalar,
      dstIsAligned = assign_traits<Derived1,Derived2>::DstIsAligned,
      dstAlignment = alignable ? Aligned : int(dstIsAligned),
      srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
    };
    const Scalar *dst_ptr = &dst.coeffRef(0,0);
    if((!bool(dstIsAligned)) && (Index(dst_ptr) % sizeof(Scalar))>0)
    {
      // the pointer is not aligend-on scalar, so alignment is not possible
      return assign_impl<Derived1,Derived2,DefaultTraversal,NoUnrolling>::run(dst, src);
    }
    const Index packetAlignedMask = packetSize - 1;
    const Index innerSize = dst.innerSize();
    const Index outerSize = dst.outerSize();
    const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
    Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned(dst_ptr, innerSize);
    for(Index outer = 0; outer < outerSize; ++outer)
    {
      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
      // do the non-vectorizable part of the assignment
      for(Index inner = 0; inner<alignedStart ; ++inner)
        dst.copyCoeffByOuterInner(outer, inner, src);
      // do the vectorizable part of the assignment
      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
        dst.template copyPacketByOuterInner<Derived2, dstAlignment, Unaligned>(outer, inner, src);
      // do the non-vectorizable part of the assignment
      for(Index inner = alignedEnd; inner<innerSize ; ++inner)
        dst.copyCoeffByOuterInner(outer, inner, src);
      alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
    }
  }
 };
 } // end namespace internal
 /***************************************************************************
 * Part 4 : implementation of DenseBase methods
 ***************************************************************************/
 template<typename Derived>
 template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
@ -499,90 +27,62 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
  EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
 #ifdef EIGEN_DEBUG_ASSIGN
  internal::assign_traits<Derived, OtherDerived>::debug();
 #endif
  eigen_assert(rows() == other.rows() && cols() == other.cols());
-  internal::assign_impl<Derived, OtherDerived, int(SameType) ? int(internal::assign_traits<Derived, OtherDerived>::Traversal)
+  internal::call_assignment_no_alias(derived(),other.derived());
-                                                       : int(InvalidTraversal)>::run(derived(),other.derived());
+  
 #ifndef EIGEN_NO_DEBUG
  checkTransposeAliasing(other.derived());
 #endif
  return derived();
 }
 namespace internal {
 template<typename Derived, typename OtherDerived,
         bool EvalBeforeAssigning = (int(internal::traits<OtherDerived>::Flags) & EvalBeforeAssigningBit) != 0,
         bool NeedToTranspose = ((int(Derived::RowsAtCompileTime) == 1 && int(OtherDerived::ColsAtCompileTime) == 1)
                              |   // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
                                  // revert to || as soon as not needed anymore.
                                  (int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1))
                              && int(Derived::SizeAtCompileTime) != 1>
 struct assign_selector;
 template<typename Derived, typename OtherDerived>
 struct assign_selector<Derived,OtherDerived,false,false> {
  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
  template<typename ActualDerived, typename ActualOtherDerived>
  static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { other.evalTo(dst); return dst; }
 };
 template<typename Derived, typename OtherDerived>
 struct assign_selector<Derived,OtherDerived,true,false> {
  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
 };
 template<typename Derived, typename OtherDerived>
 struct assign_selector<Derived,OtherDerived,false,true> {
  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
  template<typename ActualDerived, typename ActualOtherDerived>
  static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { Transpose<ActualDerived> dstTrans(dst); other.evalTo(dstTrans); return dst; }
 };
 template<typename Derived, typename OtherDerived>
 struct assign_selector<Derived,OtherDerived,true,true> {
  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
 };
 } // end namespace internal
 template<typename Derived>
 template<typename OtherDerived>
 EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
 {
-  return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
+  internal::call_assignment(derived(), other.derived());
  return derived();
 }
 template<typename Derived>
 EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
 {
-  return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
+  internal::call_assignment(derived(), other.derived());
  return derived();
 }
 template<typename Derived>
 EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
 {
-  return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
+  internal::call_assignment(derived(), other.derived());
  return derived();
 }
 template<typename Derived>
 template <typename OtherDerived>
 EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
 {
-  return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
+  internal::call_assignment(derived(), other.derived());
  return derived();
 }
 template<typename Derived>
 template <typename OtherDerived>
 EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
 {
-  return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
+  internal::call_assignment(derived(), other.derived());
  return derived();
 }
 template<typename Derived>
 template<typename OtherDerived>
 EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
 {
-  return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
+  other.derived().evalTo(derived());
  return derived();
 }
 } // end namespace Eigen
--- a/eigenlib/Eigen/src/Core/AssignEvaluator.h
+++ b/eigenlib/Eigen/src/Core/AssignEvaluator.h
@ -0,0 +1,918 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
 // Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_ASSIGN_EVALUATOR_H
 #define EIGEN_ASSIGN_EVALUATOR_H
 namespace Eigen {
 // This implementation is based on Assign.h
 namespace internal {
 /***************************************************************************
 * Part 1 : the logic deciding a strategy for traversal and unrolling       *
 ***************************************************************************/
 // copy_using_evaluator_traits is based on assign_traits
 template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
 struct copy_using_evaluator_traits
 {
  typedef typename DstEvaluator::XprType Dst;
  typedef typename Dst::Scalar DstScalar;
  enum {
    DstFlags = DstEvaluator::Flags,
    SrcFlags = SrcEvaluator::Flags
  };
 public:
  enum {
    DstAlignment = DstEvaluator::Alignment,
    SrcAlignment = SrcEvaluator::Alignment,
    DstHasDirectAccess = DstFlags & DirectAccessBit,
    JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
  };
 private:
  enum {
    InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
              : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
              : int(Dst::RowsAtCompileTime),
    InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
              : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
              : int(Dst::MaxRowsAtCompileTime),
    OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
    MaxSizeAtCompileTime = Dst::SizeAtCompileTime
  };
  // TODO distinguish between linear traversal and inner-traversals
  typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
  typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;
  enum {
    LinearPacketSize = unpacket_traits<LinearPacketType>::size,
    InnerPacketSize = unpacket_traits<InnerPacketType>::size
  };
 public:
  enum {
    LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
    InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
  };
 private:
  enum {
    DstIsRowMajor = DstFlags&RowMajorBit,
    SrcIsRowMajor = SrcFlags&RowMajorBit,
    StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
    MightVectorize = bool(StorageOrdersAgree)
                  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
                  && bool(functor_traits<AssignFunc>::PacketAccess),
    MayInnerVectorize  = MightVectorize
                       && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
                       && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
                       && (EIGEN_UNALIGNED_VECTORIZE  || int(JointAlignment)>=int(InnerRequiredAlignment)),
    MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
    MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
                       && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
         so it's only good for large enough sizes. */
    MaySliceVectorize  = bool(MightVectorize) && bool(DstHasDirectAccess)
                       && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
      /* slice vectorization can be slow, so we only want it if the slices are big, which is
         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
         in a fixed-size matrix
         However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
  };
 public:
  enum {
    Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal)
              : int(MayInnerVectorize)   ? int(InnerVectorizedTraversal)
              : int(MayLinearVectorize)  ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)   ? int(SliceVectorizedTraversal)
              : int(MayLinearize)        ? int(LinearTraversal)
                                         : int(DefaultTraversal),
    Vectorized = int(Traversal) == InnerVectorizedTraversal
              || int(Traversal) == LinearVectorizedTraversal
              || int(Traversal) == SliceVectorizedTraversal
  };
  typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
 private:
  enum {
    ActualPacketSize    = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
                        : Vectorized ? InnerPacketSize
                        : 1,
    UnrollingLimit      = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
    MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
                       && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
    MayUnrollInner      = int(InnerSize) != Dynamic
                       && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
  };
 public:
  enum {
    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
                ? (
                    int(MayUnrollCompletely) ? int(CompleteUnrolling)
                  : int(MayUnrollInner)      ? int(InnerUnrolling)
                                             : int(NoUnrolling)
                  )
              : int(Traversal) == int(LinearVectorizedTraversal)
                ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
                          ? int(CompleteUnrolling)
                          : int(NoUnrolling) )
              : int(Traversal) == int(LinearTraversal)
                ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) 
                                              : int(NoUnrolling) )
 #if EIGEN_UNALIGNED_VECTORIZE
              : int(Traversal) == int(SliceVectorizedTraversal)
                ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
                                         : int(NoUnrolling) )
 #endif
              : int(NoUnrolling)
  };
 #ifdef EIGEN_DEBUG_ASSIGN
  static void debug()
  {
    std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
    std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
    std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(DstAlignment)
    EIGEN_DEBUG_VAR(SrcAlignment)
    EIGEN_DEBUG_VAR(LinearRequiredAlignment)
    EIGEN_DEBUG_VAR(InnerRequiredAlignment)
    EIGEN_DEBUG_VAR(JointAlignment)
    EIGEN_DEBUG_VAR(InnerSize)
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(LinearPacketSize)
    EIGEN_DEBUG_VAR(InnerPacketSize)
    EIGEN_DEBUG_VAR(ActualPacketSize)
    EIGEN_DEBUG_VAR(StorageOrdersAgree)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearize)
    EIGEN_DEBUG_VAR(MayInnerVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
    EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(MayUnrollCompletely)
    EIGEN_DEBUG_VAR(MayUnrollInner)
    std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
    std::cerr << std::endl;
  }
 #endif
 };
 /***************************************************************************
 * Part 2 : meta-unrollers
 ***************************************************************************/
 /************************
 *** Default traversal ***
 ************************/
 template<typename Kernel, int Index, int Stop>
 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
 {
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;
  enum {
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime
  };
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    kernel.assignCoeffByOuterInner(outer, inner);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
  }
 };
 template<typename Kernel, int Stop>
 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
 };
 template<typename Kernel, int Index_, int Stop>
 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
  {
    kernel.assignCoeffByOuterInner(outer, Index_);
    copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
  }
 };
 template<typename Kernel, int Stop>
 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
 };
 /***********************
 *** Linear traversal ***
 ***********************/
 template<typename Kernel, int Index, int Stop>
 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
  {
    kernel.assignCoeff(Index);
    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
  }
 };
 template<typename Kernel, int Stop>
 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
 };
 /**************************
 *** Inner vectorization ***
 **************************/
 template<typename Kernel, int Index, int Stop>
 struct copy_using_evaluator_innervec_CompleteUnrolling
 {
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;
  typedef typename Kernel::PacketType PacketType;
  enum {
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime,
    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
    DstAlignment = Kernel::AssignmentTraits::DstAlignment
  };
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
    enum { NextIndex = Index + unpacket_traits<PacketType>::size };
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
  }
 };
 template<typename Kernel, int Stop>
 struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
 };
 template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
 struct copy_using_evaluator_innervec_InnerUnrolling
 {
  typedef typename Kernel::PacketType PacketType;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
  {
    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
    enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
    copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
  }
 };
 template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
 struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
 };
 /***************************************************************************
 * Part 3 : implementation of all cases
 ***************************************************************************/
 // dense_assignment_loop is based on assign_impl
 template<typename Kernel,
         int Traversal = Kernel::AssignmentTraits::Traversal,
         int Unrolling = Kernel::AssignmentTraits::Unrolling>
 struct dense_assignment_loop;
 /************************
 *** Default traversal ***
 ************************/
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
 {
  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
  {
    for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
      for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
        kernel.assignCoeffByOuterInner(outer, inner);
      }
    }
  }
 };
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
 };
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    const Index outerSize = kernel.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
  }
 };
 /***************************
 *** Linear vectorization ***
 ***************************/
 // The goal of unaligned_dense_assignment_loop is simply to factorize the handling
 // of the non vectorizable beginning and ending parts
 template <bool IsAligned = false>
 struct unaligned_dense_assignment_loop
 {
  // if IsAligned = true, then do nothing
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
 };
 template <>
 struct unaligned_dense_assignment_loop<false>
 {
  // MSVC must not inline this functions. If it does, it fails to optimize the
  // packet access path.
  // FIXME check which version exhibits this issue
 #if EIGEN_COMP_MSVC
  template <typename Kernel>
  static EIGEN_DONT_INLINE void run(Kernel &kernel,
                                    Index start,
                                    Index end)
 #else
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
                                      Index start,
                                      Index end)
 #endif
  {
    for (Index index = start; index < end; ++index)
      kernel.assignCoeff(index);
  }
 };
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index size = kernel.size();
    typedef typename Kernel::Scalar Scalar;
    typedef typename Kernel::PacketType PacketType;
    enum {
      requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
      packetSize = unpacket_traits<PacketType>::size,
      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
      dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
                                                            : int(Kernel::AssignmentTraits::DstAlignment),
      srcAlignment = Kernel::AssignmentTraits::JointAlignment
    };
    const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
    unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
    for(Index index = alignedStart; index < alignedEnd; index += packetSize)
      kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
    unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
  }
 };
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    typedef typename Kernel::PacketType PacketType;
    enum { size = DstXprType::SizeAtCompileTime,
           packetSize =unpacket_traits<PacketType>::size,
           alignedSize = (size/packetSize)*packetSize };
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
  }
 };
 /**************************
 *** Inner vectorization ***
 **************************/
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
 {
  typedef typename Kernel::PacketType PacketType;
  enum {
    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
    DstAlignment = Kernel::AssignmentTraits::DstAlignment
  };
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
    const Index packetSize = unpacket_traits<PacketType>::size;
    for(Index outer = 0; outer < outerSize; ++outer)
      for(Index inner = 0; inner < innerSize; inner+=packetSize)
        kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
  }
 };
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
 };
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    typedef typename Kernel::AssignmentTraits Traits;
    const Index outerSize = kernel.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
                                                   Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
  }
 };
 /***********************
 *** Linear traversal ***
 ***********************/
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index size = kernel.size();
    for(Index i = 0; i < size; ++i)
      kernel.assignCoeff(i);
  }
 };
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
 };
 /**************************
 *** Slice vectorization ***
 ***************************/
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
 {
  EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
  {
    typedef typename Kernel::Scalar Scalar;
    typedef typename Kernel::PacketType PacketType;
    enum {
      packetSize = unpacket_traits<PacketType>::size,
      requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
      alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
      dstAlignment = alignable ? int(requestedAlignment)
                               : int(Kernel::AssignmentTraits::DstAlignment)
    };
    const Scalar *dst_ptr = kernel.dstDataPtr();
    if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
    {
      // the pointer is not aligend-on scalar, so alignment is not possible
      return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
    }
    const Index packetAlignedMask = packetSize - 1;
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
    const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
    Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
    for(Index outer = 0; outer < outerSize; ++outer)
    {
      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
      // do the non-vectorizable part of the assignment
      for(Index inner = 0; inner<alignedStart ; ++inner)
        kernel.assignCoeffByOuterInner(outer, inner);
      // do the vectorizable part of the assignment
      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
        kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
      // do the non-vectorizable part of the assignment
      for(Index inner = alignedEnd; inner<innerSize ; ++inner)
        kernel.assignCoeffByOuterInner(outer, inner);
      alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
    }
  }
 };
 #if EIGEN_UNALIGNED_VECTORIZE
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
 {
  EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    typedef typename Kernel::PacketType PacketType;
    enum { size = DstXprType::InnerSizeAtCompileTime,
           packetSize =unpacket_traits<PacketType>::size,
           vectorizableSize = (size/packetSize)*packetSize };
    for(Index outer = 0; outer < kernel.outerSize(); ++outer)
    {
      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
    }
  }
 };
 #endif
 /***************************************************************************
 * Part 4 : Generic dense assignment kernel
 ***************************************************************************/
 // This class generalize the assignment of a coefficient (or packet) from one dense evaluator
 // to another dense writable evaluator.
 // It is parametrized by the two evaluators, and the actual assignment functor.
 // This abstraction level permits to keep the evaluation loops as simple and as generic as possible.
 // One can customize the assignment using this generic dense_assignment_kernel with different
 // functors, or by completely overloading it, by-passing a functor.
 template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
 class generic_dense_assignment_kernel
 {
 protected:
  typedef typename DstEvaluatorTypeT::XprType DstXprType;
  typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
 public:
  typedef DstEvaluatorTypeT DstEvaluatorType;
  typedef SrcEvaluatorTypeT SrcEvaluatorType;
  typedef typename DstEvaluatorType::Scalar Scalar;
  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
  typedef typename AssignmentTraits::PacketType PacketType;
  EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
    : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
  {
    #ifdef EIGEN_DEBUG_ASSIGN
    AssignmentTraits::debug();
    #endif
  }
  EIGEN_DEVICE_FUNC Index size() const        { return m_dstExpr.size(); }
  EIGEN_DEVICE_FUNC Index innerSize() const   { return m_dstExpr.innerSize(); }
  EIGEN_DEVICE_FUNC Index outerSize() const   { return m_dstExpr.outerSize(); }
  EIGEN_DEVICE_FUNC Index rows() const        { return m_dstExpr.rows(); }
  EIGEN_DEVICE_FUNC Index cols() const        { return m_dstExpr.cols(); }
  EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
  EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
  EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
  /// Assign src(row,col) to dst(row,col) through the assignment functor.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
  {
    m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
  }
  /// \sa assignCoeff(Index,Index)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
  {
    m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
  }
  /// \sa assignCoeff(Index,Index)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
  {
    Index row = rowIndexByOuterInner(outer, inner); 
    Index col = colIndexByOuterInner(outer, inner); 
    assignCoeff(row, col);
  }
  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
  {
    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
  }
  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
  {
    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
  }
  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
  {
    Index row = rowIndexByOuterInner(outer, inner); 
    Index col = colIndexByOuterInner(outer, inner);
    assignPacket<StoreMode,LoadMode,PacketType>(row, col);
  }
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
  {
    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::RowsAtCompileTime) == 1 ? 0
      : int(Traits::ColsAtCompileTime) == 1 ? inner
      : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
      : inner;
  }
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
  {
    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::ColsAtCompileTime) == 1 ? 0
      : int(Traits::RowsAtCompileTime) == 1 ? inner
      : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
      : outer;
  }
  EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
  {
    return m_dstExpr.data();
  }
 protected:
  DstEvaluatorType& m_dst;
  const SrcEvaluatorType& m_src;
  const Functor &m_functor;
  // TODO find a way to avoid the needs of the original expression
  DstXprType& m_dstExpr;
 };
 /***************************************************************************
 * Part 5 : Entry point for dense rectangular assignment
 ***************************************************************************/
 template<typename DstXprType, typename SrcXprType, typename Functor>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
 {
  typedef evaluator<DstXprType> DstEvaluatorType;
  typedef evaluator<SrcXprType> SrcEvaluatorType;
  SrcEvaluatorType srcEvaluator(src);
  // NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
  // we need to resize the destination after the source evaluator has been created.
  Index dstRows = src.rows();
  Index dstCols = src.cols();
  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
    dst.resize(dstRows, dstCols);
  DstEvaluatorType dstEvaluator(dst);
  typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
  dense_assignment_loop<Kernel>::run(kernel);
 }
 template<typename DstXprType, typename SrcXprType>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
 {
  call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
 }
 /***************************************************************************
 * Part 6 : Generic assignment
 ***************************************************************************/
 // Based on the respective shapes of the destination and source,
 // the class AssignmentKind determine the kind of assignment mechanism.
 // AssignmentKind must define a Kind typedef.
 template<typename DstShape, typename SrcShape> struct AssignmentKind;
 // Assignement kind defined in this file:
 struct Dense2Dense {};
 struct EigenBase2EigenBase {};
 template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
 template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
 // This is the main assignment class
 template< typename DstXprType, typename SrcXprType, typename Functor,
          typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
          typename EnableIf = void>
 struct Assignment;
 // The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
 // Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite complicated.
 // So this intermediate function removes everything related to "assume-aliasing" such that Assignment
 // does not has to bother about these annoying details.
 template<typename Dst, typename Src>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 void call_assignment(Dst& dst, const Src& src)
 {
  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
 }
 template<typename Dst, typename Src>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 void call_assignment(const Dst& dst, const Src& src)
 {
  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
 }
 // Deal with "assume-aliasing"
 template<typename Dst, typename Src, typename Func>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
 {
  typename plain_matrix_type<Src>::type tmp(src);
  call_assignment_no_alias(dst, tmp, func);
 }
 template<typename Dst, typename Src, typename Func>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
 {
  call_assignment_no_alias(dst, src, func);
 }
 // by-pass "assume-aliasing"
 // When there is no aliasing, we require that 'dst' has been properly resized
 template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
 {
  call_assignment_no_alias(dst.expression(), src, func);
 }
 template<typename Dst, typename Src, typename Func>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
 {
  enum {
    NeedToTranspose = (    (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
                        || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
                      ) && int(Dst::SizeAtCompileTime) != 1
  };
  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
  ActualDstType actualDst(dst);
  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
  Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
 }
 template<typename Dst, typename Src>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 void call_assignment_no_alias(Dst& dst, const Src& src)
 {
  call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
 }
 template<typename Dst, typename Src, typename Func>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
 {
  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
  Assignment<Dst,Src,Func>::run(dst, src, func);
 }
 template<typename Dst, typename Src>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
 {
  call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
 }
 // forward declaration
 template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
 // Generic Dense to Dense assignment
 // Note that the last template argument "Weak" is needed to make it possible to perform
 // both partial specialization+SFINAE without ambiguous specialization
 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
 struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
 {
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
  {
 #ifndef EIGEN_NO_DEBUG
    internal::check_for_aliasing(dst, src);
 #endif
    call_dense_assignment_loop(dst, src, func);
  }
 };
 // Generic assignment through evalTo.
 // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
 // Note that the last template argument "Weak" is needed to make it possible to perform
 // both partial specialization+SFINAE without ambiguous specialization
 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
 struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
 {
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
  {
    Index dstRows = src.rows();
    Index dstCols = src.cols();
    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
      dst.resize(dstRows, dstCols);
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    src.evalTo(dst);
  }
  // NOTE The following two functions are templated to avoid their instanciation if not needed
  //      This is needed because some expressions supports evalTo only and/or have 'void' as scalar type.
  template<typename SrcScalarType>
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
  {
    Index dstRows = src.rows();
    Index dstCols = src.cols();
    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
      dst.resize(dstRows, dstCols);
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    src.addTo(dst);
  }
  template<typename SrcScalarType>
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
  {
    Index dstRows = src.rows();
    Index dstCols = src.cols();
    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
      dst.resize(dstRows, dstCols);
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    src.subTo(dst);
  }
 };
 } // namespace internal
 } // end namespace Eigen
 #endif // EIGEN_ASSIGN_EVALUATOR_H
--- a/eigenlib/Eigen/src/Core/Assign_MKL.h
+++ b/eigenlib/Eigen/src/Core/Assign_MKL.h
@ -1,6 +1,7 @@
 /*
 Copyright (c) 2011, Intel Corporation. All rights reserved.
-
+ Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
 Redistribution and use in source and binary forms, with or without modification,
 are permitted provided that the following conditions are met:
@ -37,17 +38,13 @@ namespace Eigen {
 namespace internal {
-template<typename Op> struct vml_call
+template<typename Dst, typename Src>
 { enum { IsSupported = 0 }; };
 template<typename Dst, typename Src, typename UnaryOp>
 class vml_assign_traits
 {
  private:
    enum {
      DstHasDirectAccess = Dst::Flags & DirectAccessBit,
      SrcHasDirectAccess = Src::Flags & DirectAccessBit,
      StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
      InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
                : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
@ -57,165 +54,120 @@ class vml_assign_traits
                    : int(Dst::MaxRowsAtCompileTime),
      MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
-      MightEnableVml =  vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
+      MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
                     && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
      MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
      VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
-      LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
+      LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD
      MayEnableVml = MightEnableVml && LargeEnough,
      MayLinearize = MayEnableVml && MightLinearize
    };
  public:
    enum {
-      Traversal = MayLinearize ? LinearVectorizedTraversal
+      EnableVml = MightEnableVml && LargeEnough,
-                : MayEnableVml ? InnerVectorizedTraversal
+      Traversal = MightLinearize ? LinearTraversal : DefaultTraversal
                : DefaultTraversal
    };
 };
-template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
+#define EIGEN_PP_EXPAND(ARG) ARG
         int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
 struct vml_assign_impl
  : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
 {
 };
 template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
 struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
 {
  typedef typename Derived1::Scalar Scalar;
  typedef typename Derived1::Index Index;
  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
  {
    // in case we want to (or have to) skip VML at runtime we can call:
    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
    const Index innerSize = dst.innerSize();
    const Index outerSize = dst.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer) {
      const Scalar *src_ptr = src.IsRowMajor ?  &(src.nestedExpression().coeffRef(outer,0)) :
                                                &(src.nestedExpression().coeffRef(0, outer));
      Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
      vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
    }
  }
 };
 template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
 struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
 {
  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
  {
    // in case we want to (or have to) skip VML at runtime we can call:
    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
    vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
  }
 };
 // Macroses
 #define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
  template<typename Derived1, typename Derived2, typename UnaryOp> \
  struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized>  {  \
    static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
      vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
    } \
  };
 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
 EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)
 #if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
-#define  EIGEN_MKL_VML_MODE VML_HA
+#define EIGEN_VMLMODE_EXPAND_LA , VML_HA
 #else
-#define  EIGEN_MKL_VML_MODE VML_LA
+#define EIGEN_VMLMODE_EXPAND_LA , VML_LA
 #endif
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)     \
+#define EIGEN_VMLMODE_EXPAND__ 
-  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
+
-    enum { IsSupported = 1 };                                                    \
+#define EIGEN_VMLMODE_PREFIX_LA vm
-    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/,        \
+#define EIGEN_VMLMODE_PREFIX__  v
-                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {    \
+#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE)
-      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst);                           \
+
-    }                                                                            \
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)                                           \
  template< typename DstXprType, typename SrcXprNested>                                                                         \
  struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>,   \
                   Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> {              \
    typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType;                                            \
    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) {                   \
      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());                                                       \
      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) {                                              \
        VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(),                                                        \
              (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) );                                           \
      } else {                                                                                                                  \
        const Index outerSize = dst.outerSize();                                                                                \
        for(Index outer = 0; outer < outerSize; ++outer) {                                                                      \
          const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :                             \
                                                      &(src.nestedExpression().coeffRef(0, outer));                             \
          EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));                           \
          VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr,                                                                      \
                (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE));                                             \
        }                                                                                                                       \
      }                                                                                                                         \
    }                                                                                                                           \
  };                                                                                                                            \
 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE)                                                         \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE)           \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE)
 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)                                                         \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE)
 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE)                                                              \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE)                                                               \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin,   Sin,   LA)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin,  Asin,  LA)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh,  Sinh,  LA)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos,   Cos,   LA)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos,  Acos,  LA)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh,  Cosh,  LA)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan,   Tan,   LA)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan,  Atan,  LA)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh,  Tanh,  LA)
 // EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs,   Abs,    _)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp,   Exp,   LA)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log,   Ln,    LA)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt,  Sqrt,  _)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr,   _)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg,      _)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round,  _)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor,  _)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil,  Ceil,   _)
 #define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)                                           \
  template< typename DstXprType, typename SrcXprNested, typename Plain>                                                       \
  struct Assignment<DstXprType, CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested,                       \
                    const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> >, assign_op<EIGENTYPE,EIGENTYPE>,    \
                   Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> {            \
    typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested,                                           \
                    const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType;                         \
    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) {                 \
      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());                                                     \
      VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other);                                       \
      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal)                                              \
      {                                                                                                                       \
        VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), exponent,                                                        \
              (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) );                                         \
      } else {                                                                                                                \
        const Index outerSize = dst.outerSize();                                                                              \
        for(Index outer = 0; outer < outerSize; ++outer) {                                                                    \
          const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.lhs().coeffRef(outer,0)) :                                        \
                                                      &(src.lhs().coeffRef(0, outer));                                        \
          EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));                         \
          VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent,                                                          \
                 (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE));                                          \
        }                                                                                                                     \
      }                                                                                                                       \
    }                                                                                                                         \
  };
-
+  
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)  \
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float,    float,         LA)
-  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double,   double,        LA)
-    enum { IsSupported = 1 };                                                    \
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8,  LA)
-    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/,        \
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA)
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {    \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                    \
      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode);                  \
    }                                                                            \
  };
 #define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)       \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
    enum { IsSupported = 1 };                                                    \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func,        \
                          int size, const EIGENTYPE* src, EIGENTYPE* dst) {      \
      EIGENTYPE exponent = func.m_exponent;                                      \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                    \
      VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent,               \
                        (VMLTYPE*)dst, &vmlMode);                                \
    }                                                                            \
  };
 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                   \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float)             \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)
 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)                \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8)   \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)
 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP)                        \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                         \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)
 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float)         \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)
 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)             \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8)  \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)
 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP)                     \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                      \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin,  Sin)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos,  Cos)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan,  Tan)
 //EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs,  Abs)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp,  Exp)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log,  Ln)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
 // The vm*powx functions are not avaibale in the windows version of MKL.
 #ifndef _WIN32
 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
 #endif
 } // end namespace internal
--- a/eigenlib/Eigen/src/Core/BandMatrix.h
+++ b/eigenlib/Eigen/src/Core/BandMatrix.h
@ -32,7 +32,7 @@ class BandMatrixBase : public EigenBase<Derived>
    };
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> DenseMatrixType;
-    typedef typename DenseMatrixType::Index Index;
+    typedef typename DenseMatrixType::StorageIndex StorageIndex;
    typedef typename internal::traits<Derived>::CoefficientsType CoefficientsType;
    typedef EigenBase<Derived> Base;
@ -161,15 +161,15 @@ class BandMatrixBase : public EigenBase<Derived>
  *
  * \brief Represents a rectangular matrix with a banded storage
  *
-  * \param _Scalar Numeric type, i.e. float, double, int
+  * \tparam _Scalar Numeric type, i.e. float, double, int
-  * \param Rows Number of rows, or \b Dynamic
+  * \tparam _Rows Number of rows, or \b Dynamic
-  * \param Cols Number of columns, or \b Dynamic
+  * \tparam _Cols Number of columns, or \b Dynamic
-  * \param Supers Number of super diagonal
+  * \tparam _Supers Number of super diagonal
-  * \param Subs Number of sub diagonal
+  * \tparam _Subs Number of sub diagonal
-  * \param _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
+  * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
-  *                 The former controls \ref TopicStorageOrders "storage order", and defaults to
+  *                  The former controls \ref TopicStorageOrders "storage order", and defaults to
-  *                 column-major. The latter controls whether the matrix represents a selfadjoint 
+  *                  column-major. The latter controls whether the matrix represents a selfadjoint
-  *                 matrix in which case either Supers of Subs have to be null.
+  *                  matrix in which case either Supers of Subs have to be null.
  *
  * \sa class TridiagonalMatrix
  */
@ -179,7 +179,7 @@ struct traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
 {
  typedef _Scalar Scalar;
  typedef Dense StorageKind;
-  typedef DenseIndex Index;
+  typedef Eigen::Index StorageIndex;
  enum {
    CoeffReadCost = NumTraits<Scalar>::ReadCost,
    RowsAtCompileTime = _Rows,
@ -201,10 +201,10 @@ class BandMatrix : public BandMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Sub
  public:
    typedef typename internal::traits<BandMatrix>::Scalar Scalar;
-    typedef typename internal::traits<BandMatrix>::Index Index;
+    typedef typename internal::traits<BandMatrix>::StorageIndex StorageIndex;
    typedef typename internal::traits<BandMatrix>::CoefficientsType CoefficientsType;
-    inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs)
+    explicit inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs)
      : m_coeffs(1+supers+subs,cols),
        m_rows(rows), m_supers(supers), m_subs(subs)
    {
@ -241,7 +241,7 @@ struct traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Opt
 {
  typedef typename _CoefficientsType::Scalar Scalar;
  typedef typename _CoefficientsType::StorageKind StorageKind;
-  typedef typename _CoefficientsType::Index Index;
+  typedef typename _CoefficientsType::StorageIndex StorageIndex;
  enum {
    CoeffReadCost = internal::traits<_CoefficientsType>::CoeffReadCost,
    RowsAtCompileTime = _Rows,
@ -264,9 +264,9 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT
    typedef typename internal::traits<BandMatrixWrapper>::Scalar Scalar;
    typedef typename internal::traits<BandMatrixWrapper>::CoefficientsType CoefficientsType;
-    typedef typename internal::traits<BandMatrixWrapper>::Index Index;
+    typedef typename internal::traits<BandMatrixWrapper>::StorageIndex StorageIndex;
-    inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs)
+    explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs)
      : m_coeffs(coeffs),
        m_rows(rows), m_supers(supers), m_subs(subs)
    {
@ -302,9 +302,9 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT
  *
  * \brief Represents a tridiagonal matrix with a compact banded storage
  *
-  * \param _Scalar Numeric type, i.e. float, double, int
+  * \tparam Scalar Numeric type, i.e. float, double, int
-  * \param Size Number of rows and cols, or \b Dynamic
+  * \tparam Size Number of rows and cols, or \b Dynamic
-  * \param _Options Can be 0 or \b SelfAdjoint
+  * \tparam Options Can be 0 or \b SelfAdjoint
  *
  * \sa class BandMatrix
  */
@ -312,9 +312,9 @@ template<typename Scalar, int Size, int Options>
 class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor>
 {
    typedef BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor> Base;
-    typedef typename Base::Index Index;
+    typedef typename Base::StorageIndex StorageIndex;
  public:
-    TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {}
+    explicit TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {}
    inline typename Base::template DiagonalIntReturnType<1>::Type super()
    { return Base::template diagonal<1>(); }
@ -327,6 +327,25 @@ class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint
  protected:
 };
 struct BandShape {};
 template<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options>
 struct evaluator_traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
  : public evaluator_traits_base<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
 {
  typedef BandShape Shape;
 };
 template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
 struct evaluator_traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
  : public evaluator_traits_base<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
 {
  typedef BandShape Shape;
 };
 template<> struct AssignmentKind<DenseShape,BandShape> { typedef EigenBase2EigenBase Kind; };
 } // end namespace internal
 } // end namespace Eigen
--- a/eigenlib/Eigen/src/Core/Block.h
+++ b/eigenlib/Eigen/src/Core/Block.h
@ -13,14 +13,70 @@
 namespace Eigen { 
 namespace internal {
 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
 struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
 {
  typedef typename traits<XprType>::Scalar Scalar;
  typedef typename traits<XprType>::StorageKind StorageKind;
  typedef typename traits<XprType>::XprKind XprKind;
  typedef typename ref_selector<XprType>::type XprTypeNested;
  typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
  enum{
    MatrixRows = traits<XprType>::RowsAtCompileTime,
    MatrixCols = traits<XprType>::ColsAtCompileTime,
    RowsAtCompileTime = MatrixRows == 0 ? 0 : BlockRows,
    ColsAtCompileTime = MatrixCols == 0 ? 0 : BlockCols,
    MaxRowsAtCompileTime = BlockRows==0 ? 0
                         : RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime)
                         : int(traits<XprType>::MaxRowsAtCompileTime),
    MaxColsAtCompileTime = BlockCols==0 ? 0
                         : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
                         : int(traits<XprType>::MaxColsAtCompileTime),
    XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
    IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
               : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
               : XprTypeIsRowMajor,
    HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),
    InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
    InnerStrideAtCompileTime = HasSameStorageOrderAsXprType
                             ? int(inner_stride_at_compile_time<XprType>::ret)
                             : int(outer_stride_at_compile_time<XprType>::ret),
    OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
                             ? int(outer_stride_at_compile_time<XprType>::ret)
                             : int(inner_stride_at_compile_time<XprType>::ret),
    // FIXME, this traits is rather specialized for dense object and it needs to be cleaned further
    FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
    FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
    Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,
    // FIXME DirectAccessBit should not be handled by expressions
    // 
    // Alignment is needed by MapBase's assertions
    // We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
    Alignment = 0
  };
 };
 template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,
         bool HasDirectAccess = internal::has_direct_access<XprType>::ret> class BlockImpl_dense;
 } // end namespace internal
 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl;
 /** \class Block
  * \ingroup Core_Module
  *
  * \brief Expression of a fixed-size or dynamic-size block
  *
-  * \param XprType the type of the expression in which we are taking a block
+  * \tparam XprType the type of the expression in which we are taking a block
-  * \param BlockRows the number of rows of the block we are taking at compile time (optional)
+  * \tparam BlockRows the number of rows of the block we are taking at compile time (optional)
-  * \param BlockCols the number of columns of the block we are taking at compile time (optional)
+  * \tparam BlockCols the number of columns of the block we are taking at compile time (optional)
  * \tparam InnerPanel is true, if the block maps to a set of rows of a row major matrix or
  *         to set of columns of a column major matrix (optional). The parameter allows to determine
  *         at compile time whether aligned access is possible on the block expression.
  *
  * This class represents an expression of either a fixed-size or dynamic-size block. It is the return
  * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
@ -44,62 +100,6 @@ namespace Eigen {
  *
  * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
  */
 namespace internal {
 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
 struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
 {
  typedef typename traits<XprType>::Scalar Scalar;
  typedef typename traits<XprType>::StorageKind StorageKind;
  typedef typename traits<XprType>::XprKind XprKind;
  typedef typename nested<XprType>::type XprTypeNested;
  typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
  enum{
    MatrixRows = traits<XprType>::RowsAtCompileTime,
    MatrixCols = traits<XprType>::ColsAtCompileTime,
    RowsAtCompileTime = MatrixRows == 0 ? 0 : BlockRows,
    ColsAtCompileTime = MatrixCols == 0 ? 0 : BlockCols,
    MaxRowsAtCompileTime = BlockRows==0 ? 0
                         : RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime)
                         : int(traits<XprType>::MaxRowsAtCompileTime),
    MaxColsAtCompileTime = BlockCols==0 ? 0
                         : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
                         : int(traits<XprType>::MaxColsAtCompileTime),
    XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
    IsDense = is_same<StorageKind,Dense>::value,
    IsRowMajor = (IsDense&&MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
               : (IsDense&&MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
               : XprTypeIsRowMajor,
    HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),
    InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
    InnerStrideAtCompileTime = HasSameStorageOrderAsXprType
                             ? int(inner_stride_at_compile_time<XprType>::ret)
                             : int(outer_stride_at_compile_time<XprType>::ret),
    OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
                             ? int(outer_stride_at_compile_time<XprType>::ret)
                             : int(inner_stride_at_compile_time<XprType>::ret),
    MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
                       && (InnerStrideAtCompileTime == 1)
                        ? PacketAccessBit : 0,
    MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0,
    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (traits<XprType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
    FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
    FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
    Flags0 = traits<XprType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
                                        DirectAccessBit |
                                        MaskPacketAccessBit |
                                        MaskAlignedBit),
    Flags = Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit
  };
 };
 template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,
         bool HasDirectAccess = internal::has_direct_access<XprType>::ret> class BlockImpl_dense;
 } // end namespace internal
 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl;
 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block
  : public BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind>
 {
@ -109,9 +109,12 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
    typedef Impl Base;
    EIGEN_GENERIC_PUBLIC_INTERFACE(Block)
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)
    typedef typename internal::remove_all<XprType>::type NestedExpression;
    /** Column or Row constructor
      */
    EIGEN_DEVICE_FUNC
    inline Block(XprType& xpr, Index i) : Impl(xpr,i)
    {
      eigen_assert( (i>=0) && (
@ -121,25 +124,27 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
    /** Fixed-size constructor
      */
-    inline Block(XprType& xpr, Index a_startRow, Index a_startCol)
+    EIGEN_DEVICE_FUNC
-      : Impl(xpr, a_startRow, a_startCol)
+    inline Block(XprType& xpr, Index startRow, Index startCol)
      : Impl(xpr, startRow, startCol)
    {
      EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
-      eigen_assert(a_startRow >= 0 && BlockRows >= 1 && a_startRow + BlockRows <= xpr.rows()
+      eigen_assert(startRow >= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows()
-             && a_startCol >= 0 && BlockCols >= 1 && a_startCol + BlockCols <= xpr.cols());
+             && startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols());
    }
    /** Dynamic-size constructor
      */
    EIGEN_DEVICE_FUNC
    inline Block(XprType& xpr,
-          Index a_startRow, Index a_startCol,
+          Index startRow, Index startCol,
          Index blockRows, Index blockCols)
-      : Impl(xpr, a_startRow, a_startCol, blockRows, blockCols)
+      : Impl(xpr, startRow, startCol, blockRows, blockCols)
    {
      eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
          && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
-      eigen_assert(a_startRow >= 0 && blockRows >= 0 && a_startRow  <= xpr.rows() - blockRows
+      eigen_assert(startRow >= 0 && blockRows >= 0 && startRow  <= xpr.rows() - blockRows
-          && a_startCol >= 0 && blockCols >= 0 && a_startCol <= xpr.cols() - blockCols);
+          && startCol >= 0 && blockCols >= 0 && startCol <= xpr.cols() - blockCols);
    }
 };
@ -150,14 +155,15 @@ class BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>
  : public internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel>
 {
    typedef internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel> Impl;
-    typedef typename XprType::Index Index;
+    typedef typename XprType::StorageIndex StorageIndex;
  public:
    typedef Impl Base;
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
-    inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
+    EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
-    inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol) : Impl(xpr, a_startRow, a_startCol) {}
+    EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {}
-    inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol, Index blockRows, Index blockCols)
+    EIGEN_DEVICE_FUNC
-      : Impl(xpr, a_startRow, a_startCol, blockRows, blockCols) {}
+    inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
      : Impl(xpr, startRow, startCol, blockRows, blockCols) {}
 };
 namespace internal {
@ -167,16 +173,18 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
  : public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel> >::type
 {
    typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
    typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
  public:
    typedef typename internal::dense_xpr_base<BlockType>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
-    class InnerIterator;
+    // class InnerIterator; // FIXME apparently never used
    /** Column or Row constructor
      */
    EIGEN_DEVICE_FUNC
    inline BlockImpl_dense(XprType& xpr, Index i)
      : m_xpr(xpr),
        // It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime,
@ -191,75 +199,76 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
    /** Fixed-size constructor
      */
-    inline BlockImpl_dense(XprType& xpr, Index a_startRow, Index a_startCol)
+    EIGEN_DEVICE_FUNC
-      : m_xpr(xpr), m_startRow(a_startRow), m_startCol(a_startCol),
+    inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
      : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
                    m_blockRows(BlockRows), m_blockCols(BlockCols)
    {}
    /** Dynamic-size constructor
      */
    EIGEN_DEVICE_FUNC
    inline BlockImpl_dense(XprType& xpr,
-          Index a_startRow, Index a_startCol,
+          Index startRow, Index startCol,
          Index blockRows, Index blockCols)
-      : m_xpr(xpr), m_startRow(a_startRow), m_startCol(a_startCol),
+      : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
                    m_blockRows(blockRows), m_blockCols(blockCols)
    {}
-    inline Index rows() const { return m_blockRows.value(); }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); }
-    inline Index cols() const { return m_blockCols.value(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); }
    EIGEN_DEVICE_FUNC
    inline Scalar& coeffRef(Index rowId, Index colId)
    {
      EIGEN_STATIC_ASSERT_LVALUE(XprType)
-      return m_xpr.const_cast_derived()
+      return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
               .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
    }
    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index rowId, Index colId) const
    {
-      return m_xpr.derived()
+      return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
               .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
    }
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const
    {
      return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value());
    }
    EIGEN_DEVICE_FUNC
    inline Scalar& coeffRef(Index index)
    {
      EIGEN_STATIC_ASSERT_LVALUE(XprType)
-      return m_xpr.const_cast_derived()
+      return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
-             .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+                            m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
                       m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
    }
    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index index) const
    {
-      return m_xpr.const_cast_derived()
+      return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
-             .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+                            m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
                       m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
    }
    EIGEN_DEVICE_FUNC
    inline const CoeffReturnType coeff(Index index) const
    {
-      return m_xpr
+      return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
-             .coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+                         m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
                    m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
    }
    template<int LoadMode>
    inline PacketScalar packet(Index rowId, Index colId) const
    {
-      return m_xpr.template packet<Unaligned>
+      return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value());
              (rowId + m_startRow.value(), colId + m_startCol.value());
    }
    template<int LoadMode>
    inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
    {
-      m_xpr.const_cast_derived().template writePacket<Unaligned>
+      m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val);
              (rowId + m_startRow.value(), colId + m_startCol.value(), val);
    }
    template<int LoadMode>
@ -273,40 +282,46 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
    template<int LoadMode>
    inline void writePacket(Index index, const PacketScalar& val)
    {
-      m_xpr.const_cast_derived().template writePacket<Unaligned>
+      m_xpr.template writePacket<Unaligned>
         (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
          m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val);
    }
    #ifdef EIGEN_PARSED_BY_DOXYGEN
    /** \sa MapBase::data() */
-    inline const Scalar* data() const;
+    EIGEN_DEVICE_FUNC inline const Scalar* data() const;
-    inline Index innerStride() const;
+    EIGEN_DEVICE_FUNC inline Index innerStride() const;
-    inline Index outerStride() const;
+    EIGEN_DEVICE_FUNC inline Index outerStride() const;
    #endif
-    const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const 
+    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
    { 
      return m_xpr; 
    }
    EIGEN_DEVICE_FUNC
    XprType& nestedExpression() { return m_xpr; }
-    Index startRow() const 
+    EIGEN_DEVICE_FUNC
    StorageIndex startRow() const
    { 
      return m_startRow.value(); 
    }
-    Index startCol() const 
+    EIGEN_DEVICE_FUNC
    StorageIndex startCol() const
    { 
      return m_startCol.value(); 
    }
  protected:
-    const typename XprType::Nested m_xpr;
+    XprTypeNested m_xpr;
-    const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
+    const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
-    const internal::variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
+    const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
-    const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;
+    const internal::variable_if_dynamic<StorageIndex, RowsAtCompileTime> m_blockRows;
-    const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_blockCols;
+    const internal::variable_if_dynamic<StorageIndex, ColsAtCompileTime> m_blockCols;
 };
 /** \internal Internal implementation of dense Blocks in the direct access case.*/
@ -315,6 +330,10 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
  : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> >
 {
    typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
    typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
    enum {
      XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0
    };
  public:
    typedef MapBase<BlockType> Base;
@ -323,42 +342,52 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
    /** Column or Row constructor
      */
    EIGEN_DEVICE_FUNC
    inline BlockImpl_dense(XprType& xpr, Index i)
-      : Base(internal::const_cast_ptr(&xpr.coeffRef(
+      : Base(xpr.data() + i * (    ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor)) 
-              (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0,
+                                || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()),
              (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)),
             BlockRows==1 ? 1 : xpr.rows(),
             BlockCols==1 ? 1 : xpr.cols()),
-        m_xpr(xpr)
+        m_xpr(xpr),
        m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
        m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)
    {
      init();
    }
    /** Fixed-size constructor
      */
    EIGEN_DEVICE_FUNC
    inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
-      : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr)
+      : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
        m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
    {
      init();
    }
    /** Dynamic-size constructor
      */
    EIGEN_DEVICE_FUNC
    inline BlockImpl_dense(XprType& xpr,
          Index startRow, Index startCol,
          Index blockRows, Index blockCols)
-      : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol)), blockRows, blockCols),
+      : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols),
-        m_xpr(xpr)
+        m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
    {
      init();
    }
-    const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const 
+    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
    { 
      return m_xpr; 
    }
    EIGEN_DEVICE_FUNC
    XprType& nestedExpression() { return m_xpr; }
    /** \sa MapBase::innerStride() */
    EIGEN_DEVICE_FUNC
    inline Index innerStride() const
    {
      return internal::traits<BlockType>::HasSameStorageOrderAsXprType
@ -367,11 +396,24 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
    }
    /** \sa MapBase::outerStride() */
    EIGEN_DEVICE_FUNC
    inline Index outerStride() const
    {
      return m_outerStride;
    }
    EIGEN_DEVICE_FUNC
    StorageIndex startRow() const
    {
      return m_startRow.value();
    }
    EIGEN_DEVICE_FUNC
    StorageIndex startCol() const
    {
      return m_startCol.value();
    }
  #ifndef __SUNPRO_CC
  // FIXME sunstudio is not friendly with the above friend...
  // META-FIXME there is no 'friend' keyword around here. Is this obsolete?
@ -380,6 +422,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** \internal used by allowAligned() */
    EIGEN_DEVICE_FUNC
    inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
      : Base(data, blockRows, blockCols), m_xpr(xpr)
    {
@ -388,6 +431,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
    #endif
  protected:
    EIGEN_DEVICE_FUNC
    void init()
    {
      m_outerStride = internal::traits<BlockType>::HasSameStorageOrderAsXprType
@ -395,7 +439,9 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
                    : m_xpr.innerStride();
    }
-    typename XprType::Nested m_xpr;
+    XprTypeNested m_xpr;
    const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
    const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
    Index m_outerStride;
 };
--- a/eigenlib/Eigen/src/Core/BooleanRedux.h
+++ b/eigenlib/Eigen/src/Core/BooleanRedux.h
@ -17,9 +17,10 @@ namespace internal {
 template<typename Derived, int UnrollCount>
 struct all_unroller
 {
  typedef typename Derived::ExpressionTraits Traits;
  enum {
-    col = (UnrollCount-1) / Derived::RowsAtCompileTime,
+    col = (UnrollCount-1) / Traits::RowsAtCompileTime,
-    row = (UnrollCount-1) % Derived::RowsAtCompileTime
+    row = (UnrollCount-1) % Traits::RowsAtCompileTime
  };
  static inline bool run(const Derived &mat)
@ -43,11 +44,12 @@ struct all_unroller<Derived, Dynamic>
 template<typename Derived, int UnrollCount>
 struct any_unroller
 {
  typedef typename Derived::ExpressionTraits Traits;
  enum {
-    col = (UnrollCount-1) / Derived::RowsAtCompileTime,
+    col = (UnrollCount-1) / Traits::RowsAtCompileTime,
-    row = (UnrollCount-1) % Derived::RowsAtCompileTime
+    row = (UnrollCount-1) % Traits::RowsAtCompileTime
  };
-
+  
  static inline bool run(const Derived &mat)
  {
    return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col);
@ -78,19 +80,19 @@ struct any_unroller<Derived, Dynamic>
 template<typename Derived>
 inline bool DenseBase<Derived>::all() const
 {
  typedef internal::evaluator<Derived> Evaluator;
  enum {
    unroll = SizeAtCompileTime != Dynamic
-          && CoeffReadCost != Dynamic
+          && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
          && NumTraits<Scalar>::AddCost != Dynamic
          && SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
  };
  Evaluator evaluator(derived());
  if(unroll)
-    return internal::all_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived());
+    return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator);
  else
  {
    for(Index j = 0; j < cols(); ++j)
      for(Index i = 0; i < rows(); ++i)
-        if (!coeff(i, j)) return false;
+        if (!evaluator.coeff(i, j)) return false;
    return true;
  }
 }
@ -102,19 +104,19 @@ inline bool DenseBase<Derived>::all() const
 template<typename Derived>
 inline bool DenseBase<Derived>::any() const
 {
  typedef internal::evaluator<Derived> Evaluator;
  enum {
    unroll = SizeAtCompileTime != Dynamic
-          && CoeffReadCost != Dynamic
+          && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
          && NumTraits<Scalar>::AddCost != Dynamic
          && SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
  };
  Evaluator evaluator(derived());
  if(unroll)
-    return internal::any_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived());
+    return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator);
  else
  {
    for(Index j = 0; j < cols(); ++j)
      for(Index i = 0; i < rows(); ++i)
-        if (coeff(i, j)) return true;
+        if (evaluator.coeff(i, j)) return true;
    return false;
  }
 }
@ -124,7 +126,7 @@ inline bool DenseBase<Derived>::any() const
  * \sa all(), any()
  */
 template<typename Derived>
-inline typename DenseBase<Derived>::Index DenseBase<Derived>::count() const
+inline Eigen::Index DenseBase<Derived>::count() const
 {
  return derived().template cast<bool>().template cast<Index>().sum();
 }
@ -136,7 +138,11 @@ inline typename DenseBase<Derived>::Index DenseBase<Derived>::count() const
 template<typename Derived>
 inline bool DenseBase<Derived>::hasNaN() const
 {
 #if EIGEN_COMP_MSVC || (defined __FAST_MATH__)
  return derived().array().isNaN().any();
 #else
  return !((derived().array()==derived().array()).all());
 #endif
 }
 /** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values.
@ -146,7 +152,11 @@ inline bool DenseBase<Derived>::hasNaN() const
 template<typename Derived>
 inline bool DenseBase<Derived>::allFinite() const
 {
 #if EIGEN_COMP_MSVC || (defined __FAST_MATH__)
  return derived().array().isFinite().all();
 #else
  return !((derived()-derived()).hasNaN());
 #endif
 }
 } // end namespace Eigen
--- a/eigenlib/Eigen/src/Core/CMakeLists.txt
+++ b/eigenlib/Eigen/src/Core/CMakeLists.txt
@ -1,10 +0,0 @@
 FILE(GLOB Eigen_Core_SRCS "*.h")
 INSTALL(FILES
  ${Eigen_Core_SRCS}
  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core COMPONENT Devel
  )
 ADD_SUBDIRECTORY(products)
 ADD_SUBDIRECTORY(util)
 ADD_SUBDIRECTORY(arch)
--- a/eigenlib/Eigen/src/Core/CommaInitializer.h
+++ b/eigenlib/Eigen/src/Core/CommaInitializer.h
@ -22,14 +22,14 @@ namespace Eigen {
  * the return type of MatrixBase::operator<<, and most of the time this is the only
  * way it is used.
  *
-  * \sa \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished()
+  * \sa \blank \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished()
  */
 template<typename XprType>
 struct CommaInitializer
 {
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::Index Index;
  EIGEN_DEVICE_FUNC
  inline CommaInitializer(XprType& xpr, const Scalar& s)
    : m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1)
  {
@ -37,6 +37,7 @@ struct CommaInitializer
  }
  template<typename OtherDerived>
  EIGEN_DEVICE_FUNC
  inline CommaInitializer(XprType& xpr, const DenseBase<OtherDerived>& other)
    : m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows())
  {
@ -46,6 +47,7 @@ struct CommaInitializer
  /* Copy/Move constructor which transfers ownership. This is crucial in 
   * absence of return value optimization to avoid assertions during destruction. */
  // FIXME in C++11 mode this could be replaced by a proper RValue constructor
  EIGEN_DEVICE_FUNC
  inline CommaInitializer(const CommaInitializer& o)
  : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) {
    // Mark original object as finished. In absence of R-value references we need to const_cast:
@ -55,6 +57,7 @@ struct CommaInitializer
  }
  /* inserts a scalar value in the target matrix */
  EIGEN_DEVICE_FUNC
  CommaInitializer& operator,(const Scalar& s)
  {
    if (m_col==m_xpr.cols())
@ -74,11 +77,10 @@ struct CommaInitializer
  /* inserts a matrix expression in the target matrix */
  template<typename OtherDerived>
  EIGEN_DEVICE_FUNC
  CommaInitializer& operator,(const DenseBase<OtherDerived>& other)
  {
-    if(other.cols()==0 || other.rows()==0)
+    if (m_col==m_xpr.cols() && (other.cols()!=0 || other.rows()!=m_currentBlockRows))
      return *this;
    if (m_col==m_xpr.cols())
    {
      m_row+=m_currentBlockRows;
      m_col = 0;
@ -86,24 +88,22 @@ struct CommaInitializer
      eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows()
        && "Too many rows passed to comma initializer (operator<<)");
    }
-    eigen_assert(m_col<m_xpr.cols()
+    eigen_assert((m_col + other.cols() <= m_xpr.cols())
      && "Too many coefficients passed to comma initializer (operator<<)");
    eigen_assert(m_currentBlockRows==other.rows());
-    if (OtherDerived::SizeAtCompileTime != Dynamic)
+    m_xpr.template block<OtherDerived::RowsAtCompileTime, OtherDerived::ColsAtCompileTime>
-      m_xpr.template block<OtherDerived::RowsAtCompileTime != Dynamic ? OtherDerived::RowsAtCompileTime : 1,
+                    (m_row, m_col, other.rows(), other.cols()) = other;
                              OtherDerived::ColsAtCompileTime != Dynamic ? OtherDerived::ColsAtCompileTime : 1>
                    (m_row, m_col) = other;
    else
      m_xpr.block(m_row, m_col, other.rows(), other.cols()) = other;
    m_col += other.cols();
    return *this;
  }
  EIGEN_DEVICE_FUNC
  inline ~CommaInitializer()
 #if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS
  EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception)
 #endif
  {
-    eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows()
+      finished();
         && m_col == m_xpr.cols()
         && "Too few coefficients passed to comma initializer (operator<<)");
  }
  /** \returns the built matrix once all its coefficients have been set.
@ -113,9 +113,15 @@ struct CommaInitializer
    * quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished());
    * \endcode
    */
-  inline XprType& finished() { return m_xpr; }
+  EIGEN_DEVICE_FUNC
  inline XprType& finished() {
      eigen_assert(((m_row+m_currentBlockRows) == m_xpr.rows() || m_xpr.cols() == 0)
           && m_col == m_xpr.cols()
           && "Too few coefficients passed to comma initializer (operator<<)");
      return m_xpr;
  }
-  XprType& m_xpr;   // target expression
+  XprType& m_xpr;           // target expression
  Index m_row;              // current row id
  Index m_col;              // current col id
  Index m_currentBlockRows; // current block height
--- a/eigenlib/Eigen/src/Core/ConditionEstimator.h
+++ b/eigenlib/Eigen/src/Core/ConditionEstimator.h
@ -0,0 +1,175 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@google.com)
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_CONDITIONESTIMATOR_H
 #define EIGEN_CONDITIONESTIMATOR_H
 namespace Eigen {
 namespace internal {
 template <typename Vector, typename RealVector, bool IsComplex>
 struct rcond_compute_sign {
  static inline Vector run(const Vector& v) {
    const RealVector v_abs = v.cwiseAbs();
    return (v_abs.array() == static_cast<typename Vector::RealScalar>(0))
            .select(Vector::Ones(v.size()), v.cwiseQuotient(v_abs));
  }
 };
 // Partial specialization to avoid elementwise division for real vectors.
 template <typename Vector>
 struct rcond_compute_sign<Vector, Vector, false> {
  static inline Vector run(const Vector& v) {
    return (v.array() < static_cast<typename Vector::RealScalar>(0))
           .select(-Vector::Ones(v.size()), Vector::Ones(v.size()));
  }
 };
 /**
  * \returns an estimate of ||inv(matrix)||_1 given a decomposition of
  * \a matrix that implements .solve() and .adjoint().solve() methods.
  *
  * This function implements Algorithms 4.1 and 5.1 from
  *   http://www.maths.manchester.ac.uk/~higham/narep/narep135.pdf
  * which also forms the basis for the condition number estimators in
  * LAPACK. Since at most 10 calls to the solve method of dec are
  * performed, the total cost is O(dims^2), as opposed to O(dims^3)
  * needed to compute the inverse matrix explicitly.
  *
  * The most common usage is in estimating the condition number
  * ||matrix||_1 * ||inv(matrix)||_1. The first term ||matrix||_1 can be
  * computed directly in O(n^2) operations.
  *
  * Supports the following decompositions: FullPivLU, PartialPivLU, LDLT, and
  * LLT.
  *
  * \sa FullPivLU, PartialPivLU, LDLT, LLT.
  */
 template <typename Decomposition>
 typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomposition& dec)
 {
  typedef typename Decomposition::MatrixType MatrixType;
  typedef typename Decomposition::Scalar Scalar;
  typedef typename Decomposition::RealScalar RealScalar;
  typedef typename internal::plain_col_type<MatrixType>::type Vector;
  typedef typename internal::plain_col_type<MatrixType, RealScalar>::type RealVector;
  const bool is_complex = (NumTraits<Scalar>::IsComplex != 0);
  eigen_assert(dec.rows() == dec.cols());
  const Index n = dec.rows();
  if (n == 0)
    return 0;
  // Disable Index to float conversion warning
 #ifdef __INTEL_COMPILER
  #pragma warning push
  #pragma warning ( disable : 2259 )
 #endif
  Vector v = dec.solve(Vector::Ones(n) / Scalar(n));
 #ifdef __INTEL_COMPILER
  #pragma warning pop
 #endif
  // lower_bound is a lower bound on
  //   ||inv(matrix)||_1  = sup_v ||inv(matrix) v||_1 / ||v||_1
  // and is the objective maximized by the ("super-") gradient ascent
  // algorithm below.
  RealScalar lower_bound = v.template lpNorm<1>();
  if (n == 1)
    return lower_bound;
  // Gradient ascent algorithm follows: We know that the optimum is achieved at
  // one of the simplices v = e_i, so in each iteration we follow a
  // super-gradient to move towards the optimal one.
  RealScalar old_lower_bound = lower_bound;
  Vector sign_vector(n);
  Vector old_sign_vector;
  Index v_max_abs_index = -1;
  Index old_v_max_abs_index = v_max_abs_index;
  for (int k = 0; k < 4; ++k)
  {
    sign_vector = internal::rcond_compute_sign<Vector, RealVector, is_complex>::run(v);
    if (k > 0 && !is_complex && sign_vector == old_sign_vector) {
      // Break if the solution stagnated.
      break;
    }
    // v_max_abs_index = argmax |real( inv(matrix)^T * sign_vector )|
    v = dec.adjoint().solve(sign_vector);
    v.real().cwiseAbs().maxCoeff(&v_max_abs_index);
    if (v_max_abs_index == old_v_max_abs_index) {
      // Break if the solution stagnated.
      break;
    }
    // Move to the new simplex e_j, where j = v_max_abs_index.
    v = dec.solve(Vector::Unit(n, v_max_abs_index));  // v = inv(matrix) * e_j.
    lower_bound = v.template lpNorm<1>();
    if (lower_bound <= old_lower_bound) {
      // Break if the gradient step did not increase the lower_bound.
      break;
    }
    if (!is_complex) {
      old_sign_vector = sign_vector;
    }
    old_v_max_abs_index = v_max_abs_index;
    old_lower_bound = lower_bound;
  }
  // The following calculates an independent estimate of ||matrix||_1 by
  // multiplying matrix by a vector with entries of slowly increasing
  // magnitude and alternating sign:
  //   v_i = (-1)^{i} (1 + (i / (dim-1))), i = 0,...,dim-1.
  // This improvement to Hager's algorithm above is due to Higham. It was
  // added to make the algorithm more robust in certain corner cases where
  // large elements in the matrix might otherwise escape detection due to
  // exact cancellation (especially when op and op_adjoint correspond to a
  // sequence of backsubstitutions and permutations), which could cause
  // Hager's algorithm to vastly underestimate ||matrix||_1.
  Scalar alternating_sign(RealScalar(1));
  for (Index i = 0; i < n; ++i) {
    // The static_cast is needed when Scalar is a complex and RealScalar implements expression templates
    v[i] = alternating_sign * static_cast<RealScalar>(RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1))));
    alternating_sign = -alternating_sign;
  }
  v = dec.solve(v);
  const RealScalar alternate_lower_bound = (2 * v.template lpNorm<1>()) / (3 * RealScalar(n));
  return numext::maxi(lower_bound, alternate_lower_bound);
 }
 /** \brief Reciprocal condition number estimator.
  *
  * Computing a decomposition of a dense matrix takes O(n^3) operations, while
  * this method estimates the condition number quickly and reliably in O(n^2)
  * operations.
  *
  * \returns an estimate of the reciprocal condition number
  * (1 / (||matrix||_1 * ||inv(matrix)||_1)) of matrix, given ||matrix||_1 and
  * its decomposition. Supports the following decompositions: FullPivLU,
  * PartialPivLU, LDLT, and LLT.
  *
  * \sa FullPivLU, PartialPivLU, LDLT, LLT.
  */
 template <typename Decomposition>
 typename Decomposition::RealScalar
 rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Decomposition& dec)
 {
  typedef typename Decomposition::RealScalar RealScalar;
  eigen_assert(dec.rows() == dec.cols());
  if (dec.rows() == 0)              return RealScalar(1);
  if (matrix_norm == RealScalar(0)) return RealScalar(0);
  if (dec.rows() == 1)              return RealScalar(1);
  const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec);
  return (inverse_matrix_norm == RealScalar(0) ? RealScalar(0)
                                               : (RealScalar(1) / inverse_matrix_norm) / matrix_norm);
 }
 }  // namespace internal
 }  // namespace Eigen
 #endif
--- a/eigenlib/Eigen/src/Core/CoreEvaluators.h
+++ b/eigenlib/Eigen/src/Core/CoreEvaluators.h
--- a/eigenlib/Eigen/src/Core/CoreIterators.h
+++ b/eigenlib/Eigen/src/Core/CoreIterators.h
@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@ -15,47 +15,113 @@ namespace Eigen {
 /* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core
 */
-/** \ingroup SparseCore_Module
+namespace internal {
-  * \class InnerIterator
+
-  * \brief An InnerIterator allows to loop over the element of a sparse (or dense) matrix or expression
+template<typename XprType, typename EvaluatorKind>
-  *
+class inner_iterator_selector;
-  * todo
+
 }
 /** \class InnerIterator
  * \brief An InnerIterator allows to loop over the element of any matrix expression.
  * 
  * \warning To be used with care because an evaluator is constructed every time an InnerIterator iterator is constructed.
  * 
  * TODO: add a usage example
  */
-
+template<typename XprType>
-// generic version for dense matrix and expressions
+class InnerIterator
 template<typename Derived> class DenseBase<Derived>::InnerIterator
 {
-  protected:
+protected:
-    typedef typename Derived::Scalar Scalar;
+  typedef internal::inner_iterator_selector<XprType, typename internal::evaluator_traits<XprType>::Kind> IteratorType;
-    typedef typename Derived::Index Index;
+  typedef internal::evaluator<XprType> EvaluatorType;
-
+  typedef typename internal::traits<XprType>::Scalar Scalar;
-    enum { IsRowMajor = (Derived::Flags&RowMajorBit)==RowMajorBit };
+public:
-  public:
+  /** Construct an iterator over the \a outerId -th row or column of \a xpr */
-    EIGEN_STRONG_INLINE InnerIterator(const Derived& expr, Index outer)
+  InnerIterator(const XprType &xpr, const Index &outerId)
-      : m_expression(expr), m_inner(0), m_outer(outer), m_end(expr.innerSize())
+    : m_eval(xpr), m_iter(m_eval, outerId, xpr.innerSize())
-    {}
+  {}
-
+  
-    EIGEN_STRONG_INLINE Scalar value() const
+  /// \returns the value of the current coefficient.
-    {
+  EIGEN_STRONG_INLINE Scalar value() const          { return m_iter.value(); }
-      return (IsRowMajor) ? m_expression.coeff(m_outer, m_inner)
+  /** Increment the iterator \c *this to the next non-zero coefficient.
-                          : m_expression.coeff(m_inner, m_outer);
+    * Explicit zeros are not skipped over. To skip explicit zeros, see class SparseView
-    }
+    */
-
+  EIGEN_STRONG_INLINE InnerIterator& operator++()   { m_iter.operator++(); return *this; }
-    EIGEN_STRONG_INLINE InnerIterator& operator++() { m_inner++; return *this; }
+  /// \returns the column or row index of the current coefficient.
-
+  EIGEN_STRONG_INLINE Index index() const           { return m_iter.index(); }
-    EIGEN_STRONG_INLINE Index index() const { return m_inner; }
+  /// \returns the row index of the current coefficient.
-    inline Index row() const { return IsRowMajor ? m_outer : index(); }
+  EIGEN_STRONG_INLINE Index row() const             { return m_iter.row(); }
-    inline Index col() const { return IsRowMajor ? index() : m_outer; }
+  /// \returns the column index of the current coefficient.
-
+  EIGEN_STRONG_INLINE Index col() const             { return m_iter.col(); }
-    EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; }
+  /// \returns \c true if the iterator \c *this still references a valid coefficient.
-
+  EIGEN_STRONG_INLINE operator bool() const         { return m_iter; }
-  protected:
+  
-    const Derived& m_expression;
+protected:
-    Index m_inner;
+  EvaluatorType m_eval;
-    const Index m_outer;
+  IteratorType m_iter;
-    const Index m_end;
+private:
  // If you get here, then you're not using the right InnerIterator type, e.g.:
  //   SparseMatrix<double,RowMajor> A;
  //   SparseMatrix<double>::InnerIterator it(A,0);
  template<typename T> InnerIterator(const EigenBase<T>&,Index outer);
 };
 namespace internal {
 // Generic inner iterator implementation for dense objects
 template<typename XprType>
 class inner_iterator_selector<XprType, IndexBased>
 {
 protected:
  typedef evaluator<XprType> EvaluatorType;
  typedef typename traits<XprType>::Scalar Scalar;
  enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit };
 public:
  EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &innerSize)
    : m_eval(eval), m_inner(0), m_outer(outerId), m_end(innerSize)
  {}
  EIGEN_STRONG_INLINE Scalar value() const
  {
    return (IsRowMajor) ? m_eval.coeff(m_outer, m_inner)
                        : m_eval.coeff(m_inner, m_outer);
  }
  EIGEN_STRONG_INLINE inner_iterator_selector& operator++() { m_inner++; return *this; }
  EIGEN_STRONG_INLINE Index index() const { return m_inner; }
  inline Index row() const { return IsRowMajor ? m_outer : index(); }
  inline Index col() const { return IsRowMajor ? index() : m_outer; }
  EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; }
 protected:
  const EvaluatorType& m_eval;
  Index m_inner;
  const Index m_outer;
  const Index m_end;
 };
 // For iterator-based evaluator, inner-iterator is already implemented as
 // evaluator<>::InnerIterator
 template<typename XprType>
 class inner_iterator_selector<XprType, IteratorBased>
 : public evaluator<XprType>::InnerIterator
 {
 protected:
  typedef typename evaluator<XprType>::InnerIterator Base;
  typedef evaluator<XprType> EvaluatorType;
 public:
  EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &/*innerSize*/)
    : Base(eval, outerId)
  {}  
 };
 } // end namespace internal
 } // end namespace Eigen
 #endif // EIGEN_COREITERATORS_H
--- a/eigenlib/Eigen/src/Core/CwiseBinaryOp.h
+++ b/eigenlib/Eigen/src/Core/CwiseBinaryOp.h
@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@ -13,26 +13,6 @@
 namespace Eigen {
 /** \class CwiseBinaryOp
  * \ingroup Core_Module
  *
  * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
  *
  * \param BinaryOp template functor implementing the operator
  * \param Lhs the type of the left-hand side
  * \param Rhs the type of the right-hand side
  *
  * This class represents an expression  where a coefficient-wise binary operator is applied to two expressions.
  * It is the return type of binary operators, by which we mean only those binary operators where
  * both the left-hand side and the right-hand side are Eigen expressions.
  * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp.
  *
  * Most of the time, this is the only way that it is used, so you typically don't have to name
  * CwiseBinaryOp types explicitly.
  *
  * \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
  */
 namespace internal {
 template<typename BinaryOp, typename Lhs, typename Rhs>
 struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
@ -52,76 +32,75 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
  // we still want to handle the case when the result type is different.
  typedef typename result_of<
                     BinaryOp(
-                       typename Lhs::Scalar,
+                       const typename Lhs::Scalar&,
-                       typename Rhs::Scalar
+                       const typename Rhs::Scalar&
                     )
                   >::type Scalar;
-  typedef typename promote_storage_type<typename traits<Lhs>::StorageKind,
+  typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind,
-                                           typename traits<Rhs>::StorageKind>::ret StorageKind;
+                                              typename traits<Rhs>::StorageKind,
-  typedef typename promote_index_type<typename traits<Lhs>::Index,
+                                              BinaryOp>::ret StorageKind;
-                                         typename traits<Rhs>::Index>::type Index;
+  typedef typename promote_index_type<typename traits<Lhs>::StorageIndex,
                                      typename traits<Rhs>::StorageIndex>::type StorageIndex;
  typedef typename Lhs::Nested LhsNested;
  typedef typename Rhs::Nested RhsNested;
  typedef typename remove_reference<LhsNested>::type _LhsNested;
  typedef typename remove_reference<RhsNested>::type _RhsNested;
  enum {
-    LhsCoeffReadCost = _LhsNested::CoeffReadCost,
+    Flags = cwise_promote_storage_order<typename traits<Lhs>::StorageKind,typename traits<Rhs>::StorageKind,_LhsNested::Flags & RowMajorBit,_RhsNested::Flags & RowMajorBit>::value
    RhsCoeffReadCost = _RhsNested::CoeffReadCost,
    LhsFlags = _LhsNested::Flags,
    RhsFlags = _RhsNested::Flags,
    SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
    StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit),
    Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
        HereditaryBits
      | (int(LhsFlags) & int(RhsFlags) &
           ( AlignedBit
           | (StorageOrdersAgree ? LinearAccessBit : 0)
           | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
           )
        )
     ),
    Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
    CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits<BinaryOp>::Cost
  };
 };
 } // end namespace internal
 // we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
 // that would take two operands of different types. If there were such an example, then this check should be
 // moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as
 // currently they take only one typename Scalar template parameter.
 // It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
 // So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
 // add together a float matrix and a double matrix.
 #define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
  EIGEN_STATIC_ASSERT((internal::functor_is_product_like<BINOP>::ret \
                        ? int(internal::scalar_product_traits<LHS, RHS>::Defined) \
                        : int(internal::is_same<LHS, RHS>::value)), \
    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
 template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
 class CwiseBinaryOpImpl;
-template<typename BinaryOp, typename Lhs, typename Rhs>
+/** \class CwiseBinaryOp
-class CwiseBinaryOp : internal::no_assignment_operator,
+  * \ingroup Core_Module
  *
  * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
  *
  * \tparam BinaryOp template functor implementing the operator
  * \tparam LhsType the type of the left-hand side
  * \tparam RhsType the type of the right-hand side
  *
  * This class represents an expression  where a coefficient-wise binary operator is applied to two expressions.
  * It is the return type of binary operators, by which we mean only those binary operators where
  * both the left-hand side and the right-hand side are Eigen expressions.
  * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp.
  *
  * Most of the time, this is the only way that it is used, so you typically don't have to name
  * CwiseBinaryOp types explicitly.
  *
  * \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
  */
 template<typename BinaryOp, typename LhsType, typename RhsType>
 class CwiseBinaryOp : 
  public CwiseBinaryOpImpl<
-          BinaryOp, Lhs, Rhs,
+          BinaryOp, LhsType, RhsType,
-          typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
+          typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
-                                           typename internal::traits<Rhs>::StorageKind>::ret>
+                                                        typename internal::traits<RhsType>::StorageKind,
                                                        BinaryOp>::ret>,
  internal::no_assignment_operator
 {
  public:
    typedef typename internal::remove_all<BinaryOp>::type Functor;
    typedef typename internal::remove_all<LhsType>::type Lhs;
    typedef typename internal::remove_all<RhsType>::type Rhs;
    typedef typename CwiseBinaryOpImpl<
-        BinaryOp, Lhs, Rhs,
+        BinaryOp, LhsType, RhsType,
-        typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
+        typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
-                                         typename internal::traits<Rhs>::StorageKind>::ret>::Base Base;
+                                                      typename internal::traits<Rhs>::StorageKind,
                                                      BinaryOp>::ret>::Base Base;
    EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp)
-    typedef typename internal::nested<Lhs>::type LhsNested;
+    typedef typename internal::ref_selector<LhsType>::type LhsNested;
-    typedef typename internal::nested<Rhs>::type RhsNested;
+    typedef typename internal::ref_selector<RhsType>::type RhsNested;
    typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
    typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp())
      : m_lhs(aLhs), m_rhs(aRhs), m_functor(func)
    {
@ -131,6 +110,7 @@ class CwiseBinaryOp : internal::no_assignment_operator,
      eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());
    }
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index rows() const {
      // return the fixed size type if available to enable compile time optimizations
      if (internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic)
@ -138,6 +118,7 @@ class CwiseBinaryOp : internal::no_assignment_operator,
      else
        return m_lhs.rows();
    }
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index cols() const {
      // return the fixed size type if available to enable compile time optimizations
      if (internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic)
@ -147,10 +128,13 @@ class CwiseBinaryOp : internal::no_assignment_operator,
    }
    /** \returns the left hand side nested expression */
    EIGEN_DEVICE_FUNC
    const _LhsNested& lhs() const { return m_lhs; }
    /** \returns the right hand side nested expression */
    EIGEN_DEVICE_FUNC
    const _RhsNested& rhs() const { return m_rhs; }
    /** \returns the functor representing the binary operation */
    EIGEN_DEVICE_FUNC
    const BinaryOp& functor() const { return m_functor; }
  protected:
@ -159,41 +143,13 @@ class CwiseBinaryOp : internal::no_assignment_operator,
    const BinaryOp m_functor;
 };
-template<typename BinaryOp, typename Lhs, typename Rhs>
+// Generic API dispatcher
-class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense>
+template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
-  : public internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
+class CwiseBinaryOpImpl
  : public internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
 {
-    typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> Derived;
+public:
-  public:
+  typedef typename internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
    typedef typename internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE( Derived )
    EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
    {
      return derived().functor()(derived().lhs().coeff(rowId, colId),
                                 derived().rhs().coeff(rowId, colId));
    }
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
    {
      return derived().functor().packetOp(derived().lhs().template packet<LoadMode>(rowId, colId),
                                          derived().rhs().template packet<LoadMode>(rowId, colId));
    }
    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
    {
      return derived().functor()(derived().lhs().coeff(index),
                                 derived().rhs().coeff(index));
    }
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
    {
      return derived().functor().packetOp(derived().lhs().template packet<LoadMode>(index),
                                          derived().rhs().template packet<LoadMode>(index));
    }
 };
 /** replaces \c *this by \c *this - \a other.
@ -205,8 +161,7 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
 {
-  SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
+  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
  tmp = other.derived();
  return derived();
 }
@ -219,11 +174,11 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
 {
-  SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
+  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
  tmp = other.derived();
  return derived();
 }
 } // end namespace Eigen
 #endif // EIGEN_CWISE_BINARY_OP_H
--- a/eigenlib/Eigen/src/Core/CwiseNullaryOp.h
+++ b/eigenlib/Eigen/src/Core/CwiseNullaryOp.h
@ -12,13 +12,24 @@
 namespace Eigen {
 namespace internal {
 template<typename NullaryOp, typename PlainObjectType>
 struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
 {
  enum {
    Flags = traits<PlainObjectType>::Flags & RowMajorBit
  };
 };
 } // namespace internal
 /** \class CwiseNullaryOp
  * \ingroup Core_Module
  *
  * \brief Generic expression of a matrix where all coefficients are defined by a functor
  *
-  * \param NullaryOp template functor implementing the operator
+  * \tparam NullaryOp template functor implementing the operator
-  * \param PlainObjectType the underlying plain matrix/array type
+  * \tparam PlainObjectType the underlying plain matrix/array type
  *
  * This class represents an expression of a generic nullary operator.
  * It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods,
@ -27,68 +38,49 @@ namespace Eigen {
  * However, if you want to write a function returning such an expression, you
  * will need to use this class.
  *
-  * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr()
+  * The functor NullaryOp must expose one of the following method:
    <table class="manual">
    <tr            ><td>\c operator()() </td><td>if the procedural generation does not depend on the coefficient entries (e.g., random numbers)</td></tr>
    <tr class="alt"><td>\c operator()(Index i)</td><td>if the procedural generation makes sense for vectors only and that it depends on the coefficient index \c i (e.g., linspace) </td></tr>
    <tr            ><td>\c operator()(Index i,Index j)</td><td>if the procedural generation depends on the matrix coordinates \c i, \c j (e.g., to generate a checkerboard with 0 and 1)</td></tr>
    </table>
  * It is also possible to expose the last two operators if the generation makes sense for matrices but can be optimized for vectors.
  *
  * See DenseBase::NullaryExpr(Index,const CustomNullaryOp&) for an example binding
  * C++11 random number generators.
  *
  * A nullary expression can also be used to implement custom sophisticated matrix manipulations
  * that cannot be covered by the existing set of natively supported matrix manipulations.
  * See this \ref TopicCustomizing_NullaryExpr "page" for some examples and additional explanations
  * on the behavior of CwiseNullaryOp.
  *
  * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr
  */
 namespace internal {
 template<typename NullaryOp, typename PlainObjectType>
-struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
+class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator
 {
  enum {
    Flags = (traits<PlainObjectType>::Flags
      & (  HereditaryBits
         | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
         | (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
      | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
    CoeffReadCost = functor_traits<NullaryOp>::Cost
  };
 };
 }
 template<typename NullaryOp, typename PlainObjectType>
 class CwiseNullaryOp : internal::no_assignment_operator,
  public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type
 {
  public:
    typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)
-    CwiseNullaryOp(Index nbRows, Index nbCols, const NullaryOp& func = NullaryOp())
+    EIGEN_DEVICE_FUNC
-      : m_rows(nbRows), m_cols(nbCols), m_functor(func)
+    CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp())
      : m_rows(rows), m_cols(cols), m_functor(func)
    {
-      eigen_assert(nbRows >= 0
+      eigen_assert(rows >= 0
-            && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == nbRows)
+            && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
-            &&  nbCols >= 0
+            &&  cols >= 0
-            && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols));
+            && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
    }
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); }
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); }
    EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
    {
      return m_functor(rowId, colId);
    }
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
    {
      return m_functor.packetOp(rowId, colId);
    }
    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
    {
      return m_functor(index);
    }
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
    {
      return m_functor.packetOp(index);
    }
    /** \returns the functor representing the nullary operation */
    EIGEN_DEVICE_FUNC
    const NullaryOp& functor() const { return m_functor; }
  protected:
@ -113,10 +105,10 @@ class CwiseNullaryOp : internal::no_assignment_operator,
  */
 template<typename Derived>
 template<typename CustomNullaryOp>
-EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
 DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
 {
-  return CwiseNullaryOp<CustomNullaryOp, Derived>(rows, cols, func);
+  return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
 }
 /** \returns an expression of a matrix defined by a custom functor \a func
@ -132,16 +124,19 @@ DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f
  *
  * The template parameter \a CustomNullaryOp is the type of the functor.
  *
  * Here is an example with C++11 random generators: \include random_cpp11.cpp
  * Output: \verbinclude random_cpp11.out
  * 
  * \sa class CwiseNullaryOp
  */
 template<typename Derived>
 template<typename CustomNullaryOp>
-EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
 DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, Derived>(1, size, func);
+  if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, PlainObject>(1, size, func);
-  else return CwiseNullaryOp<CustomNullaryOp, Derived>(size, 1, func);
+  else return CwiseNullaryOp<CustomNullaryOp, PlainObject>(size, 1, func);
 }
 /** \returns an expression of a matrix defined by a custom functor \a func
@ -155,19 +150,19 @@ DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
  */
 template<typename Derived>
 template<typename CustomNullaryOp>
-EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
 DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
 {
-  return CwiseNullaryOp<CustomNullaryOp, Derived>(RowsAtCompileTime, ColsAtCompileTime, func);
+  return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
 }
 /** \returns an expression of a constant matrix of value \a value
  *
-  * The parameters \a nbRows and \a nbCols are the number of rows and of columns of
+  * The parameters \a rows and \a cols are the number of rows and of columns of
  * the returned matrix. Must be compatible with this DenseBase type.
  *
  * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
-  * it is redundant to pass \a nbRows and \a nbCols as arguments, so Zero() should be used
+  * it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used
  * instead.
  *
  * The template parameter \a CustomNullaryOp is the type of the functor.
@ -176,9 +171,9 @@ DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
-DenseBase<Derived>::Constant(Index nbRows, Index nbCols, const Scalar& value)
+DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
 {
-  return DenseBase<Derived>::NullaryExpr(nbRows, nbCols, internal::scalar_constant_op<Scalar>(value));
+  return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_constant_op<Scalar>(value));
 }
 /** \returns an expression of a constant matrix of value \a value
@ -220,46 +215,33 @@ DenseBase<Derived>::Constant(const Scalar& value)
  return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op<Scalar>(value));
 }
-/**
+/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(Index,const Scalar&,const Scalar&)
  * \brief Sets a linearly space vector.
  *
-  * The function generates 'size' equally spaced values in the closed interval [low,high].
+  * \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&)
  * This particular version of LinSpaced() uses sequential access, i.e. vector access is
  * assumed to be a(0), a(1), ..., a(size). This assumption allows for better vectorization
  * and yields faster code than the random access version.
  *
  * When size is set to 1, a vector of length 1 containing 'high' is returned.
  *
  * \only_for_vectors
  *
  * Example: \include DenseBase_LinSpaced_seq.cpp
  * Output: \verbinclude DenseBase_LinSpaced_seq.out
  *
  * \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Index,Scalar,Scalar), CwiseNullaryOp
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
 DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,false>(low,high,size));
+  return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
 }
-/**
+/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(const Scalar&,const Scalar&)
-  * \copydoc DenseBase::LinSpaced(Sequential_t, Index, const Scalar&, const Scalar&)
+  *
-  * Special version for fixed size types which does not require the size parameter.
+  * \sa LinSpaced(Scalar,Scalar)
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
 DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
-  return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,false>(low,high,Derived::SizeAtCompileTime));
+  return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
 }
 /**
-  * \brief Sets a linearly space vector.
+  * \brief Sets a linearly spaced vector.
  *
  * The function generates 'size' equally spaced values in the closed interval [low,high].
  * When size is set to 1, a vector of length 1 containing 'high' is returned.
@ -269,14 +251,24 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
  * Example: \include DenseBase_LinSpaced.cpp
  * Output: \verbinclude DenseBase_LinSpaced.out
  *
-  * \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Sequential_t,Index,const Scalar&,const Scalar&,Index), CwiseNullaryOp
+  * For integer scalar types, an even spacing is possible if and only if the length of the range,
  * i.e., \c high-low is a scalar multiple of \c size-1, or if \c size is a scalar multiple of the
  * number of values \c high-low+1 (meaning each value can be repeated the same number of time).
  * If one of these two considions is not satisfied, then \c high is lowered to the largest value
  * satisfying one of this constraint.
  * Here are some examples:
  *
  * Example: \include DenseBase_LinSpacedInt.cpp
  * Output: \verbinclude DenseBase_LinSpacedInt.out
  *
  * \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
 DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,true>(low,high,size));
+  return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
 }
 /**
@ -289,7 +281,7 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
-  return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,true>(low,high,Derived::SizeAtCompileTime));
+  return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
 }
 /** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
@ -297,9 +289,10 @@ template<typename Derived>
 bool DenseBase<Derived>::isApproxToConstant
 (const Scalar& val, const RealScalar& prec) const
 {
  typename internal::nested_eval<Derived,1>::type self(derived());
  for(Index j = 0; j < cols(); ++j)
    for(Index i = 0; i < rows(); ++i)
-      if(!internal::isApprox(this->coeff(i, j), val, prec))
+      if(!internal::isApprox(self.coeff(i, j), val, prec))
        return false;
  return true;
 }
@ -324,7 +317,7 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
  setConstant(val);
 }
-/** Sets all coefficients in this expression to \a value.
+/** Sets all coefficients in this expression to value \a val.
  *
  * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes()
  */
@ -334,7 +327,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
  return derived() = Constant(rows(), cols(), val);
 }
-/** Resizes to the given \a size, and sets all coefficients in this expression to the given \a value.
+/** Resizes to the given \a size, and sets all coefficients in this expression to the given value \a val.
  *
  * \only_for_vectors
  *
@ -351,10 +344,10 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
  return setConstant(val);
 }
-/** Resizes to the given size, and sets all coefficients in this expression to the given \a value.
+/** Resizes to the given size, and sets all coefficients in this expression to the given value \a val.
  *
-  * \param nbRows the new number of rows
+  * \param rows the new number of rows
-  * \param nbCols the new number of columns
+  * \param cols the new number of columns
  * \param val the value to which all coefficients are set
  *
  * Example: \include Matrix_setConstant_int_int.cpp
@ -364,14 +357,14 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived&
-PlainObjectBase<Derived>::setConstant(Index nbRows, Index nbCols, const Scalar& val)
+PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
 {
-  resize(nbRows, nbCols);
+  resize(rows, cols);
  return setConstant(val);
 }
 /**
-  * \brief Sets a linearly space vector.
+  * \brief Sets a linearly spaced vector.
  *
  * The function generates 'size' equally spaced values in the closed interval [low,high].
  * When size is set to 1, a vector of length 1 containing 'high' is returned.
@ -381,24 +374,30 @@ PlainObjectBase<Derived>::setConstant(Index nbRows, Index nbCols, const Scalar&
  * Example: \include DenseBase_setLinSpaced.cpp
  * Output: \verbinclude DenseBase_setLinSpaced.out
  *
-  * \sa CwiseNullaryOp
+  * For integer scalar types, do not miss the explanations on the definition
  * of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink.
  *
  * \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,false>(low,high,newSize));
+  return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize));
 }
 /**
-  * \brief Sets a linearly space vector.
+  * \brief Sets a linearly spaced vector.
  *
-  * The function fill *this with equally spaced values in the closed interval [low,high].
+  * The function fills \c *this with equally spaced values in the closed interval [low,high].
  * When size is set to 1, a vector of length 1 containing 'high' is returned.
  *
  * \only_for_vectors
  *
-  * \sa setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
+  * For integer scalar types, do not miss the explanations on the definition
  * of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink.
  *
  * \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
@ -425,9 +424,9 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low,
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
-DenseBase<Derived>::Zero(Index nbRows, Index nbCols)
+DenseBase<Derived>::Zero(Index rows, Index cols)
 {
-  return Constant(nbRows, nbCols, Scalar(0));
+  return Constant(rows, cols, Scalar(0));
 }
 /** \returns an expression of a zero vector.
@ -481,9 +480,10 @@ DenseBase<Derived>::Zero()
 template<typename Derived>
 bool DenseBase<Derived>::isZero(const RealScalar& prec) const
 {
  typename internal::nested_eval<Derived,1>::type self(derived());
  for(Index j = 0; j < cols(); ++j)
    for(Index i = 0; i < rows(); ++i)
-      if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<Scalar>(1), prec))
+      if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<Scalar>(1), prec))
        return false;
  return true;
 }
@ -520,8 +520,8 @@ PlainObjectBase<Derived>::setZero(Index newSize)
 /** Resizes to the given size, and sets all coefficients in this expression to zero.
  *
-  * \param nbRows the new number of rows
+  * \param rows the new number of rows
-  * \param nbCols the new number of columns
+  * \param cols the new number of columns
  *
  * Example: \include Matrix_setZero_int_int.cpp
  * Output: \verbinclude Matrix_setZero_int_int.out
@ -530,9 +530,9 @@ PlainObjectBase<Derived>::setZero(Index newSize)
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived&
-PlainObjectBase<Derived>::setZero(Index nbRows, Index nbCols)
+PlainObjectBase<Derived>::setZero(Index rows, Index cols)
 {
-  resize(nbRows, nbCols);
+  resize(rows, cols);
  return setConstant(Scalar(0));
 }
@ -540,7 +540,7 @@ PlainObjectBase<Derived>::setZero(Index nbRows, Index nbCols)
 /** \returns an expression of a matrix where all coefficients equal one.
  *
-  * The parameters \a nbRows and \a nbCols are the number of rows and of columns of
+  * The parameters \a rows and \a cols are the number of rows and of columns of
  * the returned matrix. Must be compatible with this MatrixBase type.
  *
  * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
@ -554,9 +554,9 @@ PlainObjectBase<Derived>::setZero(Index nbRows, Index nbCols)
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
-DenseBase<Derived>::Ones(Index nbRows, Index nbCols)
+DenseBase<Derived>::Ones(Index rows, Index cols)
 {
-  return Constant(nbRows, nbCols, Scalar(1));
+  return Constant(rows, cols, Scalar(1));
 }
 /** \returns an expression of a vector where all coefficients equal one.
@ -646,8 +646,8 @@ PlainObjectBase<Derived>::setOnes(Index newSize)
 /** Resizes to the given size, and sets all coefficients in this expression to one.
  *
-  * \param nbRows the new number of rows
+  * \param rows the new number of rows
-  * \param nbCols the new number of columns
+  * \param cols the new number of columns
  *
  * Example: \include Matrix_setOnes_int_int.cpp
  * Output: \verbinclude Matrix_setOnes_int_int.out
@ -656,9 +656,9 @@ PlainObjectBase<Derived>::setOnes(Index newSize)
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived&
-PlainObjectBase<Derived>::setOnes(Index nbRows, Index nbCols)
+PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
 {
-  resize(nbRows, nbCols);
+  resize(rows, cols);
  return setConstant(Scalar(1));
 }
@ -666,7 +666,7 @@ PlainObjectBase<Derived>::setOnes(Index nbRows, Index nbCols)
 /** \returns an expression of the identity matrix (not necessarily square).
  *
-  * The parameters \a nbRows and \a nbCols are the number of rows and of columns of
+  * The parameters \a rows and \a cols are the number of rows and of columns of
  * the returned matrix. Must be compatible with this MatrixBase type.
  *
  * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
@ -680,9 +680,9 @@ PlainObjectBase<Derived>::setOnes(Index nbRows, Index nbCols)
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
-MatrixBase<Derived>::Identity(Index nbRows, Index nbCols)
+MatrixBase<Derived>::Identity(Index rows, Index cols)
 {
-  return DenseBase<Derived>::NullaryExpr(nbRows, nbCols, internal::scalar_identity_op<Scalar>());
+  return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
 }
 /** \returns an expression of the identity matrix (not necessarily square).
@ -716,18 +716,19 @@ template<typename Derived>
 bool MatrixBase<Derived>::isIdentity
 (const RealScalar& prec) const
 {
  typename internal::nested_eval<Derived,1>::type self(derived());
  for(Index j = 0; j < cols(); ++j)
  {
    for(Index i = 0; i < rows(); ++i)
    {
      if(i == j)
      {
-        if(!internal::isApprox(this->coeff(i, j), static_cast<Scalar>(1), prec))
+        if(!internal::isApprox(self.coeff(i, j), static_cast<Scalar>(1), prec))
          return false;
      }
      else
      {
-        if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<RealScalar>(1), prec))
+        if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<RealScalar>(1), prec))
          return false;
      }
    }
@ -740,6 +741,7 @@ namespace internal {
 template<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)>
 struct setIdentity_impl
 {
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Derived& run(Derived& m)
  {
    return m = Derived::Identity(m.rows(), m.cols());
@ -749,11 +751,11 @@ struct setIdentity_impl
 template<typename Derived>
 struct setIdentity_impl<Derived, true>
 {
-  typedef typename Derived::Index Index;
+  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Derived& run(Derived& m)
  {
    m.setZero();
-    const Index size = (std::min)(m.rows(), m.cols());
+    const Index size = numext::mini(m.rows(), m.cols());
    for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
    return m;
  }
@ -776,8 +778,8 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
 /** \brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this.
  *
-  * \param nbRows the new number of rows
+  * \param rows the new number of rows
-  * \param nbCols the new number of columns
+  * \param cols the new number of columns
  *
  * Example: \include Matrix_setIdentity_int_int.cpp
  * Output: \verbinclude Matrix_setIdentity_int_int.out
@ -785,9 +787,9 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
  * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index nbRows, Index nbCols)
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
 {
-  derived().resize(nbRows, nbCols);
+  derived().resize(rows, cols);
  return setIdentity();
 }
--- a/eigenlib/Eigen/src/Core/CwiseTernaryOp.h
+++ b/eigenlib/Eigen/src/Core/CwiseTernaryOp.h
@ -0,0 +1,197 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 // Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_CWISE_TERNARY_OP_H
 #define EIGEN_CWISE_TERNARY_OP_H
 namespace Eigen {
 namespace internal {
 template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
 struct traits<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > {
  // we must not inherit from traits<Arg1> since it has
  // the potential to cause problems with MSVC
  typedef typename remove_all<Arg1>::type Ancestor;
  typedef typename traits<Ancestor>::XprKind XprKind;
  enum {
    RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,
    ColsAtCompileTime = traits<Ancestor>::ColsAtCompileTime,
    MaxRowsAtCompileTime = traits<Ancestor>::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = traits<Ancestor>::MaxColsAtCompileTime
  };
  // even though we require Arg1, Arg2, and Arg3 to have the same scalar type
  // (see CwiseTernaryOp constructor),
  // we still want to handle the case when the result type is different.
  typedef typename result_of<TernaryOp(
      const typename Arg1::Scalar&, const typename Arg2::Scalar&,
      const typename Arg3::Scalar&)>::type Scalar;
  typedef typename internal::traits<Arg1>::StorageKind StorageKind;
  typedef typename internal::traits<Arg1>::StorageIndex StorageIndex;
  typedef typename Arg1::Nested Arg1Nested;
  typedef typename Arg2::Nested Arg2Nested;
  typedef typename Arg3::Nested Arg3Nested;
  typedef typename remove_reference<Arg1Nested>::type _Arg1Nested;
  typedef typename remove_reference<Arg2Nested>::type _Arg2Nested;
  typedef typename remove_reference<Arg3Nested>::type _Arg3Nested;
  enum { Flags = _Arg1Nested::Flags & RowMajorBit };
 };
 }  // end namespace internal
 template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3,
          typename StorageKind>
 class CwiseTernaryOpImpl;
 /** \class CwiseTernaryOp
  * \ingroup Core_Module
  *
  * \brief Generic expression where a coefficient-wise ternary operator is
 * applied to two expressions
  *
  * \tparam TernaryOp template functor implementing the operator
  * \tparam Arg1Type the type of the first argument
  * \tparam Arg2Type the type of the second argument
  * \tparam Arg3Type the type of the third argument
  *
  * This class represents an expression where a coefficient-wise ternary
 * operator is applied to three expressions.
  * It is the return type of ternary operators, by which we mean only those
 * ternary operators where
  * all three arguments are Eigen expressions.
  * For example, the return type of betainc(matrix1, matrix2, matrix3) is a
 * CwiseTernaryOp.
  *
  * Most of the time, this is the only way that it is used, so you typically
 * don't have to name
  * CwiseTernaryOp types explicitly.
  *
  * \sa MatrixBase::ternaryExpr(const MatrixBase<Argument2> &, const
 * MatrixBase<Argument3> &, const CustomTernaryOp &) const, class CwiseBinaryOp,
 * class CwiseUnaryOp, class CwiseNullaryOp
  */
 template <typename TernaryOp, typename Arg1Type, typename Arg2Type,
          typename Arg3Type>
 class CwiseTernaryOp : public CwiseTernaryOpImpl<
                           TernaryOp, Arg1Type, Arg2Type, Arg3Type,
                           typename internal::traits<Arg1Type>::StorageKind>,
                       internal::no_assignment_operator
 {
 public:
  typedef typename internal::remove_all<Arg1Type>::type Arg1;
  typedef typename internal::remove_all<Arg2Type>::type Arg2;
  typedef typename internal::remove_all<Arg3Type>::type Arg3;
  typedef typename CwiseTernaryOpImpl<
      TernaryOp, Arg1Type, Arg2Type, Arg3Type,
      typename internal::traits<Arg1Type>::StorageKind>::Base Base;
  EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseTernaryOp)
  typedef typename internal::ref_selector<Arg1Type>::type Arg1Nested;
  typedef typename internal::ref_selector<Arg2Type>::type Arg2Nested;
  typedef typename internal::ref_selector<Arg3Type>::type Arg3Nested;
  typedef typename internal::remove_reference<Arg1Nested>::type _Arg1Nested;
  typedef typename internal::remove_reference<Arg2Nested>::type _Arg2Nested;
  typedef typename internal::remove_reference<Arg3Nested>::type _Arg3Nested;
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2,
                                     const Arg3& a3,
                                     const TernaryOp& func = TernaryOp())
      : m_arg1(a1), m_arg2(a2), m_arg3(a3), m_functor(func) {
    // require the sizes to match
    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2)
    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3)
    // The index types should match
    EIGEN_STATIC_ASSERT((internal::is_same<
                         typename internal::traits<Arg1Type>::StorageKind,
                         typename internal::traits<Arg2Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<
                         typename internal::traits<Arg1Type>::StorageKind,
                         typename internal::traits<Arg3Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() &&
                 a1.rows() == a3.rows() && a1.cols() == a3.cols());
  }
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE Index rows() const {
    // return the fixed size type if available to enable compile time
    // optimizations
    if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
                RowsAtCompileTime == Dynamic &&
        internal::traits<typename internal::remove_all<Arg2Nested>::type>::
                RowsAtCompileTime == Dynamic)
      return m_arg3.rows();
    else if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
                     RowsAtCompileTime == Dynamic &&
             internal::traits<typename internal::remove_all<Arg3Nested>::type>::
                     RowsAtCompileTime == Dynamic)
      return m_arg2.rows();
    else
      return m_arg1.rows();
  }
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE Index cols() const {
    // return the fixed size type if available to enable compile time
    // optimizations
    if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
                ColsAtCompileTime == Dynamic &&
        internal::traits<typename internal::remove_all<Arg2Nested>::type>::
                ColsAtCompileTime == Dynamic)
      return m_arg3.cols();
    else if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
                     ColsAtCompileTime == Dynamic &&
             internal::traits<typename internal::remove_all<Arg3Nested>::type>::
                     ColsAtCompileTime == Dynamic)
      return m_arg2.cols();
    else
      return m_arg1.cols();
  }
  /** \returns the first argument nested expression */
  EIGEN_DEVICE_FUNC
  const _Arg1Nested& arg1() const { return m_arg1; }
  /** \returns the first argument nested expression */
  EIGEN_DEVICE_FUNC
  const _Arg2Nested& arg2() const { return m_arg2; }
  /** \returns the third argument nested expression */
  EIGEN_DEVICE_FUNC
  const _Arg3Nested& arg3() const { return m_arg3; }
  /** \returns the functor representing the ternary operation */
  EIGEN_DEVICE_FUNC
  const TernaryOp& functor() const { return m_functor; }
 protected:
  Arg1Nested m_arg1;
  Arg2Nested m_arg2;
  Arg3Nested m_arg3;
  const TernaryOp m_functor;
 };
 // Generic API dispatcher
 template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3,
          typename StorageKind>
 class CwiseTernaryOpImpl
    : public internal::generic_xpr_base<
          CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >::type {
 public:
  typedef typename internal::generic_xpr_base<
      CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >::type Base;
 };
 }  // end namespace Eigen
 #endif  // EIGEN_CWISE_TERNARY_OP_H
--- a/eigenlib/Eigen/src/Core/CwiseUnaryOp.h
+++ b/eigenlib/Eigen/src/Core/CwiseUnaryOp.h
@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@ -13,13 +13,32 @@
 namespace Eigen { 
 namespace internal {
 template<typename UnaryOp, typename XprType>
 struct traits<CwiseUnaryOp<UnaryOp, XprType> >
 : traits<XprType>
 {
  typedef typename result_of<
                     UnaryOp(const typename XprType::Scalar&)
                   >::type Scalar;
  typedef typename XprType::Nested XprTypeNested;
  typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
  enum {
    Flags = _XprTypeNested::Flags & RowMajorBit 
  };
 };
 }
 template<typename UnaryOp, typename XprType, typename StorageKind>
 class CwiseUnaryOpImpl;
 /** \class CwiseUnaryOp
  * \ingroup Core_Module
  *
  * \brief Generic expression where a coefficient-wise unary operator is applied to an expression
  *
-  * \param UnaryOp template functor implementing the operator
+  * \tparam UnaryOp template functor implementing the operator
-  * \param XprType the type of the expression to which we are applying the unary operator
+  * \tparam XprType the type of the expression to which we are applying the unary operator
  *
  * This class represents an expression where a unary operator is applied to an expression.
  * It is the return type of all operations taking exactly 1 input expression, regardless of the
@ -32,93 +51,51 @@ namespace Eigen {
  *
  * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
  */
 namespace internal {
 template<typename UnaryOp, typename XprType>
-struct traits<CwiseUnaryOp<UnaryOp, XprType> >
+class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator
 : traits<XprType>
 {
  typedef typename result_of<
                     UnaryOp(typename XprType::Scalar)
                   >::type Scalar;
  typedef typename XprType::Nested XprTypeNested;
  typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
  enum {
    Flags = _XprTypeNested::Flags & (
      HereditaryBits | LinearAccessBit | AlignedBit
      | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
    CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits<UnaryOp>::Cost
  };
 };
 }
 template<typename UnaryOp, typename XprType, typename StorageKind>
 class CwiseUnaryOpImpl;
 template<typename UnaryOp, typename XprType>
 class CwiseUnaryOp : internal::no_assignment_operator,
  public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>
 {
  public:
    typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
    EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
    typedef typename internal::ref_selector<XprType>::type XprTypeNested;
    typedef typename internal::remove_all<XprType>::type NestedExpression;
-    inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
      : m_xpr(xpr), m_functor(func) {}
-    EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); }
+    Index rows() const { return m_xpr.rows(); }
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Index cols() const { return m_xpr.cols(); }
    /** \returns the functor representing the unary operation */
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    const UnaryOp& functor() const { return m_functor; }
    /** \returns the nested expression */
-    const typename internal::remove_all<typename XprType::Nested>::type&
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    const typename internal::remove_all<XprTypeNested>::type&
    nestedExpression() const { return m_xpr; }
    /** \returns the nested expression */
-    typename internal::remove_all<typename XprType::Nested>::type&
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    nestedExpression() { return m_xpr.const_cast_derived(); }
+    typename internal::remove_all<XprTypeNested>::type&
    nestedExpression() { return m_xpr; }
  protected:
-    typename XprType::Nested m_xpr;
+    XprTypeNested m_xpr;
    const UnaryOp m_functor;
 };
-// This is the generic implementation for dense storage.
+// Generic API dispatcher
-// It can be used for any expression types implementing the dense concept.
+template<typename UnaryOp, typename XprType, typename StorageKind>
-template<typename UnaryOp, typename XprType>
+class CwiseUnaryOpImpl
-class CwiseUnaryOpImpl<UnaryOp,XprType,Dense>
+  : public internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
  : public internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
 {
-  public:
+public:
-
+  typedef typename internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
    typedef CwiseUnaryOp<UnaryOp, XprType> Derived;
    typedef typename internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
    EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
    {
      return derived().functor()(derived().nestedExpression().coeff(rowId, colId));
    }
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
    {
      return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(rowId, colId));
    }
    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
    {
      return derived().functor()(derived().nestedExpression().coeff(index));
    }
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
    {
      return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(index));
    }
 };
 } // end namespace Eigen
--- a/eigenlib/Eigen/src/Core/CwiseUnaryView.h
+++ b/eigenlib/Eigen/src/Core/CwiseUnaryView.h
@ -12,33 +12,19 @@
 namespace Eigen {
 /** \class CwiseUnaryView
  * \ingroup Core_Module
  *
  * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
  *
  * \param ViewOp template functor implementing the view
  * \param MatrixType the type of the matrix we are applying the unary operator
  *
  * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.
  * It is the return type of real() and imag(), and most of the time this is the only way it is used.
  *
  * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
  */
 namespace internal {
 template<typename ViewOp, typename MatrixType>
 struct traits<CwiseUnaryView<ViewOp, MatrixType> >
 : traits<MatrixType>
 {
  typedef typename result_of<
-                     ViewOp(typename traits<MatrixType>::Scalar)
+                     ViewOp(const typename traits<MatrixType>::Scalar&)
                   >::type Scalar;
  typedef typename MatrixType::Nested MatrixTypeNested;
  typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
  enum {
-    Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)),
+    FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
-    CoeffReadCost = traits<_MatrixTypeNested>::CoeffReadCost + functor_traits<ViewOp>::Cost,
+    Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions
    MatrixTypeInnerStride =  inner_stride_at_compile_time<MatrixType>::ret,
    // need to cast the sizeof's from size_t to int explicitly, otherwise:
    // "error: no integral type can represent all of the enumerator values
@ -55,6 +41,19 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
 template<typename ViewOp, typename MatrixType, typename StorageKind>
 class CwiseUnaryViewImpl;
 /** \class CwiseUnaryView
  * \ingroup Core_Module
  *
  * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
  *
  * \tparam ViewOp template functor implementing the view
  * \tparam MatrixType the type of the matrix we are applying the unary operator
  *
  * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.
  * It is the return type of real() and imag(), and most of the time this is the only way it is used.
  *
  * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
  */
 template<typename ViewOp, typename MatrixType>
 class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>
 {
@ -62,8 +61,10 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
    typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
    EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
    typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
    typedef typename internal::remove_all<MatrixType>::type NestedExpression;
-    inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp())
+    explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())
      : m_matrix(mat), m_functor(func) {}
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView)
@ -75,19 +76,27 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
    const ViewOp& functor() const { return m_functor; }
    /** \returns the nested expression */
-    const typename internal::remove_all<typename MatrixType::Nested>::type&
+    const typename internal::remove_all<MatrixTypeNested>::type&
    nestedExpression() const { return m_matrix; }
    /** \returns the nested expression */
-    typename internal::remove_all<typename MatrixType::Nested>::type&
+    typename internal::remove_reference<MatrixTypeNested>::type&
    nestedExpression() { return m_matrix.const_cast_derived(); }
  protected:
-    // FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC
+    MatrixTypeNested m_matrix;
    typename internal::nested<MatrixType>::type m_matrix;
    ViewOp m_functor;
 };
 // Generic API dispatcher
 template<typename ViewOp, typename XprType, typename StorageKind>
 class CwiseUnaryViewImpl
  : public internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type
 {
 public:
  typedef typename internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type Base;
 };
 template<typename ViewOp, typename MatrixType>
 class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
  : public internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type
@ -100,38 +109,18 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)
-    inline Scalar* data() { return &coeffRef(0); }
+    EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); }
-    inline const Scalar* data() const { return &coeff(0); }
+    EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); }
-    inline Index innerStride() const
+    EIGEN_DEVICE_FUNC inline Index innerStride() const
    {
      return derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
    }
-    inline Index outerStride() const
+    EIGEN_DEVICE_FUNC inline Index outerStride() const
    {
      return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
    }
    EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
    {
      return derived().functor()(derived().nestedExpression().coeff(row, col));
    }
    EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
    {
      return derived().functor()(derived().nestedExpression().coeff(index));
    }
    EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
    {
      return derived().functor()(const_cast_derived().nestedExpression().coeffRef(row, col));
    }
    EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
    {
      return derived().functor()(const_cast_derived().nestedExpression().coeffRef(index));
    }
 };
 } // end namespace Eigen
--- a/eigenlib/Eigen/src/Core/DenseBase.h
+++ b/eigenlib/Eigen/src/Core/DenseBase.h
@ -34,37 +34,45 @@ static inline void check_DenseIndex_is_signed() {
  * \tparam Derived is the derived type, e.g., a matrix type or an expression.
  *
  * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
  *
-  * \sa \ref TopicClassHierarchy
+  * \sa \blank \ref TopicClassHierarchy
  */
 template<typename Derived> class DenseBase
 #ifndef EIGEN_PARSED_BY_DOXYGEN
  : public internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
                                     typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>
 #else
  : public DenseCoeffsBase<Derived>
 #else
  : public DenseCoeffsBase<Derived,DirectWriteAccessors>
 #endif // not EIGEN_PARSED_BY_DOXYGEN
 {
  public:
    using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
                typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
-    class InnerIterator;
+    /** Inner iterator type to iterate over the coefficients of a row or column.
      * \sa class InnerIterator
      */
    typedef Eigen::InnerIterator<Derived> InnerIterator;
    typedef typename internal::traits<Derived>::StorageKind StorageKind;
-    /** \brief The type of indices 
+    /**
-      * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
+      * \brief The type used to store indices
-      * \sa \ref TopicPreprocessorDirectives.
+      * \details This typedef is relevant for types that store multiple indices such as
-      */
+      *          PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index
-    typedef typename internal::traits<Derived>::Index Index; 
+      * \sa \blank \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase.
     */
    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
    /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc. */
    typedef typename internal::traits<Derived>::Scalar Scalar;
-    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    
    /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc.
      *
      * It is an alias for the Scalar type */
    typedef Scalar value_type;
    typedef typename NumTraits<Scalar>::Real RealScalar;
    typedef DenseCoeffsBase<Derived> Base;
    using Base::derived;
    using Base::const_cast_derived;
    using Base::rows;
@ -74,16 +82,6 @@ template<typename Derived> class DenseBase
    using Base::colIndexByOuterInner;
    using Base::coeff;
    using Base::coeffByOuterInner;
    using Base::packet;
    using Base::packetByOuterInner;
    using Base::writePacket;
    using Base::writePacketByOuterInner;
    using Base::coeffRef;
    using Base::coeffRefByOuterInner;
    using Base::copyCoeff;
    using Base::copyCoeffByOuterInner;
    using Base::copyPacket;
    using Base::copyPacketByOuterInner;
    using Base::operator();
    using Base::operator[];
    using Base::x;
@ -169,30 +167,54 @@ template<typename Derived> class DenseBase
      InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime)
                             : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
      CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
        /**< This is a rough measure of how expensive it is to read one coefficient from
          * this expression.
          */
      InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret,
      OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret
    };
    typedef typename internal::find_best_packet<Scalar,SizeAtCompileTime>::type PacketScalar;
-    enum { ThisConstantIsPrivateInPlainObjectBase };
+    enum { IsPlainObjectBase = 0 };
    /** The plain matrix type corresponding to this expression.
      * \sa PlainObject */
    typedef Matrix<typename internal::traits<Derived>::Scalar,
                internal::traits<Derived>::RowsAtCompileTime,
                internal::traits<Derived>::ColsAtCompileTime,
                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
                internal::traits<Derived>::MaxRowsAtCompileTime,
                internal::traits<Derived>::MaxColsAtCompileTime
          > PlainMatrix;
    /** The plain array type corresponding to this expression.
      * \sa PlainObject */
    typedef Array<typename internal::traits<Derived>::Scalar,
                internal::traits<Derived>::RowsAtCompileTime,
                internal::traits<Derived>::ColsAtCompileTime,
                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
                internal::traits<Derived>::MaxRowsAtCompileTime,
                internal::traits<Derived>::MaxColsAtCompileTime
          > PlainArray;
    /** \brief The plain matrix or array type corresponding to this expression.
      *
      * This is not necessarily exactly the return type of eval(). In the case of plain matrices,
      * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
      * that the return type of eval() is either PlainObject or const PlainObject&.
      */
    typedef typename internal::conditional<internal::is_same<typename internal::traits<Derived>::XprKind,MatrixXpr >::value,
                                 PlainMatrix, PlainArray>::type PlainObject;
    /** \returns the number of nonzero coefficients which is in practice the number
      * of stored coefficients. */
    EIGEN_DEVICE_FUNC
    inline Index nonZeros() const { return size(); }
    /** \returns true if either the number of rows or the number of columns is equal to 1.
      * In other words, this function returns
      * \code rows()==1 || cols()==1 \endcode
      * \sa rows(), cols(), IsVectorAtCompileTime. */
    /** \returns the outer size.
      *
      * \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension
      * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a
      * column-major matrix, and the number of rows for a row-major matrix. */
    EIGEN_DEVICE_FUNC
    Index outerSize() const
    {
      return IsVectorAtCompileTime ? 1
@ -204,6 +226,7 @@ template<typename Derived> class DenseBase
      * \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension
      * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a 
      * column-major matrix, and the number of columns for a row-major matrix. */
    EIGEN_DEVICE_FUNC
    Index innerSize() const
    {
      return IsVectorAtCompileTime ? this->size()
@ -214,6 +237,7 @@ template<typename Derived> class DenseBase
      * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
      * nothing else.
      */
    EIGEN_DEVICE_FUNC
    void resize(Index newSize)
    {
      EIGEN_ONLY_USED_FOR_DEBUG(newSize);
@ -224,22 +248,22 @@ template<typename Derived> class DenseBase
      * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
      * nothing else.
      */
-    void resize(Index nbRows, Index nbCols)
+    EIGEN_DEVICE_FUNC
    void resize(Index rows, Index cols)
    {
-      EIGEN_ONLY_USED_FOR_DEBUG(nbRows);
+      EIGEN_ONLY_USED_FOR_DEBUG(rows);
-      EIGEN_ONLY_USED_FOR_DEBUG(nbCols);
+      EIGEN_ONLY_USED_FOR_DEBUG(cols);
-      eigen_assert(nbRows == this->rows() && nbCols == this->cols()
+      eigen_assert(rows == this->rows() && cols == this->cols()
                && "DenseBase::resize() does not actually allow to resize.");
    }
 #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** \internal Represents a matrix with all coefficients equal to one another*/
-    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
+    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
-    /** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
+    /** \internal \deprecated Represents a vector with linearly spaced coefficients that allows sequential access only. */
-    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,false>,Derived> SequentialLinSpacedReturnType;
+    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> SequentialLinSpacedReturnType;
    /** \internal Represents a vector with linearly spaced coefficients that allows random access. */
-    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType;
+    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> RandomAccessLinSpacedReturnType;
    /** \internal the return type of MatrixBase::eigenvalues() */
    typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
@ -247,120 +271,133 @@ template<typename Derived> class DenseBase
    /** Copies \a other into *this. \returns a reference to *this. */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator=(const DenseBase<OtherDerived>& other);
    /** Special case of the template operator=, in order to prevent the compiler
      * from generating a default operator= (issue hit with g++ 4.1)
      */
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator=(const DenseBase& other);
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    Derived& operator=(const EigenBase<OtherDerived> &other);
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    Derived& operator+=(const EigenBase<OtherDerived> &other);
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    Derived& operator-=(const EigenBase<OtherDerived> &other);
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    Derived& operator=(const ReturnByValue<OtherDerived>& func);
-    /** \internal Copies \a other into *this without evaluating other. \returns a reference to *this. */
+    /** \ínternal
      * Copies \a other into *this without evaluating other. \returns a reference to *this.
      * \deprecated */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    Derived& lazyAssign(const DenseBase<OtherDerived>& other);
-    /** \internal Evaluates \a other into *this. \returns a reference to *this. */
+    EIGEN_DEVICE_FUNC
    template<typename OtherDerived>
    Derived& lazyAssign(const ReturnByValue<OtherDerived>& other);
    CommaInitializer<Derived> operator<< (const Scalar& s);
    /** \deprecated it now returns \c *this */
    template<unsigned int Added,unsigned int Removed>
-    const Flagged<Derived, Added, Removed> flagged() const;
+    EIGEN_DEPRECATED
    const Derived& flagged() const
    { return derived(); }
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
-    Eigen::Transpose<Derived> transpose();
+    typedef Transpose<Derived> TransposeReturnType;
-	typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
+    EIGEN_DEVICE_FUNC
    TransposeReturnType transpose();
    typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
    EIGEN_DEVICE_FUNC
    ConstTransposeReturnType transpose() const;
    EIGEN_DEVICE_FUNC
    void transposeInPlace();
 #ifndef EIGEN_NO_DEBUG
  protected:
    template<typename OtherDerived>
    void checkTransposeAliasing(const OtherDerived& other) const;
  public:
 #endif
-
+    EIGEN_DEVICE_FUNC static const ConstantReturnType
    static const ConstantReturnType
    Constant(Index rows, Index cols, const Scalar& value);
-    static const ConstantReturnType
+    EIGEN_DEVICE_FUNC static const ConstantReturnType
    Constant(Index size, const Scalar& value);
-    static const ConstantReturnType
+    EIGEN_DEVICE_FUNC static const ConstantReturnType
    Constant(const Scalar& value);
-    static const SequentialLinSpacedReturnType
+    EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
    LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);
-    static const RandomAccessLinSpacedReturnType
+    EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
    LinSpaced(Index size, const Scalar& low, const Scalar& high);
-    static const SequentialLinSpacedReturnType
+    EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
    LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);
-    static const RandomAccessLinSpacedReturnType
+    EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
    LinSpaced(const Scalar& low, const Scalar& high);
-    template<typename CustomNullaryOp>
+    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
-    static const CwiseNullaryOp<CustomNullaryOp, Derived>
+    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
    NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
-    template<typename CustomNullaryOp>
+    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
-    static const CwiseNullaryOp<CustomNullaryOp, Derived>
+    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
    NullaryExpr(Index size, const CustomNullaryOp& func);
-    template<typename CustomNullaryOp>
+    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
-    static const CwiseNullaryOp<CustomNullaryOp, Derived>
+    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
    NullaryExpr(const CustomNullaryOp& func);
-    static const ConstantReturnType Zero(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
-    static const ConstantReturnType Zero(Index size);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size);
-    static const ConstantReturnType Zero();
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero();
-    static const ConstantReturnType Ones(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols);
-    static const ConstantReturnType Ones(Index size);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size);
-    static const ConstantReturnType Ones();
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones();
-    void fill(const Scalar& value);
+    EIGEN_DEVICE_FUNC void fill(const Scalar& value);
-    Derived& setConstant(const Scalar& value);
+    EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);
-    Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
+    EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
-    Derived& setLinSpaced(const Scalar& low, const Scalar& high);
+    EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);
-    Derived& setZero();
+    EIGEN_DEVICE_FUNC Derived& setZero();
-    Derived& setOnes();
+    EIGEN_DEVICE_FUNC Derived& setOnes();
-    Derived& setRandom();
+    EIGEN_DEVICE_FUNC Derived& setRandom();
-    template<typename OtherDerived>
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC
    bool isApprox(const DenseBase<OtherDerived>& other,
                  const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
    EIGEN_DEVICE_FUNC 
    bool isMuchSmallerThan(const RealScalar& other,
                           const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    template<typename OtherDerived>
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC
    bool isMuchSmallerThan(const DenseBase<OtherDerived>& other,
                           const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
    inline bool hasNaN() const;
    inline bool allFinite() const;
-    inline Derived& operator*=(const Scalar& other);
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    inline Derived& operator/=(const Scalar& other);
+    Derived& operator*=(const Scalar& other);
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator/=(const Scalar& other);
    typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
    /** \returns the matrix or vector obtained by evaluating this expression.
      *
      * Notice that in the case of a plain matrix or vector (not an expression) this function just returns
      * a const reference, in order to avoid a useless copy.
      * 
      * \warning Be carefull with eval() and the auto C++ keyword, as detailed in this \link TopicPitfalls_auto_keyword page \endlink.
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE EvalReturnType eval() const
    {
      // Even though MSVC does not honor strong inlining when the return type
@ -368,61 +405,68 @@ template<typename Derived> class DenseBase
      // size types on MSVC.
      return typename internal::eval<Derived>::type(derived());
    }
-
+    
    /** swaps *this with the expression \a other.
      *
      */
    template<typename OtherDerived>
-    void swap(const DenseBase<OtherDerived>& other,
+    EIGEN_DEVICE_FUNC
-              int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase)
+    void swap(const DenseBase<OtherDerived>& other)
    {
-      SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
+      EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
      eigen_assert(rows()==other.rows() && cols()==other.cols());
      call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
    }
    /** swaps *this with the matrix or array \a other.
      *
      */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    void swap(PlainObjectBase<OtherDerived>& other)
    {
-      SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
+      eigen_assert(rows()==other.rows() && cols()==other.cols());
      call_assignment(derived(), other.derived(), internal::swap_assign_op<Scalar>());
    }
    EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;
    EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
    EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
    template<bool Enable> EIGEN_DEVICE_FUNC
    inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
    template<bool Enable> EIGEN_DEVICE_FUNC
    inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
-    inline const NestByValue<Derived> nestByValue() const;
+    EIGEN_DEVICE_FUNC Scalar sum() const;
-    inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
+    EIGEN_DEVICE_FUNC Scalar mean() const;
-    inline ForceAlignedAccess<Derived> forceAlignedAccess();
+    EIGEN_DEVICE_FUNC Scalar trace() const;
    template<bool Enable> inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
    template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
-    Scalar sum() const;
+    EIGEN_DEVICE_FUNC Scalar prod() const;
    Scalar mean() const;
    Scalar trace() const;
-    Scalar prod() const;
+    EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
    EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
-    typename internal::traits<Derived>::Scalar minCoeff() const;
+    template<typename IndexType> EIGEN_DEVICE_FUNC
    typename internal::traits<Derived>::Scalar maxCoeff() const;
    template<typename IndexType>
    typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
-    template<typename IndexType>
+    template<typename IndexType> EIGEN_DEVICE_FUNC
    typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
-    template<typename IndexType>
+    template<typename IndexType> EIGEN_DEVICE_FUNC
    typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
-    template<typename IndexType>
+    template<typename IndexType> EIGEN_DEVICE_FUNC
    typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;
    template<typename BinaryOp>
-    typename internal::result_of<BinaryOp(typename internal::traits<Derived>::Scalar)>::type
+    EIGEN_DEVICE_FUNC
-    redux(const BinaryOp& func) const;
+    Scalar redux(const BinaryOp& func) const;
    template<typename Visitor>
    EIGEN_DEVICE_FUNC
    void visit(Visitor& func) const;
    inline const WithFormat<Derived> format(const IOFormat& fmt) const;
    /** \returns the unique coefficient of a 1x1 expression */
    EIGEN_DEVICE_FUNC
    CoeffReturnType value() const
    {
      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
@ -430,8 +474,8 @@ template<typename Derived> class DenseBase
      return derived().coeff(0,0);
    }
-    bool all(void) const;
+    bool all() const;
-    bool any(void) const;
+    bool any() const;
    Index count() const;
    typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
@ -439,14 +483,35 @@ template<typename Derived> class DenseBase
    typedef VectorwiseOp<Derived, Vertical> ColwiseReturnType;
    typedef const VectorwiseOp<const Derived, Vertical> ConstColwiseReturnType;
-    ConstRowwiseReturnType rowwise() const;
+    /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
-    RowwiseReturnType rowwise();
+    *
-    ConstColwiseReturnType colwise() const;
+    * Example: \include MatrixBase_rowwise.cpp
-    ColwiseReturnType colwise();
+    * Output: \verbinclude MatrixBase_rowwise.out
    *
    * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
    */
    //Code moved here due to a CUDA compiler bug
    EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const {
      return ConstRowwiseReturnType(derived());
    }
    EIGEN_DEVICE_FUNC RowwiseReturnType rowwise();
-    static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index rows, Index cols);
+    /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
-    static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index size);
+    *
-    static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random();
+    * Example: \include MatrixBase_colwise.cpp
    * Output: \verbinclude MatrixBase_colwise.out
    *
    * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
    */
    EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const {
      return ConstColwiseReturnType(derived());
    }
    EIGEN_DEVICE_FUNC ColwiseReturnType colwise();
    typedef CwiseNullaryOp<internal::scalar_random_op<Scalar>,PlainObject> RandomReturnType;
    static const RandomReturnType Random(Index rows, Index cols);
    static const RandomReturnType Random(Index size);
    static const RandomReturnType Random();
    template<typename ThenDerived,typename ElseDerived>
    const Select<Derived,ThenDerived,ElseDerived>
@ -464,45 +529,56 @@ template<typename Derived> class DenseBase
    template<int p> RealScalar lpNorm() const;
    template<int RowFactor, int ColFactor>
-    inline const Replicate<Derived,RowFactor,ColFactor> replicate() const;
+    EIGEN_DEVICE_FUNC
-    
+    const Replicate<Derived,RowFactor,ColFactor> replicate() const;
-    typedef Replicate<Derived,Dynamic,Dynamic> ReplicateReturnType;
+    /**
-    inline const ReplicateReturnType replicate(Index rowFacor,Index colFactor) const;
+    * \return an expression of the replication of \c *this
    *
    * Example: \include MatrixBase_replicate_int_int.cpp
    * Output: \verbinclude MatrixBase_replicate_int_int.out
    *
    * \sa VectorwiseOp::replicate(), DenseBase::replicate<int,int>(), class Replicate
    */
    //Code moved here due to a CUDA compiler bug
    EIGEN_DEVICE_FUNC
    const Replicate<Derived, Dynamic, Dynamic> replicate(Index rowFactor, Index colFactor) const
    {
      return Replicate<Derived, Dynamic, Dynamic>(derived(), rowFactor, colFactor);
    }
    typedef Reverse<Derived, BothDirections> ReverseReturnType;
    typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;
-    ReverseReturnType reverse();
+    EIGEN_DEVICE_FUNC ReverseReturnType reverse();
-    ConstReverseReturnType reverse() const;
+    /** This is the const version of reverse(). */
-    void reverseInPlace();
+    //Code moved here due to a CUDA compiler bug
    EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const
    {
      return ConstReverseReturnType(derived());
    }
    EIGEN_DEVICE_FUNC void reverseInPlace();
 #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
 #define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
 #define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND)
 #   include "../plugins/BlockMethods.h"
 #   ifdef EIGEN_DENSEBASE_PLUGIN
 #     include EIGEN_DENSEBASE_PLUGIN
 #   endif
 #undef EIGEN_CURRENT_STORAGE_BASE_CLASS
-
+#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
-#ifdef EIGEN2_SUPPORT
+#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF
    Block<Derived> corner(CornerType type, Index cRows, Index cCols);
    const Block<Derived> corner(CornerType type, Index cRows, Index cCols) const;
    template<int CRows, int CCols>
    Block<Derived, CRows, CCols> corner(CornerType type);
    template<int CRows, int CCols>
    const Block<Derived, CRows, CCols> corner(CornerType type) const;
 #endif // EIGEN2_SUPPORT
    // disable the use of evalTo for dense objects with a nice compilation error
-    template<typename Dest> inline void evalTo(Dest& ) const
+    template<typename Dest>
    EIGEN_DEVICE_FUNC
    inline void evalTo(Dest& ) const
    {
      EIGEN_STATIC_ASSERT((internal::is_same<Dest,void>::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
    }
  protected:
    /** Default constructor. Do nothing. */
-    DenseBase()
+    EIGEN_DEVICE_FUNC DenseBase()
    {
      /* Just checks for self-consistency of the flags.
       * Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down
@ -515,9 +591,9 @@ template<typename Derived> class DenseBase
    }
  private:
-    explicit DenseBase(int);
+    EIGEN_DEVICE_FUNC explicit DenseBase(int);
-    DenseBase(int,int);
+    EIGEN_DEVICE_FUNC DenseBase(int,int);
-    template<typename OtherDerived> explicit DenseBase(const DenseBase<OtherDerived>&);
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase<OtherDerived>&);
 };
 } // end namespace Eigen
--- a/eigenlib/Eigen/src/Core/DenseCoeffsBase.h
+++ b/eigenlib/Eigen/src/Core/DenseCoeffsBase.h
@ -35,7 +35,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
 {
  public:
    typedef typename internal::traits<Derived>::StorageKind StorageKind;
    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
@ -61,6 +60,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
    using Base::size;
    using Base::derived;
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const
    {
      return int(Derived::RowsAtCompileTime) == 1 ? 0
@ -69,6 +69,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
          : inner;
    }
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const
    {
      return int(Derived::ColsAtCompileTime) == 1 ? 0
@ -91,13 +92,15 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      *
      * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
    {
      eigen_internal_assert(row >= 0 && row < rows()
-                        && col >= 0 && col < cols());
+                         && col >= 0 && col < cols());
-      return derived().coeff(row, col);
+      return internal::evaluator<Derived>(derived()).coeff(row,col);
    }
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
    {
      return coeff(rowIndexByOuterInner(outer, inner),
@ -108,11 +111,12 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      *
      * \sa operator()(Index,Index), operator[](Index)
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const
    {
      eigen_assert(row >= 0 && row < rows()
          && col >= 0 && col < cols());
-      return derived().coeff(row, col);
+      return coeff(row, col);
    }
    /** Short version: don't use this function, use
@ -130,11 +134,14 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
    coeff(Index index) const
    {
      EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
      eigen_internal_assert(index >= 0 && index < size());
-      return derived().coeff(index);
+      return internal::evaluator<Derived>(derived()).coeff(index);
    }
@ -146,15 +153,14 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      * z() const, w() const
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
    operator[](Index index) const
    {
      #ifndef EIGEN2_SUPPORT
      EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
                          THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
      #endif
      eigen_assert(index >= 0 && index < size());
-      return derived().coeff(index);
+      return coeff(index);
    }
    /** \returns the coefficient at given index.
@ -167,32 +173,49 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      * z() const, w() const
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
    operator()(Index index) const
    {
      eigen_assert(index >= 0 && index < size());
-      return derived().coeff(index);
+      return coeff(index);
    }
    /** equivalent to operator[](0).  */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
    x() const { return (*this)[0]; }
    /** equivalent to operator[](1).  */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
-    y() const { return (*this)[1]; }
+    y() const
    {
      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
      return (*this)[1];
    }
    /** equivalent to operator[](2).  */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
-    z() const { return (*this)[2]; }
+    z() const
    {
      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
      return (*this)[2];
    }
    /** equivalent to operator[](3).  */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
-    w() const { return (*this)[3]; }
+    w() const
    {
      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
      return (*this)[3];
    }
    /** \internal
      * \returns the packet of coefficients starting at the given row and column. It is your responsibility
@ -207,9 +230,9 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const
    {
-      eigen_internal_assert(row >= 0 && row < rows()
+      typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
-                      && col >= 0 && col < cols());
+      eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
-      return derived().template packet<LoadMode>(row,col);
+      return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(row,col);
    }
@ -234,8 +257,11 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
    {
      EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
      typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
      eigen_internal_assert(index >= 0 && index < size());
-      return derived().template packet<LoadMode>(index);
+      return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(index);
    }
  protected:
@ -278,7 +304,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
    typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
    typedef typename internal::traits<Derived>::StorageKind StorageKind;
    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;
@ -311,13 +336,15 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      *
      * \sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index)
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
    {
      eigen_internal_assert(row >= 0 && row < rows()
-                        && col >= 0 && col < cols());
+                         && col >= 0 && col < cols());
-      return derived().coeffRef(row, col);
+      return internal::evaluator<Derived>(derived()).coeffRef(row,col);
    }
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    coeffRefByOuterInner(Index outer, Index inner)
    {
@ -330,12 +357,13 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      * \sa operator[](Index)
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    operator()(Index row, Index col)
    {
      eigen_assert(row >= 0 && row < rows()
          && col >= 0 && col < cols());
-      return derived().coeffRef(row, col);
+      return coeffRef(row, col);
    }
@ -354,11 +382,14 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      * \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index)
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    coeffRef(Index index)
    {
      EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
      eigen_internal_assert(index >= 0 && index < size());
-      return derived().coeffRef(index);
+      return internal::evaluator<Derived>(derived()).coeffRef(index);
    }
    /** \returns a reference to the coefficient at given index.
@ -368,15 +399,14 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    operator[](Index index)
    {
      #ifndef EIGEN2_SUPPORT
      EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
                          THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
      #endif
      eigen_assert(index >= 0 && index < size());
-      return derived().coeffRef(index);
+      return coeffRef(index);
    }
    /** \returns a reference to the coefficient at given index.
@ -388,167 +418,49 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    operator()(Index index)
    {
      eigen_assert(index >= 0 && index < size());
-      return derived().coeffRef(index);
+      return coeffRef(index);
    }
    /** equivalent to operator[](0).  */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    x() { return (*this)[0]; }
    /** equivalent to operator[](1).  */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
-    y() { return (*this)[1]; }
+    y()
    {
      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
      return (*this)[1];
    }
    /** equivalent to operator[](2).  */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
-    z() { return (*this)[2]; }
+    z()
    {
      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
      return (*this)[2];
    }
    /** equivalent to operator[](3).  */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
-    w() { return (*this)[3]; }
+    w()
    /** \internal
      * Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility
      * to ensure that a packet really starts there. This method is only available on expressions having the
      * PacketAccessBit.
      *
      * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
      * starting at an address which is a multiple of the packet size.
      */
    template<int StoreMode>
    EIGEN_STRONG_INLINE void writePacket
    (Index row, Index col, const typename internal::packet_traits<Scalar>::type& val)
    {
-      eigen_internal_assert(row >= 0 && row < rows()
+      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
-                        && col >= 0 && col < cols());
+      return (*this)[3];
      derived().template writePacket<StoreMode>(row,col,val);
    }
    /** \internal */
    template<int StoreMode>
    EIGEN_STRONG_INLINE void writePacketByOuterInner
    (Index outer, Index inner, const typename internal::packet_traits<Scalar>::type& val)
    {
      writePacket<StoreMode>(rowIndexByOuterInner(outer, inner),
                            colIndexByOuterInner(outer, inner),
                            val);
    }
    /** \internal
      * Stores the given packet of coefficients, at the given index in this expression. It is your responsibility
      * to ensure that a packet really starts there. This method is only available on expressions having the
      * PacketAccessBit and the LinearAccessBit.
      *
      * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
      * starting at an address which is a multiple of the packet size.
      */
    template<int StoreMode>
    EIGEN_STRONG_INLINE void writePacket
    (Index index, const typename internal::packet_traits<Scalar>::type& val)
    {
      eigen_internal_assert(index >= 0 && index < size());
      derived().template writePacket<StoreMode>(index,val);
    }
 #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** \internal Copies the coefficient at position (row,col) of other into *this.
      *
      * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
      * with usual assignments.
      *
      * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
      */
    template<typename OtherDerived>
    EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
    {
      eigen_internal_assert(row >= 0 && row < rows()
                        && col >= 0 && col < cols());
      derived().coeffRef(row, col) = other.derived().coeff(row, col);
    }
    /** \internal Copies the coefficient at the given index of other into *this.
      *
      * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
      * with usual assignments.
      *
      * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
      */
    template<typename OtherDerived>
    EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
    {
      eigen_internal_assert(index >= 0 && index < size());
      derived().coeffRef(index) = other.derived().coeff(index);
    }
    template<typename OtherDerived>
    EIGEN_STRONG_INLINE void copyCoeffByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
    {
      const Index row = rowIndexByOuterInner(outer,inner);
      const Index col = colIndexByOuterInner(outer,inner);
      // derived() is important here: copyCoeff() may be reimplemented in Derived!
      derived().copyCoeff(row, col, other);
    }
    /** \internal Copies the packet at position (row,col) of other into *this.
      *
      * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
      * with usual assignments.
      *
      * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
      */
    template<typename OtherDerived, int StoreMode, int LoadMode>
    EIGEN_STRONG_INLINE void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
    {
      eigen_internal_assert(row >= 0 && row < rows()
                        && col >= 0 && col < cols());
      derived().template writePacket<StoreMode>(row, col,
        other.derived().template packet<LoadMode>(row, col));
    }
    /** \internal Copies the packet at the given index of other into *this.
      *
      * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
      * with usual assignments.
      *
      * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
      */
    template<typename OtherDerived, int StoreMode, int LoadMode>
    EIGEN_STRONG_INLINE void copyPacket(Index index, const DenseBase<OtherDerived>& other)
    {
      eigen_internal_assert(index >= 0 && index < size());
      derived().template writePacket<StoreMode>(index,
        other.derived().template packet<LoadMode>(index));
    }
    /** \internal */
    template<typename OtherDerived, int StoreMode, int LoadMode>
    EIGEN_STRONG_INLINE void copyPacketByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
    {
      const Index row = rowIndexByOuterInner(outer,inner);
      const Index col = colIndexByOuterInner(outer,inner);
      // derived() is important here: copyCoeff() may be reimplemented in Derived!
      derived().template copyPacket< OtherDerived, StoreMode, LoadMode>(row, col, other);
    }
 #endif
 };
 /** \brief Base class providing direct read-only coefficient access to matrices and arrays.
@ -560,7 +472,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
  * inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using
  * \c operator() .
  *
-  * \sa \ref TopicClassHierarchy
+  * \sa \blank \ref TopicClassHierarchy
  */
 template<typename Derived>
 class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
@ -568,7 +480,6 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
  public:
    typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;
@ -581,6 +492,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
      *
      * \sa outerStride(), rowStride(), colStride()
      */
    EIGEN_DEVICE_FUNC
    inline Index innerStride() const
    {
      return derived().innerStride();
@ -591,6 +503,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
      *
      * \sa innerStride(), rowStride(), colStride()
      */
    EIGEN_DEVICE_FUNC
    inline Index outerStride() const
    {
      return derived().outerStride();
@ -606,6 +519,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
      *
      * \sa innerStride(), outerStride(), colStride()
      */
    EIGEN_DEVICE_FUNC
    inline Index rowStride() const
    {
      return Derived::IsRowMajor ? outerStride() : innerStride();
@ -615,6 +529,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
      *
      * \sa innerStride(), outerStride(), rowStride()
      */
    EIGEN_DEVICE_FUNC
    inline Index colStride() const
    {
      return Derived::IsRowMajor ? innerStride() : outerStride();
@ -630,7 +545,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
  * inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries read/write using
  * \c operator().
  *
-  * \sa \ref TopicClassHierarchy
+  * \sa \blank \ref TopicClassHierarchy
  */
 template<typename Derived>
 class DenseCoeffsBase<Derived, DirectWriteAccessors>
@ -639,7 +554,6 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
  public:
    typedef DenseCoeffsBase<Derived, WriteAccessors> Base;
    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;
@ -652,6 +566,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
      *
      * \sa outerStride(), rowStride(), colStride()
      */
    EIGEN_DEVICE_FUNC
    inline Index innerStride() const
    {
      return derived().innerStride();
@ -662,6 +577,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
      *
      * \sa innerStride(), rowStride(), colStride()
      */
    EIGEN_DEVICE_FUNC
    inline Index outerStride() const
    {
      return derived().outerStride();
@ -677,6 +593,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
      *
      * \sa innerStride(), outerStride(), colStride()
      */
    EIGEN_DEVICE_FUNC
    inline Index rowStride() const
    {
      return Derived::IsRowMajor ? outerStride() : innerStride();
@ -686,6 +603,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
      *
      * \sa innerStride(), outerStride(), rowStride()
      */
    EIGEN_DEVICE_FUNC
    inline Index colStride() const
    {
      return Derived::IsRowMajor ? innerStride() : outerStride();
@ -694,33 +612,42 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
 namespace internal {
-template<typename Derived, bool JustReturnZero>
+template<int Alignment, typename Derived, bool JustReturnZero>
 struct first_aligned_impl
 {
-  static inline typename Derived::Index run(const Derived&)
+  static inline Index run(const Derived&)
  { return 0; }
 };
-template<typename Derived>
+template<int Alignment, typename Derived>
-struct first_aligned_impl<Derived, false>
+struct first_aligned_impl<Alignment, Derived, false>
 {
-  static inline typename Derived::Index run(const Derived& m)
+  static inline Index run(const Derived& m)
  {
-    return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
+    return internal::first_aligned<Alignment>(m.data(), m.size());
  }
 };
-/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
+/** \internal \returns the index of the first element of the array stored by \a m that is properly aligned with respect to \a Alignment for vectorization.
  *
  * \tparam Alignment requested alignment in Bytes.
  *
  * There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more
  * documentation.
  */
-template<typename Derived>
+template<int Alignment, typename Derived>
-static inline typename Derived::Index first_aligned(const Derived& m)
+static inline Index first_aligned(const DenseBase<Derived>& m)
 {
-  return first_aligned_impl
+  enum { ReturnZero = (int(evaluator<Derived>::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) };
-          <Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
+  return first_aligned_impl<Alignment, Derived, ReturnZero>::run(m.derived());
-          ::run(m);
+}
 template<typename Derived>
 static inline Index first_default_aligned(const DenseBase<Derived>& m)
 {
  typedef typename Derived::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type DefaultPacketType;
  return internal::first_aligned<int(unpacket_traits<DefaultPacketType>::alignment),Derived>(m);
 }
 template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
--- a/eigenlib/Eigen/src/Core/DenseStorage.h
+++ b/eigenlib/Eigen/src/Core/DenseStorage.h
@ -3,7 +3,7 @@
 //
 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
+// Copyright (C) 2010-2013 Hauke Heibel <hauke.heibel@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@ -24,7 +24,9 @@ namespace internal {
 struct constructor_without_unaligned_array_assert {};
-template<typename T, int Size> void check_static_allocation_size()
+template<typename T, int Size>
 EIGEN_DEVICE_FUNC
 void check_static_allocation_size()
 {
  // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
  #if EIGEN_STACK_ALLOCATION_LIMIT
@ -38,18 +40,19 @@ template<typename T, int Size> void check_static_allocation_size()
  */
 template <typename T, int Size, int MatrixOrArrayOptions,
          int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
-                        : (((Size*sizeof(T))%16)==0) ? 16
+                        : compute_default_alignment<T,Size>::value >
                        : 0 >
 struct plain_array
 {
  T array[Size];
-  plain_array() 
+  EIGEN_DEVICE_FUNC
  plain_array()
  { 
    check_static_allocation_size<T,Size>();
  }
-  plain_array(constructor_without_unaligned_array_assert) 
+  EIGEN_DEVICE_FUNC
  plain_array(constructor_without_unaligned_array_assert)
  { 
    check_static_allocation_size<T,Size>();
  }
@ -64,29 +67,88 @@ struct plain_array
  template<typename PtrType>
  EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; }
  #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
-    eigen_assert((reinterpret_cast<size_t>(eigen_unaligned_array_assert_workaround_gcc47(array)) & sizemask) == 0 \
+    eigen_assert((internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \
              && "this assertion is explained here: " \
              "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
              " **** READ THIS WEB PAGE !!! ****");
 #else
  #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
-    eigen_assert((reinterpret_cast<size_t>(array) & sizemask) == 0 \
+    eigen_assert((internal::UIntPtr(array) & (sizemask)) == 0 \
              && "this assertion is explained here: " \
              "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
              " **** READ THIS WEB PAGE !!! ****");
 #endif
 template <typename T, int Size, int MatrixOrArrayOptions>
-struct plain_array<T, Size, MatrixOrArrayOptions, 16>
+struct plain_array<T, Size, MatrixOrArrayOptions, 8>
 {
-  EIGEN_USER_ALIGN16 T array[Size];
+  EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];
  EIGEN_DEVICE_FUNC
  plain_array() 
-  { 
+  {
-    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf);
+    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
    check_static_allocation_size<T,Size>();
  }
  EIGEN_DEVICE_FUNC
  plain_array(constructor_without_unaligned_array_assert) 
  { 
    check_static_allocation_size<T,Size>();
  }
 };
 template <typename T, int Size, int MatrixOrArrayOptions>
 struct plain_array<T, Size, MatrixOrArrayOptions, 16>
 {
  EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];
  EIGEN_DEVICE_FUNC
  plain_array() 
  { 
    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
    check_static_allocation_size<T,Size>();
  }
  EIGEN_DEVICE_FUNC
  plain_array(constructor_without_unaligned_array_assert) 
  { 
    check_static_allocation_size<T,Size>();
  }
 };
 template <typename T, int Size, int MatrixOrArrayOptions>
 struct plain_array<T, Size, MatrixOrArrayOptions, 32>
 {
  EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];
  EIGEN_DEVICE_FUNC
  plain_array() 
  {
    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
    check_static_allocation_size<T,Size>();
  }
  EIGEN_DEVICE_FUNC
  plain_array(constructor_without_unaligned_array_assert) 
  { 
    check_static_allocation_size<T,Size>();
  }
 };
 template <typename T, int Size, int MatrixOrArrayOptions>
 struct plain_array<T, Size, MatrixOrArrayOptions, 64>
 {
  EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];
  EIGEN_DEVICE_FUNC
  plain_array() 
  { 
    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
    check_static_allocation_size<T,Size>();
  }
  EIGEN_DEVICE_FUNC
  plain_array(constructor_without_unaligned_array_assert) 
  { 
    check_static_allocation_size<T,Size>();
@ -96,9 +158,9 @@ struct plain_array<T, Size, MatrixOrArrayOptions, 16>
 template <typename T, int MatrixOrArrayOptions, int Alignment>
 struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
 {
-  EIGEN_USER_ALIGN16 T array[1];
+  T array[1];
-  plain_array() {}
+  EIGEN_DEVICE_FUNC plain_array() {}
-  plain_array(constructor_without_unaligned_array_assert) {}
+  EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
 };
 } // end namespace internal
@ -122,33 +184,50 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
 {
    internal::plain_array<T,Size,_Options> m_data;
  public:
-    inline DenseStorage() {}
+    EIGEN_DEVICE_FUNC DenseStorage() {}
-    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
+    EIGEN_DEVICE_FUNC
    explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()) {}
-    inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
+    EIGEN_DEVICE_FUNC 
-    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
+    DenseStorage(const DenseStorage& other) : m_data(other.m_data) {}
-    static inline DenseIndex rows(void) {return _Rows;}
+    EIGEN_DEVICE_FUNC 
-    static inline DenseIndex cols(void) {return _Cols;}
+    DenseStorage& operator=(const DenseStorage& other)
-    inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
+    { 
-    inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
+      if (this != &other) m_data = other.m_data;
-    inline const T *data() const { return m_data.array; }
+      return *this; 
-    inline T *data() { return m_data.array; }
+    }
    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {
      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
      eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols);
      EIGEN_UNUSED_VARIABLE(size);
      EIGEN_UNUSED_VARIABLE(rows);
      EIGEN_UNUSED_VARIABLE(cols);
    }
    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
    EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
    EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
    EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
    EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
 };
 // null matrix
 template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>
 {
  public:
-    inline DenseStorage() {}
+    EIGEN_DEVICE_FUNC DenseStorage() {}
-    inline DenseStorage(internal::constructor_without_unaligned_array_assert) {}
+    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {}
-    inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}
-    inline void swap(DenseStorage& ) {}
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
-    static inline DenseIndex rows(void) {return _Rows;}
+    EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}
-    static inline DenseIndex cols(void) {return _Cols;}
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
-    inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
+    EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
-    inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
+    EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
-    inline const T *data() const { return 0; }
+    EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
-    inline T *data() { return 0; }
+    EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
    EIGEN_DEVICE_FUNC const T *data() const { return 0; }
    EIGEN_DEVICE_FUNC T *data() { return 0; }
 };
 // more specializations for null matrices; these are necessary to resolve ambiguities
@ -165,86 +244,157 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic,
 template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
 {
    internal::plain_array<T,Size,_Options> m_data;
-    DenseIndex m_rows;
+    Index m_rows;
-    DenseIndex m_cols;
+    Index m_cols;
  public:
-    inline DenseStorage() : m_rows(0), m_cols(0) {}
+    EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
-    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
+    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
-    inline DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) : m_rows(nbRows), m_cols(nbCols) {}
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
-    inline void swap(DenseStorage& other)
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) 
    { 
      if (this != &other)
      {
        m_data = other.m_data;
        m_rows = other.m_rows;
        m_cols = other.m_cols;
      }
      return *this; 
    }
    EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
    EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
    { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
-    inline DenseIndex rows() const {return m_rows;}
+    EIGEN_DEVICE_FUNC Index rows() const {return m_rows;}
-    inline DenseIndex cols() const {return m_cols;}
+    EIGEN_DEVICE_FUNC Index cols() const {return m_cols;}
-    inline void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
+    EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
-    inline void resize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
+    EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
-    inline const T *data() const { return m_data.array; }
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
-    inline T *data() { return m_data.array; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
 };
 // dynamic-size matrix with fixed-size storage and fixed width
 template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Size, Dynamic, _Cols, _Options>
 {
    internal::plain_array<T,Size,_Options> m_data;
-    DenseIndex m_rows;
+    Index m_rows;
  public:
-    inline DenseStorage() : m_rows(0) {}
+    EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
-    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
+    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
-    inline DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex) : m_rows(nbRows) {}
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
-    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) 
-    inline DenseIndex rows(void) const {return m_rows;}
+    {
-    inline DenseIndex cols(void) const {return _Cols;}
+      if (this != &other)
-    inline void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
+      {
-    inline void resize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
+        m_data = other.m_data;
-    inline const T *data() const { return m_data.array; }
+        m_rows = other.m_rows;
-    inline T *data() { return m_data.array; }
+      }
      return *this; 
    }
    EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
    EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
    EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;}
    EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
    EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; }
    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
 };
 // dynamic-size matrix with fixed-size storage and fixed height
 template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Size, _Rows, Dynamic, _Options>
 {
    internal::plain_array<T,Size,_Options> m_data;
-    DenseIndex m_cols;
+    Index m_cols;
  public:
-    inline DenseStorage() : m_cols(0) {}
+    EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
-    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
+    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
-    inline DenseStorage(DenseIndex, DenseIndex, DenseIndex nbCols) : m_cols(nbCols) {}
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
-    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
-    inline DenseIndex rows(void) const {return _Rows;}
+    {
-    inline DenseIndex cols(void) const {return m_cols;}
+      if (this != &other)
-    inline void conservativeResize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
+      {
-    inline void resize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
+        m_data = other.m_data;
-    inline const T *data() const { return m_data.array; }
+        m_cols = other.m_cols;
-    inline T *data() { return m_data.array; }
+      }
      return *this;
    }
    EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {}
    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
    EIGEN_DEVICE_FUNC Index rows(void) const {return _Rows;}
    EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
    void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
    void resize(Index, Index, Index cols) { m_cols = cols; }
    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
 };
 // purely dynamic matrix.
 template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynamic, _Options>
 {
    T *m_data;
-    DenseIndex m_rows;
+    Index m_rows;
-    DenseIndex m_cols;
+    Index m_cols;
  public:
-    inline DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
+    EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
-    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
+    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
       : m_data(0), m_rows(0), m_cols(0) {}
-    inline DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
+    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
-      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows), m_cols(nbCols)
+      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
-    { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
+    {
-    inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
-    inline void swap(DenseStorage& other)
+      eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0);
    }
    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))
      , m_rows(other.m_rows)
      , m_cols(other.m_cols)
    {
      internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data);
    }
    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
    {
      if (this != &other)
      {
        DenseStorage tmp(other);
        this->swap(tmp);
      }
      return *this;
    }
 #if EIGEN_HAS_RVALUE_REFERENCES
    EIGEN_DEVICE_FUNC
    DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
      : m_data(std::move(other.m_data))
      , m_rows(std::move(other.m_rows))
      , m_cols(std::move(other.m_cols))
    {
      other.m_data = nullptr;
      other.m_rows = 0;
      other.m_cols = 0;
    }
    EIGEN_DEVICE_FUNC
    DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
    {
      using std::swap;
      swap(m_data, other.m_data);
      swap(m_rows, other.m_rows);
      swap(m_cols, other.m_cols);
      return *this;
    }
 #endif
    EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
    EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
    { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
-    inline DenseIndex rows(void) const {return m_rows;}
+    EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
-    inline DenseIndex cols(void) const {return m_cols;}
+    EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
-    inline void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
+    void conservativeResize(Index size, Index rows, Index cols)
    {
      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
-      m_rows = nbRows;
+      m_rows = rows;
-      m_cols = nbCols;
+      m_cols = cols;
    }
-    void resize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
+    EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols)
    {
      if(size != m_rows*m_cols)
      {
@ -255,33 +405,70 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
          m_data = 0;
        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
      }
-      m_rows = nbRows;
+      m_rows = rows;
-      m_cols = nbCols;
+      m_cols = cols;
    }
-    inline const T *data() const { return m_data; }
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
-    inline T *data() { return m_data; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data; }
 };
 // matrix with dynamic width and fixed height (so that matrix has dynamic size).
 template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Rows, Dynamic, _Options>
 {
    T *m_data;
-    DenseIndex m_cols;
+    Index m_cols;
  public:
-    inline DenseStorage() : m_data(0), m_cols(0) {}
+    EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
-    inline DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
+    explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
-    inline DenseStorage(DenseIndex size, DenseIndex, DenseIndex nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(nbCols)
+    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
-    { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
+    {
-    inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
-    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+      eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0);
-    static inline DenseIndex rows(void) {return _Rows;}
+      EIGEN_UNUSED_VARIABLE(rows);
-    inline DenseIndex cols(void) const {return m_cols;}
+    }
-    inline void conservativeResize(DenseIndex size, DenseIndex, DenseIndex nbCols)
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
      , m_cols(other.m_cols)
    {
      internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
    }
    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
    {
      if (this != &other)
      {
        DenseStorage tmp(other);
        this->swap(tmp);
      }
      return *this;
    }    
 #if EIGEN_HAS_RVALUE_REFERENCES
    EIGEN_DEVICE_FUNC
    DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
      : m_data(std::move(other.m_data))
      , m_cols(std::move(other.m_cols))
    {
      other.m_data = nullptr;
      other.m_cols = 0;
    }
    EIGEN_DEVICE_FUNC
    DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
    {
      using std::swap;
      swap(m_data, other.m_data);
      swap(m_cols, other.m_cols);
      return *this;
    }
 #endif
    EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
    EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
    EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
    EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols)
    {
      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
-      m_cols = nbCols;
+      m_cols = cols;
    }
-    EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex, DenseIndex nbCols)
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols)
    {
      if(size != _Rows*m_cols)
      {
@ -292,32 +479,69 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
          m_data = 0;
        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
      }
-      m_cols = nbCols;
+      m_cols = cols;
    }
-    inline const T *data() const { return m_data; }
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
-    inline T *data() { return m_data; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data; }
 };
 // matrix with dynamic height and fixed width (so that matrix has dynamic size).
 template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dynamic, _Cols, _Options>
 {
    T *m_data;
-    DenseIndex m_rows;
+    Index m_rows;
  public:
-    inline DenseStorage() : m_data(0), m_rows(0) {}
+    EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
-    inline DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
+    explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
-    inline DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows)
+    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
-    { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
+    {
-    inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
-    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+      eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols);
-    inline DenseIndex rows(void) const {return m_rows;}
+      EIGEN_UNUSED_VARIABLE(cols);
-    static inline DenseIndex cols(void) {return _Cols;}
+    }
-    inline void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex)
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
      , m_rows(other.m_rows)
    {
      internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
    }
    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
    {
      if (this != &other)
      {
        DenseStorage tmp(other);
        this->swap(tmp);
      }
      return *this;
    }    
 #if EIGEN_HAS_RVALUE_REFERENCES
    EIGEN_DEVICE_FUNC
    DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
      : m_data(std::move(other.m_data))
      , m_rows(std::move(other.m_rows))
    {
      other.m_data = nullptr;
      other.m_rows = 0;
    }
    EIGEN_DEVICE_FUNC
    DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
    {
      using std::swap;
      swap(m_data, other.m_data);
      swap(m_rows, other.m_rows);
      return *this;
    }
 #endif
    EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
    EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
    EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
    void conservativeResize(Index size, Index rows, Index)
    {
      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
-      m_rows = nbRows;
+      m_rows = rows;
    }
-    EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex nbRows, DenseIndex)
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index)
    {
      if(size != m_rows*_Cols)
      {
@ -328,10 +552,10 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
          m_data = 0;
        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
      }
-      m_rows = nbRows;
+      m_rows = rows;
    }
-    inline const T *data() const { return m_data; }
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
-    inline T *data() { return m_data; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data; }
 };
 } // end namespace Eigen
--- a/eigenlib/Eigen/src/Core/Diagonal.h
+++ b/eigenlib/Eigen/src/Core/Diagonal.h
@ -37,7 +37,7 @@ template<typename MatrixType, int DiagIndex>
 struct traits<Diagonal<MatrixType,DiagIndex> >
 : traits<MatrixType>
 {
-  typedef typename nested<MatrixType>::type MatrixTypeNested;
+  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
  typedef typename MatrixType::StorageKind StorageKind;
  enum {
@ -52,8 +52,7 @@ struct traits<Diagonal<MatrixType,DiagIndex> >
                                                 MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
    MaxColsAtCompileTime = 1,
    MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
-    Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit,
+    Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions
    CoeffReadCost = _MatrixTypeNested::CoeffReadCost,
    MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret,
    InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1,
    OuterStrideAtCompileTime = 0
@ -70,20 +69,28 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
    typedef typename internal::dense_xpr_base<Diagonal>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
-    inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
+    EIGEN_DEVICE_FUNC
    explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
    EIGEN_DEVICE_FUNC
    inline Index rows() const
-    { return m_index.value()<0 ? (std::min<Index>)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min<Index>)(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
+    {
      return m_index.value()<0 ? numext::mini<Index>(m_matrix.cols(),m_matrix.rows()+m_index.value())
                               : numext::mini<Index>(m_matrix.rows(),m_matrix.cols()-m_index.value());
    }
    EIGEN_DEVICE_FUNC
    inline Index cols() const { return 1; }
    EIGEN_DEVICE_FUNC
    inline Index innerStride() const
    {
      return m_matrix.outerStride() + 1;
    }
    EIGEN_DEVICE_FUNC
    inline Index outerStride() const
    {
      return 0;
@ -95,62 +102,75 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
                       const Scalar
                     >::type ScalarWithConstIfNotLvalue;
-    inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
+    EIGEN_DEVICE_FUNC
-    inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
+    inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
    EIGEN_DEVICE_FUNC
    inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
    EIGEN_DEVICE_FUNC
    inline Scalar& coeffRef(Index row, Index)
    {
      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
-      return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
+      return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
    }
    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index row, Index) const
    {
-      return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
+      return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
    }
    EIGEN_DEVICE_FUNC
    inline CoeffReturnType coeff(Index row, Index) const
    {
      return m_matrix.coeff(row+rowOffset(), row+colOffset());
    }
    EIGEN_DEVICE_FUNC
    inline Scalar& coeffRef(Index idx)
    {
      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
-      return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
+      return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
    }
    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index idx) const
    {
-      return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
+      return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
    }
    EIGEN_DEVICE_FUNC
    inline CoeffReturnType coeff(Index idx) const
    {
      return m_matrix.coeff(idx+rowOffset(), idx+colOffset());
    }
-    const typename internal::remove_all<typename MatrixType::Nested>::type& 
+    EIGEN_DEVICE_FUNC
    inline const typename internal::remove_all<typename MatrixType::Nested>::type& 
    nestedExpression() const 
    {
      return m_matrix;
    }
-    int index() const
+    EIGEN_DEVICE_FUNC
    inline Index index() const
    {
      return m_index.value();
    }
  protected:
-    typename MatrixType::Nested m_matrix;
+    typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
    const internal::variable_if_dynamicindex<Index, DiagIndex> m_index;
  private:
    // some compilers may fail to optimize std::max etc in case of compile-time constants...
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
-    // triger a compile time error is someone try to call packet
+    // trigger a compile-time error if someone try to call packet
    template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
    template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const;
 };
@ -167,7 +187,7 @@ template<typename Derived>
 inline typename MatrixBase<Derived>::DiagonalReturnType
 MatrixBase<Derived>::diagonal()
 {
-  return derived();
+  return DiagonalReturnType(derived());
 }
 /** This is the const version of diagonal(). */
@ -216,20 +236,20 @@ MatrixBase<Derived>::diagonal(Index index) const
  *
  * \sa MatrixBase::diagonal(), class Diagonal */
 template<typename Derived>
-template<int Index>
+template<int Index_>
-inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index>::Type
+inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index_>::Type
 MatrixBase<Derived>::diagonal()
 {
-  return derived();
+  return typename DiagonalIndexReturnType<Index_>::Type(derived());
 }
 /** This is the const version of diagonal<int>(). */
 template<typename Derived>
-template<int Index>
+template<int Index_>
-inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index>::Type
+inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index_>::Type
 MatrixBase<Derived>::diagonal() const
 {
-  return derived();
+  return typename ConstDiagonalIndexReturnType<Index_>::Type(derived());
 }
 } // end namespace Eigen
--- a/eigenlib/Eigen/src/Core/DiagonalMatrix.h
+++ b/eigenlib/Eigen/src/Core/DiagonalMatrix.h
@ -22,7 +22,7 @@ class DiagonalBase : public EigenBase<Derived>
    typedef typename DiagonalVectorType::Scalar Scalar;
    typedef typename DiagonalVectorType::RealScalar RealScalar;
    typedef typename internal::traits<Derived>::StorageKind StorageKind;
-    typedef typename internal::traits<Derived>::Index Index;
+    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
    enum {
      RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
@ -30,79 +30,61 @@ class DiagonalBase : public EigenBase<Derived>
      MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
      MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
      IsVectorAtCompileTime = 0,
-      Flags = 0
+      Flags = NoPreferredStorageOrderBit
    };
    typedef Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, 0, MaxRowsAtCompileTime, MaxColsAtCompileTime> DenseMatrixType;
    typedef DenseMatrixType DenseType;
    typedef DiagonalMatrix<Scalar,DiagonalVectorType::SizeAtCompileTime,DiagonalVectorType::MaxSizeAtCompileTime> PlainObject;
    EIGEN_DEVICE_FUNC
    inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
    EIGEN_DEVICE_FUNC
    inline Derived& derived() { return *static_cast<Derived*>(this); }
    EIGEN_DEVICE_FUNC
    DenseMatrixType toDenseMatrix() const { return derived(); }
-    template<typename DenseDerived>
+    
-    void evalTo(MatrixBase<DenseDerived> &other) const;
+    EIGEN_DEVICE_FUNC
    template<typename DenseDerived>
    void addTo(MatrixBase<DenseDerived> &other) const
    { other.diagonal() += diagonal(); }
    template<typename DenseDerived>
    void subTo(MatrixBase<DenseDerived> &other) const
    { other.diagonal() -= diagonal(); }
    inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }
    EIGEN_DEVICE_FUNC
    inline DiagonalVectorType& diagonal() { return derived().diagonal(); }
    EIGEN_DEVICE_FUNC
    inline Index rows() const { return diagonal().size(); }
    EIGEN_DEVICE_FUNC
    inline Index cols() const { return diagonal().size(); }
    /** \returns the diagonal matrix product of \c *this by the matrix \a matrix.
      */
    template<typename MatrixDerived>
-    const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
+    EIGEN_DEVICE_FUNC
    const Product<Derived,MatrixDerived,LazyProduct>
    operator*(const MatrixBase<MatrixDerived> &matrix) const
    {
-      return DiagonalProduct<MatrixDerived, Derived, OnTheLeft>(matrix.derived(), derived());
+      return Product<Derived, MatrixDerived, LazyProduct>(derived(),matrix.derived());
    }
-    inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
+    typedef DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> > InverseReturnType;
    EIGEN_DEVICE_FUNC
    inline const InverseReturnType
    inverse() const
    {
-      return diagonal().cwiseInverse();
+      return InverseReturnType(diagonal().cwiseInverse());
    }
-    inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
+    EIGEN_DEVICE_FUNC
    inline const DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >
    operator*(const Scalar& scalar) const
    {
-      return diagonal() * scalar;
+      return DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >(diagonal() * scalar);
    }
-    friend inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
+    EIGEN_DEVICE_FUNC
    friend inline const DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >
    operator*(const Scalar& scalar, const DiagonalBase& other)
    {
-      return other.diagonal() * scalar;
+      return DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >(scalar * other.diagonal());
    }
    #ifdef EIGEN2_SUPPORT
    template<typename OtherDerived>
    bool isApprox(const DiagonalBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
    {
      return diagonal().isApprox(other.diagonal(), precision);
    }
    template<typename OtherDerived>
    bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
    {
      return toDenseMatrix().isApprox(other, precision);
    }
    #endif
 };
 template<typename Derived>
 template<typename DenseDerived>
 void DiagonalBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
 {
  other.setZero();
  other.diagonal() = diagonal();
 }
 #endif
 /** \class DiagonalMatrix
@ -124,10 +106,9 @@ struct traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
 : traits<Matrix<_Scalar,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
 {
  typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType;
-  typedef Dense StorageKind;
+  typedef DiagonalShape StorageKind;
  typedef DenseIndex Index;
  enum {
-    Flags = LvalueBit
+    Flags = LvalueBit | NoPreferredStorageOrderBit
  };
 };
 }
@ -141,7 +122,7 @@ class DiagonalMatrix
    typedef const DiagonalMatrix& Nested;
    typedef _Scalar Scalar;
    typedef typename internal::traits<DiagonalMatrix>::StorageKind StorageKind;
-    typedef typename internal::traits<DiagonalMatrix>::Index Index;
+    typedef typename internal::traits<DiagonalMatrix>::StorageIndex StorageIndex;
    #endif
  protected:
@ -151,24 +132,31 @@ class DiagonalMatrix
  public:
    /** const version of diagonal(). */
    EIGEN_DEVICE_FUNC
    inline const DiagonalVectorType& diagonal() const { return m_diagonal; }
    /** \returns a reference to the stored vector of diagonal coefficients. */
    EIGEN_DEVICE_FUNC
    inline DiagonalVectorType& diagonal() { return m_diagonal; }
    /** Default constructor without initialization */
    EIGEN_DEVICE_FUNC
    inline DiagonalMatrix() {}
    /** Constructs a diagonal matrix with given dimension  */
-    inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}
+    EIGEN_DEVICE_FUNC
    explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}
    /** 2D constructor. */
    EIGEN_DEVICE_FUNC
    inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {}
    /** 3D constructor. */
    EIGEN_DEVICE_FUNC
    inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {}
    /** Copy constructor. */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    inline DiagonalMatrix(const DiagonalBase<OtherDerived>& other) : m_diagonal(other.diagonal()) {}
    #ifndef EIGEN_PARSED_BY_DOXYGEN
@ -178,11 +166,13 @@ class DiagonalMatrix
    /** generic constructor from expression of the diagonal coefficients */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    explicit inline DiagonalMatrix(const MatrixBase<OtherDerived>& other) : m_diagonal(other)
    {}
    /** Copy operator. */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    DiagonalMatrix& operator=(const DiagonalBase<OtherDerived>& other)
    {
      m_diagonal = other.diagonal();
@ -193,6 +183,7 @@ class DiagonalMatrix
    /** This is a special case of the templated operator=. Its purpose is to
      * prevent a default operator= from hiding the templated operator=.
      */
    EIGEN_DEVICE_FUNC
    DiagonalMatrix& operator=(const DiagonalMatrix& other)
    {
      m_diagonal = other.diagonal();
@ -201,14 +192,19 @@ class DiagonalMatrix
    #endif
    /** Resizes to given size. */
    EIGEN_DEVICE_FUNC
    inline void resize(Index size) { m_diagonal.resize(size); }
    /** Sets all coefficients to zero. */
    EIGEN_DEVICE_FUNC
    inline void setZero() { m_diagonal.setZero(); }
    /** Resizes and sets all coefficients to zero. */
    EIGEN_DEVICE_FUNC
    inline void setZero(Index size) { m_diagonal.setZero(size); }
    /** Sets this matrix to be the identity matrix of the current size. */
    EIGEN_DEVICE_FUNC
    inline void setIdentity() { m_diagonal.setOnes(); }
    /** Sets this matrix to be the identity matrix of the given size. */
    EIGEN_DEVICE_FUNC
    inline void setIdentity(Index size) { m_diagonal.setOnes(size); }
 };
@ -232,14 +228,15 @@ struct traits<DiagonalWrapper<_DiagonalVectorType> >
 {
  typedef _DiagonalVectorType DiagonalVectorType;
  typedef typename DiagonalVectorType::Scalar Scalar;
-  typedef typename DiagonalVectorType::Index Index;
+  typedef typename DiagonalVectorType::StorageIndex StorageIndex;
-  typedef typename DiagonalVectorType::StorageKind StorageKind;
+  typedef DiagonalShape StorageKind;
  typedef typename traits<DiagonalVectorType>::XprKind XprKind;
  enum {
    RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
    ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
-    MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+    MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
-    MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+    MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
-    Flags =  traits<DiagonalVectorType>::Flags & LvalueBit
+    Flags =  (traits<DiagonalVectorType>::Flags & LvalueBit) | NoPreferredStorageOrderBit
  };
 };
 }
@ -255,9 +252,11 @@ class DiagonalWrapper
    #endif
    /** Constructor from expression of diagonal coefficients to wrap. */
-    inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}
+    EIGEN_DEVICE_FUNC
    explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}
    /** \returns a const reference to the wrapped expression of diagonal coefficients. */
    EIGEN_DEVICE_FUNC
    const DiagonalVectorType& diagonal() const { return m_diagonal; }
  protected:
@ -277,7 +276,7 @@ template<typename Derived>
 inline const DiagonalWrapper<const Derived>
 MatrixBase<Derived>::asDiagonal() const
 {
-  return derived();
+  return DiagonalWrapper<const Derived>(derived());
 }
 /** \returns true if *this is approximately equal to a diagonal matrix,
@ -291,12 +290,11 @@ MatrixBase<Derived>::asDiagonal() const
 template<typename Derived>
 bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
 {
  using std::abs;
  if(cols() != rows()) return false;
  RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);
  for(Index j = 0; j < cols(); ++j)
  {
-    RealScalar absOnDiagonal = abs(coeff(j,j));
+    RealScalar absOnDiagonal = numext::abs(coeff(j,j));
    if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal;
  }
  for(Index j = 0; j < cols(); ++j)
@ -308,6 +306,38 @@ bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
  return true;
 }
 namespace internal {
 template<> struct storage_kind_to_shape<DiagonalShape> { typedef DiagonalShape Shape; };
 struct Diagonal2Dense {};
 template<> struct AssignmentKind<DenseShape,DiagonalShape> { typedef Diagonal2Dense Kind; };
 // Diagonal matrix to Dense assignment
 template< typename DstXprType, typename SrcXprType, typename Functor>
 struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense>
 {
  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
  {
    Index dstRows = src.rows();
    Index dstCols = src.cols();
    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
      dst.resize(dstRows, dstCols);
    dst.setZero();
    dst.diagonal() = src.diagonal();
  }
  static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
  { dst.diagonal() += src.diagonal(); }
  static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
  { dst.diagonal() -= src.diagonal(); }
 };
 } // namespace internal
 } // end namespace Eigen
 #endif // EIGEN_DIAGONALMATRIX_H
--- a/eigenlib/Eigen/src/Core/DiagonalProduct.h
+++ b/eigenlib/Eigen/src/Core/DiagonalProduct.h
@ -13,116 +13,14 @@
 namespace Eigen { 
 namespace internal {
 template<typename MatrixType, typename DiagonalType, int ProductOrder>
 struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
 : traits<MatrixType>
 {
  typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
  enum {
    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
    _StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor,
    _ScalarAccessOnDiag =  !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
                          ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
    _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
    // FIXME currently we need same types, but in the future the next rule should be the one
    //_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
    _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
    _LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0,
    Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0),//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit),
    CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost
  };
 };
 }
 template<typename MatrixType, typename DiagonalType, int ProductOrder>
 class DiagonalProduct : internal::no_assignment_operator,
                        public MatrixBase<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
 {
  public:
    typedef MatrixBase<DiagonalProduct> Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(DiagonalProduct)
    inline DiagonalProduct(const MatrixType& matrix, const DiagonalType& diagonal)
      : m_matrix(matrix), m_diagonal(diagonal)
    {
      eigen_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols()));
    }
    EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); }
    EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); }
    EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
    {
      return m_diagonal.diagonal().coeff(ProductOrder == OnTheLeft ? row : col) * m_matrix.coeff(row, col);
    }
    EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
    {
      enum {
        StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor
      };
      return coeff(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
    }
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
    {
      enum {
        StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor
      };
      const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col;
      return packet_impl<LoadMode>(row,col,indexInDiagonalVector,typename internal::conditional<
        ((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
       ||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), internal::true_type, internal::false_type>::type());
    }
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const
    {
      enum {
        StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor
      };
      return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
    }
  protected:
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const
    {
      return internal::pmul(m_matrix.template packet<LoadMode>(row, col),
                     internal::pset1<PacketScalar>(m_diagonal.diagonal().coeff(id)));
    }
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const
    {
      enum {
        InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
        DiagonalVectorPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned)
      };
      return internal::pmul(m_matrix.template packet<LoadMode>(row, col),
                     m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(id));
    }
    typename MatrixType::Nested m_matrix;
    typename DiagonalType::Nested m_diagonal;
 };
 /** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal.
  */
 template<typename Derived>
 template<typename DiagonalDerived>
-inline const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
+inline const Product<Derived, DiagonalDerived, LazyProduct>
 MatrixBase<Derived>::operator*(const DiagonalBase<DiagonalDerived> &a_diagonal) const
 {
-  return DiagonalProduct<Derived, DiagonalDerived, OnTheRight>(derived(), a_diagonal.derived());
+  return Product<Derived, DiagonalDerived, LazyProduct>(derived(),a_diagonal.derived());
 }
 } // end namespace Eigen
--- a/eigenlib/Eigen/src/Core/Dot.h
+++ b/eigenlib/Eigen/src/Core/Dot.h
@ -28,26 +28,31 @@ template<typename T, typename U,
 >
 struct dot_nocheck
 {
-  typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
+  typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
  typedef typename conj_prod::result_type ResScalar;
  EIGEN_DEVICE_FUNC
  static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
  {
-    return a.template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
+    return a.template binaryExpr<conj_prod>(b).sum();
  }
 };
 template<typename T, typename U>
 struct dot_nocheck<T, U, true>
 {
-  typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
+  typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
  typedef typename conj_prod::result_type ResScalar;
  EIGEN_DEVICE_FUNC
  static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
  {
-    return a.transpose().template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
+    return a.transpose().template binaryExpr<conj_prod>(b).sum();
  }
 };
 } // end namespace internal
-/** \returns the dot product of *this with other.
+/** \fn MatrixBase::dot
  * \returns the dot product of *this with other.
  *
  * \only_for_vectors
  *
@ -59,55 +64,30 @@ struct dot_nocheck<T, U, true>
  */
 template<typename Derived>
 template<typename OtherDerived>
-typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
+EIGEN_DEVICE_FUNC
 typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
 MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
 #if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))
  typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
  EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
-
+#endif
  eigen_assert(size() == other.size());
  return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
 }
 #ifdef EIGEN2_SUPPORT
 /** \returns the dot product of *this with other, with the Eigen2 convention that the dot product is linear in the first variable
  * (conjugating the second variable). Of course this only makes a difference in the complex case.
  *
  * This method is only available in EIGEN2_SUPPORT mode.
  *
  * \only_for_vectors
  *
  * \sa dot()
  */
 template<typename Derived>
 template<typename OtherDerived>
 typename internal::traits<Derived>::Scalar
 MatrixBase<Derived>::eigen2_dot(const MatrixBase<OtherDerived>& other) const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
  EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
  eigen_assert(size() == other.size());
  return internal::dot_nocheck<OtherDerived,Derived>::run(other,*this);
 }
 #endif
 //---------- implementation of L2 norm and related functions ----------
 /** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm.
  * In both cases, it consists in the sum of the square of all the matrix entries.
  * For vectors, this is also equals to the dot product of \c *this with itself.
  *
-  * \sa dot(), norm()
+  * \sa dot(), norm(), lpNorm()
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
@ -119,16 +99,18 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
  * In both cases, it consists in the square root of the sum of the square of all the matrix entries.
  * For vectors, this is also equals to the square root of the dot product of \c *this with itself.
  *
-  * \sa dot(), squaredNorm()
+  * \sa lpNorm(), dot(), squaredNorm()
  */
 template<typename Derived>
 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
 {
-  using std::sqrt;
+  return numext::sqrt(squaredNorm());
  return sqrt(squaredNorm());
 }
-/** \returns an expression of the quotient of *this by its own norm.
+/** \returns an expression of the quotient of \c *this by its own norm.
  *
  * \warning If the input vector is too small (i.e., this->norm()==0),
  *          then this function returns a copy of the input.
  *
  * \only_for_vectors
  *
@ -138,22 +120,77 @@ template<typename Derived>
 inline const typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::normalized() const
 {
-  typedef typename internal::nested<Derived>::type Nested;
+  typedef typename internal::nested_eval<Derived,2>::type _Nested;
  typedef typename internal::remove_reference<Nested>::type _Nested;
  _Nested n(derived());
-  return n / n.norm();
+  RealScalar z = n.squaredNorm();
  // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
  if(z>RealScalar(0))
    return n / numext::sqrt(z);
  else
    return n;
 }
 /** Normalizes the vector, i.e. divides it by its own norm.
  *
  * \only_for_vectors
  *
  * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
  *
  * \sa norm(), normalized()
  */
 template<typename Derived>
 inline void MatrixBase<Derived>::normalize()
 {
-  *this /= norm();
+  RealScalar z = squaredNorm();
  // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
  if(z>RealScalar(0))
    derived() /= numext::sqrt(z);
 }
 /** \returns an expression of the quotient of \c *this by its own norm while avoiding underflow and overflow.
  *
  * \only_for_vectors
  *
  * This method is analogue to the normalized() method, but it reduces the risk of
  * underflow and overflow when computing the norm.
  *
  * \warning If the input vector is too small (i.e., this->norm()==0),
  *          then this function returns a copy of the input.
  *
  * \sa stableNorm(), stableNormalize(), normalized()
  */
 template<typename Derived>
 inline const typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::stableNormalized() const
 {
  typedef typename internal::nested_eval<Derived,3>::type _Nested;
  _Nested n(derived());
  RealScalar w = n.cwiseAbs().maxCoeff();
  RealScalar z = (n/w).squaredNorm();
  if(z>RealScalar(0))
    return n / (numext::sqrt(z)*w);
  else
    return n;
 }
 /** Normalizes the vector while avoid underflow and overflow
  *
  * \only_for_vectors
  *
  * This method is analogue to the normalize() method, but it reduces the risk of
  * underflow and overflow when computing the norm.
  *
  * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
  *
  * \sa stableNorm(), stableNormalized(), normalize()
  */
 template<typename Derived>
 inline void MatrixBase<Derived>::stableNormalize()
 {
  RealScalar w = cwiseAbs().maxCoeff();
  RealScalar z = (derived()/w).squaredNorm();
  if(z>RealScalar(0))
    derived() /= numext::sqrt(z)*w;
 }
 //---------- implementation of other norms ----------
@ -164,9 +201,10 @@ template<typename Derived, int p>
 struct lpNorm_selector
 {
  typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
  EIGEN_DEVICE_FUNC
  static inline RealScalar run(const MatrixBase<Derived>& m)
  {
-    using std::pow;
+    EIGEN_USING_STD_MATH(pow)
    return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
  }
 };
@ -174,6 +212,7 @@ struct lpNorm_selector
 template<typename Derived>
 struct lpNorm_selector<Derived, 1>
 {
  EIGEN_DEVICE_FUNC
  static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
  {
    return m.cwiseAbs().sum();
@ -183,6 +222,7 @@ struct lpNorm_selector<Derived, 1>
 template<typename Derived>
 struct lpNorm_selector<Derived, 2>
 {
  EIGEN_DEVICE_FUNC
  static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
  {
    return m.norm();
@ -192,23 +232,35 @@ struct lpNorm_selector<Derived, 2>
 template<typename Derived>
 struct lpNorm_selector<Derived, Infinity>
 {
-  static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
+  typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
  EIGEN_DEVICE_FUNC
  static inline RealScalar run(const MatrixBase<Derived>& m)
  {
    if(Derived::SizeAtCompileTime==0 || (Derived::SizeAtCompileTime==Dynamic && m.size()==0))
      return RealScalar(0);
    return m.cwiseAbs().maxCoeff();
  }
 };
 } // end namespace internal
-/** \returns the \f$ \ell^p \f$ norm of *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values
+/** \returns the \b coefficient-wise \f$ \ell^p \f$ norm of \c *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values
-  *          of the coefficients of *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$
+  *          of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$
-  *          norm, that is the maximum of the absolute values of the coefficients of *this.
+  *          norm, that is the maximum of the absolute values of the coefficients of \c *this.
  *
  * In all cases, if \c *this is empty, then the value 0 is returned.
  *
  * \note For matrices, this function does not compute the <a href="https://en.wikipedia.org/wiki/Operator_norm">operator-norm</a>. That is, if \c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink.
  *
  * \sa norm()
  */
 template<typename Derived>
 template<int p>
 #ifndef EIGEN_PARSED_BY_DOXYGEN
 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
 #else
 MatrixBase<Derived>::RealScalar
 #endif
 MatrixBase<Derived>::lpNorm() const
 {
  return internal::lpNorm_selector<Derived, p>::run(*this);
@ -227,8 +279,8 @@ template<typename OtherDerived>
 bool MatrixBase<Derived>::isOrthogonal
 (const MatrixBase<OtherDerived>& other, const RealScalar& prec) const
 {
-  typename internal::nested<Derived,2>::type nested(derived());
+  typename internal::nested_eval<Derived,2>::type nested(derived());
-  typename internal::nested<OtherDerived,2>::type otherNested(other.derived());
+  typename internal::nested_eval<OtherDerived,2>::type otherNested(other.derived());
  return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm();
 }
@ -246,13 +298,13 @@ bool MatrixBase<Derived>::isOrthogonal
 template<typename Derived>
 bool MatrixBase<Derived>::isUnitary(const RealScalar& prec) const
 {
-  typename Derived::Nested nested(derived());
+  typename internal::nested_eval<Derived,1>::type self(derived());
  for(Index i = 0; i < cols(); ++i)
  {
-    if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
+    if(!internal::isApprox(self.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
      return false;
    for(Index j = 0; j < i; ++j)
-      if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast<Scalar>(1), prec))
+      if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast<Scalar>(1), prec))
        return false;
  }
  return true;
--- a/eigenlib/Eigen/src/Core/EigenBase.h
+++ b/eigenlib/Eigen/src/Core/EigenBase.h
@ -13,7 +13,9 @@
 namespace Eigen {
-/** Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
+/** \class EigenBase
  * 
  * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
  *
  * In other words, an EigenBase object is an object that can be copied into a MatrixBase.
  *
@ -21,39 +23,57 @@ namespace Eigen {
  *
  * Notice that this class is trivial, it is only used to disambiguate overloaded functions.
  *
-  * \sa \ref TopicClassHierarchy
+  * \sa \blank \ref TopicClassHierarchy
  */
 template<typename Derived> struct EigenBase
 {
 //   typedef typename internal::plain_matrix_type<Derived>::type PlainObject;
  /** \brief The interface type of indices
    * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
    * \deprecated Since Eigen 3.3, its usage is deprecated. Use Eigen::Index instead.
    * \sa StorageIndex, \ref TopicPreprocessorDirectives.
    */
  typedef Eigen::Index Index;
  // FIXME is it needed?
  typedef typename internal::traits<Derived>::StorageKind StorageKind;
  typedef typename internal::traits<Derived>::Index Index;
  /** \returns a reference to the derived object */
  EIGEN_DEVICE_FUNC
  Derived& derived() { return *static_cast<Derived*>(this); }
  /** \returns a const reference to the derived object */
  EIGEN_DEVICE_FUNC
  const Derived& derived() const { return *static_cast<const Derived*>(this); }
  EIGEN_DEVICE_FUNC
  inline Derived& const_cast_derived() const
  { return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
  EIGEN_DEVICE_FUNC
  inline const Derived& const_derived() const
  { return *static_cast<const Derived*>(this); }
  /** \returns the number of rows. \sa cols(), RowsAtCompileTime */
  EIGEN_DEVICE_FUNC
  inline Index rows() const { return derived().rows(); }
  /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/
  EIGEN_DEVICE_FUNC
  inline Index cols() const { return derived().cols(); }
  /** \returns the number of coefficients, which is rows()*cols().
    * \sa rows(), cols(), SizeAtCompileTime. */
  EIGEN_DEVICE_FUNC
  inline Index size() const { return rows() * cols(); }
  /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */
-  template<typename Dest> inline void evalTo(Dest& dst) const
+  template<typename Dest>
  EIGEN_DEVICE_FUNC
  inline void evalTo(Dest& dst) const
  { derived().evalTo(dst); }
  /** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */
-  template<typename Dest> inline void addTo(Dest& dst) const
+  template<typename Dest>
  EIGEN_DEVICE_FUNC
  inline void addTo(Dest& dst) const
  {
    // This is the default implementation,
    // derived class can reimplement it in a more optimized way.
@ -63,7 +83,9 @@ template<typename Derived> struct EigenBase
  }
  /** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */
-  template<typename Dest> inline void subTo(Dest& dst) const
+  template<typename Dest>
  EIGEN_DEVICE_FUNC
  inline void subTo(Dest& dst) const
  {
    // This is the default implementation,
    // derived class can reimplement it in a more optimized way.
@ -73,7 +95,8 @@ template<typename Derived> struct EigenBase
  }
  /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */
-  template<typename Dest> inline void applyThisOnTheRight(Dest& dst) const
+  template<typename Dest>
  EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const
  {
    // This is the default implementation,
    // derived class can reimplement it in a more optimized way.
@ -81,7 +104,8 @@ template<typename Derived> struct EigenBase
  }
  /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */
-  template<typename Dest> inline void applyThisOnTheLeft(Dest& dst) const
+  template<typename Dest>
  EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const
  {
    // This is the default implementation,
    // derived class can reimplement it in a more optimized way.
@ -106,7 +130,7 @@ template<typename Derived>
 template<typename OtherDerived>
 Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
 {
-  other.derived().evalTo(derived());
+  call_assignment(derived(), other.derived());
  return derived();
 }
@ -114,7 +138,7 @@ template<typename Derived>
 template<typename OtherDerived>
 Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
 {
-  other.derived().addTo(derived());
+  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
  return derived();
 }
@ -122,7 +146,7 @@ template<typename Derived>
 template<typename OtherDerived>
 Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
 {
-  other.derived().subTo(derived());
+  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
  return derived();
 }
--- a/eigenlib/Eigen/src/Core/Flagged.h
+++ b/eigenlib/Eigen/src/Core/Flagged.h
@ -1,140 +0,0 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_FLAGGED_H
 #define EIGEN_FLAGGED_H
 namespace Eigen { 
 /** \class Flagged
  * \ingroup Core_Module
  *
  * \brief Expression with modified flags
  *
  * \param ExpressionType the type of the object of which we are modifying the flags
  * \param Added the flags added to the expression
  * \param Removed the flags removed from the expression (has priority over Added).
  *
  * This class represents an expression whose flags have been modified.
  * It is the return type of MatrixBase::flagged()
  * and most of the time this is the only way it is used.
  *
  * \sa MatrixBase::flagged()
  */
 namespace internal {
 template<typename ExpressionType, unsigned int Added, unsigned int Removed>
 struct traits<Flagged<ExpressionType, Added, Removed> > : traits<ExpressionType>
 {
  enum { Flags = (ExpressionType::Flags | Added) & ~Removed };
 };
 }
 template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged
  : public MatrixBase<Flagged<ExpressionType, Added, Removed> >
 {
  public:
    typedef MatrixBase<Flagged> Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(Flagged)
    typedef typename internal::conditional<internal::must_nest_by_value<ExpressionType>::ret,
        ExpressionType, const ExpressionType&>::type ExpressionTypeNested;
    typedef typename ExpressionType::InnerIterator InnerIterator;
    inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {}
    inline Index rows() const { return m_matrix.rows(); }
    inline Index cols() const { return m_matrix.cols(); }
    inline Index outerStride() const { return m_matrix.outerStride(); }
    inline Index innerStride() const { return m_matrix.innerStride(); }
    inline CoeffReturnType coeff(Index row, Index col) const
    {
      return m_matrix.coeff(row, col);
    }
    inline CoeffReturnType coeff(Index index) const
    {
      return m_matrix.coeff(index);
    }
    inline const Scalar& coeffRef(Index row, Index col) const
    {
      return m_matrix.const_cast_derived().coeffRef(row, col);
    }
    inline const Scalar& coeffRef(Index index) const
    {
      return m_matrix.const_cast_derived().coeffRef(index);
    }
    inline Scalar& coeffRef(Index row, Index col)
    {
      return m_matrix.const_cast_derived().coeffRef(row, col);
    }
    inline Scalar& coeffRef(Index index)
    {
      return m_matrix.const_cast_derived().coeffRef(index);
    }
    template<int LoadMode>
    inline const PacketScalar packet(Index row, Index col) const
    {
      return m_matrix.template packet<LoadMode>(row, col);
    }
    template<int LoadMode>
    inline void writePacket(Index row, Index col, const PacketScalar& x)
    {
      m_matrix.const_cast_derived().template writePacket<LoadMode>(row, col, x);
    }
    template<int LoadMode>
    inline const PacketScalar packet(Index index) const
    {
      return m_matrix.template packet<LoadMode>(index);
    }
    template<int LoadMode>
    inline void writePacket(Index index, const PacketScalar& x)
    {
      m_matrix.const_cast_derived().template writePacket<LoadMode>(index, x);
    }
    const ExpressionType& _expression() const { return m_matrix; }
    template<typename OtherDerived>
    typename ExpressionType::PlainObject solveTriangular(const MatrixBase<OtherDerived>& other) const;
    template<typename OtherDerived>
    void solveTriangularInPlace(const MatrixBase<OtherDerived>& other) const;
  protected:
    ExpressionTypeNested m_matrix;
 };
 /** \returns an expression of *this with added and removed flags
  *
  * This is mostly for internal use.
  *
  * \sa class Flagged
  */
 template<typename Derived>
 template<unsigned int Added,unsigned int Removed>
 inline const Flagged<Derived, Added, Removed>
 DenseBase<Derived>::flagged() const
 {
  return derived();
 }
 } // end namespace Eigen
 #endif // EIGEN_FLAGGED_H
--- a/eigenlib/Eigen/src/Core/ForceAlignedAccess.h
+++ b/eigenlib/Eigen/src/Core/ForceAlignedAccess.h
@ -39,29 +39,29 @@ template<typename ExpressionType> class ForceAlignedAccess
    typedef typename internal::dense_xpr_base<ForceAlignedAccess>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess)
-    inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}
+    EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}
-    inline Index rows() const { return m_expression.rows(); }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); }
-    inline Index cols() const { return m_expression.cols(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); }
-    inline Index outerStride() const { return m_expression.outerStride(); }
+    EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); }
-    inline Index innerStride() const { return m_expression.innerStride(); }
+    EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); }
-    inline const CoeffReturnType coeff(Index row, Index col) const
+    EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const
    {
      return m_expression.coeff(row, col);
    }
-    inline Scalar& coeffRef(Index row, Index col)
+    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
    {
      return m_expression.const_cast_derived().coeffRef(row, col);
    }
-    inline const CoeffReturnType coeff(Index index) const
+    EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const
    {
      return m_expression.coeff(index);
    }
-    inline Scalar& coeffRef(Index index)
+    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
    {
      return m_expression.const_cast_derived().coeffRef(index);
    }
@ -90,7 +90,7 @@ template<typename ExpressionType> class ForceAlignedAccess
      m_expression.const_cast_derived().template writePacket<Aligned>(index, x);
    }
-    operator const ExpressionType&() const { return m_expression; }
+    EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }
  protected:
    const ExpressionType& m_expression;
@ -127,7 +127,7 @@ template<bool Enable>
 inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type
 MatrixBase<Derived>::forceAlignedAccessIf() const
 {
-  return derived();
+  return derived();  // FIXME This should not work but apparently is never used
 }
 /** \returns an expression of *this with forced aligned access if \a Enable is true.
@ -138,7 +138,7 @@ template<bool Enable>
 inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type
 MatrixBase<Derived>::forceAlignedAccessIf()
 {
-  return derived();
+  return derived();  // FIXME This should not work but apparently is never used
 }
 } // end namespace Eigen
--- a/eigenlib/Eigen/src/Core/Functors.h
+++ b/eigenlib/Eigen/src/Core/Functors.h
--- a/eigenlib/Eigen/src/Core/Fuzzy.h
+++ b/eigenlib/Eigen/src/Core/Fuzzy.h
@ -19,18 +19,19 @@ namespace internal
 template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
 struct isApprox_selector
 {
  EIGEN_DEVICE_FUNC
  static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
  {
-    using std::min;
+    typename internal::nested_eval<Derived,2>::type nested(x);
-    typename internal::nested<Derived,2>::type nested(x);
+    typename internal::nested_eval<OtherDerived,2>::type otherNested(y);
-    typename internal::nested<OtherDerived,2>::type otherNested(y);
+    return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * numext::mini(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
    return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
  }
 };
 template<typename Derived, typename OtherDerived>
 struct isApprox_selector<Derived, OtherDerived, true>
 {
  EIGEN_DEVICE_FUNC
  static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&)
  {
    return x.matrix() == y.matrix();
@ -40,6 +41,7 @@ struct isApprox_selector<Derived, OtherDerived, true>
 template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
 struct isMuchSmallerThan_object_selector
 {
  EIGEN_DEVICE_FUNC
  static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
  {
    return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum();
@ -49,6 +51,7 @@ struct isMuchSmallerThan_object_selector
 template<typename Derived, typename OtherDerived>
 struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
 {
  EIGEN_DEVICE_FUNC
  static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&)
  {
    return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
@ -58,6 +61,7 @@ struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
 template<typename Derived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
 struct isMuchSmallerThan_scalar_selector
 {
  EIGEN_DEVICE_FUNC
  static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec)
  {
    return x.cwiseAbs2().sum() <= numext::abs2(prec * y);
@ -67,6 +71,7 @@ struct isMuchSmallerThan_scalar_selector
 template<typename Derived>
 struct isMuchSmallerThan_scalar_selector<Derived, true>
 {
  EIGEN_DEVICE_FUNC
  static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&)
  {
    return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
--- a/eigenlib/Eigen/src/Core/GeneralProduct.h
+++ b/eigenlib/Eigen/src/Core/GeneralProduct.h
@ -11,29 +11,7 @@
 #ifndef EIGEN_GENERAL_PRODUCT_H
 #define EIGEN_GENERAL_PRODUCT_H
-namespace Eigen { 
+namespace Eigen {
 /** \class GeneralProduct
  * \ingroup Core_Module
  *
  * \brief Expression of the product of two general matrices or vectors
  *
  * \param LhsNested the type used to store the left-hand side
  * \param RhsNested the type used to store the right-hand side
  * \param ProductMode the type of the product
  *
  * This class represents an expression of the product of two general matrices.
  * We call a general matrix, a dense matrix with full storage. For instance,
  * This excludes triangular, selfadjoint, and sparse matrices.
  * It is the return type of the operator* between general matrices. Its template
  * arguments are determined automatically by ProductReturnType. Therefore,
  * GeneralProduct should never be used direclty. To determine the result type of a
  * function which involves a matrix product, use ProductReturnType::Type.
  *
  * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
  */
 template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
 class GeneralProduct;
 enum {
  Large = 2,
@ -47,7 +25,8 @@ template<int Rows, int Cols, int Depth> struct product_type_selector;
 template<int Size, int MaxSize> struct product_size_category
 {
  enum { is_large = MaxSize == Dynamic ||
-                    Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
+                    Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
                    (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
         value = is_large  ? Large
               : Size == 1 ? 1
                           : Small
@ -59,15 +38,14 @@ template<typename Lhs, typename Rhs> struct product_type
  typedef typename remove_all<Lhs>::type _Lhs;
  typedef typename remove_all<Rhs>::type _Rhs;
  enum {
-    MaxRows  = _Lhs::MaxRowsAtCompileTime,
+    MaxRows = traits<_Lhs>::MaxRowsAtCompileTime,
-    Rows  = _Lhs::RowsAtCompileTime,
+    Rows    = traits<_Lhs>::RowsAtCompileTime,
-    MaxCols  = _Rhs::MaxColsAtCompileTime,
+    MaxCols = traits<_Rhs>::MaxColsAtCompileTime,
-    Cols  = _Rhs::ColsAtCompileTime,
+    Cols    = traits<_Rhs>::ColsAtCompileTime,
-    MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
+    MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime,
-                                           _Rhs::MaxRowsAtCompileTime),
+                                           traits<_Rhs>::MaxRowsAtCompileTime),
-    Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
+    Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime,
-                                        _Rhs::RowsAtCompileTime),
+                                        traits<_Rhs>::RowsAtCompileTime)
    LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
  };
  // the splitting into different lines of code here, introducing the _select enums and the typedef below,
@ -82,7 +60,8 @@ private:
 public:
  enum {
-    value = selector::ret
+    value = selector::ret,
    ret = selector::ret
  };
 #ifdef EIGEN_DEBUG_PRODUCT
  static void debug()
@ -98,12 +77,13 @@ public:
 #endif
 };
 /* The following allows to select the kind of product at compile time
 * based on the three dimensions of the product.
 * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
 // FIXME I'm not sure the current mapping is the ideal one.
 template<int M, int N>  struct product_type_selector<M,N,1>              { enum { ret = OuterProduct }; };
 template<int M>         struct product_type_selector<M, 1, 1>            { enum { ret = LazyCoeffBasedProductMode }; };
 template<int N>         struct product_type_selector<1, N, 1>            { enum { ret = LazyCoeffBasedProductMode }; };
 template<int Depth>     struct product_type_selector<1,    1,    Depth>  { enum { ret = InnerProduct }; };
 template<>              struct product_type_selector<1,    1,    1>      { enum { ret = InnerProduct }; };
 template<>              struct product_type_selector<Small,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
@ -122,60 +102,12 @@ template<>              struct product_type_selector<Small,Small,Large>  { enum
 template<>              struct product_type_selector<Large,Small,Large>  { enum { ret = GemmProduct }; };
 template<>              struct product_type_selector<Small,Large,Large>  { enum { ret = GemmProduct }; };
 template<>              struct product_type_selector<Large,Large,Large>  { enum { ret = GemmProduct }; };
-template<>              struct product_type_selector<Large,Small,Small>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Large,Small,Small>  { enum { ret = CoeffBasedProductMode }; };
-template<>              struct product_type_selector<Small,Large,Small>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Small,Large,Small>  { enum { ret = CoeffBasedProductMode }; };
 template<>              struct product_type_selector<Large,Large,Small>  { enum { ret = GemmProduct }; };
 } // end namespace internal
 /** \class ProductReturnType
  * \ingroup Core_Module
  *
  * \brief Helper class to get the correct and optimized returned type of operator*
  *
  * \param Lhs the type of the left-hand side
  * \param Rhs the type of the right-hand side
  * \param ProductMode the type of the product (determined automatically by internal::product_mode)
  *
  * This class defines the typename Type representing the optimized product expression
  * between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
  * is the recommended way to define the result type of a function returning an expression
  * which involve a matrix product. The class Product should never be
  * used directly.
  *
  * \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
  */
 template<typename Lhs, typename Rhs, int ProductType>
 struct ProductReturnType
 {
  // TODO use the nested type to reduce instanciations ????
 //   typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
 //   typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
  typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
 };
 template<typename Lhs, typename Rhs>
 struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
 {
  typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
  typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
  typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
 };
 template<typename Lhs, typename Rhs>
 struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
 {
  typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
  typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
  typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
 };
 // this is a workaround for sun CC
 template<typename Lhs, typename Rhs>
 struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
 {};
 /***********************************************************************
 *  Implementation of Inner Vector Vector Product
 ***********************************************************************/
@ -187,119 +119,10 @@ struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedPr
 // product ends up to a row-vector times col-vector product... To tackle this use
 // case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
 namespace internal {
 template<typename Lhs, typename Rhs>
 struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
 : traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
 {};
 }
 template<typename Lhs, typename Rhs>
 class GeneralProduct<Lhs, Rhs, InnerProduct>
  : internal::no_assignment_operator,
    public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
 {
    typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
  public:
    GeneralProduct(const Lhs& lhs, const Rhs& rhs)
    {
      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
      Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
    }
    /** Convertion to scalar */
    operator const typename Base::Scalar() const {
      return Base::coeff(0,0);
    }
 };
 /***********************************************************************
 *  Implementation of Outer Vector Vector Product
 ***********************************************************************/
 namespace internal {
 // Column major
 template<typename ProductType, typename Dest, typename Func>
 EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&)
 {
  typedef typename Dest::Index Index;
  // FIXME make sure lhs is sequentially stored
  // FIXME not very good if rhs is real and lhs complex while alpha is real too
  const Index cols = dest.cols();
  for (Index j=0; j<cols; ++j)
    func(dest.col(j), prod.rhs().coeff(0,j) * prod.lhs());
 }
 // Row major
 template<typename ProductType, typename Dest, typename Func>
 EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) {
  typedef typename Dest::Index Index;
  // FIXME make sure rhs is sequentially stored
  // FIXME not very good if lhs is real and rhs complex while alpha is real too
  const Index rows = dest.rows();
  for (Index i=0; i<rows; ++i)
    func(dest.row(i), prod.lhs().coeff(i,0) * prod.rhs());
 }
 template<typename Lhs, typename Rhs>
 struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
 : traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
 {};
 }
 template<typename Lhs, typename Rhs>
 class GeneralProduct<Lhs, Rhs, OuterProduct>
  : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
 {
    template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
  public:
    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
    {
      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
    }
    struct set  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived()  = src; } };
    struct add  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
    struct sub  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
    struct adds {
      Scalar m_scale;
      adds(const Scalar& s) : m_scale(s) {}
      template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
        dst.const_cast_derived() += m_scale * src;
      }
    };
    template<typename Dest>
    inline void evalTo(Dest& dest) const {
      internal::outer_product_selector_run(*this, dest, set(), IsRowMajor<Dest>());
    }
    template<typename Dest>
    inline void addTo(Dest& dest) const {
      internal::outer_product_selector_run(*this, dest, add(), IsRowMajor<Dest>());
    }
    template<typename Dest>
    inline void subTo(Dest& dest) const {
      internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor<Dest>());
    }
    template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
    {
      internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor<Dest>());
    }
 };
 /***********************************************************************
 *  Implementation of General Matrix Vector Product
 ***********************************************************************/
@ -313,60 +136,13 @@ class GeneralProduct<Lhs, Rhs, OuterProduct>
 */
 namespace internal {
 template<typename Lhs, typename Rhs>
 struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
 : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
 {};
 template<int Side, int StorageOrder, bool BlasCompatible>
-struct gemv_selector;
+struct gemv_dense_selector;
 } // end namespace internal
 template<typename Lhs, typename Rhs>
 class GeneralProduct<Lhs, Rhs, GemvProduct>
  : public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
 {
  public:
    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
    typedef typename Lhs::Scalar LhsScalar;
    typedef typename Rhs::Scalar RhsScalar;
    GeneralProduct(const Lhs& a_lhs, const Rhs& a_rhs) : Base(a_lhs,a_rhs)
    {
 //       EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
 //         YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
    }
    enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
    typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
    template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
    {
      eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
      internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
                       bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
    }
 };
 namespace internal {
 // The vector is on the left => transposition
 template<int StorageOrder, bool BlasCompatible>
 struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
 {
  template<typename ProductType, typename Dest>
  static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
  {
    Transpose<Dest> destT(dest);
    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
    gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
      ::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
        (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
  }
 };
 template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
 template<typename Scalar,int Size,int MaxSize>
@ -384,126 +160,161 @@ struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
 template<typename Scalar,int Size,int MaxSize>
 struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
 {
  #if EIGEN_ALIGN_STATICALLY
  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
  #else
  // Some architectures cannot align on the stack,
  // => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
  enum {
    ForceAlignment  = internal::packet_traits<Scalar>::Vectorizable,
    PacketSize      = internal::packet_traits<Scalar>::size
  };
-  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
+  #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data;
  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
  #else
  // Some architectures cannot align on the stack,
  // => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
  EIGEN_STRONG_INLINE Scalar* data() {
    return ForceAlignment
-            ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16)
+            ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
            : m_data.array;
  }
  #endif
 };
-template<> struct gemv_selector<OnTheRight,ColMajor,true>
+// The vector is on the left => transposition
 template<int StorageOrder, bool BlasCompatible>
 struct gemv_dense_selector<OnTheLeft,StorageOrder,BlasCompatible>
 {
-  template<typename ProductType, typename Dest>
+  template<typename Lhs, typename Rhs, typename Dest>
-  static inline void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
-    typedef typename ProductType::Index Index;
+    Transpose<Dest> destT(dest);
-    typedef typename ProductType::LhsScalar   LhsScalar;
+    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
-    typedef typename ProductType::RhsScalar   RhsScalar;
+    gemv_dense_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
-    typedef typename ProductType::Scalar      ResScalar;
+      ::run(rhs.transpose(), lhs.transpose(), destT, alpha);
-    typedef typename ProductType::RealScalar  RealScalar;
+  }
-    typedef typename ProductType::ActualLhsType ActualLhsType;
+};
    typedef typename ProductType::ActualRhsType ActualRhsType;
    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
    typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
-    ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
+template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
-    ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
+{
  template<typename Lhs, typename Rhs, typename Dest>
  static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
    typedef typename Lhs::Scalar   LhsScalar;
    typedef typename Rhs::Scalar   RhsScalar;
    typedef typename Dest::Scalar  ResScalar;
    typedef typename Dest::RealScalar  RealScalar;
    typedef internal::blas_traits<Lhs> LhsBlasTraits;
    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
    typedef internal::blas_traits<Rhs> RhsBlasTraits;
    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
    typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
+    ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
-                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
+    ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
                                  * RhsBlasTraits::extractScalarFactor(rhs);
    // make sure Dest is a compile-time vector type (bug 1166)
    typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
      // on, the other hand it is good for the cache to pack the vector anyways...
-      EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
+      EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
-      MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
+      MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
    };
-    gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
+    typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
-
+    typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
    bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
    bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
    RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
-    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
+    if(!MightCannotUseDest)
                                                  evalToDest ? dest.data() : static_dest.data());
    if(!evalToDest)
    {
-      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      // shortcut if we are sure to be able to use dest directly,
-      int size = dest.size();
+      // this ease the compiler to generate cleaner and more optimzized code for most common cases
-      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      general_matrix_vector_product
-      #endif
+          <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
-      if(!alphaIsCompatible)
+          actualLhs.rows(), actualLhs.cols(),
-      {
+          LhsMapper(actualLhs.data(), actualLhs.outerStride()),
-        MappedDest(actualDestPtr, dest.size()).setZero();
+          RhsMapper(actualRhs.data(), actualRhs.innerStride()),
-        compatibleAlpha = RhsScalar(1);
+          dest.data(), 1,
-      }
+          compatibleAlpha);
      else
        MappedDest(actualDestPtr, dest.size()) = dest;
    }
-
+    else
    general_matrix_vector_product
      <Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
        actualLhs.rows(), actualLhs.cols(),
        actualLhs.data(), actualLhs.outerStride(),
        actualRhs.data(), actualRhs.innerStride(),
        actualDestPtr, 1,
        compatibleAlpha);
    if (!evalToDest)
    {
-      if(!alphaIsCompatible)
+      gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
-        dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
+
-      else
+      const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
-        dest = MappedDest(actualDestPtr, dest.size());
+      const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
      ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
                                                    evalToDest ? dest.data() : static_dest.data());
      if(!evalToDest)
      {
        #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
        Index size = dest.size();
        EIGEN_DENSE_STORAGE_CTOR_PLUGIN
        #endif
        if(!alphaIsCompatible)
        {
          MappedDest(actualDestPtr, dest.size()).setZero();
          compatibleAlpha = RhsScalar(1);
        }
        else
          MappedDest(actualDestPtr, dest.size()) = dest;
      }
      general_matrix_vector_product
          <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
          actualLhs.rows(), actualLhs.cols(),
          LhsMapper(actualLhs.data(), actualLhs.outerStride()),
          RhsMapper(actualRhs.data(), actualRhs.innerStride()),
          actualDestPtr, 1,
          compatibleAlpha);
      if (!evalToDest)
      {
        if(!alphaIsCompatible)
          dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
        else
          dest = MappedDest(actualDestPtr, dest.size());
      }
    }
  }
 };
-template<> struct gemv_selector<OnTheRight,RowMajor,true>
+template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
 {
-  template<typename ProductType, typename Dest>
+  template<typename Lhs, typename Rhs, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
-    typedef typename ProductType::LhsScalar LhsScalar;
+    typedef typename Lhs::Scalar   LhsScalar;
-    typedef typename ProductType::RhsScalar RhsScalar;
+    typedef typename Rhs::Scalar   RhsScalar;
-    typedef typename ProductType::Scalar    ResScalar;
+    typedef typename Dest::Scalar  ResScalar;
-    typedef typename ProductType::Index Index;
+    
-    typedef typename ProductType::ActualLhsType ActualLhsType;
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
-    typedef typename ProductType::ActualRhsType ActualRhsType;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
-    typedef typename ProductType::_ActualRhsType _ActualRhsType;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
-    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
-    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
+    typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
-    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
-    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
+    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
-                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
+                                  * RhsBlasTraits::extractScalarFactor(rhs);
    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
      // on, the other hand it is good for the cache to pack the vector anyways...
-      DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
+      DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
    };
-    gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
+    gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
        DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
@ -511,45 +322,48 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true>
    if(!DirectlyUseRhs)
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      int size = actualRhs.size();
+      Index size = actualRhs.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
-      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
    }
    typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
    typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
    general_matrix_vector_product
-      <Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
+        <Index,LhsScalar,LhsMapper,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
        actualLhs.rows(), actualLhs.cols(),
-        actualLhs.data(), actualLhs.outerStride(),
+        LhsMapper(actualLhs.data(), actualLhs.outerStride()),
-        actualRhsPtr, 1,
+        RhsMapper(actualRhsPtr, 1),
-        dest.data(), dest.innerStride(),
+        dest.data(), dest.col(0).innerStride(), //NOTE  if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166)
        actualAlpha);
  }
 };
-template<> struct gemv_selector<OnTheRight,ColMajor,false>
+template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
 {
-  template<typename ProductType, typename Dest>
+  template<typename Lhs, typename Rhs, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
-    typedef typename Dest::Index Index;
+    EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
-    // TODO makes sure dest is sequentially stored in memory, otherwise use a temp
+    // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
-    const Index size = prod.rhs().rows();
+    typename nested_eval<Rhs,1>::type actual_rhs(rhs);
    const Index size = rhs.rows();
    for(Index k=0; k<size; ++k)
-      dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k);
+      dest += (alpha*actual_rhs.coeff(k)) * lhs.col(k);
  }
 };
-template<> struct gemv_selector<OnTheRight,RowMajor,false>
+template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
 {
-  template<typename ProductType, typename Dest>
+  template<typename Lhs, typename Rhs, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
-    typedef typename Dest::Index Index;
+    EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
-    // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
+    typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
-    const Index rows = prod.rows();
+    const Index rows = dest.rows();
    for(Index i=0; i<rows; ++i)
-      dest.coeffRef(i) += alpha * (prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum();
+      dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum();
  }
 };
@ -565,9 +379,11 @@ template<> struct gemv_selector<OnTheRight,RowMajor,false>
  *
  * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
  */
 #ifndef __CUDACC__
 template<typename Derived>
 template<typename OtherDerived>
-inline const typename ProductReturnType<Derived, OtherDerived>::Type
+inline const Product<Derived, OtherDerived>
 MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
 {
  // A note regarding the function declaration: In MSVC, this function will sometimes
@ -592,9 +408,12 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
 #ifdef EIGEN_DEBUG_PRODUCT
  internal::product_type<Derived,OtherDerived>::debug();
 #endif
-  return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
+
  return Product<Derived, OtherDerived>(derived(), other.derived());
 }
 #endif // __CUDACC__
 /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
  *
  * The returned product will behave like any other expressions: the coefficients of the product will be
@ -608,7 +427,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
  */
 template<typename Derived>
 template<typename OtherDerived>
-const typename LazyProductReturnType<Derived,OtherDerived>::Type
+const Product<Derived,OtherDerived,LazyProduct>
 MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
 {
  enum {
@ -627,7 +446,7 @@ MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
-  return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
+  return Product<Derived,OtherDerived,LazyProduct>(derived(), other.derived());
 }
 } // end namespace Eigen
--- a/eigenlib/Eigen/src/Core/GenericPacketMath.h
+++ b/eigenlib/Eigen/src/Core/GenericPacketMath.h
@ -42,21 +42,28 @@ namespace internal {
 struct default_packet_traits
 {
  enum {
    HasHalfPacket = 0,
    HasAdd    = 1,
    HasSub    = 1,
    HasMul    = 1,
    HasNegate = 1,
    HasAbs    = 1,
    HasArg    = 0,
    HasAbs2   = 1,
    HasMin    = 1,
    HasMax    = 1,
    HasConj   = 1,
    HasSetLinear = 1,
    HasBlend  = 0,
    HasDiv    = 0,
    HasSqrt   = 0,
    HasRsqrt  = 0,
    HasExp    = 0,
    HasLog    = 0,
    HasLog1p  = 0,
    HasLog10  = 0,
    HasPow    = 0,
    HasSin    = 0,
@ -64,17 +71,37 @@ struct default_packet_traits
    HasTan    = 0,
    HasASin   = 0,
    HasACos   = 0,
-    HasATan   = 0
+    HasATan   = 0,
    HasSinh   = 0,
    HasCosh   = 0,
    HasTanh   = 0,
    HasLGamma = 0,
    HasDiGamma = 0,
    HasZeta = 0,
    HasPolygamma = 0,
    HasErf = 0,
    HasErfc = 0,
    HasIGamma = 0,
    HasIGammac = 0,
    HasBetaInc = 0,
    HasRound  = 0,
    HasFloor  = 0,
    HasCeil   = 0,
    HasSign   = 0
  };
 };
 template<typename T> struct packet_traits : default_packet_traits
 {
  typedef T type;
  typedef T half;
  enum {
    Vectorizable = 0,
    size = 1,
-    AlignedOnScalar = 0
+    AlignedOnScalar = 0,
    HasHalfPacket = 0
  };
  enum {
    HasAdd    = 0,
@ -90,135 +117,239 @@ template<typename T> struct packet_traits : default_packet_traits
  };
 };
 template<typename T> struct packet_traits<const T> : packet_traits<T> { };
 template <typename Src, typename Tgt> struct type_casting_traits {
  enum {
    VectorizedCast = 0,
    SrcCoeffRatio = 1,
    TgtCoeffRatio = 1
  };
 };
 /** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
 template <typename SrcPacket, typename TgtPacket>
 EIGEN_DEVICE_FUNC inline TgtPacket
 pcast(const SrcPacket& a) {
  return static_cast<TgtPacket>(a);
 }
 template <typename SrcPacket, typename TgtPacket>
 EIGEN_DEVICE_FUNC inline TgtPacket
 pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
  return static_cast<TgtPacket>(a);
 }
 template <typename SrcPacket, typename TgtPacket>
 EIGEN_DEVICE_FUNC inline TgtPacket
 pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
  return static_cast<TgtPacket>(a);
 }
 /** \internal \returns a + b (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 padd(const Packet& a,
        const Packet& b) { return a+b; }
 /** \internal \returns a - b (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 psub(const Packet& a,
        const Packet& b) { return a-b; }
 /** \internal \returns -a (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pnegate(const Packet& a) { return -a; }
 /** \internal \returns conj(a) (coeff-wise) */
-template<typename Packet> inline Packet
+
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pconj(const Packet& a) { return numext::conj(a); }
 /** \internal \returns a * b (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pmul(const Packet& a,
        const Packet& b) { return a*b; }
 /** \internal \returns a / b (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pdiv(const Packet& a,
        const Packet& b) { return a/b; }
 /** \internal \returns the min of \a a and \a b  (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pmin(const Packet& a,
-        const Packet& b) { using std::min; return (min)(a, b); }
+        const Packet& b) { return numext::mini(a, b); }
 /** \internal \returns the max of \a a and \a b  (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pmax(const Packet& a,
-        const Packet& b) { using std::max; return (max)(a, b); }
+        const Packet& b) { return numext::maxi(a, b); }
 /** \internal \returns the absolute value of \a a */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pabs(const Packet& a) { using std::abs; return abs(a); }
 /** \internal \returns the phase angle of \a a */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 parg(const Packet& a) { using numext::arg; return arg(a); }
 /** \internal \returns the bitwise and of \a a and \a b */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pand(const Packet& a, const Packet& b) { return a & b; }
 /** \internal \returns the bitwise or of \a a and \a b */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 por(const Packet& a, const Packet& b) { return a | b; }
 /** \internal \returns the bitwise xor of \a a and \a b */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pxor(const Packet& a, const Packet& b) { return a ^ b; }
 /** \internal \returns the bitwise andnot of \a a and \a b */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pandnot(const Packet& a, const Packet& b) { return a & (!b); }
 /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
 /** \internal \returns a packet version of \a *from, (un-aligned load) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
 /** \internal \returns a packet with elements of \a *from duplicated.
  * For instance, for a packet of 8 elements, 4 scalar will be read from \a *from and
  * duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]}
  * Currently, this function is only used for scalar * complex products.
 */
 template<typename Packet> inline Packet
 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
 /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
 /** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pload1(const typename unpacket_traits<Packet>::type  *a) { return pset1<Packet>(*a); }
 /** \internal \returns a packet with elements of \a *from duplicated.
  * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
  * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
  * Currently, this function is only used for scalar * complex products.
  */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
 /** \internal \returns a packet with elements of \a *from quadrupled.
  * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
  * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
  * Currently, this function is only used in matrix products.
  * For packet-size smaller or equal to 4, this function is equivalent to pload1 
  */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 ploadquad(const typename unpacket_traits<Packet>::type* from)
 { return pload1<Packet>(from); }
 /** \internal equivalent to
  * \code
  * a0 = pload1(a+0);
  * a1 = pload1(a+1);
  * a2 = pload1(a+2);
  * a3 = pload1(a+3);
  * \endcode
  * \sa pset1, pload1, ploaddup, pbroadcast2
  */
 template<typename Packet> EIGEN_DEVICE_FUNC
 inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
                        Packet& a0, Packet& a1, Packet& a2, Packet& a3)
 {
  a0 = pload1<Packet>(a+0);
  a1 = pload1<Packet>(a+1);
  a2 = pload1<Packet>(a+2);
  a3 = pload1<Packet>(a+3);
 }
 /** \internal equivalent to
  * \code
  * a0 = pload1(a+0);
  * a1 = pload1(a+1);
  * \endcode
  * \sa pset1, pload1, ploaddup, pbroadcast4
  */
 template<typename Packet> EIGEN_DEVICE_FUNC
 inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
                        Packet& a0, Packet& a1)
 {
  a0 = pload1<Packet>(a+0);
  a1 = pload1<Packet>(a+1);
 }
 /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
-template<typename Scalar> inline typename packet_traits<Scalar>::type
+template<typename Packet> inline Packet
-plset(const Scalar& a) { return a; }
+plset(const typename unpacket_traits<Packet>::type& a) { return a; }
 /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
-template<typename Scalar, typename Packet> inline void pstore(Scalar* to, const Packet& from)
+template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
 { (*to) = from; }
 /** \internal copy the packet \a from to \a *to, (un-aligned store) */
-template<typename Scalar, typename Packet> inline void pstoreu(Scalar* to, const Packet& from)
+template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
-{ (*to) = from; }
+{  (*to) = from; }
 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
 { return ploadu<Packet>(from); }
 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/)
 { pstore(to, from); }
 /** \internal tries to do cache prefetching of \a addr */
-template<typename Scalar> inline void prefetch(const Scalar* addr)
+template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
 {
-#if !defined(_MSC_VER)
+#ifdef __CUDA_ARCH__
-__builtin_prefetch(addr);
+#if defined(__LP64__)
  // 64-bit pointer operand constraint for inlined asm
  asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
 #else
  // 32-bit pointer operand constraint for inlined asm
  asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
 #endif
 #elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
  __builtin_prefetch(addr);
 #endif
 }
 /** \internal \returns the first element of a packet */
-template<typename Packet> inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
 { return a; }
 /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 preduxp(const Packet* vecs) { return vecs[0]; }
 /** \internal \returns the sum of the elements of \a a*/
-template<typename Packet> inline typename unpacket_traits<Packet>::type predux(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
 { return a; }
 /** \internal \returns the sum of the elements of \a a by block of 4 elements.
  * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
  * For packet-size smaller or equal to 4, this boils down to a noop.
  */
 template<typename Packet> EIGEN_DEVICE_FUNC inline
 typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
 predux_downto4(const Packet& a)
 { return a; }
 /** \internal \returns the product of the elements of \a a*/
-template<typename Packet> inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
 { return a; }
 /** \internal \returns the min of the elements of \a a*/
-template<typename Packet> inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
 { return a; }
 /** \internal \returns the max of the elements of \a a*/
-template<typename Packet> inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
 { return a; }
 /** \internal \returns the reversed elements of \a a*/
-template<typename Packet> inline Packet preverse(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
 { return a; }
 /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
-template<typename Packet> inline Packet pcplxflip(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
 {
  // FIXME: uncomment the following in case we drop the internal imag and real functions.
 //   using std::imag;
@ -250,6 +381,22 @@ Packet pasin(const Packet& a) { using std::asin; return asin(a); }
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet pacos(const Packet& a) { using std::acos; return acos(a); }
 /** \internal \returns the arc tangent of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet patan(const Packet& a) { using std::atan; return atan(a); }
 /** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
 /** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
 /** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
 /** \internal \returns the exp of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet pexp(const Packet& a) { using std::exp; return exp(a); }
@ -258,10 +405,36 @@ Packet pexp(const Packet& a) { using std::exp; return exp(a); }
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet plog(const Packet& a) { using std::log; return log(a); }
 /** \internal \returns the log1p of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet plog1p(const Packet& a) { return numext::log1p(a); }
 /** \internal \returns the log10 of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet plog10(const Packet& a) { using std::log10; return log10(a); }
 /** \internal \returns the square-root of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
 /** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet prsqrt(const Packet& a) {
  return pdiv(pset1<Packet>(1), psqrt(a));
 }
 /** \internal \returns the rounded value of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet pround(const Packet& a) { using numext::round; return round(a); }
 /** \internal \returns the floor of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
 /** \internal \returns the ceil of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
 /***************************************************************************
 * The following functions might not have to be overwritten for vectorized types
 ***************************************************************************/
@ -275,34 +448,45 @@ inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename u
 }
 /** \internal \returns a * b + c (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pmadd(const Packet&  a,
         const Packet&  b,
         const Packet&  c)
 { return padd(pmul(a, b),c); }
 /** \internal \returns a packet version of \a *from.
-  * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
+  * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
-template<typename Packet, int LoadMode>
+template<typename Packet, int Alignment>
-inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
 {
-  if(LoadMode == Aligned)
+  if(Alignment >= unpacket_traits<Packet>::alignment)
    return pload<Packet>(from);
  else
    return ploadu<Packet>(from);
 }
 /** \internal copy the packet \a from to \a *to.
-  * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
+  * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
-template<typename Scalar, typename Packet, int LoadMode>
+template<typename Scalar, typename Packet, int Alignment>
-inline void pstoret(Scalar* to, const Packet& from)
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
 {
-  if(LoadMode == Aligned)
+  if(Alignment >= unpacket_traits<Packet>::alignment)
    pstore(to, from);
  else
    pstoreu(to, from);
 }
 /** \internal \returns a packet version of \a *from.
  * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
  * hardware if available to speedup the loading of data that won't be modified
  * by the current computation.
  */
 template<typename Packet, int LoadMode>
 inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
 {
  return ploadt<Packet, LoadMode>(from);
 }
 /** \internal default implementation of palign() allowing partial specialization */
 template<int Offset,typename PacketType>
 struct palign_impl
@ -336,15 +520,74 @@ inline void palign(PacketType& first, const PacketType& second)
 * Fast complex products (GCC generates a function call which is very slow)
 ***************************************************************************/
 // Eigen+CUDA does not support complexes.
 #ifndef __CUDACC__
 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
 { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
 { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
 #endif
 /***************************************************************************
 * PacketBlock, that is a collection of N packets where the number of words
 * in the packet is a multiple of N.
 ***************************************************************************/
 template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
  Packet packet[N];
 };
 template<typename Packet> EIGEN_DEVICE_FUNC inline void
 ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
  // Nothing to do in the scalar case, i.e. a 1x1 matrix.
 }
 /***************************************************************************
 * Selector, i.e. vector of N boolean values used to select (i.e. blend)
 * words from 2 packets.
 ***************************************************************************/
 template <size_t N> struct Selector {
  bool select[N];
 };
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
  return ifPacket.select[0] ? thenPacket : elsePacket;
 }
 /** \internal \returns \a a with the first coefficient replaced by the scalar b */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
 {
  // Default implementation based on pblend.
  // It must be specialized for higher performance.
  Selector<unpacket_traits<Packet>::size> mask;
  mask.select[0] = true;
  // This for loop should be optimized away by the compiler.
  for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
    mask.select[i] = false;
  return pblend(mask, pset1<Packet>(b), a);
 }
 /** \internal \returns \a a with the last coefficient replaced by the scalar b */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
 {
  // Default implementation based on pblend.
  // It must be specialized for higher performance.
  Selector<unpacket_traits<Packet>::size> mask;
  // This for loop should be optimized away by the compiler.
  for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
    mask.select[i] = false;
  mask.select[unpacket_traits<Packet>::size-1] = true;
  return pblend(mask, pset1<Packet>(b), a);
 }
 } // end namespace internal
 } // end namespace Eigen
 #endif // EIGEN_GENERIC_PACKET_MATH_H
--- a/eigenlib/Eigen/src/Core/GlobalFunctions.h
+++ b/eigenlib/Eigen/src/Core/GlobalFunctions.h
@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2010-2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@ -11,13 +11,30 @@
 #ifndef EIGEN_GLOBAL_FUNCTIONS_H
 #define EIGEN_GLOBAL_FUNCTIONS_H
-#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR) \
+#ifdef EIGEN_PARSED_BY_DOXYGEN
 #define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \
  /** \returns an expression of the coefficient-wise DOC_OP of \a x
    DOC_DETAILS
    \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_##NAME">Math functions</a>, class CwiseUnaryOp
    */ \
  template<typename Derived> \
  inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
-  NAME(const Eigen::ArrayBase<Derived>& x) { \
+  NAME(const Eigen::ArrayBase<Derived>& x);
-    return x.derived(); \
+
 #else
 #define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \
  template<typename Derived> \
  inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
  (NAME)(const Eigen::ArrayBase<Derived>& x) { \
    return Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived>(x.derived()); \
  }
 #endif // EIGEN_PARSED_BY_DOXYGEN
 #define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \
  \
  template<typename Derived> \
@ -30,55 +47,133 @@
  { \
    static inline typename NAME##_retval<ArrayBase<Derived> >::type run(const Eigen::ArrayBase<Derived>& x) \
    { \
-      return x.derived(); \
+      return typename NAME##_retval<ArrayBase<Derived> >::type(x.derived()); \
    } \
  };
 namespace Eigen
 {
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op,real part,\sa ArrayBase::real)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op,imaginary part,\sa ArrayBase::imag)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op,complex conjugate,\sa ArrayBase::conjugate)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op,inverse,\sa ArrayBase::inverse)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op,sine,\sa ArrayBase::sin)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op,cosine,\sa ArrayBase::cos)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op,tangent,\sa ArrayBase::tan)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op,arc-tangent,\sa ArrayBase::atan)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op,arc-sine,\sa ArrayBase::asin)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op,arc-consine,\sa ArrayBase::acos)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op,absolute value,\sa ArrayBase::abs DOXCOMMA MatrixBase::cwiseAbs)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op,squared absolute value,\sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op,complex argument,\sa ArrayBase::arg)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op,square root,\sa ArrayBase::sqrt DOXCOMMA MatrixBase::cwiseSqrt)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rsqrt,scalar_rsqrt_op,reciprocal square root,\sa ArrayBase::rsqrt)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op,square (power 2),\sa Eigen::abs2 DOXCOMMA Eigen::pow DOXCOMMA ArrayBase::square)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op,cube (power 3),\sa Eigen::pow DOXCOMMA ArrayBase::cube)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op,nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op,nearest integer not greater than the giben value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op,nearest integer not less than the giben value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op,not-a-number test,\sa Eigen::isinf DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isnan)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op,infinite value test,\sa Eigen::isnan DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isinf)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op,finite value test,\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign)
-  template<typename Derived>
+  /** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent.
-  inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar>, const Derived>
+    *
-  pow(const Eigen::ArrayBase<Derived>& x, const typename Derived::Scalar& exponent) {
+    * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar).
    *
    * \sa ArrayBase::pow()
    *
    * \relates ArrayBase
    */
 #ifdef EIGEN_PARSED_BY_DOXYGEN
  template<typename Derived,typename ScalarExponent>
  inline const CwiseBinaryOp<internal::scalar_pow_op<Derived::Scalar,ScalarExponent>,Derived,Constant<ScalarExponent> >
  pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent);
 #else
  template<typename Derived,typename ScalarExponent>
  inline typename internal::enable_if<   !(internal::is_same<typename Derived::Scalar,ScalarExponent>::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,typename Derived::Scalar,ScalarExponent),
          const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,ScalarExponent,pow) >::type
  pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent) {
    return x.derived().pow(exponent);
  }
  template<typename Derived>
-  inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const Derived, const Derived>
+  inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename Derived::Scalar,pow)
-  pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<Derived>& exponents) 
+  pow(const Eigen::ArrayBase<Derived>& x, const typename Derived::Scalar& exponent) {
    return x.derived().pow(exponent);
  }
 #endif
  /** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents.
    *
    * This function computes the coefficient-wise power.
    *
    * Example: \include Cwise_array_power_array.cpp
    * Output: \verbinclude Cwise_array_power_array.out
    * 
    * \sa ArrayBase::pow()
    *
    * \relates ArrayBase
    */
  template<typename Derived,typename ExponentDerived>
  inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>
  pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents) 
  {
-    return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const Derived, const Derived>(
+    return Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>(
      x.derived(),
      exponents.derived()
    );
  }
-  /**
+  /** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents.
-  * \brief Component-wise division of a scalar by array elements.
+    *
-  **/
+    * This function computes the coefficient-wise power between a scalar and an array of exponents.
-  template <typename Derived>
+    *
-  inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>
+    * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar).
-    operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase<Derived>& a)
+    *
    * Example: \include Cwise_scalar_power_array.cpp
    * Output: \verbinclude Cwise_scalar_power_array.out
    * 
    * \sa ArrayBase::pow()
    *
    * \relates ArrayBase
    */
 #ifdef EIGEN_PARSED_BY_DOXYGEN
  template<typename Scalar,typename Derived>
  inline const CwiseBinaryOp<internal::scalar_pow_op<Scalar,Derived::Scalar>,Constant<Scalar>,Derived>
  pow(const Scalar& x,const Eigen::ArrayBase<Derived>& x);
 #else
  template<typename Scalar, typename Derived>
  inline typename internal::enable_if<   !(internal::is_same<typename Derived::Scalar,Scalar>::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,Scalar,typename Derived::Scalar),
          const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow) >::type
  pow(const Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
  {
-    return Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>(
+    return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow)(
-      a.derived(),
+            typename internal::plain_constant_type<Derived,Scalar>::type(exponents.rows(), exponents.cols(), x), exponents.derived() );
      Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>(s)  
    );
  }
  template<typename Derived>
  inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)
  pow(const typename Derived::Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
  {
    return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)(
      typename internal::plain_constant_type<Derived,typename Derived::Scalar>::type(exponents.rows(), exponents.cols(), x), exponents.derived() );
  }
 #endif
  namespace internal
  {
    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op)
--- a/eigenlib/Eigen/src/Core/IO.h
+++ b/eigenlib/Eigen/src/Core/IO.h
@ -49,7 +49,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
  */
 struct IOFormat
 {
-  /** Default contructor, see class IOFormat for the meaning of the parameters */
+  /** Default constructor, see class IOFormat for the meaning of the parameters */
  IOFormat(int _precision = StreamPrecision, int _flags = 0,
    const std::string& _coeffSeparator = " ",
    const std::string& _rowSeparator = "\n", const std::string& _rowPrefix="", const std::string& _rowSuffix="",
@ -57,6 +57,10 @@ struct IOFormat
  : matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator),
    rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags)
  {
    // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline
    // don't add rowSpacer if columns are not to be aligned
    if((flags & DontAlignCols))
      return;
    int i = int(matSuffix.length())-1;
    while (i>=0 && matSuffix[i]!='\n')
    {
@ -76,7 +80,7 @@ struct IOFormat
  *
  * \brief Pseudo expression providing matrix output with given format
  *
-  * \param ExpressionType the type of the object on which IO stream operations are performed
+  * \tparam ExpressionType the type of the object on which IO stream operations are performed
  *
  * This class represents an expression with stream operators controlled by a given IOFormat.
  * It is the return type of DenseBase::format()
@ -101,7 +105,7 @@ class WithFormat
    }
  protected:
-    const typename ExpressionType::Nested m_matrix;
+    typename ExpressionType::Nested m_matrix;
    IOFormat m_format;
 };
@ -121,31 +125,17 @@ DenseBase<Derived>::format(const IOFormat& fmt) const
 namespace internal {
-template<typename Scalar, bool IsInteger>
+// NOTE: This helper is kept for backward compatibility with previous code specializing
-struct significant_decimals_default_impl
+//       this internal::significant_decimals_impl structure. In the future we should directly
-{
+//       call digits10() which has been introduced in July 2016 in 3.3.
  typedef typename NumTraits<Scalar>::Real RealScalar;
  static inline int run()
  {
    using std::ceil;
    using std::log;
    return cast<RealScalar,int>(ceil(-log(NumTraits<RealScalar>::epsilon())/log(RealScalar(10))));
  }
 };
 template<typename Scalar>
 struct significant_decimals_default_impl<Scalar, true>
 {
  static inline int run()
  {
    return 0;
  }
 };
 template<typename Scalar>
 struct significant_decimals_impl
-  : significant_decimals_default_impl<Scalar, NumTraits<Scalar>::IsInteger>
+{
-{};
+  static inline int run()
  {
    return NumTraits<Scalar>::digits10();
  }
 };
 /** \internal
  * print the matrix \a _m to the output stream \a s using the output format \a fmt */
@ -160,7 +150,6 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
  typename Derived::Nested m = _m;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Index Index;
  Index width = 0;
--- a/eigenlib/Eigen/src/Core/Inverse.h
+++ b/eigenlib/Eigen/src/Core/Inverse.h
@ -0,0 +1,118 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_INVERSE_H
 #define EIGEN_INVERSE_H
 namespace Eigen { 
 template<typename XprType,typename StorageKind> class InverseImpl;
 namespace internal {
 template<typename XprType>
 struct traits<Inverse<XprType> >
  : traits<typename XprType::PlainObject>
 {
  typedef typename XprType::PlainObject PlainObject;
  typedef traits<PlainObject> BaseTraits;
  enum {
    Flags = BaseTraits::Flags & RowMajorBit
  };
 };
 } // end namespace internal
 /** \class Inverse
  *
  * \brief Expression of the inverse of another expression
  *
  * \tparam XprType the type of the expression we are taking the inverse
  *
  * This class represents an abstract expression of A.inverse()
  * and most of the time this is the only way it is used.
  *
  */
 template<typename XprType>
 class Inverse : public InverseImpl<XprType,typename internal::traits<XprType>::StorageKind>
 {
 public:
  typedef typename XprType::StorageIndex StorageIndex;
  typedef typename XprType::PlainObject                       PlainObject;
  typedef typename XprType::Scalar                            Scalar;
  typedef typename internal::ref_selector<XprType>::type      XprTypeNested;
  typedef typename internal::remove_all<XprTypeNested>::type  XprTypeNestedCleaned;
  typedef typename internal::ref_selector<Inverse>::type Nested;
  typedef typename internal::remove_all<XprType>::type NestedExpression;
  explicit EIGEN_DEVICE_FUNC Inverse(const XprType &xpr)
    : m_xpr(xpr)
  {}
  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
  EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; }
 protected:
  XprTypeNested m_xpr;
 };
 // Generic API dispatcher
 template<typename XprType, typename StorageKind>
 class InverseImpl
  : public internal::generic_xpr_base<Inverse<XprType> >::type
 {
 public:
  typedef typename internal::generic_xpr_base<Inverse<XprType> >::type Base;
  typedef typename XprType::Scalar Scalar;
 private:
  Scalar coeff(Index row, Index col) const;
  Scalar coeff(Index i) const;
 };
 namespace internal {
 /** \internal
  * \brief Default evaluator for Inverse expression.
  * 
  * This default evaluator for Inverse expression simply evaluate the inverse into a temporary
  * by a call to internal::call_assignment_no_alias.
  * Therefore, inverse implementers only have to specialize Assignment<Dst,Inverse<...>, ...> for
  * there own nested expression.
  *
  * \sa class Inverse
  */
 template<typename ArgType>
 struct unary_evaluator<Inverse<ArgType> >
  : public evaluator<typename Inverse<ArgType>::PlainObject>
 {
  typedef Inverse<ArgType> InverseType;
  typedef typename InverseType::PlainObject PlainObject;
  typedef evaluator<PlainObject> Base;
  enum { Flags = Base::Flags | EvalBeforeNestingBit };
  unary_evaluator(const InverseType& inv_xpr)
    : m_result(inv_xpr.rows(), inv_xpr.cols())
  {
    ::new (static_cast<Base*>(this)) Base(m_result);
    internal::call_assignment_no_alias(m_result, inv_xpr);
  }
 protected:
  PlainObject m_result;
 };
 } // end namespace internal
 } // end namespace Eigen
 #endif // EIGEN_INVERSE_H
--- a/eigenlib/Eigen/src/Core/Map.h
+++ b/eigenlib/Eigen/src/Core/Map.h
@ -13,13 +13,35 @@
 namespace Eigen { 
 namespace internal {
 template<typename PlainObjectType, int MapOptions, typename StrideType>
 struct traits<Map<PlainObjectType, MapOptions, StrideType> >
  : public traits<PlainObjectType>
 {
  typedef traits<PlainObjectType> TraitsBase;
  enum {
    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
                             ? int(PlainObjectType::InnerStrideAtCompileTime)
                             : int(StrideType::InnerStrideAtCompileTime),
    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
                             ? int(PlainObjectType::OuterStrideAtCompileTime)
                             : int(StrideType::OuterStrideAtCompileTime),
    Alignment = int(MapOptions)&int(AlignedMask),
    Flags0 = TraitsBase::Flags & (~NestByRefBit),
    Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
  };
 private:
  enum { Options }; // Expressions don't have Options
 };
 }
 /** \class Map
  * \ingroup Core_Module
  *
  * \brief A matrix or vector expression mapping an existing array of data.
  *
  * \tparam PlainObjectType the equivalent matrix type of the mapped data
-  * \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned.
+  * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
  *                The default is \c #Unaligned.
  * \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
  *                   of an ordinary, contiguous array. This can be overridden by specifying strides.
@ -63,44 +85,6 @@ namespace Eigen {
  *
  * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
  */
 namespace internal {
 template<typename PlainObjectType, int MapOptions, typename StrideType>
 struct traits<Map<PlainObjectType, MapOptions, StrideType> >
  : public traits<PlainObjectType>
 {
  typedef traits<PlainObjectType> TraitsBase;
  typedef typename PlainObjectType::Index Index;
  typedef typename PlainObjectType::Scalar Scalar;
  enum {
    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
                             ? int(PlainObjectType::InnerStrideAtCompileTime)
                             : int(StrideType::InnerStrideAtCompileTime),
    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
                             ? int(PlainObjectType::OuterStrideAtCompileTime)
                             : int(StrideType::OuterStrideAtCompileTime),
    HasNoInnerStride = InnerStrideAtCompileTime == 1,
    HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
    HasNoStride = HasNoInnerStride && HasNoOuterStride,
    IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
    IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
    KeepsPacketAccess = bool(HasNoInnerStride)
                        && ( bool(IsDynamicSize)
                           || HasNoOuterStride
                           || ( OuterStrideAtCompileTime!=Dynamic
                           && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ),
    Flags0 = TraitsBase::Flags & (~NestByRefBit),
    Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
    Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
           ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
    Flags3 = is_lvalue<PlainObjectType>::value ? int(Flags2) : (int(Flags2) & ~LvalueBit),
    Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit)
  };
 private:
  enum { Options }; // Expressions don't have Options
 };
 }
 template<typename PlainObjectType, int MapOptions, typename StrideType> class Map
  : public MapBase<Map<PlainObjectType, MapOptions, StrideType> >
 {
@ -110,19 +94,17 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
    EIGEN_DENSE_PUBLIC_INTERFACE(Map)
    typedef typename Base::PointerType PointerType;
 #if EIGEN2_SUPPORT_STAGE <= STAGE30_FULL_EIGEN3_API
    typedef const Scalar* PointerArgType;
    inline PointerType cast_to_pointer_type(PointerArgType ptr) { return const_cast<PointerType>(ptr); }
 #else
    typedef PointerType PointerArgType;
    EIGEN_DEVICE_FUNC
    inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
 #endif
    EIGEN_DEVICE_FUNC
    inline Index innerStride() const
    {
      return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
    }
    EIGEN_DEVICE_FUNC
    inline Index outerStride() const
    {
      return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
@ -134,10 +116,11 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
    /** Constructor in the fixed-size case.
      *
      * \param dataPtr pointer to the array to map
-      * \param a_stride optional Stride object, passing the strides.
+      * \param stride optional Stride object, passing the strides.
      */
-    inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType())
+    EIGEN_DEVICE_FUNC
-      : Base(cast_to_pointer_type(dataPtr)), m_stride(a_stride)
+    explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType())
      : Base(cast_to_pointer_type(dataPtr)), m_stride(stride)
    {
      PlainObjectType::Base::_check_template_params();
    }
@ -145,11 +128,12 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
    /** Constructor in the dynamic-size vector case.
      *
      * \param dataPtr pointer to the array to map
-      * \param a_size the size of the vector expression
+      * \param size the size of the vector expression
-      * \param a_stride optional Stride object, passing the strides.
+      * \param stride optional Stride object, passing the strides.
      */
-    inline Map(PointerArgType dataPtr, Index a_size, const StrideType& a_stride = StrideType())
+    EIGEN_DEVICE_FUNC
-      : Base(cast_to_pointer_type(dataPtr), a_size), m_stride(a_stride)
+    inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType())
      : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride)
    {
      PlainObjectType::Base::_check_template_params();
    }
@ -157,12 +141,13 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
    /** Constructor in the dynamic-size matrix case.
      *
      * \param dataPtr pointer to the array to map
-      * \param nbRows the number of rows of the matrix expression
+      * \param rows the number of rows of the matrix expression
-      * \param nbCols the number of columns of the matrix expression
+      * \param cols the number of columns of the matrix expression
-      * \param a_stride optional Stride object, passing the strides.
+      * \param stride optional Stride object, passing the strides.
      */
-    inline Map(PointerArgType dataPtr, Index nbRows, Index nbCols, const StrideType& a_stride = StrideType())
+    EIGEN_DEVICE_FUNC
-      : Base(cast_to_pointer_type(dataPtr), nbRows, nbCols), m_stride(a_stride)
+    inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType())
      : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride)
    {
      PlainObjectType::Base::_check_template_params();
    }
@ -173,19 +158,6 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
    StrideType m_stride;
 };
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 inline Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>
  ::Array(const Scalar *data)
 {
  this->_set_noalias(Eigen::Map<const Array>(data));
 }
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 inline Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>
  ::Matrix(const Scalar *data)
 {
  this->_set_noalias(Eigen::Map<const Matrix>(data));
 }
 } // end namespace Eigen
--- a/eigenlib/Eigen/src/Core/MapBase.h
+++ b/eigenlib/Eigen/src/Core/MapBase.h
@ -12,15 +12,25 @@
 #define EIGEN_MAPBASE_H
 #define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \
-      EIGEN_STATIC_ASSERT((int(internal::traits<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
+      EIGEN_STATIC_ASSERT((int(internal::evaluator<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
                          YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT)
 namespace Eigen { 
-/** \class MapBase
+/** \ingroup Core_Module
  * \ingroup Core_Module
  *
-  * \brief Base class for Map and Block expression with direct access
+  * \brief Base class for dense Map and Block expression with direct access
  *
  * This base class provides the const low-level accessors (e.g. coeff, coeffRef) of dense
  * Map and Block objects with direct access.
  * Typical users do not have to directly deal with this class.
  *
  * This class can be extended by through the macro plugin \c EIGEN_MAPBASE_PLUGIN.
  * See \link TopicCustomizing_Plugins customizing Eigen \endlink for details.
  *
  * The \c Derived class has to provide the following two methods describing the memory layout:
  *  \code Index innerStride() const; \endcode
  *  \code Index outerStride() const; \endcode
  *
  * \sa class Map, class Block
  */
@ -37,7 +47,6 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
    };
    typedef typename internal::traits<Derived>::StorageKind StorageKind;
    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;
@ -76,8 +85,10 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
    typedef typename Base::CoeffReturnType CoeffReturnType;
-    inline Index rows() const { return m_rows.value(); }
+    /** \copydoc DenseBase::rows() */
-    inline Index cols() const { return m_cols.value(); }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); }
    /** \copydoc DenseBase::cols() */
    EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); }
    /** Returns a pointer to the first coefficient of the matrix or vector.
      *
@ -85,30 +96,39 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
      *
      * \sa innerStride(), outerStride()
      */
-    inline const Scalar* data() const { return m_data; }
+    EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; }
    /** \copydoc PlainObjectBase::coeff(Index,Index) const */
    EIGEN_DEVICE_FUNC
    inline const Scalar& coeff(Index rowId, Index colId) const
    {
      return m_data[colId * colStride() + rowId * rowStride()];
    }
    /** \copydoc PlainObjectBase::coeff(Index) const */
    EIGEN_DEVICE_FUNC
    inline const Scalar& coeff(Index index) const
    {
      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
      return m_data[index * innerStride()];
    }
    /** \copydoc PlainObjectBase::coeffRef(Index,Index) const */
    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index rowId, Index colId) const
    {
      return this->m_data[colId * colStride() + rowId * rowStride()];
    }
    /** \copydoc PlainObjectBase::coeffRef(Index) const */
    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index index) const
    {
      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
      return this->m_data[index * innerStride()];
    }
    /** \internal */
    template<int LoadMode>
    inline PacketScalar packet(Index rowId, Index colId) const
    {
@ -116,6 +136,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
               (m_data + (colId * colStride() + rowId * rowStride()));
    }
    /** \internal */
    template<int LoadMode>
    inline PacketScalar packet(Index index) const
    {
@ -123,12 +144,16 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
      return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
    }
-    inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
+    /** \internal Constructor for fixed size matrices or vectors */
    EIGEN_DEVICE_FUNC
    explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
    {
      EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
-      checkSanity();
+      checkSanity<Derived>();
    }
    /** \internal Constructor for dynamically sized vectors */
    EIGEN_DEVICE_FUNC
    inline MapBase(PointerType dataPtr, Index vecSize)
            : m_data(dataPtr),
              m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)),
@ -137,34 +162,56 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
      EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
      eigen_assert(vecSize >= 0);
      eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize);
-      checkSanity();
+      checkSanity<Derived>();
    }
-    inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols)
+    /** \internal Constructor for dynamically sized matrices */
-            : m_data(dataPtr), m_rows(nbRows), m_cols(nbCols)
+    EIGEN_DEVICE_FUNC
    inline MapBase(PointerType dataPtr, Index rows, Index cols)
            : m_data(dataPtr), m_rows(rows), m_cols(cols)
    {
      eigen_assert( (dataPtr == 0)
-              || (   nbRows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == nbRows)
+              || (   rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
-                  && nbCols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols)));
+                  && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
-      checkSanity();
+      checkSanity<Derived>();
    }
    #ifdef EIGEN_MAPBASE_PLUGIN
    #include EIGEN_MAPBASE_PLUGIN
    #endif
  protected:
-    void checkSanity() const
+    template<typename T>
    EIGEN_DEVICE_FUNC
    void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
    {
-      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
+#if EIGEN_MAX_ALIGN_BYTES>0
-                                        internal::inner_stride_at_compile_time<Derived>::ret==1),
+      eigen_assert((   ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)
-                          PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
+                    || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
-      eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0)
+#endif
                   && "input pointer is not aligned on a 16 byte boundary");
    }
    template<typename T>
    EIGEN_DEVICE_FUNC
    void checkSanity(typename internal::enable_if<internal::traits<T>::Alignment==0,void*>::type = 0) const
    {}
    PointerType m_data;
    const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
    const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
 };
 /** \ingroup Core_Module
  *
  * \brief Base class for non-const dense Map and Block expression with direct access
  *
  * This base class provides the non-const low-level accessors (e.g. coeff and coeffRef) of
  * dense Map and Block objects with direct access.
  * It inherits MapBase<Derived, ReadOnlyAccessors> which defines the const variant for reading specific entries.
  *
  * \sa class Map, class Block
  */
 template<typename Derived> class MapBase<Derived, WriteAccessors>
  : public MapBase<Derived, ReadOnlyAccessors>
 {
@ -175,7 +222,7 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
    typedef typename Base::Scalar Scalar;
    typedef typename Base::PacketScalar PacketScalar;
-    typedef typename Base::Index Index;
+    typedef typename Base::StorageIndex StorageIndex;
    typedef typename Base::PointerType PointerType;
    using Base::derived;
@ -196,14 +243,18 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
                    const Scalar
                  >::type ScalarWithConstIfNotLvalue;
    EIGEN_DEVICE_FUNC
    inline const Scalar* data() const { return this->m_data; }
    EIGEN_DEVICE_FUNC
    inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error
    EIGEN_DEVICE_FUNC
    inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
    {
      return this->m_data[col * colStride() + row * rowStride()];
    }
    EIGEN_DEVICE_FUNC
    inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
    {
      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
@ -225,10 +276,11 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
                (this->m_data + index * innerStride(), val);
    }
-    explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
+    EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
-    inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
+    EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
-    inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) : Base(dataPtr, nbRows, nbCols) {}
+    EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {}
    EIGEN_DEVICE_FUNC
    Derived& operator=(const MapBase& other)
    {
      ReadOnlyMapBase::Base::operator=(other);
--- a/eigenlib/Eigen/src/Core/MathFunctions.h
+++ b/eigenlib/Eigen/src/Core/MathFunctions.h
--- a/eigenlib/Eigen/src/Core/MathFunctionsImpl.h
+++ b/eigenlib/Eigen/src/Core/MathFunctionsImpl.h
@ -0,0 +1,78 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
 // Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_MATHFUNCTIONSIMPL_H
 #define EIGEN_MATHFUNCTIONSIMPL_H
 namespace Eigen {
 namespace internal {
 /** \internal \returns the hyperbolic tan of \a a (coeff-wise)
    Doesn't do anything fancy, just a 13/6-degree rational interpolant which
    is accurate up to a couple of ulp in the range [-9, 9], outside of which
    the tanh(x) = +/-1.
    This implementation works on both scalars and packets.
 */
 template<typename T>
 T generic_fast_tanh_float(const T& a_x)
 {
  // Clamp the inputs to the range [-9, 9] since anything outside
  // this range is +/-1.0f in single-precision.
  const T plus_9 = pset1<T>(9.f);
  const T minus_9 = pset1<T>(-9.f);
  // NOTE GCC prior to 6.3 might improperly optimize this max/min
  //      step such that if a_x is nan, x will be either 9 or -9,
  //      and tanh will return 1 or -1 instead of nan.
  //      This is supposed to be fixed in gcc6.3,
  //      see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867
  const T x = pmax(minus_9,pmin(plus_9,a_x));
  // The monomial coefficients of the numerator polynomial (odd).
  const T alpha_1 = pset1<T>(4.89352455891786e-03f);
  const T alpha_3 = pset1<T>(6.37261928875436e-04f);
  const T alpha_5 = pset1<T>(1.48572235717979e-05f);
  const T alpha_7 = pset1<T>(5.12229709037114e-08f);
  const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
  const T alpha_11 = pset1<T>(2.00018790482477e-13f);
  const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
  // The monomial coefficients of the denominator polynomial (even).
  const T beta_0 = pset1<T>(4.89352518554385e-03f);
  const T beta_2 = pset1<T>(2.26843463243900e-03f);
  const T beta_4 = pset1<T>(1.18534705686654e-04f);
  const T beta_6 = pset1<T>(1.19825839466702e-06f);
  // Since the polynomials are odd/even, we need x^2.
  const T x2 = pmul(x, x);
  // Evaluate the numerator polynomial p.
  T p = pmadd(x2, alpha_13, alpha_11);
  p = pmadd(x2, p, alpha_9);
  p = pmadd(x2, p, alpha_7);
  p = pmadd(x2, p, alpha_5);
  p = pmadd(x2, p, alpha_3);
  p = pmadd(x2, p, alpha_1);
  p = pmul(x, p);
  // Evaluate the denominator polynomial p.
  T q = pmadd(x2, beta_6, beta_4);
  q = pmadd(x2, q, beta_2);
  q = pmadd(x2, q, beta_0);
  // Divide the numerator by the denominator.
  return pdiv(p, q);
 }
 } // end namespace internal
 } // end namespace Eigen
 #endif // EIGEN_MATHFUNCTIONSIMPL_H
--- a/eigenlib/Eigen/src/Core/Matrix.h
+++ b/eigenlib/Eigen/src/Core/Matrix.h
@ -13,6 +13,45 @@
 namespace Eigen {
 namespace internal {
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
 {
 private:
  enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
  typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
  enum {
      row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
      is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
      max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
      default_alignment = compute_default_alignment<_Scalar,max_size>::value,
      actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
      required_alignment = unpacket_traits<PacketScalar>::alignment,
      packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0
    };
 public:
  typedef _Scalar Scalar;
  typedef Dense StorageKind;
  typedef Eigen::Index StorageIndex;
  typedef MatrixXpr XprKind;
  enum {
    RowsAtCompileTime = _Rows,
    ColsAtCompileTime = _Cols,
    MaxRowsAtCompileTime = _MaxRows,
    MaxColsAtCompileTime = _MaxCols,
    Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
    Options = _Options,
    InnerStrideAtCompileTime = 1,
    OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
    // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
    EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
    Alignment = actual_alignment
  };
 };
 }
 /** \class Matrix
  * \ingroup Core_Module
  *
@ -24,13 +63,13 @@ namespace Eigen {
  * The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note").
  *
  * The first three template parameters are required:
-  * \tparam _Scalar \anchor matrix_tparam_scalar Numeric type, e.g. float, double, int or std::complex<float>.
+  * \tparam _Scalar Numeric type, e.g. float, double, int or std::complex<float>.
-  *                 User defined sclar types are supported as well (see \ref user_defined_scalars "here").
+  *                 User defined scalar types are supported as well (see \ref user_defined_scalars "here").
  * \tparam _Rows Number of rows, or \b Dynamic
  * \tparam _Cols Number of columns, or \b Dynamic
  *
  * The remaining template parameters are optional -- in most cases you don't have to worry about them.
-  * \tparam _Options \anchor matrix_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either
+  * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of either
  *                 \b #AutoAlign or \b #DontAlign.
  *                 The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
  *                 for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size.
@ -67,7 +106,7 @@ namespace Eigen {
  * \endcode
  *
  * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN.
  *
  * <i><b>Some notes:</b></i>
  *
@ -97,32 +136,44 @@ namespace Eigen {
  * are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.</dd>
  * </dl>
  *
-  * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy, 
+  * <i><b>ABI and storage layout</b></i>
-  * \ref TopicStorageOrders 
+  *
  * The table below summarizes the ABI of some possible Matrix instances which is fixed thorough the lifetime of Eigen 3.
  * <table  class="manual">
  * <tr><th>Matrix type</th><th>Equivalent C structure</th></tr>
  * <tr><td>\code Matrix<T,Dynamic,Dynamic> \endcode</td><td>\code
  * struct {
  *   T *data;                  // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0
  *   Eigen::Index rows, cols;
  *  };
  * \endcode</td></tr>
  * <tr class="alt"><td>\code
  * Matrix<T,Dynamic,1>
  * Matrix<T,1,Dynamic> \endcode</td><td>\code
  * struct {
  *   T *data;                  // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0
  *   Eigen::Index size;
  *  };
  * \endcode</td></tr>
  * <tr><td>\code Matrix<T,Rows,Cols> \endcode</td><td>\code
  * struct {
  *   T data[Rows*Cols];        // with (size_t(data)%A(Rows*Cols*sizeof(T)))==0
  *  };
  * \endcode</td></tr>
  * <tr class="alt"><td>\code Matrix<T,Dynamic,Dynamic,0,MaxRows,MaxCols> \endcode</td><td>\code
  * struct {
  *   T data[MaxRows*MaxCols];  // with (size_t(data)%A(MaxRows*MaxCols*sizeof(T)))==0
  *   Eigen::Index rows, cols;
  *  };
  * \endcode</td></tr>
  * </table>
  * Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to the largest possible power-of-two
  * smaller to EIGEN_MAX_STATIC_ALIGN_BYTES.
  *
  * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
  * \ref TopicStorageOrders
  */
 namespace internal {
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
 {
  typedef _Scalar Scalar;
  typedef Dense StorageKind;
  typedef DenseIndex Index;
  typedef MatrixXpr XprKind;
  enum {
    RowsAtCompileTime = _Rows,
    ColsAtCompileTime = _Cols,
    MaxRowsAtCompileTime = _MaxRows,
    MaxColsAtCompileTime = _MaxCols,
    Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
    CoeffReadCost = NumTraits<Scalar>::ReadCost,
    Options = _Options,
    InnerStrideAtCompileTime = 1,
    OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime
  };
 };
 }
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 class Matrix
  : public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
@ -151,6 +202,7 @@ class Matrix
      *
      * \callgraph
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other)
    {
      return Base::_set(other);
@ -167,7 +219,8 @@ class Matrix
      * remain row-vectors and vectors remain vectors.
      */
    template<typename OtherDerived>
-    EIGEN_STRONG_INLINE Matrix& operator=(const MatrixBase<OtherDerived>& other)
+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase<OtherDerived>& other)
    {
      return Base::_set(other);
    }
@ -179,12 +232,14 @@ class Matrix
      * \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
      */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other)
    {
      return Base::operator=(other);
    }
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived>& func)
    {
      return Base::operator=(func);
@ -200,6 +255,7 @@ class Matrix
      *
      * \sa resize(Index,Index)
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix() : Base()
    {
      Base::_check_template_params();
@ -207,45 +263,87 @@ class Matrix
    }
    // FIXME is it still needed
-    Matrix(internal::constructor_without_unaligned_array_assert)
+    EIGEN_DEVICE_FUNC
    explicit Matrix(internal::constructor_without_unaligned_array_assert)
      : Base(internal::constructor_without_unaligned_array_assert())
    { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
-    /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
+#if EIGEN_HAS_RVALUE_REFERENCES
-      *
+    EIGEN_DEVICE_FUNC
-      * Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
+    Matrix(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
-      * it is redundant to pass the dimension here, so it makes more sense to use the default
+      : Base(std::move(other))
      * constructor Matrix() instead.
      */
    EIGEN_STRONG_INLINE explicit Matrix(Index dim)
      : Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim)
    {
      Base::_check_template_params();
-      EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix)
+      if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
-      eigen_assert(dim >= 0);
+        Base::_set_noalias(other);
      eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
    }
    EIGEN_DEVICE_FUNC
    Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
    {
      other.swap(*this);
      return *this;
    }
 #endif
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    // This constructor is for both 1x1 matrices and dynamic vectors
    template<typename T>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE explicit Matrix(const T& x)
    {
      Base::_check_template_params();
      Base::template _init1<T>(x);
    }
    template<typename T0, typename T1>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y)
    {
      Base::_check_template_params();
      Base::template _init2<T0,T1>(x, y);
    }
    #else
    /** \brief Constructs a fixed-sized matrix initialized with coefficients starting at \a data */
    EIGEN_DEVICE_FUNC
    explicit Matrix(const Scalar *data);
    /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
      *
      * This is useful for dynamic-size vectors. For fixed-size vectors,
      * it is redundant to pass these parameters, so one should use the default constructor
      * Matrix() instead.
      * 
      * \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance,
      * calling Matrix<double,1,1>(1) will call the initialization constructor: Matrix(const Scalar&).
      * For fixed-size \c 1x1 matrices it is therefore recommended to use the default
      * constructor Matrix() instead, especially when using one of the non standard
      * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
      */
    EIGEN_STRONG_INLINE explicit Matrix(Index dim);
    /** \brief Constructs an initialized 1x1 matrix with the given coefficient */
    Matrix(const Scalar& x);
    /** \brief Constructs an uninitialized matrix with \a rows rows and \a cols columns.
      *
      * This is useful for dynamic-size matrices. For fixed-size matrices,
      * it is redundant to pass these parameters, so one should use the default constructor
-      * Matrix() instead. */
+      * Matrix() instead.
      * 
      * \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance,
      * calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y).
      * For fixed-size \c 1x2 or \c 2x1 vectors it is therefore recommended to use the default
      * constructor Matrix() instead, especially when using one of the non standard
      * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
      */
    EIGEN_DEVICE_FUNC
    Matrix(Index rows, Index cols);
    /** \brief Constructs an initialized 2D vector with given coefficients */
    Matrix(const Scalar& x, const Scalar& y);
    #endif
    /** \brief Constructs an initialized 3D vector with given coefficients */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z)
    {
      Base::_check_template_params();
@ -255,6 +353,7 @@ class Matrix
      m_storage.data()[2] = z;
    }
    /** \brief Constructs an initialized 4D vector with given coefficients */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)
    {
      Base::_check_template_params();
@ -265,76 +364,33 @@ class Matrix
      m_storage.data()[3] = w;
    }
    explicit Matrix(const Scalar *data);
    /** \brief Constructor copying the value of the expression \a other */
    template<typename OtherDerived>
    EIGEN_STRONG_INLINE Matrix(const MatrixBase<OtherDerived>& other)
             : Base(other.rows() * other.cols(), other.rows(), other.cols())
    {
      // This test resides here, to bring the error messages closer to the user. Normally, these checks
      // are performed deeply within the library, thus causing long and scary error traces.
      EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
      Base::_check_template_params();
      Base::_set_noalias(other);
    }
    /** \brief Copy constructor */
-    EIGEN_STRONG_INLINE Matrix(const Matrix& other)
+    EIGEN_DEVICE_FUNC
-            : Base(other.rows() * other.cols(), other.rows(), other.cols())
+    EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other)
-    {
+    { }
      Base::_check_template_params();
      Base::_set_noalias(other);
    }
    /** \brief Copy constructor with in-place evaluation */
    template<typename OtherDerived>
    EIGEN_STRONG_INLINE Matrix(const ReturnByValue<OtherDerived>& other)
    {
      Base::_check_template_params();
      Base::resize(other.rows(), other.cols());
      other.evalTo(*this);
    }
    /** \brief Copy constructor for generic expressions.
      * \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
      */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
-      : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
+      : Base(other.derived())
-    {
+    { }
      Base::_check_template_params();
      Base::_resize_to_match(other);
      // FIXME/CHECK: isn't *this = other.derived() more efficient. it allows to
      //              go for pure _set() implementations, right?
      *this = other;
    }
-    /** \internal
+    EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
-      * \brief Override MatrixBase::swap() since for dynamic-sized matrices
+    EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
      * of same type it is enough to swap the data pointers.
      */
    template<typename OtherDerived>
    void swap(MatrixBase<OtherDerived> const & other)
    { this->_swap(other.derived()); }
    inline Index innerStride() const { return 1; }
    inline Index outerStride() const { return this->innerSize(); }
    /////////// Geometry module ///////////
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    explicit Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
    #ifdef EIGEN2_SUPPORT
    template<typename OtherDerived>
    explicit Matrix(const eigen2_RotationBase<OtherDerived,ColsAtCompileTime>& r);
    template<typename OtherDerived>
    Matrix& operator=(const eigen2_RotationBase<OtherDerived,ColsAtCompileTime>& r);
    #endif
    // allow to extend Matrix outside Eigen
    #ifdef EIGEN_MATRIX_PLUGIN
    #include EIGEN_MATRIX_PLUGIN
--- a/eigenlib/Eigen/src/Core/MatrixBase.h
+++ b/eigenlib/Eigen/src/Core/MatrixBase.h
@ -41,9 +41,9 @@ namespace Eigen {
  * \endcode
  *
  * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN.
  *
-  * \sa \ref TopicClassHierarchy
+  * \sa \blank \ref TopicClassHierarchy
  */
 template<typename Derived> class MatrixBase
  : public DenseBase<Derived>
@ -52,7 +52,7 @@ template<typename Derived> class MatrixBase
 #ifndef EIGEN_PARSED_BY_DOXYGEN
    typedef MatrixBase StorageBaseType;
    typedef typename internal::traits<Derived>::StorageKind StorageKind;
-    typedef typename internal::traits<Derived>::Index Index;
+    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;
@ -66,7 +66,6 @@ template<typename Derived> class MatrixBase
    using Base::MaxSizeAtCompileTime;
    using Base::IsVectorAtCompileTime;
    using Base::Flags;
    using Base::CoeffReadCost;
    using Base::derived;
    using Base::const_cast_derived;
@ -98,25 +97,14 @@ template<typename Derived> class MatrixBase
    /** \returns the size of the main diagonal, which is min(rows(),cols()).
      * \sa rows(), cols(), SizeAtCompileTime. */
-    inline Index diagonalSize() const { return (std::min)(rows(),cols()); }
+    EIGEN_DEVICE_FUNC
    inline Index diagonalSize() const { return (numext::mini)(rows(),cols()); }
-    /** \brief The plain matrix type corresponding to this expression.
+    typedef typename Base::PlainObject PlainObject;
      *
      * This is not necessarily exactly the return type of eval(). In the case of plain matrices,
      * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
      * that the return type of eval() is either PlainObject or const PlainObject&.
      */
    typedef Matrix<typename internal::traits<Derived>::Scalar,
                internal::traits<Derived>::RowsAtCompileTime,
                internal::traits<Derived>::ColsAtCompileTime,
                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
                internal::traits<Derived>::MaxRowsAtCompileTime,
                internal::traits<Derived>::MaxColsAtCompileTime
          > PlainObject;
 #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** \internal Represents a matrix with all coefficients equal to one another*/
-    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
+    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
    /** \internal the return type of MatrixBase::adjoint() */
    typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
                        CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
@ -125,7 +113,7 @@ template<typename Derived> class MatrixBase
    /** \internal Return type of eigenvalues() */
    typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;
    /** \internal the return type of identity */
-    typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,Derived> IdentityReturnType;
+    typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,PlainObject> IdentityReturnType;
    /** \internal the return type of unit vectors */
    typedef Block<const CwiseNullaryOp<internal::scalar_identity_op<Scalar>, SquareMatrixType>,
                  internal::traits<Derived>::RowsAtCompileTime,
@ -133,6 +121,7 @@ template<typename Derived> class MatrixBase
 #endif // not EIGEN_PARSED_BY_DOXYGEN
 #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase
 #define EIGEN_DOC_UNARY_ADDONS(X,Y)
 #   include "../plugins/CommonCwiseUnaryOps.h"
 #   include "../plugins/CommonCwiseBinaryOps.h"
 #   include "../plugins/MatrixCwiseUnaryOps.h"
@ -141,41 +130,53 @@ template<typename Derived> class MatrixBase
 #     include EIGEN_MATRIXBASE_PLUGIN
 #   endif
 #undef EIGEN_CURRENT_STORAGE_BASE_CLASS
 #undef EIGEN_DOC_UNARY_ADDONS
    /** Special case of the template operator=, in order to prevent the compiler
      * from generating a default operator= (issue hit with g++ 4.1)
      */
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator=(const MatrixBase& other);
    // We cannot inherit here via Base::operator= since it is causing
    // trouble with MSVC.
    template <typename OtherDerived>
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator=(const DenseBase<OtherDerived>& other);
    template <typename OtherDerived>
    EIGEN_DEVICE_FUNC
    Derived& operator=(const EigenBase<OtherDerived>& other);
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    Derived& operator=(const ReturnByValue<OtherDerived>& other);
    template<typename ProductDerived, typename Lhs, typename Rhs>
    Derived& lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other);
    template<typename MatrixPower, typename Lhs, typename Rhs>
    Derived& lazyAssign(const MatrixPowerProduct<MatrixPower, Lhs,Rhs>& other);
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator+=(const MatrixBase<OtherDerived>& other);
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator-=(const MatrixBase<OtherDerived>& other);
 #ifdef __CUDACC__
    template<typename OtherDerived>
-    const typename ProductReturnType<Derived,OtherDerived>::Type
+    EIGEN_DEVICE_FUNC
-    operator*(const MatrixBase<OtherDerived> &other) const;
+    const Product<Derived,OtherDerived,LazyProduct>
    operator*(const MatrixBase<OtherDerived> &other) const
    { return this->lazyProduct(other); }
 #else
    template<typename OtherDerived>
-    const typename LazyProductReturnType<Derived,OtherDerived>::Type
+    const Product<Derived,OtherDerived>
    operator*(const MatrixBase<OtherDerived> &other) const;
 #endif
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    const Product<Derived,OtherDerived,LazyProduct>
    lazyProduct(const MatrixBase<OtherDerived> &other) const;
    template<typename OtherDerived>
@ -188,84 +189,93 @@ template<typename Derived> class MatrixBase
    void applyOnTheRight(const EigenBase<OtherDerived>& other);
    template<typename DiagonalDerived>
-    const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
+    EIGEN_DEVICE_FUNC
    const Product<Derived, DiagonalDerived, LazyProduct>
    operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;
    template<typename OtherDerived>
-    typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
+    EIGEN_DEVICE_FUNC
    typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
    dot(const MatrixBase<OtherDerived>& other) const;
-    #ifdef EIGEN2_SUPPORT
+    EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
-      template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC RealScalar norm() const;
      Scalar eigen2_dot(const MatrixBase<OtherDerived>& other) const;
    #endif
    RealScalar squaredNorm() const;
    RealScalar norm() const;
    RealScalar stableNorm() const;
    RealScalar blueNorm() const;
    RealScalar hypotNorm() const;
-    const PlainObject normalized() const;
+    EIGEN_DEVICE_FUNC const PlainObject normalized() const;
-    void normalize();
+    EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const;
    EIGEN_DEVICE_FUNC void normalize();
    EIGEN_DEVICE_FUNC void stableNormalize();
-    const AdjointReturnType adjoint() const;
+    EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
-    void adjointInPlace();
+    EIGEN_DEVICE_FUNC void adjointInPlace();
    typedef Diagonal<Derived> DiagonalReturnType;
    EIGEN_DEVICE_FUNC
    DiagonalReturnType diagonal();
    typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
    EIGEN_DEVICE_FUNC
    ConstDiagonalReturnType diagonal() const;
    template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
    template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
-    template<int Index> typename DiagonalIndexReturnType<Index>::Type diagonal();
+    template<int Index>
-    template<int Index> typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
+    EIGEN_DEVICE_FUNC
-    
+    typename DiagonalIndexReturnType<Index>::Type diagonal();
    template<int Index>
    EIGEN_DEVICE_FUNC
    typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
    typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;
    typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;
    EIGEN_DEVICE_FUNC
    DiagonalDynamicIndexReturnType diagonal(Index index);
    EIGEN_DEVICE_FUNC
    ConstDiagonalDynamicIndexReturnType diagonal(Index index) const;
    #ifdef EIGEN2_SUPPORT
    template<unsigned int Mode> typename internal::eigen2_part_return_type<Derived, Mode>::type part();
    template<unsigned int Mode> const typename internal::eigen2_part_return_type<Derived, Mode>::type part() const;
    // huuuge hack. make Eigen2's matrix.part<Diagonal>() work in eigen3. Problem: Diagonal is now a class template instead
    // of an integer constant. Solution: overload the part() method template wrt template parameters list.
    template<template<typename T, int N> class U>
    const DiagonalWrapper<ConstDiagonalReturnType> part() const
    { return diagonal().asDiagonal(); }
    #endif // EIGEN2_SUPPORT
    template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
    template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
-    template<unsigned int Mode> typename TriangularViewReturnType<Mode>::Type triangularView();
+    template<unsigned int Mode>
-    template<unsigned int Mode> typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
+    EIGEN_DEVICE_FUNC
    typename TriangularViewReturnType<Mode>::Type triangularView();
    template<unsigned int Mode>
    EIGEN_DEVICE_FUNC
    typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
    template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
    template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };
-    template<unsigned int UpLo> typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
+    template<unsigned int UpLo>
-    template<unsigned int UpLo> typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
+    EIGEN_DEVICE_FUNC
    typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
    template<unsigned int UpLo>
    EIGEN_DEVICE_FUNC
    typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
    const SparseView<Derived> sparseView(const Scalar& m_reference = Scalar(0),
                                         const typename NumTraits<Scalar>::Real& m_epsilon = NumTraits<Scalar>::dummy_precision()) const;
-    static const IdentityReturnType Identity();
+    EIGEN_DEVICE_FUNC static const IdentityReturnType Identity();
-    static const IdentityReturnType Identity(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols);
-    static const BasisReturnType Unit(Index size, Index i);
+    EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i);
-    static const BasisReturnType Unit(Index i);
+    EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i);
-    static const BasisReturnType UnitX();
+    EIGEN_DEVICE_FUNC static const BasisReturnType UnitX();
-    static const BasisReturnType UnitY();
+    EIGEN_DEVICE_FUNC static const BasisReturnType UnitY();
-    static const BasisReturnType UnitZ();
+    EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ();
-    static const BasisReturnType UnitW();
+    EIGEN_DEVICE_FUNC static const BasisReturnType UnitW();
    EIGEN_DEVICE_FUNC
    const DiagonalWrapper<const Derived> asDiagonal() const;
    const PermutationWrapper<const Derived> asPermutation() const;
    EIGEN_DEVICE_FUNC
    Derived& setIdentity();
    EIGEN_DEVICE_FUNC
    Derived& setIdentity(Index rows, Index cols);
    bool isIdentity(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
@ -297,59 +307,45 @@ template<typename Derived> class MatrixBase
    NoAlias<Derived,Eigen::MatrixBase > noalias();
-    inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
+    // TODO forceAlignedAccess is temporarily disabled
-    inline ForceAlignedAccess<Derived> forceAlignedAccess();
+    // Need to find a nicer workaround.
-    template<bool Enable> inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type forceAlignedAccessIf() const;
+    inline const Derived& forceAlignedAccess() const { return derived(); }
-    template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
+    inline Derived& forceAlignedAccess() { return derived(); }
    template<bool Enable> inline const Derived& forceAlignedAccessIf() const { return derived(); }
    template<bool Enable> inline Derived& forceAlignedAccessIf() { return derived(); }
-    Scalar trace() const;
+    EIGEN_DEVICE_FUNC Scalar trace() const;
-/////////// Array module ///////////
+    template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;
-    template<int p> RealScalar lpNorm() const;
+    EIGEN_DEVICE_FUNC MatrixBase<Derived>& matrix() { return *this; }
-
+    EIGEN_DEVICE_FUNC const MatrixBase<Derived>& matrix() const { return *this; }
    MatrixBase<Derived>& matrix() { return *this; }
    const MatrixBase<Derived>& matrix() const { return *this; }
    /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix
      * \sa ArrayBase::matrix() */
-    ArrayWrapper<Derived> array() { return derived(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return ArrayWrapper<Derived>(derived()); }
-    const ArrayWrapper<const Derived> array() const { return derived(); }
+    /** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix
      * \sa ArrayBase::matrix() */
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return ArrayWrapper<const Derived>(derived()); }
 /////////// LU module ///////////
-    const FullPivLU<PlainObject> fullPivLu() const;
+    inline const FullPivLU<PlainObject> fullPivLu() const;
-    const PartialPivLU<PlainObject> partialPivLu() const;
+    inline const PartialPivLU<PlainObject> partialPivLu() const;
-    #if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS
+    inline const PartialPivLU<PlainObject> lu() const;
    const LU<PlainObject> lu() const;
    #endif
-    #ifdef EIGEN2_SUPPORT
+    inline const Inverse<Derived> inverse() const;
    const LU<PlainObject> eigen2_lu() const;
    #endif
    #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
    const PartialPivLU<PlainObject> lu() const;
    #endif
    #ifdef EIGEN2_SUPPORT
    template<typename ResultType>
-    void computeInverse(MatrixBase<ResultType> *result) const {
+    inline void computeInverseAndDetWithCheck(
      *result = this->inverse();
    }
    #endif
    const internal::inverse_impl<Derived> inverse() const;
    template<typename ResultType>
    void computeInverseAndDetWithCheck(
      ResultType& inverse,
      typename ResultType::Scalar& determinant,
      bool& invertible,
      const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
    ) const;
    template<typename ResultType>
-    void computeInverseWithCheck(
+    inline void computeInverseWithCheck(
      ResultType& inverse,
      bool& invertible,
      const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
@ -358,65 +354,70 @@ template<typename Derived> class MatrixBase
 /////////// Cholesky module ///////////
-    const LLT<PlainObject>  llt() const;
+    inline const LLT<PlainObject>  llt() const;
-    const LDLT<PlainObject> ldlt() const;
+    inline const LDLT<PlainObject> ldlt() const;
 /////////// QR module ///////////
-    const HouseholderQR<PlainObject> householderQr() const;
+    inline const HouseholderQR<PlainObject> householderQr() const;
-    const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
+    inline const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
-    const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
+    inline const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
-    
+    inline const CompleteOrthogonalDecomposition<PlainObject> completeOrthogonalDecomposition() const;
    #ifdef EIGEN2_SUPPORT
    const QR<PlainObject> qr() const;
    #endif
-    EigenvaluesReturnType eigenvalues() const;
+/////////// Eigenvalues module ///////////
-    RealScalar operatorNorm() const;
+
    inline EigenvaluesReturnType eigenvalues() const;
    inline RealScalar operatorNorm() const;
 /////////// SVD module ///////////
-    JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
+    inline JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
-
+    inline BDCSVD<PlainObject>    bdcSvd(unsigned int computationOptions = 0) const;
    #ifdef EIGEN2_SUPPORT
    SVD<PlainObject> svd() const;
    #endif
 /////////// Geometry module ///////////
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    /// \internal helper struct to form the return type of the cross product
    template<typename OtherDerived> struct cross_product_return_type {
-      typedef typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
+      typedef typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
      typedef Matrix<Scalar,MatrixBase::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> type;
    };
    #endif // EIGEN_PARSED_BY_DOXYGEN
    template<typename OtherDerived>
-    typename cross_product_return_type<OtherDerived>::type
+    EIGEN_DEVICE_FUNC
 #ifndef EIGEN_PARSED_BY_DOXYGEN
    inline typename cross_product_return_type<OtherDerived>::type
 #else
    inline PlainObject
 #endif
    cross(const MatrixBase<OtherDerived>& other) const;
    template<typename OtherDerived>
-    PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
+    EIGEN_DEVICE_FUNC
-    PlainObject unitOrthogonal(void) const;
+    inline PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
-    Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
+
-    
+    EIGEN_DEVICE_FUNC
-    #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
+    inline PlainObject unitOrthogonal(void) const;
-    ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
+
    EIGEN_DEVICE_FUNC
    inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
    // put this as separate enum value to work around possible GCC 4.3 bug (?)
-    enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1?Vertical:Horizontal };
+    enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
                                          : ColsAtCompileTime==1 ? Vertical : Horizontal };
    typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
-    HomogeneousReturnType homogeneous() const;
+    EIGEN_DEVICE_FUNC
-    #endif
+    inline HomogeneousReturnType homogeneous() const;
-    
+
    enum {
      SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
    };
    typedef Block<const Derived,
                  internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
                  internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
-    typedef CwiseUnaryOp<internal::scalar_quotient1_op<typename internal::traits<Derived>::Scalar>,
+    typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType;
-                const ConstStartMinusOne > HNormalizedReturnType;
+    EIGEN_DEVICE_FUNC
-
+    inline const HNormalizedReturnType hnormalized() const;
    const HNormalizedReturnType hnormalized() const;
 ////////// Householder module ///////////
@ -440,6 +441,15 @@ template<typename Derived> class MatrixBase
    template<typename OtherScalar>
    void applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j);
 ///////// SparseCore module /////////
    template<typename OtherDerived>
    EIGEN_STRONG_INLINE const typename SparseMatrixBase<OtherDerived>::template CwiseProductDenseReturnType<Derived>::Type
    cwiseProduct(const SparseMatrixBase<OtherDerived> &other) const
    {
      return other.cwiseProduct(derived());
    }
 ///////// MatrixFunctions module /////////
    typedef typename internal::stem_function<Scalar>::type StemFunction;
@ -452,49 +462,15 @@ template<typename Derived> class MatrixBase
    const MatrixSquareRootReturnValue<Derived> sqrt() const;
    const MatrixLogarithmReturnValue<Derived> log() const;
    const MatrixPowerReturnValue<Derived> pow(const RealScalar& p) const;
-
+    const MatrixComplexPowerReturnValue<Derived> pow(const std::complex<RealScalar>& p) const;
 #ifdef EIGEN2_SUPPORT
    template<typename ProductDerived, typename Lhs, typename Rhs>
    Derived& operator+=(const Flagged<ProductBase<ProductDerived, Lhs,Rhs>, 0,
                                      EvalBeforeAssigningBit>& other);
    template<typename ProductDerived, typename Lhs, typename Rhs>
    Derived& operator-=(const Flagged<ProductBase<ProductDerived, Lhs,Rhs>, 0,
                                      EvalBeforeAssigningBit>& other);
    /** \deprecated because .lazy() is deprecated
      * Overloaded for cache friendly product evaluation */
    template<typename OtherDerived>
    Derived& lazyAssign(const Flagged<OtherDerived, 0, EvalBeforeAssigningBit>& other)
    { return lazyAssign(other._expression()); }
    template<unsigned int Added>
    const Flagged<Derived, Added, 0> marked() const;
    const Flagged<Derived, 0, EvalBeforeAssigningBit> lazy() const;
    inline const Cwise<Derived> cwise() const;
    inline Cwise<Derived> cwise();
    VectorBlock<Derived> start(Index size);
    const VectorBlock<const Derived> start(Index size) const;
    VectorBlock<Derived> end(Index size);
    const VectorBlock<const Derived> end(Index size) const;
    template<int Size> VectorBlock<Derived,Size> start();
    template<int Size> const VectorBlock<const Derived,Size> start() const;
    template<int Size> VectorBlock<Derived,Size> end();
    template<int Size> const VectorBlock<const Derived,Size> end() const;
    Minor<Derived> minor(Index row, Index col);
    const Minor<Derived> minor(Index row, Index col) const;
 #endif
  protected:
-    MatrixBase() : Base() {}
+    EIGEN_DEVICE_FUNC MatrixBase() : Base() {}
  private:
-    explicit MatrixBase(int);
+    EIGEN_DEVICE_FUNC explicit MatrixBase(int);
-    MatrixBase(int,int);
+    EIGEN_DEVICE_FUNC MatrixBase(int,int);
-    template<typename OtherDerived> explicit MatrixBase(const MatrixBase<OtherDerived>&);
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase<OtherDerived>&);
  protected:
    // mixing arrays and matrices is not legal
    template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
--- a/eigenlib/Eigen/src/Core/NestByValue.h
+++ b/eigenlib/Eigen/src/Core/NestByValue.h
@ -13,25 +13,24 @@
 namespace Eigen {
 /** \class NestByValue
  * \ingroup Core_Module
  *
  * \brief Expression which must be nested by value
  *
  * \param ExpressionType the type of the object of which we are requiring nesting-by-value
  *
  * This class is the return type of MatrixBase::nestByValue()
  * and most of the time this is the only way it is used.
  *
  * \sa MatrixBase::nestByValue()
  */
 namespace internal {
 template<typename ExpressionType>
 struct traits<NestByValue<ExpressionType> > : public traits<ExpressionType>
 {};
 }
 /** \class NestByValue
  * \ingroup Core_Module
  *
  * \brief Expression which must be nested by value
  *
  * \tparam ExpressionType the type of the object of which we are requiring nesting-by-value
  *
  * This class is the return type of MatrixBase::nestByValue()
  * and most of the time this is the only way it is used.
  *
  * \sa MatrixBase::nestByValue()
  */
 template<typename ExpressionType> class NestByValue
  : public internal::dense_xpr_base< NestByValue<ExpressionType> >::type
 {
@ -40,29 +39,29 @@ template<typename ExpressionType> class NestByValue
    typedef typename internal::dense_xpr_base<NestByValue>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue)
-    inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}
+    EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}
-    inline Index rows() const { return m_expression.rows(); }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); }
-    inline Index cols() const { return m_expression.cols(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); }
-    inline Index outerStride() const { return m_expression.outerStride(); }
+    EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); }
-    inline Index innerStride() const { return m_expression.innerStride(); }
+    EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); }
-    inline const CoeffReturnType coeff(Index row, Index col) const
+    EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const
    {
      return m_expression.coeff(row, col);
    }
-    inline Scalar& coeffRef(Index row, Index col)
+    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
    {
      return m_expression.const_cast_derived().coeffRef(row, col);
    }
-    inline const CoeffReturnType coeff(Index index) const
+    EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const
    {
      return m_expression.coeff(index);
    }
-    inline Scalar& coeffRef(Index index)
+    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
    {
      return m_expression.const_cast_derived().coeffRef(index);
    }
@ -91,7 +90,7 @@ template<typename ExpressionType> class NestByValue
      m_expression.const_cast_derived().template writePacket<LoadMode>(index, x);
    }
-    operator const ExpressionType&() const { return m_expression; }
+    EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }
  protected:
    const ExpressionType m_expression;
--- a/eigenlib/Eigen/src/Core/NoAlias.h
+++ b/eigenlib/Eigen/src/Core/NoAlias.h
@ -17,7 +17,7 @@ namespace Eigen {
  *
  * \brief Pseudo expression providing an operator = assuming no aliasing
  *
-  * \param ExpressionType the type of the object on which to do the lazy assignment
+  * \tparam ExpressionType the type of the object on which to do the lazy assignment
  *
  * This class represents an expression with special assignment operators
  * assuming no aliasing between the target expression and the source expression.
@ -30,62 +30,36 @@ namespace Eigen {
 template<typename ExpressionType, template <typename> class StorageBase>
 class NoAlias
 {
    typedef typename ExpressionType::Scalar Scalar;
  public:
-    NoAlias(ExpressionType& expression) : m_expression(expression) {}
+    typedef typename ExpressionType::Scalar Scalar;
-
+    
-    /** Behaves like MatrixBase::lazyAssign(other)
+    explicit NoAlias(ExpressionType& expression) : m_expression(expression) {}
      * \sa MatrixBase::lazyAssign() */
    template<typename OtherDerived>
    EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
    { return internal::assign_selector<ExpressionType,OtherDerived,false>::run(m_expression,other.derived()); }
    /** \sa MatrixBase::operator+= */
    template<typename OtherDerived>
    EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
    {
      typedef SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
      SelfAdder tmp(m_expression);
      typedef typename internal::nested<OtherDerived>::type OtherDerivedNested;
      typedef typename internal::remove_all<OtherDerivedNested>::type _OtherDerivedNested;
      internal::assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
      return m_expression;
    }
    /** \sa MatrixBase::operator-= */
    template<typename OtherDerived>
    EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
    {
      typedef SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
      SelfAdder tmp(m_expression);
      typedef typename internal::nested<OtherDerived>::type OtherDerivedNested;
      typedef typename internal::remove_all<OtherDerivedNested>::type _OtherDerivedNested;
      internal::assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
      return m_expression;
    }
 #ifndef EIGEN_PARSED_BY_DOXYGEN
    template<typename ProductDerived, typename Lhs, typename Rhs>
    EIGEN_STRONG_INLINE ExpressionType& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
    { other.derived().addTo(m_expression); return m_expression; }
    template<typename ProductDerived, typename Lhs, typename Rhs>
    EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
    { other.derived().subTo(m_expression); return m_expression; }
    template<typename Lhs, typename Rhs, int NestingFlags>
    EIGEN_STRONG_INLINE ExpressionType& operator+=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other)
    { return m_expression.derived() += CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
    template<typename Lhs, typename Rhs, int NestingFlags>
    EIGEN_STRONG_INLINE ExpressionType& operator-=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other)
    { return m_expression.derived() -= CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
    template<typename OtherDerived>
-    ExpressionType& operator=(const ReturnByValue<OtherDerived>& func)
+    EIGEN_DEVICE_FUNC
-    { return m_expression = func; }
+    EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
-#endif
+    {
      call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
      return m_expression;
    }
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
    {
      call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
      return m_expression;
    }
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
    {
      call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
      return m_expression;
    }
    EIGEN_DEVICE_FUNC
    ExpressionType& expression() const
    {
      return m_expression;
@ -126,7 +100,7 @@ class NoAlias
 template<typename Derived>
 NoAlias<Derived,MatrixBase> MatrixBase<Derived>::noalias()
 {
-  return derived();
+  return NoAlias<Derived, Eigen::MatrixBase >(derived());
 }
 } // end namespace Eigen
--- a/eigenlib/Eigen/src/Core/NumTraits.h
+++ b/eigenlib/Eigen/src/Core/NumTraits.h
@ -12,24 +12,57 @@
 namespace Eigen {
 namespace internal {
 // default implementation of digits10(), based on numeric_limits if specialized,
 // 0 for integer types, and log10(epsilon()) otherwise.
 template< typename T,
          bool use_numeric_limits = std::numeric_limits<T>::is_specialized,
          bool is_integer = NumTraits<T>::IsInteger>
 struct default_digits10_impl
 {
  static int run() { return std::numeric_limits<T>::digits10; }
 };
 template<typename T>
 struct default_digits10_impl<T,false,false> // Floating point
 {
  static int run() {
    using std::log10;
    using std::ceil;
    typedef typename NumTraits<T>::Real Real;
    return int(ceil(-log10(NumTraits<Real>::epsilon())));
  }
 };
 template<typename T>
 struct default_digits10_impl<T,false,true> // Integer
 {
  static int run() { return 0; }
 };
 } // end namespace internal
 /** \class NumTraits
  * \ingroup Core_Module
  *
  * \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
  *
-  * \param T the numeric type at hand
+  * \tparam T the numeric type at hand
  *
  * This class stores enums, typedefs and static methods giving information about a numeric type.
  *
  * The provided data consists of:
-  * \li A typedef \a Real, giving the "real part" type of \a T. If \a T is already real,
+  * \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real,
-  *     then \a Real is just a typedef to \a T. If \a T is \c std::complex<U> then \a Real
+  *     then \c Real is just a typedef to \a T. If \a T is \c std::complex<U> then \c Real
  *     is a typedef to \a U.
-  * \li A typedef \a NonInteger, giving the type that should be used for operations producing non-integral values,
+  * \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values,
  *     such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives
  *     \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to
  *     take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is
  *     only intended as a helper for code that needs to explicitly promote types.
  * \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for \c std::complex<U>, Literal is defined as \c U.
  *     Of course, this type must be fully compatible with \a T. In doubt, just use \a T here.
  * \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what
  *     this means, just use \a T here.
  * \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex
@ -42,10 +75,14 @@ namespace Eigen {
  * \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned.
  * \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must
  *     be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise.
-  * \li An epsilon() function which, unlike std::numeric_limits::epsilon(), returns a \a Real instead of a \a T.
+  * \li An epsilon() function which, unlike <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/epsilon">std::numeric_limits::epsilon()</a>,
  *     it returns a \a Real instead of a \a T.
  * \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default
  *     value by the fuzzy comparison operators.
  * \li highest() and lowest() functions returning the highest and lowest possible values respectively.
  * \li digits10() function returning the number of decimal digits that can be represented without change. This is
  *     the analogue of <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/digits10">std::numeric_limits<T>::digits10</a>
  *     which is used as the default implementation if specialized.
  */
 template<typename T> struct GenericNumTraits
@ -67,22 +104,47 @@ template<typename T> struct GenericNumTraits
                     T
                   >::type NonInteger;
  typedef T Nested;
  typedef T Literal;
-  static inline Real epsilon() { return std::numeric_limits<T>::epsilon(); }
+  EIGEN_DEVICE_FUNC
  static inline Real epsilon()
  {
    return numext::numeric_limits<T>::epsilon();
  }
  EIGEN_DEVICE_FUNC
  static inline int digits10()
  {
    return internal::default_digits10_impl<T>::run();
  }
  EIGEN_DEVICE_FUNC
  static inline Real dummy_precision()
  {
    // make sure to override this for floating-point types
    return Real(0);
  }
-  static inline T highest() { return (std::numeric_limits<T>::max)(); }
+
-  static inline T lowest()  { return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); }
+
-  
+  EIGEN_DEVICE_FUNC
-#ifdef EIGEN2_SUPPORT
+  static inline T highest() {
-  enum {
+    return (numext::numeric_limits<T>::max)();
-    HasFloatingPoint = !IsInteger
+  }
-  };
+
-  typedef NonInteger FloatingPoint;
+  EIGEN_DEVICE_FUNC
-#endif
+  static inline T lowest()  {
    return IsInteger ? (numext::numeric_limits<T>::min)() : (-(numext::numeric_limits<T>::max)());
  }
  EIGEN_DEVICE_FUNC
  static inline T infinity() {
    return numext::numeric_limits<T>::infinity();
  }
  EIGEN_DEVICE_FUNC
  static inline T quiet_NaN() {
    return numext::numeric_limits<T>::quiet_NaN();
  }
 };
 template<typename T> struct NumTraits : GenericNumTraits<T>
@ -91,11 +153,13 @@ template<typename T> struct NumTraits : GenericNumTraits<T>
 template<> struct NumTraits<float>
  : GenericNumTraits<float>
 {
  EIGEN_DEVICE_FUNC
  static inline float dummy_precision() { return 1e-5f; }
 };
 template<> struct NumTraits<double> : GenericNumTraits<double>
 {
  EIGEN_DEVICE_FUNC
  static inline double dummy_precision() { return 1e-12; }
 };
@ -109,6 +173,7 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
  : GenericNumTraits<std::complex<_Real> >
 {
  typedef _Real Real;
  typedef typename NumTraits<_Real>::Literal Literal;
  enum {
    IsComplex = 1,
    RequireInitialization = NumTraits<_Real>::RequireInitialization,
@ -117,8 +182,12 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
    MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
  };
  EIGEN_DEVICE_FUNC
  static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
  EIGEN_DEVICE_FUNC
  static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
  EIGEN_DEVICE_FUNC
  static inline int digits10() { return NumTraits<Real>::digits10(); }
 };
 template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
@ -130,21 +199,48 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
  typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar;
  typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger;
  typedef ArrayType & Nested;
-  
+  typedef typename NumTraits<Scalar>::Literal Literal;
  enum {
    IsComplex = NumTraits<Scalar>::IsComplex,
    IsInteger = NumTraits<Scalar>::IsInteger,
    IsSigned  = NumTraits<Scalar>::IsSigned,
    RequireInitialization = 1,
-    ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost,
+    ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost,
-    AddCost  = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
+    AddCost  = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
-    MulCost  = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
+    MulCost  = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
  };
-  
+
  EIGEN_DEVICE_FUNC
  static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
  EIGEN_DEVICE_FUNC
  static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
 };
 template<> struct NumTraits<std::string>
  : GenericNumTraits<std::string>
 {
  enum {
    RequireInitialization = 1,
    ReadCost = HugeCost,
    AddCost  = HugeCost,
    MulCost  = HugeCost
  };
  static inline int digits10() { return 0; }
 private:
  static inline std::string epsilon();
  static inline std::string dummy_precision();
  static inline std::string lowest();
  static inline std::string highest();
  static inline std::string infinity();
  static inline std::string quiet_NaN();
 };
 // Empty specialization for void to allow template specialization based on NumTraits<T>::Real with T==void and SFINAE.
 template<> struct NumTraits<void> {};
 } // end namespace Eigen
 #endif // EIGEN_NUMTRAITS_H
--- a/eigenlib/Eigen/src/Core/PermutationMatrix.h
+++ b/eigenlib/Eigen/src/Core/PermutationMatrix.h
@ -2,7 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2009-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@ -13,14 +13,18 @@
 namespace Eigen { 
-template<int RowCol,typename IndicesType,typename MatrixType, typename StorageKind> class PermutedImpl;
+namespace internal {
 enum PermPermProduct_t {PermPermProduct};
 } // end namespace internal
 /** \class PermutationBase
  * \ingroup Core_Module
  *
  * \brief Base class for permutations
  *
-  * \param Derived the derived class
+  * \tparam Derived the derived class
  *
  * This class is the base class for all expressions representing a permutation matrix,
  * internally stored as a vector of integers.
@ -38,17 +42,6 @@ template<int RowCol,typename IndicesType,typename MatrixType, typename StorageKi
  *
  * \sa class PermutationMatrix, class PermutationWrapper
  */
 namespace internal {
 template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false>
 struct permut_matrix_product_retval;
 template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false>
 struct permut_sparsematrix_product_retval;
 enum PermPermProduct_t {PermPermProduct};
 } // end namespace internal
 template<typename Derived>
 class PermutationBase : public EigenBase<Derived>
 {
@ -60,19 +53,20 @@ class PermutationBase : public EigenBase<Derived>
    typedef typename Traits::IndicesType IndicesType;
    enum {
      Flags = Traits::Flags,
      CoeffReadCost = Traits::CoeffReadCost,
      RowsAtCompileTime = Traits::RowsAtCompileTime,
      ColsAtCompileTime = Traits::ColsAtCompileTime,
      MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
      MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
    };
-    typedef typename Traits::Scalar Scalar;
+    typedef typename Traits::StorageIndex StorageIndex;
-    typedef typename Traits::Index Index;
+    typedef Matrix<StorageIndex,RowsAtCompileTime,ColsAtCompileTime,0,MaxRowsAtCompileTime,MaxColsAtCompileTime>
    typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime,0,MaxRowsAtCompileTime,MaxColsAtCompileTime>
            DenseMatrixType;
-    typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,Index>
+    typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,StorageIndex>
            PlainPermutationType;
    typedef PlainPermutationType PlainObject;
    using Base::derived;
    typedef Inverse<Derived> InverseReturnType;
    typedef void Scalar;
    #endif
    /** Copies the other permutation into *this */
@ -118,7 +112,7 @@ class PermutationBase : public EigenBase<Derived>
    void evalTo(MatrixBase<DenseDerived>& other) const
    {
      other.setZero();
-      for (int i=0; i<rows();++i)
+      for (Index i=0; i<rows(); ++i)
        other.coeffRef(indices().coeff(i),i) = typename DenseDerived::Scalar(1);
    }
    #endif
@ -147,7 +141,8 @@ class PermutationBase : public EigenBase<Derived>
    /** Sets *this to be the identity permutation matrix */
    void setIdentity()
    {
-      for(Index i = 0; i < size(); ++i)
+      StorageIndex n = StorageIndex(size());
      for(StorageIndex i = 0; i < n; ++i)
        indices().coeffRef(i) = i;
    }
@ -163,18 +158,18 @@ class PermutationBase : public EigenBase<Derived>
      *
      * \returns a reference to *this.
      *
-      * \warning This is much slower than applyTranspositionOnTheRight(int,int):
+      * \warning This is much slower than applyTranspositionOnTheRight(Index,Index):
      * this has linear complexity and requires a lot of branching.
      *
-      * \sa applyTranspositionOnTheRight(int,int)
+      * \sa applyTranspositionOnTheRight(Index,Index)
      */
    Derived& applyTranspositionOnTheLeft(Index i, Index j)
    {
      eigen_assert(i>=0 && j>=0 && i<size() && j<size());
      for(Index k = 0; k < size(); ++k)
      {
-        if(indices().coeff(k) == i) indices().coeffRef(k) = j;
+        if(indices().coeff(k) == i) indices().coeffRef(k) = StorageIndex(j);
-        else if(indices().coeff(k) == j) indices().coeffRef(k) = i;
+        else if(indices().coeff(k) == j) indices().coeffRef(k) = StorageIndex(i);
      }
      return derived();
    }
@ -185,7 +180,7 @@ class PermutationBase : public EigenBase<Derived>
      *
      * This is a fast operation, it only consists in swapping two indices.
      *
-      * \sa applyTranspositionOnTheLeft(int,int)
+      * \sa applyTranspositionOnTheLeft(Index,Index)
      */
    Derived& applyTranspositionOnTheRight(Index i, Index j)
    {
@ -196,16 +191,16 @@ class PermutationBase : public EigenBase<Derived>
    /** \returns the inverse permutation matrix.
      *
-      * \note \note_try_to_help_rvo
+      * \note \blank \note_try_to_help_rvo
      */
-    inline Transpose<PermutationBase> inverse() const
+    inline InverseReturnType inverse() const
-    { return derived(); }
+    { return InverseReturnType(derived()); }
    /** \returns the tranpose permutation matrix.
      *
-      * \note \note_try_to_help_rvo
+      * \note \blank \note_try_to_help_rvo
      */
-    inline Transpose<PermutationBase> transpose() const
+    inline InverseReturnType transpose() const
-    { return derived(); }
+    { return InverseReturnType(derived()); }
    /**** multiplication helpers to hopefully get RVO ****/
@ -215,13 +210,13 @@ class PermutationBase : public EigenBase<Derived>
    template<typename OtherDerived>
    void assignTranspose(const PermutationBase<OtherDerived>& other)
    {
-      for (int i=0; i<rows();++i) indices().coeffRef(other.indices().coeff(i)) = i;
+      for (Index i=0; i<rows();++i) indices().coeffRef(other.indices().coeff(i)) = i;
    }
    template<typename Lhs,typename Rhs>
    void assignProduct(const Lhs& lhs, const Rhs& rhs)
    {
      eigen_assert(lhs.cols() == rhs.rows());
-      for (int i=0; i<rows();++i) indices().coeffRef(i) = lhs.indices().coeff(rhs.indices().coeff(i));
+      for (Index i=0; i<rows();++i) indices().coeffRef(i) = lhs.indices().coeff(rhs.indices().coeff(i));
    }
 #endif
@ -229,7 +224,7 @@ class PermutationBase : public EigenBase<Derived>
    /** \returns the product permutation matrix.
      *
-      * \note \note_try_to_help_rvo
+      * \note \blank \note_try_to_help_rvo
      */
    template<typename Other>
    inline PlainPermutationType operator*(const PermutationBase<Other>& other) const
@ -237,18 +232,18 @@ class PermutationBase : public EigenBase<Derived>
    /** \returns the product of a permutation with another inverse permutation.
      *
-      * \note \note_try_to_help_rvo
+      * \note \blank \note_try_to_help_rvo
      */
    template<typename Other>
-    inline PlainPermutationType operator*(const Transpose<PermutationBase<Other> >& other) const
+    inline PlainPermutationType operator*(const InverseImpl<Other,PermutationStorage>& other) const
    { return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); }
    /** \returns the product of an inverse permutation with another permutation.
      *
-      * \note \note_try_to_help_rvo
+      * \note \blank \note_try_to_help_rvo
      */
    template<typename Other> friend
-    inline PlainPermutationType operator*(const Transpose<PermutationBase<Other> >& other, const PermutationBase& perm)
+    inline PlainPermutationType operator*(const InverseImpl<Other, PermutationStorage>& other, const PermutationBase& perm)
    { return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); }
    /** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation.
@ -284,39 +279,43 @@ class PermutationBase : public EigenBase<Derived>
 };
 namespace internal {
 template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
 struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
 : traits<Matrix<_StorageIndex,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
 {
  typedef PermutationStorage StorageKind;
  typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
  typedef _StorageIndex StorageIndex;
  typedef void Scalar;
 };
 }
 /** \class PermutationMatrix
  * \ingroup Core_Module
  *
  * \brief Permutation matrix
  *
-  * \param SizeAtCompileTime the number of rows/cols, or Dynamic
+  * \tparam SizeAtCompileTime the number of rows/cols, or Dynamic
-  * \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
+  * \tparam MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
-  * \param IndexType the interger type of the indices
+  * \tparam _StorageIndex the integer type of the indices
  *
  * This class represents a permutation matrix, internally stored as a vector of integers.
  *
  * \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
  */
-
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
-namespace internal {
+class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
 template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
 struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
 : traits<Matrix<IndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
 {
  typedef IndexType Index;
  typedef Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
 };
 }
 template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
 class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
 {
    typedef PermutationBase<PermutationMatrix> Base;
    typedef internal::traits<PermutationMatrix> Traits;
  public:
    typedef const PermutationMatrix& Nested;
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    typedef typename Traits::IndicesType IndicesType;
    typedef typename Traits::StorageIndex StorageIndex;
    #endif
    inline PermutationMatrix()
@ -324,8 +323,10 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
    /** Constructs an uninitialized permutation matrix of given size.
      */
-    inline PermutationMatrix(int size) : m_indices(size)
+    explicit inline PermutationMatrix(Index size) : m_indices(size)
-    {}
+    {
      eigen_internal_assert(size <= NumTraits<StorageIndex>::highest());
    }
    /** Copy constructor. */
    template<typename OtherDerived>
@ -346,7 +347,7 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
      * array's size.
      */
    template<typename Other>
-    explicit inline PermutationMatrix(const MatrixBase<Other>& a_indices) : m_indices(a_indices)
+    explicit inline PermutationMatrix(const MatrixBase<Other>& indices) : m_indices(indices)
    {}
    /** Convert the Transpositions \a tr to a permutation matrix */
@ -393,10 +394,13 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
 #ifndef EIGEN_PARSED_BY_DOXYGEN
    template<typename Other>
-    PermutationMatrix(const Transpose<PermutationBase<Other> >& other)
+    PermutationMatrix(const InverseImpl<Other,PermutationStorage>& other)
-      : m_indices(other.nestedPermutation().size())
+      : m_indices(other.derived().nestedExpression().size())
    {
-      for (int i=0; i<m_indices.size();++i) m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i;
+      eigen_internal_assert(m_indices.size() <= NumTraits<StorageIndex>::highest());
      StorageIndex end = StorageIndex(m_indices.size());
      for (StorageIndex i=0; i<end;++i)
        m_indices.coeffRef(other.derived().nestedExpression().indices().coeff(i)) = i;
    }
    template<typename Lhs,typename Rhs>
    PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs)
@ -413,18 +417,20 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
 namespace internal {
-template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
-struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess> >
+struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess> >
- : traits<Matrix<IndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
+ : traits<Matrix<_StorageIndex,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
 {
-  typedef IndexType Index;
+  typedef PermutationStorage StorageKind;
-  typedef Map<const Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
+  typedef Map<const Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
  typedef _StorageIndex StorageIndex;
  typedef void Scalar;
 };
 }
-template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
-class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess>
+class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess>
-  : public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess> >
+  : public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess> >
 {
    typedef PermutationBase<Map> Base;
    typedef internal::traits<Map> Traits;
@ -432,14 +438,14 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    typedef typename Traits::IndicesType IndicesType;
-    typedef typename IndicesType::Scalar Index;
+    typedef typename IndicesType::Scalar StorageIndex;
    #endif
-    inline Map(const Index* indicesPtr)
+    inline Map(const StorageIndex* indicesPtr)
      : m_indices(indicesPtr)
    {}
-    inline Map(const Index* indicesPtr, Index size)
+    inline Map(const StorageIndex* indicesPtr, Index size)
      : m_indices(indicesPtr,size)
    {}
@ -474,40 +480,36 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,
    IndicesType m_indices;
 };
 /** \class PermutationWrapper
  * \ingroup Core_Module
  *
  * \brief Class to view a vector of integers as a permutation matrix
  *
  * \param _IndicesType the type of the vector of integer (can be any compatible expression)
  *
  * This class allows to view any vector expression of integers as a permutation matrix.
  *
  * \sa class PermutationBase, class PermutationMatrix
  */
 struct PermutationStorage {};
 template<typename _IndicesType> class TranspositionsWrapper;
 namespace internal {
 template<typename _IndicesType>
 struct traits<PermutationWrapper<_IndicesType> >
 {
  typedef PermutationStorage StorageKind;
-  typedef typename _IndicesType::Scalar Scalar;
+  typedef void Scalar;
-  typedef typename _IndicesType::Scalar Index;
+  typedef typename _IndicesType::Scalar StorageIndex;
  typedef _IndicesType IndicesType;
  enum {
    RowsAtCompileTime = _IndicesType::SizeAtCompileTime,
    ColsAtCompileTime = _IndicesType::SizeAtCompileTime,
-    MaxRowsAtCompileTime = IndicesType::MaxRowsAtCompileTime,
+    MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime,
-    MaxColsAtCompileTime = IndicesType::MaxColsAtCompileTime,
+    MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime,
-    Flags = 0,
+    Flags = 0
    CoeffReadCost = _IndicesType::CoeffReadCost
  };
 };
 }
 /** \class PermutationWrapper
  * \ingroup Core_Module
  *
  * \brief Class to view a vector of integers as a permutation matrix
  *
  * \tparam _IndicesType the type of the vector of integer (can be any compatible expression)
  *
  * This class allows to view any vector expression of integers as a permutation matrix.
  *
  * \sa class PermutationBase, class PermutationMatrix
  */
 template<typename _IndicesType>
 class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesType> >
 {
@ -519,8 +521,8 @@ class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesTyp
    typedef typename Traits::IndicesType IndicesType;
    #endif
-    inline PermutationWrapper(const IndicesType& a_indices)
+    inline PermutationWrapper(const IndicesType& indices)
-      : m_indices(a_indices)
+      : m_indices(indices)
    {}
    /** const version of indices(). */
@ -532,182 +534,86 @@ class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesTyp
    typename IndicesType::Nested m_indices;
 };
 /** \returns the matrix with the permutation applied to the columns.
  */
-template<typename Derived, typename PermutationDerived>
+template<typename MatrixDerived, typename PermutationDerived>
-inline const internal::permut_matrix_product_retval<PermutationDerived, Derived, OnTheRight>
+EIGEN_DEVICE_FUNC
-operator*(const MatrixBase<Derived>& matrix,
+const Product<MatrixDerived, PermutationDerived, AliasFreeProduct>
-          const PermutationBase<PermutationDerived> &permutation)
+operator*(const MatrixBase<MatrixDerived> &matrix,
          const PermutationBase<PermutationDerived>& permutation)
 {
-  return internal::permut_matrix_product_retval
+  return Product<MatrixDerived, PermutationDerived, AliasFreeProduct>
-           <PermutationDerived, Derived, OnTheRight>
+            (matrix.derived(), permutation.derived());
           (permutation.derived(), matrix.derived());
 }
 /** \returns the matrix with the permutation applied to the rows.
  */
-template<typename Derived, typename PermutationDerived>
+template<typename PermutationDerived, typename MatrixDerived>
-inline const internal::permut_matrix_product_retval
+EIGEN_DEVICE_FUNC
-               <PermutationDerived, Derived, OnTheLeft>
+const Product<PermutationDerived, MatrixDerived, AliasFreeProduct>
 operator*(const PermutationBase<PermutationDerived> &permutation,
-          const MatrixBase<Derived>& matrix)
+          const MatrixBase<MatrixDerived>& matrix)
 {
-  return internal::permut_matrix_product_retval
+  return Product<PermutationDerived, MatrixDerived, AliasFreeProduct>
-           <PermutationDerived, Derived, OnTheLeft>
+            (permutation.derived(), matrix.derived());
           (permutation.derived(), matrix.derived());
 }
 namespace internal {
-template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
+template<typename PermutationType>
-struct traits<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
+class InverseImpl<PermutationType, PermutationStorage>
  : public EigenBase<Inverse<PermutationType> >
 {
  typedef typename MatrixType::PlainObject ReturnType;
 };
 template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
 struct permut_matrix_product_retval
 : public ReturnByValue<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
 {
    typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
    typedef typename MatrixType::Index Index;
    permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix)
      : m_permutation(perm), m_matrix(matrix)
    {}
    inline Index rows() const { return m_matrix.rows(); }
    inline Index cols() const { return m_matrix.cols(); }
    template<typename Dest> inline void evalTo(Dest& dst) const
    {
      const Index n = Side==OnTheLeft ? rows() : cols();
      // FIXME we need an is_same for expression that is not sensitive to constness. For instance
      // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
      if(    is_same<MatrixTypeNestedCleaned,Dest>::value
          && blas_traits<MatrixTypeNestedCleaned>::HasUsableDirectAccess
          && blas_traits<Dest>::HasUsableDirectAccess
          && extract_data(dst) == extract_data(m_matrix))
      {
        // apply the permutation inplace
        Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
        mask.fill(false);
        Index r = 0;
        while(r < m_permutation.size())
        {
          // search for the next seed
          while(r<m_permutation.size() && mask[r]) r++;
          if(r>=m_permutation.size())
            break;
          // we got one, let's follow it until we are back to the seed
          Index k0 = r++;
          Index kPrev = k0;
          mask.coeffRef(k0) = true;
          for(Index k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
          {
                  Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
            .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
                       (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
            mask.coeffRef(k) = true;
            kPrev = k;
          }
        }
      }
      else
      {
        for(int i = 0; i < n; ++i)
        {
          Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
               (dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i)
          =
          Block<const MatrixTypeNestedCleaned,Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime>
               (m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i);
        }
      }
    }
  protected:
    const PermutationType& m_permutation;
    typename MatrixType::Nested m_matrix;
 };
 /* Template partial specialization for transposed/inverse permutations */
 template<typename Derived>
 struct traits<Transpose<PermutationBase<Derived> > >
 : traits<Derived>
 {};
 } // end namespace internal
 template<typename Derived>
 class Transpose<PermutationBase<Derived> >
  : public EigenBase<Transpose<PermutationBase<Derived> > >
 {
    typedef Derived PermutationType;
    typedef typename PermutationType::IndicesType IndicesType;
    typedef typename PermutationType::PlainPermutationType PlainPermutationType;
    typedef internal::traits<PermutationType> PermTraits;
  protected:
    InverseImpl() {}
  public:
    typedef Inverse<PermutationType> InverseType;
    using EigenBase<Inverse<PermutationType> >::derived;
    #ifndef EIGEN_PARSED_BY_DOXYGEN
-    typedef internal::traits<PermutationType> Traits;
+    typedef typename PermutationType::DenseMatrixType DenseMatrixType;
    typedef typename Derived::DenseMatrixType DenseMatrixType;
    enum {
-      Flags = Traits::Flags,
+      RowsAtCompileTime = PermTraits::RowsAtCompileTime,
-      CoeffReadCost = Traits::CoeffReadCost,
+      ColsAtCompileTime = PermTraits::ColsAtCompileTime,
-      RowsAtCompileTime = Traits::RowsAtCompileTime,
+      MaxRowsAtCompileTime = PermTraits::MaxRowsAtCompileTime,
-      ColsAtCompileTime = Traits::ColsAtCompileTime,
+      MaxColsAtCompileTime = PermTraits::MaxColsAtCompileTime
      MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
      MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
    };
    typedef typename Traits::Scalar Scalar;
    #endif
    Transpose(const PermutationType& p) : m_permutation(p) {}
    inline int rows() const { return m_permutation.rows(); }
    inline int cols() const { return m_permutation.cols(); }
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    template<typename DenseDerived>
    void evalTo(MatrixBase<DenseDerived>& other) const
    {
      other.setZero();
-      for (int i=0; i<rows();++i)
+      for (Index i=0; i<derived().rows();++i)
-        other.coeffRef(i, m_permutation.indices().coeff(i)) = typename DenseDerived::Scalar(1);
+        other.coeffRef(i, derived().nestedExpression().indices().coeff(i)) = typename DenseDerived::Scalar(1);
    }
    #endif
    /** \return the equivalent permutation matrix */
-    PlainPermutationType eval() const { return *this; }
+    PlainPermutationType eval() const { return derived(); }
-    DenseMatrixType toDenseMatrix() const { return *this; }
+    DenseMatrixType toDenseMatrix() const { return derived(); }
    /** \returns the matrix with the inverse permutation applied to the columns.
      */
    template<typename OtherDerived> friend
-    inline const internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheRight, true>
+    const Product<OtherDerived, InverseType, AliasFreeProduct>
-    operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trPerm)
+    operator*(const MatrixBase<OtherDerived>& matrix, const InverseType& trPerm)
    {
-      return internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheRight, true>(trPerm.m_permutation, matrix.derived());
+      return Product<OtherDerived, InverseType, AliasFreeProduct>(matrix.derived(), trPerm.derived());
    }
    /** \returns the matrix with the inverse permutation applied to the rows.
      */
    template<typename OtherDerived>
-    inline const internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheLeft, true>
+    const Product<InverseType, OtherDerived, AliasFreeProduct>
    operator*(const MatrixBase<OtherDerived>& matrix) const
    {
-      return internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheLeft, true>(m_permutation, matrix.derived());
+      return Product<InverseType, OtherDerived, AliasFreeProduct>(derived(), matrix.derived());
    }
    const PermutationType& nestedPermutation() const { return m_permutation; }
  protected:
    const PermutationType& m_permutation;
 };
 template<typename Derived>
@ -716,6 +622,12 @@ const PermutationWrapper<const Derived> MatrixBase<Derived>::asPermutation() con
  return derived();
 }
 namespace internal {
 template<> struct AssignmentKind<DenseShape,PermutationShape> { typedef EigenBase2EigenBase Kind; };
 } // end namespace internal
 } // end namespace Eigen
 #endif // EIGEN_PERMUTATIONMATRIX_H
--- a/eigenlib/Eigen/src/Core/PlainObjectBase.h
+++ b/eigenlib/Eigen/src/Core/PlainObjectBase.h
@ -28,6 +28,7 @@ namespace internal {
 template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
  template<typename Index>
  EIGEN_DEVICE_FUNC
  static EIGEN_ALWAYS_INLINE void run(Index, Index)
  {
  }
@ -35,6 +36,7 @@ template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
 template<> struct check_rows_cols_for_overflow<Dynamic> {
  template<typename Index>
  EIGEN_DEVICE_FUNC
  static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols)
  {
    // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
@ -56,33 +58,41 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
 } // end namespace internal
 #ifdef EIGEN_PARSED_BY_DOXYGEN
 namespace doxygen {
 // This is a workaround to doxygen not being able to understand the inheritance logic
 // when it is hidden by the dense_xpr_base helper struct.
 // Moreover, doxygen fails to include members that are not documented in the declaration body of
 // MatrixBase if we inherits MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >,
 // this is why we simply inherits MatrixBase, though this does not make sense.
 /** This class is just a workaround for Doxygen and it does not not actually exist. */
 template<typename Derived> struct dense_xpr_base_dispatcher;
 /** This class is just a workaround for Doxygen and it does not not actually exist. */
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 struct dense_xpr_base_dispatcher<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
    : public MatrixBase {};
 /** This class is just a workaround for Doxygen and it does not not actually exist. */
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 struct dense_xpr_base_dispatcher<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
    : public ArrayBase {};
 } // namespace doxygen
 /** \class PlainObjectBase
  * \ingroup Core_Module
  * \brief %Dense storage base class for matrices and arrays.
  *
  * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN.
  *
  * \tparam Derived is the derived type, e.g., a Matrix or Array
  *
  * \sa \ref TopicClassHierarchy
  */
 #ifdef EIGEN_PARSED_BY_DOXYGEN
 namespace internal {
 // this is a warkaround to doxygen not being able to understand the inheritence logic
 // when it is hidden by the dense_xpr_base helper struct.
 template<typename Derived> struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase<Derived> {};
 /** This class is just a workaround for Doxygen and it does not not actually exist. */
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 struct dense_xpr_base_dispatcher_for_doxygen<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
    : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
 /** This class is just a workaround for Doxygen and it does not not actually exist. */
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 struct dense_xpr_base_dispatcher_for_doxygen<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
    : public ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
 } // namespace internal
 template<typename Derived>
-class PlainObjectBase : public internal::dense_xpr_base_dispatcher_for_doxygen<Derived>
+class PlainObjectBase : public doxygen::dense_xpr_base_dispatcher<Derived>
 #else
 template<typename Derived>
 class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
@ -93,8 +103,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    typedef typename internal::dense_xpr_base<Derived>::type Base;
    typedef typename internal::traits<Derived>::StorageKind StorageKind;
    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;
    typedef Derived DenseType;
@ -113,28 +123,40 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    typedef Eigen::Map<Derived, Unaligned>  MapType;
    friend  class Eigen::Map<const Derived, Unaligned>;
    typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
-    friend  class Eigen::Map<Derived, Aligned>;
+#if EIGEN_MAX_ALIGN_BYTES>0
-    typedef Eigen::Map<Derived, Aligned> AlignedMapType;
+    // for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice.
-    friend  class Eigen::Map<const Derived, Aligned>;
+    friend  class Eigen::Map<Derived, AlignedMax>;
-    typedef const Eigen::Map<const Derived, Aligned> ConstAlignedMapType;
+    friend  class Eigen::Map<const Derived, AlignedMax>;
 #endif
    typedef Eigen::Map<Derived, AlignedMax> AlignedMapType;
    typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType;
    template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
    template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
-    template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, Aligned, StrideType> type; };
+    template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, AlignedMax, StrideType> type; };
-    template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, Aligned, StrideType> type; };
+    template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, AlignedMax, StrideType> type; };
  protected:
    DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
  public:
-    enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::Flags & AlignedBit) != 0 };
+    enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits<Derived>::Alignment>0) };
    EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
    EIGEN_DEVICE_FUNC
    Base& base() { return *static_cast<Base*>(this); }
    EIGEN_DEVICE_FUNC
    const Base& base() const { return *static_cast<const Base*>(this); }
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); }
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); }
    /** This is an overloaded version of DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index,Index) const
      * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
      *
      * See DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const for details. */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const
    {
      if(Flags & RowMajorBit)
@ -143,11 +165,21 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
        return m_storage.data()[rowId + colId * m_storage.rows()];
    }
    /** This is an overloaded version of DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const
      * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
      *
      * See DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const for details. */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
    {
      return m_storage.data()[index];
    }
    /** This is an overloaded version of DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index,Index) const
      * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
      *
      * See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index,Index) const for details. */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)
    {
      if(Flags & RowMajorBit)
@ -156,11 +188,19 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
        return m_storage.data()[rowId + colId * m_storage.rows()];
    }
    /** This is an overloaded version of DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index) const
      * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
      *
      * See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index) const for details. */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
    {
      return m_storage.data()[index];
    }
    /** This is the const version of coeffRef(Index,Index) which is thus synonym of coeff(Index,Index).
      * It is provided for convenience. */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const
    {
      if(Flags & RowMajorBit)
@ -169,6 +209,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
        return m_storage.data()[rowId + colId * m_storage.rows()];
    }
    /** This is the const version of coeffRef(Index) which is thus synonym of coeff(Index).
      * It is provided for convenience. */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
    {
      return m_storage.data()[index];
@ -209,11 +252,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    }
    /** \returns a const pointer to the data array of this matrix */
-    EIGEN_STRONG_INLINE const Scalar *data() const
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const
    { return m_storage.data(); }
    /** \returns a pointer to the data array of this matrix */
-    EIGEN_STRONG_INLINE Scalar *data()
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data()
    { return m_storage.data(); }
    /** Resizes \c *this to a \a rows x \a cols matrix.
@ -232,22 +275,22 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      *
      * \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t)
      */
-    EIGEN_STRONG_INLINE void resize(Index nbRows, Index nbCols)
+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
    {
-      eigen_assert(   EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,nbRows==RowsAtCompileTime)
+      eigen_assert(   EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,rows==RowsAtCompileTime)
-                   && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,nbCols==ColsAtCompileTime)
+                   && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,cols==ColsAtCompileTime)
-                   && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,nbRows<=MaxRowsAtCompileTime)
+                   && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,rows<=MaxRowsAtCompileTime)
-                   && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,nbCols<=MaxColsAtCompileTime)
+                   && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,cols<=MaxColsAtCompileTime)
-                   && nbRows>=0 && nbCols>=0 && "Invalid sizes when resizing a matrix or array.");
+                   && rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array.");
-      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(nbRows, nbCols);
+      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols);
      #ifdef EIGEN_INITIALIZE_COEFFS
-        Index size = nbRows*nbCols;
+        Index size = rows*cols;
        bool size_changed = size != this->size();
-        m_storage.resize(size, nbRows, nbCols);
+        m_storage.resize(size, rows, cols);
        if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
      #else
-        internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(nbRows, nbCols);
+        m_storage.resize(rows*cols, rows, cols);
        m_storage.resize(nbRows*nbCols, nbRows, nbCols);
      #endif
    }
@ -262,6 +305,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      *
      * \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t)
      */
    EIGEN_DEVICE_FUNC
    inline void resize(Index size)
    {
      EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
@ -286,9 +330,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      *
      * \sa resize(Index,Index)
      */
-    inline void resize(NoChange_t, Index nbCols)
+    EIGEN_DEVICE_FUNC
    inline void resize(NoChange_t, Index cols)
    {
-      resize(rows(), nbCols);
+      resize(rows(), cols);
    }
    /** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special value \c NoChange
@ -299,9 +344,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      *
      * \sa resize(Index,Index)
      */
-    inline void resize(Index nbRows, NoChange_t)
+    EIGEN_DEVICE_FUNC
    inline void resize(Index rows, NoChange_t)
    {
-      resize(nbRows, cols());
+      resize(rows, cols());
    }
    /** Resizes \c *this to have the same dimensions as \a other.
@ -312,6 +358,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      * remain row-vectors and vectors remain vectors.
      */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC 
    EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
    {
      const OtherDerived& other = _other.derived();
@ -339,9 +386,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      * Matrices are resized relative to the top-left element. In case values need to be 
      * appended to the matrix they will be uninitialized.
      */
-    EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, Index nbCols)
+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols)
    {
-      internal::conservative_resize_like_impl<Derived>::run(*this, nbRows, nbCols);
+      internal::conservative_resize_like_impl<Derived>::run(*this, rows, cols);
    }
    /** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
@ -351,10 +399,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      *
      * In case the matrix is growing, new rows will be uninitialized.
      */
-    EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, NoChange_t)
+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t)
    {
      // Note: see the comment in conservativeResize(Index,Index)
-      conservativeResize(nbRows, cols());
+      conservativeResize(rows, cols());
    }
    /** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
@ -364,10 +413,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      *
      * In case the matrix is growing, new columns will be uninitialized.
      */
-    EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index nbCols)
+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols)
    {
      // Note: see the comment in conservativeResize(Index,Index)
-      conservativeResize(rows(), nbCols);
+      conservativeResize(rows(), cols);
    }
    /** Resizes the vector to \a size while retaining old values.
@ -378,6 +428,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      *
      * When values are appended, they will be uninitialized.
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void conservativeResize(Index size)
    {
      internal::conservative_resize_like_impl<Derived>::run(*this, size);
@ -393,6 +444,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      * appended to the matrix they will copied from \c other.
      */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase<OtherDerived>& other)
    {
      internal::conservative_resize_like_impl<Derived,OtherDerived>::run(*this, other);
@ -401,6 +453,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    /** This is a special case of the templated operator=. Its purpose is to
      * prevent a default operator= from hiding the templated operator=.
      */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other)
    {
      return _set(other);
@ -408,6 +461,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    /** \sa MatrixBase::lazyAssign() */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase<OtherDerived>& other)
    {
      _resize_to_match(other);
@ -415,12 +469,18 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    }
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue<OtherDerived>& func)
    {
      resize(func.rows(), func.cols());
      return Base::operator=(func);
    }
    // Prevent user from trying to instantiate PlainObjectBase objects
    // by making all its constructor protected. See bug 1074.
  protected:
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE PlainObjectBase() : m_storage()
    {
 //       _check_template_params();
@ -430,23 +490,81 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
 #ifndef EIGEN_PARSED_BY_DOXYGEN
    // FIXME is it still needed ?
    /** \internal */
-    PlainObjectBase(internal::constructor_without_unaligned_array_assert)
+    EIGEN_DEVICE_FUNC
    explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert)
      : m_storage(internal::constructor_without_unaligned_array_assert())
    {
 //       _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
    }
 #endif
-    EIGEN_STRONG_INLINE PlainObjectBase(Index a_size, Index nbRows, Index nbCols)
+#if EIGEN_HAS_RVALUE_REFERENCES
-      : m_storage(a_size, nbRows, nbCols)
+    EIGEN_DEVICE_FUNC
    PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT
      : m_storage( std::move(other.m_storage) )
    {
    }
    EIGEN_DEVICE_FUNC
    PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT
    {
      using std::swap;
      swap(m_storage, other.m_storage);
      return *this;
    }
 #endif
    /** Copy constructor */
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)
      : Base(), m_storage(other.m_storage) { }
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
      : m_storage(size, rows, cols)
    {
 //       _check_template_params();
 //       EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
    }
-    /** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
+    /** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived> &other)
      : m_storage()
    {
      _check_template_params();
      resizeLike(other);
      _set_noalias(other);
    }
    /** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
      : m_storage()
    {
      _check_template_params();
      resizeLike(other);
      *this = other.derived();
    }
    /** \brief Copy constructor with in-place evaluation */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue<OtherDerived>& other)
    {
      _check_template_params();
      // FIXME this does not automatically transpose vectors if necessary
      resize(other.rows(), other.cols());
      other.evalTo(this->derived());
    }
  public:
    /** \brief Copies the generic expression \a other into *this.
      * \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
      */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC 
    EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)
    {
      _resize_to_match(other);
@ -454,16 +572,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      return this->derived();
    }
    /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
    template<typename OtherDerived>
    EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
      : m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
    {
      _check_template_params();
      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.derived().rows(), other.derived().cols());
      Base::operator=(other.derived());
    }
    /** \name Map
      * These are convenience functions returning Map objects. The Map() static functions return unaligned Map objects,
      * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
@ -538,16 +646,16 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    //@}
    using Base::setConstant;
-    Derived& setConstant(Index size, const Scalar& value);
+    EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val);
-    Derived& setConstant(Index rows, Index cols, const Scalar& value);
+    EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& val);
    using Base::setZero;
-    Derived& setZero(Index size);
+    EIGEN_DEVICE_FUNC Derived& setZero(Index size);
-    Derived& setZero(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols);
    using Base::setOnes;
-    Derived& setOnes(Index size);
+    EIGEN_DEVICE_FUNC Derived& setOnes(Index size);
-    Derived& setOnes(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols);
    using Base::setRandom;
    Derived& setRandom(Index size);
@ -566,6 +674,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      * remain row-vectors and vectors remain vectors.
      */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC 
    EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase<OtherDerived>& other)
    {
      #ifdef EIGEN_NO_AUTOMATIC_RESIZING
@ -573,8 +682,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
                 : (rows() == other.rows() && cols() == other.cols())))
        && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
      EIGEN_ONLY_USED_FOR_DEBUG(other);
      if(this->size()==0)
        resizeLike(other);
      #else
      resizeLike(other);
      #endif
@ -594,25 +701,23 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      *
      * \internal
      */
    // aliasing is dealt once in internall::call_assignment
    // so at this stage we have to assume aliasing... and resising has to be done later.
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC 
    EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)
    {
-      _set_selector(other.derived(), typename internal::conditional<static_cast<bool>(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type());
+      internal::call_assignment(this->derived(), other.derived());
      return this->derived();
    }
    template<typename OtherDerived>
    EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); }
    template<typename OtherDerived>
    EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); }
    /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which
      * is the case when creating a new matrix) so one can enforce lazy evaluation.
      *
      * \sa operator=(const MatrixBase<OtherDerived>&), _set()
      */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC 
    EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase<OtherDerived>& other)
    {
      // I don't think we need this resize call since the lazyAssign will anyways resize
@ -620,40 +725,167 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      //_resize_to_match(other);
      // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because
      // it wouldn't allow to copy a row-vector into a column-vector.
-      return internal::assign_selector<Derived,OtherDerived,false>::run(this->derived(), other.derived());
+      internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
      return this->derived();
    }
    template<typename T0, typename T1>
-    EIGEN_STRONG_INLINE void _init2(Index nbRows, Index nbCols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
    {
      EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
                          bool(NumTraits<T1>::IsInteger),
                          FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
-      resize(nbRows,nbCols);
+      resize(rows,cols);
    }
    template<typename T0, typename T1>
-    EIGEN_STRONG_INLINE void _init2(const Scalar& val0, const Scalar& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
+    EIGEN_DEVICE_FUNC 
    EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
    {
      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
-      m_storage.data()[0] = val0;
+      m_storage.data()[0] = Scalar(val0);
-      m_storage.data()[1] = val1;
+      m_storage.data()[1] = Scalar(val1);
    }
    template<typename T0, typename T1>
    EIGEN_DEVICE_FUNC 
    EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1,
                                    typename internal::enable_if<    (!internal::is_same<Index,Scalar>::value)
                                                                  && (internal::is_same<T0,Index>::value)
                                                                  && (internal::is_same<T1,Index>::value)
                                                                  && Base::SizeAtCompileTime==2,T1>::type* = 0)
    {
      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
      m_storage.data()[0] = Scalar(val0);
      m_storage.data()[1] = Scalar(val1);
    }
    // The argument is convertible to the Index type and we either have a non 1x1 Matrix, or a dynamic-sized Array,
    // then the argument is meant to be the size of the object.
    template<typename T>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if<    (Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value)
                                                                              && ((!internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0)
    {
      // NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
      const bool is_integer = NumTraits<T>::IsInteger;
      EIGEN_UNUSED_VARIABLE(is_integer);
      EIGEN_STATIC_ASSERT(is_integer,
                          FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
      resize(size);
    }
    // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type can be implicitely converted)
    template<typename T>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>::type* = 0)
    {
      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
      m_storage.data()[0] = val0;
    }
    // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type match the index type)
    template<typename T>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void _init1(const Index& val0,
                                    typename internal::enable_if<    (!internal::is_same<Index,Scalar>::value)
                                                                  && (internal::is_same<Index,T>::value)
                                                                  && Base::SizeAtCompileTime==1
                                                                  && internal::is_convertible<T, Scalar>::value,T*>::type* = 0)
    {
      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
      m_storage.data()[0] = Scalar(val0);
    }
    // Initialize a fixed size matrix from a pointer to raw data
    template<typename T>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void _init1(const Scalar* data){
      this->_set_noalias(ConstMapType(data));
    }
    // Initialize an arbitrary matrix from a dense expression
    template<typename T, typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void _init1(const DenseBase<OtherDerived>& other){
      this->_set_noalias(other);
    }
    // Initialize an arbitrary matrix from a generic Eigen expression
    template<typename T, typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other){
      this->derived() = other;
    }
    template<typename T, typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void _init1(const ReturnByValue<OtherDerived>& other)
    {
      resize(other.rows(), other.cols());
      other.evalTo(this->derived());
    }
    template<typename T, typename OtherDerived, int ColsAtCompileTime>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void _init1(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
    {
      this->derived() = r;
    }
    // For fixed -size arrays:
    template<typename T>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void _init1(const Scalar& val0,
                                    typename internal::enable_if<    Base::SizeAtCompileTime!=Dynamic
                                                                  && Base::SizeAtCompileTime!=1
                                                                  && internal::is_convertible<T, Scalar>::value
                                                                  && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T>::type* = 0)
    {
      Base::setConstant(val0);
    }
    template<typename T>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void _init1(const Index& val0,
                                    typename internal::enable_if<    (!internal::is_same<Index,Scalar>::value)
                                                                  && (internal::is_same<Index,T>::value)
                                                                  && Base::SizeAtCompileTime!=Dynamic
                                                                  && Base::SizeAtCompileTime!=1
                                                                  && internal::is_convertible<T, Scalar>::value
                                                                  && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T*>::type* = 0)
    {
      Base::setConstant(val0);
    }
    template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
    friend struct internal::matrix_swap_impl;
-    /** \internal generic implementation of swap for dense storage since for dynamic-sized matrices of same type it is enough to swap the
+  public:
-      * data pointers.
+    
 #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** \internal
      * \brief Override DenseBase::swap() since for dynamic-sized matrices
      * of same type it is enough to swap the data pointers.
      */
    template<typename OtherDerived>
-    void _swap(DenseBase<OtherDerived> const & other)
+    EIGEN_DEVICE_FUNC
    void swap(DenseBase<OtherDerived> & other)
    {
      enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic };
-      internal::matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.const_cast_derived());
+      internal::matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.derived());
    }
-
+    
-  public:
+    /** \internal
-#ifndef EIGEN_PARSED_BY_DOXYGEN
+      * \brief const version forwarded to DenseBase::swap
      */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    void swap(DenseBase<OtherDerived> const & other)
    { Base::swap(other.derived()); }
    EIGEN_DEVICE_FUNC 
    static EIGEN_STRONG_INLINE void _check_template_params()
    {
      EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
@ -667,10 +899,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
                        && (Options & (DontAlign|RowMajor)) == Options),
        INVALID_MATRIX_TEMPLATE_PARAMETERS)
    }
 #endif
-private:
+    enum { IsPlainObjectBase = 1 };
-    enum { ThisConstantIsPrivateInPlainObjectBase };
+#endif
 };
 namespace internal {
@ -678,7 +909,6 @@ namespace internal {
 template <typename Derived, typename OtherDerived, bool IsVector>
 struct conservative_resize_like_impl
 {
  typedef typename Derived::Index Index;
  static void run(DenseBase<Derived>& _this, Index rows, Index cols)
  {
    if (_this.rows() == rows && _this.cols() == cols) return;
@ -694,8 +924,8 @@ struct conservative_resize_like_impl
    {
      // The storage order does not allow us to use reallocation.
      typename Derived::PlainObject tmp(rows,cols);
-      const Index common_rows = (std::min)(rows, _this.rows());
+      const Index common_rows = numext::mini(rows, _this.rows());
-      const Index common_cols = (std::min)(cols, _this.cols());
+      const Index common_cols = numext::mini(cols, _this.cols());
      tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
      _this.derived().swap(tmp);
    }
@ -728,8 +958,8 @@ struct conservative_resize_like_impl
    {
      // The storage order does not allow us to use reallocation.
      typename Derived::PlainObject tmp(other);
-      const Index common_rows = (std::min)(tmp.rows(), _this.rows());
+      const Index common_rows = numext::mini(tmp.rows(), _this.rows());
-      const Index common_cols = (std::min)(tmp.cols(), _this.cols());
+      const Index common_cols = numext::mini(tmp.cols(), _this.cols());
      tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
      _this.derived().swap(tmp);
    }
@ -744,7 +974,6 @@ struct conservative_resize_like_impl<Derived,OtherDerived,true>
 {
  using conservative_resize_like_impl<Derived,OtherDerived,false>::run;
  typedef typename Derived::Index Index;
  static void run(DenseBase<Derived>& _this, Index size)
  {
    const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : size;
@ -770,6 +999,7 @@ struct conservative_resize_like_impl<Derived,OtherDerived,true>
 template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
 struct matrix_swap_impl
 {
  EIGEN_DEVICE_FUNC
  static inline void run(MatrixTypeA& a, MatrixTypeB& b)
  {
    a.base().swap(b);
@ -779,6 +1009,7 @@ struct matrix_swap_impl
 template<typename MatrixTypeA, typename MatrixTypeB>
 struct matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>
 {
  EIGEN_DEVICE_FUNC
  static inline void run(MatrixTypeA& a, MatrixTypeB& b)
  {
    static_cast<typename MatrixTypeA::Base&>(a).m_storage.swap(static_cast<typename MatrixTypeB::Base&>(b).m_storage);
--- a/eigenlib/Eigen/src/Core/Product.h
+++ b/eigenlib/Eigen/src/Core/Product.h
@ -0,0 +1,186 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_PRODUCT_H
 #define EIGEN_PRODUCT_H
 namespace Eigen {
 template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl;
 namespace internal {
 template<typename Lhs, typename Rhs, int Option>
 struct traits<Product<Lhs, Rhs, Option> >
 {
  typedef typename remove_all<Lhs>::type LhsCleaned;
  typedef typename remove_all<Rhs>::type RhsCleaned;
  typedef traits<LhsCleaned> LhsTraits;
  typedef traits<RhsCleaned> RhsTraits;
  typedef MatrixXpr XprKind;
  typedef typename ScalarBinaryOpTraits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
  typedef typename product_promote_storage_type<typename LhsTraits::StorageKind,
                                                typename RhsTraits::StorageKind,
                                                internal::product_type<Lhs,Rhs>::ret>::ret StorageKind;
  typedef typename promote_index_type<typename LhsTraits::StorageIndex,
                                      typename RhsTraits::StorageIndex>::type StorageIndex;
  enum {
    RowsAtCompileTime    = LhsTraits::RowsAtCompileTime,
    ColsAtCompileTime    = RhsTraits::ColsAtCompileTime,
    MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime,
    // FIXME: only needed by GeneralMatrixMatrixTriangular
    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime),
    // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator.
    Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit
          : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
          : (   ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit))
             || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? RowMajorBit
          : NoPreferredStorageOrderBit
  };
 };
 } // end namespace internal
 /** \class Product
  * \ingroup Core_Module
  *
  * \brief Expression of the product of two arbitrary matrices or vectors
  *
  * \tparam _Lhs the type of the left-hand side expression
  * \tparam _Rhs the type of the right-hand side expression
  *
  * This class represents an expression of the product of two arbitrary matrices.
  *
  * The other template parameters are:
  * \tparam Option     can be DefaultProduct, AliasFreeProduct, or LazyProduct
  *
  */
 template<typename _Lhs, typename _Rhs, int Option>
 class Product : public ProductImpl<_Lhs,_Rhs,Option,
                                   typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind,
                                                                                   typename internal::traits<_Rhs>::StorageKind,
                                                                                   internal::product_type<_Lhs,_Rhs>::ret>::ret>
 {
  public:
    typedef _Lhs Lhs;
    typedef _Rhs Rhs;
    typedef typename ProductImpl<
        Lhs, Rhs, Option,
        typename internal::product_promote_storage_type<typename internal::traits<Lhs>::StorageKind,
                                                        typename internal::traits<Rhs>::StorageKind,
                                                        internal::product_type<Lhs,Rhs>::ret>::ret>::Base Base;
    EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
    typedef typename internal::ref_selector<Lhs>::type LhsNested;
    typedef typename internal::ref_selector<Rhs>::type RhsNested;
    typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
    typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
    EIGEN_DEVICE_FUNC Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
    {
      eigen_assert(lhs.cols() == rhs.rows()
        && "invalid matrix product"
        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
    }
    EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
    EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
    EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; }
    EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; }
  protected:
    LhsNested m_lhs;
    RhsNested m_rhs;
 };
 namespace internal {
 template<typename Lhs, typename Rhs, int Option, int ProductTag = internal::product_type<Lhs,Rhs>::ret>
 class dense_product_base
 : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
 {};
 /** Convertion to scalar for inner-products */
 template<typename Lhs, typename Rhs, int Option>
 class dense_product_base<Lhs, Rhs, Option, InnerProduct>
 : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
 {
  typedef Product<Lhs,Rhs,Option> ProductXpr;
  typedef typename internal::dense_xpr_base<ProductXpr>::type Base;
 public:
  using Base::derived;
  typedef typename Base::Scalar Scalar;
  operator const Scalar() const
  {
    return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
  }
 };
 } // namespace internal
 // Generic API dispatcher
 template<typename Lhs, typename Rhs, int Option, typename StorageKind>
 class ProductImpl : public internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type
 {
  public:
    typedef typename internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type Base;
 };
 template<typename Lhs, typename Rhs, int Option>
 class ProductImpl<Lhs,Rhs,Option,Dense>
  : public internal::dense_product_base<Lhs,Rhs,Option>
 {
    typedef Product<Lhs, Rhs, Option> Derived;
  public:
    typedef typename internal::dense_product_base<Lhs, Rhs, Option> Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
  protected:
    enum {
      IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) && 
                   (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic),
      EnableCoeff = IsOneByOne || Option==LazyProduct
    };
  public:
    EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const
    {
      EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
      eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
      return internal::evaluator<Derived>(derived()).coeff(row,col);
    }
    EIGEN_DEVICE_FUNC Scalar coeff(Index i) const
    {
      EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
      eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
      return internal::evaluator<Derived>(derived()).coeff(i);
    }
 };
 } // end namespace Eigen
 #endif // EIGEN_PRODUCT_H
--- a/eigenlib/Eigen/src/Core/ProductBase.h
+++ b/eigenlib/Eigen/src/Core/ProductBase.h
@ -1,290 +0,0 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_PRODUCTBASE_H
 #define EIGEN_PRODUCTBASE_H
 namespace Eigen { 
 /** \class ProductBase
  * \ingroup Core_Module
  *
  */
 namespace internal {
 template<typename Derived, typename _Lhs, typename _Rhs>
 struct traits<ProductBase<Derived,_Lhs,_Rhs> >
 {
  typedef MatrixXpr XprKind;
  typedef typename remove_all<_Lhs>::type Lhs;
  typedef typename remove_all<_Rhs>::type Rhs;
  typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
  typedef typename promote_storage_type<typename traits<Lhs>::StorageKind,
                                           typename traits<Rhs>::StorageKind>::ret StorageKind;
  typedef typename promote_index_type<typename traits<Lhs>::Index,
                                         typename traits<Rhs>::Index>::type Index;
  enum {
    RowsAtCompileTime = traits<Lhs>::RowsAtCompileTime,
    ColsAtCompileTime = traits<Rhs>::ColsAtCompileTime,
    MaxRowsAtCompileTime = traits<Lhs>::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = traits<Rhs>::MaxColsAtCompileTime,
    Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0)
          | EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit,
                  // Note that EvalBeforeNestingBit and NestByRefBit
                  // are not used in practice because nested is overloaded for products
    CoeffReadCost = 0 // FIXME why is it needed ?
  };
 };
 }
 #define EIGEN_PRODUCT_PUBLIC_INTERFACE(Derived) \
  typedef ProductBase<Derived, Lhs, Rhs > Base; \
  EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
  typedef typename Base::LhsNested LhsNested; \
  typedef typename Base::_LhsNested _LhsNested; \
  typedef typename Base::LhsBlasTraits LhsBlasTraits; \
  typedef typename Base::ActualLhsType ActualLhsType; \
  typedef typename Base::_ActualLhsType _ActualLhsType; \
  typedef typename Base::RhsNested RhsNested; \
  typedef typename Base::_RhsNested _RhsNested; \
  typedef typename Base::RhsBlasTraits RhsBlasTraits; \
  typedef typename Base::ActualRhsType ActualRhsType; \
  typedef typename Base::_ActualRhsType _ActualRhsType; \
  using Base::m_lhs; \
  using Base::m_rhs;
 template<typename Derived, typename Lhs, typename Rhs>
 class ProductBase : public MatrixBase<Derived>
 {
  public:
    typedef MatrixBase<Derived> Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(ProductBase)
    typedef typename Lhs::Nested LhsNested;
    typedef typename internal::remove_all<LhsNested>::type _LhsNested;
    typedef internal::blas_traits<_LhsNested> LhsBlasTraits;
    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
    typedef typename internal::remove_all<ActualLhsType>::type _ActualLhsType;
    typedef typename internal::traits<Lhs>::Scalar LhsScalar;
    typedef typename Rhs::Nested RhsNested;
    typedef typename internal::remove_all<RhsNested>::type _RhsNested;
    typedef internal::blas_traits<_RhsNested> RhsBlasTraits;
    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
    typedef typename internal::remove_all<ActualRhsType>::type _ActualRhsType;
    typedef typename internal::traits<Rhs>::Scalar RhsScalar;
    // Diagonal of a product: no need to evaluate the arguments because they are going to be evaluated only once
    typedef CoeffBasedProduct<LhsNested, RhsNested, 0> FullyLazyCoeffBaseProductType;
  public:
 #ifndef EIGEN_NO_MALLOC
    typedef typename Base::PlainObject BasePlainObject;
    typedef Matrix<Scalar,RowsAtCompileTime==1?1:Dynamic,ColsAtCompileTime==1?1:Dynamic,BasePlainObject::Options> DynPlainObject;
    typedef typename internal::conditional<(BasePlainObject::SizeAtCompileTime==Dynamic) || (BasePlainObject::SizeAtCompileTime*int(sizeof(Scalar)) < int(EIGEN_STACK_ALLOCATION_LIMIT)),
                                           BasePlainObject, DynPlainObject>::type PlainObject;
 #else
    typedef typename Base::PlainObject PlainObject;
 #endif
    ProductBase(const Lhs& a_lhs, const Rhs& a_rhs)
      : m_lhs(a_lhs), m_rhs(a_rhs)
    {
      eigen_assert(a_lhs.cols() == a_rhs.rows()
        && "invalid matrix product"
        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
    }
    inline Index rows() const { return m_lhs.rows(); }
    inline Index cols() const { return m_rhs.cols(); }
    template<typename Dest>
    inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); }
    template<typename Dest>
    inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); }
    template<typename Dest>
    inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); }
    template<typename Dest>
    inline void scaleAndAddTo(Dest& dst, const Scalar& alpha) const { derived().scaleAndAddTo(dst,alpha); }
    const _LhsNested& lhs() const { return m_lhs; }
    const _RhsNested& rhs() const { return m_rhs; }
    // Implicit conversion to the nested type (trigger the evaluation of the product)
    operator const PlainObject& () const
    {
      m_result.resize(m_lhs.rows(), m_rhs.cols());
      derived().evalTo(m_result);
      return m_result;
    }
    const Diagonal<const FullyLazyCoeffBaseProductType,0> diagonal() const
    { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); }
    template<int Index>
    const Diagonal<FullyLazyCoeffBaseProductType,Index> diagonal() const
    { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); }
    const Diagonal<FullyLazyCoeffBaseProductType,Dynamic> diagonal(Index index) const
    { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); }
    // restrict coeff accessors to 1x1 expressions. No need to care about mutators here since this isnt a Lvalue expression
    typename Base::CoeffReturnType coeff(Index row, Index col) const
    {
 #ifdef EIGEN2_SUPPORT
      return lhs().row(row).cwiseProduct(rhs().col(col).transpose()).sum();
 #else
      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
      eigen_assert(this->rows() == 1 && this->cols() == 1);
      Matrix<Scalar,1,1> result = *this;
      return result.coeff(row,col);
 #endif
    }
    typename Base::CoeffReturnType coeff(Index i) const
    {
      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
      eigen_assert(this->rows() == 1 && this->cols() == 1);
      Matrix<Scalar,1,1> result = *this;
      return result.coeff(i);
    }
    const Scalar& coeffRef(Index row, Index col) const
    {
      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
      eigen_assert(this->rows() == 1 && this->cols() == 1);
      return derived().coeffRef(row,col);
    }
    const Scalar& coeffRef(Index i) const
    {
      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
      eigen_assert(this->rows() == 1 && this->cols() == 1);
      return derived().coeffRef(i);
    }
  protected:
    LhsNested m_lhs;
    RhsNested m_rhs;
    mutable PlainObject m_result;
 };
 // here we need to overload the nested rule for products
 // such that the nested type is a const reference to a plain matrix
 namespace internal {
 template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject>
 struct nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
 {
  typedef typename GeneralProduct<Lhs,Rhs,Mode>::PlainObject const& type;
 };
 template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject>
 struct nested<const GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
 {
  typedef typename GeneralProduct<Lhs,Rhs,Mode>::PlainObject const& type;
 };
 }
 template<typename NestedProduct>
 class ScaledProduct;
 // Note that these two operator* functions are not defined as member
 // functions of ProductBase, because, otherwise we would have to
 // define all overloads defined in MatrixBase. Furthermore, Using
 // "using Base::operator*" would not work with MSVC.
 //
 // Also note that here we accept any compatible scalar types
 template<typename Derived,typename Lhs,typename Rhs>
 const ScaledProduct<Derived>
 operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::Scalar& x)
 { return ScaledProduct<Derived>(prod.derived(), x); }
 template<typename Derived,typename Lhs,typename Rhs>
 typename internal::enable_if<!internal::is_same<typename Derived::Scalar,typename Derived::RealScalar>::value,
                      const ScaledProduct<Derived> >::type
 operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::RealScalar& x)
 { return ScaledProduct<Derived>(prod.derived(), x); }
 template<typename Derived,typename Lhs,typename Rhs>
 const ScaledProduct<Derived>
 operator*(const typename Derived::Scalar& x,const ProductBase<Derived,Lhs,Rhs>& prod)
 { return ScaledProduct<Derived>(prod.derived(), x); }
 template<typename Derived,typename Lhs,typename Rhs>
 typename internal::enable_if<!internal::is_same<typename Derived::Scalar,typename Derived::RealScalar>::value,
                      const ScaledProduct<Derived> >::type
 operator*(const typename Derived::RealScalar& x,const ProductBase<Derived,Lhs,Rhs>& prod)
 { return ScaledProduct<Derived>(prod.derived(), x); }
 namespace internal {
 template<typename NestedProduct>
 struct traits<ScaledProduct<NestedProduct> >
 : traits<ProductBase<ScaledProduct<NestedProduct>,
                         typename NestedProduct::_LhsNested,
                         typename NestedProduct::_RhsNested> >
 {
  typedef typename traits<NestedProduct>::StorageKind StorageKind;
 };
 }
 template<typename NestedProduct>
 class ScaledProduct
  : public ProductBase<ScaledProduct<NestedProduct>,
                       typename NestedProduct::_LhsNested,
                       typename NestedProduct::_RhsNested>
 {
  public:
    typedef ProductBase<ScaledProduct<NestedProduct>,
                       typename NestedProduct::_LhsNested,
                       typename NestedProduct::_RhsNested> Base;
    typedef typename Base::Scalar Scalar;
    typedef typename Base::PlainObject PlainObject;
 //     EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct)
    ScaledProduct(const NestedProduct& prod, const Scalar& x)
    : Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {}
    template<typename Dest>
    inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); }
    template<typename Dest>
    inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); }
    template<typename Dest>
    inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); }
    template<typename Dest>
    inline void scaleAndAddTo(Dest& dst, const Scalar& a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); }
    const Scalar& alpha() const { return m_alpha; }
  protected:
    const NestedProduct& m_prod;
    Scalar m_alpha;
 };
 /** \internal
  * Overloaded to perform an efficient C = (A*B).lazy() */
 template<typename Derived>
 template<typename ProductDerived, typename Lhs, typename Rhs>
 Derived& MatrixBase<Derived>::lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other)
 {
  other.derived().evalTo(derived());
  return derived();
 }
 } // end namespace Eigen
 #endif // EIGEN_PRODUCTBASE_H
--- a/eigenlib/Eigen/src/Core/ProductEvaluators.h
+++ b/eigenlib/Eigen/src/Core/ProductEvaluators.h
--- a/eigenlib/Eigen/src/Core/Random.h
+++ b/eigenlib/Eigen/src/Core/Random.h
@ -16,8 +16,7 @@ namespace internal {
 template<typename Scalar> struct scalar_random_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op)
-  template<typename Index>
+  inline const Scalar operator() () const { return random<Scalar>(); }
  inline const Scalar operator() (Index, Index = 0) const { return random<Scalar>(); }
 };
 template<typename Scalar>
@ -28,12 +27,18 @@ struct functor_traits<scalar_random_op<Scalar> >
 /** \returns a random matrix expression
  *
  * Numbers are uniformly spread through their whole definition range for integer types,
  * and in the [-1:1] range for floating point scalar types.
  * 
  * The parameters \a rows and \a cols are the number of rows and of columns of
  * the returned matrix. Must be compatible with this MatrixBase type.
  *
  * \not_reentrant
  * 
  * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
  * it is redundant to pass \a rows and \a cols as arguments, so Random() should be used
  * instead.
  * 
  *
  * Example: \include MatrixBase_random_int_int.cpp
  * Output: \verbinclude MatrixBase_random_int_int.out
@ -41,22 +46,28 @@ struct functor_traits<scalar_random_op<Scalar> >
  * This expression has the "evaluate before nesting" flag so that it will be evaluated into
  * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
  * behavior with expressions involving random matrices.
  * 
  * See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators.
  *
-  * \sa MatrixBase::setRandom(), MatrixBase::Random(Index), MatrixBase::Random()
+  * \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()
  */
 template<typename Derived>
-inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
+inline const typename DenseBase<Derived>::RandomReturnType
 DenseBase<Derived>::Random(Index rows, Index cols)
 {
  return NullaryExpr(rows, cols, internal::scalar_random_op<Scalar>());
 }
 /** \returns a random vector expression
  *
  * Numbers are uniformly spread through their whole definition range for integer types,
  * and in the [-1:1] range for floating point scalar types.
  *
  * The parameter \a size is the size of the returned vector.
  * Must be compatible with this MatrixBase type.
  *
  * \only_for_vectors
  * \not_reentrant
  *
  * This variant is meant to be used for dynamic-size vector types. For fixed-size types,
  * it is redundant to pass \a size as argument, so Random() should be used
@ -69,10 +80,10 @@ DenseBase<Derived>::Random(Index rows, Index cols)
  * a temporary vector whenever it is nested in a larger expression. This prevents unexpected
  * behavior with expressions involving random matrices.
  *
-  * \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random()
+  * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random()
  */
 template<typename Derived>
-inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
+inline const typename DenseBase<Derived>::RandomReturnType
 DenseBase<Derived>::Random(Index size)
 {
  return NullaryExpr(size, internal::scalar_random_op<Scalar>());
@ -80,6 +91,9 @@ DenseBase<Derived>::Random(Index size)
 /** \returns a fixed-size random matrix or vector expression
  *
  * Numbers are uniformly spread through their whole definition range for integer types,
  * and in the [-1:1] range for floating point scalar types.
  * 
  * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
  * need to use the variants taking size arguments.
  *
@ -89,11 +103,13 @@ DenseBase<Derived>::Random(Index size)
  * This expression has the "evaluate before nesting" flag so that it will be evaluated into
  * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
  * behavior with expressions involving random matrices.
  * 
  * \not_reentrant
  *
-  * \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random(Index)
+  * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index)
  */
 template<typename Derived>
-inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
+inline const typename DenseBase<Derived>::RandomReturnType
 DenseBase<Derived>::Random()
 {
  return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op<Scalar>());
@ -101,6 +117,11 @@ DenseBase<Derived>::Random()
 /** Sets all coefficients in this expression to random values.
  *
  * Numbers are uniformly spread through their whole definition range for integer types,
  * and in the [-1:1] range for floating point scalar types.
  * 
  * \not_reentrant
  * 
  * Example: \include MatrixBase_setRandom.cpp
  * Output: \verbinclude MatrixBase_setRandom.out
  *
@ -114,12 +135,16 @@ inline Derived& DenseBase<Derived>::setRandom()
 /** Resizes to the given \a newSize, and sets all coefficients in this expression to random values.
  *
  * Numbers are uniformly spread through their whole definition range for integer types,
  * and in the [-1:1] range for floating point scalar types.
  * 
  * \only_for_vectors
  * \not_reentrant
  *
  * Example: \include Matrix_setRandom_int.cpp
  * Output: \verbinclude Matrix_setRandom_int.out
  *
-  * \sa MatrixBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, MatrixBase::Random()
+  * \sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random()
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived&
@ -131,19 +156,24 @@ PlainObjectBase<Derived>::setRandom(Index newSize)
 /** Resizes to the given size, and sets all coefficients in this expression to random values.
  *
-  * \param nbRows the new number of rows
+  * Numbers are uniformly spread through their whole definition range for integer types,
-  * \param nbCols the new number of columns
+  * and in the [-1:1] range for floating point scalar types.
  *
  * \not_reentrant
  * 
  * \param rows the new number of rows
  * \param cols the new number of columns
  *
  * Example: \include Matrix_setRandom_int_int.cpp
  * Output: \verbinclude Matrix_setRandom_int_int.out
  *
-  * \sa MatrixBase::setRandom(), setRandom(Index), class CwiseNullaryOp, MatrixBase::Random()
+  * \sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random()
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived&
-PlainObjectBase<Derived>::setRandom(Index nbRows, Index nbCols)
+PlainObjectBase<Derived>::setRandom(Index rows, Index cols)
 {
-  resize(nbRows, nbCols);
+  resize(rows, cols);
  return setRandom();
 }
--- a/eigenlib/Eigen/src/Core/Redux.h
+++ b/eigenlib/Eigen/src/Core/Redux.h
@ -27,8 +27,9 @@ template<typename Func, typename Derived>
 struct redux_traits
 {
 public:
    typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType;
  enum {
-    PacketSize = packet_traits<typename Derived::Scalar>::size,
+    PacketSize = unpacket_traits<PacketType>::size,
    InnerMaxSize = int(Derived::IsRowMajor)
                 ? Derived::MaxColsAtCompileTime
                 : Derived::MaxRowsAtCompileTime
@ -37,8 +38,8 @@ public:
  enum {
    MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
                  && (functor_traits<Func>::PacketAccess),
-    MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
+    MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit),
-    MaySliceVectorize  = MightVectorize && int(InnerMaxSize)>=3*PacketSize
+    MaySliceVectorize  = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
  };
 public:
@ -50,21 +51,34 @@ public:
 public:
  enum {
-    Cost = (  Derived::SizeAtCompileTime == Dynamic
+    Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
-           || Derived::CoeffReadCost == Dynamic
+         : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
           || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic)
           ) ? Dynamic
           : Derived::SizeAtCompileTime * Derived::CoeffReadCost
               + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
  };
 public:
  enum {
-    Unrolling = Cost != Dynamic && Cost <= UnrollingLimit
+    Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
              ? CompleteUnrolling
              : NoUnrolling
  };
 #ifdef EIGEN_DEBUG_ASSIGN
  static void debug()
  {
    std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    EIGEN_DEBUG_VAR(Derived::Flags)
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    EIGEN_DEBUG_VAR(Traversal)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(Unrolling)
    std::cerr << std::endl;
  }
 #endif
 };
 /***************************************************************************
@ -82,6 +96,7 @@ struct redux_novec_unroller
  typedef typename Derived::Scalar Scalar;
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
@ -99,6 +114,7 @@ struct redux_novec_unroller<Func, Derived, Start, 1>
  typedef typename Derived::Scalar Scalar;
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
  {
    return mat.coeffByOuterInner(outer, inner);
@ -112,6 +128,7 @@ template<typename Func, typename Derived, int Start>
 struct redux_novec_unroller<Func, Derived, Start, 0>
 {
  typedef typename Derived::Scalar Scalar;
  EIGEN_DEVICE_FUNC 
  static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
 };
@ -121,12 +138,12 @@ template<typename Func, typename Derived, int Start, int Length>
 struct redux_vec_unroller
 {
  enum {
-    PacketSize = packet_traits<typename Derived::Scalar>::size,
+    PacketSize = redux_traits<Func, Derived>::PacketSize,
    HalfLength = Length/2
  };
  typedef typename Derived::Scalar Scalar;
-  typedef typename packet_traits<Scalar>::type PacketScalar;
+  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
  {
@ -140,18 +157,18 @@ template<typename Func, typename Derived, int Start>
 struct redux_vec_unroller<Func, Derived, Start, 1>
 {
  enum {
-    index = Start * packet_traits<typename Derived::Scalar>::size,
+    index = Start * redux_traits<Func, Derived>::PacketSize,
    outer = index / int(Derived::InnerSizeAtCompileTime),
    inner = index % int(Derived::InnerSizeAtCompileTime),
-    alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
+    alignment = Derived::Alignment
  };
  typedef typename Derived::Scalar Scalar;
-  typedef typename packet_traits<Scalar>::type PacketScalar;
+  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
  {
-    return mat.template packetByOuterInner<alignment>(outer, inner);
+    return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
  }
 };
@ -169,8 +186,8 @@ template<typename Func, typename Derived>
 struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
 {
  typedef typename Derived::Scalar Scalar;
-  typedef typename Derived::Index Index;
+  EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
+  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    Scalar res;
@ -193,19 +210,19 @@ template<typename Func, typename Derived>
 struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
 {
  typedef typename Derived::Scalar Scalar;
-  typedef typename packet_traits<Scalar>::type PacketScalar;
+  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
  typedef typename Derived::Index Index;
-  static Scalar run(const Derived& mat, const Func& func)
+  static Scalar run(const Derived &mat, const Func& func)
  {
    const Index size = mat.size();
-    eigen_assert(size && "you are using an empty matrix");
+    
-    const Index packetSize = packet_traits<Scalar>::size;
+    const Index packetSize = redux_traits<Func, Derived>::PacketSize;
-    const Index alignedStart = internal::first_aligned(mat);
+    const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
    enum {
-      alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
+      alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
-                ? Aligned : Unaligned
+      alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
    };
    const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
    const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
    const Index alignedEnd2 = alignedStart + alignedSize2;
@ -213,19 +230,19 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
    Scalar res;
    if(alignedSize)
    {
-      PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
+      PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
      {
-        PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
+        PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
        {
-          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
+          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
-          packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
+          packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
        }
        packet_res0 = func.packetOp(packet_res0,packet_res1);
        if(alignedEnd>alignedEnd2)
-          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
+          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
      }
      res = func.predux(packet_res0);
@ -247,29 +264,29 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
  }
 };
-template<typename Func, typename Derived>
+// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
-struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
+template<typename Func, typename Derived, int Unrolling>
 struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
 {
  typedef typename Derived::Scalar Scalar;
-  typedef typename packet_traits<Scalar>::type PacketScalar;
+  typedef typename redux_traits<Func, Derived>::PacketType PacketType;
  typedef typename Derived::Index Index;
-  static Scalar run(const Derived& mat, const Func& func)
+  EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    const Index innerSize = mat.innerSize();
    const Index outerSize = mat.outerSize();
    enum {
-      packetSize = packet_traits<Scalar>::size
+      packetSize = redux_traits<Func, Derived>::PacketSize
    };
    const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
    Scalar res;
    if(packetedInnerSize)
    {
-      PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
+      PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
-          packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));
+          packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));
      res = func.predux(packet_res);
      for(Index j=0; j<outerSize; ++j)
@ -290,22 +307,90 @@ template<typename Func, typename Derived>
 struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
 {
  typedef typename Derived::Scalar Scalar;
-  typedef typename packet_traits<Scalar>::type PacketScalar;
+
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
  enum {
-    PacketSize = packet_traits<Scalar>::size,
+    PacketSize = redux_traits<Func, Derived>::PacketSize,
    Size = Derived::SizeAtCompileTime,
    VectorizedSize = (Size / PacketSize) * PacketSize
  };
-  static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
-    Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
+    if (VectorizedSize > 0) {
-    if (VectorizedSize != Size)
+      Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
-      res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
+      if (VectorizedSize != Size)
-    return res;
+        res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
      return res;
    }
    else {
      return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
    }
  }
 };
 // evaluator adaptor
 template<typename _XprType>
 class redux_evaluator
 {
 public:
  typedef _XprType XprType;
  EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketScalar PacketScalar;
  typedef typename XprType::PacketReturnType PacketReturnType;
  enum {
    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
    Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
    IsRowMajor = XprType::IsRowMajor,
    SizeAtCompileTime = XprType::SizeAtCompileTime,
    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
    CoeffReadCost = evaluator<XprType>::CoeffReadCost,
    Alignment = evaluator<XprType>::Alignment
  };
  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
  EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
  EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
  EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }
  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index row, Index col) const
  { return m_evaluator.coeff(row, col); }
  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index index) const
  { return m_evaluator.coeff(index); }
  template<int LoadMode, typename PacketType>
  PacketType packet(Index row, Index col) const
  { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
  template<int LoadMode, typename PacketType>
  PacketType packet(Index index) const
  { return m_evaluator.template packet<LoadMode,PacketType>(index); }
  EIGEN_DEVICE_FUNC
  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
  { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
  template<int LoadMode, typename PacketType>
  PacketType packetByOuterInner(Index outer, Index inner) const
  { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
  const XprType & nestedExpression() const { return m_xpr; }
 protected:
  internal::evaluator<XprType> m_evaluator;
  const XprType &m_xpr;
 };
 } // end namespace internal
 /***************************************************************************
@ -316,18 +401,21 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
 /** \returns the result of a full redux operation on the whole matrix or vector using \a func
  *
  * The template parameter \a BinaryOp is the type of the functor \a func which must be
-  * an associative operator. Both current STL and TR1 functor styles are handled.
+  * an associative operator. Both current C++98 and C++11 functor styles are handled.
  *
  * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
  */
 template<typename Derived>
 template<typename Func>
-EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
+typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::redux(const Func& func) const
 {
-  typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
+  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
-  return internal::redux_impl<Func, ThisNested>
+
-            ::run(derived(), func);
+  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
  ThisEvaluator thisEval(derived());
  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
 }
 /** \returns the minimum of all coefficients of \c *this.
@ -337,7 +425,7 @@ template<typename Derived>
 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::minCoeff() const
 {
-  return this->redux(Eigen::internal::scalar_min_op<Scalar>());
+  return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
 }
 /** \returns the maximum of all coefficients of \c *this.
@ -347,10 +435,12 @@ template<typename Derived>
 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::maxCoeff() const
 {
-  return this->redux(Eigen::internal::scalar_max_op<Scalar>());
+  return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
 }
-/** \returns the sum of all coefficients of *this
+/** \returns the sum of all coefficients of \c *this
  *
  * If \c *this is empty, then the value 0 is returned.
  *
  * \sa trace(), prod(), mean()
  */
@ -360,7 +450,7 @@ DenseBase<Derived>::sum() const
 {
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(0);
-  return this->redux(Eigen::internal::scalar_sum_op<Scalar>());
+  return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
 }
 /** \returns the mean of all coefficients of *this
@ -371,7 +461,14 @@ template<typename Derived>
 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::mean() const
 {
-  return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
+#ifdef __INTEL_COMPILER
  #pragma warning push
  #pragma warning ( disable : 2259 )
 #endif
  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
 #ifdef __INTEL_COMPILER
  #pragma warning pop
 #endif
 }
 /** \returns the product of all coefficients of *this
@ -387,7 +484,7 @@ DenseBase<Derived>::prod() const
 {
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(1);
-  return this->redux(Eigen::internal::scalar_product_op<Scalar>());
+  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
 }
 /** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
--- a/eigenlib/Eigen/src/Core/Ref.h
+++ b/eigenlib/Eigen/src/Core/Ref.h
@ -12,79 +12,6 @@
 namespace Eigen { 
 template<typename Derived> class RefBase;
 template<typename PlainObjectType, int Options = 0,
         typename StrideType = typename internal::conditional<PlainObjectType::IsVectorAtCompileTime,InnerStride<1>,OuterStride<> >::type > class Ref;
 /** \class Ref
  * \ingroup Core_Module
  *
  * \brief A matrix or vector expression mapping an existing expressions
  *
  * \tparam PlainObjectType the equivalent matrix type of the mapped data
  * \tparam Options specifies whether the pointer is \c #Aligned, or \c #Unaligned.
  *                The default is \c #Unaligned.
  * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
  *                   but accept a variable outer stride (leading dimension).
  *                   This can be overridden by specifying strides.
  *                   The type passed here must be a specialization of the Stride template, see examples below.
  *
  * This class permits to write non template functions taking Eigen's object as parameters while limiting the number of copies.
  * A Ref<> object can represent either a const expression or a l-value:
  * \code
  * // in-out argument:
  * void foo1(Ref<VectorXf> x);
  *
  * // read-only const argument:
  * void foo2(const Ref<const VectorXf>& x);
  * \endcode
  *
  * In the in-out case, the input argument must satisfies the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
  * By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
  * Likewise, a Ref<MatrixXf> can reference any column major dense matrix expression of float whose column's elements are contiguously stored with
  * the possibility to have a constant space inbetween each column, i.e.: the inner stride mmust be equal to 1, but the outer-stride (or leading dimension),
  * can be greater than the number of rows.
  *
  * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
  * Here are some examples:
  * \code
  * MatrixXf A;
  * VectorXf a;
  * foo1(a.head());             // OK
  * foo1(A.col());              // OK
  * foo1(A.row());              // compilation error because here innerstride!=1
  * foo2(A.row());              // The row is copied into a contiguous temporary
  * foo2(2*a);                  // The expression is evaluated into a temporary
  * foo2(A.col().segment(2,4)); // No temporary
  * \endcode
  *
  * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameter.
  * Here is an example accepting an innerstride!=1:
  * \code
  * // in-out argument:
  * void foo3(Ref<VectorXf,0,InnerStride<> > x);
  * foo3(A.row());              // OK
  * \endcode
  * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involved more
  * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overloads internally calling a
  * template function, e.g.:
  * \code
  * // in the .h:
  * void foo(const Ref<MatrixXf>& A);
  * void foo(const Ref<MatrixXf,0,Stride<> >& A);
  *
  * // in the .cpp:
  * template<typename TypeOfA> void foo_impl(const TypeOfA& A) {
  *     ... // crazy code goes here
  * }
  * void foo(const Ref<MatrixXf>& A) { foo_impl(A); }
  * void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }
  * \endcode
  *
  *
  * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
  */
 namespace internal {
 template<typename _PlainObjectType, int _Options, typename _StrideType>
@ -95,7 +22,8 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
  typedef _StrideType StrideType;
  enum {
    Options = _Options,
-    Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit
+    Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit,
    Alignment = traits<Map<_PlainObjectType, _Options, _StrideType> >::Alignment
  };
  template<typename Derived> struct match {
@ -107,7 +35,13 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
                      || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),
      OuterStrideMatch = Derived::IsVectorAtCompileTime
                      || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
-      AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits<Derived>::Flags&AlignedBit)==AlignedBit),
+      // NOTE, this indirection of evaluator<Derived>::Alignment is needed
      // to workaround a very strange bug in MSVC related to the instantiation
      // of has_*ary_operator in evaluator<CwiseNullaryOp>.
      // This line is surprisingly very sensitive. For instance, simply adding parenthesis
      // as "DerivedAlignment = (int(evaluator<Derived>::Alignment))," will make MSVC fail...
      DerivedAlignment = int(evaluator<Derived>::Alignment),
      AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (DerivedAlignment >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment
      ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
      MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
    };
@ -132,12 +66,12 @@ public:
  typedef MapBase<Derived> Base;
  EIGEN_DENSE_PUBLIC_INTERFACE(RefBase)
-  inline Index innerStride() const
+  EIGEN_DEVICE_FUNC inline Index innerStride() const
  {
    return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
  }
-  inline Index outerStride() const
+  EIGEN_DEVICE_FUNC inline Index outerStride() const
  {
    return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
         : IsVectorAtCompileTime ? this->size()
@ -145,7 +79,7 @@ public:
         : this->rows();
  }
-  RefBase()
+  EIGEN_DEVICE_FUNC RefBase()
    : Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime),
      // Stride<> does not allow default ctor for Dynamic strides, so let' initialize it with dummy values:
      m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime,
@ -159,7 +93,7 @@ protected:
  typedef Stride<StrideType::OuterStrideAtCompileTime,StrideType::InnerStrideAtCompileTime> StrideBase;
  template<typename Expression>
-  void construct(Expression& expr)
+  EIGEN_DEVICE_FUNC void construct(Expression& expr)
  {
    if(PlainObjectType::RowsAtCompileTime==1)
    {
@ -184,15 +118,83 @@ protected:
  StrideBase m_stride;
 };
-
+/** \class Ref
  * \ingroup Core_Module
  *
  * \brief A matrix or vector expression mapping an existing expression
  *
  * \tparam PlainObjectType the equivalent matrix type of the mapped data
  * \tparam Options specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
  *                 The default is \c #Unaligned.
  * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
  *                   but accepts a variable outer stride (leading dimension).
  *                   This can be overridden by specifying strides.
  *                   The type passed here must be a specialization of the Stride template, see examples below.
  *
  * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
  * A Ref<> object can represent either a const expression or a l-value:
  * \code
  * // in-out argument:
  * void foo1(Ref<VectorXf> x);
  *
  * // read-only const argument:
  * void foo2(const Ref<const VectorXf>& x);
  * \endcode
  *
  * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
  * By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
  * Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
  * the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
  * can be greater than the number of rows.
  *
  * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
  * Here are some examples:
  * \code
  * MatrixXf A;
  * VectorXf a;
  * foo1(a.head());             // OK
  * foo1(A.col());              // OK
  * foo1(A.row());              // Compilation error because here innerstride!=1
  * foo2(A.row());              // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object
  * foo2(A.row().transpose());  // The row is copied into a contiguous temporary
  * foo2(2*a);                  // The expression is evaluated into a temporary
  * foo2(A.col().segment(2,4)); // No temporary
  * \endcode
  *
  * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.
  * Here is an example accepting an innerstride!=1:
  * \code
  * // in-out argument:
  * void foo3(Ref<VectorXf,0,InnerStride<> > x);
  * foo3(A.row());              // OK
  * \endcode
  * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
  * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a
  * template function, e.g.:
  * \code
  * // in the .h:
  * void foo(const Ref<MatrixXf>& A);
  * void foo(const Ref<MatrixXf,0,Stride<> >& A);
  *
  * // in the .cpp:
  * template<typename TypeOfA> void foo_impl(const TypeOfA& A) {
  *     ... // crazy code goes here
  * }
  * void foo(const Ref<MatrixXf>& A) { foo_impl(A); }
  * void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }
  * \endcode
  *
  *
  * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
  */
 template<typename PlainObjectType, int Options, typename StrideType> class Ref
  : public RefBase<Ref<PlainObjectType, Options, StrideType> >
 {
  private:
    typedef internal::traits<Ref> Traits;
    template<typename Derived>
-    inline Ref(const PlainObjectBase<Derived>& expr,
+    EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr,
-               typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0);
+                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0);
  public:
    typedef RefBase<Ref> Base;
@ -201,23 +203,24 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    template<typename Derived>
-    inline Ref(PlainObjectBase<Derived>& expr,
+    EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr,
-               typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
+                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
    {
-      EIGEN_STATIC_ASSERT(static_cast<bool>(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
+      EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
      Base::construct(expr.derived());
    }
    template<typename Derived>
-    inline Ref(const DenseBase<Derived>& expr,
+    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
-               typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
+                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
    #else
    /** Implicit constructor from any dense expression */
    template<typename Derived>
    inline Ref(DenseBase<Derived>& expr)
    #endif
    {
-      EIGEN_STATIC_ASSERT(static_cast<bool>(internal::is_lvalue<Derived>::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
+      EIGEN_STATIC_ASSERT(bool(internal::is_lvalue<Derived>::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
-      EIGEN_STATIC_ASSERT(static_cast<bool>(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
+      EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
-      enum { THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY = Derived::ThisConstantIsPrivateInPlainObjectBase};
+      EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
      Base::construct(expr.const_cast_derived());
    }
@ -236,8 +239,8 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
    EIGEN_DENSE_PUBLIC_INTERFACE(Ref)
    template<typename Derived>
-    inline Ref(const DenseBase<Derived>& expr,
+    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
-               typename internal::enable_if<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>::type* = 0)
+                                 typename internal::enable_if<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>::type* = 0)
    {
 //      std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n";
 //      std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n";
@ -245,18 +248,27 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
      construct(expr.derived(), typename Traits::template match<Derived>::type());
    }
    EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : Base(other) {
      // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy
    }
    template<typename OtherRef>
    EIGEN_DEVICE_FUNC inline Ref(const RefBase<OtherRef>& other) {
      construct(other.derived(), typename Traits::template match<OtherRef>::type());
    }
  protected:
    template<typename Expression>
-    void construct(const Expression& expr,internal::true_type)
+    EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type)
    {
      Base::construct(expr);
    }
    template<typename Expression>
-    void construct(const Expression& expr, internal::false_type)
+    EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type)
    {
-      m_object.lazyAssign(expr);
+      internal::call_assignment_no_alias(m_object,expr,internal::assign_op<Scalar,Scalar>());
      Base::construct(m_object);
    }
--- a/eigenlib/Eigen/src/Core/Replicate.h
+++ b/eigenlib/Eigen/src/Core/Replicate.h
@ -12,21 +12,6 @@
 namespace Eigen { 
 /**
  * \class Replicate
  * \ingroup Core_Module
  *
  * \brief Expression of the multiple replication of a matrix or vector
  *
  * \param MatrixType the type of the object we are replicating
  *
  * This class represents an expression of the multiple replication of a matrix or vector.
  * It is the return type of DenseBase::replicate() and most of the time
  * this is the only way it is used.
  *
  * \sa DenseBase::replicate()
  */
 namespace internal {
 template<typename MatrixType,int RowFactor,int ColFactor>
 struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
@ -35,10 +20,7 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
  typedef typename MatrixType::Scalar Scalar;
  typedef typename traits<MatrixType>::StorageKind StorageKind;
  typedef typename traits<MatrixType>::XprKind XprKind;
-  enum {
+  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
    Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor
  };
  typedef typename nested<MatrixType,Factor>::type MatrixTypeNested;
  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
  enum {
    RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic
@ -53,12 +35,29 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
    IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
               : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
               : (MatrixType::Flags & RowMajorBit) ? 1 : 0,
-    Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0),
+    
-    CoeffReadCost = _MatrixTypeNested::CoeffReadCost
+    // FIXME enable DirectAccess with negative strides?
    Flags = IsRowMajor ? RowMajorBit : 0
  };
 };
 }
 /**
  * \class Replicate
  * \ingroup Core_Module
  *
  * \brief Expression of the multiple replication of a matrix or vector
  *
  * \tparam MatrixType the type of the object we are replicating
  * \tparam RowFactor number of repetitions at compile time along the vertical direction, can be Dynamic.
  * \tparam ColFactor number of repetitions at compile time along the horizontal direction, can be Dynamic.
  *
  * This class represents an expression of the multiple replication of a matrix or vector.
  * It is the return type of DenseBase::replicate() and most of the time
  * this is the only way it is used.
  *
  * \sa DenseBase::replicate()
  */
 template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
  : public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
 {
@ -68,10 +67,12 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
    typedef typename internal::dense_xpr_base<Replicate>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(Replicate)
    typedef typename internal::remove_all<MatrixType>::type NestedExpression;
    template<typename OriginalMatrixType>
-    inline explicit Replicate(const OriginalMatrixType& a_matrix)
+    EIGEN_DEVICE_FUNC
-      : m_matrix(a_matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
+    inline explicit Replicate(const OriginalMatrixType& matrix)
      : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
    {
      EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
                          THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
@ -79,41 +80,20 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
    }
    template<typename OriginalMatrixType>
-    inline Replicate(const OriginalMatrixType& a_matrix, Index rowFactor, Index colFactor)
+    EIGEN_DEVICE_FUNC
-      : m_matrix(a_matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
+    inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor)
      : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
    {
      EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
                          THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
    }
    EIGEN_DEVICE_FUNC
    inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }
    EIGEN_DEVICE_FUNC
    inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }
-    inline Scalar coeff(Index rowId, Index colId) const
+    EIGEN_DEVICE_FUNC
    {
      // try to avoid using modulo; this is a pure optimization strategy
      const Index actual_row  = internal::traits<MatrixType>::RowsAtCompileTime==1 ? 0
                            : RowFactor==1 ? rowId
                            : rowId%m_matrix.rows();
      const Index actual_col  = internal::traits<MatrixType>::ColsAtCompileTime==1 ? 0
                            : ColFactor==1 ? colId
                            : colId%m_matrix.cols();
      return m_matrix.coeff(actual_row, actual_col);
    }
    template<int LoadMode>
    inline PacketScalar packet(Index rowId, Index colId) const
    {
      const Index actual_row  = internal::traits<MatrixType>::RowsAtCompileTime==1 ? 0
                            : RowFactor==1 ? rowId
                            : rowId%m_matrix.rows();
      const Index actual_col  = internal::traits<MatrixType>::ColsAtCompileTime==1 ? 0
                            : ColFactor==1 ? colId
                            : colId%m_matrix.cols();
      return m_matrix.template packet<LoadMode>(actual_row, actual_col);
    }
    const _MatrixTypeNested& nestedExpression() const
    { 
      return m_matrix; 
@ -141,21 +121,6 @@ DenseBase<Derived>::replicate() const
  return Replicate<Derived,RowFactor,ColFactor>(derived());
 }
 /**
  * \return an expression of the replication of \c *this
  *
  * Example: \include MatrixBase_replicate_int_int.cpp
  * Output: \verbinclude MatrixBase_replicate_int_int.out
  *
  * \sa VectorwiseOp::replicate(), DenseBase::replicate<int,int>(), class Replicate
  */
 template<typename Derived>
 const typename DenseBase<Derived>::ReplicateReturnType
 DenseBase<Derived>::replicate(Index rowFactor,Index colFactor) const
 {
  return Replicate<Derived,Dynamic,Dynamic>(derived(),rowFactor,colFactor);
 }
 /**
  * \return an expression of the replication of each column (or row) of \c *this
  *
--- a/eigenlib/Eigen/src/Core/ReturnByValue.h
+++ b/eigenlib/Eigen/src/Core/ReturnByValue.h
@ -13,11 +13,6 @@
 namespace Eigen {
 /** \class ReturnByValue
  * \ingroup Core_Module
  *
  */
 namespace internal {
 template<typename Derived>
@ -38,17 +33,22 @@ struct traits<ReturnByValue<Derived> >
 * So internal::nested always gives the plain return matrix type.
 *
 * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ??
 * Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators
 */
 template<typename Derived,int n,typename PlainObject>
-struct nested<ReturnByValue<Derived>, n, PlainObject>
+struct nested_eval<ReturnByValue<Derived>, n, PlainObject>
 {
  typedef typename traits<Derived>::ReturnType type;
 };
 } // end namespace internal
 /** \class ReturnByValue
  * \ingroup Core_Module
  *
  */
 template<typename Derived> class ReturnByValue
-  : internal::no_assignment_operator, public internal::dense_xpr_base< ReturnByValue<Derived> >::type
+  : public internal::dense_xpr_base< ReturnByValue<Derived> >::type, internal::no_assignment_operator
 {
  public:
    typedef typename internal::traits<Derived>::ReturnType ReturnType;
@ -57,10 +57,11 @@ template<typename Derived> class ReturnByValue
    EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue)
    template<typename Dest>
    EIGEN_DEVICE_FUNC
    inline void evalTo(Dest& dst) const
    { static_cast<const Derived*>(this)->evalTo(dst); }
-    inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
-    inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
 #ifndef EIGEN_PARSED_BY_DOXYGEN
 #define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT
@ -72,8 +73,7 @@ template<typename Derived> class ReturnByValue
    const Unusable& coeff(Index,Index) const { return *reinterpret_cast<const Unusable*>(this); }
    Unusable& coeffRef(Index) { return *reinterpret_cast<Unusable*>(this); }
    Unusable& coeffRef(Index,Index) { return *reinterpret_cast<Unusable*>(this); }
-    template<int LoadMode>  Unusable& packet(Index) const;
+#undef Unusable
    template<int LoadMode>  Unusable& packet(Index, Index) const;
 #endif
 };
@ -85,14 +85,32 @@ Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
  return derived();
 }
-template<typename Derived>
+namespace internal {
 template<typename OtherDerived>
 Derived& DenseBase<Derived>::lazyAssign(const ReturnByValue<OtherDerived>& other)
 {
  other.evalTo(derived());
  return derived();
 }
 // Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that
 // when a ReturnByValue expression is assigned, the evaluator is not constructed.
 // TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world
 template<typename Derived>
 struct evaluator<ReturnByValue<Derived> >
  : public evaluator<typename internal::traits<Derived>::ReturnType>
 {
  typedef ReturnByValue<Derived> XprType;
  typedef typename internal::traits<Derived>::ReturnType PlainObject;
  typedef evaluator<PlainObject> Base;
  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
    : m_result(xpr.rows(), xpr.cols())
  {
    ::new (static_cast<Base*>(this)) Base(m_result);
    xpr.evalTo(m_result);
  }
 protected:
  PlainObject m_result;
 };
 } // end namespace internal
 } // end namespace Eigen
--- a/eigenlib/Eigen/src/Core/Reverse.h
+++ b/eigenlib/Eigen/src/Core/Reverse.h
@ -14,20 +14,6 @@
 namespace Eigen { 
 /** \class Reverse
  * \ingroup Core_Module
  *
  * \brief Expression of the reverse of a vector or matrix
  *
  * \param MatrixType the type of the object of which we are taking the reverse
  *
  * This class represents an expression of the reverse of a vector.
  * It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
  * and most of the time this is the only way it is used.
  *
  * \sa MatrixBase::reverse(), VectorwiseOp::reverse()
  */
 namespace internal {
 template<typename MatrixType, int Direction>
@ -37,36 +23,43 @@ struct traits<Reverse<MatrixType, Direction> >
  typedef typename MatrixType::Scalar Scalar;
  typedef typename traits<MatrixType>::StorageKind StorageKind;
  typedef typename traits<MatrixType>::XprKind XprKind;
-  typedef typename nested<MatrixType>::type MatrixTypeNested;
+  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
  enum {
    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-
+    Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit)
    // let's enable LinearAccess only with vectorization because of the product overhead
    LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) )
                 ? LinearAccessBit : 0,
    Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess),
    CoeffReadCost = _MatrixTypeNested::CoeffReadCost
  };
 };
-template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond
+template<typename PacketType, bool ReversePacket> struct reverse_packet_cond
 {
-  static inline PacketScalar run(const PacketScalar& x) { return preverse(x); }
+  static inline PacketType run(const PacketType& x) { return preverse(x); }
 };
-template<typename PacketScalar> struct reverse_packet_cond<PacketScalar,false>
+template<typename PacketType> struct reverse_packet_cond<PacketType,false>
 {
-  static inline PacketScalar run(const PacketScalar& x) { return x; }
+  static inline PacketType run(const PacketType& x) { return x; }
 };
 } // end namespace internal 
 /** \class Reverse
  * \ingroup Core_Module
  *
  * \brief Expression of the reverse of a vector or matrix
  *
  * \tparam MatrixType the type of the object of which we are taking the reverse
  * \tparam Direction defines the direction of the reverse operation, can be Vertical, Horizontal, or BothDirections
  *
  * This class represents an expression of the reverse of a vector.
  * It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
  * and most of the time this is the only way it is used.
  *
  * \sa MatrixBase::reverse(), VectorwiseOp::reverse()
  */
 template<typename MatrixType, int Direction> class Reverse
  : public internal::dense_xpr_base< Reverse<MatrixType, Direction> >::type
 {
@ -74,12 +67,9 @@ template<typename MatrixType, int Direction> class Reverse
    typedef typename internal::dense_xpr_base<Reverse>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(Reverse)
    typedef typename internal::remove_all<MatrixType>::type NestedExpression;
    using Base::IsRowMajor;
    // next line is necessary because otherwise const version of operator()
    // is hidden by non-const version defined in this file
    using Base::operator(); 
  protected:
    enum {
      PacketSize = internal::packet_traits<Scalar>::size,
@ -95,82 +85,19 @@ template<typename MatrixType, int Direction> class Reverse
    typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
  public:
-    inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { }
+    EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { }
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse)
-    inline Index rows() const { return m_matrix.rows(); }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
-    inline Index cols() const { return m_matrix.cols(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
-    inline Index innerStride() const
+    EIGEN_DEVICE_FUNC inline Index innerStride() const
    {
      return -m_matrix.innerStride();
    }
-    inline Scalar& operator()(Index row, Index col)
+    EIGEN_DEVICE_FUNC const typename internal::remove_all<typename MatrixType::Nested>::type&
    {
      eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
      return coeffRef(row, col);
    }
    inline Scalar& coeffRef(Index row, Index col)
    {
      return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row,
                                                    ReverseCol ? m_matrix.cols() - col - 1 : col);
    }
    inline CoeffReturnType coeff(Index row, Index col) const
    {
      return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row,
                            ReverseCol ? m_matrix.cols() - col - 1 : col);
    }
    inline CoeffReturnType coeff(Index index) const
    {
      return m_matrix.coeff(m_matrix.size() - index - 1);
    }
    inline Scalar& coeffRef(Index index)
    {
      return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1);
    }
    inline Scalar& operator()(Index index)
    {
      eigen_assert(index >= 0 && index < m_matrix.size());
      return coeffRef(index);
    }
    template<int LoadMode>
    inline const PacketScalar packet(Index row, Index col) const
    {
      return reverse_packet::run(m_matrix.template packet<LoadMode>(
                                    ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
                                    ReverseCol ? m_matrix.cols() - col - OffsetCol : col));
    }
    template<int LoadMode>
    inline void writePacket(Index row, Index col, const PacketScalar& x)
    {
      m_matrix.const_cast_derived().template writePacket<LoadMode>(
                                      ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
                                      ReverseCol ? m_matrix.cols() - col - OffsetCol : col,
                                      reverse_packet::run(x));
    }
    template<int LoadMode>
    inline const PacketScalar packet(Index index) const
    {
      return internal::preverse(m_matrix.template packet<LoadMode>( m_matrix.size() - index - PacketSize ));
    }
    template<int LoadMode>
    inline void writePacket(Index index, const PacketScalar& x)
    {
      m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
    }
    const typename internal::remove_all<typename MatrixType::Nested>::type& 
    nestedExpression() const 
    {
      return m_matrix;
@ -190,33 +117,93 @@ template<typename Derived>
 inline typename DenseBase<Derived>::ReverseReturnType
 DenseBase<Derived>::reverse()
 {
-  return derived();
+  return ReverseReturnType(derived());
 }
-/** This is the const version of reverse(). */
+
-template<typename Derived>
+//reverse const overload moved DenseBase.h due to a CUDA compiler bug
 inline const typename DenseBase<Derived>::ConstReverseReturnType
 DenseBase<Derived>::reverse() const
 {
  return derived();
 }
 /** This is the "in place" version of reverse: it reverses \c *this.
  *
  * In most cases it is probably better to simply use the reversed expression
  * of a matrix. However, when reversing the matrix data itself is really needed,
  * then this "in-place" version is probably the right choice because it provides
-  * the following additional features:
+  * the following additional benefits:
  *  - less error prone: doing the same operation with .reverse() requires special care:
  *    \code m = m.reverse().eval(); \endcode
-  *  - this API allows to avoid creating a temporary (the current implementation creates a temporary, but that could be avoided using swap)
+  *  - this API enables reverse operations without the need for a temporary
  *  - it allows future optimizations (cache friendliness, etc.)
  *
-  * \sa reverse() */
+  * \sa VectorwiseOp::reverseInPlace(), reverse() */
 template<typename Derived>
 inline void DenseBase<Derived>::reverseInPlace()
 {
-  derived() = derived().reverse().eval();
+  if(cols()>rows())
  {
    Index half = cols()/2;
    leftCols(half).swap(rightCols(half).reverse());
    if((cols()%2)==1)
    {
      Index half2 = rows()/2;
      col(half).head(half2).swap(col(half).tail(half2).reverse());
    }
  }
  else
  {
    Index half = rows()/2;
    topRows(half).swap(bottomRows(half).reverse());
    if((rows()%2)==1)
    {
      Index half2 = cols()/2;
      row(half).head(half2).swap(row(half).tail(half2).reverse());
    }
  }
 }
 namespace internal {
 template<int Direction>
 struct vectorwise_reverse_inplace_impl;
 template<>
 struct vectorwise_reverse_inplace_impl<Vertical>
 {
  template<typename ExpressionType>
  static void run(ExpressionType &xpr)
  {
    Index half = xpr.rows()/2;
    xpr.topRows(half).swap(xpr.bottomRows(half).colwise().reverse());
  }
 };
 template<>
 struct vectorwise_reverse_inplace_impl<Horizontal>
 {
  template<typename ExpressionType>
  static void run(ExpressionType &xpr)
  {
    Index half = xpr.cols()/2;
    xpr.leftCols(half).swap(xpr.rightCols(half).rowwise().reverse());
  }
 };
 } // end namespace internal
 /** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this.
  *
  * In most cases it is probably better to simply use the reversed expression
  * of a matrix. However, when reversing the matrix data itself is really needed,
  * then this "in-place" version is probably the right choice because it provides
  * the following additional benefits:
  *  - less error prone: doing the same operation with .reverse() requires special care:
  *    \code m = m.reverse().eval(); \endcode
  *  - this API enables reverse operations without the need for a temporary
  *
  * \sa DenseBase::reverseInPlace(), reverse() */
 template<typename ExpressionType, int Direction>
 void VectorwiseOp<ExpressionType,Direction>::reverseInPlace()
 {
  internal::vectorwise_reverse_inplace_impl<Direction>::run(_expression().const_cast_derived());
 }
 } // end namespace Eigen
--- a/eigenlib/Eigen/src/Core/Select.h
+++ b/eigenlib/Eigen/src/Core/Select.h
@ -43,23 +43,21 @@ struct traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
    ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime,
    MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime,
-    Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits,
+    Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit
    CoeffReadCost = traits<typename remove_all<ConditionMatrixNested>::type>::CoeffReadCost
                  + EIGEN_SIZE_MAX(traits<typename remove_all<ThenMatrixNested>::type>::CoeffReadCost,
                                   traits<typename remove_all<ElseMatrixNested>::type>::CoeffReadCost)
  };
 };
 }
 template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
-class Select : internal::no_assignment_operator,
+class Select : public internal::dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type,
-  public internal::dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type
+               internal::no_assignment_operator
 {
  public:
    typedef typename internal::dense_xpr_base<Select>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(Select)
    inline EIGEN_DEVICE_FUNC
    Select(const ConditionMatrixType& a_conditionMatrix,
           const ThenMatrixType& a_thenMatrix,
           const ElseMatrixType& a_elseMatrix)
@ -69,9 +67,10 @@ class Select : internal::no_assignment_operator,
      eigen_assert(m_condition.cols() == m_then.cols() && m_condition.cols() == m_else.cols());
    }
-    Index rows() const { return m_condition.rows(); }
+    inline EIGEN_DEVICE_FUNC Index rows() const { return m_condition.rows(); }
-    Index cols() const { return m_condition.cols(); }
+    inline EIGEN_DEVICE_FUNC Index cols() const { return m_condition.cols(); }
    inline EIGEN_DEVICE_FUNC
    const Scalar coeff(Index i, Index j) const
    {
      if (m_condition.coeff(i,j))
@ -80,6 +79,7 @@ class Select : internal::no_assignment_operator,
        return m_else.coeff(i,j);
    }
    inline EIGEN_DEVICE_FUNC
    const Scalar coeff(Index i) const
    {
      if (m_condition.coeff(i))
@ -88,17 +88,17 @@ class Select : internal::no_assignment_operator,
        return m_else.coeff(i);
    }
-    const ConditionMatrixType& conditionMatrix() const
+    inline EIGEN_DEVICE_FUNC const ConditionMatrixType& conditionMatrix() const
    {
      return m_condition;
    }
-    const ThenMatrixType& thenMatrix() const
+    inline EIGEN_DEVICE_FUNC const ThenMatrixType& thenMatrix() const
    {
      return m_then;
    }
-    const ElseMatrixType& elseMatrix() const
+    inline EIGEN_DEVICE_FUNC const ElseMatrixType& elseMatrix() const
    {
      return m_else;
    }
--- a/eigenlib/Eigen/src/Core/SelfAdjointView.h
+++ b/eigenlib/Eigen/src/Core/SelfAdjointView.h
@ -32,54 +32,60 @@ namespace internal {
 template<typename MatrixType, unsigned int UpLo>
 struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
 {
-  typedef typename nested<MatrixType>::type MatrixTypeNested;
+  typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
  typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
  typedef MatrixType ExpressionType;
-  typedef typename MatrixType::PlainObject DenseMatrixType;
+  typedef typename MatrixType::PlainObject FullMatrixType;
  enum {
    Mode = UpLo | SelfAdjoint,
-    Flags =  MatrixTypeNestedCleaned::Flags & (HereditaryBits)
+    FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
-           & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)), // FIXME these flags should be preserved
+    Flags =  MatrixTypeNestedCleaned::Flags & (HereditaryBits|FlagsLvalueBit)
-    CoeffReadCost = MatrixTypeNestedCleaned::CoeffReadCost
+           & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)) // FIXME these flags should be preserved
  };
 };
 }
 template <typename Lhs, int LhsMode, bool LhsIsVector,
          typename Rhs, int RhsMode, bool RhsIsVector>
 struct SelfadjointProductMatrix;
-// FIXME could also be called SelfAdjointWrapper to be consistent with DiagonalWrapper ??
+template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
-template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
+  : public TriangularBase<SelfAdjointView<_MatrixType, UpLo> >
  : public TriangularBase<SelfAdjointView<MatrixType, UpLo> >
 {
  public:
    typedef _MatrixType MatrixType;
    typedef TriangularBase<SelfAdjointView> Base;
    typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested;
    typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
    typedef MatrixTypeNestedCleaned NestedExpression;
    /** \brief The type of coefficients in this matrix */
    typedef typename internal::traits<SelfAdjointView>::Scalar Scalar; 
-
+    typedef typename MatrixType::StorageIndex StorageIndex;
-    typedef typename MatrixType::Index Index;
+    typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
    enum {
-      Mode = internal::traits<SelfAdjointView>::Mode
+      Mode = internal::traits<SelfAdjointView>::Mode,
      Flags = internal::traits<SelfAdjointView>::Flags,
      TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0)
    };
    typedef typename MatrixType::PlainObject PlainObject;
-    inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
+    EIGEN_DEVICE_FUNC
    explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
    {}
    EIGEN_DEVICE_FUNC
    inline Index rows() const { return m_matrix.rows(); }
    EIGEN_DEVICE_FUNC
    inline Index cols() const { return m_matrix.cols(); }
    EIGEN_DEVICE_FUNC
    inline Index outerStride() const { return m_matrix.outerStride(); }
    EIGEN_DEVICE_FUNC
    inline Index innerStride() const { return m_matrix.innerStride(); }
    /** \sa MatrixBase::coeff()
      * \warning the coordinates must fit into the referenced triangular part
      */
    EIGEN_DEVICE_FUNC
    inline Scalar coeff(Index row, Index col) const
    {
      Base::check_coordinates_internal(row, col);
@ -89,36 +95,46 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
    /** \sa MatrixBase::coeffRef()
      * \warning the coordinates must fit into the referenced triangular part
      */
    EIGEN_DEVICE_FUNC
    inline Scalar& coeffRef(Index row, Index col)
    {
      EIGEN_STATIC_ASSERT_LVALUE(SelfAdjointView);
      Base::check_coordinates_internal(row, col);
-      return m_matrix.const_cast_derived().coeffRef(row, col);
+      return m_matrix.coeffRef(row, col);
    }
    /** \internal */
    EIGEN_DEVICE_FUNC
    const MatrixTypeNestedCleaned& _expression() const { return m_matrix; }
    EIGEN_DEVICE_FUNC
    const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
-    MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
+    EIGEN_DEVICE_FUNC
    MatrixTypeNestedCleaned& nestedExpression() { return m_matrix; }
-    /** Efficient self-adjoint matrix times vector/matrix product */
+    /** Efficient triangular matrix times vector/matrix product */
    template<typename OtherDerived>
-    SelfadjointProductMatrix<MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime>
+    EIGEN_DEVICE_FUNC
    const Product<SelfAdjointView,OtherDerived>
    operator*(const MatrixBase<OtherDerived>& rhs) const
    {
-      return SelfadjointProductMatrix
+      return Product<SelfAdjointView,OtherDerived>(*this, rhs.derived());
              <MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime>
              (m_matrix, rhs.derived());
    }
-    /** Efficient vector/matrix times self-adjoint matrix product */
+    /** Efficient vector/matrix times triangular matrix product */
    template<typename OtherDerived> friend
-    SelfadjointProductMatrix<OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false>
+    EIGEN_DEVICE_FUNC
    const Product<OtherDerived,SelfAdjointView>
    operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView& rhs)
    {
-      return SelfadjointProductMatrix
+      return Product<OtherDerived,SelfAdjointView>(lhs.derived(),rhs);
-              <OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false>
+    }
-              (lhs.derived(),rhs.m_matrix);
+    
    friend EIGEN_DEVICE_FUNC
    const SelfAdjointView<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,MatrixType,product),UpLo>
    operator*(const Scalar& s, const SelfAdjointView& mat)
    {
      return (s*mat.nestedExpression()).template selfadjointView<UpLo>();
    }
    /** Perform a symmetric rank 2 update of the selfadjoint matrix \c *this:
@ -132,6 +148,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
      * \sa rankUpdate(const MatrixBase<DerivedU>&, Scalar)
      */
    template<typename DerivedU, typename DerivedV>
    EIGEN_DEVICE_FUNC
    SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha = Scalar(1));
    /** Perform a symmetric rank K update of the selfadjoint matrix \c *this:
@ -145,8 +162,74 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
      * \sa rankUpdate(const MatrixBase<DerivedU>&, const MatrixBase<DerivedV>&, Scalar)
      */
    template<typename DerivedU>
    EIGEN_DEVICE_FUNC
    SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
    /** \returns an expression of a triangular view extracted from the current selfadjoint view of a given triangular part
      *
      * The parameter \a TriMode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
      * \c #Lower, \c #StrictlyLower, \c #UnitLower.
      *
      * If \c TriMode references the same triangular part than \c *this, then this method simply return a \c TriangularView of the nested expression,
      * otherwise, the nested expression is first transposed, thus returning a \c TriangularView<Transpose<MatrixType>> object.
      *
      * \sa MatrixBase::triangularView(), class TriangularView
      */
    template<unsigned int TriMode>
    EIGEN_DEVICE_FUNC
    typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),
                                   TriangularView<MatrixType,TriMode>,
                                   TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type
    triangularView() const
    {
      typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::ConstTransposeReturnType>::type tmp1(m_matrix);
      typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::AdjointReturnType>::type tmp2(tmp1);
      return typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),
                                   TriangularView<MatrixType,TriMode>,
                                   TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2);
    }
    typedef SelfAdjointView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
    /** \sa MatrixBase::conjugate() const */
    EIGEN_DEVICE_FUNC
    inline const ConjugateReturnType conjugate() const
    { return ConjugateReturnType(m_matrix.conjugate()); }
    typedef SelfAdjointView<const typename MatrixType::AdjointReturnType,TransposeMode> AdjointReturnType;
    /** \sa MatrixBase::adjoint() const */
    EIGEN_DEVICE_FUNC
    inline const AdjointReturnType adjoint() const
    { return AdjointReturnType(m_matrix.adjoint()); }
    typedef SelfAdjointView<typename MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType;
     /** \sa MatrixBase::transpose() */
    EIGEN_DEVICE_FUNC
    inline TransposeReturnType transpose()
    {
      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
      typename MatrixType::TransposeReturnType tmp(m_matrix);
      return TransposeReturnType(tmp);
    }
    typedef SelfAdjointView<const typename MatrixType::ConstTransposeReturnType,TransposeMode> ConstTransposeReturnType;
    /** \sa MatrixBase::transpose() const */
    EIGEN_DEVICE_FUNC
    inline const ConstTransposeReturnType transpose() const
    {
      return ConstTransposeReturnType(m_matrix.transpose());
    }
    /** \returns a const expression of the main diagonal of the matrix \c *this
      *
      * This method simply returns the diagonal of the nested expression, thus by-passing the SelfAdjointView decorator.
      *
      * \sa MatrixBase::diagonal(), class Diagonal */
    EIGEN_DEVICE_FUNC
    typename MatrixType::ConstDiagonalReturnType diagonal() const
    {
      return typename MatrixType::ConstDiagonalReturnType(m_matrix);
    }
 /////////// Cholesky module ///////////
    const LLT<PlainObject, UpLo> llt() const;
@ -159,31 +242,10 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
    /** Return type of eigenvalues() */
    typedef Matrix<RealScalar, internal::traits<MatrixType>::ColsAtCompileTime, 1> EigenvaluesReturnType;
    EIGEN_DEVICE_FUNC
    EigenvaluesReturnType eigenvalues() const;
    EIGEN_DEVICE_FUNC
    RealScalar operatorNorm() const;
    #ifdef EIGEN2_SUPPORT
    template<typename OtherDerived>
    SelfAdjointView& operator=(const MatrixBase<OtherDerived>& other)
    {
      enum {
        OtherPart = UpLo == Upper ? StrictlyLower : StrictlyUpper
      };
      m_matrix.const_cast_derived().template triangularView<UpLo>() = other;
      m_matrix.const_cast_derived().template triangularView<OtherPart>() = other.adjoint();
      return *this;
    }
    template<typename OtherMatrixType, unsigned int OtherMode>
    SelfAdjointView& operator=(const TriangularView<OtherMatrixType, OtherMode>& other)
    {
      enum {
        OtherPart = UpLo == Upper ? StrictlyLower : StrictlyUpper
      };
      m_matrix.const_cast_derived().template triangularView<UpLo>() = other.toDenseMatrix();
      m_matrix.const_cast_derived().template triangularView<OtherPart>() = other.toDenseMatrix().adjoint();
      return *this;
    }
    #endif
  protected:
    MatrixTypeNested m_matrix;
@ -201,90 +263,54 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
 namespace internal {
-template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
+// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
-struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount, ClearOpposite>
+//      in the future selfadjoint-ness should be defined by the expression traits
 //      such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
 template<typename MatrixType, unsigned int Mode>
 struct evaluator_traits<SelfAdjointView<MatrixType,Mode> >
 {
-  enum {
+  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
-    col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
+  typedef SelfAdjointShape Shape;
-    row = (UnrollCount-1) % Derived1::RowsAtCompileTime
+};
  };
-  static inline void run(Derived1 &dst, const Derived2 &src)
+template<int UpLo, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version>
 class triangular_dense_assignment_kernel<UpLo,SelfAdjoint,SetOpposite,DstEvaluatorTypeT,SrcEvaluatorTypeT,Functor,Version>
  : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version>
 {
 protected:
  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> Base;
  typedef typename Base::DstXprType DstXprType;
  typedef typename Base::SrcXprType SrcXprType;
  using Base::m_dst;
  using Base::m_src;
  using Base::m_functor;
 public:
  typedef typename Base::DstEvaluatorType DstEvaluatorType;
  typedef typename Base::SrcEvaluatorType SrcEvaluatorType;
  typedef typename Base::Scalar Scalar;
  typedef typename Base::AssignmentTraits AssignmentTraits;
  EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
    : Base(dst, src, func, dstExpr)
  {}
  EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
  {
-    triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
+    eigen_internal_assert(row!=col);
-
+    Scalar tmp = m_src.coeff(row,col);
-    if(row == col)
+    m_functor.assignCoeff(m_dst.coeffRef(row,col), tmp);
-      dst.coeffRef(row, col) = numext::real(src.coeff(row, col));
+    m_functor.assignCoeff(m_dst.coeffRef(col,row), numext::conj(tmp));
    else if(row < col)
      dst.coeffRef(col, row) = numext::conj(dst.coeffRef(row, col) = src.coeff(row, col));
  }
-};
+  
-
+  EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id)
 template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
 {
  static inline void run(Derived1 &, const Derived2 &) {}
 };
 template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount, ClearOpposite>
 {
  enum {
    col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
    row = (UnrollCount-1) % Derived1::RowsAtCompileTime
  };
  static inline void run(Derived1 &dst, const Derived2 &src)
  {
-    triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
+    Base::assignCoeff(id,id);
    if(row == col)
      dst.coeffRef(row, col) = numext::real(src.coeff(row, col));
    else if(row > col)
      dst.coeffRef(col, row) = numext::conj(dst.coeffRef(row, col) = src.coeff(row, col));
  }
 };
 template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
 {
  static inline void run(Derived1 &, const Derived2 &) {}
 };
 template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
 {
  typedef typename Derived1::Index Index;
  static inline void run(Derived1 &dst, const Derived2 &src)
  {
    for(Index j = 0; j < dst.cols(); ++j)
    {
      for(Index i = 0; i < j; ++i)
      {
        dst.copyCoeff(i, j, src);
        dst.coeffRef(j,i) = numext::conj(dst.coeff(i,j));
      }
      dst.copyCoeff(j, j, src);
    }
  }
 };
 template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
 {
  static inline void run(Derived1 &dst, const Derived2 &src)
  {
  typedef typename Derived1::Index Index;
    for(Index i = 0; i < dst.rows(); ++i)
    {
      for(Index j = 0; j < i; ++j)
      {
        dst.copyCoeff(i, j, src);
        dst.coeffRef(j,i) = numext::conj(dst.coeff(i,j));
      }
      dst.copyCoeff(i, i, src);
    }
  }
  EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index, Index)
  { eigen_internal_assert(false && "should never be called"); }
 };
 } // end namespace internal
@ -293,20 +319,30 @@ struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dyn
 * Implementation of MatrixBase methods
 ***************************************************************************/
 /** This is the const version of MatrixBase::selfadjointView() */
 template<typename Derived>
 template<unsigned int UpLo>
 typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type
 MatrixBase<Derived>::selfadjointView() const
 {
-  return derived();
+  return typename ConstSelfAdjointViewReturnType<UpLo>::Type(derived());
 }
 /** \returns an expression of a symmetric/self-adjoint view extracted from the upper or lower triangular part of the current matrix
  *
  * The parameter \a UpLo can be either \c #Upper or \c #Lower
  *
  * Example: \include MatrixBase_selfadjointView.cpp
  * Output: \verbinclude MatrixBase_selfadjointView.out
  *
  * \sa class SelfAdjointView
  */
 template<typename Derived>
 template<unsigned int UpLo>
 typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type
 MatrixBase<Derived>::selfadjointView()
 {
-  return derived();
+  return typename SelfAdjointViewReturnType<UpLo>::Type(derived());
 }
 } // end namespace Eigen
--- a/eigenlib/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/eigenlib/Eigen/src/Core/SelfCwiseBinaryOp.h
@ -12,183 +12,37 @@
 namespace Eigen { 
-/** \class SelfCwiseBinaryOp
+// TODO generalize the scalar type of 'other'
  * \ingroup Core_Module
  *
  * \internal
  *
  * \brief Internal helper class for optimizing operators like +=, -=
  *
  * This is a pseudo expression class re-implementing the copyCoeff/copyPacket
  * method to directly performs a +=/-= operations in an optimal way. In particular,
  * this allows to make sure that the input/output data are loaded only once using
  * aligned packet loads.
  *
  * \sa class SwapWrapper for a similar trick.
  */
 namespace internal {
 template<typename BinaryOp, typename Lhs, typename Rhs>
 struct traits<SelfCwiseBinaryOp<BinaryOp,Lhs,Rhs> >
  : traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >
 {
  enum {
    // Note that it is still a good idea to preserve the DirectAccessBit
    // so that assign can correctly align the data.
    Flags = traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
    OuterStrideAtCompileTime = Lhs::OuterStrideAtCompileTime,
    InnerStrideAtCompileTime = Lhs::InnerStrideAtCompileTime
  };
 };
 }
 template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
  : public internal::dense_xpr_base< SelfCwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
 {
  public:
    typedef typename internal::dense_xpr_base<SelfCwiseBinaryOp>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(SelfCwiseBinaryOp)
    typedef typename internal::packet_traits<Scalar>::type Packet;
    inline SelfCwiseBinaryOp(Lhs& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
    inline Index rows() const { return m_matrix.rows(); }
    inline Index cols() const { return m_matrix.cols(); }
    inline Index outerStride() const { return m_matrix.outerStride(); }
    inline Index innerStride() const { return m_matrix.innerStride(); }
    inline const Scalar* data() const { return m_matrix.data(); }
    // note that this function is needed by assign to correctly align loads/stores
    // TODO make Assign use .data()
    inline Scalar& coeffRef(Index row, Index col)
    {
      EIGEN_STATIC_ASSERT_LVALUE(Lhs)
      return m_matrix.const_cast_derived().coeffRef(row, col);
    }
    inline const Scalar& coeffRef(Index row, Index col) const
    {
      return m_matrix.coeffRef(row, col);
    }
    // note that this function is needed by assign to correctly align loads/stores
    // TODO make Assign use .data()
    inline Scalar& coeffRef(Index index)
    {
      EIGEN_STATIC_ASSERT_LVALUE(Lhs)
      return m_matrix.const_cast_derived().coeffRef(index);
    }
    inline const Scalar& coeffRef(Index index) const
    {
      return m_matrix.const_cast_derived().coeffRef(index);
    }
    template<typename OtherDerived>
    void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
    {
      OtherDerived& _other = other.const_cast_derived();
      eigen_internal_assert(row >= 0 && row < rows()
                         && col >= 0 && col < cols());
      Scalar& tmp = m_matrix.coeffRef(row,col);
      tmp = m_functor(tmp, _other.coeff(row,col));
    }
    template<typename OtherDerived>
    void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
    {
      OtherDerived& _other = other.const_cast_derived();
      eigen_internal_assert(index >= 0 && index < m_matrix.size());
      Scalar& tmp = m_matrix.coeffRef(index);
      tmp = m_functor(tmp, _other.coeff(index));
    }
    template<typename OtherDerived, int StoreMode, int LoadMode>
    void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
    {
      OtherDerived& _other = other.const_cast_derived();
      eigen_internal_assert(row >= 0 && row < rows()
                        && col >= 0 && col < cols());
      m_matrix.template writePacket<StoreMode>(row, col,
        m_functor.packetOp(m_matrix.template packet<StoreMode>(row, col),_other.template packet<LoadMode>(row, col)) );
    }
    template<typename OtherDerived, int StoreMode, int LoadMode>
    void copyPacket(Index index, const DenseBase<OtherDerived>& other)
    {
      OtherDerived& _other = other.const_cast_derived();
      eigen_internal_assert(index >= 0 && index < m_matrix.size());
      m_matrix.template writePacket<StoreMode>(index,
        m_functor.packetOp(m_matrix.template packet<StoreMode>(index),_other.template packet<LoadMode>(index)) );
    }
    // reimplement lazyAssign to handle complex *= real
    // see CwiseBinaryOp ctor for details
    template<typename RhsDerived>
    EIGEN_STRONG_INLINE SelfCwiseBinaryOp& lazyAssign(const DenseBase<RhsDerived>& rhs)
    {
      EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs,RhsDerived)
      EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename RhsDerived::Scalar);
    #ifdef EIGEN_DEBUG_ASSIGN
      internal::assign_traits<SelfCwiseBinaryOp, RhsDerived>::debug();
    #endif
      eigen_assert(rows() == rhs.rows() && cols() == rhs.cols());
      internal::assign_impl<SelfCwiseBinaryOp, RhsDerived>::run(*this,rhs.derived());
    #ifndef EIGEN_NO_DEBUG
      this->checkTransposeAliasing(rhs.derived());
    #endif
      return *this;
    }
    // overloaded to honor evaluation of special matrices
    // maybe another solution would be to not use SelfCwiseBinaryOp
    // at first...
    SelfCwiseBinaryOp& operator=(const Rhs& _rhs)
    {
      typename internal::nested<Rhs>::type rhs(_rhs);
      return Base::operator=(rhs);
    }
    Lhs& expression() const 
    { 
      return m_matrix;
    }
    const BinaryOp& functor() const 
    { 
      return m_functor;
    }
  protected:
    Lhs& m_matrix;
    const BinaryOp& m_functor;
  private:
    SelfCwiseBinaryOp& operator=(const SelfCwiseBinaryOp&);
 };
 template<typename Derived>
-inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
 {
  typedef typename Derived::PlainObject PlainObject;
-  SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
+  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
  tmp = PlainObject::Constant(rows(),cols(),other);
  return derived();
 }
 template<typename Derived>
-inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
+EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
 {
  typedef typename internal::conditional<NumTraits<Scalar>::IsInteger,
                                        internal::scalar_quotient_op<Scalar>,
                                        internal::scalar_product_op<Scalar> >::type BinOp;
  typedef typename Derived::PlainObject PlainObject;
-  SelfCwiseBinaryOp<BinOp, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
+  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
-  Scalar actual_other;
+  return derived();
-  if(NumTraits<Scalar>::IsInteger)  actual_other = other;
+}
-  else                              actual_other = Scalar(1)/other;
+
-  tmp = PlainObject::Constant(rows(),cols(), actual_other);
+template<typename Derived>
 EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
 {
  typedef typename Derived::PlainObject PlainObject;
  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
  return derived();
 }
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
 {
  typedef typename Derived::PlainObject PlainObject;
  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
  return derived();
 }
--- a/eigenlib/Eigen/src/Core/Solve.h
+++ b/eigenlib/Eigen/src/Core/Solve.h
@ -0,0 +1,188 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_SOLVE_H
 #define EIGEN_SOLVE_H
 namespace Eigen {
 template<typename Decomposition, typename RhsType, typename StorageKind> class SolveImpl;
 /** \class Solve
  * \ingroup Core_Module
  *
  * \brief Pseudo expression representing a solving operation
  *
  * \tparam Decomposition the type of the matrix or decomposion object
  * \tparam Rhstype the type of the right-hand side
  *
  * This class represents an expression of A.solve(B)
  * and most of the time this is the only way it is used.
  *
  */
 namespace internal {
 // this solve_traits class permits to determine the evaluation type with respect to storage kind (Dense vs Sparse)
 template<typename Decomposition, typename RhsType,typename StorageKind> struct solve_traits;
 template<typename Decomposition, typename RhsType>
 struct solve_traits<Decomposition,RhsType,Dense>
 {
  typedef Matrix<typename RhsType::Scalar,
                 Decomposition::ColsAtCompileTime,
                 RhsType::ColsAtCompileTime,
                 RhsType::PlainObject::Options,
                 Decomposition::MaxColsAtCompileTime,
                 RhsType::MaxColsAtCompileTime> PlainObject;  
 };
 template<typename Decomposition, typename RhsType>
 struct traits<Solve<Decomposition, RhsType> >
  : traits<typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject>
 {
  typedef typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject PlainObject;
  typedef typename promote_index_type<typename Decomposition::StorageIndex, typename RhsType::StorageIndex>::type StorageIndex;
  typedef traits<PlainObject> BaseTraits;
  enum {
    Flags = BaseTraits::Flags & RowMajorBit,
    CoeffReadCost = HugeCost
  };
 };
 }
 template<typename Decomposition, typename RhsType>
 class Solve : public SolveImpl<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>
 {
 public:
  typedef typename internal::traits<Solve>::PlainObject PlainObject;
  typedef typename internal::traits<Solve>::StorageIndex StorageIndex;
  Solve(const Decomposition &dec, const RhsType &rhs)
    : m_dec(dec), m_rhs(rhs)
  {}
  EIGEN_DEVICE_FUNC Index rows() const { return m_dec.cols(); }
  EIGEN_DEVICE_FUNC Index cols() const { return m_rhs.cols(); }
  EIGEN_DEVICE_FUNC const Decomposition& dec() const { return m_dec; }
  EIGEN_DEVICE_FUNC const RhsType&       rhs() const { return m_rhs; }
 protected:
  const Decomposition &m_dec;
  const RhsType       &m_rhs;
 };
 // Specialization of the Solve expression for dense results
 template<typename Decomposition, typename RhsType>
 class SolveImpl<Decomposition,RhsType,Dense>
  : public MatrixBase<Solve<Decomposition,RhsType> >
 {
  typedef Solve<Decomposition,RhsType> Derived;
 public:
  typedef MatrixBase<Solve<Decomposition,RhsType> > Base;
  EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
 private:
  Scalar coeff(Index row, Index col) const;
  Scalar coeff(Index i) const;
 };
 // Generic API dispatcher
 template<typename Decomposition, typename RhsType, typename StorageKind>
 class SolveImpl : public internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type
 {
  public:
    typedef typename internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type Base;
 };
 namespace internal {
 // Evaluator of Solve -> eval into a temporary
 template<typename Decomposition, typename RhsType>
 struct evaluator<Solve<Decomposition,RhsType> >
  : public evaluator<typename Solve<Decomposition,RhsType>::PlainObject>
 {
  typedef Solve<Decomposition,RhsType> SolveType;
  typedef typename SolveType::PlainObject PlainObject;
  typedef evaluator<PlainObject> Base;
  enum { Flags = Base::Flags | EvalBeforeNestingBit };
  EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve)
    : m_result(solve.rows(), solve.cols())
  {
    ::new (static_cast<Base*>(this)) Base(m_result);
    solve.dec()._solve_impl(solve.rhs(), m_result);
  }
 protected:  
  PlainObject m_result;
 };
 // Specialization for "dst = dec.solve(rhs)"
 // NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere
 template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
 struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>
 {
  typedef Solve<DecType,RhsType> SrcXprType;
  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
  {
    Index dstRows = src.rows();
    Index dstCols = src.cols();
    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
      dst.resize(dstRows, dstCols);
    src.dec()._solve_impl(src.rhs(), dst);
  }
 };
 // Specialization for "dst = dec.transpose().solve(rhs)"
 template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
 struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>
 {
  typedef Solve<Transpose<const DecType>,RhsType> SrcXprType;
  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
  {
    Index dstRows = src.rows();
    Index dstCols = src.cols();
    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
      dst.resize(dstRows, dstCols);
    src.dec().nestedExpression().template _solve_impl_transposed<false>(src.rhs(), dst);
  }
 };
 // Specialization for "dst = dec.adjoint().solve(rhs)"
 template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
 struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType>,
                  internal::assign_op<Scalar,Scalar>, Dense2Dense>
 {
  typedef Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType> SrcXprType;
  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
  {
    Index dstRows = src.rows();
    Index dstCols = src.cols();
    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
      dst.resize(dstRows, dstCols);
    src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed<true>(src.rhs(), dst);
  }
 };
 } // end namepsace internal
 } // end namespace Eigen
 #endif // EIGEN_SOLVE_H
--- a/eigenlib/Eigen/src/Core/SolveTriangular.h
+++ b/eigenlib/Eigen/src/Core/SolveTriangular.h
@ -68,7 +68,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
    if(!useRhsDirectly)
      MappedRhs(actualRhs,rhs.size()) = rhs;
-    triangular_solve_vector<LhsScalar, RhsScalar, typename Lhs::Index, Side, Mode, LhsProductTraits::NeedToConjugate,
+    triangular_solve_vector<LhsScalar, RhsScalar, Index, Side, Mode, LhsProductTraits::NeedToConjugate,
                            (int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor>
      ::run(actualLhs.cols(), actualLhs.data(), actualLhs.outerStride(), actualRhs);
@ -82,7 +82,6 @@ template<typename Lhs, typename Rhs, int Side, int Mode>
 struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
 {
  typedef typename Rhs::Scalar Scalar;
  typedef typename Rhs::Index Index;
  typedef blas_traits<Lhs> LhsProductTraits;
  typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
@ -96,7 +95,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
    typedef internal::gemm_blocking_space<(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
              Rhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxRowsAtCompileTime,4> BlockingType;
-    BlockingType blocking(rhs.rows(), rhs.cols(), size);
+    BlockingType blocking(rhs.rows(), rhs.cols(), size, 1, false);
    triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
                               (Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
@ -108,32 +107,32 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
 * meta-unrolling implementation
 ***************************************************************************/
-template<typename Lhs, typename Rhs, int Mode, int Index, int Size,
+template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size,
-         bool Stop = Index==Size>
+         bool Stop = LoopIndex==Size>
 struct triangular_solver_unroller;
-template<typename Lhs, typename Rhs, int Mode, int Index, int Size>
+template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size>
-struct triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,false> {
+struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,false> {
  enum {
    IsLower = ((Mode&Lower)==Lower),
-    I = IsLower ? Index : Size - Index - 1,
+    DiagIndex  = IsLower ? LoopIndex : Size - LoopIndex - 1,
-    S = IsLower ? 0     : I+1
+    StartIndex = IsLower ? 0         : DiagIndex+1
  };
  static void run(const Lhs& lhs, Rhs& rhs)
  {
-    if (Index>0)
+    if (LoopIndex>0)
-      rhs.coeffRef(I) -= lhs.row(I).template segment<Index>(S).transpose()
+      rhs.coeffRef(DiagIndex) -= lhs.row(DiagIndex).template segment<LoopIndex>(StartIndex).transpose()
-                         .cwiseProduct(rhs.template segment<Index>(S)).sum();
+                                .cwiseProduct(rhs.template segment<LoopIndex>(StartIndex)).sum();
    if(!(Mode & UnitDiag))
-      rhs.coeffRef(I) /= lhs.coeff(I,I);
+      rhs.coeffRef(DiagIndex) /= lhs.coeff(DiagIndex,DiagIndex);
-    triangular_solver_unroller<Lhs,Rhs,Mode,Index+1,Size>::run(lhs,rhs);
+    triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex+1,Size>::run(lhs,rhs);
  }
 };
-template<typename Lhs, typename Rhs, int Mode, int Index, int Size>
+template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size>
-struct triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,true> {
+struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,true> {
  static void run(const Lhs&, Rhs&) {}
 };
@ -162,61 +161,35 @@ struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
 * TriangularView methods
 ***************************************************************************/
-/** "in-place" version of TriangularView::solve() where the result is written in \a other
+#ifndef EIGEN_PARSED_BY_DOXYGEN
  *
  * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
  * This function will const_cast it, so constness isn't honored here.
  *
  * See TriangularView:solve() for the details.
  */
 template<typename MatrixType, unsigned int Mode>
 template<int Side, typename OtherDerived>
-void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
+void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
 {
  OtherDerived& other = _other.const_cast_derived();
-  eigen_assert( cols() == rows() && ((Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols())) );
+  eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
  eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
-  enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit  && OtherDerived::IsVectorAtCompileTime };
+  enum { copy = (internal::traits<OtherDerived>::Flags & RowMajorBit)  && OtherDerived::IsVectorAtCompileTime && OtherDerived::SizeAtCompileTime!=1};
  typedef typename internal::conditional<copy,
    typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;
  OtherCopy otherCopy(other);
  internal::triangular_solver_selector<MatrixType, typename internal::remove_reference<OtherCopy>::type,
-    Side, Mode>::run(nestedExpression(), otherCopy);
+    Side, Mode>::run(derived().nestedExpression(), otherCopy);
  if (copy)
    other = otherCopy;
 }
 /** \returns the product of the inverse of \c *this with \a other, \a *this being triangular.
  *
  * This function computes the inverse-matrix matrix product inverse(\c *this) * \a other if
  * \a Side==OnTheLeft (the default), or the right-inverse-multiply  \a other * inverse(\c *this) if
  * \a Side==OnTheRight.
  *
  * The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the
  * diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
  * is an upper (resp. lower) triangular matrix.
  *
  * Example: \include MatrixBase_marked.cpp
  * Output: \verbinclude MatrixBase_marked.out
  *
  * This function returns an expression of the inverse-multiply and can works in-place if it is assigned
  * to the same matrix or vector \a other.
  *
  * For users coming from BLAS, this function (and more specifically solveInPlace()) offer
  * all the operations supported by the \c *TRSV and \c *TRSM BLAS routines.
  *
  * \sa TriangularView::solveInPlace()
  */
 template<typename Derived, unsigned int Mode>
 template<int Side, typename Other>
 const internal::triangular_solve_retval<Side,TriangularView<Derived,Mode>,Other>
-TriangularView<Derived,Mode>::solve(const MatrixBase<Other>& other) const
+TriangularViewImpl<Derived,Mode,Dense>::solve(const MatrixBase<Other>& other) const
 {
-  return internal::triangular_solve_retval<Side,TriangularView,Other>(*this, other.derived());
+  return internal::triangular_solve_retval<Side,TriangularViewType,Other>(derived(), other.derived());
 }
 #endif
 namespace internal {
@ -232,7 +205,6 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv
 {
  typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned;
  typedef ReturnByValue<triangular_solve_retval> Base;
  typedef typename Base::Index Index;
  triangular_solve_retval(const TriangularType& tri, const Rhs& rhs)
    : m_triangularMatrix(tri), m_rhs(rhs)
@ -243,7 +215,7 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv
  template<typename Dest> inline void evalTo(Dest& dst) const
  {
-    if(!(is_same<RhsNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_rhs)))
+    if(!is_same_dense(dst,m_rhs))
      dst = m_rhs;
    m_triangularMatrix.template solveInPlace<Side>(dst);
  }
--- a/Show More
+++ b/Show More
`@ -1,2 +1,2 @@`
	`#include "Dense"`	`#include "Dense"`
	`//#include "Sparse"`	`#include "Sparse"`