diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index a6eacc9354d5..4b9a58337ea4 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -62,7 +62,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@883d8588e56d1753a8a58c1c86e88976f0c23449 # v3.26.3 + uses: github/codeql-action/init@2c779ab0d087cd7fe7b826087247c2c81f27bfa6 # v3.26.5 with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} @@ -85,6 +85,6 @@ jobs: make -j 2 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@883d8588e56d1753a8a58c1c86e88976f0c23449 # v3.26.3 + uses: github/codeql-action/analyze@2c779ab0d087cd7fe7b826087247c2c81f27bfa6 # v3.26.5 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index a4b85a7ee90c..1a77e450bd96 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -66,6 +66,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@883d8588e56d1753a8a58c1c86e88976f0c23449 # v3.26.3 + uses: github/codeql-action/upload-sarif@2c779ab0d087cd7fe7b826087247c2c81f27bfa6 # v3.26.5 with: sarif_file: results.sarif diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index 1fe2a62ea164..1a76ae6fd5b5 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -297,6 +297,11 @@ [COMMON] opt-set-cmake-var CMAKE_GENERATOR STRING : Ninja + +# Disable deprecated warnings until the deprecated packages are removed (e.g. 
Epetra), +# otherwise the warnings are pretty overwhelming of other compiler warnings +opt-set-cmake-var Trilinos_SHOW_DEPRECATED_WARNINGS BOOL : OFF + #opt-set-cmake-var Trilinos_ENABLE_BUILD_STATS BOOL : ON opt-set-cmake-var Trilinos_PARALLEL_LINK_JOBS_LIMIT STRING : 8 @@ -1921,7 +1926,7 @@ opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON opt-set-cmake-var KokkosKernels_blas_serial_MPI_1_DISABLE BOOL : ON opt-set-cmake-var ROL_example_PDE-OPT_helmholtz_example_02_MPI_1_DISABLE BOOL : ON -opt-set-cmake-var CMAKE_CXX_FLAGS STRING FORCE : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline -Wno-error -DTRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS +opt-set-cmake-var CMAKE_CXX_FLAGS STRING FORCE : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline -Wno-error # Test failures as of 11-28-22 opt-set-cmake-var ROL_example_PDE-OPT_navier-stokes_example_01_MPI_4_DISABLE BOOL : ON @@ -2002,7 +2007,7 @@ use USE-DEPRECATED|YES use COMMON_USE-MPI|NO opt-set-cmake-var Trilinos_ENABLE_Fortran OFF BOOL : OFF -opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-parentheses -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-nonnull-compare -Wno-address -Wno-inline -Wno-unused-but-set-variable -Wno-unused-variable -Wno-unused-label -Werror -Werror=shadow -DTRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS 
+opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-parentheses -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-nonnull-compare -Wno-address -Wno-inline -Wno-unused-but-set-variable -Wno-unused-variable -Wno-unused-label -Werror -Werror=shadow opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : OFF use GCC_PACKAGE_SPECIFIC_WARNING_FLAGS @@ -2038,7 +2043,7 @@ opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : - opt-set-cmake-var CMAKE_CXX_EXTENSIONS BOOL : OFF opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON opt-set-cmake-var ROL_test_algorithm_TypeP_CompareTypeU_MPI_1_DISABLE BOOL : ON -opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -fno-strict-aliasing -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-parentheses -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-inline -Wno-nonnull-compare -Wno-address -Wno-error -Werror=shadow -DTRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS +opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -fno-strict-aliasing -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-parentheses -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-inline -Wno-nonnull-compare -Wno-address -Wno-error -Werror=shadow use GCC_OPENMP_PACKAGE_SPECIFIC_WARNING_FLAGS @@ -2929,7 +2934,7 @@ opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON opt-set-cmake-var KokkosKernels_blas_serial_MPI_1_DISABLE BOOL : ON opt-set-cmake-var ROL_example_PDE-OPT_helmholtz_example_02_MPI_1_DISABLE BOOL : ON 
-opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline -DTRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS +opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline # Test failures as of 11-28-22 opt-set-cmake-var ROL_example_PDE-OPT_navier-stokes_example_01_MPI_4_DISABLE BOOL : ON @@ -2979,7 +2984,7 @@ opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON opt-set-cmake-var KokkosKernels_blas_serial_MPI_1_DISABLE BOOL : ON opt-set-cmake-var ROL_example_PDE-OPT_helmholtz_example_02_MPI_1_DISABLE BOOL : ON -opt-set-cmake-var CMAKE_CXX_FLAGS STRING FORCE : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline -DTRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS +opt-set-cmake-var CMAKE_CXX_FLAGS STRING FORCE : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline use RHEL7_POST @@ -3014,7 +3019,7 @@ use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS opt-set-cmake-var Trilinos_ENABLE_Fortran OFF BOOL : OFF -opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas 
-Wno-unknown-pragmas -Wno-parentheses -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-nonnull-compare -Wno-address -Wno-inline -Wno-unused-but-set-variable -Wno-unused-variable -Wno-unused-label -Wno-error -DTRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS +opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-parentheses -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-nonnull-compare -Wno-address -Wno-inline -Wno-unused-but-set-variable -Wno-unused-variable -Wno-unused-label -Wno-error opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_Pnetcdf BOOL FORCE : OFF opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : -L${NETCDF_C_LIB|ENV};${NETCDF_C_LIB|ENV}/libnetcdf.a;${TPL_HDF5_LIBRARIES|CMAKE} @@ -3073,7 +3078,7 @@ use COMMON_SPACK_TPLS opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none --mca btl vader,self opt-set-cmake-var CMAKE_CXX_EXTENSIONS BOOL : OFF opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON -opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -fno-strict-aliasing -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-parentheses -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-inline -Wno-nonnull-compare -Wno-address -Werror -DTRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS +opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -fno-strict-aliasing -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-parentheses -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation 
-Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-inline -Wno-nonnull-compare -Wno-address -Werror use RHEL7_POST @@ -3108,7 +3113,7 @@ opt-set-cmake-var TPL_ENABLE_Scotch BOOL FORCE : OFF opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : ${NETCDF_C_LIB|ENV}/libnetcdf.so opt-set-cmake-var Trilinos_ENABLE_Fortran OFF BOOL : OFF -opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-nonnull-compare -Wno-address -Wno-inline -Wno-unused-label -Werror=parentheses -Werror=sign-compare -Werror=unused-variable -DTRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS +opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-nonnull-compare -Wno-address -Wno-inline -Wno-unused-label -Werror=parentheses -Werror=sign-compare -Werror=unused-variable use GCC_PACKAGE_SPECIFIC_WARNING_FLAGS @@ -3142,7 +3147,7 @@ use COMMON_SPACK_TPLS opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none --mca btl vader,self opt-set-cmake-var CMAKE_CXX_EXTENSIONS BOOL : OFF opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON -opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -fno-strict-aliasing -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-inline -Wno-nonnull-compare -Wno-address -Werror=sign-compare -Werror=unused-variable -Werror=parentheses -DTRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS 
+opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -fno-strict-aliasing -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-inline -Wno-nonnull-compare -Wno-address -Werror=sign-compare -Werror=unused-variable -Werror=parentheses # TPL_BLAS_LIBRARIES is redefined here with libm for SuperLU to properly link opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : -L${BLAS_ROOT|ENV}/lib;-lblas;-lgfortran;-lgomp;-lm @@ -3185,7 +3190,7 @@ opt-set-cmake-var ROL_example_PDE-OPT_helmholtz_example_02_MPI_1_DISABLE BOOL opt-set-cmake-var Pliris_vector_random_MPI_3_DISABLE BOOL : ON opt-set-cmake-var Pliris_vector_random_MPI_4_DISABLE BOOL : ON -opt-set-cmake-var CMAKE_CXX_FLAGS STRING FORCE : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline -Werror=sign-compare -Werror=unused-variable -Werror=parentheses -DTRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS +opt-set-cmake-var CMAKE_CXX_FLAGS STRING FORCE : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline -Werror=sign-compare -Werror=unused-variable -Werror=parentheses # Test failures as of 11-28-22 opt-set-cmake-var ROL_example_PDE-OPT_navier-stokes_example_01_MPI_4_DISABLE BOOL : ON @@ -3286,7 +3291,7 @@ opt-set-cmake-var TPL_LAPACK_LIBRARY_DIRS STRING FORCE : ${OPENBLAS_ROOT|ENV}/ opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-L${OPENBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm 
opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none --mca btl vader,self -opt-set-cmake-var CMAKE_CXX_FLAGS STRING FORCE : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline -DTRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS +opt-set-cmake-var CMAKE_CXX_FLAGS STRING FORCE : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline opt-set-cmake-var TPL_ENABLE_SuperLUDist BOOL FORCE : ON opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : ON @@ -3336,7 +3341,7 @@ use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS -opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-parentheses -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-nonnull-compare -Wno-address -Wno-inline -Wno-unused-but-set-variable -Wno-unused-variable -Wno-unused-label -DTRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS +opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-parentheses -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-nonnull-compare -Wno-address -Wno-inline -Wno-unused-but-set-variable -Wno-unused-variable -Wno-unused-label opt-set-cmake-var TPL_BLAS_LIBRARY_DIRS STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : 
${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-L${OPENBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm @@ -3384,7 +3389,7 @@ opt-set-cmake-var ROL_example_PDE-OPT_helmholtz_example_02_MPI_1_DISABLE BOO opt-set-cmake-var ROL_example_PDE-OPT_navier-stokes_example_01_MPI_4_DISABLE BOOL : ON opt-set-cmake-var Pliris_vector_random_MPI_3_DISABLE BOOL : ON opt-set-cmake-var Pliris_vector_random_MPI_4_DISABLE BOOL : ON -opt-set-cmake-var CMAKE_CXX_FLAGS STRING FORCE : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline -DTRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS +opt-set-cmake-var CMAKE_CXX_FLAGS STRING FORCE : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline opt-set-cmake-var TPL_ENABLE_SuperLUDist BOOL FORCE: OFF diff --git a/packages/intrepid2/assembly-examples/GRADGRADStandardAssembly.hpp b/packages/intrepid2/assembly-examples/GRADGRADStandardAssembly.hpp index 099893c6facd..8fc7852e7650 100644 --- a/packages/intrepid2/assembly-examples/GRADGRADStandardAssembly.hpp +++ b/packages/intrepid2/assembly-examples/GRADGRADStandardAssembly.hpp @@ -145,8 +145,11 @@ Intrepid2::ScalarView performStandardQuadratureGRADGRAD(Intre // because structured integration performs transformations within integrate(), to get a fairer comparison here we include the transformation calls. 
fstIntegrateCall->start(); FunctionSpaceTools::HGRADtransformGRAD(unorientedTransformedGradValues, jacobianInverse, basisGradValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); OrientationTools::modifyBasisByOrientation(transformedGradValues, unorientedTransformedGradValues, orientationsWorkset, basis.get()); + fstIntegrateCall->start(); transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim) * (spaceDim - 1) * 2.0; // 2: one multiply, one add per (P,D) entry in the contraction. FunctionSpaceTools::multiplyMeasure(transformedWeightedGradValues, cellMeasures, transformedGradValues); diff --git a/packages/intrepid2/assembly-examples/H1StandardAssembly.hpp b/packages/intrepid2/assembly-examples/H1StandardAssembly.hpp index 455be4e39471..21fb9207ef0f 100644 --- a/packages/intrepid2/assembly-examples/H1StandardAssembly.hpp +++ b/packages/intrepid2/assembly-examples/H1StandardAssembly.hpp @@ -151,8 +151,11 @@ Intrepid2::ScalarView performStandardQuadratureH1(Intrepid2:: // because structured integration performs transformations within integrate(), to get a fairer comparison here we include the transformation calls. fstIntegrateCall->start(); FunctionSpaceTools::HGRADtransformGRAD(unorientedTransformedGradValues, jacobianInverse, basisGradValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); OrientationTools::modifyBasisByOrientation(transformedGradValues, unorientedTransformedGradValues, orientationsWorkset, basis.get()); + fstIntegrateCall->start(); transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim) * (spaceDim - 1) * 2.0; // 2: one multiply, one add per (P,D) entry in the contraction. 
FunctionSpaceTools::multiplyMeasure(transformedWeightedGradValues, cellMeasures, transformedGradValues); transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim); // multiply each entry of transformedGradValues: one flop for each. @@ -163,8 +166,11 @@ Intrepid2::ScalarView performStandardQuadratureH1(Intrepid2:: ExecutionSpace().fence(); FunctionSpaceTools::HGRADtransformVALUE(unorientedTransformedBasisValues, basisValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); OrientationTools::modifyBasisByOrientation(transformedBasisValues, unorientedTransformedBasisValues, orientationsWorkset, basis.get()); + fstIntegrateCall->start(); FunctionSpaceTools::multiplyMeasure(transformedWeightedBasisValues, cellMeasures, transformedBasisValues); bool sumInto = true; // add the (value,value) integral to the (grad,grad) that we've already integrated FunctionSpaceTools::integrate(cellStiffnessSubview, transformedBasisValues, transformedWeightedBasisValues, sumInto); diff --git a/packages/intrepid2/assembly-examples/HCURLStandardAssembly.hpp b/packages/intrepid2/assembly-examples/HCURLStandardAssembly.hpp index 17724153fcf5..a29c80bdbb2c 100644 --- a/packages/intrepid2/assembly-examples/HCURLStandardAssembly.hpp +++ b/packages/intrepid2/assembly-examples/HCURLStandardAssembly.hpp @@ -175,8 +175,11 @@ Intrepid2::ScalarView performStandardQuadratureHCURL(Intrepid // because structured integration performs transformations within integrate(), to get a fairer comparison here we include the transformation calls. 
fstIntegrateCall->start(); FunctionSpaceTools::HCURLtransformCURL(unorientedTransformedCurlValues, jacobian, jacobianDeterminant, basisCurlValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); OrientationTools::modifyBasisByOrientation(transformedCurlValues, unorientedTransformedCurlValues, orientationsWorkset, basis.get()); + fstIntegrateCall->start(); transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim) * (spaceDim - 1) * 2.0; // 2: one multiply, one add per (P,D) entry in the contraction. FunctionSpaceTools::multiplyMeasure(transformedWeightedCurlValues, cellMeasures, transformedCurlValues); transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim); // multiply each entry of transformedCurlValues: one flop for each. @@ -186,8 +189,11 @@ Intrepid2::ScalarView performStandardQuadratureHCURL(Intrepid FunctionSpaceTools::integrate(cellStiffnessSubview, transformedCurlValues, transformedWeightedCurlValues); FunctionSpaceTools::HCURLtransformVALUE(unorientedTransformedBasisValues, jacobianInverse, basisValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); OrientationTools::modifyBasisByOrientation(transformedBasisValues, unorientedTransformedBasisValues, orientationsWorkset, basis.get()); + fstIntegrateCall->start(); FunctionSpaceTools::multiplyMeasure(transformedWeightedBasisValues, cellMeasures, transformedBasisValues); bool sumInto = true; // add the (value,value) integral to the (curl,curl) that we've already integrated FunctionSpaceTools::integrate(cellStiffnessSubview, transformedBasisValues, transformedWeightedBasisValues, sumInto); diff --git a/packages/intrepid2/assembly-examples/HDIVStandardAssembly.hpp 
b/packages/intrepid2/assembly-examples/HDIVStandardAssembly.hpp index 04f415c88afc..2e50d065a732 100644 --- a/packages/intrepid2/assembly-examples/HDIVStandardAssembly.hpp +++ b/packages/intrepid2/assembly-examples/HDIVStandardAssembly.hpp @@ -151,8 +151,11 @@ Intrepid2::ScalarView performStandardQuadratureHDIV(Intrepid2 // because structured integration performs transformations within integrate(), to get a fairer comparison here we include the transformation calls. fstIntegrateCall->start(); FunctionSpaceTools::HDIVtransformDIV(unorientedTransformedDivValues, jacobianDeterminant, basisDivValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); OrientationTools::modifyBasisByOrientation(transformedDivValues, unorientedTransformedDivValues, orientationsWorkset, basis.get()); + fstIntegrateCall->start(); transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim) * (spaceDim - 1) * 2.0; // 2: one multiply, one add per (P,D) entry in the contraction. FunctionSpaceTools::multiplyMeasure(transformedWeightedDivValues, cellMeasures, transformedDivValues); transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim); // multiply each entry of transformedDivValues: one flop for each. 
@@ -161,10 +164,12 @@ Intrepid2::ScalarView performStandardQuadratureHDIV(Intrepid2 FunctionSpaceTools::integrate(cellStiffnessSubview, transformedDivValues, transformedWeightedDivValues); ExecutionSpace().fence(); - FunctionSpaceTools::HDIVtransformVALUE(unorientedTransformedBasisValues, jacobian, jacobianDeterminant, basisValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); OrientationTools::modifyBasisByOrientation(transformedBasisValues, unorientedTransformedBasisValues, orientationsWorkset, basis.get()); + fstIntegrateCall->start(); FunctionSpaceTools::multiplyMeasure(transformedWeightedBasisValues, cellMeasures, transformedBasisValues); bool sumInto = true; // add the (value,value) integral to the (div,div) that we've already integrated FunctionSpaceTools::integrate(cellStiffnessSubview, transformedBasisValues, transformedWeightedBasisValues, sumInto); diff --git a/packages/intrepid2/assembly-examples/HVOLStandardAssembly.hpp b/packages/intrepid2/assembly-examples/HVOLStandardAssembly.hpp index 723b8f236698..e4729ec5e538 100644 --- a/packages/intrepid2/assembly-examples/HVOLStandardAssembly.hpp +++ b/packages/intrepid2/assembly-examples/HVOLStandardAssembly.hpp @@ -139,8 +139,11 @@ Intrepid2::ScalarView performStandardQuadratureHVOL(Intrepid2 auto cellStiffnessSubview = Kokkos::subview(cellStiffness, cellRange, Kokkos::ALL(), Kokkos::ALL()); FunctionSpaceTools::HVOLtransformVALUE(unorientedTransformedBasisValues, jacobianDeterminant, basisValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); OrientationTools::modifyBasisByOrientation(transformedBasisValues, unorientedTransformedBasisValues, orientationsWorkset, basis.get()); + fstIntegrateCall->start(); FunctionSpaceTools::multiplyMeasure(transformedWeightedBasisValues, cellMeasures, transformedBasisValues); 
bool sumInto = true; // add the (value,value) integral to the (curl,curl) that we've already integrated FunctionSpaceTools::integrate(cellStiffnessSubview, transformedBasisValues, transformedWeightedBasisValues, sumInto); diff --git a/packages/intrepid2/assembly-examples/StandardAssembly.hpp b/packages/intrepid2/assembly-examples/StandardAssembly.hpp index a689306dfbf6..610918e7298d 100644 --- a/packages/intrepid2/assembly-examples/StandardAssembly.hpp +++ b/packages/intrepid2/assembly-examples/StandardAssembly.hpp @@ -110,10 +110,10 @@ namespace { } //! General assembly for two arbitrary bases and ops that uses the classic, generic Intrepid2 paths. -template +template // spaceDim and spaceDim2 should agree on value (differ on type) Intrepid2::ScalarView performStandardAssembly(Intrepid2::CellGeometry &geometry, int worksetSize, - const int &polyOrder1, const Intrepid2::EFunctionSpace &fs1, const Intrepid2::EOperator &op1, - const int &polyOrder2, const Intrepid2::EFunctionSpace &fs2, const Intrepid2::EOperator &op2, + const int &polyOrder1, const Intrepid2::EFunctionSpace &fs1, const Intrepid2::EOperator &op1, Teuchos::RCP< Kokkos::Array > vectorWeight1, + const int &polyOrder2, const Intrepid2::EFunctionSpace &fs2, const Intrepid2::EOperator &op2, Teuchos::RCP< Kokkos::Array > vectorWeight2, double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) { using ExecutionSpace = typename DeviceType::execution_space; @@ -170,32 +170,72 @@ Intrepid2::ScalarView performStandardAssembly(Intrepid2::Cell ViewType basis1Values = basis1->allocateOutputView(numPoints, op1); // (F1,P[,D]) ViewType basis2Values = basis2->allocateOutputView(numPoints, op2); // (F2,P[,D]) - ViewType orientedValues1, transformedValues1; - ViewType orientedValues2, transformedValues2, transformedWeightedValues2; + ViewType orientedValues1, transformedValues1, ultimateValues1; + ViewType orientedValues2, transformedValues2, ultimateValues2, ultimateWeightedValues2; - 
INTREPID2_TEST_FOR_EXCEPTION(basis1Values.rank() != basis2Values.rank(), std::invalid_argument, "basis1 and basis2 must agree on their rank under the respective operators"); + int ultimateBasis1Rank, ultimateBasis2Rank; + if (basis1Values.rank() == 2) + { + // the un-transformed values have shape (F,P): scalar values + // if vector weights supplied, these will increase the rank + ultimateBasis1Rank = (vectorWeight1 == Teuchos::null) ? 3 : 4; // (C,F,P) or (C,F,P,D) + } + else if (basis1Values.rank() == 3) + { + // the un-transformed values have shape (F,P,D): vector values + // if vector weights supplied, these will decrease the rank (we interpret as a dot product) + ultimateBasis1Rank = (vectorWeight1 == Teuchos::null) ? 4 : 3; // (C,F,P,D) or (C,F,P) + } + if (basis2Values.rank() == 2) + { + // the un-transformed values have shape (F,P): scalar values + // if vector weights supplied, these will increase the rank + ultimateBasis2Rank = (vectorWeight2 == Teuchos::null) ? 3 : 4; // (C,F,P) or (C,F,P,D) + } + else if (basis2Values.rank() == 3) + { + // the un-transformed values have shape (F,P,D): vector values + // if vector weights supplied, these will decrease the rank (we interpret as a dot product) + ultimateBasis2Rank = (vectorWeight2 == Teuchos::null) ? 
4 : 3; // (C,F,P,D) or (C,F,P) + } - const bool scalarValued = (basis1Values.rank() == 2); // (F1,P): scalar-valued - if (scalarValued) + INTREPID2_TEST_FOR_EXCEPTION(ultimateBasis1Rank != ultimateBasis2Rank, std::invalid_argument, "basis1 and basis2 must agree on their rank under the respective operators"); + + if (basis1Values.rank() == 2) { orientedValues1 = ViewType("oriented values 1", worksetSize, numFields1, numPoints); - orientedValues2 = ViewType("oriented values 2", worksetSize, numFields2, numPoints); - transformedValues1 = ViewType("transformed values 1", worksetSize, numFields1, numPoints); + } + else + { + orientedValues1 = ViewType("oriented values 1", worksetSize, numFields1, numPoints, spaceDim); + transformedValues1 = ViewType("transformed values 1", worksetSize, numFields1, numPoints, spaceDim); + } + if (basis2Values.rank() == 2) + { + orientedValues2 = ViewType("oriented values 2", worksetSize, numFields2, numPoints); transformedValues2 = ViewType("transformed values 2", worksetSize, numFields2, numPoints); + } + else + { + orientedValues2 = ViewType("oriented values 2", worksetSize, numFields2, numPoints, spaceDim); + transformedValues2 = ViewType("transformed values 2", worksetSize, numFields2, numPoints, spaceDim); + } + + const bool scalarValued = (ultimateBasis1Rank == 3); // (C,F1,P): scalar-valued + if (scalarValued) + { + ultimateValues1 = ViewType("ultimate values 1", worksetSize, numFields1, numPoints); + ultimateValues2 = ViewType("ultimate values 2", worksetSize, numFields2, numPoints); - transformedWeightedValues2 = ViewType("transformed weighted values 2", worksetSize, numFields2, numPoints); + ultimateWeightedValues2 = ViewType("ultimate weighted values 2", worksetSize, numFields2, numPoints); } else // (F1, P, D) { - const int finalDim = basis1Values.extent_int(2); - orientedValues1 = ViewType("oriented values 1", worksetSize, numFields1, numPoints, finalDim); - orientedValues2 = ViewType("oriented values 2", worksetSize, 
numFields2, numPoints, finalDim); - - transformedValues1 = ViewType("transformed values 1", worksetSize, numFields1, numPoints, finalDim); - transformedValues2 = ViewType("transformed values 2", worksetSize, numFields2, numPoints, finalDim); + ultimateValues1 = ViewType("ultimate values 1", worksetSize, numFields1, numPoints, spaceDim); + ultimateValues2 = ViewType("ultimate values 2", worksetSize, numFields2, numPoints, spaceDim); - transformedWeightedValues2 = ViewType("transformed weighted values 2", worksetSize, numFields2, numPoints, finalDim); + ultimateWeightedValues2 = ViewType("ultimate weighted values 2", worksetSize, numFields2, numPoints, spaceDim); } basis1->getValues(basis1Values, cubaturePoints, op1 ); @@ -218,6 +258,10 @@ Intrepid2::ScalarView performStandardAssembly(Intrepid2::Cell ViewType jacobianDeterminant("jacobian determinant", worksetSize, numPoints); ViewType jacobian("jacobian", worksetSize, numPoints, spaceDim, spaceDim); ViewType jacobianInverse("jacobian inverse", worksetSize, numPoints, spaceDim, spaceDim); + + // Views used for vector-weighted case: + ViewType scalarTransformedValues1 ("scalar transformed values 1", worksetSize, numFields1, numPoints); + ViewType scalarTransformedWeightedValues2("scalar transformed weighted values 2", worksetSize, numFields2, numPoints); initialSetupTimer->stop(); @@ -243,23 +287,45 @@ Intrepid2::ScalarView performStandardAssembly(Intrepid2::Cell Kokkos::resize(jacobianInverse, numCellsInWorkset, numPoints, spaceDim, spaceDim); Kokkos::resize(jacobianDeterminant, numCellsInWorkset, numPoints); Kokkos::resize(cellMeasures, numCellsInWorkset, numPoints); + Kokkos::resize(jacobianDeterminant, numCellsInWorkset, numPoints); - if (scalarValued) + Kokkos::resize(scalarTransformedValues1, numCellsInWorkset, numFields1, numPoints); + Kokkos::resize(scalarTransformedWeightedValues2, numCellsInWorkset, numFields2, numPoints); + + if (basis1Values.rank() == 2) + { + Kokkos::resize(orientedValues1, 
numCellsInWorkset, numFields1, numPoints); + Kokkos::resize(transformedValues1, numCellsInWorkset, numFields1, numPoints); + } + else + { + Kokkos::resize(orientedValues1, numCellsInWorkset, numFields1, numPoints, spaceDim); + Kokkos::resize(transformedValues1, numCellsInWorkset, numFields1, numPoints, spaceDim); + } + if (basis2Values.rank() == 2) { - Kokkos::resize(orientedValues1, numCellsInWorkset, numFields1, numPoints); - Kokkos::resize(orientedValues2, numCellsInWorkset, numFields2, numPoints); - Kokkos::resize(transformedValues1, numCellsInWorkset, numFields1, numPoints); - Kokkos::resize(transformedValues2, numCellsInWorkset, numFields2, numPoints); - Kokkos::resize(transformedWeightedValues2, numCellsInWorkset, numFields2, numPoints); + Kokkos::resize(orientedValues2, numCellsInWorkset, numFields2, numPoints); + Kokkos::resize(transformedValues2, numCellsInWorkset, numFields2, numPoints); } else { - const int finalDim = basis1Values.extent_int(2); - Kokkos::resize(orientedValues1, numCellsInWorkset, numFields1, numPoints, finalDim); - Kokkos::resize(orientedValues2, numCellsInWorkset, numFields2, numPoints, finalDim); - Kokkos::resize(transformedValues1, numCellsInWorkset, numFields1, numPoints, finalDim); - Kokkos::resize(transformedValues2, numCellsInWorkset, numFields2, numPoints, finalDim); - Kokkos::resize(transformedWeightedValues2, numCellsInWorkset, numFields2, numPoints, finalDim); + Kokkos::resize(orientedValues2, numCellsInWorkset, numFields2, numPoints, spaceDim); + Kokkos::resize(transformedValues2, numCellsInWorkset, numFields2, numPoints, spaceDim); + } + + if (scalarValued) + { + Kokkos::resize(ultimateValues1, numCellsInWorkset, numFields1, numPoints); + Kokkos::resize(ultimateValues2, numCellsInWorkset, numFields2, numPoints); + + Kokkos::resize(ultimateWeightedValues2, numCellsInWorkset, numFields2, numPoints); + } + else // (F1, P, D) + { + ultimateValues1 = ViewType("ultimate values 1", worksetSize, numFields1, numPoints, spaceDim); + 
ultimateValues2 = ViewType("ultimate values 2", worksetSize, numFields2, numPoints, spaceDim); + + ultimateWeightedValues2 = ViewType("ultimate weighted values 2", worksetSize, numFields2, numPoints, spaceDim); } } jacobianAndCellMeasureTimer->start(); @@ -271,20 +337,94 @@ Intrepid2::ScalarView performStandardAssembly(Intrepid2::Cell ExecutionSpace().fence(); jacobianAndCellMeasureTimer->stop(); - // because structured integration performs transformations within integrate(), to get a fairer comparison here we include the transformation calls. - fstIntegrateCall->start(); OrientationTools::modifyBasisByOrientation(orientedValues1, basis1Values, orientationsWorkset, basis1.get()); OrientationTools::modifyBasisByOrientation(orientedValues2, basis2Values, orientationsWorkset, basis2.get()); + + // because structured integration performs transformations within integrate(), to get a fairer comparison here we include the transformation calls. + fstIntegrateCall->start(); transform(transformedValues1, orientedValues1, fs1, op1, jacobian, jacobianDeterminant, jacobianInverse); transform(transformedValues2, orientedValues2, fs2, op2, jacobian, jacobianDeterminant, jacobianInverse); - - transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields1+numFields2) * double(numPoints) * double(spaceDim) * (spaceDim - 1) * 2.0; // 2: one multiply, one add per (P,D) entry in the contraction. - FunctionSpaceTools::multiplyMeasure(transformedWeightedValues2, cellMeasures, transformedValues2); - transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields1+numFields2) * double(numPoints) * double(spaceDim); // multiply each entry of transformedGradValues: one flop for each. 
auto cellStiffnessSubview = Kokkos::subview(cellStiffness, cellRange, Kokkos::ALL(), Kokkos::ALL()); - FunctionSpaceTools::integrate(cellStiffnessSubview, transformedValues1, transformedWeightedValues2); + if (vectorWeight1 != Teuchos::null) + { + auto uWeight = *vectorWeight1; + + auto policy3 = Kokkos::MDRangePolicy>({0,0,0},{numCellsInWorkset,numFields1,numPoints}); + if (transformedValues1.rank() == 4) + { + Kokkos::parallel_for("compute ultimateValues1", policy3, + KOKKOS_LAMBDA (const int &cellOrdinal, const int &fieldOrdinal, const int &pointOrdinal) + { + Scalar u_result = 0; + for (int d=0; d>({0,0,0},{numCellsInWorkset,numFields2,numPoints}); + if (transformedValues2.rank() == 4) + { + Kokkos::parallel_for("compute ultimateValues2", policy3, + KOKKOS_LAMBDA (const int &cellOrdinal, const int &fieldOrdinal, const int &pointOrdinal) + { + Scalar v_result = 0; + for (int d=0; dstop(); @@ -297,4 +437,18 @@ Intrepid2::ScalarView performStandardAssembly(Intrepid2::Cell return cellStiffness; } +//! General assembly for two arbitrary bases and ops that uses the classic, generic Intrepid2 paths. 
+template +Intrepid2::ScalarView performStandardAssembly(Intrepid2::CellGeometry &geometry, int worksetSize, + const int &polyOrder1, const Intrepid2::EFunctionSpace &fs1, const Intrepid2::EOperator &op1, + const int &polyOrder2, const Intrepid2::EFunctionSpace &fs2, const Intrepid2::EOperator &op2, + double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) +{ + Teuchos::RCP< Kokkos::Array > nullVectorWeight = Teuchos::null; + + return performStandardAssembly(geometry, worksetSize, + polyOrder1, fs1, op1, nullVectorWeight, + polyOrder2, fs2, op2, nullVectorWeight, + transformIntegrateFlopCount, jacobianCellMeasureFlopCount); +} #endif /* StandardAssembly_hpp */ diff --git a/packages/intrepid2/assembly-examples/StructuredAssembly.hpp b/packages/intrepid2/assembly-examples/StructuredAssembly.hpp index 24c87de7e90e..98a31da9c041 100644 --- a/packages/intrepid2/assembly-examples/StructuredAssembly.hpp +++ b/packages/intrepid2/assembly-examples/StructuredAssembly.hpp @@ -102,10 +102,10 @@ namespace { } //! General assembly for two arbitrary bases and ops that takes advantage of the new structured integration support, including support for sum factorization. 
-template +template // spaceDim and spaceDim2 should agree in value (differ in type) Intrepid2::ScalarView performStructuredAssembly(Intrepid2::CellGeometry &geometry, const int &worksetSize, - const int &polyOrder1, const Intrepid2::EFunctionSpace &fs1, const Intrepid2::EOperator &op1, - const int &polyOrder2, const Intrepid2::EFunctionSpace &fs2, const Intrepid2::EOperator &op2, + const int &polyOrder1, const Intrepid2::EFunctionSpace &fs1, const Intrepid2::EOperator &op1, Teuchos::RCP< Kokkos::Array > vectorWeight1, + const int &polyOrder2, const Intrepid2::EFunctionSpace &fs2, const Intrepid2::EOperator &op2, Teuchos::RCP< Kokkos::Array > vectorWeight2, double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) { using namespace Intrepid2; @@ -151,7 +151,7 @@ Intrepid2::ScalarView performStructuredAssembly(Intrepid2::Ce BasisValues basis2Values = basis2->allocateBasisValues(tensorCubaturePoints, op2); basis2->getValues(basis2Values, tensorCubaturePoints, op2); - + int cellOffset = 0; auto jacobianAndCellMeasureTimer = Teuchos::TimeMonitor::getNewTimer("Jacobians"); @@ -169,18 +169,19 @@ Intrepid2::ScalarView performStructuredAssembly(Intrepid2::Ce auto transformedBasis2ValuesTemp = transform(basis2Values, fs2, op2, jacobian, jacobianDet, jacobianInv, jacobianDetInv, jacobianDividedByJacobianDet); auto integralData = IntegrationTools::allocateIntegralData(transformedBasis1ValuesTemp, cellMeasures, transformedBasis2ValuesTemp); - const int numPoints = jacobian.getDataExtent(1); // data extent will be 1 for affine, numPoints for other cases + const int numJacobianDataPoints = jacobian.getDataExtent(1); // data extent will be 1 for affine, numPoints for other cases + const int numPoints = jacobian.extent_int(1); // number of logical points // TODO: make the below determination accurate for diagonal/block-diagonal cases… (right now, will overcount) - const double flopsPerJacobianPerCell = flopsPerJacobian(spaceDim, numPoints, numVertices); - const 
double flopsPerJacobianDetPerCell = flopsPerJacobianDet(spaceDim, numPoints); - const double flopsPerJacobianInvPerCell = flopsPerJacobianInverse(spaceDim, numPoints); + const double flopsPerJacobianPerCell = flopsPerJacobian(spaceDim, numJacobianDataPoints, numVertices); + const double flopsPerJacobianDetPerCell = flopsPerJacobianDet(spaceDim, numJacobianDataPoints); + const double flopsPerJacobianInvPerCell = flopsPerJacobianInverse(spaceDim, numJacobianDataPoints); transformIntegrateFlopCount = 0; jacobianCellMeasureFlopCount = numCells * flopsPerJacobianPerCell; // jacobian itself jacobianCellMeasureFlopCount += numCells * flopsPerJacobianInvPerCell; // inverse jacobianCellMeasureFlopCount += numCells * flopsPerJacobianDetPerCell; // determinant - jacobianCellMeasureFlopCount += numCells * numPoints; // cell measure: (C,P) gets weighted with cubature weights of shape (P) + jacobianCellMeasureFlopCount += numCells * numJacobianDataPoints; // cell measure: (C,P) gets weighted with cubature weights of shape (P) auto refData = geometry.getJacobianRefData(tensorCubaturePoints); @@ -217,6 +218,49 @@ Intrepid2::ScalarView performStructuredAssembly(Intrepid2::Ce auto transformedBasis1Values = transform(basis1Values, fs1, op1, jacobian, jacobianDet, jacobianInv, jacobianDetInv, jacobianDividedByJacobianDet); auto transformedBasis2Values = transform(basis2Values, fs2, op2, jacobian, jacobianDet, jacobianInv, jacobianDetInv, jacobianDividedByJacobianDet); + if (vectorWeight1 != Teuchos::null) + { + ScalarView auView("a_u", spaceDim); + auto auViewHost = Kokkos::create_mirror(auView); + for (int d=0; d extents {numCellsInWorkset,numPoints,spaceDim}; + Kokkos::Array variationTypes {CONSTANT, CONSTANT, GENERAL}; + + Data au_data(auView, extents, variationTypes); + auto uTransform = Data::allocateMatVecResult(transformedBasis1Values.transform(), au_data, true); + uTransform.storeMatVec(transformedBasis1Values.transform(), au_data, true); // true: transpose basis transform 
when multiplying + transformedBasis1Values = Intrepid2::TransformedBasisValues(uTransform, basis1Values); + + // TODO: modify transformIntegrateFlopCount to include an estimate for above mat-vecs (but note that these will not be a dominant cost, especially at high order). + } + + if (vectorWeight2 != Teuchos::null) + { + ScalarView avView("a_v", spaceDim); + auto avViewHost = Kokkos::create_mirror(avView); + + for (int d=0; d extents {numCellsInWorkset,numPoints,spaceDim}; + Kokkos::Array variationTypes {CONSTANT, CONSTANT, GENERAL}; + + Data av_data(avView, extents, variationTypes); + auto vTransform = Data::allocateMatVecResult(transformedBasis2Values.transform(), av_data, true); + vTransform.storeMatVec(transformedBasis2Values.transform(), av_data, true); // true: transpose basis transform when multiplying + transformedBasis2Values = Intrepid2::TransformedBasisValues(vTransform, basis2Values); + + // TODO: modify transformIntegrateFlopCount to include an estimate for above mat-vecs (but note that these will not be a dominant cost, especially at high order). + } + geometry.computeCellMeasure(cellMeasures, jacobianDet, tensorCubatureWeights); ExecutionSpace().fence(); jacobianAndCellMeasureTimer->stop(); @@ -243,6 +287,22 @@ Intrepid2::ScalarView performStructuredAssembly(Intrepid2::Ce cellOffset += worksetSize; } return cellStiffness; + +} + +//! General assembly for two arbitrary bases and ops that takes advantage of the new structured integration support, including support for sum factorization. 
+template +Intrepid2::ScalarView performStructuredAssembly(Intrepid2::CellGeometry &geometry, const int &worksetSize, + const int &polyOrder1, const Intrepid2::EFunctionSpace &fs1, const Intrepid2::EOperator &op1, + const int &polyOrder2, const Intrepid2::EFunctionSpace &fs2, const Intrepid2::EOperator &op2, + double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) +{ + Teuchos::RCP< Kokkos::Array > nullVectorWeight = Teuchos::null; + + return performStructuredAssembly(geometry, worksetSize, + polyOrder1, fs1, op1, nullVectorWeight, + polyOrder2, fs2, op2, nullVectorWeight, + transformIntegrateFlopCount, jacobianCellMeasureFlopCount); } #endif /* StructuredAssembly_h */ diff --git a/packages/intrepid2/assembly-examples/VectorWeightedGRADGRADStandardAssembly.hpp b/packages/intrepid2/assembly-examples/VectorWeightedGRADGRADStandardAssembly.hpp new file mode 100644 index 000000000000..dc540e7e65a3 --- /dev/null +++ b/packages/intrepid2/assembly-examples/VectorWeightedGRADGRADStandardAssembly.hpp @@ -0,0 +1,205 @@ +// +// VectorWeightedGRADGRADStandardAssembly.hpp +// Trilinos +// +// Created by Roberts, Nathan V on 5/13/24. +// + +#ifndef Intrepid2_VectorWeightedGRADGRADStandardAssembly_hpp +#define Intrepid2_VectorWeightedGRADGRADStandardAssembly_hpp + +#include "JacobianFlopEstimate.hpp" +#include "Intrepid2_OrientationTools.hpp" + +/** \file VectorWeightedGRADGRADStandardAssembly.hpp + \brief Locally assembles a vector-weighted Poisson matrix -- an array of shape (C,F,F), with formulation (a dot grad e_i, b dot grad e_j), using standard Intrepid2 methods; these do not algorithmically exploit geometric structure. + */ + +//! Version that uses the classic, generic Intrepid2 paths. 
+template +Intrepid2::ScalarView performStandardQuadratureVectorWeightedGRADGRAD(Intrepid2::CellGeometry &geometry, + const int &polyOrder, int worksetSize, + Teuchos::RCP> vectorWeight1, + Teuchos::RCP> vectorWeight2, + double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) +{ + INTREPID2_TEST_FOR_EXCEPTION(vectorWeight1 == Teuchos::null, std::invalid_argument, "vectorWeight1 cannot be null"); + INTREPID2_TEST_FOR_EXCEPTION(vectorWeight2 == Teuchos::null, std::invalid_argument, "vectorWeight2 cannot be null"); + + using ExecutionSpace = typename DeviceType::execution_space; + int numVertices = 1; + for (int d=0; dstart(); + + using CellTools = Intrepid2::CellTools; + using FunctionSpaceTools = Intrepid2::FunctionSpaceTools; + + using namespace Intrepid2; + + using namespace std; + // dimensions of the returned view are (C,F,F) + auto fs = FUNCTION_SPACE_HGRAD; + + Intrepid2::ScalarView orientations("orientations", geometry.numCells() ); + geometry.orientations(orientations, 0, -1); + + shards::CellTopology cellTopo = geometry.cellTopology(); + + auto basis = getBasis< BasisFamily >(cellTopo, fs, polyOrder); + + int numFields = basis->getCardinality(); + int numCells = geometry.numCells(); + + if (worksetSize > numCells) worksetSize = numCells; + + // local stiffness matrices: + ScalarView cellStiffness("cell stiffness matrices",numCells,numFields,numFields); + + auto cubature = DefaultCubatureFactory::create(cellTopo,polyOrder*2); + int numPoints = cubature->getNumPoints(); + ScalarView cubaturePoints("cubature points",numPoints,spaceDim); + ScalarView cubatureWeights("cubature weights", numPoints); + + cubature->getCubature(cubaturePoints, cubatureWeights); + + const double flopsPerJacobianPerCell = flopsPerJacobian(spaceDim, numPoints, numVertices); + const double flopsPerJacobianDetPerCell = flopsPerJacobianDet(spaceDim, numPoints); + const double flopsPerJacobianInvPerCell = flopsPerJacobianInverse(spaceDim, numPoints); + + // Allocate some 
intermediate containers + ScalarView basisValues ("basis values", numFields, numPoints ); + ScalarView basisGradValues("basis grad values", numFields, numPoints, spaceDim); + + ScalarView unorientedTransformedGradValues("unoriented transformed grad values", worksetSize, numFields, numPoints, spaceDim); + ScalarView transformedGradValues("transformed grad values", worksetSize, numFields, numPoints, spaceDim); + ScalarView transformedWeightedGradValues("transformed weighted grad values", worksetSize, numFields, numPoints, spaceDim); + ScalarView vectorWeightedTransformedGradValues("vector-weighted transformed grad values", worksetSize, numFields, numPoints); + ScalarView vectorWeightedTransformedWeightedGradValues("vector-weighted transformed weighted grad values", worksetSize, numFields, numPoints); + + basis->getValues(basisValues, cubaturePoints, OPERATOR_VALUE ); + basis->getValues(basisGradValues, cubaturePoints, OPERATOR_GRAD ); + + const int numNodesPerCell = geometry.numNodesPerCell(); + ScalarView expandedCellNodes("expanded cell nodes",numCells,numNodesPerCell,spaceDim); + Kokkos::parallel_for(Kokkos::RangePolicy(0,numCells), + KOKKOS_LAMBDA (const int &cellOrdinal) { + for (int nodeOrdinal=0; nodeOrdinal cellMeasures("cell measures", worksetSize, numPoints); + ScalarView jacobianDeterminant("jacobian determinant", worksetSize, numPoints); + ScalarView jacobian("jacobian", worksetSize, numPoints, spaceDim, spaceDim); + ScalarView jacobianInverse("jacobian inverse", worksetSize, numPoints, spaceDim, spaceDim); + + auto auView = getView("a_u", spaceDim); + auto auViewHost = Kokkos::create_mirror(auView); + + for (int d=0; d("a_v", spaceDim); + auto avViewHost = Kokkos::create_mirror(avView); + for (int d=0; dstop(); + + transformIntegrateFlopCount = 0; + jacobianCellMeasureFlopCount = numCells * flopsPerJacobianPerCell; // jacobian itself + jacobianCellMeasureFlopCount += numCells * flopsPerJacobianInvPerCell; // inverse + jacobianCellMeasureFlopCount += 
numCells * flopsPerJacobianDetPerCell; // determinant + jacobianCellMeasureFlopCount += numCells * numPoints; // cell measure: (C,P) gets weighted with cubature weights of shape (P) + + int cellOffset = 0; + while (cellOffset < numCells) + { + int startCell = cellOffset; + int numCellsInWorkset = (cellOffset + worksetSize - 1 < numCells) ? worksetSize : numCells - startCell; + + std::pair cellRange = {startCell, startCell+numCellsInWorkset}; + auto cellWorkset = Kokkos::subview(expandedCellNodes, cellRange, Kokkos::ALL(), Kokkos::ALL()); + auto orientationsWorkset = Kokkos::subview(orientations, cellRange); + + if (numCellsInWorkset != worksetSize) + { + Kokkos::resize(jacobian, numCellsInWorkset, numPoints, spaceDim, spaceDim); + Kokkos::resize(jacobianInverse, numCellsInWorkset, numPoints, spaceDim, spaceDim); + Kokkos::resize(jacobianDeterminant, numCellsInWorkset, numPoints); + Kokkos::resize(cellMeasures, numCellsInWorkset, numPoints); + Kokkos::resize(unorientedTransformedGradValues, numCellsInWorkset, numFields, numPoints, spaceDim); + Kokkos::resize(transformedGradValues, numCellsInWorkset, numFields, numPoints, spaceDim); + Kokkos::resize(transformedWeightedGradValues, numCellsInWorkset, numFields, numPoints, spaceDim); + } + jacobianAndCellMeasureTimer->start(); + CellTools::setJacobian(jacobian, cubaturePoints, cellWorkset, cellTopo); // accounted for outside loop, as numCells * flopsPerJacobianPerCell. + CellTools::setJacobianInv(jacobianInverse, jacobian); + CellTools::setJacobianDet(jacobianDeterminant, jacobian); + + FunctionSpaceTools::computeCellMeasure(cellMeasures, jacobianDeterminant, cubatureWeights); + ExecutionSpace().fence(); + jacobianAndCellMeasureTimer->stop(); + + // because structured integration performs transformations within integrate(), to get a fairer comparison here we include the transformation calls. 
+ fstIntegrateCall->start(); + FunctionSpaceTools::HGRADtransformGRAD(unorientedTransformedGradValues, jacobianInverse, basisGradValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); + OrientationTools::modifyBasisByOrientation(transformedGradValues, unorientedTransformedGradValues, + orientationsWorkset, basis.get()); + fstIntegrateCall->start(); + + transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim) * (spaceDim - 1) * 2.0; // 2: one multiply, one add per (P,D) entry in the contraction. + FunctionSpaceTools::multiplyMeasure(transformedWeightedGradValues, cellMeasures, transformedGradValues); + transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim); // multiply each entry of transformedGradValues: one flop for each. + + auto policy3 = Kokkos::MDRangePolicy>({0,0,0},{numCellsInWorkset,numFields,numPoints}); + Kokkos::parallel_for("compute expanded_{u,v}TransformedGradValues", policy3, + KOKKOS_LAMBDA (const int &cellOrdinal, const int &fieldOrdinal, const int &pointOrdinal) + { + Scalar u_result = 0; + Scalar v_result_weighted = 0; + for (int d=0; dstop(); + + transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numFields) * double(numPoints * 2); // 2: one multiply, one add per P entry in the contraction. 
+ + cellOffset += worksetSize; + } +// std::cout << "standard integration, approximateFlopCount: " << approximateFlopCount << std::endl; + return cellStiffness; +} + +#endif /* VectorWeightedGRADGRADStandardAssembly_h */ diff --git a/packages/intrepid2/assembly-examples/VectorWeightedGRADGRADStructuredAssembly.hpp b/packages/intrepid2/assembly-examples/VectorWeightedGRADGRADStructuredAssembly.hpp new file mode 100644 index 000000000000..a1d640607720 --- /dev/null +++ b/packages/intrepid2/assembly-examples/VectorWeightedGRADGRADStructuredAssembly.hpp @@ -0,0 +1,187 @@ +// +// VectorWeightedGRADGRADStructuredAssembly.hpp +// Trilinos +// +// Created by Roberts, Nathan V on 5/13/24. +// + +#ifndef VectorWeightedGRADGRADStructuredAssembly_h +#define VectorWeightedGRADGRADStructuredAssembly_h + +#include "JacobianFlopEstimate.hpp" +#include "Intrepid2_OrientationTools.hpp" + +/** \file VectorWeightedGRADGRADStructuredAssembly.hpp + \brief Locally assembles a vector-weighted Poisson matrix -- an array of shape (C,F,F), with formulation (a dot grad e_i, b dot grad e_j), using "structured" Intrepid2 methods; these algorithmically exploit geometric structure as expressed in the provided CellGeometry. + */ + +//! Version that takes advantage of new structured integration support, including sum factorization. 
+template +Intrepid2::ScalarView performStructuredQuadratureVectorWeightedGRADGRAD(Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, + Teuchos::RCP> vectorWeight1, + Teuchos::RCP> vectorWeight2, + double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) +{ + using namespace Intrepid2; + + using ExecutionSpace = typename DeviceType::execution_space; + + int numVertices = 1; + for (int d=0; dstart(); + using namespace std; + using FunctionSpaceTools = FunctionSpaceTools; + using IntegrationTools = IntegrationTools; + // dimensions of the returned view are (C,F,F) + auto fs = FUNCTION_SPACE_HGRAD; + + Intrepid2::ScalarView orientations("orientations", geometry.numCells() ); + geometry.orientations(orientations, 0, -1); + + shards::CellTopology cellTopo = geometry.cellTopology(); + + auto basis = getBasis< BasisFamily >(cellTopo, fs, polyOrder); + + int numFields = basis->getCardinality(); + int numCells = geometry.numCells(); + + // local stiffness matrix: + ScalarView cellStiffness("cell stiffness matrices",numCells,numFields,numFields); + ScalarView worksetCellStiffness("cell stiffness workset matrices",worksetSize,numFields,numFields); + + auto cubature = DefaultCubatureFactory::create(cellTopo,polyOrder*2); + auto tensorCubatureWeights = cubature->allocateCubatureWeights(); + TensorPoints tensorCubaturePoints = cubature->allocateCubaturePoints(); + + cubature->getCubature(tensorCubaturePoints, tensorCubatureWeights); + + EOperator op = OPERATOR_GRAD; + BasisValues gradientValues = basis->allocateBasisValues(tensorCubaturePoints, op); + basis->getValues(gradientValues, tensorCubaturePoints, op); + + // goal here is to do a weighted Poisson; i.e. 
(f grad u, grad v) on each cell + + int cellOffset = 0; + + auto jacobianAndCellMeasureTimer = Teuchos::TimeMonitor::getNewTimer("Jacobians"); + auto fstIntegrateCall = Teuchos::TimeMonitor::getNewTimer("transform + integrate()"); + + Data jacobian = geometry.allocateJacobianData(tensorCubaturePoints, 0, worksetSize); + Data jacobianDet = CellTools::allocateJacobianDet(jacobian); + Data jacobianInv = CellTools::allocateJacobianInv(jacobian); + TensorData cellMeasures = geometry.allocateCellMeasure(jacobianDet, tensorCubatureWeights); + + // lazily-evaluated transformed gradient values (temporary to allow integralData allocation) + auto transformedGradientValuesTemp = FunctionSpaceTools::getHGRADtransformGRAD(jacobianInv, gradientValues); + auto integralData = IntegrationTools::allocateIntegralData(transformedGradientValuesTemp, cellMeasures, transformedGradientValuesTemp); + + const int numJacobianDataPoints = jacobian.getDataExtent(1); // data extent will be 1 for affine, numPoints for other cases + const int numPoints = jacobian.extent_int(1); // logical point count + + // TODO: make the below determination accurate for diagonal/block-diagonal cases… (right now, will overcount) + const double flopsPerJacobianPerCell = flopsPerJacobian(spaceDim, numJacobianDataPoints, numVertices); + const double flopsPerJacobianDetPerCell = flopsPerJacobianDet(spaceDim, numJacobianDataPoints); + const double flopsPerJacobianInvPerCell = flopsPerJacobianInverse(spaceDim, numJacobianDataPoints); + + transformIntegrateFlopCount = 0; + jacobianCellMeasureFlopCount = numCells * flopsPerJacobianPerCell; // jacobian itself + jacobianCellMeasureFlopCount += numCells * flopsPerJacobianInvPerCell; // inverse + jacobianCellMeasureFlopCount += numCells * flopsPerJacobianDetPerCell; // determinant + jacobianCellMeasureFlopCount += numCells * numJacobianDataPoints; // cell measure: (C,P) gets weighted with cubature weights of shape (P) + + auto refData = 
geometry.getJacobianRefData(tensorCubaturePoints); + + ScalarView auView("a_u", spaceDim); + auto auViewHost = Kokkos::create_mirror(auView); + + for (int d=0; d avView("a_v", spaceDim); + auto avViewHost = Kokkos::create_mirror(avView); + + for (int d=0; d au_data(auView, Kokkos::Array{worksetSize,numPoints,spaceDim}, Kokkos::Array{CONSTANT,CONSTANT,GENERAL}); + Data av_data(avView, Kokkos::Array{worksetSize,numPoints,spaceDim}, Kokkos::Array{CONSTANT,CONSTANT,GENERAL}); + + auto uTransform = Data::allocateMatVecResult(jacobianInv, au_data, true); + auto vTransform = Data::allocateMatVecResult(jacobianInv, av_data, true); + + initialSetupTimer->stop(); + while (cellOffset < numCells) + { + int startCell = cellOffset; + int numCellsInWorkset = (cellOffset + worksetSize - 1 < numCells) ? worksetSize : numCells - startCell; + int endCell = numCellsInWorkset + startCell; + + jacobianAndCellMeasureTimer->start(); + if (numCellsInWorkset != worksetSize) + { + const int CELL_DIM = 0; // first dimension corresponds to cell + jacobian.setExtent (CELL_DIM, numCellsInWorkset); + jacobianDet.setExtent (CELL_DIM, numCellsInWorkset); + jacobianInv.setExtent (CELL_DIM, numCellsInWorkset); + integralData.setExtent(CELL_DIM, numCellsInWorkset); + au_data.setExtent (CELL_DIM, numCellsInWorkset); + av_data.setExtent (CELL_DIM, numCellsInWorkset); + uTransform.setExtent (CELL_DIM, numCellsInWorkset); + vTransform.setExtent (CELL_DIM, numCellsInWorkset); + + Kokkos::resize(worksetCellStiffness, numCellsInWorkset, numFields, numFields); + + // cellMeasures is a TensorData object with separateFirstComponent_ = true; the below sets the cell dimension… + cellMeasures.setFirstComponentExtentInDimension0(numCellsInWorkset); + } + + geometry.setJacobian(jacobian, tensorCubaturePoints, refData, startCell, endCell); + CellTools::setJacobianDet(jacobianDet, jacobian); + CellTools::setJacobianInv(jacobianInv, jacobian); + + // lazily-evaluated transformed gradient values: + 
geometry.computeCellMeasure(cellMeasures, jacobianDet, tensorCubatureWeights); + ExecutionSpace().fence(); + jacobianAndCellMeasureTimer->stop(); + + uTransform.storeMatVec(jacobianInv, au_data, true); // true: transpose jacobianInv when multiplying + vTransform.storeMatVec(jacobianInv, av_data, true); // true: transpose jacobianInv when multiplying + + Intrepid2::TransformedBasisValues uTransformedGradientValues(uTransform, gradientValues); + Intrepid2::TransformedBasisValues vTransformedGradientValues(vTransform, gradientValues); + + bool sumInto = false; + double approximateFlopCountIntegrateWorkset = 0; + fstIntegrateCall->start(); + IntegrationTools::integrate(integralData, uTransformedGradientValues, cellMeasures, vTransformedGradientValues, sumInto, &approximateFlopCountIntegrateWorkset); + ExecutionSpace().fence(); + fstIntegrateCall->stop(); + + // modify integrals by orientations + std::pair cellRange = {startCell, endCell}; + auto orientationsWorkset = Kokkos::subview(orientations, cellRange); + OrientationTools::modifyMatrixByOrientation(worksetCellStiffness, integralData.getUnderlyingView(), + orientationsWorkset, basis.get(), basis.get()); + + // copy into cellStiffness container. 
+ auto cellStiffnessSubview = Kokkos::subview(cellStiffness, cellRange, Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy(cellStiffnessSubview, worksetCellStiffness); + + transformIntegrateFlopCount += approximateFlopCountIntegrateWorkset; + + cellOffset += worksetSize; + } + return cellStiffness; +} + +#endif /* VectorWeightedGRADGRADStructuredAssembly_h */ diff --git a/packages/intrepid2/src/Cell/Intrepid2_CellData.hpp b/packages/intrepid2/src/Cell/Intrepid2_CellData.hpp index 863bb0b18402..a4bcad3b089a 100644 --- a/packages/intrepid2/src/Cell/Intrepid2_CellData.hpp +++ b/packages/intrepid2/src/Cell/Intrepid2_CellData.hpp @@ -337,10 +337,10 @@ template */ template<> struct PointInclusion::key> { - template + template KOKKOS_INLINE_FUNCTION static bool - check(const PointViewType &point, const double threshold); + check(const PointViewType &point, const ScalarType threshold); }; /** @@ -348,10 +348,10 @@ template */ template<> struct PointInclusion::key> { - template + template KOKKOS_INLINE_FUNCTION static bool - check(const PointViewType &point, const double threshold); + check(const PointViewType &point, const ScalarType threshold); }; /** @@ -360,10 +360,10 @@ template template<> struct PointInclusion::key> { - template + template KOKKOS_INLINE_FUNCTION static bool - check(const PointViewType &point, const double threshold); + check(const PointViewType &point, const ScalarType threshold); }; /** @@ -371,10 +371,10 @@ template */ template<> struct PointInclusion::key> { - template + template KOKKOS_INLINE_FUNCTION static bool - check(const PointViewType &point, const double threshold); + check(const PointViewType &point, const ScalarType threshold); }; /** @@ -382,10 +382,10 @@ template */ template<> struct PointInclusion::key> { - template + template KOKKOS_INLINE_FUNCTION static bool - check(const PointViewType &point, const double threshold); + check(const PointViewType &point, const ScalarType threshold); }; /** @@ -393,10 +393,10 @@ template */ template<> 
struct PointInclusion::key> { - template + template KOKKOS_INLINE_FUNCTION static bool - check(const PointViewType &point, const double threshold); + check(const PointViewType &point, const ScalarType threshold); }; /** @@ -404,10 +404,10 @@ template */ template<> struct PointInclusion::key> { - template + template KOKKOS_INLINE_FUNCTION static bool - check(const PointViewType &point, const double threshold); + check(const PointViewType &point, const ScalarType threshold); }; } diff --git a/packages/intrepid2/src/Cell/Intrepid2_CellDataDef.hpp b/packages/intrepid2/src/Cell/Intrepid2_CellDataDef.hpp index 1c7969c51655..6d9070dfda32 100644 --- a/packages/intrepid2/src/Cell/Intrepid2_CellDataDef.hpp +++ b/packages/intrepid2/src/Cell/Intrepid2_CellDataDef.hpp @@ -826,76 +826,76 @@ refCenterDataStatic_ = { // Point Inclusion - template + template KOKKOS_INLINE_FUNCTION bool PointInclusion::key>:: - check(const PointViewType &point, const double threshold) { - const double minus_one = -1.0 - threshold, plus_one = 1.0 + threshold; + check(const PointViewType &point, const ScalarType threshold) { + const ScalarType minus_one = -1.0 - threshold, plus_one = 1.0 + threshold; return (minus_one <= point(0) && point(0) <= plus_one); } - template + template KOKKOS_INLINE_FUNCTION bool PointInclusion::key>:: - check(const PointViewType &point, const double threshold) { - const double distance = max( max( -point(0), -point(1) ), point(0) + point(1) - 1.0 ); + check(const PointViewType &point, const ScalarType threshold) { + const ScalarType distance = max( max( -point(0), -point(1) ), point(0) + point(1) - 1.0 ); return distance < threshold; } - template + template KOKKOS_INLINE_FUNCTION bool PointInclusion::key>:: check(const PointViewType &point, - const double threshold) { - const double minus_one = -1.0 - threshold, plus_one = 1.0 + threshold; + const ScalarType threshold) { + const ScalarType minus_one = -1.0 - threshold, plus_one = 1.0 + threshold; return ((minus_one <= 
point(0) && point(0) <= plus_one) && (minus_one <= point(1) && point(1) <= plus_one)); } - template + template KOKKOS_INLINE_FUNCTION bool PointInclusion::key>:: - check(const PointViewType &point, const double threshold) { - const double distance = max( max(-point(0),-point(1)), + check(const PointViewType &point, const ScalarType threshold) { + const ScalarType distance = max( max(-point(0),-point(1)), max(-point(2), point(0) + point(1) + point(2) - 1) ); return distance < threshold; } - template + template KOKKOS_INLINE_FUNCTION bool PointInclusion::key>:: - check(const PointViewType &point, const double threshold) { - const double minus_one = -1.0 - threshold, plus_one = 1.0 + threshold; + check(const PointViewType &point, const ScalarType threshold) { + const ScalarType minus_one = -1.0 - threshold, plus_one = 1.0 + threshold; return ((minus_one <= point(0) && point(0) <= plus_one) && (minus_one <= point(1) && point(1) <= plus_one) && (minus_one <= point(2) && point(2) <= plus_one)); } - template + template KOKKOS_INLINE_FUNCTION bool PointInclusion::key>:: - check(const PointViewType &point, const double threshold) { - const double minus_one = -1.0 - threshold, plus_one = 1.0 + threshold, minus_zero = -threshold; - const double left = minus_one + point(2); - const double right = plus_one - point(2); + check(const PointViewType &point, const ScalarType threshold) { + const ScalarType minus_one = -1.0 - threshold, plus_one = 1.0 + threshold, minus_zero = -threshold; + const ScalarType left = minus_one + point(2); + const ScalarType right = plus_one - point(2); return ((left <= point(0) && point(0) <= right) && (left <= point(1) && point(1) <= right) && (minus_zero <= point(2) && point(2) <= plus_one)); } - template + template KOKKOS_INLINE_FUNCTION bool PointInclusion::key>:: - check(const PointViewType &point, const double threshold) { - const double minus_one = -1.0 - threshold, plus_one = 1.0 + threshold; - const double distance = max( max( -point(0), 
-point(1) ), point(0) + point(1) - 1 ); + check(const PointViewType &point, const ScalarType threshold) { + const ScalarType minus_one = -1.0 - threshold, plus_one = 1.0 + threshold; + const ScalarType distance = max( max( -point(0), -point(1) ), point(0) + point(1) - 1 ); return (distance < threshold && (minus_one <= point(2) && point(2) <= plus_one)); } diff --git a/packages/intrepid2/src/Cell/Intrepid2_CellTools.hpp b/packages/intrepid2/src/Cell/Intrepid2_CellTools.hpp index 8a522a544ad3..a9eb6cab7145 100644 --- a/packages/intrepid2/src/Cell/Intrepid2_CellTools.hpp +++ b/packages/intrepid2/src/Cell/Intrepid2_CellTools.hpp @@ -352,11 +352,11 @@ namespace Intrepid2 { /** \brief Computes reciprocals of determinants corresponding to the Jacobians in the Data container provided - \param jacobianDet [out] - data with shape (C,P), as returned by CellTools::allocateJacobianDet() - \param jacobian [in] - data with shape (C,P,D,D), as returned by CellGeometry::allocateJacobianData() + \param jacobianDetInv [out] - data with shape (C,P), as returned by CellTools::allocateJacobianDet() + \param jacobian [in] - data with shape (C,P,D,D), as returned by CellGeometry::allocateJacobianData() */ template - static void setJacobianDetInv( Data & jacobianDet, + static void setJacobianDetInv( Data & jacobianDetInv, const Data & jacobian); /** \brief Computes determinants corresponding to the Jacobians in the Data container provided @@ -1396,11 +1396,13 @@ namespace Intrepid2 { \param threshold [in] - "tightness" of the inclusion test \return true if the point is in the closure of the specified reference cell and false otherwise. 
*/ - template + template static bool - checkPointInclusion( const pointViewType point, + checkPointInclusion( const PointViewType point, const shards::CellTopology cellTopo, - const double thres = threshold() ); + const typename ScalarTraits::scalar_type thres = + threshold::scalar_type>() ); + /** \brief Checks every point for inclusion in the reference cell of a given topology. @@ -1417,7 +1419,8 @@ namespace Intrepid2 { typename InputViewType> static void checkPointwiseInclusion( OutputViewType inCell, const InputViewType points, - const double thresh = threshold()); + const typename ScalarTraits::scalar_type thresh = + threshold::scalar_type>()); @@ -1434,7 +1437,8 @@ namespace Intrepid2 { static void checkPointwiseInclusion( InCellViewType inCell, const PointViewType points, const shards::CellTopology cellTopo, - const double thres = threshold() ); + const typename ScalarTraits::scalar_type thres = + threshold::scalar_type>() ); /** \brief Checks every points for inclusion in physical cells from a cell workset. 
The points can belong to a global set and stored in a rank-2 (P,D) view, @@ -1454,7 +1458,8 @@ namespace Intrepid2 { const Kokkos::DynRankView points, const Kokkos::DynRankView cellWorkset, const shards::CellTopology cellTopo, - const double thres = threshold() ); + const typename ScalarTraits::scalar_type thres = + threshold::scalar_type>() ); // //============================================================================================// diff --git a/packages/intrepid2/src/Cell/Intrepid2_CellToolsDefInclusion.hpp b/packages/intrepid2/src/Cell/Intrepid2_CellToolsDefInclusion.hpp index 5e1b091e3638..1d9ecfe94b63 100644 --- a/packages/intrepid2/src/Cell/Intrepid2_CellToolsDefInclusion.hpp +++ b/packages/intrepid2/src/Cell/Intrepid2_CellToolsDefInclusion.hpp @@ -34,9 +34,9 @@ namespace Intrepid2 { template bool CellTools:: - checkPointInclusion( const PointViewType point, - const shards::CellTopology cellTopo, - const double threshold) { + checkPointInclusion( const PointViewType point, + const shards::CellTopology cellTopo, + const typename ScalarTraits::scalar_type threshold) { #ifdef HAVE_INTREPID2_DEBUG INTREPID2_TEST_FOR_EXCEPTION( point.rank() != 1, std::invalid_argument, ">>> ERROR (Intrepid2::CellTools::checkPointInclusion): Point must have rank 1. 
"); @@ -94,12 +94,13 @@ namespace Intrepid2 { struct checkPointInclusionFunctor { OutputViewType output_; InputViewType input_; - double threshold_; + using ScalarType = typename ScalarTraits::scalar_type; + ScalarType threshold_; KOKKOS_INLINE_FUNCTION - checkPointInclusionFunctor( OutputViewType output, - const InputViewType input, - const double threshold) + checkPointInclusionFunctor( OutputViewType output, + const InputViewType input, + const ScalarType threshold) : output_(output), input_(input), threshold_(threshold) {} @@ -129,7 +130,7 @@ namespace Intrepid2 { void CellTools:: checkPointwiseInclusion( OutputViewType inCell, const InputViewType points, - const double threshold) { + const typename ScalarTraits::scalar_type threshold) { using FunctorType = checkPointInclusionFunctor; if (points.rank() == 2) { // inCell.rank() == 1 @@ -144,13 +145,13 @@ namespace Intrepid2 { template template + typename InputViewType> void CellTools:: - checkPointwiseInclusion( InCellViewType inCell, - const PointViewType points, - const shards::CellTopology cellTopo, - const double threshold ) { + checkPointwiseInclusion( InCellViewType inCell, + const InputViewType points, + const shards::CellTopology cellTopo, + const typename ScalarTraits::scalar_type threshold ) { #ifdef HAVE_INTREPID2_DEBUG { INTREPID2_TEST_FOR_EXCEPTION( (inCell.rank() != 1) && (inCell.rank() != 2), std::invalid_argument, @@ -218,7 +219,7 @@ namespace Intrepid2 { const Kokkos::DynRankView points, const Kokkos::DynRankView cellWorkset, const shards::CellTopology cellTopo, - const double threshold ) { + const typename ScalarTraits::scalar_type threshold ) { #ifdef HAVE_INTREPID2_DEBUG { const auto key = cellTopo.getBaseKey(); diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_BasisValues.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_BasisValues.hpp index 9750acf87e4d..588957c915a5 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_BasisValues.hpp +++ 
b/packages/intrepid2/src/Discretization/Basis/Intrepid2_BasisValues.hpp @@ -31,18 +31,18 @@ namespace Intrepid2 { - template + template class BasisValues { - using TensorDataType = TensorData; - using VectorDataType = VectorData; + using TensorDataType = TensorData; + using VectorDataType = VectorData; Kokkos::Array tensorDataFamilies_; VectorDataType vectorData_; int numTensorDataFamilies_ = -1; - Kokkos::View ordinalFilter_; + Kokkos::View ordinalFilter_; public: //! Constructor for scalar-valued BasisValues with a single family of values. BasisValues(TensorDataType tensorData) @@ -76,8 +76,8 @@ namespace Intrepid2 //! copy-like constructor for differing execution spaces. This does a deep copy of underlying views. - template::value>::type> - BasisValues(const BasisValues &basisValues) + template::value>::type> + BasisValues(const BasisValues &basisValues) : vectorData_(basisValues.vectorData()), numTensorDataFamilies_(basisValues.numTensorDataFamilies()) @@ -85,16 +85,16 @@ namespace Intrepid2 auto otherFamilies = basisValues.tensorDataFamilies(); for (int family=0; family(otherFamilies[family]); + tensorDataFamilies_[family] = TensorData(otherFamilies[family]); } auto otherOrdinalFilter = basisValues.ordinalFilter(); - ordinalFilter_ = Kokkos::View("BasisValues::ordinalFilter_",otherOrdinalFilter.extent(0)); + ordinalFilter_ = Kokkos::View("BasisValues::ordinalFilter_",otherOrdinalFilter.extent(0)); Kokkos::deep_copy(ordinalFilter_, otherOrdinalFilter); } //! field start and length must align with families in vectorData_ or tensorDataFamilies_ (whichever is valid). 
- BasisValues basisValuesForFields(const int &fieldStartOrdinal, const int &numFields) + BasisValues basisValuesForFields(const int &fieldStartOrdinal, const int &numFields) { int familyStartOrdinal = -1, familyEndOrdinal = -1; const int familyCount = this->numFamilies(); @@ -118,12 +118,12 @@ namespace Intrepid2 { tensorDataFamilies[i-familyStartOrdinal] = tensorDataFamilies_[i]; } - return BasisValues(tensorDataFamilies); + return BasisValues(tensorDataFamilies); } else { const int componentCount = vectorData_.numComponents(); - std::vector< std::vector > > vectorComponents(numFamiliesInFieldSpan, std::vector >(componentCount)); + std::vector< std::vector > > vectorComponents(numFamiliesInFieldSpan, std::vector >(componentCount)); for (int i=familyStartOrdinal; i<=familyEndOrdinal; i++) { for (int j=0; j(vectorComponents); + return BasisValues(vectorComponents); } } @@ -327,16 +327,22 @@ namespace Intrepid2 } } - void setOrdinalFilter(Kokkos::View ordinalFilter) + void setOrdinalFilter(Kokkos::View ordinalFilter) { ordinalFilter_ = ordinalFilter; } - Kokkos::View ordinalFilter() const + Kokkos::View ordinalFilter() const { return ordinalFilter_; } }; -} + + template + KOKKOS_INLINE_FUNCTION unsigned rank(const BasisValues &basisValues) + { + return basisValues.rank(); + } +} // namespace Intrepid2 #endif /* Intrepid2_BasisValues_h */ diff --git a/packages/intrepid2/src/Discretization/Integration/Intrepid2_IntegrationToolsDef.hpp b/packages/intrepid2/src/Discretization/Integration/Intrepid2_IntegrationToolsDef.hpp index 380e820c2d71..a8e57b15d5ef 100644 --- a/packages/intrepid2/src/Discretization/Integration/Intrepid2_IntegrationToolsDef.hpp +++ b/packages/intrepid2/src/Discretization/Integration/Intrepid2_IntegrationToolsDef.hpp @@ -15,6 +15,7 @@ #ifndef __INTREPID2_INTEGRATIONTOOLS_DEF_HPP__ #define __INTREPID2_INTEGRATIONTOOLS_DEF_HPP__ +#include "Intrepid2_DataTools.hpp" #include "Intrepid2_FunctorIterator.hpp" #include "Intrepid2_TensorArgumentIterator.hpp" 
@@ -123,7 +124,7 @@ namespace Intrepid2 { // prepare for allocation of temporary storage // note: tempStorage goes "backward", starting from the final component, which needs just one entry - const bool allocateFadStorage = !std::is_pod::value; + const bool allocateFadStorage = !(std::is_standard_layout::value && std::is_trivial::value); if (allocateFadStorage) { fad_size_output_ = dimension_scalar(integralView_); @@ -1063,7 +1064,7 @@ namespace Intrepid2 { // prepare for allocation of temporary storage // note: tempStorage goes "backward", starting from the final component, which needs just one entry - const bool allocateFadStorage = !std::is_pod::value; + const bool allocateFadStorage = !(std::is_standard_layout::value && std::is_trivial::value); if (allocateFadStorage) { fad_size_output_ = dimension_scalar(integralView_); @@ -1210,7 +1211,6 @@ namespace Intrepid2 { const int GyEntryCount = pointBounds_z; // for each thread: store one Gy value per z coordinate Kokkos::View GxIntegrals; // for caching Gx values: we integrate out the first component dimension for each coordinate in the remaining dimensios Kokkos::View GyIntegrals; // for caching Gy values (each thread gets a stack, of the same height as tensorComponents - 1) - Kokkos::View GzIntegral; // for one Gz value that we sum into before summing into the destination matrix Kokkos::View pointWeights; // indexed by (expanded) point; stores M_ab * cell measure; shared by team Kokkos::View leftFields_x, rightFields_x; @@ -1219,7 +1219,6 @@ namespace Intrepid2 { if (fad_size_output_ > 0) { GxIntegrals = Kokkos::View(teamMember.team_shmem(), pointsInNonzeroComponentDimensions, fad_size_output_); GyIntegrals = Kokkos::View(teamMember.team_shmem(), GyEntryCount * numThreads, fad_size_output_); - GzIntegral = Kokkos::View(teamMember.team_shmem(), numThreads, fad_size_output_); pointWeights = Kokkos::View (teamMember.team_shmem(), composedTransform_.extent_int(1), fad_size_output_); leftFields_x = 
Kokkos::View(teamMember.team_shmem(), leftFieldBounds_x, pointBounds_x, fad_size_output_); @@ -1232,7 +1231,6 @@ namespace Intrepid2 { else { GxIntegrals = Kokkos::View(teamMember.team_shmem(), pointsInNonzeroComponentDimensions); GyIntegrals = Kokkos::View(teamMember.team_shmem(), GyEntryCount * numThreads); - GzIntegral = Kokkos::View(teamMember.team_shmem(), numThreads); pointWeights = Kokkos::View (teamMember.team_shmem(), composedTransform_.extent_int(1)); leftFields_x = Kokkos::View(teamMember.team_shmem(), leftFieldBounds_x, pointBounds_x); @@ -1376,43 +1374,67 @@ namespace Intrepid2 { const int i1 = i1j1 % leftFieldBounds_y; const int j1 = i1j1 / leftFieldBounds_y; - int Gy_index = GyEntryCount * threadNumber; // thread-relative index into GyIntegrals container; store one value per z coordinate + int Gy_index_offset = GyEntryCount * threadNumber; // thread-relative index into GyIntegrals container; store one value per z coordinate - int pointEnumerationIndex = 0; // incremented at bottom of lz loop below. 
for (int lz=0; lz(integralView, cellDataOrdinal, i, j) += Gz; + Kokkos::single (Kokkos::PerThread(teamMember), [&] () { + integralViewEntry(integralView, cellDataOrdinal, i, j) += Gz; + }); } } }); @@ -1766,7 +1790,6 @@ namespace Intrepid2 { { shmem_size += Kokkos::View::shmem_size(pointsInNonzeroComponentDimensions, fad_size_output_); // GxIntegrals: entries with x integrated away shmem_size += Kokkos::View::shmem_size(GyEntryCount * numThreads, fad_size_output_); // GyIntegrals: entries with x,y integrated away - shmem_size += Kokkos::View::shmem_size( 1 * numThreads, fad_size_output_); // GzIntegral: entry with x,y,z integrated away shmem_size += Kokkos::View::shmem_size (composedTransform_.extent_int(1), fad_size_output_); // pointWeights shmem_size += Kokkos::View::shmem_size( leftFieldBounds_[0], pointBounds_[0], fad_size_output_); // leftFields_x @@ -1780,7 +1803,6 @@ namespace Intrepid2 { { shmem_size += Kokkos::View::shmem_size(pointsInNonzeroComponentDimensions); // GxIntegrals: entries with x integrated away shmem_size += Kokkos::View::shmem_size(GyEntryCount * numThreads); // GyIntegrals: entries with x,y integrated away - shmem_size += Kokkos::View::shmem_size( 1 * numThreads); // GzIntegral: entry with x,y,z integrated away shmem_size += Kokkos::View::shmem_size (composedTransform_.extent_int(1)); // pointWeights shmem_size += Kokkos::View::shmem_size( leftFieldBounds_[0], pointBounds_[0]); // leftFields_x @@ -1940,16 +1962,14 @@ void IntegrationTools::integrate(Data integrals, // we require that the number of tensor components in the vectors are the same for each vector entry // this is not strictly necessary, but it makes implementation easier, and we don't at present anticipate other use cases int numTensorComponentsLeft = -1; - const bool isVectorValued = basisValuesLeft.vectorData().isValid(); - if (isVectorValued) + const bool leftIsVectorValued = basisValuesLeft.vectorData().isValid(); + + if (leftIsVectorValued) { - const bool 
rightIsVectorValued = basisValuesRight.vectorData().isValid(); - INTREPID2_TEST_FOR_EXCEPTION(!rightIsVectorValued, std::invalid_argument, "left and right must either both be vector-valued, or both scalar-valued"); const auto &refVectorLeft = basisValuesLeft.vectorData(); int numFamiliesLeft = refVectorLeft.numFamilies(); int numVectorComponentsLeft = refVectorLeft.numComponents(); Kokkos::Array maxFieldsForComponentLeft {0,0,0,0,0,0,0}; - Kokkos::Array maxFieldsForComponentRight {0,0,0,0,0,0,0}; for (int familyOrdinal=0; familyOrdinal::integrate(Data integrals, } } } - int numTensorComponentsRight = -1; + } + else + { + numTensorComponentsLeft = basisValuesLeft.basisValues().tensorData(0).numTensorComponents(); // family ordinal 0 + for (int familyOrdinal = 0; familyOrdinal < leftFamilyCount; familyOrdinal++) + { + INTREPID2_TEST_FOR_EXCEPTION(basisValuesLeft.basisValues().tensorData(familyOrdinal).numTensorComponents() != numTensorComponentsLeft, std::invalid_argument, "All families must match in the number of tensor components"); + } + } + int numTensorComponentsRight = -1; + const bool rightIsVectorValued = basisValuesRight.vectorData().isValid(); + + if (rightIsVectorValued) + { const auto &refVectorRight = basisValuesRight.vectorData(); int numFamiliesRight = refVectorRight.numFamilies(); int numVectorComponentsRight = refVectorRight.numComponents(); + Kokkos::Array maxFieldsForComponentRight {0,0,0,0,0,0,0}; for (int familyOrdinal=0; familyOrdinal::integrate(Data integrals, } } } - INTREPID2_TEST_FOR_EXCEPTION(numVectorComponentsLeft != numVectorComponentsRight, std::invalid_argument, "Left and right vector entries must have the same number of tensorial components"); + INTREPID2_TEST_FOR_EXCEPTION(numTensorComponentsRight != numTensorComponentsLeft, std::invalid_argument, "Right families must match left in the number of tensor components"); } else { - numTensorComponentsLeft = basisValuesLeft.basisValues().tensorData(0).numTensorComponents(); // family 
ordinal 0 - for (int familyOrdinal = 0; familyOrdinal < leftFamilyCount; familyOrdinal++) - { - INTREPID2_TEST_FOR_EXCEPTION(basisValuesLeft.basisValues().tensorData(familyOrdinal).numTensorComponents() != numTensorComponentsLeft, std::invalid_argument, "All families must match in the number of tensor components"); - } - - // check that right tensor component count also agrees + // check that right tensor component count agrees with left for (int familyOrdinal=0; familyOrdinal< rightFamilyCount; familyOrdinal++) { INTREPID2_TEST_FOR_EXCEPTION(basisValuesRight.basisValues().tensorData(familyOrdinal).numTensorComponents() != numTensorComponentsLeft, std::invalid_argument, "Right families must match left in the number of tensor components"); @@ -2042,11 +2070,11 @@ void IntegrationTools::integrate(Data integrals, int a_offset = 0; // left vector component offset int leftFieldOffset = basisValuesLeft.basisValues().familyFieldOrdinalOffset(leftFamilyOrdinal); - const int leftVectorComponentCount = isVectorValued ? basisValuesLeft.vectorData().numComponents() : 1; + const int leftVectorComponentCount = leftIsVectorValued ? basisValuesLeft.vectorData().numComponents() : 1; for (int leftVectorComponentOrdinal = 0; leftVectorComponentOrdinal < leftVectorComponentCount; leftVectorComponentOrdinal++) { - TensorData leftComponent = isVectorValued ? basisValuesLeft.vectorData().getComponent(leftFamilyOrdinal, leftVectorComponentOrdinal) - : basisValuesLeft.basisValues().tensorData(leftFamilyOrdinal); + TensorData leftComponent = leftIsVectorValued ? 
basisValuesLeft.vectorData().getComponent(leftFamilyOrdinal, leftVectorComponentOrdinal) + : basisValuesLeft.basisValues().tensorData(leftFamilyOrdinal); if (!leftComponent.isValid()) { a_offset++; // empty components are understood to take up one dimension @@ -2061,11 +2089,11 @@ void IntegrationTools::integrate(Data integrals, int b_offset = 0; // right vector component offset int rightFieldOffset = basisValuesRight.vectorData().familyFieldOrdinalOffset(rightFamilyOrdinal); - const int rightVectorComponentCount = isVectorValued ? basisValuesRight.vectorData().numComponents() : 1; + const int rightVectorComponentCount = rightIsVectorValued ? basisValuesRight.vectorData().numComponents() : 1; for (int rightVectorComponentOrdinal = 0; rightVectorComponentOrdinal < rightVectorComponentCount; rightVectorComponentOrdinal++) { - TensorData rightComponent = isVectorValued ? basisValuesRight.vectorData().getComponent(rightFamilyOrdinal, rightVectorComponentOrdinal) - : basisValuesRight.basisValues().tensorData(rightFamilyOrdinal); + TensorData rightComponent = rightIsVectorValued ? 
basisValuesRight.vectorData().getComponent(rightFamilyOrdinal, rightVectorComponentOrdinal) + : basisValuesRight.basisValues().tensorData(rightFamilyOrdinal); if (!rightComponent.isValid()) { b_offset++; // empty components are understood to take up one dimension @@ -2127,7 +2155,7 @@ void IntegrationTools::integrate(Data integrals, { ScalarView componentIntegralView; - const bool allocateFadStorage = !std::is_pod::value; + const bool allocateFadStorage = !(std::is_standard_layout::value && std::is_trivial::value); if (allocateFadStorage) { auto fad_size_output = dimension_scalar(integrals.getUnderlyingView()); @@ -2223,15 +2251,23 @@ void IntegrationTools::integrate(Data integrals, const bool transposeRight = false; // auto timer = Teuchos::TimeMonitor::getNewTimer("mat-mat"); // timer->start(); - // transforms can be matrices -- (C,P,D,D): rank 4 -- or scalar weights -- (C,P): rank 2 - const bool matrixTransform = (leftTransform.rank() == 4) || (rightTransform.rank() == 4); + // transforms can be matrices -- (C,P,D,D): rank 4 -- or scalar weights -- (C,P): rank 2 -- or vector weights -- (C,P,D): rank 3 Data composedTransform; // invalid/empty transforms are used when the identity is intended. 
+ const int leftRank = leftTransform.rank(); + const int rightRank = rightTransform.rank(); + if (leftTransform.isValid() && rightTransform.isValid()) { - if (matrixTransform) + const bool bothRank4 = (leftRank == 4) && (rightRank == 4); + const bool bothRank3 = (leftRank == 3) && (rightRank == 3); + const bool bothRank2 = (leftRank == 2) && (rightRank == 2); + const bool ranks32 = ((leftRank == 3) && (rightRank == 2)) || ((leftRank == 2) && (rightRank == 3)); + const bool ranks42 = ((leftRank == 4) && (rightRank == 2)) || ((leftRank == 2) && (rightRank == 4)); + + if (bothRank4) // (C,P,D,D) { - composedTransform = leftTransform.allocateMatMatResult(transposeLeft, leftTransform, transposeRight, rightTransform); + composedTransform = Data::allocateMatMatResult(transposeLeft, leftTransform, transposeRight, rightTransform); composedTransform.storeMatMat(transposeLeft, leftTransform, transposeRight, rightTransform); // if the composedTransform matrices are full, the following is a good estimate. If they have some diagonal portions, this will overcount. @@ -2240,12 +2276,41 @@ void IntegrationTools::integrate(Data integrals, *approximateFlops += composedTransform.getUnderlyingViewSize() * (spaceDim - 1) * 2; } } - else + else if (bothRank3) // (C,P,D) + { + // re-cast leftTransform as a rank 4 (C,P,1,D) object -- a 1 x D matrix at each (C,P). 
+ const int newRank = 4; + auto extents = leftTransform.getExtents(); + auto variationTypes = leftTransform.getVariationTypes(); + extents[3] = extents[2]; + extents[2] = 1; + variationTypes[3] = variationTypes[2]; + variationTypes[2] = CONSTANT; + auto leftTransformMatrix = leftTransform.shallowCopy(newRank, extents, variationTypes); + + // re-cast rightTransform as a rank 4 (C,P,1,D) object -- a 1 x D matrix at each (C,P) + extents = rightTransform.getExtents(); + variationTypes = rightTransform.getVariationTypes(); + extents[3] = extents[2]; + extents[2] = 1; + variationTypes[3] = variationTypes[2]; + variationTypes[2] = CONSTANT; + auto rightTransformMatrix = rightTransform.shallowCopy(newRank, extents, variationTypes); + + composedTransform = Data::allocateMatMatResult(transposeLeft, leftTransformMatrix, transposeRight, rightTransformMatrix); // false: don't transpose + composedTransform.storeMatMat(transposeLeft, leftTransformMatrix, transposeRight, rightTransformMatrix); + + if (approximateFlops != NULL) + { + *approximateFlops += composedTransform.getUnderlyingViewSize(); // one multiply per entry + } + } + else if (bothRank2) { composedTransform = leftTransform.allocateInPlaceCombinationResult(leftTransform, rightTransform); composedTransform.storeInPlaceProduct(leftTransform, rightTransform); - // re-cast composedTranform as a rank 4 (C,P,D,D) object -- a 1 x 1 matrix at each (C,P). + // re-cast composedTranform as a rank 4 (C,P,1,1) object -- a 1 x 1 matrix at each (C,P). const int newRank = 4; auto extents = composedTransform.getExtents(); auto variationTypes = composedTransform.getVariationTypes(); @@ -2255,16 +2320,100 @@ void IntegrationTools::integrate(Data integrals, *approximateFlops += composedTransform.getUnderlyingViewSize(); // one multiply per entry } } + else if (ranks32) // rank 2 / rank 3 combination. + { + const auto & rank3Transform = (leftRank == 3) ? leftTransform : rightTransform; + const auto & rank2Transform = (leftRank == 2) ? 
leftTransform : rightTransform; + + composedTransform = DataTools::multiplyByCPWeights(rank3Transform, rank2Transform); + + // re-cast composedTransform as a rank 4 object: + // logically, the original rank-3 transform can be understood as a 1xD matrix. The composed transform is leftTransform^T * rightTransform, so: + // - if left has the rank-3 transform, composedTransform should be a (C,P,D,1) object -- a D x 1 matrix at each (C,P). + // - if right has the rank-3 transform, composedTransform should be a (C,P,1,D) object -- a 1 x D matrix at each (C,P). + const int newRank = 4; + auto extents = composedTransform.getExtents(); + auto variationTypes = composedTransform.getVariationTypes(); + if (leftRank == 3) + { + // extents[3] and variationTypes[3] will already be 1 and CONSTANT, respectively + // extents[3] = 1; + // variationTypes[3] = CONSTANT; + } + else + { + extents[3] = extents[2]; + extents[2] = 1; + variationTypes[3] = variationTypes[2]; + variationTypes[2] = CONSTANT; + } + composedTransform = composedTransform.shallowCopy(newRank, extents, variationTypes); + } + else if (ranks42) // rank 4 / rank 2 combination. 
+ { + if (leftRank == 4) + { + // want to transpose left matrix, and multiply by the values from rightTransform + // start with the multiplication: + auto composedTransformTransposed = DataTools::multiplyByCPWeights(leftTransform, rightTransform); + composedTransform = DataTools::transposeMatrix(composedTransformTransposed); + } + else // (leftRank == 2) + { + composedTransform = DataTools::multiplyByCPWeights(rightTransform, leftTransform); + } + } + else + { + INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported transform combination"); + } } else if (leftTransform.isValid()) { // rightTransform is the identity - composedTransform = leftTransform; + switch (leftRank) + { + case 4: composedTransform = DataTools::transposeMatrix(leftTransform); break; + case 3: + { + // - if left has the rank-3 transform, composedTransform should be a (C,P,D,1) object -- a D x 1 matrix at each (C,P). + const int newRank = 4; + auto extents = leftTransform.getExtents(); + auto variationTypes = leftTransform.getVariationTypes(); + + composedTransform = leftTransform.shallowCopy(newRank, extents, variationTypes); + } + break; + case 2: composedTransform = leftTransform; break; + default: + INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported transform combination"); + } } else if (rightTransform.isValid()) { // leftTransform is the identity composedTransform = rightTransform; + switch (rightRank) + { + case 4: composedTransform = rightTransform; break; + case 3: + { + // - if right has the rank-3 transform, composedTransform should be a (C,P,1,D) object -- a 1 x D matrix at each (C,P). 
+ const int newRank = 4; + auto extents = rightTransform.getExtents(); + auto variationTypes = rightTransform.getVariationTypes(); + extents[3] = extents[2]; + variationTypes[3] = variationTypes[2]; + extents[2] = 1; + variationTypes[2] = CONSTANT; + + composedTransform = rightTransform.shallowCopy(newRank, extents, variationTypes); + } + break; + case 2: composedTransform = rightTransform; break; + default: + INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported transform combination"); + } } else { @@ -2283,8 +2432,8 @@ void IntegrationTools::integrate(Data integrals, const int leftFamilyCount = basisValuesLeft. basisValues().numFamilies(); const int rightFamilyCount = basisValuesRight.basisValues().numFamilies(); - const int leftComponentCount = isVectorValued ? basisValuesLeft. vectorData().numComponents() : 1; - const int rightComponentCount = isVectorValued ? basisValuesRight.vectorData().numComponents() : 1; + const int leftComponentCount = leftIsVectorValued ? basisValuesLeft. vectorData().numComponents() : 1; + const int rightComponentCount = rightIsVectorValued ? basisValuesRight.vectorData().numComponents() : 1; int leftFieldOrdinalOffset = 0; // keeps track of the number of fields in prior families for (int leftFamilyOrdinal=0; leftFamilyOrdinal::integrate(Data integrals, bool haveLaunchedContributionToCurrentFamilyLeft = false; // helps to track whether we need a Kokkos::fence before launching a kernel. for (int leftComponentOrdinal=0; leftComponentOrdinal leftComponent = isVectorValued ? basisValuesLeft.vectorData().getComponent(leftFamilyOrdinal, leftComponentOrdinal) - : basisValuesLeft.basisValues().tensorData(leftFamilyOrdinal); + TensorData leftComponent = leftIsVectorValued ? 
basisValuesLeft.vectorData().getComponent(leftFamilyOrdinal, leftComponentOrdinal) + : basisValuesLeft.basisValues().tensorData(leftFamilyOrdinal); if (!leftComponent.isValid()) { // represents zero @@ -2313,8 +2462,8 @@ void IntegrationTools::integrate(Data integrals, int b_offset = 0; for (int rightComponentOrdinal=0; rightComponentOrdinal rightComponent = isVectorValued ? basisValuesRight.vectorData().getComponent(rightFamilyOrdinal, rightComponentOrdinal) - : basisValuesRight.basisValues().tensorData(rightFamilyOrdinal); + TensorData rightComponent = rightIsVectorValued ? basisValuesRight.vectorData().getComponent(rightFamilyOrdinal, rightComponentOrdinal) + : basisValuesRight.basisValues().tensorData(rightFamilyOrdinal); if (!rightComponent.isValid()) { // represents zero @@ -2416,13 +2565,13 @@ void IntegrationTools::integrate(Data integrals, } } } - b_offset += isVectorValued ? basisValuesRight.vectorData().numDimsForComponent(rightComponentOrdinal) : 1; + b_offset += rightIsVectorValued ? basisValuesRight.vectorData().numDimsForComponent(rightComponentOrdinal) : 1; } - rightFieldOrdinalOffset += isVectorValued ? basisValuesRight.vectorData().numFieldsInFamily(rightFamilyOrdinal) : basisValuesRight.basisValues().numFieldsInFamily(rightFamilyOrdinal); + rightFieldOrdinalOffset += rightIsVectorValued ? basisValuesRight.vectorData().numFieldsInFamily(rightFamilyOrdinal) : basisValuesRight.basisValues().numFieldsInFamily(rightFamilyOrdinal); } - a_offset += isVectorValued ? basisValuesLeft.vectorData().numDimsForComponent(leftComponentOrdinal) : 1; + a_offset += leftIsVectorValued ? basisValuesLeft.vectorData().numDimsForComponent(leftComponentOrdinal) : 1; } - leftFieldOrdinalOffset += isVectorValued ? basisValuesLeft.vectorData().numFieldsInFamily(leftFamilyOrdinal) : basisValuesLeft.basisValues().numFieldsInFamily(leftFamilyOrdinal); + leftFieldOrdinalOffset += leftIsVectorValued ? 
basisValuesLeft.vectorData().numFieldsInFamily(leftFamilyOrdinal) : basisValuesLeft.basisValues().numFieldsInFamily(leftFamilyOrdinal); } } // if (approximateFlops != NULL) diff --git a/packages/intrepid2/src/Shared/Intrepid2_Data.hpp b/packages/intrepid2/src/Shared/Intrepid2_Data.hpp index 6c7db78d673d..67a713151ada 100644 --- a/packages/intrepid2/src/Shared/Intrepid2_Data.hpp +++ b/packages/intrepid2/src/Shared/Intrepid2_Data.hpp @@ -34,7 +34,7 @@ namespace Intrepid2 { \class Intrepid2::ZeroView \brief A singleton class for a DynRankView containing exactly one zero entry. (Technically, the entry is DataScalar(), the default value for the scalar type.) This allows View-wrapping classes to return a reference to zero, even when that zero is not explicitly stored in the wrapped views. -This is used by Interpid2::Data for its getEntry() and getWritableEntry() methods. +This is used by Intrepid2::Data for its getEntry() and getWritableEntry() methods. \note There is no protection against the zero value being overwritten; perhaps we should add some (i.e., const-qualify DataScalar). Because of implementation details in Intrepid2::Data, we don't do so yet. 
*/ @@ -1490,43 +1490,37 @@ class ZeroView { resultExtents[i] = 1; } - ScalarView data; + ScalarView data; // new view will match this one in layout and fad dimension, if any + auto viewToMatch = A_MatData.getUnderlyingView(); if (resultNumActiveDims == 1) { - auto viewToMatch = A_MatData.getUnderlyingView1(); // new view will match this one in layout and fad dimension, if any data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0]); } else if (resultNumActiveDims == 2) { - auto viewToMatch = A_MatData.getUnderlyingView2(); // new view will match this one in layout and fad dimension, if any data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1]); } else if (resultNumActiveDims == 3) { - auto viewToMatch = A_MatData.getUnderlyingView3(); // new view will match this one in layout and fad dimension, if any data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2]); } else if (resultNumActiveDims == 4) { - auto viewToMatch = A_MatData.getUnderlyingView4(); // new view will match this one in layout and fad dimension, if any data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2], resultDataDims[3]); } else if (resultNumActiveDims == 5) { - auto viewToMatch = A_MatData.getUnderlyingView5(); // new view will match this one in layout and fad dimension, if any data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2], resultDataDims[3], resultDataDims[4]); } else if (resultNumActiveDims == 6) { - auto viewToMatch = A_MatData.getUnderlyingView6(); // new view will match this one in layout and fad dimension, if any data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2], resultDataDims[3], resultDataDims[4], resultDataDims[5]); } else 
// resultNumActiveDims == 7 { - auto viewToMatch = A_MatData.getUnderlyingView7(); // new view will match this one in layout and fad dimension, if any data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2], resultDataDims[3], resultDataDims[4], resultDataDims[5], resultDataDims[6]); } @@ -1534,6 +1528,37 @@ class ZeroView { return Data(data,resultRank,resultExtents,resultVariationTypes,resultBlockPlusDiagonalLastNonDiagonal); } + //! Constructs a container suitable for storing the result of a contraction over the final dimensions of the two provided containers. The two containers must have the same logical shape. + //! \see storeInPlaceCombination() + //! \param A [in] - the first data container. + //! \param B [in] - the second data container. Must have the same logical shape as A. + //! \param numContractionDims [in] - the number of dimensions over which the contraction should take place. + //! \return A numContractionDims-rank-lower container with the same logical shape as A and B in all but the last dimensions. + static Data allocateContractionResult( const Data &A, const Data &B, const int &numContractionDims ) + { + INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A.rank() != B.rank(), std::invalid_argument, "A and B must have the same logical shape"); + const int rank = A.rank(); + const int resultRank = rank - numContractionDims; + std::vector dimInfo(resultRank); + for (int d=0; d result(dimInfo); + return result; + } + + //! Constructs a container suitable for storing the result of a contraction over the final dimension of the two provided containers. The two containers must have the same logical shape. + //! \see storeInPlaceCombination() + //! \param A [in] - the first data container. + //! \param B [in] - the second data container. Must have the same logical shape as A. + //! \return A 1-rank-lower container with the same logical shape as A and B in all but the last dimension. 
+ static Data allocateDotProductResult( const Data &A, const Data &B ) + { + return allocateContractionResult(A, B, 1); + } + //! Constructs a container suitable for storing the result of a matrix-vector multiply corresponding to the two provided containers. //! \see storeMatVec() static Data allocateMatVecResult( const Data &matData, const Data &vecData, const bool transposeMatrix = false ) @@ -1618,10 +1643,8 @@ class ZeroView { } // for the final dimension, the variation type is always GENERAL // (Some combinations, e.g. CONSTANT/CONSTANT *would* generate a CONSTANT result, but constant matrices don't make a lot of sense beyond 1x1 matrices…) - resultVariationTypes[resultNumActiveDims] = GENERAL; resultActiveDims[resultNumActiveDims] = resultRank - 1; resultDataDims[resultNumActiveDims] = rows; - resultExtents[resultRank-1] = rows; resultNumActiveDims++; for (int i=resultRank; i<7; i++) @@ -1629,6 +1652,8 @@ class ZeroView { resultVariationTypes[i] = CONSTANT; resultExtents[i] = 1; } + resultVariationTypes[resultRank-1] = GENERAL; + resultExtents[resultRank-1] = rows; ScalarView data; if (resultNumActiveDims == 1) @@ -1730,6 +1755,64 @@ class ZeroView { } } + //! Places the result of a contraction along the final dimension of A and B into this data container. 
+ void storeDotProduct(const Data &A, const Data &B) + { + const int D_DIM = A.rank() - 1; + INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A.extent_int(D_DIM) != B.extent_int(D_DIM), std::invalid_argument, "A and B have different extents"); + const int vectorComponents = A.extent_int(D_DIM); + + // shallow copy of this to avoid implicit references to this in call to getWritableEntry() below + Data thisData = *this; + + using ExecutionSpace = typename DeviceType::execution_space; + // note the use of getDataExtent() below: we only range over the possibly-distinct entries + if (rank_ == 1) // contraction result rank; e.g., (P) + { + Kokkos::parallel_for("compute dot product", getDataExtent(0), + KOKKOS_LAMBDA (const int &pointOrdinal) { + auto & val = thisData.getWritableEntry(pointOrdinal); + val = 0; + for (int i=0; i>({0,0},{getDataExtent(0),getDataExtent(1)}); + Kokkos::parallel_for("compute dot product", policy, + KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal) { + auto & val = thisData.getWritableEntry(cellOrdinal, pointOrdinal); + val = 0; + for (int i=0; i>({0,0,0},{getDataExtent(0),getDataExtent(1),getDataExtent(2)}); + Kokkos::parallel_for("compute dot product", policy, + KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal, const int &d) { + auto & val = thisData.getWritableEntry(cellOrdinal, pointOrdinal,d); + val = 0; + for (int i=0; i void storeInPlaceCombination(const Data &A, const Data &B, BinaryOperator binaryOperator); @@ -1909,7 +1992,7 @@ class ZeroView { { Kokkos::parallel_for("compute mat-mat", policy, KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal) { - for (int i=0; i static void multiplyByCPWeights(Data &resultMatrixData, const Data &matrixDataIn, const Data &scalarDataIn) { - const ordinal_type rank = scalarDataIn.rank(); - auto extents = scalarDataIn.getExtents(); - auto variationTypes = scalarDataIn.getVariationTypes(); - extents[rank] = matrixDataIn.extent_int(rank); - extents[rank+1] = 
matrixDataIn.extent_int(rank+1); - variationTypes[rank] = CONSTANT; - variationTypes[rank+1] = CONSTANT; + const ordinal_type rank = scalarDataIn.rank(); + const ordinal_type matrixRank = matrixDataIn.rank(); + auto extents = scalarDataIn.getExtents(); + auto variationTypes = scalarDataIn.getVariationTypes(); + for (int r=rank; r static Data multiplyByCPWeights(const Data &matrixDataIn, const Data &scalarDataIn) { - const ordinal_type rank = scalarDataIn.rank(); - auto extents = scalarDataIn.getExtents(); - auto variationTypes = scalarDataIn.getVariationTypes(); - extents[rank] = matrixDataIn.extent_int(rank); - extents[rank+1] = matrixDataIn.extent_int(rank+1); - variationTypes[rank] = CONSTANT; - variationTypes[rank+1] = CONSTANT; + const ordinal_type rank = scalarDataIn.rank(); + const ordinal_type matrixRank = matrixDataIn.rank(); + auto extents = scalarDataIn.getExtents(); + auto variationTypes = scalarDataIn.getVariationTypes(); + for (int r=rank; r::allocateInPlaceCombinationResult(scalarDataInExtended, matrixDataIn); result.storeInPlaceProduct(matrixDataIn,scalarDataInExtended); return result; } + + //! Allocates and fills Data object corresponding to the transpose of matrix data, represented by the last two dimensions of the input object. + //! \param matrixDataIn [in] - the (…,D1,D2) container. + //! \return a (…,D2,D1) container containing the transpose of the input matrix data. + template + static Data transposeMatrix(const Data &matrixDataIn) + { + // A direct construction of the transpose could be more efficient, but here we take advantage of existing + // implementations within the Data class supporting matrix-matrix multiplication. We construct an identity + // matrix, and left-multiply this by the transpose of the input matrix. 
+ const ordinal_type rank = matrixDataIn.rank(); + auto extents = matrixDataIn.getExtents(); + auto variationTypes = matrixDataIn.getVariationTypes(); + const auto D1 = extents[rank-2]; + + extents[rank-2] = D1; + extents[rank-1] = D1; + variationTypes[rank-2] = BLOCK_PLUS_DIAGONAL; + variationTypes[rank-1] = BLOCK_PLUS_DIAGONAL; + + Kokkos::View identityUnderlyingView("Intrepid2::DataTools::transposeMatrix() - identity view",D1); + Kokkos::deep_copy(identityUnderlyingView, 1.0); + Data identityData(identityUnderlyingView,extents,variationTypes); + + auto result = Data::allocateMatMatResult(true, matrixDataIn, false, identityData); + result.storeMatMat(true, matrixDataIn, false, identityData); + + return result; + } }; } diff --git a/packages/intrepid2/src/Shared/Intrepid2_TestUtils.hpp b/packages/intrepid2/src/Shared/Intrepid2_TestUtils.hpp index 6423eb68d80f..6e56356d86fe 100644 --- a/packages/intrepid2/src/Shared/Intrepid2_TestUtils.hpp +++ b/packages/intrepid2/src/Shared/Intrepid2_TestUtils.hpp @@ -203,7 +203,7 @@ namespace Intrepid2 template inline ViewType getView(const std::string &label, DimArgs... dims) { - const bool allocateFadStorage = !std::is_pod::value; + const bool allocateFadStorage = !(std::is_standard_layout::value && std::is_trivial::value); if (!allocateFadStorage) { return ViewType(label,dims...); @@ -218,7 +218,7 @@ namespace Intrepid2 template inline FixedRankViewType< typename RankExpander::value_type, DefaultTestDeviceType > getFixedRankView(const std::string &label, DimArgs... 
dims) { - const bool allocateFadStorage = !std::is_pod::value; + const bool allocateFadStorage = !(std::is_standard_layout::value && std::is_trivial::value); using value_type = typename RankExpander::value_type; if (!allocateFadStorage) { diff --git a/packages/intrepid2/src/Shared/Intrepid2_TransformedBasisValues.hpp b/packages/intrepid2/src/Shared/Intrepid2_TransformedBasisValues.hpp index bc6250fed912..b177617fd448 100644 --- a/packages/intrepid2/src/Shared/Intrepid2_TransformedBasisValues.hpp +++ b/packages/intrepid2/src/Shared/Intrepid2_TransformedBasisValues.hpp @@ -27,7 +27,7 @@ namespace Intrepid2 { /** \class Intrepid2::TransformedBasisValues \brief Structure-preserving representation of transformed vector data; reference space values and transformations are stored separately. - TransformedBasisValues provides a View-like interface of rank 4, with shape (C,F,P,D). When the corresponding accessor is used, the transformed value is determined from corresponding reference space values and the transformation. + TransformedBasisValues provides a View-like interface of rank 3 or 4, with shape (C,F,P) or (C,F,P,D). When the corresponding accessor is used, the transformed value is determined from corresponding reference space values and the transformation. */ template class TransformedBasisValues @@ -35,13 +35,13 @@ namespace Intrepid2 { public: ordinal_type numCells_; - Data transform_; // vector case: (C,P,D,D) jacobian or jacobian inverse; can also be unset for identity transform. Scalar case: (C,P), or unset for identity. + Data transform_; // vector case: (C,P,D,D) jacobian or jacobian inverse; can also be unset for identity transform. Scalar case: (C,P), or unset for identity. Contracted vector case: (C,P,D) transform, to be contracted with a vector field to produce a scalar result. BasisValues basisValues_; /** \brief Standard constructor. 
- \param [in] transform - the transformation (matrix), with logical shape (C,P) or (C,P,D,D) + \param [in] transform - the transformation (matrix), with logical shape (C,P), (C,P,D), or (C,P,D,D) \param [in] basisValues - the reference-space data to be transformed, with logical shape (F,P) (for scalar values) or (F,P,D) (for vector values) */ TransformedBasisValues(const Data &transform, const BasisValues &basisValues) @@ -52,6 +52,7 @@ namespace Intrepid2 { { // sanity check: when transform is diagonal, we expect there to be no pointwise variation. INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(transform_.isDiagonal() && (transform_.getVariationTypes()[1] != CONSTANT), std::invalid_argument, "When transform is diagonal, we assume in various places that there is no pointwise variation; the transform_ Data should have CONSTANT as its variation type in dimension 1."); + INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE((transform_.rank() < 2) || (transform_.rank() > 4), std::invalid_argument, "Only transforms of rank 2, 3, or 4 are supported"); } /** @@ -129,7 +130,7 @@ namespace Intrepid2 { } else { - if (transform_.rank() == 4) + if ((transform_.rank() == 4) || (transform_.rank() == 3)) { transform_ = DataTools::multiplyByCPWeights(transform_,weightData); } @@ -164,7 +165,22 @@ namespace Intrepid2 { //! Returns the logical extent in the space dimension, which is the 3 dimension in this container. KOKKOS_INLINE_FUNCTION int spaceDim() const { - return basisValues_.extent_int(2); + if ((transform_.rank() == 3) && (basisValues_.rank() == 3)) // (C,P,D) contracted in D against (F,P,D) + { + return 1; // spaceDim contracted away + } + else if ((transform_.rank() == 3) && (basisValues_.rank() == 2)) // (C,P,D) weighting (F,P) + { + return transform_.extent_int(2); + } + else if (transform_.isValid()) + { + return transform_.extent_int(2); + } + else + { + return basisValues_.extent_int(2); + } } //! Scalar accessor, with arguments (C,F,P). 
@@ -175,10 +191,20 @@ namespace Intrepid2 { // null transform is understood as the identity return basisValues_(fieldOrdinal,pointOrdinal); } - else + else if (transform_.rank() == 2) { return transform_(cellOrdinal,pointOrdinal) * basisValues_(fieldOrdinal,pointOrdinal); } + else if (transform_.rank() == 3) + { + Scalar value = 0; + for (int d=0; d(); } + template + KOKKOS_FORCEINLINE_FUNCTION + ValueType tolerence() { + return 100.0*epsilon(); + } + KOKKOS_FORCEINLINE_FUNCTION double tolerence() { - return 100.0*epsilon(); + return tolerence(); + } + + template + KOKKOS_FORCEINLINE_FUNCTION + ValueType threshold() { + return 10.0*epsilon(); } KOKKOS_FORCEINLINE_FUNCTION double threshold() { - return 10.0*epsilon(); + return threshold(); } /// Define constants diff --git a/packages/intrepid2/src/Shared/Intrepid2_Utils.hpp b/packages/intrepid2/src/Shared/Intrepid2_Utils.hpp index 14ad8483558a..45c5f09816d1 100644 --- a/packages/intrepid2/src/Shared/Intrepid2_Utils.hpp +++ b/packages/intrepid2/src/Shared/Intrepid2_Utils.hpp @@ -281,13 +281,13 @@ namespace Intrepid2 { template KOKKOS_FORCEINLINE_FUNCTION constexpr typename - std::enable_if< !std::is_pod::value, typename ScalarTraits::scalar_type >::type + std::enable_if< !(std::is_standard_layout::value && std::is_trivial::value), typename ScalarTraits::scalar_type >::type get_scalar_value(const T& obj) {return obj.val();} template KOKKOS_FORCEINLINE_FUNCTION constexpr typename - std::enable_if< std::is_pod::value, typename ScalarTraits::scalar_type >::type + std::enable_if< std::is_standard_layout::value && std::is_trivial::value, typename ScalarTraits::scalar_type >::type get_scalar_value(const T& obj){return obj;} @@ -300,13 +300,13 @@ namespace Intrepid2 { template KOKKOS_INLINE_FUNCTION constexpr typename - std::enable_if< std::is_pod::value, unsigned >::type + std::enable_if< std::is_standard_layout::value && std::is_trivial::value, unsigned >::type dimension_scalar(const Kokkos::DynRankView /* view */) {return 
1;} template KOKKOS_INLINE_FUNCTION constexpr typename - std::enable_if< std::is_pod< typename Kokkos::View::value_type >::value, unsigned >::type + std::enable_if< std::is_standard_layout::value_type>::value && std::is_trivial::value_type>::value, unsigned >::type dimension_scalar(const Kokkos::View /*view*/) {return 1;} template @@ -339,7 +339,7 @@ namespace Intrepid2 { using DeviceType = typename ViewType::device_type; using ViewTypeWithLayout = Kokkos::DynRankView; - const bool allocateFadStorage = !std::is_pod::value; + const bool allocateFadStorage = !(std::is_standard_layout::value && std::is_trivial::value); if (!allocateFadStorage) { return ViewTypeWithLayout(label,dims...); @@ -766,7 +766,7 @@ namespace Intrepid2 { template struct NaturalLayoutForType { using layout = - typename std::conditional::value, + typename std::conditional<(std::is_standard_layout::value && std::is_trivial::value), Kokkos::LayoutLeft, // for POD types, use LayoutLeft Kokkos::LayoutNatural >::type; // For FAD types, use LayoutNatural }; @@ -791,7 +791,7 @@ namespace Intrepid2 { template constexpr int getVectorSizeForHierarchicalParallelism() { - return std::is_pod::value ? VECTOR_SIZE : FAD_VECTOR_SIZE; + return (std::is_standard_layout::value && std::is_trivial::value) ? VECTOR_SIZE : FAD_VECTOR_SIZE; } /** @@ -803,7 +803,7 @@ namespace Intrepid2 { KOKKOS_INLINE_FUNCTION constexpr unsigned getScalarDimensionForView(const ViewType &view) { - return (std::is_pod::value) ? 0 : get_dimension_scalar(view); + return (std::is_standard_layout::value && std::is_trivial::value) ? 
0 : get_dimension_scalar(view); } } // end namespace Intrepid2 diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/DataTests.cpp b/packages/intrepid2/unit-test/MonolithicExecutable/DataTests.cpp index 3bd8ce4c4aee..740cded482a6 100644 --- a/packages/intrepid2/unit-test/MonolithicExecutable/DataTests.cpp +++ b/packages/intrepid2/unit-test/MonolithicExecutable/DataTests.cpp @@ -510,6 +510,35 @@ namespace printView(actualResultData.getUnderlyingView3(), out); } + + // now, check that u' A v = v' A' u for arbitrary vectors u,v + + // set up a second vector (v) + auto vector2View = getView("vector2", cellCount, pointCount, spaceDim); + auto vector2ViewHost = Kokkos::create_mirror(vector2View); + vector2ViewHost(0,0,0) = 3.0; + vector2ViewHost(0,0,1) = 2.0; + Kokkos::deep_copy(vector2View, vector2ViewHost); + + Data u_data(vectorView); + Data A_data(matrixView); + Data v_data(vector2View); + + auto AvResultData = Data::allocateMatVecResult(A_data, v_data, false); + AvResultData.storeMatVec(A_data, v_data, false); + + auto upAvResultData = Data::allocateDotProductResult(u_data, AvResultData); + upAvResultData.storeDotProduct(u_data, AvResultData); + + auto ApuResultData = Data::allocateMatVecResult(A_data, u_data, true); + ApuResultData.storeMatVec(A_data, u_data, true); + + auto vpAuResultData = Data::allocateDotProductResult(v_data, ApuResultData); + vpAuResultData.storeDotProduct(v_data, ApuResultData); + + testFloatingEquality2(upAvResultData, vpAuResultData, relTol, absTol, out, success); + printView(upAvResultData.getUnderlyingView2(), out); + printView(vpAuResultData.getUnderlyingView2(), out); } // #pragma mark Data: MatMat @@ -576,6 +605,82 @@ namespace printView(actualResultData.getUnderlyingView3(), out); } +/** \brief Data provides matrix-matrix multiplication support. This method checks correctness of the computed mat-mat for a case arising from taking the outer product of two vectors. 
+*/ + TEUCHOS_UNIT_TEST( Data, MatMatOuterProduct ) + { + double relTol = 1e-13; + double absTol = 1e-13; + + using DeviceType = DefaultTestDeviceType; + using Scalar = double; + const int spaceDim = 2; + const int cellCount = 1; + const int pointCount = 1; + auto leftVectorView = getView("left vector", cellCount, pointCount, spaceDim); + auto leftVectorViewHost = Kokkos::create_mirror(leftVectorView); + leftVectorViewHost(0,0,0) = 1.0; + leftVectorViewHost(0,0,1) = 0.5; + Kokkos::deep_copy(leftVectorView, leftVectorViewHost); + + Data leftVector(leftVectorView); + + auto rightVectorView = getView("right vector", cellCount, pointCount, spaceDim); + auto rightVectorViewHost = Kokkos::create_mirror(rightVectorView); + rightVectorViewHost(0,0,0) = 0.5; + rightVectorViewHost(0,0,1) = 1.0; + Kokkos::deep_copy(rightVectorView, rightVectorViewHost); + Data rightVector(rightVectorView); + + // re-cast leftVector as a rank 4 (C,P,D,1) object -- a D x 1 matrix at each (C,P). + const int newRank = 4; + auto extents = leftVector.getExtents(); + auto variationTypes = leftVector.getVariationTypes(); + auto leftMatrix = leftVector.shallowCopy(newRank, extents, variationTypes); + + // re-cast rightVector as a rank 4 (C,P,1,D) object -- a 1 x D matrix at each (C,P) + extents = rightVector.getExtents(); + extents[3] = extents[2]; + extents[2] = 1; + variationTypes = rightVector.getVariationTypes(); + variationTypes[3] = variationTypes[2]; + variationTypes[2] = CONSTANT; + auto rightMatrix = rightVector.shallowCopy(newRank, extents, variationTypes); + + auto expectedResultView = getView("result matrix", cellCount, pointCount, spaceDim, spaceDim); + auto expectedResultViewHost = Kokkos::create_mirror(expectedResultView); + + const int cellOrdinal = 0; + for (int i=0; i::allocateMatMatResult(transposeA, leftMatrix, transposeB, rightMatrix); + + TEST_EQUALITY( 4, actualResultData.rank()); + TEST_EQUALITY( cellCount, actualResultData.extent_int(0)); + TEST_EQUALITY(pointCount, 
actualResultData.extent_int(1)); + TEST_EQUALITY( spaceDim, actualResultData.extent_int(2)); + TEST_EQUALITY( spaceDim, actualResultData.extent_int(3)); + + actualResultData.storeMatMat(transposeA, leftMatrix, transposeB, rightMatrix); + + testFloatingEquality4(expectedResultView, actualResultData, relTol, absTol, out, success); + + printView(actualResultData.getUnderlyingView(), out); + } + // #pragma mark Data: MatMatExplicitIdentity_PDD /** \brief Data provides matrix-matrix multiplication support. This method checks correctness of the computed mat-mat for several cases involving 3x3 identity matrices. Here, the logical dimensions (C,P,D,D) differ from the stored dimensions of (P,D,D). We test each possible transpose combination. */ @@ -725,6 +830,48 @@ TEUCHOS_UNIT_TEST( Data, MatMatExplicitIdentity_PDD ) // (P,D,D) underlying; not printView(actualResultData.getUnderlyingView2(), out); } + +// #pragma mark Data: VecDotProduct +/** \brief Data provides vector dot product multiplication support. This method checks correctness of the computed dot product for a particular case involving 2x1 vectors. 
+*/ + TEUCHOS_UNIT_TEST( Data, VecDotProduct ) + { + double relTol = 1e-13; + double absTol = 1e-13; + + using DeviceType = DefaultTestDeviceType; + using Scalar = double; + const int numCells = 1; + const int spaceDim = 2; + + auto vec1View = getView("vector", numCells, spaceDim); + auto vec1ViewHost = Kokkos::create_mirror(vec1View); + + vec1ViewHost(0,0) = 1.0; + vec1ViewHost(0,1) = 2.0; + Kokkos::deep_copy(vec1View, vec1ViewHost); + + auto vec2View = getView("vector", numCells, spaceDim); + auto vec2ViewHost = Kokkos::create_mirror(vec1View); + + vec2ViewHost(0,0) = 3.0; + vec2ViewHost(0,1) = 2.0; + Kokkos::deep_copy(vec2View, vec2ViewHost); + + auto expectedResultView = getView("result",numCells); + auto expectedResultViewHost = Kokkos::create_mirror(expectedResultView); + + expectedResultViewHost(0) = vec1ViewHost(0,0) * vec2ViewHost(0,0) + vec1ViewHost(0,1) * vec2ViewHost(0,1); + + Kokkos::deep_copy(expectedResultView, expectedResultViewHost); + + Data vec1Data(vec1View); + Data vec2Data(vec2View); + auto actualResultData = Data::allocateDotProductResult(vec1Data, vec2Data); + actualResultData.storeDotProduct(vec1Data, vec2Data); + + testFloatingEquality1(expectedResultView, actualResultData.getUnderlyingView1(), relTol, absTol, out, success); + } // test statically that Data supports all 7 rank operators static_assert(supports_rank,1>::value, "Data is expected to support up to rank 7"); diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_GeneralStandardIntegration.cpp b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_GeneralStandardIntegration.cpp index 257b700bab2f..fd5672916aad 100644 --- a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_GeneralStandardIntegration.cpp +++ b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_GeneralStandardIntegration.cpp @@ -83,6 +83,7 @@ void testStandardIntegration(int meshWidth, int polyOrder, int 
worksetSize, EFunctionSpace fs; EOperator op1, op2; int numOps = 0; // can be 1 or 2 + Teuchos::RCP> vectorWeight1, vectorWeight2; switch (formulation) { case Poisson: @@ -113,12 +114,32 @@ void testStandardIntegration(int meshWidth, int polyOrder, int worksetSize, op1 = EOperator::OPERATOR_VALUE; fs = EFunctionSpace::FUNCTION_SPACE_HDIV; break; + case VectorWeightedPoisson: + numOps = 1; + op1 = EOperator::OPERATOR_GRAD; + fs = EFunctionSpace::FUNCTION_SPACE_HGRAD; + vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + double weight = 1.0; + for (int d=0; d(geometry, worksetSize, - polyOrder, fs, op1, - polyOrder, fs, op1, + polyOrder, fs, op1, vectorWeight1, + polyOrder, fs, op1, vectorWeight2, flopCountIntegration, flopCountJacobian); if (numOps == 2) { @@ -136,7 +157,7 @@ void testStandardIntegration(int meshWidth, int polyOrder, int worksetSize, }); } - auto specificIntegrals = performStandardQuadrature(formulation, geometry, polyOrder, worksetSize, flopCountIntegration, flopCountJacobian); + auto specificIntegrals = performStandardQuadrature(formulation, geometry, polyOrder, worksetSize, flopCountIntegration, flopCountJacobian, vectorWeight1, vectorWeight2); out << "Comparing new general standard assembly implementation to previous formulation-specific integration path…\n"; testFloatingEquality3(generalIntegrals, specificIntegrals, relTol, absTol, out, success, "general integral", "specific formulation integral"); @@ -167,4 +188,8 @@ TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStandardInteg TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStandardIntegration, PoissonFormulation, D2, P3) TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStandardIntegration, PoissonFormulation, D3, P3) +TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStandardIntegration, VectorWeightedPoissonFormulation, D1, P1) 
+TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStandardIntegration, VectorWeightedPoissonFormulation, D2, P3) +TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStandardIntegration, VectorWeightedPoissonFormulation, D3, P3) + } // anonymous namespace diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_GeneralStructuredIntegration.cpp b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_GeneralStructuredIntegration.cpp index 7e67f3b15579..7d12fe961809 100644 --- a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_GeneralStructuredIntegration.cpp +++ b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_GeneralStructuredIntegration.cpp @@ -83,6 +83,7 @@ void testStructuredIntegration(int meshWidth, int polyOrder, int worksetSize, EFunctionSpace fs; EOperator op1, op2; int numOps = 0; // can be 1 or 2 + Teuchos::RCP> vectorWeight1, vectorWeight2; switch (formulation) { case Poisson: @@ -113,12 +114,32 @@ void testStructuredIntegration(int meshWidth, int polyOrder, int worksetSize, op1 = EOperator::OPERATOR_VALUE; fs = EFunctionSpace::FUNCTION_SPACE_HDIV; break; + case VectorWeightedPoisson: + numOps = 1; + op1 = EOperator::OPERATOR_GRAD; + fs = EFunctionSpace::FUNCTION_SPACE_HGRAD; + vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + double weight = 1.0; + for (int d=0; d(geometry, worksetSize, - polyOrder, fs, op1, - polyOrder, fs, op1, + polyOrder, fs, op1, vectorWeight1, + polyOrder, fs, op1, vectorWeight2, flopCountIntegration, flopCountJacobian); if (numOps == 2) { @@ -136,7 +157,7 @@ void testStructuredIntegration(int meshWidth, int polyOrder, int worksetSize, }); } - auto specificIntegrals = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, flopCountIntegration, flopCountJacobian); + auto specificIntegrals = 
performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, flopCountIntegration, flopCountJacobian, vectorWeight1, vectorWeight2); out << "Comparing new general standard assembly implementation to previous formulation-specific integration path…\n"; testFloatingEquality3(generalIntegrals, specificIntegrals, relTol, absTol, out, success, "general integral", "specific formulation integral"); @@ -167,4 +188,8 @@ TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStructuredInt TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStructuredIntegration, PoissonFormulation, D2, P3) TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStructuredIntegration, PoissonFormulation, D3, P3) +TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStructuredIntegration, VectorWeightedPoissonFormulation, D1, P1) +TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStructuredIntegration, VectorWeightedPoissonFormulation, D2, P3) +TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStructuredIntegration, VectorWeightedPoissonFormulation, D3, P3) + } // anonymous namespace diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_QuadratureUniformMesh.cpp b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_QuadratureUniformMesh.cpp index cad8b2a13534..ab1e182c0417 100644 --- a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_QuadratureUniformMesh.cpp +++ b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_QuadratureUniformMesh.cpp @@ -74,11 +74,31 @@ namespace gridCellCounts[d] = meshWidth; } + Teuchos::RCP> vectorWeight1, vectorWeight2; + if (formulation == VectorWeightedPoisson) + { + vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + double weight = 1.0; + for (int d=0; d(algorithm, gridCellCounts); double 
flopCountIntegration = 0, flopCountJacobian = 0; - auto standardIntegrals = performStandardQuadrature(formulation, geometry, polyOrder, worksetSize, flopCountIntegration, flopCountJacobian); + auto standardIntegrals = performStandardQuadrature(formulation, geometry, polyOrder, worksetSize, flopCountIntegration, flopCountJacobian, vectorWeight1, vectorWeight2); - auto structuredIntegrals = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, flopCountIntegration, flopCountJacobian); + auto structuredIntegrals = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, flopCountIntegration, flopCountJacobian, vectorWeight1, vectorWeight2); out << "Comparing standard Intrepid2 integration to new integration path…\n"; testFloatingEquality3(standardIntegrals, structuredIntegrals, relTol, absTol, out, success, "standard Intrepid2 integral", "structured integral"); @@ -108,170 +128,179 @@ namespace // comparisons are to Standard algorithm, so we don't instantiate with Standard: // 1D, p=1 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D1, P1) - 
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D1, P1) 
+ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, NonAffineTensorAlgorithm, D1, P1) // 1D, p=2 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, 
L2Formulation, UniformAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, NonAffineTensorAlgorithm, D1, P2) // 1D, p=4 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D1, P4) - 
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, 
UniformAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, NonAffineTensorAlgorithm, D1, P4) // 2D, p=1 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D2, P1) + 
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, NonAffineTensorAlgorithm, D2, P1) // 2D, p=2 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, 
HdivFormulation, AffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D2, P2) + 
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, NonAffineTensorAlgorithm, D2, P2) // 2D, p=3 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D2, P3) + 
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, 
NonAffineTensorAlgorithm, D2, P3) - // 3D, p=1 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D3, P1) - 
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D3, P1) + // 3D, p=1 tests: + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, 
AffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, NonAffineTensorAlgorithm, D3, P1) // 3D, p=2 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D3, P2) - 
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, 
D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, 
UniformAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, NonAffineTensorAlgorithm, D3, P2) // 3D, p=3 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D3, P3) + 
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, NonAffineTensorAlgorithm, D3, P3) } // anonymous namespace diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_StructuredVersusStandard.cpp b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_StructuredVersusStandard.cpp index 28ffcb37b3bd..27059b43e728 100644 --- a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_StructuredVersusStandard.cpp +++ b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_StructuredVersusStandard.cpp @@ -64,8 +64,8 @@ namespace template void testStandardVersusStructuredIntegration(const int &meshWidth, const int &worksetSize, - const EFunctionSpace &fs1, const EOperator &op1, const int &p1, - const EFunctionSpace &fs2, const EOperator &op2, const int &p2, + const EFunctionSpace &fs1, const EOperator &op1, const int &p1, Teuchos::RCP< Kokkos::Array > vectorWeight1, + const EFunctionSpace &fs2, const EOperator &op2, const int &p2, Teuchos::RCP< Kokkos::Array > vectorWeight2, const double &relTol, const double &absTol, Teuchos::FancyOStream &out, bool &success) { @@ -84,19 +84,32 @@ void testStandardVersusStructuredIntegration(const int &meshWidth, const int &wo double 
flopCountIntegration = 0, flopCountJacobian = 0; auto structuredIntegrals = performStructuredAssembly(geometry, worksetSize, - p1, fs1, op1, - p2, fs2, op2, + p1, fs1, op1, vectorWeight1, + p2, fs2, op2, vectorWeight2, flopCountIntegration, flopCountJacobian); auto standardIntegrals = performStandardAssembly(geometry, worksetSize, - p1, fs1, op1, - p2, fs2, op2, + p1, fs1, op1, vectorWeight1, + p2, fs2, op2, vectorWeight2, flopCountIntegration, flopCountJacobian); out << "Comparing general standard assembly to structured integration path…\n"; testFloatingEquality3(standardIntegrals, structuredIntegrals, relTol, absTol, out, success, "standard integral", "structured formulation integral"); } +template +void testStandardVersusStructuredIntegration(const int &meshWidth, const int &worksetSize, + const EFunctionSpace &fs1, const EOperator &op1, const int &p1, + const EFunctionSpace &fs2, const EOperator &op2, const int &p2, + const double &relTol, const double &absTol, + Teuchos::FancyOStream &out, bool &success) +{ + testStandardVersusStructuredIntegration(meshWidth, worksetSize, + fs1, op1, p1, Teuchos::null, + fs2, op2, p2, Teuchos::null, + relTol, absTol, out, success); +} + TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandard_D1_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) { using DataScalar = double; @@ -322,6 +335,381 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandar (meshWidth, worksetSize, fs1, op1, p1, fs2, op2, p2, relTol, absTol, out, success); } +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardVectorWeighted_D1_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 1; + const int p1 = 1; + const int p2 = 1; + const int worksetSize = meshWidth; + + auto vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + auto vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + + double weight 
= 1.0; + for (int d=0; d; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardVectorWeighted_D2_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 2; + const int p1 = 1; + const int p2 = 1; + const int worksetSize = meshWidth; + + auto vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + auto vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + + double weight = 1.0; + for (int d=0; d; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardVectorWeighted_D2_P2_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 2; + const int p1 = 2; + const int p2 = 1; + const int worksetSize = meshWidth; + + auto vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + auto vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + + double weight = 1.0; + for (int d=0; d; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; 
+ double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D1_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 1; + const int p1 = 1; + const int p2 = 1; + const int worksetSize = meshWidth; + + Teuchos::RCP > vectorWeight1; // no vector weight on scalar term + auto vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + + double weight = 1.0; + for (int d=0; d; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D2_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 2; + const int p1 = 1; + const int p2 = 1; + const int worksetSize = meshWidth; + + Teuchos::RCP > vectorWeight1; // no vector weight on scalar term + auto vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + + double weight = 1.0; + for (int d=0; d; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, 
success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D3_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 3; + const int p1 = 1; + const int p2 = 1; + const int worksetSize = meshWidth; + + Teuchos::RCP > vectorWeight1; // no vector weight on scalar term + auto vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + + double weight = 1.0; + for (int d=0; d; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D1_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 1; + const int p1 = 1; + const int p2 = 1; + const int worksetSize = meshWidth; + + auto vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + double weight = 1.0; + for (int d=0; d > vectorWeight2; // no vector weight on scalar term + + using DeviceType = DefaultTestDeviceType; + using BasisFamily = DerivedNodalBasisFamily; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, 
StructuredVersusStandardVectorDotVectorAgainstScalar_D2_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 2; + const int p1 = 1; + const int p2 = 1; + const int worksetSize = meshWidth; + + auto vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + double weight = 1.0; + for (int d=0; d > vectorWeight2; // no vector weight on scalar term + + using DeviceType = DefaultTestDeviceType; + using BasisFamily = DerivedNodalBasisFamily; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D3_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 3; + const int p1 = 1; + const int p2 = 1; + const int worksetSize = meshWidth; + + auto vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + double weight = 1.0; + for (int d=0; d > vectorWeight2; // no vector weight on scalar term + + using DeviceType = DefaultTestDeviceType; + using BasisFamily = DerivedNodalBasisFamily; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, 
StructuredVersusStandardVectorWeighted_D3_P2_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 3; + const int p1 = 2; + const int p2 = 1; + const int worksetSize = meshWidth; + + auto vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + auto vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + + double weight = 1.0; + for (int d=0; d; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + // asymmetric tests (mostly -- a couple symmetric ones tossed in as sanity checks on the test itself) // 1D tests: H(grad) and H(vol) bases defined @@ -338,6 +726,17 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStan TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandard_D1_P2_P1, HGRAD, VALUE, HGRAD, VALUE) TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandard_D1_P2_P1, HVOL, VALUE, HGRAD, VALUE) +// 1D vector-weighted test +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorWeighted_D1_P1_P1, HGRAD, GRAD, HGRAD, GRAD) + +// 1D scalar against vector-weighted tests +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D1_P1_P1, HVOL, VALUE, HGRAD, GRAD) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D1_P1_P1, HGRAD, VALUE, HGRAD, GRAD) + +// 1D vector-weighted against scalar tests +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
StructuredVersusStandardVectorDotVectorAgainstScalar_D1_P1_P1, HGRAD, GRAD, HVOL, VALUE) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D1_P1_P1, HGRAD, GRAD, HGRAD, VALUE) + // 2D tests: curls of H(curl) are scalars. // p1, p1: TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandard_D2_P1_P1, HGRAD, GRAD, HGRAD, GRAD) @@ -367,6 +766,22 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStan TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandard_D2_P1_P2, HCURL, CURL, HVOL, VALUE) TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandard_D2_P1_P2, HVOL, VALUE, HGRAD, VALUE) +// 2D vector-weighted tests +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorWeighted_D2_P1_P1, HGRAD, GRAD, HGRAD, GRAD) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorWeighted_D2_P2_P1, HGRAD, GRAD, HGRAD, GRAD) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorWeighted_D2_P1_P1, HCURL, VALUE, HDIV, VALUE) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorWeighted_D2_P2_P1, HCURL, VALUE, HDIV, VALUE) + +// 2D scalar against vector-weighted tests +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D2_P1_P1, HVOL, VALUE, HGRAD, GRAD) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D2_P1_P1, HGRAD, VALUE, HGRAD, GRAD) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D2_P1_P1, HGRAD, VALUE, HDIV, VALUE) + +// 2D vector-weighted against scalar tests +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
StructuredVersusStandardVectorDotVectorAgainstScalar_D2_P1_P1, HGRAD, GRAD, HVOL, VALUE) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D2_P1_P1, HGRAD, GRAD, HGRAD, VALUE) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D2_P1_P1, HDIV, VALUE, HGRAD, VALUE) + // 3D tests: curls of H(curl) are vectors // p1, p1: TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandard_D3_P1_P1, HGRAD, GRAD, HGRAD, GRAD) @@ -396,5 +811,19 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStan TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandard_D3_P1_P2, HCURL, CURL, HDIV, VALUE) TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandard_D3_P1_P2, HVOL, VALUE, HGRAD, VALUE) +// 3D vector-weighted tests +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorWeighted_D3_P2_P1, HGRAD, GRAD, HGRAD, GRAD) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorWeighted_D3_P2_P1, HCURL, VALUE, HDIV, VALUE) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorWeighted_D3_P2_P1, HCURL, CURL, HGRAD, GRAD) + +// 3D scalar against vector-weighted tests +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D3_P1_P1, HVOL, VALUE, HGRAD, GRAD) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D3_P1_P1, HGRAD, VALUE, HGRAD, GRAD) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D3_P1_P1, HGRAD, VALUE, HDIV, VALUE) + +// 3D vector-weighted against scalar tests +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
StructuredVersusStandardVectorDotVectorAgainstScalar_D3_P1_P1, HGRAD, GRAD, HVOL, VALUE) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D3_P1_P1, HGRAD, GRAD, HGRAD, VALUE) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D3_P1_P1, HDIV, VALUE, HGRAD, VALUE) } // anonymous namespace diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_TagDefs.hpp b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_TagDefs.hpp index bb1ce87fd872..fbafa35407d4 100644 --- a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_TagDefs.hpp +++ b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_TagDefs.hpp @@ -23,7 +23,8 @@ enum FormulationChoice Hgrad, // (grad, grad) + (value, value) Hdiv, // (div, div) + (value, value) Hcurl, // (curl, curl) + (value, value) - L2 // (value, value) + L2, // (value, value) + VectorWeightedPoisson // (a dot grad, b dot grad) }; enum AlgorithmChoice @@ -64,6 +65,10 @@ class L2Formulation { public: static const FormulationChoice formulation = L2; }; +class VectorWeightedPoissonFormulation { +public: + static const FormulationChoice formulation = VectorWeightedPoisson; +}; class StandardAlgorithm { public: diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_Utils.hpp b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_Utils.hpp index 799fa0135efe..513de612af3d 100644 --- a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_Utils.hpp +++ b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_Utils.hpp @@ -26,6 +26,8 @@ #include "HCURLStructuredAssembly.hpp" #include "HVOLStandardAssembly.hpp" #include "HVOLStructuredAssembly.hpp" +#include "VectorWeightedGRADGRADStandardAssembly.hpp" +#include 
"VectorWeightedGRADGRADStructuredAssembly.hpp" template< typename PointScalar, int spaceDim, typename DeviceType > inline @@ -65,10 +67,12 @@ CellGeometry getMesh(AlgorithmChoice algorith return uniformTensorGeometry; // this line should be unreachable; included to avoid compiler warnings from nvcc } -template +template // spaceDim and spaceDim2 should agree on value (differ on type) Intrepid2::ScalarView performStandardQuadrature(FormulationChoice formulation, - Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, - double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) + Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, + double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount, + Teuchos::RCP< Kokkos::Array > vectorWeight1 = Teuchos::null, + Teuchos::RCP< Kokkos::Array > vectorWeight2 = Teuchos::null) { switch (formulation) { @@ -82,15 +86,19 @@ Intrepid2::ScalarView performStandardQuadrature(FormulationCh return performStandardQuadratureHCURL(geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); case L2: return performStandardQuadratureHVOL(geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + case VectorWeightedPoisson: + return performStandardQuadratureVectorWeightedGRADGRAD(geometry, polyOrder, worksetSize, vectorWeight1, vectorWeight2, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported formulation"); } } -template +template // spaceDim and spaceDim2 should agree on value (differ on type) Intrepid2::ScalarView performStructuredQuadrature(FormulationChoice formulation, - Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, - double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) + Intrepid2::CellGeometry &geometry, const int &polyOrder, const int 
&worksetSize, + double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount, + Teuchos::RCP< Kokkos::Array > vectorWeight1 = Teuchos::null, + Teuchos::RCP< Kokkos::Array > vectorWeight2 = Teuchos::null) { switch (formulation) { @@ -104,6 +112,8 @@ Intrepid2::ScalarView performStructuredQuadrature(Formulation return performStructuredQuadratureHCURL(geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); case L2: return performStructuredQuadratureHVOL(geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + case VectorWeightedPoisson: + return performStructuredQuadratureVectorWeightedGRADGRAD(geometry, polyOrder, worksetSize, vectorWeight1, vectorWeight2, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported formulation"); } diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/TransformedBasisValuesTests.cpp b/packages/intrepid2/unit-test/MonolithicExecutable/TransformedBasisValuesTests.cpp index d9c388910cfb..55772e28a89d 100644 --- a/packages/intrepid2/unit-test/MonolithicExecutable/TransformedBasisValuesTests.cpp +++ b/packages/intrepid2/unit-test/MonolithicExecutable/TransformedBasisValuesTests.cpp @@ -8,7 +8,7 @@ // @HEADER -/** \file TransformedVectorDataTests.cpp +/** \file TransformedBasisValuesTests.cpp \brief Tests against TransformedBasisValues. \author Created by Nate Roberts */ @@ -341,6 +341,195 @@ namespace testFloatingEquality4(transformedGradValues, transformedGradientData, relTol, absTol, out, success); } + // testVectorWeightedTransformation tests against a (C,P,D) transformation of a gradient field. 
+ template + void testWeightedVectorTransformation(const int &polyOrder, const int &meshWidth, Teuchos::FancyOStream &out, bool &success) + { + using DeviceType = DefaultTestDeviceType; + using Scalar = double; + using PointScalar = double; + + const double relTol = 1e-12; + const double absTol = 1e-12; + + auto fs = Intrepid2::FUNCTION_SPACE_HGRAD; + + auto lineBasis = Intrepid2::getLineBasis< Intrepid2::NodalBasisFamily >(fs, polyOrder); + + int numFields_1D = lineBasis->getCardinality(); + + int numFields = 1; + int numHypercubes = 1; + for (int d=0; d >(); + shards::CellTopology cellTopo; + if (spaceDim == 1) cellTopo = shards::getCellTopologyData< shards::Line<> >(); + else if (spaceDim == 2) cellTopo = shards::getCellTopologyData< shards::Quadrilateral<> >(); + else if (spaceDim == 3) cellTopo = shards::getCellTopologyData< shards::Hexahedron<> >(); + + auto lineCubature = Intrepid2::DefaultCubatureFactory::create(lineTopo,polyOrder*2); + int numPoints_1D = lineCubature->getNumPoints(); + ScalarView lineCubaturePoints("line cubature points",numPoints_1D,1); + ScalarView lineCubatureWeights("line cubature weights", numPoints_1D); + + lineCubature->getCubature(lineCubaturePoints, lineCubatureWeights); + + // Allocate some intermediate containers + ScalarView lineBasisValues ("line basis values", numFields_1D, numPoints_1D ); + ScalarView lineBasisGradValues("line basis grad values", numFields_1D, numPoints_1D, 1); + + // for now, we use 1D values to build up the 2D or 3D gradients + // eventually, TensorBasis should offer a getValues() variant that returns tensor basis data + lineBasis->getValues(lineBasisValues, lineCubaturePoints, Intrepid2::OPERATOR_VALUE ); + lineBasis->getValues(lineBasisGradValues, lineCubaturePoints, Intrepid2::OPERATOR_GRAD ); + + // drop the trivial space dimension in line gradient values: + Kokkos::resize(lineBasisGradValues, numFields_1D, numPoints_1D); + + Kokkos::Array, spaceDim> vectorComponents; + + for (int d=0; d, spaceDim> 
gradComponent_d; + for (int d2=0; d2(lineBasisGradValues); + else gradComponent_d[d2] = Data(lineBasisValues); + } + vectorComponents[d] = TensorData(gradComponent_d); + } + VectorData gradientVectorData(vectorComponents, false); // false: not axis-aligned + BasisValues gradientValues(gradientVectorData); + + CellGeometry cellNodes = uniformCartesianMesh(1.0, meshWidth); + + // goal here is to do a vector-weighted Poisson; i.e. (f a_u \cdot grad u, a_v \cdot grad v) on each cell + + int pointsPerCell = 1; + for (int d=0; d::allocateJacobianDet(jacobian); + auto jacobianInv = CellTools::allocateJacobianInv(jacobian); + cellNodes.setJacobian( jacobian, pointsPerCell); + CellTools::setJacobianDet(jacobianDet, jacobian); + CellTools::setJacobianInv(jacobianInv, jacobian); + + auto auView = getView("a_u", spaceDim); + auto auViewHost = Kokkos::create_mirror(auView); + double weight = 1.0; + for (int d=0; d("a_v", spaceDim); + auto avViewHost = Kokkos::create_mirror(avView); + weight = 0.5; + for (int d=0; d au_data(auView, Kokkos::Array{numCells,pointsPerCell,spaceDim}, Kokkos::Array{CONSTANT,CONSTANT,GENERAL}); + Data av_data(avView, Kokkos::Array{numCells,pointsPerCell,spaceDim}, Kokkos::Array{CONSTANT,CONSTANT,GENERAL}); + + auto uTransform = Data::allocateMatVecResult(jacobianInv, au_data, true); + auto vTransform = Data::allocateMatVecResult(jacobianInv, av_data, true); + + uTransform.storeMatVec(jacobianInv, au_data, true); // true: transpose jacobianInv when multiplying + vTransform.storeMatVec(jacobianInv, av_data, true); // true: transpose jacobianInv when multiplying + + Intrepid2::TransformedBasisValues utransformedBasisGradients(uTransform, gradientValues); + Intrepid2::TransformedBasisValues vtransformedBasisGradients(vTransform, gradientValues); + + int numPoints = 1; + for (int d=0; d expanded_uTransformedGradValues("transformed a_u dot grad values", numCells, numFields, numPoints); + ScalarView expanded_vTransformedGradValues("transformed a_v dot grad 
values", numCells, numFields, numPoints); + + auto basis = Intrepid2::getBasis< Intrepid2::NodalBasisFamily >(cellTopo, fs, polyOrder); + + // Allocate some intermediate containers + ScalarView basisValues ("basis values", numFields, numPoints ); + ScalarView basisGradValues("basis grad values", numFields, numPoints, spaceDim); + + ScalarView transformedGradValues("transformed grad values", numCells, numFields, numPoints, spaceDim); + ScalarView transformedWeightedGradValues("transformed weighted grad values", numCells, numFields, numPoints, spaceDim); + + auto cubature = Intrepid2::DefaultCubatureFactory::create(cellTopo,polyOrder*2); + TEST_EQUALITY( numPoints, cubature->getNumPoints()); + ScalarView cubaturePoints("cubature points",numPoints,spaceDim); + ScalarView cubatureWeights("cubature weights", numPoints); + + cubature->getCubature(cubaturePoints, cubatureWeights); + + basis->getValues(basisValues, cubaturePoints, Intrepid2::OPERATOR_VALUE ); + basis->getValues(basisGradValues, cubaturePoints, Intrepid2::OPERATOR_GRAD ); + + const int numNodesPerCell = cellNodes.numNodesPerCell(); + ScalarView expandedCellNodes("expanded cell nodes",numCells,numNodesPerCell,spaceDim); + + using ExecutionSpace = typename DeviceType::execution_space; + auto policy = Kokkos::MDRangePolicy>({0,0},{numCells,numNodesPerCell}); + Kokkos::parallel_for("fill expanded cell nodes", policy, + KOKKOS_LAMBDA (const int &cellOrdinal, const int &nodeOrdinal) + { + for (int d=0; d expandedJacobian("jacobian", numCells, numPoints, spaceDim, spaceDim); + ScalarView expandedJacobianInverse("jacobian inverse", numCells, numPoints, spaceDim, spaceDim); + + using CellTools = Intrepid2::CellTools; + using ExecutionSpace = typename DeviceType::execution_space; + using FunctionSpaceTools = Intrepid2::FunctionSpaceTools; + + CellTools::setJacobian(expandedJacobian, cubaturePoints, expandedCellNodes, cellTopo); + CellTools::setJacobianInv(expandedJacobianInverse, expandedJacobian); + + 
FunctionSpaceTools::HGRADtransformGRAD(transformedGradValues, expandedJacobianInverse, basisGradValues); + + auto policy3 = Kokkos::MDRangePolicy>({0,0,0},{numCells,numFields,numPoints}); + Kokkos::parallel_for("compute expanded_{u,v}TransformedGradValues", policy3, + KOKKOS_LAMBDA (const int &cellOrdinal, const int &fieldOrdinal, const int &pointOrdinal) + { + Scalar u_result = 0; + Scalar v_result = 0; + for (int d=0; d(polyOrder, meshWidth, out, success); } + + TEUCHOS_UNIT_TEST( TransformedBasisValues, TransformedWeightedVector_1D_p1 ) + { + const int spaceDim = 1; + const int polyOrder = 1; + const int meshWidth = 10; + testWeightedVectorTransformation(polyOrder, meshWidth, out, success); + } + + TEUCHOS_UNIT_TEST( TransformedBasisValues, TransformedWeightedVector_1D_p2 ) + { + const int spaceDim = 1; + const int polyOrder = 2; + const int meshWidth = 10; + testWeightedVectorTransformation(polyOrder, meshWidth, out, success); + } + + TEUCHOS_UNIT_TEST( TransformedBasisValues, TransformedWeightedVector_2D_p1 ) + { + const int spaceDim = 2; + const int polyOrder = 1; + const int meshWidth = 3; + testWeightedVectorTransformation(polyOrder, meshWidth, out, success); + } + + TEUCHOS_UNIT_TEST( TransformedBasisValues, TransformedWeightedVector_2D_p2 ) + { + const int spaceDim = 2; + const int polyOrder = 2; + const int meshWidth = 3; + testWeightedVectorTransformation(polyOrder, meshWidth, out, success); + } } // anonymous namespace diff --git a/packages/intrepid2/unit-test/performance/StructuredIntegration/StructuredIntegrationPerformance.cpp b/packages/intrepid2/unit-test/performance/StructuredIntegration/StructuredIntegrationPerformance.cpp index ab83eda5d694..1d708f698aca 100644 --- a/packages/intrepid2/unit-test/performance/StructuredIntegration/StructuredIntegrationPerformance.cpp +++ b/packages/intrepid2/unit-test/performance/StructuredIntegration/StructuredIntegrationPerformance.cpp @@ -36,6 +36,8 @@ #include "HCURLStructuredAssembly.hpp" #include 
"HVOLStandardAssembly.hpp" #include "HVOLStructuredAssembly.hpp" +#include "VectorWeightedGRADGRADStandardAssembly.hpp" +#include "VectorWeightedGRADGRADStructuredAssembly.hpp" enum FormulationChoice { @@ -44,6 +46,7 @@ enum FormulationChoice Hdiv, // (div, div) + (value, value) Hcurl, // (curl, curl) + (value, value) L2, // (value, value) + VectorWeightedPoisson, UnknownFormulation }; @@ -81,11 +84,12 @@ std::string to_string(AlgorithmChoice choice) std::string to_string(FormulationChoice choice) { switch (choice) { - case Poisson: return "Poisson"; - case Hgrad: return "Hgrad"; - case Hdiv: return "Hdiv"; - case Hcurl: return "Hcurl"; - case L2: return "L2"; + case Poisson: return "Poisson"; + case Hgrad: return "Hgrad"; + case Hdiv: return "Hdiv"; + case Hcurl: return "Hcurl"; + case L2: return "L2"; + case VectorWeightedPoisson: return "VectorWeightedPoisson"; default: return "Unknown FormulationChoice"; } @@ -230,10 +234,12 @@ getMeshWidths(int basisCardinality, int maxStiffnessEntryCount, int maxElements) return meshWidths; } -template +template Intrepid2::ScalarView performStandardQuadrature(FormulationChoice formulation, - Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, - double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) + Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, + double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount, + Teuchos::RCP> vectorWeight1 = Teuchos::null, + Teuchos::RCP> vectorWeight2 = Teuchos::null) { switch (formulation) { @@ -247,15 +253,19 @@ Intrepid2::ScalarView performStandardQuadrature(FormulationCh return performStandardQuadratureHCURL(geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); case L2: return performStandardQuadratureHVOL(geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + case VectorWeightedPoisson: + return 
performStandardQuadratureVectorWeightedGRADGRAD(geometry, polyOrder, worksetSize, vectorWeight1, vectorWeight2, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); default: return Intrepid2::ScalarView(); } } -template +template Intrepid2::ScalarView performStructuredQuadrature(FormulationChoice formulation, - Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, - double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) + Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, + double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount, + Teuchos::RCP> vectorWeight1 = Teuchos::null, + Teuchos::RCP> vectorWeight2 = Teuchos::null) { switch (formulation) { @@ -269,6 +279,8 @@ Intrepid2::ScalarView performStructuredQuadrature(Formulation return performStructuredQuadratureHCURL(geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); case L2: return performStructuredQuadratureHVOL(geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + case VectorWeightedPoisson: + return performStructuredQuadratureVectorWeightedGRADGRAD(geometry, polyOrder, worksetSize, vectorWeight1, vectorWeight2, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); default: return Intrepid2::ScalarView(); } @@ -280,12 +292,13 @@ typename BasisFamily::BasisPtr getBasisForFormulation(FormulationChoice formulat Intrepid2::EFunctionSpace fs; switch (formulation) { - case Poisson: fs = FUNCTION_SPACE_HGRAD; break; - case Hgrad: fs = FUNCTION_SPACE_HGRAD; break; - case Hdiv: fs = FUNCTION_SPACE_HDIV; break; - case Hcurl: fs = FUNCTION_SPACE_HCURL; break; - case L2: fs = FUNCTION_SPACE_HVOL; break; - case UnknownFormulation: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unknown formulation"); + case Poisson: fs = FUNCTION_SPACE_HGRAD; break; + case Hgrad: fs = FUNCTION_SPACE_HGRAD; break; + case Hdiv: fs = 
FUNCTION_SPACE_HDIV; break; + case Hcurl: fs = FUNCTION_SPACE_HCURL; break; + case L2: fs = FUNCTION_SPACE_HVOL; break; + case VectorWeightedPoisson: fs = FUNCTION_SPACE_HGRAD; break; + case UnknownFormulation: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unknown formulation"); } auto basis = getBasis< BasisFamily >(cellTopo, fs, polyOrder); @@ -350,7 +363,7 @@ map,map > getWorksetSizeM map,map > worksetSizeMap; // keys are maps p -> worksetSize vector allAlgorithmChoices {Standard, NonAffineTensor, AffineTensor, Uniform}; - vector allFormulationChoices {Poisson, Hgrad, Hdiv, Hcurl, L2}; + vector allFormulationChoices {Poisson, Hgrad, Hdiv, Hcurl, L2, VectorWeightedPoisson}; // skip calibration case; want that to span workset sizes in a particular way… vector allModes {Test,BestSerial,BestOpenMP_16,BestCuda,Precalibrated}; @@ -590,6 +603,48 @@ map,map > getWorksetSizeM worksetSizeMap[affineTensorKey][7] = 1; worksetSizeMap[affineTensorKey][8] = 1; } + { + // VectorWeightedPoisson + // These calibrations were run 5-25-24 on an M2 Ultra, on a fork expected to be merged into Trilinos develop soon. 
+ FormulationChoice formulation = VectorWeightedPoisson; + tuple standardKey {mode,formulation,Standard}; + tuple nonAffineTensorKey {mode,formulation,NonAffineTensor}; + tuple affineTensorKey {mode,formulation,AffineTensor}; + + // best for VectorWeightedPoisson - these are for meshes that range from 32,768 for p=1 to 128 for p=10 + worksetSizeMap[standardKey][1] = 4096; + worksetSizeMap[standardKey][2] = 1024; + worksetSizeMap[standardKey][3] = 32; + worksetSizeMap[standardKey][4] = 4; + worksetSizeMap[standardKey][5] = 1; + worksetSizeMap[standardKey][6] = 1; + worksetSizeMap[standardKey][7] = 1; + worksetSizeMap[standardKey][8] = 1; + worksetSizeMap[standardKey][9] = 1; + worksetSizeMap[standardKey][10] = 1; + + worksetSizeMap[nonAffineTensorKey][1] = 2048; + worksetSizeMap[nonAffineTensorKey][2] = 2048; + worksetSizeMap[nonAffineTensorKey][3] = 128; + worksetSizeMap[nonAffineTensorKey][4] = 16; + worksetSizeMap[nonAffineTensorKey][5] = 2; + worksetSizeMap[nonAffineTensorKey][6] = 1; + worksetSizeMap[nonAffineTensorKey][7] = 1; + worksetSizeMap[nonAffineTensorKey][8] = 1; + worksetSizeMap[nonAffineTensorKey][9] = 1; + worksetSizeMap[nonAffineTensorKey][10] = 1; + + worksetSizeMap[affineTensorKey][1] = 32768; + worksetSizeMap[affineTensorKey][2] = 8192; + worksetSizeMap[affineTensorKey][3] = 128; + worksetSizeMap[affineTensorKey][4] = 8; + worksetSizeMap[affineTensorKey][5] = 2; + worksetSizeMap[affineTensorKey][6] = 1; + worksetSizeMap[affineTensorKey][7] = 1; + worksetSizeMap[affineTensorKey][8] = 1; + worksetSizeMap[affineTensorKey][9] = 1; + worksetSizeMap[affineTensorKey][10] = 1; + } } // BestSerial case break; case BestOpenMP_16: @@ -774,6 +829,48 @@ map,map > getWorksetSizeM worksetSizeMap[affineTensorKey][7] = 16; worksetSizeMap[affineTensorKey][8] = 16; } + { + // VectorWeightedPoisson + // These calibrations were run 5-25-24 on an M2 Ultra, on a fork expected to be merged into Trilinos develop soon. 
+ FormulationChoice formulation = VectorWeightedPoisson; + tuple standardKey {mode,formulation,Standard}; + tuple nonAffineTensorKey {mode,formulation,NonAffineTensor}; + tuple affineTensorKey {mode,formulation,AffineTensor}; + + // best for VectorWeightedPoisson - these are for meshes that range from 32,768 for p=1 to 128 for p=10 + worksetSizeMap[standardKey][1] = 16384; + worksetSizeMap[standardKey][2] = 16384; + worksetSizeMap[standardKey][3] = 8192; + worksetSizeMap[standardKey][4] = 1024; + worksetSizeMap[standardKey][5] = 1024; + worksetSizeMap[standardKey][6] = 1024; + worksetSizeMap[standardKey][7] = 512; + worksetSizeMap[standardKey][8] = 256; + worksetSizeMap[standardKey][9] = 128; + worksetSizeMap[standardKey][10] = 32; + + worksetSizeMap[nonAffineTensorKey][1] = 32768; + worksetSizeMap[nonAffineTensorKey][2] = 8192; + worksetSizeMap[nonAffineTensorKey][3] = 8192; + worksetSizeMap[nonAffineTensorKey][4] = 4096; + worksetSizeMap[nonAffineTensorKey][5] = 4096; + worksetSizeMap[nonAffineTensorKey][6] = 64; + worksetSizeMap[nonAffineTensorKey][7] = 32; + worksetSizeMap[nonAffineTensorKey][8] = 32; + worksetSizeMap[nonAffineTensorKey][9] = 16; + worksetSizeMap[nonAffineTensorKey][10] = 16; + + worksetSizeMap[affineTensorKey][1] = 32768; + worksetSizeMap[affineTensorKey][2] = 16384; + worksetSizeMap[affineTensorKey][3] = 8192; + worksetSizeMap[affineTensorKey][4] = 4096; + worksetSizeMap[affineTensorKey][5] = 4096; + worksetSizeMap[affineTensorKey][6] = 2048; + worksetSizeMap[affineTensorKey][7] = 32; + worksetSizeMap[affineTensorKey][8] = 16; + worksetSizeMap[affineTensorKey][9] = 16; + worksetSizeMap[affineTensorKey][10] = 16; + } } // BestOpenMP_16 case break; case BestCuda: @@ -953,6 +1050,23 @@ map,map > getWorksetSizeM worksetSizeMap[affineTensorKey][7] = 256; worksetSizeMap[affineTensorKey][8] = 128; } // L^2 formulation + { + // VectorWeightedPoisson + // TODO: set this with some actual calibration result values. 
For now, we just borrow from Poisson + + FormulationChoice formulation = VectorWeightedPoisson; + tuple standardKey {mode,formulation,Standard}; + tuple nonAffineTensorKey {mode,formulation,NonAffineTensor}; + tuple affineTensorKey {mode,formulation,AffineTensor}; + + tuple standardKey_Poisson {mode,Poisson,Standard}; + tuple nonAffineTensorKey_Poisson {mode,Poisson,NonAffineTensor}; + tuple affineTensorKey_Poisson {mode,Poisson,AffineTensor}; + + worksetSizeMap[standardKey] = worksetSizeMap[standardKey_Poisson]; + worksetSizeMap[nonAffineTensorKey] = worksetSizeMap[nonAffineTensorKey_Poisson]; + worksetSizeMap[affineTensorKey] = worksetSizeMap[affineTensorKey_Poisson]; + } } // BestCuda case break; case Precalibrated: @@ -1128,6 +1242,7 @@ int main( int argc, char* argv[] ) return -1; } + Teuchos::RCP> vectorWeight1, vectorWeight2; // used for VectorWeightedPoisson vector formulationChoices; if (formulationChoiceString == "All") { @@ -1153,6 +1268,17 @@ int main( int argc, char* argv[] ) { formulationChoices = vector{L2}; } + else if (formulationChoiceString == "VectorWeightedPoisson") + { + formulationChoices = vector{VectorWeightedPoisson}; + vectorWeight1 = Teuchos::rcp( new Kokkos::Array() ); + vectorWeight2 = Teuchos::rcp( new Kokkos::Array() ); + for (int d=0; d > assembledMatrices; for (auto algorithmChoice : algorithmChoices) { - int worksetSize = worksetSizeMap[algorithmChoice]; + int worksetSize = 1; + if (worksetSizeMap.find(algorithmChoice) != worksetSizeMap.end()) + worksetSize = worksetSizeMap[algorithmChoice]; if (mode == Calibration) { // if this workset size is bigger than the optimal for p-1, skip it -- it's highly @@ -1428,13 +1556,13 @@ int main( int argc, char* argv[] ) case Nodal: { using BasisFamily = DerivedNodalBasisFamily; - assembledMatrix = performStandardQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + assembledMatrix = performStandardQuadrature(formulation, 
geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount, vectorWeight1, vectorWeight2); } break; case Hierarchical: { using BasisFamily = HierarchicalBasisFamily; - assembledMatrix = performStandardQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + assembledMatrix = performStandardQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount, vectorWeight1, vectorWeight2); } break; case Serendipity: @@ -1456,13 +1584,13 @@ int main( int argc, char* argv[] ) case Nodal: { using BasisFamily = DerivedNodalBasisFamily; - assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount, vectorWeight1, vectorWeight2); } break; case Hierarchical: { using BasisFamily = HierarchicalBasisFamily; - assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount, vectorWeight1, vectorWeight2); } break; case Serendipity: @@ -1485,13 +1613,13 @@ int main( int argc, char* argv[] ) case Nodal: { using BasisFamily = DerivedNodalBasisFamily; - assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount, vectorWeight1, vectorWeight2); } break; case Hierarchical: { using BasisFamily = HierarchicalBasisFamily; - 
assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount, vectorWeight1, vectorWeight2); } break; case Serendipity: @@ -1520,13 +1648,13 @@ int main( int argc, char* argv[] ) case Nodal: { using BasisFamily = DerivedNodalBasisFamily; - assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, numCells, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, numCells, transformIntegrateFlopCount, jacobianCellMeasureFlopCount, vectorWeight1, vectorWeight2); } break; case Hierarchical: { using BasisFamily = HierarchicalBasisFamily; - assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, numCells, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, numCells, transformIntegrateFlopCount, jacobianCellMeasureFlopCount, vectorWeight1, vectorWeight2); } break; case Serendipity: diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_SimpleUtils.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_SimpleUtils.hpp index 0ae29a2f50e0..51ff697bde1e 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_SimpleUtils.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_SimpleUtils.hpp @@ -358,13 +358,19 @@ struct ReduceMaxFunctor { }; template -void kk_view_reduce_max(size_t num_elements, view_type view_to_reduce, +void kk_view_reduce_max(const MyExecSpace &exec, size_t num_elements, view_type view_to_reduce, typename view_type::non_const_value_type &max_reduction) { - typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_reduce("KokkosKernels::Common::ReduceMax", my_exec_space(0, 
num_elements), + typedef Kokkos::RangePolicy policy_t; + Kokkos::parallel_reduce("KokkosKernels::Common::ReduceMax", policy_t(exec, 0, num_elements), ReduceMaxFunctor(view_to_reduce), max_reduction); } +template +void kk_view_reduce_max(size_t num_elements, view_type view_to_reduce, + typename view_type::non_const_value_type &max_reduction) { + kk_view_reduce_max(MyExecSpace(), num_elements, view_to_reduce, max_reduction); +} + // xorshift hash/pseudorandom function (supported for 32- and 64-bit integer // types only) template @@ -429,10 +435,14 @@ struct SequentialFillFunctor { val_type start; }; +template +void sequential_fill(const ExecSpace &exec, const V &v, typename V::non_const_value_type start = 0) { + Kokkos::parallel_for(Kokkos::RangePolicy(exec, 0, v.extent(0)), SequentialFillFunctor(v, start)); +} + template void sequential_fill(const V &v, typename V::non_const_value_type start = 0) { - Kokkos::parallel_for(Kokkos::RangePolicy(0, v.extent(0)), - SequentialFillFunctor(v, start)); + sequential_fill(typename V::execution_space(), v, start); } } // namespace Impl diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_Utils.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_Utils.hpp index a087002d3142..f0add80c50ed 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_Utils.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_Utils.hpp @@ -1076,6 +1076,12 @@ void view_reduce_max(size_t num_elements, view_type view_to_reduce, kk_view_reduce_max(num_elements, view_to_reduce, max_reduction); } +template +void view_reduce_max(const MyExecSpace &exec, size_t num_elements, view_type view_to_reduce, + typename view_type::non_const_value_type &max_reduction) { + kk_view_reduce_max(exec, num_elements, view_to_reduce, max_reduction); +} + template struct ReduceRowSizeFunctor { const size_type *rowmap_view_begins; diff --git a/packages/kokkos-kernels/perf_test/sparse/CMakeLists.txt b/packages/kokkos-kernels/perf_test/sparse/CMakeLists.txt 
index ef0bf7d99530..514ef0ed8253 100644 --- a/packages/kokkos-kernels/perf_test/sparse/CMakeLists.txt +++ b/packages/kokkos-kernels/perf_test/sparse/CMakeLists.txt @@ -116,6 +116,15 @@ KOKKOSKERNELS_ADD_EXECUTABLE( SOURCES KokkosSparse_mdf.cpp ) +# For the sake of build times, don't build this CRS sorting perf test by default. +# It can be enabled if needed by setting -DKokkosKernels_ENABLE_SORT_CRS_PERFTEST=ON. +if (KokkosKernels_ENABLE_SORT_CRS_PERFTEST) + KOKKOSKERNELS_ADD_EXECUTABLE( + sparse_sort_crs + SOURCES KokkosSparse_sort_crs.cpp +) +endif () + if (KokkosKernels_ENABLE_BENCHMARK) KOKKOSKERNELS_ADD_BENCHMARK( sparse_par_ilut diff --git a/packages/kokkos-kernels/perf_test/sparse/KokkosSparse_sort_crs.cpp b/packages/kokkos-kernels/perf_test/sparse/KokkosSparse_sort_crs.cpp new file mode 100644 index 000000000000..cd3ed91521d5 --- /dev/null +++ b/packages/kokkos-kernels/perf_test/sparse/KokkosSparse_sort_crs.cpp @@ -0,0 +1,103 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#include
+#include
+#include "KokkosKernels_config.h"
+#include "KokkosSparse_IOUtils.hpp"
+#include "KokkosKernels_perf_test_utilities.hpp"
+
+#include "KokkosSparse_CrsMatrix.hpp"
+#include "KokkosSparse_SortCrs.hpp"
+
+using perf_test::CommonInputParams;
+
+struct LocalParams {
+  std::string mtxFile;
+};
+
+void print_options() {
+  std::cerr << "Options\n" << std::endl;
+
+  std::cerr << perf_test::list_common_options();
+
+  std::cerr << "\t[Required] --mtx :: matrix to sort\n";
+  std::cerr << "\t[Optional] --repeat :: how many times to repeat sorting\n";
+}
+
+int parse_inputs(LocalParams& params, int argc, char** argv) {
+  for (int i = 1; i < argc; ++i) {
+    if (perf_test::check_arg_str(i, argc, argv, "--mtx", params.mtxFile)) {
+      ++i;
+    } else {
+      std::cerr << "Unrecognized command line argument #" << i << ": " << argv[i] << std::endl;
+      print_options();
+      return 1;
+    }
+  }
+  return 0;
+}
+
+// Perf-test driver: loads the matrix named by --mtx, scrambles the column order inside
+// every row on the host, then reports the mean time of sort_crs_matrix(exec, A) over
+// common_params.repeat trials (the shuffled entries are copied back before each trial).
+template
+void run_experiment(int argc, char** argv, const CommonInputParams& common_params) {
+  using namespace KokkosSparse;
+
+  using mem_space = typename exec_space::memory_space;
+  using device_t = typename Kokkos::Device;
+  using size_type = default_size_type;
+  using lno_t = default_lno_t;
+  using scalar_t = default_scalar;
+  using crsMat_t = KokkosSparse::CrsMatrix;
+
+  LocalParams params;
+  if (parse_inputs(params, argc, argv)) return;
+
+  crsMat_t A = KokkosSparse::Impl::read_kokkos_crst_matrix(params.mtxFile.c_str());
+  std::cout << "Loaded matrix: " << A.numRows() << "x" << A.numCols() << " with " << A.nnz() << " entries.\n";
+  // This first sort call serves as a warm-up
+  KokkosSparse::sort_crs_matrix(A);
+  lno_t m = A.numRows();
+  auto rowmapHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map);
+  auto entriesHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries);
+  typename crsMat_t::index_type shuffledEntries("shuffled entries", A.nnz());
+  // Shuffle the column indices of row i, i.e. entries [rowmapHost(i), rowmapHost(i+1)), so
+  // the timed sorts do not start from the rows the warm-up just sorted. Values are left
+  // alone; this changes the matrix numerically but this doesn't affect sorting.
+  for (lno_t i = 0; i < m; i++) {
+    std::random_shuffle(entriesHost.data() + rowmapHost(i), entriesHost.data() + rowmapHost(i + 1));
+  }
+  Kokkos::deep_copy(shuffledEntries, entriesHost);
+  exec_space exec;
+  Kokkos::Timer timer;
+  double totalTime = 0;
+  for (int rep = 0; rep < common_params.repeat; rep++) {
+    Kokkos::deep_copy(exec, A.graph.entries, shuffledEntries);
+    exec.fence();
+    timer.reset();
+    KokkosSparse::sort_crs_matrix(exec, A);
+    exec.fence();
+    totalTime += timer.seconds();
+  }
+  std::cout << "Mean sort_crs_matrix time over " << common_params.repeat << " trials: ";
+  std::cout << totalTime / common_params.repeat << "\n";
+}
+
+#define KOKKOSKERNELS_PERF_TEST_NAME run_experiment
+#include "KokkosKernels_perf_test_instantiation.hpp"
+int main(int argc, char** argv) { return main_instantiation(argc, argv); } // main
diff --git a/packages/kokkos-kernels/sparse/impl/KokkosSparse_sort_crs_impl.hpp b/packages/kokkos-kernels/sparse/impl/KokkosSparse_sort_crs_impl.hpp
new file mode 100644
index 000000000000..5e18c3fd5ca2
--- /dev/null
+++ b/packages/kokkos-kernels/sparse/impl/KokkosSparse_sort_crs_impl.hpp
@@ -0,0 +1,366 @@
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 4.0
+// Copyright (2022) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef _KOKKOSSPARSE_SORTCRS_IMPL_HPP +#define _KOKKOSSPARSE_SORTCRS_IMPL_HPP + +#include "Kokkos_Core.hpp" +#include "Kokkos_Sort.hpp" +#include "KokkosKernels_Sorting.hpp" + +// Workaround for issue with Kokkos::Experimental::sort_by_key, with nvcc and OpenMP enabled +// (Kokkos issue #7036, fixed in 4.4 release) +// Once support for Kokkos < 4.4 is dropped, +// all code inside "ifdef KK_DISABLE_BULK_SORT_BY_KEY" can be deleted. +#if (KOKKOS_VERSION < 40400) && defined(KOKKOS_ENABLE_CUDA) +#define KK_DISABLE_BULK_SORT_BY_KEY +#endif + +namespace KokkosSparse { +namespace Impl { + +template +struct MatrixRadixSortFunctor { + using Offset = typename rowmap_t::non_const_value_type; + using Ordinal = typename entries_t::non_const_value_type; + using UnsignedOrdinal = typename std::make_unsigned::type; + using Scalar = typename values_t::non_const_value_type; + // The functor owns memory for entriesAux, so it can't have + // MemoryTraits + using entries_managed_t = Kokkos::View; + using values_managed_t = Kokkos::View; + + MatrixRadixSortFunctor(const rowmap_t& rowmap_, const entries_t& entries_, const values_t& values_) + : rowmap(rowmap_), entries(entries_), values(values_) { + entriesAux = entries_managed_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), entries.extent(0)); + valuesAux = values_managed_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values aux"), values.extent(0)); + } + + KOKKOS_INLINE_FUNCTION void operator()(Ordinal i) const { + Offset rowStart = rowmap(i); + Offset rowEnd = rowmap(i + 1); + Ordinal rowNum = rowEnd - rowStart; + // Radix sort requires unsigned keys for comparison + KokkosKernels::SerialRadixSort2( + (UnsignedOrdinal*)entries.data() + rowStart, (UnsignedOrdinal*)entriesAux.data() + rowStart, + values.data() + rowStart, valuesAux.data() + rowStart, rowNum); + } + + rowmap_t rowmap; + entries_t entries; + entries_managed_t entriesAux; + 
values_t values; + values_managed_t valuesAux; +}; + +template +struct MatrixThreadSortFunctor { + using Offset = typename rowmap_t::non_const_value_type; + + MatrixThreadSortFunctor(Ordinal numRows_, const rowmap_t& rowmap_, const entries_t& entries_, const values_t& values_) + : numRows(numRows_), rowmap(rowmap_), entries(entries_), values(values_) {} + + KOKKOS_INLINE_FUNCTION void operator()(const typename Policy::member_type& t) const { + Ordinal i = t.league_rank() * t.team_size() + t.team_rank(); + if (i >= numRows) return; + Offset rowStart = rowmap(i); + Offset rowEnd = rowmap(i + 1); + auto rowEntries = Kokkos::subview(entries, Kokkos::make_pair(rowStart, rowEnd)); + auto rowValues = Kokkos::subview(values, Kokkos::make_pair(rowStart, rowEnd)); + Kokkos::Experimental::sort_by_key_thread(t, rowEntries, rowValues); + } + + Ordinal numRows; + rowmap_t rowmap; + entries_t entries; + values_t values; +}; + +template +struct GraphRadixSortFunctor { + using Offset = typename rowmap_t::non_const_value_type; + using Ordinal = typename entries_t::non_const_value_type; + using UnsignedOrdinal = typename std::make_unsigned::type; + // The functor owns memory for entriesAux, so it can't have + // MemoryTraits + using entries_managed_t = Kokkos::View; + + GraphRadixSortFunctor(const rowmap_t& rowmap_, const entries_t& entries_) : rowmap(rowmap_), entries(entries_) { + entriesAux = entries_managed_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), entries.extent(0)); + } + + KOKKOS_INLINE_FUNCTION void operator()(Ordinal i) const { + Offset rowStart = rowmap(i); + Offset rowEnd = rowmap(i + 1); + Ordinal rowNum = rowEnd - rowStart; + // Radix sort requires unsigned keys for comparison + KokkosKernels::SerialRadixSort((UnsignedOrdinal*)entries.data() + rowStart, + (UnsignedOrdinal*)entriesAux.data() + rowStart, rowNum); + } + + rowmap_t rowmap; + entries_t entries; + entries_managed_t entriesAux; +}; + +template +struct GraphThreadSortFunctor { + using 
Offset = typename rowmap_t::non_const_value_type; + + GraphThreadSortFunctor(Ordinal numRows_, const rowmap_t& rowmap_, const entries_t& entries_) + : numRows(numRows_), rowmap(rowmap_), entries(entries_) {} + + KOKKOS_INLINE_FUNCTION void operator()(const typename Policy::member_type& t) const { + Ordinal i = t.league_rank() * t.team_size() + t.team_rank(); + if (i >= numRows) return; + Offset rowStart = rowmap(i); + Offset rowEnd = rowmap(i + 1); + auto rowEntries = Kokkos::subview(entries, Kokkos::make_pair(rowStart, rowEnd)); + Kokkos::Experimental::sort_thread(t, rowEntries); + } + + Ordinal numRows; + rowmap_t rowmap; + entries_t entries; +}; + +template +struct MergedRowmapFunctor { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + using c_rowmap_t = typename rowmap_t::const_type; + + // Precondition: entries are sorted within each row + MergedRowmapFunctor(const rowmap_t& mergedCounts_, const c_rowmap_t& rowmap_, const entries_t& entries_) + : mergedCounts(mergedCounts_), rowmap(rowmap_), entries(entries_) {} + + KOKKOS_INLINE_FUNCTION void operator()(lno_t row, size_type& lnewNNZ) const { + size_type rowBegin = rowmap(row); + size_type rowEnd = rowmap(row + 1); + if (rowEnd == rowBegin) { + // Row was empty to begin with + mergedCounts(row) = 0; + return; + } + // Otherwise, the first entry in the row exists + lno_t uniqueEntries = 1; + for (size_type j = rowBegin + 1; j < rowEnd; j++) { + if (entries(j - 1) != entries(j)) uniqueEntries++; + } + mergedCounts(row) = uniqueEntries; + lnewNNZ += uniqueEntries; + if (row == lno_t((rowmap.extent(0) - 1) - 1)) mergedCounts(row + 1) = 0; + } + + rowmap_t mergedCounts; + c_rowmap_t rowmap; + entries_t entries; +}; + +template +struct MatrixMergedEntriesFunctor { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + using scalar_t = typename values_t::non_const_value_type; + + // 
Precondition: entries are sorted within each row + MatrixMergedEntriesFunctor(const typename rowmap_t::const_type& rowmap_, const entries_t& entries_, + const values_t& values_, const rowmap_t& mergedRowmap_, const entries_t& mergedEntries_, + const values_t& mergedValues_) + : rowmap(rowmap_), + entries(entries_), + values(values_), + mergedRowmap(mergedRowmap_), + mergedEntries(mergedEntries_), + mergedValues(mergedValues_) {} + + KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { + size_type rowBegin = rowmap(row); + size_type rowEnd = rowmap(row + 1); + if (rowEnd == rowBegin) { + // Row was empty to begin with, nothing to do + return; + } + // Otherwise, accumulate the value for each column + scalar_t accumVal = values(rowBegin); + lno_t accumCol = entries(rowBegin); + size_type insertPos = mergedRowmap(row); + for (size_type j = rowBegin + 1; j < rowEnd; j++) { + if (accumCol == entries(j)) { + // accumulate + accumVal += values(j); + } else { + // write out and reset + mergedValues(insertPos) = accumVal; + mergedEntries(insertPos) = accumCol; + insertPos++; + accumVal = values(j); + accumCol = entries(j); + } + } + // always left with the last unique entry + mergedValues(insertPos) = accumVal; + mergedEntries(insertPos) = accumCol; + } + + typename rowmap_t::const_type rowmap; + entries_t entries; + values_t values; + rowmap_t mergedRowmap; + entries_t mergedEntries; + values_t mergedValues; +}; + +template +struct GraphMergedEntriesFunctor { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + + // Precondition: entries are sorted within each row + GraphMergedEntriesFunctor(const typename rowmap_t::const_type& rowmap_, const entries_t& entries_, + const rowmap_t& mergedRowmap_, const entries_t& mergedEntries_) + : rowmap(rowmap_), entries(entries_), mergedRowmap(mergedRowmap_), mergedEntries(mergedEntries_) {} + + KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { + size_type 
rowBegin = rowmap(row); + size_type rowEnd = rowmap(row + 1); + if (rowEnd == rowBegin) { + // Row was empty to begin with, nothing to do + return; + } + // Otherwise, accumulate the value for each column + lno_t accumCol = entries(rowBegin); + size_type insertPos = mergedRowmap(row); + for (size_type j = rowBegin + 1; j < rowEnd; j++) { + if (accumCol != entries(j)) { + // write out and reset + mergedEntries(insertPos) = accumCol; + insertPos++; + accumCol = entries(j); + } + } + // always left with the last unique entry + mergedEntries(insertPos) = accumCol; + } + + typename rowmap_t::const_type rowmap; + entries_t entries; + rowmap_t mergedRowmap; + entries_t mergedEntries; +}; + +template +struct MaxScanFunctor { + using value_type = uint64_t; + + MaxScanFunctor(uint64_t ncols_, const Keys& keys_, const Entries& entries_) + : ncols(ncols_), keys(keys_), entries(entries_) {} + + KOKKOS_INLINE_FUNCTION + void init(uint64_t& update) const { update = 0; } + + KOKKOS_INLINE_FUNCTION + void join(uint64_t& update, const uint64_t& input) const { update = Kokkos::max(update, input); } + + KOKKOS_INLINE_FUNCTION + void operator()(Offset i, uint64_t& lmax, bool finalPass) const { + lmax = Kokkos::max(lmax, keys(i)); + if (finalPass) { + // lmax is the row containing entry i. + // The key is equivalent to the entry's linear + // index if the matrix were dense and row-major. + keys(i) = lmax * ncols + entries(i); + } + } + + uint64_t ncols; + Keys keys; + Entries entries; +}; + +template +Kokkos::View generateBulkCrsKeys(const ExecSpace& exec, const Rowmap& rowmap, + const Entries& entries, + typename Entries::non_const_value_type ncols) { + using Offset = typename Rowmap::non_const_value_type; + using Ordinal = typename Entries::non_const_value_type; + Ordinal numRows = rowmap.extent(0) ? 
rowmap.extent(0) - 1 : 0; + Kokkos::View keys("keys", entries.extent(0)); + Kokkos::parallel_for( + "CRS bulk sorting: mark row begins", Kokkos::RangePolicy(exec, 0, numRows), KOKKOS_LAMBDA(Ordinal i) { + Offset rowBegin = rowmap(i); + // Only mark the beginnings of non-empty rows. + // Otherwise multiple rows could try to update the same key. + if (rowmap(i + 1) != rowBegin) { + keys(rowBegin) = uint64_t(i); + } + }); + Kokkos::fence(); + Kokkos::parallel_scan("CRS bulk sorting: compute keys", Kokkos::RangePolicy(exec, 0, entries.extent(0)), + MaxScanFunctor(ncols, keys, entries)); + Kokkos::fence(); + return keys; +} + +#ifndef KK_DISABLE_BULK_SORT_BY_KEY +template +Kokkos::View computeEntryPermutation( + const ExecSpace& exec, const Rowmap& rowmap, const Entries& entries, typename Entries::non_const_value_type ncols) { + using Offset = typename Rowmap::non_const_value_type; + auto keys = generateBulkCrsKeys(exec, rowmap, entries, ncols); + Kokkos::View permutation(Kokkos::view_alloc(Kokkos::WithoutInitializing, "permutation"), + entries.extent(0)); + // This initializes permutation as the identity + KokkosKernels::Impl::sequential_fill(exec, permutation); + Kokkos::Experimental::sort_by_key(exec, keys, permutation); + return permutation; +} + +// Heuristic for choosing bulk sorting algorithm +template +bool useBulkSortHeuristic(Ordinal avgDeg, Ordinal maxDeg) { + // Use bulk sort if matrix is highly imbalanced, + // OR the longest rows have many entries. 
+ return (maxDeg / 10 > avgDeg) || (maxDeg > 1024); +} +#endif + +template +void applyPermutation(const ExecSpace& exec, const Permutation& permutation, const InView& in, const OutView& out) { + Kokkos::parallel_for( + "CRS bulk sorting: permute", Kokkos::RangePolicy(exec, 0, in.extent(0)), + KOKKOS_LAMBDA(size_t i) { out(i) = in(permutation(i)); }); +} + +template +void applyPermutationBlockValues(const ExecSpace& exec, const Permutation& permutation, const InView& in, + const OutView& out, Ordinal blockSize) { + uint64_t scalarsPerBlock = (uint64_t)blockSize * blockSize; + if (in.extent(0) % scalarsPerBlock) + throw std::invalid_argument( + "sort_bsr_matrix: matrix values extent not divisible by graph entries " + "extent"); + Kokkos::parallel_for( + "BSR bulk sorting: permute", Kokkos::RangePolicy(exec, 0, in.extent(0)), KOKKOS_LAMBDA(size_t i) { + uint64_t blockIndex = i / scalarsPerBlock; + uint64_t offsetInBlock = i % scalarsPerBlock; + out(i) = in(permutation(blockIndex) * scalarsPerBlock + offsetInBlock); + }); +} + +} // namespace Impl +} // namespace KokkosSparse + +#endif diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_SortCrs.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_SortCrs.hpp index 455068b56f43..1203cd244b5b 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_SortCrs.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_SortCrs.hpp @@ -16,38 +16,11 @@ #ifndef _KOKKOSSPARSE_SORTCRS_HPP #define _KOKKOSSPARSE_SORTCRS_HPP -#include "Kokkos_Core.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_sort_crs_impl.hpp" +#include "KokkosSparse_Utils.hpp" namespace KokkosSparse { -// ---------------------------------- -// BSR matrix/graph sorting utilities -// ---------------------------------- - -// Sort a BRS matrix: within each row, sort entries ascending by column and -// permute the values accordingly. 
-template -void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, const entries_t& entries, const values_t& values); - -// Sort a BRS matrix on the given execution space instance: within each row, -// sort entries ascending by column and permute the values accordingly. -template -void sort_bsr_matrix(const execution_space& exec, const lno_t blockdim, const rowmap_t& rowmap, - const entries_t& entries, const values_t& values); - -// Sort a BRS matrix: within each row, sort entries ascending by column and -// permute the values accordingly. -template -void sort_bsr_matrix(const bsrMat_t& A); - -// Sort a BRS matrix on the given execution space instance: within each row, -// sort entries ascending by column and permute the values accordingly. -template -void sort_bsr_matrix(const typename bsrMat_t::execution_space& exec, const bsrMat_t& A); - // ---------------------------------- // CRS matrix/graph sorting utilities // ---------------------------------- @@ -63,269 +36,13 @@ void sort_bsr_matrix(const typename bsrMat_t::execution_space& exec, const bsrMa // duplicated entries in A, A is sorted and returned (instead of a newly // allocated matrix). 
-namespace Impl { - -template -struct SortCrsMatrixFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - using scalar_t = typename values_t::non_const_value_type; - using team_mem = typename Kokkos::TeamPolicy::member_type; - // The functor owns memory for entriesAux, so it can't have - // MemoryTraits - using entries_managed_t = Kokkos::View; - using values_managed_t = Kokkos::View; - - SortCrsMatrixFunctor(bool usingRangePol, const rowmap_t& rowmap_, const entries_t& entries_, const values_t& values_) - : rowmap(rowmap_), entries(entries_), values(values_) { - if (usingRangePol) { - entriesAux = entries_managed_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), entries.extent(0)); - valuesAux = values_managed_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values aux"), values.extent(0)); - } - // otherwise, aux arrays won't be allocated (sorting in place) - } - - KOKKOS_INLINE_FUNCTION void operator()(const lno_t i) const { - size_type rowStart = rowmap(i); - size_type rowEnd = rowmap(i + 1); - lno_t rowNum = rowEnd - rowStart; - // Radix sort requires unsigned keys for comparison - using unsigned_lno_t = typename std::make_unsigned::type; - KokkosKernels::SerialRadixSort2( - (unsigned_lno_t*)entries.data() + rowStart, (unsigned_lno_t*)entriesAux.data() + rowStart, - values.data() + rowStart, valuesAux.data() + rowStart, rowNum); - } - - KOKKOS_INLINE_FUNCTION void operator()(const team_mem t) const { - size_type i = t.league_rank(); - size_type rowStart = rowmap(i); - size_type rowEnd = rowmap(i + 1); - lno_t rowNum = rowEnd - rowStart; - KokkosKernels::TeamBitonicSort2(entries.data() + rowStart, - values.data() + rowStart, rowNum, t); - } - - rowmap_t rowmap; - entries_t entries; - entries_managed_t entriesAux; - values_t values; - values_managed_t valuesAux; -}; - -template -struct SortCrsGraphFunctor { - using size_type = typename rowmap_t::non_const_value_type; - 
using lno_t = typename entries_t::non_const_value_type; - using team_mem = typename Kokkos::TeamPolicy::member_type; - // The functor owns memory for entriesAux, so it can't have - // MemoryTraits - using entries_managed_t = Kokkos::View; - - SortCrsGraphFunctor(bool usingRangePol, const rowmap_t& rowmap_, const entries_t& entries_) - : rowmap(rowmap_), entries(entries_) { - if (usingRangePol) { - entriesAux = entries_managed_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), entries.extent(0)); - } - // otherwise, aux arrays won't be allocated (sorting in place) - } - - KOKKOS_INLINE_FUNCTION void operator()(const lno_t i) const { - size_type rowStart = rowmap(i); - size_type rowEnd = rowmap(i + 1); - lno_t rowNum = rowEnd - rowStart; - // Radix sort requires unsigned keys for comparison - using unsigned_lno_t = typename std::make_unsigned::type; - KokkosKernels::SerialRadixSort((unsigned_lno_t*)entries.data() + rowStart, - (unsigned_lno_t*)entriesAux.data() + rowStart, rowNum); - } - - KOKKOS_INLINE_FUNCTION void operator()(const team_mem t) const { - size_type i = t.league_rank(); - size_type rowStart = rowmap(i); - size_type rowEnd = rowmap(i + 1); - lno_t rowNum = rowEnd - rowStart; - KokkosKernels::TeamBitonicSort(entries.data() + rowStart, rowNum, t); - } - - rowmap_t rowmap; - entries_t entries; - entries_managed_t entriesAux; -}; - -template -struct MergedRowmapFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - using c_rowmap_t = typename rowmap_t::const_type; - - // Precondition: entries are sorted within each row - MergedRowmapFunctor(const rowmap_t& mergedCounts_, const c_rowmap_t& rowmap_, const entries_t& entries_) - : mergedCounts(mergedCounts_), rowmap(rowmap_), entries(entries_) {} - - KOKKOS_INLINE_FUNCTION void operator()(lno_t row, size_type& lnewNNZ) const { - size_type rowBegin = rowmap(row); - size_type rowEnd = rowmap(row + 1); - if (rowEnd == 
rowBegin) { - // Row was empty to begin with - mergedCounts(row) = 0; - return; - } - // Otherwise, the first entry in the row exists - lno_t uniqueEntries = 1; - for (size_type j = rowBegin + 1; j < rowEnd; j++) { - if (entries(j - 1) != entries(j)) uniqueEntries++; - } - mergedCounts(row) = uniqueEntries; - lnewNNZ += uniqueEntries; - if (row == lno_t((rowmap.extent(0) - 1) - 1)) mergedCounts(row + 1) = 0; - } - - rowmap_t mergedCounts; - c_rowmap_t rowmap; - entries_t entries; -}; - -template -struct MatrixMergedEntriesFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - using scalar_t = typename values_t::non_const_value_type; - - // Precondition: entries are sorted within each row - MatrixMergedEntriesFunctor(const typename rowmap_t::const_type& rowmap_, const entries_t& entries_, - const values_t& values_, const rowmap_t& mergedRowmap_, const entries_t& mergedEntries_, - const values_t& mergedValues_) - : rowmap(rowmap_), - entries(entries_), - values(values_), - mergedRowmap(mergedRowmap_), - mergedEntries(mergedEntries_), - mergedValues(mergedValues_) {} - - KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { - size_type rowBegin = rowmap(row); - size_type rowEnd = rowmap(row + 1); - if (rowEnd == rowBegin) { - // Row was empty to begin with, nothing to do - return; - } - // Otherwise, accumulate the value for each column - scalar_t accumVal = values(rowBegin); - lno_t accumCol = entries(rowBegin); - size_type insertPos = mergedRowmap(row); - for (size_type j = rowBegin + 1; j < rowEnd; j++) { - if (accumCol == entries(j)) { - // accumulate - accumVal += values(j); - } else { - // write out and reset - mergedValues(insertPos) = accumVal; - mergedEntries(insertPos) = accumCol; - insertPos++; - accumVal = values(j); - accumCol = entries(j); - } - } - // always left with the last unique entry - mergedValues(insertPos) = accumVal; - mergedEntries(insertPos) = accumCol; - } - - 
typename rowmap_t::const_type rowmap; - entries_t entries; - values_t values; - rowmap_t mergedRowmap; - entries_t mergedEntries; - values_t mergedValues; -}; - -template -struct GraphMergedEntriesFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - - // Precondition: entries are sorted within each row - GraphMergedEntriesFunctor(const typename rowmap_t::const_type& rowmap_, const entries_t& entries_, - const rowmap_t& mergedRowmap_, const entries_t& mergedEntries_) - : rowmap(rowmap_), entries(entries_), mergedRowmap(mergedRowmap_), mergedEntries(mergedEntries_) {} - - KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { - size_type rowBegin = rowmap(row); - size_type rowEnd = rowmap(row + 1); - if (rowEnd == rowBegin) { - // Row was empty to begin with, nothing to do - return; - } - // Otherwise, accumulate the value for each column - lno_t accumCol = entries(rowBegin); - size_type insertPos = mergedRowmap(row); - for (size_type j = rowBegin + 1; j < rowEnd; j++) { - if (accumCol != entries(j)) { - // write out and reset - mergedEntries(insertPos) = accumCol; - insertPos++; - accumCol = entries(j); - } - } - // always left with the last unique entry - mergedEntries(insertPos) = accumCol; - } - - typename rowmap_t::const_type rowmap; - entries_t entries; - rowmap_t mergedRowmap; - entries_t mergedEntries; -}; - -template -KOKKOS_INLINE_FUNCTION void kk_swap(T& a, T& b) { - T t = a; - a = b; - b = t; -} - -template -struct sort_bsr_functor { - using lno_t = typename entries_type::non_const_value_type; - - row_map_type rowmap; - entries_type entries; - values_type values; - const lno_t blocksize; - - sort_bsr_functor(row_map_type rowmap_, entries_type entries_, values_type values_, const lno_t blocksize_) - : rowmap(rowmap_), entries(entries_), values(values_), blocksize(blocksize_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const lno_t i) const { - const lno_t rowStart = 
rowmap(i); - const lno_t rowSize = rowmap(i + 1) - rowStart; - auto* e = entries.data() + rowStart; - auto* v = values.data() + rowStart * blocksize; - bool done = false; - while (!done) { - done = true; - for (lno_t j = 1; j < rowSize; ++j) { - const lno_t jp = j - 1; - if (e[jp] <= e[j]) continue; - Impl::kk_swap(e[jp], e[j]); - auto const vb = v + j * blocksize; - auto const vbp = v + jp * blocksize; - for (lno_t k = 0; k < blocksize; ++k) // std::swap_ranges(vb, vb + blocksize, vbp); - Impl::kk_swap(vb[k], vbp[k]); - done = false; - } - } - } -}; - -} // namespace Impl - // Sort a CRS matrix: within each row, sort entries ascending by column. // At the same time, permute the values. template void sort_crs_matrix(const execution_space& exec, const rowmap_t& rowmap, const entries_t& entries, - const values_t& values) { + const values_t& values, + typename entries_t::non_const_value_type numCols = + Kokkos::ArithTraits::max()) { static_assert(Kokkos::SpaceAccessibility::accessible, "sort_crs_matrix: rowmap_t is not accessible from the given execution " "space"); @@ -338,71 +55,156 @@ void sort_crs_matrix(const execution_space& exec, const rowmap_t& rowmap, const static_assert(!std::is_const_v, "sort_crs_matrix: entries_t must not be const-valued"); static_assert(!std::is_const_v, "sort_crs_matrix: value_t must not be const-valued"); - using lno_t = typename entries_t::non_const_value_type; - using team_pol = Kokkos::TeamPolicy; - bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); - lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; - if (numRows == 0) return; - Impl::SortCrsMatrixFunctor funct(useRadix, rowmap, entries, values); - if (useRadix) { - Kokkos::parallel_for("sort_crs_matrix", Kokkos::RangePolicy(exec, 0, numRows), funct); + using Ordinal = typename entries_t::non_const_value_type; + // This early return condition covers having 0 or 1 entries, + // which is also implied by having 0 rows or 0 columns. 
+ // If only 1 entry, the matrix is already sorted. + if (entries.extent(0) <= size_t(1)) { + return; + } + Ordinal numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; + if constexpr (!KokkosKernels::Impl::kk_is_gpu_exec_space()) { + // On CPUs, use a sequential radix sort within each row. + Kokkos::parallel_for("sort_crs_matrix[CPU,radix]", + Kokkos::RangePolicy>(exec, 0, numRows), + Impl::MatrixRadixSortFunctor(rowmap, entries, values)); } else { - // Try to get teamsize to be largest power of 2 not greater than avg entries - // per row - // TODO (probably important for performnce): add thread-level sort also, and - // use that for small avg degree. But this works for now. - lno_t idealTeamSize = 1; - lno_t avgDeg = (entries.extent(0) + numRows - 1) / numRows; - while (idealTeamSize < avgDeg / 2) { - idealTeamSize *= 2; + // On GPUs: + // If the matrix is highly imbalanced, or has long rows AND the dimensions + // are not too large to do one large bulk sort, do that. Otherwise, sort + // using one Kokkos thread per row. 
+ Ordinal avgDeg = (entries.extent(0) + numRows - 1) / numRows; +#ifndef KK_DISABLE_BULK_SORT_BY_KEY + Ordinal maxDeg = KokkosSparse::Impl::graph_max_degree(exec, rowmap); + bool useBulkSort = false; + if (KokkosSparse::Impl::useBulkSortHeuristic(avgDeg, maxDeg)) { + // Calculate the true number of columns if user didn't pass it in + if (numCols == Kokkos::ArithTraits::max()) { + KokkosKernels::Impl::kk_view_reduce_max(exec, entries.extent(0), entries, numCols); + numCols++; + } + uint64_t maxBulkKey = (uint64_t)numRows * (uint64_t)numCols; + useBulkSort = maxBulkKey / numRows == (uint64_t)numCols; + } + if (useBulkSort) { + auto permutation = KokkosSparse::Impl::computeEntryPermutation(exec, rowmap, entries, numCols); + // Permutations cannot be done in-place + Kokkos::View origValues( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "origValues"), values.extent(0)); + Kokkos::View origEntries( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "origEntries"), entries.extent(0)); + Kokkos::deep_copy(exec, origValues, values); + Kokkos::deep_copy(exec, origEntries, entries); + KokkosSparse::Impl::applyPermutation(exec, permutation, origEntries, entries); + KokkosSparse::Impl::applyPermutation(exec, permutation, origValues, values); + } else +#else + (void)numCols; +#endif + { + using TeamPol = Kokkos::TeamPolicy; + // Can't use bulk sort approach as matrix dimensions are too large. + // Fall back to parallel thread-level sort within each row. 
+ Ordinal vectorLength = 1; + while (vectorLength < avgDeg / 2) { + vectorLength *= 2; + } + if (vectorLength > TeamPol ::vector_length_max()) vectorLength = TeamPol ::vector_length_max(); + Impl::MatrixThreadSortFunctor funct(numRows, rowmap, entries, + values); + Ordinal teamSize = TeamPol(exec, 1, 1, vectorLength).team_size_recommended(funct, Kokkos::ParallelForTag()); + Kokkos::parallel_for("sort_crs_matrix[GPU,bitonic]", + TeamPol(exec, (numRows + teamSize - 1) / teamSize, teamSize, vectorLength), funct); } - team_pol temp(exec, numRows, 1); - lno_t maxTeamSize = temp.team_size_max(funct, Kokkos::ParallelForTag()); - lno_t teamSize = std::min(idealTeamSize, maxTeamSize); - Kokkos::parallel_for("sort_crs_matrix", team_pol(exec, numRows, teamSize), funct); } } template -void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, const values_t& values) { - sort_crs_matrix(execution_space(), rowmap, entries, values); +void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, const values_t& values, + typename entries_t::const_value_type numCols = + Kokkos::ArithTraits::max()) { + sort_crs_matrix(execution_space(), rowmap, entries, values, numCols); } template -void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, const values_t& values) { - sort_crs_matrix(typename entries_t::execution_space(), rowmap, entries, values); +void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, const values_t& values, + typename entries_t::const_value_type numCols = + Kokkos::ArithTraits::max()) { + sort_crs_matrix(typename entries_t::execution_space(), rowmap, entries, values, numCols); } template void sort_crs_matrix(const typename crsMat_t::execution_space& exec, const crsMat_t& A) { - sort_crs_matrix(exec, A.graph.row_map, A.graph.entries, A.values); + sort_crs_matrix(exec, A.graph.row_map, A.graph.entries, A.values, A.numCols()); } template void sort_crs_matrix(const crsMat_t& A) { - sort_crs_matrix(typename 
crsMat_t::execution_space(), A.graph.row_map, A.graph.entries, A.values); + sort_crs_matrix(typename crsMat_t::execution_space(), A.graph.row_map, A.graph.entries, A.values, A.numCols()); } // Sort a BRS matrix: within each row, sort entries ascending by column and // permute the values accordingly. -template -void sort_bsr_matrix(const execution_space& exec, const lno_t blockdim, const rowmap_t& rowmap, - const entries_t& entries, const values_t& values) { - // TODO: this is O(N^2) mock for debugging - do regular implementation based - // on Radix/Bitonic sort (like CSR) IDEA: maybe we need only one general - // Radix2/Bitonic2 and CSR sorting may call it with blockSize=1 ? - lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; - if (numRows == 0) return; - const lno_t blocksize = blockdim * blockdim; - - assert(values.extent(0) == entries.extent(0) * blocksize); - Impl::sort_bsr_functor bsr_sorter(rowmap, entries, values, blocksize); - Kokkos::parallel_for("sort_bsr_matrix", Kokkos::RangePolicy(exec, 0, numRows), bsr_sorter); -} - -template -void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, const entries_t& entries, const values_t& values) { - sort_bsr_matrix(execution_space(), blockdim, rowmap, entries, values); +template +void sort_bsr_matrix(const execution_space& exec, Ordinal blockSize, const rowmap_t& rowmap, const entries_t& entries, + const values_t& values, + typename entries_t::non_const_value_type numCols = + Kokkos::ArithTraits::max()) { + static_assert(std::is_same_v, + "sort_bsr_matrix: Ordinal type must match nonconst value type of " + "entries_t (default template parameter)"); + if (entries.extent(0) <= size_t(1)) { + return; + } + Ordinal numRows = rowmap.extent(0) ? 
rowmap.extent(0) - 1 : 0; + if (numCols == Kokkos::ArithTraits::max()) { + KokkosKernels::Impl::kk_view_reduce_max(exec, entries.extent(0), entries, numCols); + numCols++; + } + uint64_t maxBulkKey = (uint64_t)numRows * (uint64_t)numCols; + if (maxBulkKey / numRows != (uint64_t)numCols) + throw std::invalid_argument( + "sort_bsr_matrix: implementation requires that numRows * numCols is " + "representable in uint64_t"); +#ifdef KK_DISABLE_BULK_SORT_BY_KEY + using TeamPol = Kokkos::TeamPolicy; + using Offset = typename rowmap_t::non_const_value_type; + // Temporary workaround: do not use Kokkos::Experimental::sort_by_key, instead + // sort bulk keys one row at a time + auto keys = Impl::generateBulkCrsKeys(exec, rowmap, entries, numCols); + Kokkos::View permutation(Kokkos::view_alloc(Kokkos::WithoutInitializing, "permutation"), + entries.extent(0)); + KokkosKernels::Impl::sequential_fill(exec, permutation); + Ordinal vectorLength = 1; + Ordinal avgDeg = (entries.extent(0) + numRows - 1) / numRows; + while (vectorLength < avgDeg / 2) { + vectorLength *= 2; + } + if (vectorLength > TeamPol ::vector_length_max()) vectorLength = TeamPol ::vector_length_max(); + Impl::MatrixThreadSortFunctor funct( + numRows, rowmap, keys, permutation); + Ordinal teamSize = TeamPol(exec, 1, 1, vectorLength).team_size_recommended(funct, Kokkos::ParallelForTag()); + Kokkos::parallel_for("sort_bulk_keys_by_row[GPU,bitonic]", + TeamPol(exec, (numRows + teamSize - 1) / teamSize, teamSize, vectorLength), funct); +#else + auto permutation = KokkosSparse::Impl::computeEntryPermutation(exec, rowmap, entries, numCols); +#endif + // Permutations cannot be done in-place + Kokkos::View origValues( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "origValues"), values.extent(0)); + Kokkos::View origEntries( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "origEntries"), entries.extent(0)); + Kokkos::deep_copy(exec, origValues, values); + Kokkos::deep_copy(exec, origEntries, entries); + 
KokkosSparse::Impl::applyPermutation(exec, permutation, origEntries, entries); + KokkosSparse::Impl::applyPermutationBlockValues(exec, permutation, origValues, values, blockSize); +} + +template +void sort_bsr_matrix(Ordinal blockdim, const rowmap_t& rowmap, const entries_t& entries, const values_t& values, + Ordinal numCols = Kokkos::ArithTraits::max()) { + sort_bsr_matrix(execution_space(), blockdim, rowmap, entries, values, numCols); } // Sort a BSR matrix (like CRS but single values are replaced with contignous @@ -413,7 +215,7 @@ void sort_bsr_matrix(const typename bsrMat_t::execution_space& exec, const bsrMa // directly sort_bsr_matrix( - exec, A.blockDim(), A.graph.row_map, A.graph.entries, A.values); + exec, A.blockDim(), A.graph.row_map, A.graph.entries, A.values, A.numCols()); } template @@ -423,9 +225,10 @@ void sort_bsr_matrix(const bsrMat_t& A) { // Sort a CRS graph: within each row, sort entries ascending by column. template -void sort_crs_graph(const execution_space& exec, const rowmap_t& rowmap, const entries_t& entries) { - using lno_t = typename entries_t::non_const_value_type; - using team_pol = Kokkos::TeamPolicy; +void sort_crs_graph(const execution_space& exec, const rowmap_t& rowmap, const entries_t& entries, + typename entries_t::non_const_value_type numCols = + Kokkos::ArithTraits::max()) { + using Ordinal = typename entries_t::non_const_value_type; static_assert(Kokkos::SpaceAccessibility::accessible, "sort_crs_graph: rowmap_t is not accessible from the given execution " "space"); @@ -433,27 +236,55 @@ void sort_crs_graph(const execution_space& exec, const rowmap_t& rowmap, const e "sort_crs_graph: entries_t is not accessible from the given execution " "space"); static_assert(!std::is_const_v, "sort_crs_graph: entries_t must not be const-valued"); - bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); - lno_t numRows = rowmap.extent(0) ? 
rowmap.extent(0) - 1 : 0; - if (numRows == 0) return; - Impl::SortCrsGraphFunctor funct(useRadix, rowmap, entries); - if (useRadix) { - Kokkos::parallel_for("sort_crs_graph", Kokkos::RangePolicy(exec, 0, numRows), funct); + Ordinal numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; + if (entries.extent(0) <= size_t(1)) { + return; + } + if constexpr (!KokkosKernels::Impl::kk_is_gpu_exec_space()) { + // If on CPU, sort each row independently. Don't need to know numCols for + // this. + Kokkos::parallel_for("sort_crs_graph[CPU,radix]", + Kokkos::RangePolicy>(exec, 0, numRows), + Impl::GraphRadixSortFunctor(rowmap, entries)); } else { - // Try to get teamsize to be largest power of 2 less than or equal to - // half the entries per row. 0.5 * #entries is bitonic's parallelism within - // a row. - // TODO (probably important for performnce): add thread-level sort also, and - // use that for small avg degree. But this works for now. - lno_t idealTeamSize = 1; - lno_t avgDeg = (entries.extent(0) + numRows - 1) / numRows; - while (idealTeamSize < avgDeg / 2) { - idealTeamSize *= 2; + // On GPUs: + // If the graph is highly imbalanced AND the dimensions are not too large + // to do one large bulk sort, do that. Otherwise, sort using one Kokkos + // thread per row. 
+ Ordinal avgDeg = (entries.extent(0) + numRows - 1) / numRows; +#ifndef KK_DISABLE_BULK_SORT_BY_KEY + Ordinal maxDeg = KokkosSparse::Impl::graph_max_degree(exec, rowmap); + bool useBulkSort = false; + if (KokkosSparse::Impl::useBulkSortHeuristic(avgDeg, maxDeg)) { + // Calculate the true number of columns if user didn't pass it in + if (numCols == Kokkos::ArithTraits::max()) { + KokkosKernels::Impl::kk_view_reduce_max(exec, entries.extent(0), entries, numCols); + numCols++; + } + uint64_t maxBulkKey = (uint64_t)numRows * (uint64_t)numCols; + useBulkSort = maxBulkKey / numRows == (uint64_t)numCols; + } + if (useBulkSort) { + auto keys = KokkosSparse::Impl::generateBulkCrsKeys(exec, rowmap, entries, numCols); + Kokkos::Experimental::sort_by_key(exec, keys, entries); + } else +#else + (void)numCols; +#endif + { + using TeamPol = Kokkos::TeamPolicy; + // Fall back to thread-level sort within each row + Ordinal vectorLength = 1; + while (vectorLength < avgDeg / 2) { + vectorLength *= 2; + } + if (vectorLength > TeamPol ::vector_length_max()) vectorLength = TeamPol ::vector_length_max(); + + Impl::GraphThreadSortFunctor funct(numRows, rowmap, entries); + Ordinal teamSize = TeamPol(exec, 1, 1, vectorLength).team_size_recommended(funct, Kokkos::ParallelForTag()); + Kokkos::parallel_for("sort_crs_graph[GPU,bitonic]", + TeamPol(exec, (numRows + teamSize - 1) / teamSize, teamSize, vectorLength), funct); } - team_pol temp(exec, numRows, 1); - lno_t maxTeamSize = temp.team_size_max(funct, Kokkos::ParallelForTag()); - lno_t teamSize = std::min(idealTeamSize, maxTeamSize); - Kokkos::parallel_for("sort_crs_graph", team_pol(exec, numRows, teamSize), funct); } } @@ -462,36 +293,38 @@ void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries) { sort_crs_graph(execution_space(), rowmap, entries); } -// This overload covers 2 cases, while allowing all template args to be deduced: -// - sort_crs_graph(exec, G) -// - sort_crs_graph(rowmap, entries) -template -void 
sort_crs_graph(const Arg1& a1, const Arg2& a2) { - if constexpr (Kokkos::is_execution_space_v) { - // a1 is an exec instance, a2 is a graph - sort_crs_graph(a1, a2.row_map, a2.entries); - } else if constexpr (Kokkos::is_view_v) { - // a1 is rowmap, a2 is entries - sort_crs_graph(typename Arg2::execution_space(), a1, a2); - } else { - static_assert(Arg1::doesnthavethisthing, - "sort_crs_graph(arg1, arg2): expect either (exec, G) or " - "(rowmap, entries)"); - } +template +typename std::enable_if_t> sort_crs_graph( + const rowmap_t& rowmap, const entries_t& entries, + typename entries_t::const_value_type& numCols = + Kokkos::ArithTraits::max()) { + sort_crs_graph(typename entries_t::execution_space(), rowmap, entries, numCols); +} + +template +typename std::enable_if_t> sort_crs_graph( + const execution_space& exec, const crsGraph_t& G, + typename crsGraph_t::entries_type::const_value_type& numCols = + Kokkos::ArithTraits::max()) { + sort_crs_graph(exec, G.row_map, G.entries, numCols); } template -void sort_crs_graph(const crsGraph_t& G) { - sort_crs_graph(typename crsGraph_t::execution_space(), G); +void sort_crs_graph(const crsGraph_t& G, + typename crsGraph_t::entries_type::const_value_type& numCols = + Kokkos::ArithTraits::max()) { + sort_crs_graph(typename crsGraph_t::execution_space(), G, numCols); } template void sort_and_merge_matrix(const exec_space& exec, const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, const values_t& values_in, rowmap_t& rowmap_out, - entries_t& entries_out, values_t& values_out) { + entries_t& entries_out, values_t& values_out, + typename entries_t::const_value_type& numCols = + Kokkos::ArithTraits::max()) { using nc_rowmap_t = typename rowmap_t::non_const_type; - using size_type = typename nc_rowmap_t::value_type; - using ordinal_t = typename entries_t::value_type; + using Offset = typename nc_rowmap_t::value_type; + using Ordinal = typename entries_t::value_type; using range_t = Kokkos::RangePolicy; 
static_assert(Kokkos::SpaceAccessibility::accessible, "sort_and_merge_matrix: rowmap_t is not accessible from the given " @@ -507,8 +340,8 @@ void sort_and_merge_matrix(const exec_space& exec, const typename rowmap_t::cons static_assert(!std::is_const_v, "sort_and_merge_matrix: value_t must not be const-valued"); - ordinal_t numRows = rowmap_in.extent(0) ? ordinal_t(rowmap_in.extent(0) - 1) : ordinal_t(0); - size_type nnz = entries_in.extent(0); + Ordinal numRows = rowmap_in.extent(0) ? Ordinal(rowmap_in.extent(0) - 1) : Ordinal(0); + Offset nnz = entries_in.extent(0); if (numRows == 0) { rowmap_out = typename rowmap_t::non_const_type("SortedMerged rowmap", rowmap_in.extent(0)); @@ -517,13 +350,13 @@ void sort_and_merge_matrix(const exec_space& exec, const typename rowmap_t::cons return; } - sort_crs_matrix(exec, rowmap_in, entries_in, values_in); + sort_crs_matrix(exec, rowmap_in, entries_in, values_in, numCols); // Count entries per row into a new rowmap, in terms of merges that can be // done nc_rowmap_t nc_rowmap_out(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "SortedMerged rowmap"), numRows + 1); - size_type numCompressedEntries = 0; - Kokkos::parallel_reduce(range_t(exec, 0, numRows), + Offset numCompressedEntries = 0; + Kokkos::parallel_reduce("KokkosSparse::Impl::MergedRowmapFunctor", range_t(exec, 0, numRows), Impl::MergedRowmapFunctor(nc_rowmap_out, rowmap_in, entries_in), numCompressedEntries); if (nnz == numCompressedEntries) { @@ -555,7 +388,7 @@ void sort_and_merge_matrix(const exec_space& exec, const typename rowmap_t::cons values_out = values_t(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "SortedMerged values"), numCompressedEntries); // Compute merged entries and values - Kokkos::parallel_for(range_t(exec, 0, numRows), + Kokkos::parallel_for("KokkosSparse::Impl::MatrixMergedEntriesFunctor", range_t(exec, 0, numRows), Impl::MatrixMergedEntriesFunctor( rowmap_orig, entries_orig, values_orig, rowmap_out, entries_out, values_out)); } 
@@ -571,7 +404,8 @@ crsMat_t sort_and_merge_matrix(const typename crsMat_t::execution_space& exec, c entries_t entries_out; values_t values_out; - sort_and_merge_matrix(exec, A.graph.row_map, A.graph.entries, A.values, rowmap_out, entries_out, values_out); + sort_and_merge_matrix(exec, A.graph.row_map, A.graph.entries, A.values, rowmap_out, entries_out, values_out, + A.numCols()); return crsMat_t("SortedMerged", A.numRows(), A.numCols(), values_out.extent(0), values_out, rowmap_out, entries_out); } @@ -584,23 +418,29 @@ crsMat_t sort_and_merge_matrix(const crsMat_t& A) { template void sort_and_merge_matrix(const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, const values_t& values_in, rowmap_t& rowmap_out, entries_t& entries_out, - values_t& values_out) { - sort_and_merge_matrix(exec_space(), rowmap_in, entries_in, values_in, rowmap_out, entries_out, values_out); + values_t& values_out, + typename entries_t::const_value_type& numCols = + Kokkos::ArithTraits::max()) { + sort_and_merge_matrix(exec_space(), rowmap_in, entries_in, values_in, rowmap_out, entries_out, values_out, numCols); } template void sort_and_merge_matrix(const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, const values_t& values_in, rowmap_t& rowmap_out, entries_t& entries_out, - values_t& values_out) { + values_t& values_out, + typename entries_t::const_value_type& numCols = + Kokkos::ArithTraits::max()) { sort_and_merge_matrix(typename entries_t::execution_space(), rowmap_in, entries_in, values_in, rowmap_out, - entries_out, values_out); + entries_out, values_out, numCols); } template void sort_and_merge_graph(const exec_space& exec, const typename rowmap_t::const_type& rowmap_in, - const entries_t& entries_in, rowmap_t& rowmap_out, entries_t& entries_out) { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::value_type; + const entries_t& entries_in, rowmap_t& rowmap_out, entries_t& entries_out, + 
typename entries_t::const_value_type& numCols = + Kokkos::ArithTraits::max()) { + using Offset = typename rowmap_t::non_const_value_type; + using Ordinal = typename entries_t::value_type; using range_t = Kokkos::RangePolicy; using nc_rowmap_t = typename rowmap_t::non_const_type; static_assert(Kokkos::SpaceAccessibility::accessible, @@ -612,19 +452,19 @@ void sort_and_merge_graph(const exec_space& exec, const typename rowmap_t::const static_assert(!std::is_const_v, "sort_and_merge_graph: entries_t must not be const-valued"); - lno_t numRows = rowmap_in.extent(0) ? rowmap_in.extent(0) - 1 : 0; + Ordinal numRows = rowmap_in.extent(0) ? rowmap_in.extent(0) - 1 : 0; if (numRows == 0) { rowmap_out = typename rowmap_t::non_const_type("SortedMerged rowmap", rowmap_in.extent(0)); entries_out = entries_t(); return; } // Sort in place - sort_crs_graph(exec, rowmap_in, entries_in); + sort_crs_graph(exec, rowmap_in, entries_in, numCols); // Count entries per row into a new rowmap, in terms of merges that can be // done nc_rowmap_t nc_rowmap_out(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "SortedMerged rowmap"), numRows + 1); - size_type numCompressedEntries = 0; - Kokkos::parallel_reduce(range_t(exec, 0, numRows), + Offset numCompressedEntries = 0; + Kokkos::parallel_reduce("KokkosSparse::Impl::MergedRowmapFunctor", range_t(exec, 0, numRows), Impl::MergedRowmapFunctor(nc_rowmap_out, rowmap_in, entries_in), numCompressedEntries); if (entries_in.extent(0) == size_t(numCompressedEntries)) { @@ -655,107 +495,50 @@ void sort_and_merge_graph(const exec_space& exec, const typename rowmap_t::const entries_out = entries_t(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "SortedMerged entries"), numCompressedEntries); // Compute merged entries and values - Kokkos::parallel_for(range_t(exec, 0, numRows), Impl::GraphMergedEntriesFunctor( - rowmap_orig, entries_orig, rowmap_out, entries_out)); + Kokkos::parallel_for( + "KokkosSparse::Impl::GraphMergedEntriesFunctor", 
range_t(exec, 0, numRows), + Impl::GraphMergedEntriesFunctor(rowmap_orig, entries_orig, rowmap_out, entries_out)); } template void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, - rowmap_t& rowmap_out, entries_t& entries_out) { - return sort_and_merge_graph(exec_space(), rowmap_in, entries_in, rowmap_out, entries_out); + rowmap_t& rowmap_out, entries_t& entries_out, + typename entries_t::const_value_type& numCols = + Kokkos::ArithTraits::max()) { + return sort_and_merge_graph(exec_space(), rowmap_in, entries_in, rowmap_out, entries_out, numCols); } template void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, - rowmap_t& rowmap_out, entries_t& entries_out) { - return sort_and_merge_graph(typename entries_t::execution_space(), rowmap_in, entries_in, rowmap_out, entries_out); + rowmap_t& rowmap_out, entries_t& entries_out, + typename entries_t::const_value_type& numCols = + Kokkos::ArithTraits::max()) { + return sort_and_merge_graph(typename entries_t::execution_space(), rowmap_in, entries_in, rowmap_out, entries_out, + numCols); } template -crsGraph_t sort_and_merge_graph(const typename crsGraph_t::execution_space& exec, const crsGraph_t& G) { +crsGraph_t sort_and_merge_graph( + const typename crsGraph_t::execution_space& exec, const crsGraph_t& G, + typename crsGraph_t::entries_type::const_value_type& numCols = + Kokkos::ArithTraits::max()) { using rowmap_t = typename crsGraph_t::row_map_type::non_const_type; using entries_t = typename crsGraph_t::entries_type; static_assert(!std::is_const::value, "sort_and_merge_graph requires StaticCrsGraph entries to be non-const."); rowmap_t mergedRowmap; entries_t mergedEntries; - sort_and_merge_graph(exec, G.row_map, G.entries, mergedRowmap, mergedEntries); + sort_and_merge_graph(exec, G.row_map, G.entries, mergedRowmap, mergedEntries, numCols); return crsGraph_t(mergedEntries, mergedRowmap); } template -crsGraph_t 
sort_and_merge_graph(const crsGraph_t& G) { - return sort_and_merge_graph(typename crsGraph_t::execution_space(), G); +crsGraph_t sort_and_merge_graph( + const crsGraph_t& G, typename crsGraph_t::entries_type::const_value_type& numCols = + Kokkos::ArithTraits::max()) { + return sort_and_merge_graph(typename crsGraph_t::execution_space(), G, numCols); } } // namespace KokkosSparse -namespace KokkosKernels { - -// ---------------------------------- -// BSR matrix/graph sorting utilities -// ---------------------------------- - -// Sort a BRS matrix: within each row, sort entries ascending by column and -// permute the values accordingly. -template -[[deprecated]] void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, const entries_t& entries, - const values_t& values) { - KokkosSparse::sort_bsr_matrix(blockdim, rowmap, entries, values); -} - -template -[[deprecated]] void sort_bsr_matrix(const bsrMat_t& A) { - KokkosSparse::sort_bsr_matrix(A); -} - -// ---------------------------------- -// CRS matrix/graph sorting utilities -// ---------------------------------- - -// The sort_crs* functions sort the adjacent column list for each row into -// ascending order. - -template -[[deprecated]] void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, const values_t& values) { - KokkosSparse::sort_crs_matrix(rowmap, entries, values); -} - -template -[[deprecated]] void sort_crs_matrix(const crsMat_t& A) { - KokkosSparse::sort_crs_matrix(A); -} - -template -[[deprecated]] void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries) { - KokkosSparse::sort_crs_graph(rowmap, entries); -} - -template -[[deprecated]] void sort_crs_graph(const crsGraph_t& G) { - KokkosSparse::sort_crs_graph(G); -} - -// sort_and_merge_matrix produces a new matrix which is equivalent to A but is -// sorted and has no duplicated entries: each (i, j) is unique. Values for -// duplicated entries are summed. 
-template -[[deprecated]] crsMat_t sort_and_merge_matrix(const crsMat_t& A) { - KokkosSparse::sort_and_merge_matrix(A); -} - -template -[[deprecated]] crsGraph_t sort_and_merge_graph(const crsGraph_t& G) { - KokkosSparse::sort_and_merge_graph(G); -} - -template -[[deprecated]] void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, - rowmap_t& rowmap_out, entries_t& entries_out) { - KokkosSparse::sort_and_merge_graph(rowmap_in, entries_in, rowmap_out, entries_out); -} - -} // namespace KokkosKernels - #endif // _KOKKOSSPARSE_SORTCRS_HPP diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp index 781857ef551f..d73787481e0e 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp @@ -848,6 +848,19 @@ ordinal_t graph_max_degree(const rowmap_t &rowmap) { return val; } +template +typename rowmap_t::non_const_value_type graph_max_degree(const execution_space &exec, const rowmap_t &rowmap) { + using Offset = typename rowmap_t::non_const_value_type; + using Reducer = Kokkos::Max; + Offset nrows = rowmap.extent(0); + if (nrows) nrows--; + if (nrows == 0) return 0; + Offset val; + Kokkos::parallel_reduce(Kokkos::RangePolicy(exec, 0, nrows), + MaxDegreeFunctor(rowmap), Reducer(val)); + return val; +} + template void graph_min_max_degree(const rowmap_t &rowmap, ordinal_t &min_degree, ordinal_t &max_degree) { using Reducer = Kokkos::MinMax; diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spadd_handle.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spadd_handle.hpp index ea9594ca3e2f..8d28309585a7 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spadd_handle.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spadd_handle.hpp @@ -102,10 +102,6 @@ class SPADDHandle { */ size_type get_c_nnz() { return this->result_nnz_size; } - void 
set_sort_option(int option) { this->sort_option = option; } - - int get_sort_option() { return this->sort_option; } - #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE SpaddCusparseData cusparseData; #endif diff --git a/packages/muelu/cmake/MueLu_config.hpp.in b/packages/muelu/cmake/MueLu_config.hpp.in index c120b66affd4..9f02daefb9b7 100644 --- a/packages/muelu/cmake/MueLu_config.hpp.in +++ b/packages/muelu/cmake/MueLu_config.hpp.in @@ -124,4 +124,6 @@ */ @MUELU_DEPRECATED_DECLARATIONS@ +#cmakedefine MueLu_SHOW_DEPRECATED_WARNINGS + #endif /* MUELU_CONFIG_HPP */ diff --git a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp index b1cc80c1181d..8b22c8fc7f90 100644 --- a/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_MLParameterListInterpreter_decl.hpp @@ -49,7 +49,7 @@ #endif #ifdef HAVE_MUELU_DEPRECATED_CODE -#ifndef TRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS +#ifdef MueLu_SHOW_DEPRECATED_WARNINGS #warning "The header file MueLu_MLParameterListInterpreter.hpp is deprecated" #endif #else diff --git a/packages/phalanx/test/DagManager/DagManagerTest.cpp b/packages/phalanx/test/DagManager/DagManagerTest.cpp index 34b9dc36caca..ffee37324306 100644 --- a/packages/phalanx/test/DagManager/DagManagerTest.cpp +++ b/packages/phalanx/test/DagManager/DagManagerTest.cpp @@ -50,8 +50,8 @@ void registerDagNodes(PHX::DagManager& em, RCP a = rcp(new Mock); a->setName("Eval_A"); a->evaluates("A"); - a->requires("B"); - a->requires("C"); + a->depends("B"); + a->depends("C"); em.registerEvaluator(a); } @@ -60,7 +60,7 @@ void registerDagNodes(PHX::DagManager& em, b->setName("Eval_B"); b->evaluates("B"); b->evaluates("D"); - b->requires("E"); + b->depends("E"); em.registerEvaluator(b); } @@ -68,7 +68,7 @@ void registerDagNodes(PHX::DagManager& em, RCP c = rcp(new Mock); c->setName("Eval_C"); c->evaluates("C"); - c->requires("E"); + 
c->depends("E"); em.registerEvaluator(c); } @@ -77,7 +77,7 @@ void registerDagNodes(PHX::DagManager& em, e->setName("Eval_E"); e->evaluates("E"); if (addCircularDependency) - e->requires("D"); + e->depends("D"); em.registerEvaluator(e); } @@ -86,7 +86,7 @@ void registerDagNodes(PHX::DagManager& em, RCP c = rcp(new Mock); c->setName("DUPLICATE Eval_C"); c->evaluates("C"); - c->requires("E"); + c->depends("E"); em.registerEvaluator(c); } } @@ -342,22 +342,22 @@ TEUCHOS_UNIT_TEST(dag, analyze_graph2) RCP m = rcp(new Mock); m->setName("Eval_A"); m->evaluates("A"); - m->requires("B"); - m->requires("C"); + m->depends("B"); + m->depends("C"); dag.registerEvaluator(m); } { RCP m = rcp(new Mock); m->setName("Eval_B"); m->evaluates("B"); - m->requires("D"); + m->depends("D"); dag.registerEvaluator(m); } { RCP m = rcp(new Mock); m->setName("Eval_C"); m->evaluates("C"); - m->requires("D"); + m->depends("D"); dag.registerEvaluator(m); } { @@ -469,8 +469,8 @@ TEUCHOS_UNIT_TEST(dag, contrib_and_eval_B) RCP m = rcp(new Mock); m->setName("Eval_A"); m->evaluates("A"); - m->requires("B"); - m->requires("C"); + m->depends("B"); + m->depends("C"); dag.registerEvaluator(m); } { @@ -483,7 +483,7 @@ TEUCHOS_UNIT_TEST(dag, contrib_and_eval_B) RCP m = rcp(new Mock); m->setName("Eval_C"); m->evaluates("C"); - m->requires("D"); + m->depends("D"); dag.registerEvaluator(m); } { @@ -496,14 +496,14 @@ TEUCHOS_UNIT_TEST(dag, contrib_and_eval_B) RCP m = rcp(new Mock); m->setName("Eval_B+"); m->contributes("B"); - m->requires("D"); + m->depends("D"); dag.registerEvaluator(m); } { // Contributes to B also RCP m = rcp(new Mock); m->setName("Eval_B++"); m->contributes("B"); - m->requires("D"); + m->depends("D"); dag.registerEvaluator(m); } @@ -572,15 +572,15 @@ TEUCHOS_UNIT_TEST(dag, contrib_only_B) RCP m = rcp(new Mock); m->setName("Eval_A"); m->evaluates("A"); - m->requires("B"); - m->requires("C"); + m->depends("B"); + m->depends("C"); dag.registerEvaluator(m); } { RCP m = rcp(new Mock); 
m->setName("Eval_C"); m->evaluates("C"); - m->requires("D"); + m->depends("D"); dag.registerEvaluator(m); } { @@ -593,14 +593,14 @@ TEUCHOS_UNIT_TEST(dag, contrib_only_B) RCP m = rcp(new Mock); m->setName("Eval_B+"); m->contributes("B"); - m->requires("D"); + m->depends("D"); dag.registerEvaluator(m); } { // Contributes to B also RCP m = rcp(new Mock); m->setName("Eval_B++"); m->contributes("B"); - m->requires("D"); + m->depends("D"); dag.registerEvaluator(m); } @@ -665,7 +665,7 @@ TEUCHOS_UNIT_TEST(dag, alias_field) RCP m = rcp(new Mock); m->setName("Eval_A"); m->evaluates("A"); - m->requires("B"); + m->depends("B"); dag.registerEvaluator(m); } { @@ -746,14 +746,14 @@ TEUCHOS_UNIT_TEST(dag, use_range_and_unshared) RCP e = rcp(new Mock); e->setName("c"); e->evaluates("f3"); - e->requires("f2"); + e->depends("f2"); dag.registerEvaluator(e); } { RCP e = rcp(new Mock); e->setName("e"); e->evaluates("f4"); - e->requires("f3"); + e->depends("f3"); dag.registerEvaluator(e); } { @@ -766,7 +766,7 @@ TEUCHOS_UNIT_TEST(dag, use_range_and_unshared) RCP e = rcp(new Mock); e->setName("b"); e->evaluates("f2"); - e->requires("f1"); + e->depends("f1"); e->unshared("f2"); e->unshared("f1"); dag.registerEvaluator(e); @@ -997,7 +997,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_only) RCP e = rcp(new Mock); e->setName("Convection Operator"); e->contributes("Residual",use_dynamic_layout); - e->requires("X",use_dynamic_layout); + e->depends("X",use_dynamic_layout); dm.registerEvaluator(e); } @@ -1005,7 +1005,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_only) RCP e = rcp(new Mock); e->setName("Diffusion Operator"); e->contributes("Residual",use_dynamic_layout); - e->requires("X",use_dynamic_layout); + e->depends("X",use_dynamic_layout); dm.registerEvaluator(e); } @@ -1013,7 +1013,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_only) RCP e = rcp(new Mock); e->setName("Reaction Operator"); e->contributes("Residual",use_dynamic_layout); - e->requires("X",use_dynamic_layout); + 
e->depends("X",use_dynamic_layout); dm.registerEvaluator(e); } @@ -1023,7 +1023,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_only) // Important that this is "contributes" to catch writing graph // output correctly. e->contributes("Scatter",use_dynamic_layout); - e->requires("Residual",use_dynamic_layout); + e->depends("Residual",use_dynamic_layout); dm.registerEvaluator(e); } @@ -1086,7 +1086,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_and_evalauted) RCP e = rcp(new Mock); e->setName("Initialize"); e->evaluates("Residual",use_dynamic_layout); - e->requires("X",use_dynamic_layout); + e->depends("X",use_dynamic_layout); dm.registerEvaluator(e); } @@ -1094,7 +1094,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_and_evalauted) RCP e = rcp(new Mock); e->setName("Convection Operator"); e->contributes("Residual",use_dynamic_layout); - e->requires("X",use_dynamic_layout); + e->depends("X",use_dynamic_layout); dm.registerEvaluator(e); } @@ -1102,7 +1102,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_and_evalauted) RCP e = rcp(new Mock); e->setName("Diffusion Operator"); e->contributes("Residual",use_dynamic_layout); - e->requires("X",use_dynamic_layout); + e->depends("X",use_dynamic_layout); dm.registerEvaluator(e); } @@ -1110,7 +1110,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_and_evalauted) RCP e = rcp(new Mock); e->setName("Reaction Operator"); e->contributes("Residual",use_dynamic_layout); - e->requires("X",use_dynamic_layout); + e->depends("X",use_dynamic_layout); dm.registerEvaluator(e); } @@ -1120,7 +1120,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_and_evalauted) // Important that this is "contributes" to catch writing graph // output correctly. 
e->contributes("Scatter",use_dynamic_layout); - e->requires("Residual",use_dynamic_layout); + e->depends("Residual",use_dynamic_layout); dm.registerEvaluator(e); } diff --git a/packages/phalanx/test/EvaluatorMacros/EvaluatorMacrosTest.cpp b/packages/phalanx/test/EvaluatorMacros/EvaluatorMacrosTest.cpp index 57081eb2fe88..abb938d199e8 100644 --- a/packages/phalanx/test/EvaluatorMacros/EvaluatorMacrosTest.cpp +++ b/packages/phalanx/test/EvaluatorMacros/EvaluatorMacrosTest.cpp @@ -46,8 +46,8 @@ TEUCHOS_UNIT_TEST(evaluator_macros, basic) RCP a = rcp(new Ev1(*plist_a)); a->setName("Eval_A"); a->evaluates("A"); - a->requires("B"); - a->requires("C"); + a->depends("B"); + a->depends("C"); fm.registerEvaluator(a); } { @@ -55,7 +55,7 @@ TEUCHOS_UNIT_TEST(evaluator_macros, basic) RCP b = rcp(new Ev2(*plist_b)); b->setName("Eval_B"); b->evaluates("B"); - b->requires("D"); + b->depends("D"); fm.registerEvaluator(b); } { @@ -63,7 +63,7 @@ TEUCHOS_UNIT_TEST(evaluator_macros, basic) RCP c = rcp(new Ev2(*plist_c)); c->setName("Eval_C"); c->evaluates("C"); - c->requires("D"); + c->depends("D"); fm.registerEvaluator(c); } { diff --git a/packages/phalanx/test/EvaluatorMacros/EvaluatorWithMacros.hpp b/packages/phalanx/test/EvaluatorMacros/EvaluatorWithMacros.hpp index 13badbfcdb06..de1a86677e2b 100644 --- a/packages/phalanx/test/EvaluatorMacros/EvaluatorWithMacros.hpp +++ b/packages/phalanx/test/EvaluatorMacros/EvaluatorWithMacros.hpp @@ -19,7 +19,7 @@ namespace PHX { PHX_EVALUATOR_CLASS(EvaluatorWithMacros1) public: void evaluates(const std::string& field_name); - void requires(const std::string& field_name); + void depends(const std::string& field_name); void bindField(const PHX::FieldTag& ft, const std::any& f); PHX_EVALUATOR_CLASS_END @@ -27,7 +27,7 @@ namespace PHX { PHX_EVALUATOR_CLASS_PP(EvaluatorWithMacros2) public: void evaluates(const std::string& field_name); - void requires(const std::string& field_name); + void depends(const std::string& field_name); void bindField(const 
PHX::FieldTag& ft, const std::any& f); PHX_EVALUATOR_CLASS_END diff --git a/packages/phalanx/test/EvaluatorMacros/EvaluatorWithMacros_Def.hpp b/packages/phalanx/test/EvaluatorMacros/EvaluatorWithMacros_Def.hpp index 8361a2bc070c..8e35a31a3850 100644 --- a/packages/phalanx/test/EvaluatorMacros/EvaluatorWithMacros_Def.hpp +++ b/packages/phalanx/test/EvaluatorMacros/EvaluatorWithMacros_Def.hpp @@ -45,7 +45,7 @@ namespace PHX { } template - void EvaluatorWithMacros1::requires(const std::string& n) + void EvaluatorWithMacros1::depends(const std::string& n) { using Teuchos::RCP; using Teuchos::rcp; @@ -105,7 +105,7 @@ namespace PHX { } template - void EvaluatorWithMacros2::requires(const std::string& n) + void EvaluatorWithMacros2::depends(const std::string& n) { using Teuchos::RCP; using Teuchos::rcp; diff --git a/packages/phalanx/test/Kokkos/CMakeLists.txt b/packages/phalanx/test/Kokkos/CMakeLists.txt index a13eaf8b99a6..98b582cd8c2a 100644 --- a/packages/phalanx/test/Kokkos/CMakeLists.txt +++ b/packages/phalanx/test/Kokkos/CMakeLists.txt @@ -1,11 +1,14 @@ TRIBITS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) TRIBITS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}/../Utilities) +# RUN_SERIAL is added since UniqueToken can require a large amount of +# memory on GPUs. 
TRIBITS_ADD_EXECUTABLE_AND_TEST( tKokkos SOURCES tKokkos.cpp TESTONLYLIBS phalanx_unit_test_main phalanx_test_utilities NUM_MPI_PROCS 1 + RUN_SERIAL ) TRIBITS_ADD_EXECUTABLE_AND_TEST( diff --git a/packages/phalanx/test/Kokkos/tKokkos.cpp b/packages/phalanx/test/Kokkos/tKokkos.cpp index cdadac61779d..b42e8355e4b3 100644 --- a/packages/phalanx/test/Kokkos/tKokkos.cpp +++ b/packages/phalanx/test/Kokkos/tKokkos.cpp @@ -1,6 +1,6 @@ // @HEADER // ***************************************************************************** -// Phalanx: A Partial Differential Equation Field Evaluation +// Phalanx: A Partial Differential Equation Field Evaluation // Kernel for Flexible Management of Complex Dependency Chains // // Copyright 2008 NTESS and the Phalanx contributors. @@ -828,6 +828,8 @@ namespace phalanx_test { #if defined(KOKKOS_ENABLE_CUDA) using DefaultFadLayout = Kokkos::LayoutContiguous; +#elif defined(KOKKOS_ENABLE_HIP) + using DefaultFadLayout = Kokkos::LayoutContiguous; #else using DefaultFadLayout = Kokkos::LayoutContiguous; #endif @@ -841,13 +843,13 @@ namespace phalanx_test { static_assert(std::is_same::value,"ERROR: Layout Inconsistency!"); static_assert(std::is_same::value,"ERROR: Layout Inconsistency!"); - std::cout << "\n\nscalar_view_layout = " << PHX::print() << std::endl; - std::cout << "scalar_dev_layout = " << PHX::print() << std::endl; - std::cout << "DefaultDevLayout = " << PHX::print() << "\n" << std::endl; + out << "\n\nscalar_view_layout = " << PHX::print() << std::endl; + out << "scalar_dev_layout = " << PHX::print() << std::endl; + out << "DefaultDevLayout = " << PHX::print() << "\n" << std::endl; - std::cout << "fad_view_layout = " << PHX::print() << std::endl; - std::cout << "fad_dev_layout = " << PHX::print() << std::endl; - std::cout << "DefaultFadLayout = " << PHX::print() << "\n" << std::endl; + out << "fad_view_layout = " << PHX::print() << std::endl; + out << "fad_dev_layout = " << PHX::print() << std::endl; + out << "DefaultFadLayout = " << 
PHX::print() << "\n" << std::endl; // Tests for assignments from static View to DynRankView Kokkos::View::type,PHX::Device> static_a("static_a",100,8,64); @@ -969,4 +971,102 @@ namespace phalanx_test { TEST_FLOATING_EQUALITY(mean,mean_gold,tol); TEST_FLOATING_EQUALITY(stddev,stddev_gold,tol); } + + struct Inner { + Kokkos::Experimental::UniqueToken token_; + }; + + struct Outer { + Inner inner_; + }; + + TEUCHOS_UNIT_TEST(kokkos, UniqueToken) + { + Kokkos::print_configuration(out); + + using ExecutionSpace = PHX::exec_space; + + Kokkos::Experimental::UniqueToken token; + + out << "\nExecutionSpace.concurrency() = " << ExecutionSpace().concurrency() << std::endl; + out << "UniqueToken.size() = " << token.size() << std::endl; + + TEST_EQUALITY(ExecutionSpace().concurrency(), token.size()); + + const size_t num_elements = token.size()+10; + Outer o; + + Kokkos::View scratch_space("scratch space",token.size()); + Kokkos::parallel_for("unique token",num_elements,KOKKOS_LAMBDA(const int cell){ + Kokkos::Experimental::AcquireUniqueToken lock(o.inner_.token_); + const auto t = lock.value(); + scratch_space(t) = cell; + // printf("cell=%d, t=%u, equal=%u\n",cell,t,unsigned(cell == t)); + }); + } + + TEUCHOS_UNIT_TEST(kokkos, ReduceCheck) + { + constexpr int size = 10; + double gold_sum = 0.0; + Kokkos::View parts("parts",size); + auto parts_host = Kokkos::create_mirror_view(parts); + for (int i=0; i < size; ++i) { + parts_host(i) = double(i); + + if (i%2 == 0) + gold_sum += double(i); + } + Kokkos::deep_copy(parts,parts_host); + + double sum = 0.0; + Kokkos::parallel_reduce("sum",10,KOKKOS_LAMBDA(const int i, double& tmp){ + if (i%2 == 0) + tmp += parts(i); + // printf("tmp(%d)=%f \n",i,tmp); + },sum); + out << "sum = " << sum << std::endl; + const double tol = Teuchos::ScalarTraits::eps()*1000.0; + TEST_FLOATING_EQUALITY(sum,gold_sum,tol); + } + + TEUCHOS_UNIT_TEST(kokkos, ScanCheck) + { + constexpr int size = 10; + Kokkos::View parts("parts",size); + auto parts_host = 
Kokkos::create_mirror_view(parts); + for (int i=0; i < size; ++i) + parts_host(i)=double(i); + Kokkos::deep_copy(parts,parts_host); + + Kokkos::View inclusive_scan("inclusive",size); + Kokkos::View exclusive_scan("exclusive",size); + double result = 0.0; + Kokkos::parallel_scan("sum",10,KOKKOS_LAMBDA(const int i, double& partial_sum, const bool is_final){ + if (is_final) + exclusive_scan(i) = partial_sum; + + partial_sum += parts(i); + + if (is_final) + inclusive_scan(i) += partial_sum; + + // printf("partial_sum(%d)=%f, is_final=%d \n",i,partial_sum,int(is_final)); + },result); + + auto is_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),inclusive_scan); + auto es_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),exclusive_scan); + + for (int i=0; i < size; ++i) + out << "inclusive_scan(" << i << ") = " << is_host(i) << ", parts(" << i << ") = " << parts_host(i) << std::endl; + for (int i=0; i < size; ++i) + out << "exclusive_scan(" << i << ") = " << es_host(i) << ", parts(" << i << ") = " << parts_host(i) << std::endl; + out << "result (exclusive end) = " << result << std::endl; + + const double tol = Teuchos::ScalarTraits::eps()*100.0; + for (int i=0; i < size; ++i) { + TEST_FLOATING_EQUALITY(is_host(i)-es_host(i), parts_host(i), tol); + } + } + } diff --git a/packages/phalanx/test/Utilities/Evaluator_MockDAG.hpp b/packages/phalanx/test/Utilities/Evaluator_MockDAG.hpp index 378406c1ea0b..3981f832352c 100644 --- a/packages/phalanx/test/Utilities/Evaluator_MockDAG.hpp +++ b/packages/phalanx/test/Utilities/Evaluator_MockDAG.hpp @@ -25,7 +25,7 @@ namespace PHX { PHX::FieldManager& fm); void evaluateFields(typename Traits::EvalData d); void evaluates(const std::string& field_name, const bool use_dynamic_layout=false); - void requires(const std::string& field_name, const bool use_dynamic_layout=false); + void depends(const std::string& field_name, const bool use_dynamic_layout=false); void contributes(const std::string& field_name, const 
bool use_dynamic_layout=false); void unshared(const std::string& field_name); }; diff --git a/packages/phalanx/test/Utilities/Evaluator_MockDAG_Def.hpp b/packages/phalanx/test/Utilities/Evaluator_MockDAG_Def.hpp index d755b35f50ef..4c6884f8cbdd 100644 --- a/packages/phalanx/test/Utilities/Evaluator_MockDAG_Def.hpp +++ b/packages/phalanx/test/Utilities/Evaluator_MockDAG_Def.hpp @@ -48,8 +48,8 @@ namespace PHX { } template - void MockDAG::requires(const std::string& n, - const bool use_dynamic_layout) + void MockDAG::depends(const std::string& n, + const bool use_dynamic_layout) { using Teuchos::RCP; using Teuchos::rcp; diff --git a/packages/rtop/src/support/RTOpPack_SPMD_apply_op_decl.hpp b/packages/rtop/src/support/RTOpPack_SPMD_apply_op_decl.hpp index 3e23800b14d4..07bd6071ab84 100644 --- a/packages/rtop/src/support/RTOpPack_SPMD_apply_op_decl.hpp +++ b/packages/rtop/src/support/RTOpPack_SPMD_apply_op_decl.hpp @@ -156,7 +156,7 @@ class ReductTargetReductionOp Teuchos::RCP > op_; // Not defined and not to be called! 
ReductTargetReductionOp(); - ReductTargetReductionOp(const ReductTargetReductionOp&); + ReductTargetReductionOp(const ReductTargetReductionOp&); ReductTargetReductionOp& operator=(const ReductTargetReductionOp&); }; diff --git a/packages/sacado/src/KokkosExp_View_Fad_Contiguous.hpp b/packages/sacado/src/KokkosExp_View_Fad_Contiguous.hpp index 1dcf26543372..c5db121cdb59 100644 --- a/packages/sacado/src/KokkosExp_View_Fad_Contiguous.hpp +++ b/packages/sacado/src/KokkosExp_View_Fad_Contiguous.hpp @@ -148,7 +148,11 @@ namespace Sacado { #include "Sacado_Traits.hpp" #include "Kokkos_Core.hpp" +#if KOKKOS_VERSION >= 40499 +#include "View/Kokkos_ViewMapping.hpp" +#else #include "impl/Kokkos_ViewMapping.hpp" +#endif //---------------------------------------------------------------------------- diff --git a/packages/sacado/src/Kokkos_LayoutContiguous.hpp b/packages/sacado/src/Kokkos_LayoutContiguous.hpp index dedc05c78a86..acb722d2fe99 100644 --- a/packages/sacado/src/Kokkos_LayoutContiguous.hpp +++ b/packages/sacado/src/Kokkos_LayoutContiguous.hpp @@ -73,6 +73,7 @@ struct inner_layout< LayoutContiguous > { } // namespace Kokkos +// FIXME This is evil and needs refactoring urgently. 
// Make LayoutContiguous equivalent to Layout namespace std { @@ -81,14 +82,31 @@ namespace std { static const bool value = true; }; + template +#if defined(KOKKOS_COMPILER_INTEL) + inline constexpr bool is_same_v< Kokkos::LayoutContiguous, Layout> = is_same, Layout>::value; +#else + static constexpr bool is_same_v< Kokkos::LayoutContiguous, Layout> = is_same, Layout>::value; +#endif + template struct is_same< Layout, Kokkos::LayoutContiguous > { static const bool value = true; }; + template +#if defined(KOKKOS_COMPILER_INTEL) + inline constexpr bool is_same_v< Layout, Kokkos::LayoutContiguous> = is_same, Layout>::value; +#else + static constexpr bool is_same_v< Layout, Kokkos::LayoutContiguous> = is_same, Layout>::value; +#endif } +#if KOKKOS_VERSION >= 40499 +#include "View/Kokkos_ViewMapping.hpp" +#else #include "impl/Kokkos_ViewMapping.hpp" +#endif namespace Kokkos { namespace Impl { diff --git a/packages/sacado/src/Kokkos_LayoutNatural.hpp b/packages/sacado/src/Kokkos_LayoutNatural.hpp index e4e77d023c1c..1a5ae982295f 100644 --- a/packages/sacado/src/Kokkos_LayoutNatural.hpp +++ b/packages/sacado/src/Kokkos_LayoutNatural.hpp @@ -79,7 +79,11 @@ namespace std { } +#if KOKKOS_VERSION >= 40499 +#include "View/Kokkos_ViewMapping.hpp" +#else #include "impl/Kokkos_ViewMapping.hpp" +#endif namespace Kokkos { namespace Impl { diff --git a/packages/sacado/src/new_design/Sacado_Fad_Exp_ViewStorage.hpp b/packages/sacado/src/new_design/Sacado_Fad_Exp_ViewStorage.hpp index 184eea7bae11..3b67f97d8cf3 100644 --- a/packages/sacado/src/new_design/Sacado_Fad_Exp_ViewStorage.hpp +++ b/packages/sacado/src/new_design/Sacado_Fad_Exp_ViewStorage.hpp @@ -79,7 +79,7 @@ namespace Sacado { //! Constructor SACADO_INLINE_FUNCTION ViewStorage(T* v, const int arg_size = 0, const int arg_stride = 0) : - sz_(arg_size), stride_(arg_stride), val_(v+sz_.value*stride_.value), dx_(v) {} + sz_(arg_size), stride_(arg_stride), val_(v+sz_.value*static_cast(stride_.value)), dx_(v) {} //! 
Constructor SACADO_INLINE_FUNCTION diff --git a/packages/seacas/applications/cpup/cpup.C b/packages/seacas/applications/cpup/cpup.C index c88c46703bda..0024535483e5 100644 --- a/packages/seacas/applications/cpup/cpup.C +++ b/packages/seacas/applications/cpup/cpup.C @@ -35,6 +35,19 @@ unsigned int debug_level = 0; +#if FMT_VERSION >= 90000 +namespace fmt { + template <> struct formatter : ostream_formatter + { + }; +} // namespace fmt +namespace fmt { + template <> struct formatter : ostream_formatter + { + }; +} // namespace fmt +#endif + namespace { std::string tsFormat = "[{:%H:%M:%S}] "; diff --git a/packages/seacas/applications/epu/epu.C b/packages/seacas/applications/epu/epu.C index 30a11cefd1a1..79134fd827b7 100644 --- a/packages/seacas/applications/epu/epu.C +++ b/packages/seacas/applications/epu/epu.C @@ -1453,8 +1453,8 @@ int epu(SystemInterface &interFace, int start_part, int part_count, int cycle, T for (int ig = 0; ig < global_vars.count(InOut::IN); ig++) { if (proc_global_values[ig] != global_values[ig]) { fmt::print(stderr, - "At step {:{}}, Global Variable {:{}}, P{:0{}} = {:15.8g}, P{:0{}} = " - "{:15.8g}\n", + fmt::runtime("At step {:{}}, Global Variable {:{}}, P{:0{}} = {:15.8g}, P{:0{}} = " + "{:15.8g}\n"), time_step + 1, ts_max + 1, ig + 1, get_width(global_vars.count(InOut::IN)), start_part, get_width(interFace.processor_count()), start_part + p, diff --git a/packages/seacas/applications/exodiff/edge_block.C b/packages/seacas/applications/exodiff/edge_block.C index 1debce5fadb0..6696ecdf980a 100644 --- a/packages/seacas/applications/exodiff/edge_block.C +++ b/packages/seacas/applications/exodiff/edge_block.C @@ -50,12 +50,12 @@ template void Edge_Block::entity_load_params() if (num_edges_per_elmt < 0 || num_attr < 0) { Error(fmt::format( - "Edge_Block::entity_load_params(): Data appears corrupt for edge block {}!\n" - "\tnum elmts = {}\n" - "\tnum edges per elmt = {}\n" - "\tnum attributes = {}\n" - " ... 
Aborting...\n", - fmt::group_digits(numEntity), num_edges_per_elmt, num_attr)); + fmt::runtime("Edge_Block::entity_load_params(): Data appears corrupt for edge block {}!\n" + "\tnum elmts = {}\n" + "\tnum edges per elmt = {}\n" + "\tnum attributes = {}\n" + " ... Aborting...\n"), + fmt::group_digits(numEntity), num_edges_per_elmt, num_attr)); } } diff --git a/packages/seacas/applications/exodiff/exo_block.C b/packages/seacas/applications/exodiff/exo_block.C index ab601e200f7e..4eb93464c746 100644 --- a/packages/seacas/applications/exodiff/exo_block.C +++ b/packages/seacas/applications/exodiff/exo_block.C @@ -53,11 +53,11 @@ template void Exo_Block::entity_load_params() elmt_type = block.topology; if (num_nodes_per_elmt < 0 || num_attr < 0) { - Error(fmt::format("Exo_Block::entity_load_params(): Data appears corrupt for block {}!\n" + Error(fmt::format(fmt::runtime("Exo_Block::entity_load_params(): Data appears corrupt for block {}!\n" "\tnum elmts = {}\n" "\tnum nodes per elmt = {}\n" "\tnum attributes = {}\n" - " ... Aborting...\n", + " ... Aborting...\n"), fmt::group_digits(numEntity), num_nodes_per_elmt, num_attr)); } } diff --git a/packages/seacas/applications/exodiff/face_block.C b/packages/seacas/applications/exodiff/face_block.C index 11b4d8ba6fb8..d8d8b7a21f7e 100644 --- a/packages/seacas/applications/exodiff/face_block.C +++ b/packages/seacas/applications/exodiff/face_block.C @@ -50,11 +50,11 @@ template void Face_Block::entity_load_params() if (num_faces_per_elmt < 0 || num_attr < 0) { Error(fmt::format( - "Face_Block::entity_load_params(): Data appears corrupt for face block {}!\n" - "\tnum elmts = {}\n" - "\tnum faces per elmt = {}\n" - "\tnum attributes = {}\n" - " ... Aborting...\n", + fmt::runtime("Face_Block::entity_load_params(): Data appears corrupt for face block {}!\n" + "\tnum elmts = {}\n" + "\tnum faces per elmt = {}\n" + "\tnum attributes = {}\n" + " ... 
Aborting...\n"), fmt::group_digits(numEntity), num_faces_per_elmt, num_attr)); } } diff --git a/packages/seacas/applications/nem_spread/pe_input.C b/packages/seacas/applications/nem_spread/pe_input.C index fda7e6b4f4b6..4fc6af2933f4 100644 --- a/packages/seacas/applications/nem_spread/pe_input.C +++ b/packages/seacas/applications/nem_spread/pe_input.C @@ -243,13 +243,13 @@ int read_pexoII_info(NemSpread &spreader, const char *filename) /* "{" defines the beginning of the group designator */ cptr2 = strchr(cptr, '{'); if (cptr2 == nullptr) { - fmt::print(stderr, "fatal: list start designator \"{\" not found"); + fmt::print(stderr, fmt::runtime("fatal: list start designator \"{\" not found")); exit(1); } cptr2++; cptr3 = strchr(cptr, '}'); if (cptr3 == nullptr) { - fmt::print(stderr, "fatal: list end designator \"}\" not found"); + fmt::print(stderr, fmt::runtime("fatal: list end designator \"}\" not found")); exit(1); } *cptr3 = '\0'; diff --git a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C index d6d5fdd097b4..f3c1e051ca34 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C +++ b/packages/seacas/libraries/ioss/src/Ioss_DecompositionUtils.C @@ -501,14 +501,14 @@ namespace Ioss { for (size_t i = 0; i < elem_per_rank.size(); i++) { int star_cnt = (double)(elem_per_rank[i] - min_work) / (max_work - min_work) * delta + min_star; - std::string stars(star_cnt, '*'); - std::string format = "\tProcessor {:{}}, work = {:{}} ({:.2f})\t{}\n"; + std::string stars(star_cnt, '*'); + const std::string format = "\tProcessor {:{}}, work = {:{}} ({:.2f})\t{}\n"; if (elem_per_rank[i] == max_work) { fmt::print( #if !defined __NVCC__ fg(fmt::color::red), #endif - format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, + fmt::runtime(format), i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, (double)elem_per_rank[i] / avg_work, stars); } else if 
(elem_per_rank[i] == min_work) { @@ -516,12 +516,12 @@ namespace Ioss { #if !defined __NVCC__ fg(fmt::color::green), #endif - format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, + fmt::runtime(format), i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, elem_per_rank[i] / avg_work, stars); } else { - fmt::print(format, i, proc_width, fmt::group_digits(elem_per_rank[i]), work_width, - elem_per_rank[i] / avg_work, stars); + fmt::print(fmt::runtime(format), i, proc_width, fmt::group_digits(elem_per_rank[i]), + work_width, elem_per_rank[i] / avg_work, stars); } } diff --git a/packages/seacas/libraries/ioss/src/Ioss_DynamicTopology.C b/packages/seacas/libraries/ioss/src/Ioss_DynamicTopology.C index 639f6ba47a59..16b73da0a4ee 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_DynamicTopology.C +++ b/packages/seacas/libraries/ioss/src/Ioss_DynamicTopology.C @@ -432,7 +432,7 @@ std::string DynamicTopologyFileControl::construct_database_filename(int& step, I error_message += "The database FILENAME has not been defined\n"; } std::ostringstream errmsg; - fmt::print(errmsg, error_message); + fmt::print(errmsg, fmt::runtime(error_message)); IOSS_ERROR(errmsg); } assert(!m_ioDB.empty()); diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C index d368bd2ff93d..d84be0925268 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C @@ -684,6 +684,7 @@ namespace Ioex { { Ioss::SerializeIO serializeIO_(this); m_timestepCount = ex_inquire_int(get_file_pointer(), EX_INQ_TIME); + } // Need to sync timestep count across ranks if parallel... 
if (isParallel) { auto min_timestep_count = util().global_minmax(m_timestepCount, Ioss::ParallelUtils::DO_MIN); @@ -725,6 +726,7 @@ namespace Ioex { Ioss::Utils::check_set_bool_property(properties, "EXODUS_CALL_GET_ALL_TIMES", call_ex_get_all_times); if (call_ex_get_all_times) { + Ioss::SerializeIO serializeIO_(this); int error = ex_get_all_times(get_file_pointer(), Data(tsteps)); if (error < 0) { Ioex::exodus_error(get_file_pointer(), __LINE__, __func__, __FILE__); @@ -733,8 +735,11 @@ namespace Ioex { // See if the "last_written_time" attribute exists and if it // does, check that it matches the largest time in 'tsteps'. - exists = Ioex::read_last_time_attribute(get_file_pointer(), &last_time); - } + { + Ioss::SerializeIO serializeIO_(this); + exists = Ioex::read_last_time_attribute(get_file_pointer(), &last_time); + } + if (exists && isParallel) { // Assume that if it exists on 1 processor, it exists on // all... Sync value among processors since could have a diff --git a/packages/seacas/libraries/ioss/src/main/cgns_decomp.C b/packages/seacas/libraries/ioss/src/main/cgns_decomp.C index 850afe05fb49..2ef43f03c6b6 100644 --- a/packages/seacas/libraries/ioss/src/main/cgns_decomp.C +++ b/packages/seacas/libraries/ioss/src/main/cgns_decomp.C @@ -633,13 +633,13 @@ namespace { int star_cnt = (double)(proc_work[i] - min_work) / (max_work - min_work) * delta + min_star; std::string stars(star_cnt, '*'); - std::string format = "\tProcessor {:{}}, work = {:{}} ({:.2f})\t{}\n"; + const std::string format = "\tProcessor {:{}}, work = {:{}} ({:.2f})\t{}\n"; if (proc_work[i] == max_work) { fmt::print( #if !defined __NVCC__ fg(fmt::color::red), #endif - format, i, proc_width, fmt::group_digits(proc_work[i]), work_width, + fmt::runtime(format), i, proc_width, fmt::group_digits(proc_work[i]), work_width, proc_work[i] / avg_work, stars); } else if (proc_work[i] == min_work) { @@ -647,11 +647,11 @@ namespace { #if !defined __NVCC__ fg(fmt::color::green), #endif - format, i, 
proc_width, fmt::group_digits(proc_work[i]), work_width, + fmt::runtime(format), i, proc_width, fmt::group_digits(proc_work[i]), work_width, proc_work[i] / avg_work, stars); } else { - fmt::print(format, i, proc_width, fmt::group_digits(proc_work[i]), work_width, + fmt::print(fmt::runtime(format), i, proc_width, fmt::group_digits(proc_work[i]), work_width, proc_work[i] / avg_work, stars); } if (verbose) { diff --git a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/args.h b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/args.h index a3966d140719..31a60e8faf1a 100644 --- a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/args.h +++ b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/args.h @@ -1,4 +1,4 @@ -// Formatting library for C++ - dynamic format arguments +// Formatting library for C++ - dynamic argument lists // // Copyright (c) 2012 - present, Victor Zverovich // All rights reserved. @@ -8,11 +8,13 @@ #ifndef FMT_ARGS_H_ #define FMT_ARGS_H_ -#include // std::reference_wrapper -#include // std::unique_ptr -#include +#ifndef FMT_MODULE +# include // std::reference_wrapper +# include // std::unique_ptr +# include +#endif -#include "core.h" +#include "format.h" // std_string_view FMT_BEGIN_NAMESPACE @@ -22,20 +24,24 @@ template struct is_reference_wrapper : std::false_type {}; template struct is_reference_wrapper> : std::true_type {}; -template const T& unwrap(const T& v) { return v; } -template const T& unwrap(const std::reference_wrapper& v) { +template auto unwrap(const T& v) -> const T& { return v; } +template +auto unwrap(const std::reference_wrapper& v) -> const T& { return static_cast(v); } -class dynamic_arg_list { - // Workaround for clang's -Wweak-vtables. Unlike for regular classes, for - // templates it doesn't complain about inability to deduce single translation - // unit for placing vtable. So storage_node_base is made a fake template. 
- template struct node { - virtual ~node() = default; - std::unique_ptr> next; - }; +// node is defined outside dynamic_arg_list to workaround a C2504 bug in MSVC +// 2022 (v17.10.0). +// +// Workaround for clang's -Wweak-vtables. Unlike for regular classes, for +// templates it doesn't complain about inability to deduce single translation +// unit for placing vtable. So node is made a fake template. +template struct node { + virtual ~node() = default; + std::unique_ptr> next; +}; +class dynamic_arg_list { template struct typed_node : node<> { T value; @@ -50,7 +56,7 @@ class dynamic_arg_list { std::unique_ptr> head_; public: - template const T& push(const Arg& arg) { + template auto push(const Arg& arg) -> const T& { auto new_node = std::unique_ptr>(new typed_node(arg)); auto& value = new_node->value; new_node->next = std::move(head_); @@ -61,14 +67,10 @@ class dynamic_arg_list { } // namespace detail /** - \rst - A dynamic version of `fmt::format_arg_store`. - It's equipped with a storage to potentially temporary objects which lifetimes - could be shorter than the format arguments object. - - It can be implicitly converted into `~fmt::basic_format_args` for passing - into type-erased formatting functions such as `~fmt::vformat`. - \endrst + * A dynamic list of formatting arguments with storage. + * + * It can be implicitly converted into `fmt::basic_format_args` for passing + * into type-erased formatting functions such as `fmt::vformat`. */ template class dynamic_format_arg_store @@ -110,14 +112,14 @@ class dynamic_format_arg_store friend class basic_format_args; - unsigned long long get_types() const { + auto get_types() const -> unsigned long long { return detail::is_unpacked_bit | data_.size() | (named_info_.empty() ? 0ULL : static_cast(detail::has_named_args_bit)); } - const basic_format_arg* data() const { + auto data() const -> const basic_format_arg* { return named_info_.empty() ? 
data_.data() : data_.data() + 1; } @@ -146,22 +148,20 @@ class dynamic_format_arg_store constexpr dynamic_format_arg_store() = default; /** - \rst - Adds an argument into the dynamic store for later passing to a formatting - function. - - Note that custom types and string types (but not string views) are copied - into the store dynamically allocating memory if necessary. - - **Example**:: - - fmt::dynamic_format_arg_store store; - store.push_back(42); - store.push_back("abc"); - store.push_back(1.5f); - std::string result = fmt::vformat("{} and {} and {}", store); - \endrst - */ + * Adds an argument into the dynamic store for later passing to a formatting + * function. + * + * Note that custom types and string types (but not string views) are copied + * into the store dynamically allocating memory if necessary. + * + * **Example**: + * + * fmt::dynamic_format_arg_store store; + * store.push_back(42); + * store.push_back("abc"); + * store.push_back(1.5f); + * std::string result = fmt::vformat("{} and {} and {}", store); + */ template void push_back(const T& arg) { if (detail::const_check(need_copy::value)) emplace_arg(dynamic_args_.push>(arg)); @@ -170,20 +170,18 @@ class dynamic_format_arg_store } /** - \rst - Adds a reference to the argument into the dynamic store for later passing to - a formatting function. - - **Example**:: - - fmt::dynamic_format_arg_store store; - char band[] = "Rolling Stones"; - store.push_back(std::cref(band)); - band[9] = 'c'; // Changing str affects the output. - std::string result = fmt::vformat("{}", store); - // result == "Rolling Scones" - \endrst - */ + * Adds a reference to the argument into the dynamic store for later passing + * to a formatting function. + * + * **Example**: + * + * fmt::dynamic_format_arg_store store; + * char band[] = "Rolling Stones"; + * store.push_back(std::cref(band)); + * band[9] = 'c'; // Changing str affects the output. 
+ * std::string result = fmt::vformat("{}", store); + * // result == "Rolling Scones" + */ template void push_back(std::reference_wrapper arg) { static_assert( need_copy::value, @@ -192,10 +190,10 @@ class dynamic_format_arg_store } /** - Adds named argument into the dynamic store for later passing to a formatting - function. ``std::reference_wrapper`` is supported to avoid copying of the - argument. The name is always copied into the store. - */ + * Adds named argument into the dynamic store for later passing to a + * formatting function. `std::reference_wrapper` is supported to avoid + * copying of the argument. The name is always copied into the store. + */ template void push_back(const detail::named_arg& arg) { const char_type* arg_name = @@ -208,19 +206,15 @@ class dynamic_format_arg_store } } - /** Erase all elements from the store */ + /// Erase all elements from the store. void clear() { data_.clear(); named_info_.clear(); dynamic_args_ = detail::dynamic_arg_list(); } - /** - \rst - Reserves space to store at least *new_cap* arguments including - *new_cap_named* named arguments. - \endrst - */ + /// Reserves space to store at least `new_cap` arguments including + /// `new_cap_named` named arguments. void reserve(size_t new_cap, size_t new_cap_named) { FMT_ASSERT(new_cap >= new_cap_named, "Set of arguments includes set of named arguments"); diff --git a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/base.h b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/base.h new file mode 100644 index 000000000000..e1568b040c8a --- /dev/null +++ b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/base.h @@ -0,0 +1,3078 @@ +// Formatting library for C++ - the base API for char/UTF-8 +// +// Copyright (c) 2012 - present, Victor Zverovich +// All rights reserved. +// +// For the license information refer to format.h. 
+ +#ifndef FMT_BASE_H_ +#define FMT_BASE_H_ + +#if defined(FMT_IMPORT_STD) && !defined(FMT_MODULE) +# define FMT_MODULE +#endif + +#ifndef FMT_MODULE +# include // CHAR_BIT +# include // FILE +# include // strlen + +// is also included transitively from . +# include // std::byte +# include // std::enable_if +#endif + +// The fmt library version in the form major * 10000 + minor * 100 + patch. +#define FMT_VERSION 110002 +#define FMT_HEADER_ONLY + +// Detect compiler versions. +#if defined(__clang__) && !defined(__ibmxl__) +# define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) +#else +# define FMT_CLANG_VERSION 0 +#endif +#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) +# define FMT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +#else +# define FMT_GCC_VERSION 0 +#endif +#if defined(__ICL) +# define FMT_ICC_VERSION __ICL +#elif defined(__INTEL_COMPILER) +# define FMT_ICC_VERSION __INTEL_COMPILER +#else +# define FMT_ICC_VERSION 0 +#endif +#if defined(_MSC_VER) +# define FMT_MSC_VERSION _MSC_VER +#else +# define FMT_MSC_VERSION 0 +#endif + +// Detect standard library versions. +#ifdef _GLIBCXX_RELEASE +# define FMT_GLIBCXX_RELEASE _GLIBCXX_RELEASE +#else +# define FMT_GLIBCXX_RELEASE 0 +#endif +#ifdef _LIBCPP_VERSION +# define FMT_LIBCPP_VERSION _LIBCPP_VERSION +#else +# define FMT_LIBCPP_VERSION 0 +#endif + +#ifdef _MSVC_LANG +# define FMT_CPLUSPLUS _MSVC_LANG +#else +# define FMT_CPLUSPLUS __cplusplus +#endif + +// Detect __has_*. 
+#ifdef __has_feature +# define FMT_HAS_FEATURE(x) __has_feature(x) +#else +# define FMT_HAS_FEATURE(x) 0 +#endif +#ifdef __has_include +# define FMT_HAS_INCLUDE(x) __has_include(x) +#else +# define FMT_HAS_INCLUDE(x) 0 +#endif +#ifdef __has_cpp_attribute +# define FMT_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#else +# define FMT_HAS_CPP_ATTRIBUTE(x) 0 +#endif + +#define FMT_HAS_CPP14_ATTRIBUTE(attribute) \ + (FMT_CPLUSPLUS >= 201402L && FMT_HAS_CPP_ATTRIBUTE(attribute)) + +#define FMT_HAS_CPP17_ATTRIBUTE(attribute) \ + (FMT_CPLUSPLUS >= 201703L && FMT_HAS_CPP_ATTRIBUTE(attribute)) + +// Detect C++14 relaxed constexpr. +#ifdef FMT_USE_CONSTEXPR +// Use the provided definition. +#elif FMT_GCC_VERSION >= 600 && FMT_CPLUSPLUS >= 201402L +// GCC only allows throw in constexpr since version 6: +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67371. +# define FMT_USE_CONSTEXPR 1 +#elif FMT_ICC_VERSION +# define FMT_USE_CONSTEXPR 0 // https://github.com/fmtlib/fmt/issues/1628 +#elif FMT_HAS_FEATURE(cxx_relaxed_constexpr) || FMT_MSC_VERSION >= 1912 +# define FMT_USE_CONSTEXPR 1 +#else +# define FMT_USE_CONSTEXPR 0 +#endif +#if FMT_USE_CONSTEXPR +# define FMT_CONSTEXPR constexpr +#else +# define FMT_CONSTEXPR +#endif + +// Detect consteval, C++20 constexpr extensions and std::is_constant_evaluated. +#if !defined(__cpp_lib_is_constant_evaluated) +# define FMT_USE_CONSTEVAL 0 +#elif FMT_CPLUSPLUS < 201709L +# define FMT_USE_CONSTEVAL 0 +#elif FMT_GLIBCXX_RELEASE && FMT_GLIBCXX_RELEASE < 10 +# define FMT_USE_CONSTEVAL 0 +#elif FMT_LIBCPP_VERSION && FMT_LIBCPP_VERSION < 10000 +# define FMT_USE_CONSTEVAL 0 +#elif defined(__apple_build_version__) && __apple_build_version__ < 14000029L +# define FMT_USE_CONSTEVAL 0 // consteval is broken in Apple clang < 14. +#elif FMT_MSC_VERSION && FMT_MSC_VERSION < 1929 +# define FMT_USE_CONSTEVAL 0 // consteval is broken in MSVC VS2019 < 16.10. 
+#elif defined(__cpp_consteval) +# define FMT_USE_CONSTEVAL 1 +#elif FMT_GCC_VERSION >= 1002 || FMT_CLANG_VERSION >= 1101 +# define FMT_USE_CONSTEVAL 1 +#else +# define FMT_USE_CONSTEVAL 0 +#endif +#if FMT_USE_CONSTEVAL +# define FMT_CONSTEVAL consteval +# define FMT_CONSTEXPR20 constexpr +#else +# define FMT_CONSTEVAL +# define FMT_CONSTEXPR20 +#endif + +#if defined(FMT_USE_NONTYPE_TEMPLATE_ARGS) +// Use the provided definition. +#elif defined(__NVCOMPILER) +# define FMT_USE_NONTYPE_TEMPLATE_ARGS 0 +#elif FMT_GCC_VERSION >= 903 && FMT_CPLUSPLUS >= 201709L +# define FMT_USE_NONTYPE_TEMPLATE_ARGS 1 +#elif defined(__cpp_nontype_template_args) && \ + __cpp_nontype_template_args >= 201911L +# define FMT_USE_NONTYPE_TEMPLATE_ARGS 1 +#elif FMT_CLANG_VERSION >= 1200 && FMT_CPLUSPLUS >= 202002L +# define FMT_USE_NONTYPE_TEMPLATE_ARGS 1 +#else +# define FMT_USE_NONTYPE_TEMPLATE_ARGS 0 +#endif + +#ifdef FMT_USE_CONCEPTS +// Use the provided definition. +#elif defined(__cpp_concepts) +# define FMT_USE_CONCEPTS 1 +#else +# define FMT_USE_CONCEPTS 0 +#endif + +// Check if exceptions are disabled. +#ifdef FMT_EXCEPTIONS +// Use the provided definition. +#elif defined(__GNUC__) && !defined(__EXCEPTIONS) +# define FMT_EXCEPTIONS 0 +#elif FMT_MSC_VERSION && !_HAS_EXCEPTIONS +# define FMT_EXCEPTIONS 0 +#else +# define FMT_EXCEPTIONS 1 +#endif +#if FMT_EXCEPTIONS +# define FMT_TRY try +# define FMT_CATCH(x) catch (x) +#else +# define FMT_TRY if (true) +# define FMT_CATCH(x) if (false) +#endif + +#if FMT_HAS_CPP17_ATTRIBUTE(fallthrough) +# define FMT_FALLTHROUGH [[fallthrough]] +#elif defined(__clang__) +# define FMT_FALLTHROUGH [[clang::fallthrough]] +#elif FMT_GCC_VERSION >= 700 && \ + (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 520) +# define FMT_FALLTHROUGH [[gnu::fallthrough]] +#else +# define FMT_FALLTHROUGH +#endif + +// Disable [[noreturn]] on MSVC/NVCC because of bogus unreachable code warnings. 
+#if FMT_HAS_CPP_ATTRIBUTE(noreturn) && !FMT_MSC_VERSION && !defined(__NVCC__) +# define FMT_NORETURN [[noreturn]] +#else +# define FMT_NORETURN +#endif + +#ifndef FMT_NODISCARD +# if FMT_HAS_CPP17_ATTRIBUTE(nodiscard) +# define FMT_NODISCARD [[nodiscard]] +# else +# define FMT_NODISCARD +# endif +#endif + +#ifdef FMT_DEPRECATED +// Use the provided definition. +#elif FMT_HAS_CPP14_ATTRIBUTE(deprecated) +# define FMT_DEPRECATED [[deprecated]] +#else +# define FMT_DEPRECATED /* deprecated */ +#endif + +#ifdef FMT_INLINE +// Use the provided definition. +#elif FMT_GCC_VERSION || FMT_CLANG_VERSION +# define FMT_ALWAYS_INLINE inline __attribute__((always_inline)) +#else +# define FMT_ALWAYS_INLINE inline +#endif +// A version of FMT_INLINE to prevent code bloat in debug mode. +#ifdef NDEBUG +# define FMT_INLINE FMT_ALWAYS_INLINE +#else +# define FMT_INLINE inline +#endif + +#if FMT_GCC_VERSION || FMT_CLANG_VERSION +# define FMT_VISIBILITY(value) __attribute__((visibility(value))) +#else +# define FMT_VISIBILITY(value) +#endif + +#ifndef FMT_GCC_PRAGMA +// Workaround a _Pragma bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59884 +// and an nvhpc warning: https://github.com/fmtlib/fmt/pull/2582. +# if FMT_GCC_VERSION >= 504 && !defined(__NVCOMPILER) +# define FMT_GCC_PRAGMA(arg) _Pragma(arg) +# else +# define FMT_GCC_PRAGMA(arg) +# endif +#endif + +// GCC < 5 requires this-> in decltype. +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 +# define FMT_DECLTYPE_THIS this-> +#else +# define FMT_DECLTYPE_THIS +#endif + +#if FMT_MSC_VERSION +# define FMT_MSC_WARNING(...) __pragma(warning(__VA_ARGS__)) +# define FMT_UNCHECKED_ITERATOR(It) \ + using _Unchecked_type = It // Mark iterator as checked. +#else +# define FMT_MSC_WARNING(...) 
+# define FMT_UNCHECKED_ITERATOR(It) using unchecked_type = It +#endif + +#ifndef FMT_BEGIN_NAMESPACE +# define FMT_BEGIN_NAMESPACE \ + namespace fmt { \ + inline namespace v11 { +# define FMT_END_NAMESPACE \ + } \ + } +#endif + +#ifndef FMT_EXPORT +# define FMT_EXPORT +# define FMT_BEGIN_EXPORT +# define FMT_END_EXPORT +#endif + +#if !defined(FMT_HEADER_ONLY) && defined(_WIN32) +# if defined(FMT_LIB_EXPORT) +# define FMT_API __declspec(dllexport) +# elif defined(FMT_SHARED) +# define FMT_API __declspec(dllimport) +# endif +#elif defined(FMT_LIB_EXPORT) || defined(FMT_SHARED) +# define FMT_API FMT_VISIBILITY("default") +#endif +#ifndef FMT_API +# define FMT_API +#endif + +#ifndef FMT_UNICODE +# define FMT_UNICODE 1 +#endif + +// Check if rtti is available. +#ifndef FMT_USE_RTTI +// __RTTI is for EDG compilers. _CPPRTTI is for MSVC. +# if defined(__GXX_RTTI) || FMT_HAS_FEATURE(cxx_rtti) || defined(_CPPRTTI) || \ + defined(__INTEL_RTTI__) || defined(__RTTI) +# define FMT_USE_RTTI 1 +# else +# define FMT_USE_RTTI 0 +# endif +#endif + +#define FMT_FWD(...) static_cast(__VA_ARGS__) + +// Enable minimal optimizations for more compact code in debug mode. +FMT_GCC_PRAGMA("GCC push_options") +#if !defined(__OPTIMIZE__) && !defined(__CUDACC__) +FMT_GCC_PRAGMA("GCC optimize(\"Og\")") +#endif + +FMT_BEGIN_NAMESPACE + +// Implementations of enable_if_t and other metafunctions for older systems. 
+template +using enable_if_t = typename std::enable_if::type; +template +using conditional_t = typename std::conditional::type; +template using bool_constant = std::integral_constant; +template +using remove_reference_t = typename std::remove_reference::type; +template +using remove_const_t = typename std::remove_const::type; +template +using remove_cvref_t = typename std::remove_cv>::type; +template struct type_identity { + using type = T; +}; +template using type_identity_t = typename type_identity::type; +template +using make_unsigned_t = typename std::make_unsigned::type; +template +using underlying_t = typename std::underlying_type::type; + +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 +// A workaround for gcc 4.8 to make void_t work in a SFINAE context. +template struct void_t_impl { + using type = void; +}; +template using void_t = typename void_t_impl::type; +#else +template using void_t = void; +#endif + +struct monostate { + constexpr monostate() {} +}; + +// An enable_if helper to be used in template parameters which results in much +// shorter symbols: https://godbolt.org/z/sWw4vP. Extra parentheses are needed +// to workaround a bug in MSVC 2019 (see #1140 and #1186). +#ifdef FMT_DOC +# define FMT_ENABLE_IF(...) +#else +# define FMT_ENABLE_IF(...) fmt::enable_if_t<(__VA_ARGS__), int> = 0 +#endif + +// This is defined in base.h instead of format.h to avoid injecting in std. +// It is a template to avoid undesirable implicit conversions to std::byte. +#ifdef __cpp_lib_byte +template ::value)> +inline auto format_as(T b) -> unsigned char { + return static_cast(b); +} +#endif + +namespace detail { +// Suppresses "unused variable" warnings with the method described in +// https://herbsutter.com/2009/10/18/mailbag-shutting-up-compiler-warnings/. +// (void)var does not work on many Intel compilers. +template FMT_CONSTEXPR void ignore_unused(const T&...) 
{} + +constexpr auto is_constant_evaluated(bool default_value = false) noexcept + -> bool { +// Workaround for incompatibility between libstdc++ consteval-based +// std::is_constant_evaluated() implementation and clang-14: +// https://github.com/fmtlib/fmt/issues/3247. +#if FMT_CPLUSPLUS >= 202002L && FMT_GLIBCXX_RELEASE >= 12 && \ + (FMT_CLANG_VERSION >= 1400 && FMT_CLANG_VERSION < 1500) + ignore_unused(default_value); + return __builtin_is_constant_evaluated(); +#elif defined(__cpp_lib_is_constant_evaluated) + ignore_unused(default_value); + return std::is_constant_evaluated(); +#else + return default_value; +#endif +} + +// Suppresses "conditional expression is constant" warnings. +template constexpr auto const_check(T value) -> T { return value; } + +FMT_NORETURN FMT_API void assert_fail(const char* file, int line, + const char* message); + +#if defined(FMT_ASSERT) +// Use the provided definition. +#elif defined(NDEBUG) +// FMT_ASSERT is not empty to avoid -Wempty-body. +# define FMT_ASSERT(condition, message) \ + fmt::detail::ignore_unused((condition), (message)) +#else +# define FMT_ASSERT(condition, message) \ + ((condition) /* void() fails with -Winvalid-constexpr on clang 4.0.1 */ \ + ? (void)0 \ + : fmt::detail::assert_fail(__FILE__, __LINE__, (message))) +#endif + +#ifdef FMT_USE_INT128 +// Do nothing. +#elif defined(__SIZEOF_INT128__) && !defined(__NVCC__) && \ + !(FMT_CLANG_VERSION && FMT_MSC_VERSION) +# define FMT_USE_INT128 1 +using int128_opt = __int128_t; // An optional native 128-bit integer. +using uint128_opt = __uint128_t; +template inline auto convert_for_visit(T value) -> T { + return value; +} +#else +# define FMT_USE_INT128 0 +#endif +#if !FMT_USE_INT128 +enum class int128_opt {}; +enum class uint128_opt {}; +// Reduce template instantiations. +template auto convert_for_visit(T) -> monostate { return {}; } +#endif + +// Casts a nonnegative integer to unsigned. 
+template +FMT_CONSTEXPR auto to_unsigned(Int value) -> make_unsigned_t { + FMT_ASSERT(std::is_unsigned::value || value >= 0, "negative value"); + return static_cast>(value); +} + +// A heuristic to detect std::string and std::[experimental::]string_view. +// It is mainly used to avoid dependency on <[experimental/]string_view>. +template +struct is_std_string_like : std::false_type {}; +template +struct is_std_string_like().find_first_of( + typename T::value_type(), 0))>> + : std::is_convertible().data()), + const typename T::value_type*> {}; + +// Returns true iff the literal encoding is UTF-8. +constexpr auto is_utf8_enabled() -> bool { + // Avoid an MSVC sign extension bug: https://github.com/fmtlib/fmt/pull/2297. + using uchar = unsigned char; + return sizeof("\u00A7") == 3 && uchar("\u00A7"[0]) == 0xC2 && + uchar("\u00A7"[1]) == 0xA7; +} +constexpr auto use_utf8() -> bool { + return !FMT_MSC_VERSION || is_utf8_enabled(); +} + +static_assert(!FMT_UNICODE || use_utf8(), + "Unicode support requires compiling with /utf-8"); + +template FMT_CONSTEXPR auto length(const Char* s) -> size_t { + size_t len = 0; + while (*s++) ++len; + return len; +} + +template +FMT_CONSTEXPR auto compare(const Char* s1, const Char* s2, std::size_t n) + -> int { + if (!is_constant_evaluated() && sizeof(Char) == 1) return memcmp(s1, s2, n); + for (; n != 0; ++s1, ++s2, --n) { + if (*s1 < *s2) return -1; + if (*s1 > *s2) return 1; + } + return 0; +} + +namespace adl { +using namespace std; + +template +auto invoke_back_inserter() + -> decltype(back_inserter(std::declval())); +} // namespace adl + +template +struct is_back_insert_iterator : std::false_type {}; + +template +struct is_back_insert_iterator< + It, bool_constant()), + It>::value>> : std::true_type {}; + +// Extracts a reference to the container from *insert_iterator. 
+template +inline auto get_container(OutputIt it) -> typename OutputIt::container_type& { + struct accessor : OutputIt { + accessor(OutputIt base) : OutputIt(base) {} + using OutputIt::container; + }; + return *accessor(it).container; +} +} // namespace detail + +// Checks whether T is a container with contiguous storage. +template struct is_contiguous : std::false_type {}; + +/** + * An implementation of `std::basic_string_view` for pre-C++17. It provides a + * subset of the API. `fmt::basic_string_view` is used for format strings even + * if `std::basic_string_view` is available to prevent issues when a library is + * compiled with a different `-std` option than the client code (which is not + * recommended). + */ +FMT_EXPORT +template class basic_string_view { + private: + const Char* data_; + size_t size_; + + public: + using value_type = Char; + using iterator = const Char*; + + constexpr basic_string_view() noexcept : data_(nullptr), size_(0) {} + + /// Constructs a string reference object from a C string and a size. + constexpr basic_string_view(const Char* s, size_t count) noexcept + : data_(s), size_(count) {} + + constexpr basic_string_view(std::nullptr_t) = delete; + + /// Constructs a string reference object from a C string. + FMT_CONSTEXPR20 + basic_string_view(const Char* s) + : data_(s), + size_(detail::const_check(std::is_same::value && + !detail::is_constant_evaluated(false)) + ? strlen(reinterpret_cast(s)) + : detail::length(s)) {} + + /// Constructs a string reference from a `std::basic_string` or a + /// `std::basic_string_view` object. + template ::value&& std::is_same< + typename S::value_type, Char>::value)> + FMT_CONSTEXPR basic_string_view(const S& s) noexcept + : data_(s.data()), size_(s.size()) {} + + /// Returns a pointer to the string data. + constexpr auto data() const noexcept -> const Char* { return data_; } + + /// Returns the string size. 
+ constexpr auto size() const noexcept -> size_t { return size_; } + + constexpr auto begin() const noexcept -> iterator { return data_; } + constexpr auto end() const noexcept -> iterator { return data_ + size_; } + + constexpr auto operator[](size_t pos) const noexcept -> const Char& { + return data_[pos]; + } + + FMT_CONSTEXPR void remove_prefix(size_t n) noexcept { + data_ += n; + size_ -= n; + } + + FMT_CONSTEXPR auto starts_with(basic_string_view sv) const noexcept + -> bool { + return size_ >= sv.size_ && detail::compare(data_, sv.data_, sv.size_) == 0; + } + FMT_CONSTEXPR auto starts_with(Char c) const noexcept -> bool { + return size_ >= 1 && *data_ == c; + } + FMT_CONSTEXPR auto starts_with(const Char* s) const -> bool { + return starts_with(basic_string_view(s)); + } + + // Lexicographically compare this string reference to other. + FMT_CONSTEXPR auto compare(basic_string_view other) const -> int { + size_t str_size = size_ < other.size_ ? size_ : other.size_; + int result = detail::compare(data_, other.data_, str_size); + if (result == 0) + result = size_ == other.size_ ? 0 : (size_ < other.size_ ? -1 : 1); + return result; + } + + FMT_CONSTEXPR friend auto operator==(basic_string_view lhs, + basic_string_view rhs) -> bool { + return lhs.compare(rhs) == 0; + } + friend auto operator!=(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) != 0; + } + friend auto operator<(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) < 0; + } + friend auto operator<=(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) <= 0; + } + friend auto operator>(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) > 0; + } + friend auto operator>=(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) >= 0; + } +}; + +FMT_EXPORT +using string_view = basic_string_view; + +/// Specifies if `T` is a character type. 
Can be specialized by users. +FMT_EXPORT +template struct is_char : std::false_type {}; +template <> struct is_char : std::true_type {}; + +namespace detail { + +// Constructs fmt::basic_string_view from types implicitly convertible +// to it, deducing Char. Explicitly convertible types such as the ones returned +// from FMT_STRING are intentionally excluded. +template ::value)> +constexpr auto to_string_view(const Char* s) -> basic_string_view { + return s; +} +template ::value)> +constexpr auto to_string_view(const T& s) + -> basic_string_view { + return s; +} +template +constexpr auto to_string_view(basic_string_view s) + -> basic_string_view { + return s; +} + +template +struct has_to_string_view : std::false_type {}; +// detail:: is intentional since to_string_view is not an extension point. +template +struct has_to_string_view< + T, void_t()))>> + : std::true_type {}; + +template struct string_literal { + static constexpr Char value[sizeof...(C)] = {C...}; + constexpr operator basic_string_view() const { + return {value, sizeof...(C)}; + } +}; +#if FMT_CPLUSPLUS < 201703L +template +constexpr Char string_literal::value[sizeof...(C)]; +#endif + +enum class type { + none_type, + // Integer types should go first, + int_type, + uint_type, + long_long_type, + ulong_long_type, + int128_type, + uint128_type, + bool_type, + char_type, + last_integer_type = char_type, + // followed by floating-point types. + float_type, + double_type, + long_double_type, + last_numeric_type = long_double_type, + cstring_type, + string_type, + pointer_type, + custom_type +}; + +// Maps core type T to the corresponding type enum constant. 
+template +struct type_constant : std::integral_constant {}; + +#define FMT_TYPE_CONSTANT(Type, constant) \ + template \ + struct type_constant \ + : std::integral_constant {} + +FMT_TYPE_CONSTANT(int, int_type); +FMT_TYPE_CONSTANT(unsigned, uint_type); +FMT_TYPE_CONSTANT(long long, long_long_type); +FMT_TYPE_CONSTANT(unsigned long long, ulong_long_type); +FMT_TYPE_CONSTANT(int128_opt, int128_type); +FMT_TYPE_CONSTANT(uint128_opt, uint128_type); +FMT_TYPE_CONSTANT(bool, bool_type); +FMT_TYPE_CONSTANT(Char, char_type); +FMT_TYPE_CONSTANT(float, float_type); +FMT_TYPE_CONSTANT(double, double_type); +FMT_TYPE_CONSTANT(long double, long_double_type); +FMT_TYPE_CONSTANT(const Char*, cstring_type); +FMT_TYPE_CONSTANT(basic_string_view, string_type); +FMT_TYPE_CONSTANT(const void*, pointer_type); + +constexpr auto is_integral_type(type t) -> bool { + return t > type::none_type && t <= type::last_integer_type; +} +constexpr auto is_arithmetic_type(type t) -> bool { + return t > type::none_type && t <= type::last_numeric_type; +} + +constexpr auto set(type rhs) -> int { return 1 << static_cast(rhs); } +constexpr auto in(type t, int set) -> bool { + return ((set >> static_cast(t)) & 1) != 0; +} + +// Bitsets of types. +enum { + sint_set = + set(type::int_type) | set(type::long_long_type) | set(type::int128_type), + uint_set = set(type::uint_type) | set(type::ulong_long_type) | + set(type::uint128_type), + bool_set = set(type::bool_type), + char_set = set(type::char_type), + float_set = set(type::float_type) | set(type::double_type) | + set(type::long_double_type), + string_set = set(type::string_type), + cstring_set = set(type::cstring_type), + pointer_set = set(type::pointer_type) +}; +} // namespace detail + +/// Reports a format error at compile time or, via a `format_error` exception, +/// at runtime. +// This function is intentionally not constexpr to give a compile-time error. 
+FMT_NORETURN FMT_API void report_error(const char* message); + +FMT_DEPRECATED FMT_NORETURN inline void throw_format_error( + const char* message) { + report_error(message); +} + +/// String's character (code unit) type. +template ()))> +using char_t = typename V::value_type; + +/** + * Parsing context consisting of a format string range being parsed and an + * argument counter for automatic indexing. + * You can use the `format_parse_context` type alias for `char` instead. + */ +FMT_EXPORT +template class basic_format_parse_context { + private: + basic_string_view format_str_; + int next_arg_id_; + + FMT_CONSTEXPR void do_check_arg_id(int id); + + public: + using char_type = Char; + using iterator = const Char*; + + explicit constexpr basic_format_parse_context( + basic_string_view format_str, int next_arg_id = 0) + : format_str_(format_str), next_arg_id_(next_arg_id) {} + + /// Returns an iterator to the beginning of the format string range being + /// parsed. + constexpr auto begin() const noexcept -> iterator { + return format_str_.begin(); + } + + /// Returns an iterator past the end of the format string range being parsed. + constexpr auto end() const noexcept -> iterator { return format_str_.end(); } + + /// Advances the begin iterator to `it`. + FMT_CONSTEXPR void advance_to(iterator it) { + format_str_.remove_prefix(detail::to_unsigned(it - begin())); + } + + /// Reports an error if using the manual argument indexing; otherwise returns + /// the next argument index and switches to the automatic indexing. + FMT_CONSTEXPR auto next_arg_id() -> int { + if (next_arg_id_ < 0) { + report_error("cannot switch from manual to automatic argument indexing"); + return 0; + } + int id = next_arg_id_++; + do_check_arg_id(id); + return id; + } + + /// Reports an error if using the automatic argument indexing; otherwise + /// switches to the manual indexing. 
+ FMT_CONSTEXPR void check_arg_id(int id) { + if (next_arg_id_ > 0) { + report_error("cannot switch from automatic to manual argument indexing"); + return; + } + next_arg_id_ = -1; + do_check_arg_id(id); + } + FMT_CONSTEXPR void check_arg_id(basic_string_view) { + next_arg_id_ = -1; + } + FMT_CONSTEXPR void check_dynamic_spec(int arg_id); +}; + +FMT_EXPORT +using format_parse_context = basic_format_parse_context; + +namespace detail { +// A parse context with extra data used only in compile-time checks. +template +class compile_parse_context : public basic_format_parse_context { + private: + int num_args_; + const type* types_; + using base = basic_format_parse_context; + + public: + explicit FMT_CONSTEXPR compile_parse_context( + basic_string_view format_str, int num_args, const type* types, + int next_arg_id = 0) + : base(format_str, next_arg_id), num_args_(num_args), types_(types) {} + + constexpr auto num_args() const -> int { return num_args_; } + constexpr auto arg_type(int id) const -> type { return types_[id]; } + + FMT_CONSTEXPR auto next_arg_id() -> int { + int id = base::next_arg_id(); + if (id >= num_args_) report_error("argument not found"); + return id; + } + + FMT_CONSTEXPR void check_arg_id(int id) { + base::check_arg_id(id); + if (id >= num_args_) report_error("argument not found"); + } + using base::check_arg_id; + + FMT_CONSTEXPR void check_dynamic_spec(int arg_id) { + detail::ignore_unused(arg_id); + if (arg_id < num_args_ && types_ && !is_integral_type(types_[arg_id])) + report_error("width/precision is not integer"); + } +}; + +/// A contiguous memory buffer with an optional growing ability. It is an +/// internal class and shouldn't be used directly, only via `memory_buffer`. +template class buffer { + private: + T* ptr_; + size_t size_; + size_t capacity_; + + using grow_fun = void (*)(buffer& buf, size_t capacity); + grow_fun grow_; + + protected: + // Don't initialize ptr_ since it is not accessed to save a few cycles. 
+ FMT_MSC_WARNING(suppress : 26495) + FMT_CONSTEXPR20 buffer(grow_fun grow, size_t sz) noexcept + : size_(sz), capacity_(sz), grow_(grow) {} + + constexpr buffer(grow_fun grow, T* p = nullptr, size_t sz = 0, + size_t cap = 0) noexcept + : ptr_(p), size_(sz), capacity_(cap), grow_(grow) {} + + FMT_CONSTEXPR20 ~buffer() = default; + buffer(buffer&&) = default; + + /// Sets the buffer data and capacity. + FMT_CONSTEXPR void set(T* buf_data, size_t buf_capacity) noexcept { + ptr_ = buf_data; + capacity_ = buf_capacity; + } + + public: + using value_type = T; + using const_reference = const T&; + + buffer(const buffer&) = delete; + void operator=(const buffer&) = delete; + + auto begin() noexcept -> T* { return ptr_; } + auto end() noexcept -> T* { return ptr_ + size_; } + + auto begin() const noexcept -> const T* { return ptr_; } + auto end() const noexcept -> const T* { return ptr_ + size_; } + + /// Returns the size of this buffer. + constexpr auto size() const noexcept -> size_t { return size_; } + + /// Returns the capacity of this buffer. + constexpr auto capacity() const noexcept -> size_t { return capacity_; } + + /// Returns a pointer to the buffer data (not null-terminated). + FMT_CONSTEXPR auto data() noexcept -> T* { return ptr_; } + FMT_CONSTEXPR auto data() const noexcept -> const T* { return ptr_; } + + /// Clears this buffer. + void clear() { size_ = 0; } + + // Tries resizing the buffer to contain `count` elements. If T is a POD type + // the new elements may not be initialized. + FMT_CONSTEXPR void try_resize(size_t count) { + try_reserve(count); + size_ = count <= capacity_ ? count : capacity_; + } + + // Tries increasing the buffer capacity to `new_capacity`. It can increase the + // capacity by a smaller amount than requested but guarantees there is space + // for at least one additional element either by increasing the capacity or by + // flushing the buffer if it is full. 
+ FMT_CONSTEXPR void try_reserve(size_t new_capacity) { + if (new_capacity > capacity_) grow_(*this, new_capacity); + } + + FMT_CONSTEXPR void push_back(const T& value) { + try_reserve(size_ + 1); + ptr_[size_++] = value; + } + + /// Appends data to the end of the buffer. + template void append(const U* begin, const U* end) { + while (begin != end) { + auto count = to_unsigned(end - begin); + try_reserve(size_ + count); + auto free_cap = capacity_ - size_; + if (free_cap < count) count = free_cap; + // A loop is faster than memcpy on small sizes. + T* out = ptr_ + size_; + for (size_t i = 0; i < count; ++i) out[i] = begin[i]; + size_ += count; + begin += count; + } + } + + template FMT_CONSTEXPR auto operator[](Idx index) -> T& { + return ptr_[index]; + } + template + FMT_CONSTEXPR auto operator[](Idx index) const -> const T& { + return ptr_[index]; + } +}; + +struct buffer_traits { + explicit buffer_traits(size_t) {} + auto count() const -> size_t { return 0; } + auto limit(size_t size) -> size_t { return size; } +}; + +class fixed_buffer_traits { + private: + size_t count_ = 0; + size_t limit_; + + public: + explicit fixed_buffer_traits(size_t limit) : limit_(limit) {} + auto count() const -> size_t { return count_; } + auto limit(size_t size) -> size_t { + size_t n = limit_ > count_ ? limit_ - count_ : 0; + count_ += size; + return size < n ? size : n; + } +}; + +// A buffer that writes to an output iterator when flushed. 
+template +class iterator_buffer : public Traits, public buffer { + private: + OutputIt out_; + enum { buffer_size = 256 }; + T data_[buffer_size]; + + static FMT_CONSTEXPR void grow(buffer& buf, size_t) { + if (buf.size() == buffer_size) static_cast(buf).flush(); + } + + void flush() { + auto size = this->size(); + this->clear(); + const T* begin = data_; + const T* end = begin + this->limit(size); + while (begin != end) *out_++ = *begin++; + } + + public: + explicit iterator_buffer(OutputIt out, size_t n = buffer_size) + : Traits(n), buffer(grow, data_, 0, buffer_size), out_(out) {} + iterator_buffer(iterator_buffer&& other) noexcept + : Traits(other), + buffer(grow, data_, 0, buffer_size), + out_(other.out_) {} + ~iterator_buffer() { + // Don't crash if flush fails during unwinding. + FMT_TRY { flush(); } + FMT_CATCH(...) {} + } + + auto out() -> OutputIt { + flush(); + return out_; + } + auto count() const -> size_t { return Traits::count() + this->size(); } +}; + +template +class iterator_buffer : public fixed_buffer_traits, + public buffer { + private: + T* out_; + enum { buffer_size = 256 }; + T data_[buffer_size]; + + static FMT_CONSTEXPR void grow(buffer& buf, size_t) { + if (buf.size() == buf.capacity()) + static_cast(buf).flush(); + } + + void flush() { + size_t n = this->limit(this->size()); + if (this->data() == out_) { + out_ += n; + this->set(data_, buffer_size); + } + this->clear(); + } + + public: + explicit iterator_buffer(T* out, size_t n = buffer_size) + : fixed_buffer_traits(n), buffer(grow, out, 0, n), out_(out) {} + iterator_buffer(iterator_buffer&& other) noexcept + : fixed_buffer_traits(other), + buffer(static_cast(other)), + out_(other.out_) { + if (this->data() != out_) { + this->set(data_, buffer_size); + this->clear(); + } + } + ~iterator_buffer() { flush(); } + + auto out() -> T* { + flush(); + return out_; + } + auto count() const -> size_t { + return fixed_buffer_traits::count() + this->size(); + } +}; + +template class 
iterator_buffer : public buffer { + public: + explicit iterator_buffer(T* out, size_t = 0) + : buffer([](buffer&, size_t) {}, out, 0, ~size_t()) {} + + auto out() -> T* { return &*this->end(); } +}; + +// A buffer that writes to a container with the contiguous storage. +template +class iterator_buffer< + OutputIt, + enable_if_t::value && + is_contiguous::value, + typename OutputIt::container_type::value_type>> + : public buffer { + private: + using container_type = typename OutputIt::container_type; + using value_type = typename container_type::value_type; + container_type& container_; + + static FMT_CONSTEXPR void grow(buffer& buf, size_t capacity) { + auto& self = static_cast(buf); + self.container_.resize(capacity); + self.set(&self.container_[0], capacity); + } + + public: + explicit iterator_buffer(container_type& c) + : buffer(grow, c.size()), container_(c) {} + explicit iterator_buffer(OutputIt out, size_t = 0) + : iterator_buffer(get_container(out)) {} + + auto out() -> OutputIt { return back_inserter(container_); } +}; + +// A buffer that counts the number of code units written discarding the output. +template class counting_buffer : public buffer { + private: + enum { buffer_size = 256 }; + T data_[buffer_size]; + size_t count_ = 0; + + static FMT_CONSTEXPR void grow(buffer& buf, size_t) { + if (buf.size() != buffer_size) return; + static_cast(buf).count_ += buf.size(); + buf.clear(); + } + + public: + counting_buffer() : buffer(grow, data_, 0, buffer_size) {} + + auto count() -> size_t { return count_ + this->size(); } +}; +} // namespace detail + +template +FMT_CONSTEXPR void basic_format_parse_context::do_check_arg_id(int id) { + // Argument id is only checked at compile-time during parsing because + // formatting has its own validation. 
+ if (detail::is_constant_evaluated() && + (!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) { + using context = detail::compile_parse_context; + if (id >= static_cast(this)->num_args()) + report_error("argument not found"); + } +} + +template +FMT_CONSTEXPR void basic_format_parse_context::check_dynamic_spec( + int arg_id) { + if (detail::is_constant_evaluated() && + (!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) { + using context = detail::compile_parse_context; + static_cast(this)->check_dynamic_spec(arg_id); + } +} + +FMT_EXPORT template class basic_format_arg; +FMT_EXPORT template class basic_format_args; +FMT_EXPORT template class dynamic_format_arg_store; + +// A formatter for objects of type T. +FMT_EXPORT +template +struct formatter { + // A deleted default constructor indicates a disabled formatter. + formatter() = delete; +}; + +// Specifies if T has an enabled formatter specialization. A type can be +// formattable even if it doesn't have a formatter e.g. via a conversion. +template +using has_formatter = + std::is_constructible>; + +// An output iterator that appends to a buffer. It is used instead of +// back_insert_iterator to reduce symbol sizes and avoid dependency. 
+template class basic_appender { + private: + detail::buffer* buffer_; + + friend auto get_container(basic_appender app) -> detail::buffer& { + return *app.buffer_; + } + + public: + using iterator_category = int; + using value_type = T; + using difference_type = ptrdiff_t; + using pointer = T*; + using reference = T&; + using container_type = detail::buffer; + FMT_UNCHECKED_ITERATOR(basic_appender); + + FMT_CONSTEXPR basic_appender(detail::buffer& buf) : buffer_(&buf) {} + + auto operator=(T c) -> basic_appender& { + buffer_->push_back(c); + return *this; + } + auto operator*() -> basic_appender& { return *this; } + auto operator++() -> basic_appender& { return *this; } + auto operator++(int) -> basic_appender { return *this; } +}; + +using appender = basic_appender; + +namespace detail { +template +struct is_back_insert_iterator> : std::true_type {}; + +template +struct locking : std::true_type {}; +template +struct locking>::nonlocking>> + : std::false_type {}; + +template FMT_CONSTEXPR inline auto is_locking() -> bool { + return locking::value; +} +template +FMT_CONSTEXPR inline auto is_locking() -> bool { + return locking::value || is_locking(); +} + +// An optimized version of std::copy with the output value type (T). +template ::value)> +auto copy(InputIt begin, InputIt end, OutputIt out) -> OutputIt { + get_container(out).append(begin, end); + return out; +} + +template ::value)> +FMT_CONSTEXPR auto copy(InputIt begin, InputIt end, OutputIt out) -> OutputIt { + while (begin != end) *out++ = static_cast(*begin++); + return out; +} + +template +FMT_CONSTEXPR auto copy(basic_string_view s, OutputIt out) -> OutputIt { + return copy(s.begin(), s.end(), out); +} + +template +constexpr auto has_const_formatter_impl(T*) + -> decltype(typename Context::template formatter_type().format( + std::declval(), std::declval()), + true) { + return true; +} +template +constexpr auto has_const_formatter_impl(...) 
-> bool { + return false; +} +template +constexpr auto has_const_formatter() -> bool { + return has_const_formatter_impl(static_cast(nullptr)); +} + +template +struct is_buffer_appender : std::false_type {}; +template +struct is_buffer_appender< + It, bool_constant< + is_back_insert_iterator::value && + std::is_base_of, + typename It::container_type>::value>> + : std::true_type {}; + +// Maps an output iterator to a buffer. +template ::value)> +auto get_buffer(OutputIt out) -> iterator_buffer { + return iterator_buffer(out); +} +template ::value)> +auto get_buffer(OutputIt out) -> buffer& { + return get_container(out); +} + +template +auto get_iterator(Buf& buf, OutputIt) -> decltype(buf.out()) { + return buf.out(); +} +template +auto get_iterator(buffer&, OutputIt out) -> OutputIt { + return out; +} + +struct view {}; + +template struct named_arg : view { + const Char* name; + const T& value; + named_arg(const Char* n, const T& v) : name(n), value(v) {} +}; + +template struct named_arg_info { + const Char* name; + int id; +}; + +template struct is_named_arg : std::false_type {}; +template struct is_statically_named_arg : std::false_type {}; + +template +struct is_named_arg> : std::true_type {}; + +template constexpr auto count() -> size_t { return B ? 1 : 0; } +template constexpr auto count() -> size_t { + return (B1 ? 
1 : 0) + count(); +} + +template constexpr auto count_named_args() -> size_t { + return count::value...>(); +} + +template +constexpr auto count_statically_named_args() -> size_t { + return count::value...>(); +} + +struct unformattable {}; +struct unformattable_char : unformattable {}; +struct unformattable_pointer : unformattable {}; + +template struct string_value { + const Char* data; + size_t size; +}; + +template struct named_arg_value { + const named_arg_info* data; + size_t size; +}; + +template struct custom_value { + using parse_context = typename Context::parse_context_type; + void* value; + void (*format)(void* arg, parse_context& parse_ctx, Context& ctx); +}; + +// A formatting argument value. +template class value { + public: + using char_type = typename Context::char_type; + + union { + monostate no_value; + int int_value; + unsigned uint_value; + long long long_long_value; + unsigned long long ulong_long_value; + int128_opt int128_value; + uint128_opt uint128_value; + bool bool_value; + char_type char_value; + float float_value; + double double_value; + long double long_double_value; + const void* pointer; + string_value string; + custom_value custom; + named_arg_value named_args; + }; + + constexpr FMT_ALWAYS_INLINE value() : no_value() {} + constexpr FMT_ALWAYS_INLINE value(int val) : int_value(val) {} + constexpr FMT_ALWAYS_INLINE value(unsigned val) : uint_value(val) {} + constexpr FMT_ALWAYS_INLINE value(long long val) : long_long_value(val) {} + constexpr FMT_ALWAYS_INLINE value(unsigned long long val) + : ulong_long_value(val) {} + FMT_ALWAYS_INLINE value(int128_opt val) : int128_value(val) {} + FMT_ALWAYS_INLINE value(uint128_opt val) : uint128_value(val) {} + constexpr FMT_ALWAYS_INLINE value(float val) : float_value(val) {} + constexpr FMT_ALWAYS_INLINE value(double val) : double_value(val) {} + FMT_ALWAYS_INLINE value(long double val) : long_double_value(val) {} + constexpr FMT_ALWAYS_INLINE value(bool val) : bool_value(val) {} + 
constexpr FMT_ALWAYS_INLINE value(char_type val) : char_value(val) {} + FMT_CONSTEXPR FMT_ALWAYS_INLINE value(const char_type* val) { + string.data = val; + if (is_constant_evaluated()) string.size = {}; + } + FMT_CONSTEXPR FMT_ALWAYS_INLINE value(basic_string_view val) { + string.data = val.data(); + string.size = val.size(); + } + FMT_ALWAYS_INLINE value(const void* val) : pointer(val) {} + FMT_ALWAYS_INLINE value(const named_arg_info* args, size_t size) + : named_args{args, size} {} + + template FMT_CONSTEXPR20 FMT_ALWAYS_INLINE value(T& val) { + using value_type = remove_const_t; + // T may overload operator& e.g. std::vector::reference in libc++. +#if defined(__cpp_if_constexpr) + if constexpr (std::is_same::value) + custom.value = const_cast(&val); +#endif + if (!is_constant_evaluated()) + custom.value = const_cast(&reinterpret_cast(val)); + // Get the formatter type through the context to allow different contexts + // have different extension points, e.g. `formatter` for `format` and + // `printf_formatter` for `printf`. + custom.format = format_custom_arg< + value_type, typename Context::template formatter_type>; + } + value(unformattable); + value(unformattable_char); + value(unformattable_pointer); + + private: + // Formats an argument of a custom type, such as a user-defined class. + template + static void format_custom_arg(void* arg, + typename Context::parse_context_type& parse_ctx, + Context& ctx) { + auto f = Formatter(); + parse_ctx.advance_to(f.parse(parse_ctx)); + using qualified_type = + conditional_t(), const T, T>; + // format must be const for compatibility with std::format and compilation. + const auto& cf = f; + ctx.advance_to(cf.format(*static_cast(arg), ctx)); + } +}; + +// To minimize the number of types we need to deal with, long is translated +// either to int or to long long depending on its size. 
+enum { long_short = sizeof(long) == sizeof(int) }; +using long_type = conditional_t; +using ulong_type = conditional_t; + +template struct format_as_result { + template ::value || std::is_class::value)> + static auto map(U*) -> remove_cvref_t()))>; + static auto map(...) -> void; + + using type = decltype(map(static_cast(nullptr))); +}; +template using format_as_t = typename format_as_result::type; + +template +struct has_format_as + : bool_constant, void>::value> {}; + +#define FMT_MAP_API FMT_CONSTEXPR FMT_ALWAYS_INLINE + +// Maps formatting arguments to core types. +// arg_mapper reports errors by returning unformattable instead of using +// static_assert because it's used in the is_formattable trait. +template struct arg_mapper { + using char_type = typename Context::char_type; + + FMT_MAP_API auto map(signed char val) -> int { return val; } + FMT_MAP_API auto map(unsigned char val) -> unsigned { return val; } + FMT_MAP_API auto map(short val) -> int { return val; } + FMT_MAP_API auto map(unsigned short val) -> unsigned { return val; } + FMT_MAP_API auto map(int val) -> int { return val; } + FMT_MAP_API auto map(unsigned val) -> unsigned { return val; } + FMT_MAP_API auto map(long val) -> long_type { return val; } + FMT_MAP_API auto map(unsigned long val) -> ulong_type { return val; } + FMT_MAP_API auto map(long long val) -> long long { return val; } + FMT_MAP_API auto map(unsigned long long val) -> unsigned long long { + return val; + } + FMT_MAP_API auto map(int128_opt val) -> int128_opt { return val; } + FMT_MAP_API auto map(uint128_opt val) -> uint128_opt { return val; } + FMT_MAP_API auto map(bool val) -> bool { return val; } + + template ::value || + std::is_same::value)> + FMT_MAP_API auto map(T val) -> char_type { + return val; + } + template ::value || +#ifdef __cpp_char8_t + std::is_same::value || +#endif + std::is_same::value || + std::is_same::value) && + !std::is_same::value, + int> = 0> + FMT_MAP_API auto map(T) -> unformattable_char { + return 
{}; + } + + FMT_MAP_API auto map(float val) -> float { return val; } + FMT_MAP_API auto map(double val) -> double { return val; } + FMT_MAP_API auto map(long double val) -> long double { return val; } + + FMT_MAP_API auto map(char_type* val) -> const char_type* { return val; } + FMT_MAP_API auto map(const char_type* val) -> const char_type* { return val; } + template , + FMT_ENABLE_IF(std::is_same::value && + !std::is_pointer::value)> + FMT_MAP_API auto map(const T& val) -> basic_string_view { + return to_string_view(val); + } + template , + FMT_ENABLE_IF(!std::is_same::value && + !std::is_pointer::value)> + FMT_MAP_API auto map(const T&) -> unformattable_char { + return {}; + } + + FMT_MAP_API auto map(void* val) -> const void* { return val; } + FMT_MAP_API auto map(const void* val) -> const void* { return val; } + FMT_MAP_API auto map(volatile void* val) -> const void* { + return const_cast(val); + } + FMT_MAP_API auto map(const volatile void* val) -> const void* { + return const_cast(val); + } + FMT_MAP_API auto map(std::nullptr_t val) -> const void* { return val; } + + // Use SFINAE instead of a const T* parameter to avoid a conflict with the + // array overload. + template < + typename T, + FMT_ENABLE_IF( + std::is_pointer::value || std::is_member_pointer::value || + std::is_function::type>::value || + (std::is_array::value && + !std::is_convertible::value))> + FMT_CONSTEXPR auto map(const T&) -> unformattable_pointer { + return {}; + } + + template ::value)> + FMT_MAP_API auto map(const T (&values)[N]) -> const T (&)[N] { + return values; + } + + // Only map owning types because mapping views can be unsafe. 
+ template , + FMT_ENABLE_IF(std::is_arithmetic::value)> + FMT_MAP_API auto map(const T& val) -> decltype(FMT_DECLTYPE_THIS map(U())) { + return map(format_as(val)); + } + + template > + struct formattable : bool_constant() || + (has_formatter::value && + !std::is_const::value)> {}; + + template ::value)> + FMT_MAP_API auto do_map(T& val) -> T& { + return val; + } + template ::value)> + FMT_MAP_API auto do_map(T&) -> unformattable { + return {}; + } + + // is_fundamental is used to allow formatters for extended FP types. + template , + FMT_ENABLE_IF( + (std::is_class::value || std::is_enum::value || + std::is_union::value || std::is_fundamental::value) && + !has_to_string_view::value && !is_char::value && + !is_named_arg::value && !std::is_integral::value && + !std::is_arithmetic>::value)> + FMT_MAP_API auto map(T& val) -> decltype(FMT_DECLTYPE_THIS do_map(val)) { + return do_map(val); + } + + template ::value)> + FMT_MAP_API auto map(const T& named_arg) + -> decltype(FMT_DECLTYPE_THIS map(named_arg.value)) { + return map(named_arg.value); + } + + auto map(...) -> unformattable { return {}; } +}; + +// A type constant after applying arg_mapper. +template +using mapped_type_constant = + type_constant().map(std::declval())), + typename Context::char_type>; + +enum { packed_arg_bits = 4 }; +// Maximum number of arguments with packed types. +enum { max_packed_args = 62 / packed_arg_bits }; +enum : unsigned long long { is_unpacked_bit = 1ULL << 63 }; +enum : unsigned long long { has_named_args_bit = 1ULL << 62 }; + +template +struct is_output_iterator : std::false_type {}; + +template <> struct is_output_iterator : std::true_type {}; + +template +struct is_output_iterator< + It, T, void_t()++ = std::declval())>> + : std::true_type {}; + +// A type-erased reference to an std::locale to avoid a heavy include. +class locale_ref { + private: + const void* locale_; // A type-erased pointer to std::locale. 
+ + public: + constexpr locale_ref() : locale_(nullptr) {} + template explicit locale_ref(const Locale& loc); + + explicit operator bool() const noexcept { return locale_ != nullptr; } + + template auto get() const -> Locale; +}; + +template constexpr auto encode_types() -> unsigned long long { + return 0; +} + +template +constexpr auto encode_types() -> unsigned long long { + return static_cast(mapped_type_constant::value) | + (encode_types() << packed_arg_bits); +} + +template +constexpr unsigned long long make_descriptor() { + return NUM_ARGS <= max_packed_args ? encode_types() + : is_unpacked_bit | NUM_ARGS; +} + +// This type is intentionally undefined, only used for errors. +template +#if FMT_CLANG_VERSION && FMT_CLANG_VERSION <= 1500 +// https://github.com/fmtlib/fmt/issues/3796 +struct type_is_unformattable_for { +}; +#else +struct type_is_unformattable_for; +#endif + +template +FMT_CONSTEXPR auto make_arg(T& val) -> value { + using arg_type = remove_cvref_t().map(val))>; + + // Use enum instead of constexpr because the latter may generate code. + enum { + formattable_char = !std::is_same::value + }; + static_assert(formattable_char, "Mixing character types is disallowed."); + + // Formatting of arbitrary pointers is disallowed. If you want to format a + // pointer cast it to `void*` or `const void*`. In particular, this forbids + // formatting of `[const] volatile char*` printed as bool by iostreams. + enum { + formattable_pointer = !std::is_same::value + }; + static_assert(formattable_pointer, + "Formatting of non-void pointers is disallowed."); + + enum { formattable = !std::is_same::value }; +#if defined(__cpp_if_constexpr) + if constexpr (!formattable) + type_is_unformattable_for _; +#endif + static_assert( + formattable, + "Cannot format an argument. 
To make type T formattable provide a " + "formatter specialization: https://fmt.dev/latest/api.html#udt"); + return {arg_mapper().map(val)}; +} + +template +FMT_CONSTEXPR auto make_arg(T& val) -> basic_format_arg { + auto arg = basic_format_arg(); + arg.type_ = mapped_type_constant::value; + arg.value_ = make_arg(val); + return arg; +} + +template +FMT_CONSTEXPR inline auto make_arg(T& val) -> basic_format_arg { + return make_arg(val); +} + +template +using arg_t = conditional_t, + basic_format_arg>; + +template ::value)> +void init_named_arg(named_arg_info*, int& arg_index, int&, const T&) { + ++arg_index; +} +template ::value)> +void init_named_arg(named_arg_info* named_args, int& arg_index, + int& named_arg_index, const T& arg) { + named_args[named_arg_index++] = {arg.name, arg_index++}; +} + +// An array of references to arguments. It can be implicitly converted to +// `fmt::basic_format_args` for passing into type-erased formatting functions +// such as `fmt::vformat`. +template +struct format_arg_store { + // args_[0].named_args points to named_args to avoid bloating format_args. + // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning. + static constexpr size_t ARGS_ARR_SIZE = 1 + (NUM_ARGS != 0 ? NUM_ARGS : +1); + + arg_t args[ARGS_ARR_SIZE]; + named_arg_info named_args[NUM_NAMED_ARGS]; + + template + FMT_MAP_API format_arg_store(T&... 
values) + : args{{named_args, NUM_NAMED_ARGS}, + make_arg(values)...} { + using dummy = int[]; + int arg_index = 0, named_arg_index = 0; + (void)dummy{ + 0, + (init_named_arg(named_args, arg_index, named_arg_index, values), 0)...}; + } + + format_arg_store(format_arg_store&& rhs) { + args[0] = {named_args, NUM_NAMED_ARGS}; + for (size_t i = 1; i < ARGS_ARR_SIZE; ++i) args[i] = rhs.args[i]; + for (size_t i = 0; i < NUM_NAMED_ARGS; ++i) + named_args[i] = rhs.named_args[i]; + } + + format_arg_store(const format_arg_store& rhs) = delete; + format_arg_store& operator=(const format_arg_store& rhs) = delete; + format_arg_store& operator=(format_arg_store&& rhs) = delete; +}; + +// A specialization of format_arg_store without named arguments. +// It is a plain struct to reduce binary size in debug mode. +template +struct format_arg_store { + // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning. + arg_t args[NUM_ARGS != 0 ? NUM_ARGS : +1]; +}; + +} // namespace detail +FMT_BEGIN_EXPORT + +// A formatting argument. Context is a template parameter for the compiled API +// where output can be unbuffered. 
+template class basic_format_arg { + private: + detail::value value_; + detail::type type_; + + template + friend FMT_CONSTEXPR auto detail::make_arg(T& value) + -> basic_format_arg; + + friend class basic_format_args; + friend class dynamic_format_arg_store; + + using char_type = typename Context::char_type; + + template + friend struct detail::format_arg_store; + + basic_format_arg(const detail::named_arg_info* args, size_t size) + : value_(args, size) {} + + public: + class handle { + public: + explicit handle(detail::custom_value custom) : custom_(custom) {} + + void format(typename Context::parse_context_type& parse_ctx, + Context& ctx) const { + custom_.format(custom_.value, parse_ctx, ctx); + } + + private: + detail::custom_value custom_; + }; + + constexpr basic_format_arg() : type_(detail::type::none_type) {} + + constexpr explicit operator bool() const noexcept { + return type_ != detail::type::none_type; + } + + auto type() const -> detail::type { return type_; } + + auto is_integral() const -> bool { return detail::is_integral_type(type_); } + auto is_arithmetic() const -> bool { + return detail::is_arithmetic_type(type_); + } + + /** + * Visits an argument dispatching to the appropriate visit method based on + * the argument type. For example, if the argument type is `double` then + * `vis(value)` will be called with the value of type `double`. 
+ */ + template + FMT_CONSTEXPR FMT_INLINE auto visit(Visitor&& vis) const -> decltype(vis(0)) { + switch (type_) { + case detail::type::none_type: + break; + case detail::type::int_type: + return vis(value_.int_value); + case detail::type::uint_type: + return vis(value_.uint_value); + case detail::type::long_long_type: + return vis(value_.long_long_value); + case detail::type::ulong_long_type: + return vis(value_.ulong_long_value); + case detail::type::int128_type: + return vis(detail::convert_for_visit(value_.int128_value)); + case detail::type::uint128_type: + return vis(detail::convert_for_visit(value_.uint128_value)); + case detail::type::bool_type: + return vis(value_.bool_value); + case detail::type::char_type: + return vis(value_.char_value); + case detail::type::float_type: + return vis(value_.float_value); + case detail::type::double_type: + return vis(value_.double_value); + case detail::type::long_double_type: + return vis(value_.long_double_value); + case detail::type::cstring_type: + return vis(value_.string.data); + case detail::type::string_type: + using sv = basic_string_view; + return vis(sv(value_.string.data, value_.string.size)); + case detail::type::pointer_type: + return vis(value_.pointer); + case detail::type::custom_type: + return vis(typename basic_format_arg::handle(value_.custom)); + } + return vis(monostate()); + } + + auto format_custom(const char_type* parse_begin, + typename Context::parse_context_type& parse_ctx, + Context& ctx) -> bool { + if (type_ != detail::type::custom_type) return false; + parse_ctx.advance_to(parse_begin); + value_.custom.format(value_.custom.value, parse_ctx, ctx); + return true; + } +}; + +template +FMT_DEPRECATED FMT_CONSTEXPR auto visit_format_arg( + Visitor&& vis, const basic_format_arg& arg) -> decltype(vis(0)) { + return arg.visit(static_cast(vis)); +} + +/** + * A view of a collection of formatting arguments. 
To avoid lifetime issues it + * should only be used as a parameter type in type-erased functions such as + * `vformat`: + * + * void vlog(fmt::string_view fmt, fmt::format_args args); // OK + * fmt::format_args args = fmt::make_format_args(); // Dangling reference + */ +template class basic_format_args { + public: + using size_type = int; + using format_arg = basic_format_arg; + + private: + // A descriptor that contains information about formatting arguments. + // If the number of arguments is less or equal to max_packed_args then + // argument types are passed in the descriptor. This reduces binary code size + // per formatting function call. + unsigned long long desc_; + union { + // If is_packed() returns true then argument values are stored in values_; + // otherwise they are stored in args_. This is done to improve cache + // locality and reduce compiled code size since storing larger objects + // may require more code (at least on x86-64) even if the same amount of + // data is actually copied to stack. It saves ~10% on the bloat test. + const detail::value* values_; + const format_arg* args_; + }; + + constexpr auto is_packed() const -> bool { + return (desc_ & detail::is_unpacked_bit) == 0; + } + constexpr auto has_named_args() const -> bool { + return (desc_ & detail::has_named_args_bit) != 0; + } + + FMT_CONSTEXPR auto type(int index) const -> detail::type { + int shift = index * detail::packed_arg_bits; + unsigned int mask = (1 << detail::packed_arg_bits) - 1; + return static_cast((desc_ >> shift) & mask); + } + + public: + constexpr basic_format_args() : desc_(0), args_(nullptr) {} + + /// Constructs a `basic_format_args` object from `format_arg_store`. + template + constexpr FMT_ALWAYS_INLINE basic_format_args( + const detail::format_arg_store& + store) + : desc_(DESC), values_(store.args + (NUM_NAMED_ARGS != 0 ? 
1 : 0)) {} + + template detail::max_packed_args)> + constexpr basic_format_args( + const detail::format_arg_store& + store) + : desc_(DESC), args_(store.args + (NUM_NAMED_ARGS != 0 ? 1 : 0)) {} + + /// Constructs a `basic_format_args` object from `dynamic_format_arg_store`. + constexpr basic_format_args(const dynamic_format_arg_store& store) + : desc_(store.get_types()), args_(store.data()) {} + + /// Constructs a `basic_format_args` object from a dynamic list of arguments. + constexpr basic_format_args(const format_arg* args, int count) + : desc_(detail::is_unpacked_bit | detail::to_unsigned(count)), + args_(args) {} + + /// Returns the argument with the specified id. + FMT_CONSTEXPR auto get(int id) const -> format_arg { + format_arg arg; + if (!is_packed()) { + if (id < max_size()) arg = args_[id]; + return arg; + } + if (static_cast(id) >= detail::max_packed_args) return arg; + arg.type_ = type(id); + if (arg.type_ == detail::type::none_type) return arg; + arg.value_ = values_[id]; + return arg; + } + + template + auto get(basic_string_view name) const -> format_arg { + int id = get_id(name); + return id >= 0 ? get(id) : format_arg(); + } + + template + FMT_CONSTEXPR auto get_id(basic_string_view name) const -> int { + if (!has_named_args()) return -1; + const auto& named_args = + (is_packed() ? values_[-1] : args_[-1].value_).named_args; + for (size_t i = 0; i < named_args.size; ++i) { + if (named_args.data[i].name == name) return named_args.data[i].id; + } + return -1; + } + + auto max_size() const -> int { + unsigned long long max_packed = detail::max_packed_args; + return static_cast(is_packed() ? max_packed + : desc_ & ~detail::is_unpacked_bit); + } +}; + +// A formatting context. +class context { + private: + appender out_; + basic_format_args args_; + detail::locale_ref loc_; + + public: + /// The character type for the output. 
+ using char_type = char; + + using iterator = appender; + using format_arg = basic_format_arg; + using parse_context_type = basic_format_parse_context; + template using formatter_type = formatter; + + /// Constructs a `basic_format_context` object. References to the arguments + /// are stored in the object so make sure they have appropriate lifetimes. + FMT_CONSTEXPR context(iterator out, basic_format_args ctx_args, + detail::locale_ref loc = {}) + : out_(out), args_(ctx_args), loc_(loc) {} + context(context&&) = default; + context(const context&) = delete; + void operator=(const context&) = delete; + + FMT_CONSTEXPR auto arg(int id) const -> format_arg { return args_.get(id); } + auto arg(string_view name) -> format_arg { return args_.get(name); } + FMT_CONSTEXPR auto arg_id(string_view name) -> int { + return args_.get_id(name); + } + auto args() const -> const basic_format_args& { return args_; } + + // Returns an iterator to the beginning of the output range. + FMT_CONSTEXPR auto out() -> iterator { return out_; } + + // Advances the begin iterator to `it`. + void advance_to(iterator) {} + + FMT_CONSTEXPR auto locale() -> detail::locale_ref { return loc_; } +}; + +template class generic_context; + +// Longer aliases for C++20 compatibility. +template +using basic_format_context = + conditional_t::value, context, + generic_context>; +using format_context = context; + +template +using buffered_context = basic_format_context, Char>; + +template +using is_formattable = bool_constant>() + .map(std::declval()))>::value>; + +#if FMT_USE_CONCEPTS +template +concept formattable = is_formattable, Char>::value; +#endif + +/** + * Constructs an object that stores references to arguments and can be + * implicitly converted to `format_args`. `Context` can be omitted in which case + * it defaults to `format_context`. See `arg` for lifetime considerations. + */ +// Take arguments by lvalue references to avoid some lifetime issues, e.g. 
+// auto args = make_format_args(std::string()); +template (), + unsigned long long DESC = detail::make_descriptor(), + FMT_ENABLE_IF(NUM_NAMED_ARGS == 0)> +constexpr FMT_ALWAYS_INLINE auto make_format_args(T&... args) + -> detail::format_arg_store { + return {{detail::make_arg( + args)...}}; +} + +#ifndef FMT_DOC +template (), + unsigned long long DESC = + detail::make_descriptor() | + static_cast(detail::has_named_args_bit), + FMT_ENABLE_IF(NUM_NAMED_ARGS != 0)> +constexpr auto make_format_args(T&... args) + -> detail::format_arg_store { + return {args...}; +} +#endif + +/** + * Returns a named argument to be used in a formatting function. + * It should only be used in a call to a formatting function or + * `dynamic_format_arg_store::push_back`. + * + * **Example**: + * + * fmt::print("The answer is {answer}.", fmt::arg("answer", 42)); + */ +template +inline auto arg(const Char* name, const T& arg) -> detail::named_arg { + static_assert(!detail::is_named_arg(), "nested named arguments"); + return {name, arg}; +} +FMT_END_EXPORT + +/// An alias for `basic_format_args`. +// A separate type would result in shorter symbols but break ABI compatibility +// between clang and gcc on ARM (#1919). +FMT_EXPORT using format_args = basic_format_args; + +// We cannot use enum classes as bit fields because of a gcc bug, so we put them +// in namespaces instead (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414). +// Additionally, if an underlying type is specified, older gcc incorrectly warns +// that the type is too small. Both bugs are fixed in gcc 9.3. 
+#if FMT_GCC_VERSION && FMT_GCC_VERSION < 903 +# define FMT_ENUM_UNDERLYING_TYPE(type) +#else +# define FMT_ENUM_UNDERLYING_TYPE(type) : type +#endif +namespace align { +enum type FMT_ENUM_UNDERLYING_TYPE(unsigned char){none, left, right, center, + numeric}; +} +using align_t = align::type; +namespace sign { +enum type FMT_ENUM_UNDERLYING_TYPE(unsigned char){none, minus, plus, space}; +} +using sign_t = sign::type; + +namespace detail { + +template +using unsigned_char = typename conditional_t::value, + std::make_unsigned, + type_identity>::type; + +// Character (code unit) type is erased to prevent template bloat. +struct fill_t { + private: + enum { max_size = 4 }; + char data_[max_size] = {' '}; + unsigned char size_ = 1; + + public: + template + FMT_CONSTEXPR void operator=(basic_string_view s) { + auto size = s.size(); + size_ = static_cast(size); + if (size == 1) { + unsigned uchar = static_cast>(s[0]); + data_[0] = static_cast(uchar); + data_[1] = static_cast(uchar >> 8); + return; + } + FMT_ASSERT(size <= max_size, "invalid fill"); + for (size_t i = 0; i < size; ++i) data_[i] = static_cast(s[i]); + } + + FMT_CONSTEXPR void operator=(char c) { + data_[0] = c; + size_ = 1; + } + + constexpr auto size() const -> size_t { return size_; } + + template constexpr auto get() const -> Char { + using uchar = unsigned char; + return static_cast(static_cast(data_[0]) | + (static_cast(data_[1]) << 8)); + } + + template ::value)> + constexpr auto data() const -> const Char* { + return data_; + } + template ::value)> + constexpr auto data() const -> const Char* { + return nullptr; + } +}; +} // namespace detail + +enum class presentation_type : unsigned char { + // Common specifiers: + none = 0, + debug = 1, // '?' 
+ string = 2, // 's' (string, bool) + + // Integral, bool and character specifiers: + dec = 3, // 'd' + hex, // 'x' or 'X' + oct, // 'o' + bin, // 'b' or 'B' + chr, // 'c' + + // String and pointer specifiers: + pointer = 3, // 'p' + + // Floating-point specifiers: + exp = 1, // 'e' or 'E' (1 since there is no FP debug presentation) + fixed, // 'f' or 'F' + general, // 'g' or 'G' + hexfloat // 'a' or 'A' +}; + +// Format specifiers for built-in and string types. +struct format_specs { + int width; + int precision; + presentation_type type; + align_t align : 4; + sign_t sign : 3; + bool upper : 1; // An uppercase version e.g. 'X' for 'x'. + bool alt : 1; // Alternate form ('#'). + bool localized : 1; + detail::fill_t fill; + + constexpr format_specs() + : width(0), + precision(-1), + type(presentation_type::none), + align(align::none), + sign(sign::none), + upper(false), + alt(false), + localized(false) {} +}; + +namespace detail { + +enum class arg_id_kind { none, index, name }; + +// An argument reference. +template struct arg_ref { + FMT_CONSTEXPR arg_ref() : kind(arg_id_kind::none), val() {} + + FMT_CONSTEXPR explicit arg_ref(int index) + : kind(arg_id_kind::index), val(index) {} + FMT_CONSTEXPR explicit arg_ref(basic_string_view name) + : kind(arg_id_kind::name), val(name) {} + + FMT_CONSTEXPR auto operator=(int idx) -> arg_ref& { + kind = arg_id_kind::index; + val.index = idx; + return *this; + } + + arg_id_kind kind; + union value { + FMT_CONSTEXPR value(int idx = 0) : index(idx) {} + FMT_CONSTEXPR value(basic_string_view n) : name(n) {} + + int index; + basic_string_view name; + } val; +}; + +// Format specifiers with width and precision resolved at formatting rather +// than parsing time to allow reusing the same parsed specifiers with +// different sets of arguments (precompilation of format strings). +template struct dynamic_format_specs : format_specs { + arg_ref width_ref; + arg_ref precision_ref; +}; + +// Converts a character to ASCII. 
Returns '\0' on conversion failure. +template ::value)> +constexpr auto to_ascii(Char c) -> char { + return c <= 0xff ? static_cast(c) : '\0'; +} + +// Returns the number of code units in a code point or 1 on error. +template +FMT_CONSTEXPR auto code_point_length(const Char* begin) -> int { + if (const_check(sizeof(Char) != 1)) return 1; + auto c = static_cast(*begin); + return static_cast((0x3a55000000000000ull >> (2 * (c >> 3))) & 0x3) + 1; +} + +// Return the result via the out param to workaround gcc bug 77539. +template +FMT_CONSTEXPR auto find(Ptr first, Ptr last, T value, Ptr& out) -> bool { + for (out = first; out != last; ++out) { + if (*out == value) return true; + } + return false; +} + +template <> +inline auto find(const char* first, const char* last, char value, + const char*& out) -> bool { + out = + static_cast(memchr(first, value, to_unsigned(last - first))); + return out != nullptr; +} + +// Parses the range [begin, end) as an unsigned integer. This function assumes +// that the range is non-empty and the first character is a digit. +template +FMT_CONSTEXPR auto parse_nonnegative_int(const Char*& begin, const Char* end, + int error_value) noexcept -> int { + FMT_ASSERT(begin != end && '0' <= *begin && *begin <= '9', ""); + unsigned value = 0, prev = 0; + auto p = begin; + do { + prev = value; + value = value * 10 + unsigned(*p - '0'); + ++p; + } while (p != end && '0' <= *p && *p <= '9'); + auto num_digits = p - begin; + begin = p; + int digits10 = static_cast(sizeof(int) * CHAR_BIT * 3 / 10); + if (num_digits <= digits10) return static_cast(value); + // Check for overflow. + unsigned max = INT_MAX; + return num_digits == digits10 + 1 && + prev * 10ull + unsigned(p[-1] - '0') <= max + ? 
static_cast(value) + : error_value; +} + +FMT_CONSTEXPR inline auto parse_align(char c) -> align_t { + switch (c) { + case '<': + return align::left; + case '>': + return align::right; + case '^': + return align::center; + } + return align::none; +} + +template constexpr auto is_name_start(Char c) -> bool { + return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_'; +} + +template +FMT_CONSTEXPR auto do_parse_arg_id(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { + Char c = *begin; + if (c >= '0' && c <= '9') { + int index = 0; + if (c != '0') + index = parse_nonnegative_int(begin, end, INT_MAX); + else + ++begin; + if (begin == end || (*begin != '}' && *begin != ':')) + report_error("invalid format string"); + else + handler.on_index(index); + return begin; + } + if (!is_name_start(c)) { + report_error("invalid format string"); + return begin; + } + auto it = begin; + do { + ++it; + } while (it != end && (is_name_start(*it) || ('0' <= *it && *it <= '9'))); + handler.on_name({begin, to_unsigned(it - begin)}); + return it; +} + +template +FMT_CONSTEXPR auto parse_arg_id(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { + FMT_ASSERT(begin != end, ""); + Char c = *begin; + if (c != '}' && c != ':') return do_parse_arg_id(begin, end, handler); + handler.on_auto(); + return begin; +} + +template struct dynamic_spec_id_handler { + basic_format_parse_context& ctx; + arg_ref& ref; + + FMT_CONSTEXPR void on_auto() { + int id = ctx.next_arg_id(); + ref = arg_ref(id); + ctx.check_dynamic_spec(id); + } + FMT_CONSTEXPR void on_index(int id) { + ref = arg_ref(id); + ctx.check_arg_id(id); + ctx.check_dynamic_spec(id); + } + FMT_CONSTEXPR void on_name(basic_string_view id) { + ref = arg_ref(id); + ctx.check_arg_id(id); + } +}; + +// Parses [integer | "{" [arg_id] "}"]. 
+template +FMT_CONSTEXPR auto parse_dynamic_spec(const Char* begin, const Char* end, + int& value, arg_ref& ref, + basic_format_parse_context& ctx) + -> const Char* { + FMT_ASSERT(begin != end, ""); + if ('0' <= *begin && *begin <= '9') { + int val = parse_nonnegative_int(begin, end, -1); + if (val != -1) + value = val; + else + report_error("number is too big"); + } else if (*begin == '{') { + ++begin; + auto handler = dynamic_spec_id_handler{ctx, ref}; + if (begin != end) begin = parse_arg_id(begin, end, handler); + if (begin != end && *begin == '}') return ++begin; + report_error("invalid format string"); + } + return begin; +} + +template +FMT_CONSTEXPR auto parse_precision(const Char* begin, const Char* end, + int& value, arg_ref& ref, + basic_format_parse_context& ctx) + -> const Char* { + ++begin; + if (begin == end || *begin == '}') { + report_error("invalid precision"); + return begin; + } + return parse_dynamic_spec(begin, end, value, ref, ctx); +} + +enum class state { start, align, sign, hash, zero, width, precision, locale }; + +// Parses standard format specifiers. +template +FMT_CONSTEXPR auto parse_format_specs(const Char* begin, const Char* end, + dynamic_format_specs& specs, + basic_format_parse_context& ctx, + type arg_type) -> const Char* { + auto c = '\0'; + if (end - begin > 1) { + auto next = to_ascii(begin[1]); + c = parse_align(next) == align::none ? 
to_ascii(*begin) : '\0'; + } else { + if (begin == end) return begin; + c = to_ascii(*begin); + } + + struct { + state current_state = state::start; + FMT_CONSTEXPR void operator()(state s, bool valid = true) { + if (current_state >= s || !valid) + report_error("invalid format specifier"); + current_state = s; + } + } enter_state; + + using pres = presentation_type; + constexpr auto integral_set = sint_set | uint_set | bool_set | char_set; + struct { + const Char*& begin; + dynamic_format_specs& specs; + type arg_type; + + FMT_CONSTEXPR auto operator()(pres pres_type, int set) -> const Char* { + if (!in(arg_type, set)) { + if (arg_type == type::none_type) return begin; + report_error("invalid format specifier"); + } + specs.type = pres_type; + return begin + 1; + } + } parse_presentation_type{begin, specs, arg_type}; + + for (;;) { + switch (c) { + case '<': + case '>': + case '^': + enter_state(state::align); + specs.align = parse_align(c); + ++begin; + break; + case '+': + case '-': + case ' ': + if (arg_type == type::none_type) return begin; + enter_state(state::sign, in(arg_type, sint_set | float_set)); + switch (c) { + case '+': + specs.sign = sign::plus; + break; + case '-': + specs.sign = sign::minus; + break; + case ' ': + specs.sign = sign::space; + break; + } + ++begin; + break; + case '#': + if (arg_type == type::none_type) return begin; + enter_state(state::hash, is_arithmetic_type(arg_type)); + specs.alt = true; + ++begin; + break; + case '0': + enter_state(state::zero); + if (!is_arithmetic_type(arg_type)) { + if (arg_type == type::none_type) return begin; + report_error("format specifier requires numeric argument"); + } + if (specs.align == align::none) { + // Ignore 0 if align is specified for compatibility with std::format. 
+ specs.align = align::numeric; + specs.fill = '0'; + } + ++begin; + break; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '{': + enter_state(state::width); + begin = parse_dynamic_spec(begin, end, specs.width, specs.width_ref, ctx); + break; + case '.': + if (arg_type == type::none_type) return begin; + enter_state(state::precision, + in(arg_type, float_set | string_set | cstring_set)); + begin = parse_precision(begin, end, specs.precision, specs.precision_ref, + ctx); + break; + case 'L': + if (arg_type == type::none_type) return begin; + enter_state(state::locale, is_arithmetic_type(arg_type)); + specs.localized = true; + ++begin; + break; + case 'd': + return parse_presentation_type(pres::dec, integral_set); + case 'X': + specs.upper = true; + FMT_FALLTHROUGH; + case 'x': + return parse_presentation_type(pres::hex, integral_set); + case 'o': + return parse_presentation_type(pres::oct, integral_set); + case 'B': + specs.upper = true; + FMT_FALLTHROUGH; + case 'b': + return parse_presentation_type(pres::bin, integral_set); + case 'E': + specs.upper = true; + FMT_FALLTHROUGH; + case 'e': + return parse_presentation_type(pres::exp, float_set); + case 'F': + specs.upper = true; + FMT_FALLTHROUGH; + case 'f': + return parse_presentation_type(pres::fixed, float_set); + case 'G': + specs.upper = true; + FMT_FALLTHROUGH; + case 'g': + return parse_presentation_type(pres::general, float_set); + case 'A': + specs.upper = true; + FMT_FALLTHROUGH; + case 'a': + return parse_presentation_type(pres::hexfloat, float_set); + case 'c': + if (arg_type == type::bool_type) report_error("invalid format specifier"); + return parse_presentation_type(pres::chr, integral_set); + case 's': + return parse_presentation_type(pres::string, + bool_set | string_set | cstring_set); + case 'p': + return parse_presentation_type(pres::pointer, pointer_set | cstring_set); + case '?': + return parse_presentation_type(pres::debug, + 
char_set | string_set | cstring_set); + case '}': + return begin; + default: { + if (*begin == '}') return begin; + // Parse fill and alignment. + auto fill_end = begin + code_point_length(begin); + if (end - fill_end <= 0) { + report_error("invalid format specifier"); + return begin; + } + if (*begin == '{') { + report_error("invalid fill character '{'"); + return begin; + } + auto align = parse_align(to_ascii(*fill_end)); + enter_state(state::align, align != align::none); + specs.fill = + basic_string_view(begin, to_unsigned(fill_end - begin)); + specs.align = align; + begin = fill_end + 1; + } + } + if (begin == end) return begin; + c = to_ascii(*begin); + } +} + +template +FMT_CONSTEXPR auto parse_replacement_field(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { + struct id_adapter { + Handler& handler; + int arg_id; + + FMT_CONSTEXPR void on_auto() { arg_id = handler.on_arg_id(); } + FMT_CONSTEXPR void on_index(int id) { arg_id = handler.on_arg_id(id); } + FMT_CONSTEXPR void on_name(basic_string_view id) { + arg_id = handler.on_arg_id(id); + } + }; + + ++begin; + if (begin == end) return handler.on_error("invalid format string"), end; + if (*begin == '}') { + handler.on_replacement_field(handler.on_arg_id(), begin); + } else if (*begin == '{') { + handler.on_text(begin, begin + 1); + } else { + auto adapter = id_adapter{handler, 0}; + begin = parse_arg_id(begin, end, adapter); + Char c = begin != end ? 
*begin : Char(); + if (c == '}') { + handler.on_replacement_field(adapter.arg_id, begin); + } else if (c == ':') { + begin = handler.on_format_specs(adapter.arg_id, begin + 1, end); + if (begin == end || *begin != '}') + return handler.on_error("unknown format specifier"), end; + } else { + return handler.on_error("missing '}' in format string"), end; + } + } + return begin + 1; +} + +template +FMT_CONSTEXPR void parse_format_string(basic_string_view format_str, + Handler&& handler) { + auto begin = format_str.data(); + auto end = begin + format_str.size(); + if (end - begin < 32) { + // Use a simple loop instead of memchr for small strings. + const Char* p = begin; + while (p != end) { + auto c = *p++; + if (c == '{') { + handler.on_text(begin, p - 1); + begin = p = parse_replacement_field(p - 1, end, handler); + } else if (c == '}') { + if (p == end || *p != '}') + return handler.on_error("unmatched '}' in format string"); + handler.on_text(begin, p); + begin = ++p; + } + } + handler.on_text(begin, end); + return; + } + struct writer { + FMT_CONSTEXPR void operator()(const Char* from, const Char* to) { + if (from == to) return; + for (;;) { + const Char* p = nullptr; + if (!find(from, to, Char('}'), p)) + return handler_.on_text(from, to); + ++p; + if (p == to || *p != '}') + return handler_.on_error("unmatched '}' in format string"); + handler_.on_text(from, p); + from = p + 1; + } + } + Handler& handler_; + } write = {handler}; + while (begin != end) { + // Doing two passes with memchr (one for '{' and another for '}') is up to + // 2.5x faster than the naive one-pass implementation on big format strings. 
+ const Char* p = begin; + if (*begin != '{' && !find(begin + 1, end, Char('{'), p)) + return write(begin, end); + write(begin, p); + begin = parse_replacement_field(p, end, handler); + } +} + +template ::value> struct strip_named_arg { + using type = T; +}; +template struct strip_named_arg { + using type = remove_cvref_t; +}; + +template +FMT_VISIBILITY("hidden") // Suppress an ld warning on macOS (#3769). +FMT_CONSTEXPR auto parse_format_specs(ParseContext& ctx) + -> decltype(ctx.begin()) { + using char_type = typename ParseContext::char_type; + using context = buffered_context; + using mapped_type = conditional_t< + mapped_type_constant::value != type::custom_type, + decltype(arg_mapper().map(std::declval())), + typename strip_named_arg::type>; +#if defined(__cpp_if_constexpr) + if constexpr (std::is_default_constructible< + formatter>::value) { + return formatter().parse(ctx); + } else { + type_is_unformattable_for _; + return ctx.begin(); + } +#else + return formatter().parse(ctx); +#endif +} + +// Checks char specs and returns true iff the presentation type is char-like. +FMT_CONSTEXPR inline auto check_char_specs(const format_specs& specs) -> bool { + if (specs.type != presentation_type::none && + specs.type != presentation_type::chr && + specs.type != presentation_type::debug) { + return false; + } + if (specs.align == align::numeric || specs.sign != sign::none || specs.alt) + report_error("invalid format specifier for char"); + return true; +} + +#if FMT_USE_NONTYPE_TEMPLATE_ARGS +template +constexpr auto get_arg_index_by_name(basic_string_view name) -> int { + if constexpr (is_statically_named_arg()) { + if (name == T::name) return N; + } + if constexpr (sizeof...(Args) > 0) + return get_arg_index_by_name(name); + (void)name; // Workaround an MSVC bug about "unused" parameter. 
+ return -1; +} +#endif + +template +FMT_CONSTEXPR auto get_arg_index_by_name(basic_string_view name) -> int { +#if FMT_USE_NONTYPE_TEMPLATE_ARGS + if constexpr (sizeof...(Args) > 0) + return get_arg_index_by_name<0, Args...>(name); +#endif + (void)name; + return -1; +} + +template class format_string_checker { + private: + using parse_context_type = compile_parse_context; + static constexpr int num_args = sizeof...(Args); + + // Format specifier parsing function. + // In the future basic_format_parse_context will replace compile_parse_context + // here and will use is_constant_evaluated and downcasting to access the data + // needed for compile-time checks: https://godbolt.org/z/GvWzcTjh1. + using parse_func = const Char* (*)(parse_context_type&); + + type types_[num_args > 0 ? static_cast(num_args) : 1]; + parse_context_type context_; + parse_func parse_funcs_[num_args > 0 ? static_cast(num_args) : 1]; + + public: + explicit FMT_CONSTEXPR format_string_checker(basic_string_view fmt) + : types_{mapped_type_constant>::value...}, + context_(fmt, num_args, types_), + parse_funcs_{&parse_format_specs...} {} + + FMT_CONSTEXPR void on_text(const Char*, const Char*) {} + + FMT_CONSTEXPR auto on_arg_id() -> int { return context_.next_arg_id(); } + FMT_CONSTEXPR auto on_arg_id(int id) -> int { + return context_.check_arg_id(id), id; + } + FMT_CONSTEXPR auto on_arg_id(basic_string_view id) -> int { +#if FMT_USE_NONTYPE_TEMPLATE_ARGS + auto index = get_arg_index_by_name(id); + if (index < 0) on_error("named argument is not found"); + return index; +#else + (void)id; + on_error("compile-time checks for named arguments require C++20 support"); + return 0; +#endif + } + + FMT_CONSTEXPR void on_replacement_field(int id, const Char* begin) { + on_format_specs(id, begin, begin); // Call parse() on empty specs. 
+ } + + FMT_CONSTEXPR auto on_format_specs(int id, const Char* begin, const Char*) + -> const Char* { + context_.advance_to(begin); + // id >= 0 check is a workaround for gcc 10 bug (#2065). + return id >= 0 && id < num_args ? parse_funcs_[id](context_) : begin; + } + + FMT_NORETURN FMT_CONSTEXPR void on_error(const char* message) { + report_error(message); + } +}; + +// A base class for compile-time strings. +struct compile_string {}; + +template +using is_compile_string = std::is_base_of; + +// Reports a compile-time error if S is not a valid format string. +template ::value)> +FMT_ALWAYS_INLINE void check_format_string(const S&) { +#ifdef FMT_ENFORCE_COMPILE_STRING + static_assert(is_compile_string::value, + "FMT_ENFORCE_COMPILE_STRING requires all format strings to use " + "FMT_STRING."); +#endif +} +template ::value)> +void check_format_string(S format_str) { + using char_t = typename S::char_type; + FMT_CONSTEXPR auto s = basic_string_view(format_str); + using checker = format_string_checker...>; + FMT_CONSTEXPR bool error = (parse_format_string(s, checker(s)), true); + ignore_unused(error); +} + +// Report truncation to prevent silent data loss. +inline void report_truncation(bool truncated) { + if (truncated) report_error("output is truncated"); +} + +// Use vformat_args and avoid type_identity to keep symbols short and workaround +// a GCC <= 4.8 bug. 
+template struct vformat_args { + using type = basic_format_args>; +}; +template <> struct vformat_args { + using type = format_args; +}; + +template +void vformat_to(buffer& buf, basic_string_view fmt, + typename vformat_args::type args, locale_ref loc = {}); + +FMT_API void vprint_mojibake(FILE*, string_view, format_args, bool = false); +#ifndef _WIN32 +inline void vprint_mojibake(FILE*, string_view, format_args, bool) {} +#endif + +template struct native_formatter { + private: + dynamic_format_specs specs_; + + public: + using nonlocking = void; + + template + FMT_CONSTEXPR auto parse(ParseContext& ctx) -> const Char* { + if (ctx.begin() == ctx.end() || *ctx.begin() == '}') return ctx.begin(); + auto end = parse_format_specs(ctx.begin(), ctx.end(), specs_, ctx, TYPE); + if (const_check(TYPE == type::char_type)) check_char_specs(specs_); + return end; + } + + template + FMT_CONSTEXPR void set_debug_format(bool set = true) { + specs_.type = set ? presentation_type::debug : presentation_type::none; + } + + template + FMT_CONSTEXPR auto format(const T& val, FormatContext& ctx) const + -> decltype(ctx.out()); +}; +} // namespace detail + +FMT_BEGIN_EXPORT + +// A formatter specialization for natively supported types. +template +struct formatter::value != + detail::type::custom_type>> + : detail::native_formatter::value> { +}; + +template struct runtime_format_string { + basic_string_view str; +}; + +/// A compile-time format string. 
+template class basic_format_string { + private: + basic_string_view str_; + + public: + template < + typename S, + FMT_ENABLE_IF( + std::is_convertible>::value || + (detail::is_compile_string::value && + std::is_constructible, const S&>::value))> + FMT_CONSTEVAL FMT_ALWAYS_INLINE basic_format_string(const S& s) : str_(s) { + static_assert( + detail::count< + (std::is_base_of>::value && + std::is_reference::value)...>() == 0, + "passing views as lvalues is disallowed"); +#if FMT_USE_CONSTEVAL + if constexpr (detail::count_named_args() == + detail::count_statically_named_args()) { + using checker = + detail::format_string_checker...>; + detail::parse_format_string(str_, checker(s)); + } +#else + detail::check_format_string(s); +#endif + } + basic_format_string(runtime_format_string fmt) : str_(fmt.str) {} + + FMT_ALWAYS_INLINE operator basic_string_view() const { return str_; } + auto get() const -> basic_string_view { return str_; } +}; + +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 +// Workaround broken conversion on older gcc. +template using format_string = string_view; +inline auto runtime(string_view s) -> string_view { return s; } +#else +template +using format_string = basic_format_string...>; +/** + * Creates a runtime format string. + * + * **Example**: + * + * // Check format string at runtime instead of compile-time. + * fmt::print(fmt::runtime("{:d}"), "I am not a number"); + */ +inline auto runtime(string_view s) -> runtime_format_string<> { return {{s}}; } +#endif + +/// Formats a string and writes the output to `out`. +template , + char>::value)> +auto vformat_to(OutputIt&& out, string_view fmt, format_args args) + -> remove_cvref_t { + auto&& buf = detail::get_buffer(out); + detail::vformat_to(buf, fmt, args, {}); + return detail::get_iterator(buf, out); +} + +/** + * Formats `args` according to specifications in `fmt`, writes the result to + * the output iterator `out` and returns the iterator past the end of the output + * range. 
`format_to` does not append a terminating null character. + * + * **Example**: + * + * auto out = std::vector(); + * fmt::format_to(std::back_inserter(out), "{}", 42); + */ +template , + char>::value)> +FMT_INLINE auto format_to(OutputIt&& out, format_string fmt, T&&... args) + -> remove_cvref_t { + return vformat_to(FMT_FWD(out), fmt, fmt::make_format_args(args...)); +} + +template struct format_to_n_result { + /// Iterator past the end of the output range. + OutputIt out; + /// Total (not truncated) output size. + size_t size; +}; + +template ::value)> +auto vformat_to_n(OutputIt out, size_t n, string_view fmt, format_args args) + -> format_to_n_result { + using traits = detail::fixed_buffer_traits; + auto buf = detail::iterator_buffer(out, n); + detail::vformat_to(buf, fmt, args, {}); + return {buf.out(), buf.count()}; +} + +/** + * Formats `args` according to specifications in `fmt`, writes up to `n` + * characters of the result to the output iterator `out` and returns the total + * (not truncated) output size and the iterator past the end of the output + * range. `format_to_n` does not append a terminating null character. + */ +template ::value)> +FMT_INLINE auto format_to_n(OutputIt out, size_t n, format_string fmt, + T&&... args) -> format_to_n_result { + return vformat_to_n(out, n, fmt, fmt::make_format_args(args...)); +} + +template +struct format_to_result { + /// Iterator pointing to just after the last successful write in the range. + OutputIt out; + /// Specifies if the output was truncated. 
+ bool truncated; + + FMT_CONSTEXPR operator OutputIt&() & { + detail::report_truncation(truncated); + return out; + } + FMT_CONSTEXPR operator const OutputIt&() const& { + detail::report_truncation(truncated); + return out; + } + FMT_CONSTEXPR operator OutputIt&&() && { + detail::report_truncation(truncated); + return static_cast(out); + } +}; + +template +auto vformat_to(char (&out)[N], string_view fmt, format_args args) + -> format_to_result { + auto result = vformat_to_n(out, N, fmt, args); + return {result.out, result.size > N}; +} + +template +FMT_INLINE auto format_to(char (&out)[N], format_string fmt, T&&... args) + -> format_to_result { + auto result = fmt::format_to_n(out, N, fmt, static_cast(args)...); + return {result.out, result.size > N}; +} + +/// Returns the number of chars in the output of `format(fmt, args...)`. +template +FMT_NODISCARD FMT_INLINE auto formatted_size(format_string fmt, + T&&... args) -> size_t { + auto buf = detail::counting_buffer<>(); + detail::vformat_to(buf, fmt, fmt::make_format_args(args...), {}); + return buf.count(); +} + +FMT_API void vprint(string_view fmt, format_args args); +FMT_API void vprint(FILE* f, string_view fmt, format_args args); +FMT_API void vprint_buffered(FILE* f, string_view fmt, format_args args); +FMT_API void vprintln(FILE* f, string_view fmt, format_args args); + +/** + * Formats `args` according to specifications in `fmt` and writes the output + * to `stdout`. + * + * **Example**: + * + * fmt::print("The answer is {}.", 42); + */ +template +FMT_INLINE void print(format_string fmt, T&&... args) { + const auto& vargs = fmt::make_format_args(args...); + if (!detail::use_utf8()) return detail::vprint_mojibake(stdout, fmt, vargs); + return detail::is_locking() ? vprint_buffered(stdout, fmt, vargs) + : vprint(fmt, vargs); +} + +/** + * Formats `args` according to specifications in `fmt` and writes the + * output to the file `f`. 
+ * + * **Example**: + * + * fmt::print(stderr, "Don't {}!", "panic"); + */ +template +FMT_INLINE void print(FILE* f, format_string fmt, T&&... args) { + const auto& vargs = fmt::make_format_args(args...); + if (!detail::use_utf8()) return detail::vprint_mojibake(f, fmt, vargs); + return detail::is_locking() ? vprint_buffered(f, fmt, vargs) + : vprint(f, fmt, vargs); +} + +/// Formats `args` according to specifications in `fmt` and writes the output +/// to the file `f` followed by a newline. +template +FMT_INLINE void println(FILE* f, format_string fmt, T&&... args) { + const auto& vargs = fmt::make_format_args(args...); + return detail::use_utf8() ? vprintln(f, fmt, vargs) + : detail::vprint_mojibake(f, fmt, vargs, true); +} + +/// Formats `args` according to specifications in `fmt` and writes the output +/// to `stdout` followed by a newline. +template +FMT_INLINE void println(format_string fmt, T&&... args) { + return fmt::println(stdout, fmt, static_cast(args)...); +} + +FMT_END_EXPORT +FMT_GCC_PRAGMA("GCC pop_options") +FMT_END_NAMESPACE + +#ifdef FMT_HEADER_ONLY +# include "format.h" +#endif +#endif // FMT_BASE_H_ diff --git a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/chrono.h b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/chrono.h index 1a8d8d04c2aa..c93123fd3353 100644 --- a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/chrono.h +++ b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/chrono.h @@ -8,20 +8,31 @@ #ifndef FMT_CHRONO_H_ #define FMT_CHRONO_H_ -#include -#include -#include // std::isfinite -#include // std::memcpy -#include -#include -#include -#include -#include +#ifndef FMT_MODULE +# include +# include +# include // std::isfinite +# include // std::memcpy +# include +# include +# include +# include +# include +#endif #include "format.h" FMT_BEGIN_NAMESPACE +// Check if std::chrono::local_t is available. 
+#ifndef FMT_USE_LOCAL_TIME +# ifdef __cpp_lib_chrono +# define FMT_USE_LOCAL_TIME (__cpp_lib_chrono >= 201907L) +# else +# define FMT_USE_LOCAL_TIME 0 +# endif +#endif + // Check if std::chrono::utc_timestamp is available. #ifndef FMT_USE_UTC_TIME # ifdef __cpp_lib_chrono @@ -63,7 +74,8 @@ template ::value && std::numeric_limits::is_signed == std::numeric_limits::is_signed)> -FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec) + -> To { ec = 0; using F = std::numeric_limits; using T = std::numeric_limits; @@ -84,15 +96,14 @@ FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { return static_cast(from); } -/** - * converts From to To, without loss. If the dynamic value of from - * can't be converted to To without loss, ec is set. - */ +/// Converts From to To, without loss. If the dynamic value of from +/// can't be converted to To without loss, ec is set. template ::value && std::numeric_limits::is_signed != std::numeric_limits::is_signed)> -FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec) + -> To { ec = 0; using F = std::numeric_limits; using T = std::numeric_limits; @@ -124,7 +135,8 @@ FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { template ::value)> -FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec) + -> To { ec = 0; return from; } // function @@ -145,7 +157,7 @@ FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { // clang-format on template ::value)> -FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto safe_float_conversion(const From from, int& ec) -> To { ec = 0; using T = std::numeric_limits; static_assert(std::is_floating_point::value, "From must be floating"); 
@@ -167,20 +179,18 @@ FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { template ::value)> -FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto safe_float_conversion(const From from, int& ec) -> To { ec = 0; static_assert(std::is_floating_point::value, "From must be floating"); return from; } -/** - * safe duration cast between integral durations - */ +/// Safe duration cast between integral durations template ::value), FMT_ENABLE_IF(std::is_integral::value)> -To safe_duration_cast(std::chrono::duration from, - int& ec) { +auto safe_duration_cast(std::chrono::duration from, + int& ec) -> To { using From = std::chrono::duration; ec = 0; // the basic idea is that we need to convert from count() in the from type @@ -212,7 +222,8 @@ To safe_duration_cast(std::chrono::duration from, } const auto min1 = (std::numeric_limits::min)() / Factor::num; - if (!std::is_unsigned::value && count < min1) { + if (detail::const_check(!std::is_unsigned::value) && + count < min1) { ec = 1; return {}; } @@ -224,14 +235,12 @@ To safe_duration_cast(std::chrono::duration from, return ec ? To() : To(tocount); } -/** - * safe duration_cast between floating point durations - */ +/// Safe duration_cast between floating point durations template ::value), FMT_ENABLE_IF(std::is_floating_point::value)> -To safe_duration_cast(std::chrono::duration from, - int& ec) { +auto safe_duration_cast(std::chrono::duration from, + int& ec) -> To { using From = std::chrono::duration; ec = 0; if (std::isnan(from.count())) { @@ -311,12 +320,45 @@ To safe_duration_cast(std::chrono::duration from, namespace detail { template struct null {}; -inline null<> localtime_r FMT_NOMACRO(...) { return null<>(); } -inline null<> localtime_s(...) { return null<>(); } -inline null<> gmtime_r(...) { return null<>(); } -inline null<> gmtime_s(...) { return null<>(); } +inline auto localtime_r FMT_NOMACRO(...) -> null<> { return null<>(); } +inline auto localtime_s(...) 
-> null<> { return null<>(); } +inline auto gmtime_r(...) -> null<> { return null<>(); } +inline auto gmtime_s(...) -> null<> { return null<>(); } + +// It is defined here and not in ostream.h because the latter has expensive +// includes. +template class formatbuf : public Streambuf { + private: + using char_type = typename Streambuf::char_type; + using streamsize = decltype(std::declval().sputn(nullptr, 0)); + using int_type = typename Streambuf::int_type; + using traits_type = typename Streambuf::traits_type; + + buffer& buffer_; + + public: + explicit formatbuf(buffer& buf) : buffer_(buf) {} + + protected: + // The put area is always empty. This makes the implementation simpler and has + // the advantage that the streambuf and the buffer are always in sync and + // sputc never writes into uninitialized memory. A disadvantage is that each + // call to sputc always results in a (virtual) call to overflow. There is no + // disadvantage here for sputn since this always results in a call to xsputn. 
+ + auto overflow(int_type ch) -> int_type override { + if (!traits_type::eq_int_type(ch, traits_type::eof())) + buffer_.push_back(static_cast(ch)); + return ch; + } + + auto xsputn(const char_type* s, streamsize count) -> streamsize override { + buffer_.append(s, s + count); + return count; + } +}; -inline const std::locale& get_classic_locale() { +inline auto get_classic_locale() -> const std::locale& { static const auto& locale = std::locale::classic(); return locale; } @@ -326,8 +368,6 @@ template struct codecvt_result { CodeUnit buf[max_size]; CodeUnit* end; }; -template -constexpr const size_t codecvt_result::max_size; template void write_codecvt(codecvt_result& out, string_view in_buf, @@ -351,11 +391,12 @@ void write_codecvt(codecvt_result& out, string_view in_buf, template auto write_encoded_tm_str(OutputIt out, string_view in, const std::locale& loc) -> OutputIt { - if (detail::is_utf8() && loc != get_classic_locale()) { + if (detail::use_utf8() && loc != get_classic_locale()) { // char16_t and char32_t codecvts are broken in MSVC (linkage errors) and // gcc-4. -#if FMT_MSC_VERSION != 0 || \ - (defined(__GLIBCXX__) && !defined(_GLIBCXX_USE_DUAL_ABI)) +#if FMT_MSC_VERSION != 0 || \ + (defined(__GLIBCXX__) && \ + (!defined(_GLIBCXX_USE_DUAL_ABI) || _GLIBCXX_USE_DUAL_ABI == 0)) // The _GLIBCXX_USE_DUAL_ABI macro is always defined in libstdc++ from gcc-5 // and newer. using code_unit = wchar_t; @@ -367,39 +408,13 @@ auto write_encoded_tm_str(OutputIt out, string_view in, const std::locale& loc) unit_t unit; write_codecvt(unit, in, loc); // In UTF-8 is used one to four one-byte code units. 
- auto&& buf = basic_memory_buffer(); - for (code_unit* p = unit.buf; p != unit.end; ++p) { - uint32_t c = static_cast(*p); - if (sizeof(code_unit) == 2 && c >= 0xd800 && c <= 0xdfff) { - // surrogate pair - ++p; - if (p == unit.end || (c & 0xfc00) != 0xd800 || - (*p & 0xfc00) != 0xdc00) { - FMT_THROW(format_error("failed to format time")); - } - c = (c << 10) + static_cast(*p) - 0x35fdc00; - } - if (c < 0x80) { - buf.push_back(static_cast(c)); - } else if (c < 0x800) { - buf.push_back(static_cast(0xc0 | (c >> 6))); - buf.push_back(static_cast(0x80 | (c & 0x3f))); - } else if ((c >= 0x800 && c <= 0xd7ff) || (c >= 0xe000 && c <= 0xffff)) { - buf.push_back(static_cast(0xe0 | (c >> 12))); - buf.push_back(static_cast(0x80 | ((c & 0xfff) >> 6))); - buf.push_back(static_cast(0x80 | (c & 0x3f))); - } else if (c >= 0x10000 && c <= 0x10ffff) { - buf.push_back(static_cast(0xf0 | (c >> 18))); - buf.push_back(static_cast(0x80 | ((c & 0x3ffff) >> 12))); - buf.push_back(static_cast(0x80 | ((c & 0xfff) >> 6))); - buf.push_back(static_cast(0x80 | (c & 0x3f))); - } else { - FMT_THROW(format_error("failed to format time")); - } - } - return copy_str(buf.data(), buf.data() + buf.size(), out); + auto u = + to_utf8>(); + if (!u.convert({unit.buf, to_unsigned(unit.end - unit.buf)})) + FMT_THROW(format_error("failed to format time")); + return copy(u.c_str(), u.c_str() + u.size(), out); } - return copy_str(in.data(), in.data() + in.size(), out); + return copy(in.data(), in.data() + in.size(), out); } template OutputIt { codecvt_result unit; write_codecvt(unit, sv, loc); - return copy_str(unit.buf, unit.end, out); + return copy(unit.buf, unit.end, out); } template & buf, const std::tm& time, auto&& format_buf = formatbuf>(buf); auto&& os = std::basic_ostream(&format_buf); os.imbue(loc); - using iterator = std::ostreambuf_iterator; - const auto& facet = std::use_facet>(loc); + const auto& facet = std::use_facet>(loc); auto end = facet.put(os, os, Char(' '), &time, format, modifier); if 
(end.failed()) FMT_THROW(format_error("failed to format time")); } @@ -448,38 +462,83 @@ auto write(OutputIt out, const std::tm& time, const std::locale& loc, return write_encoded_tm_str(out, string_view(buf.data(), buf.size()), loc); } +template +struct is_same_arithmetic_type + : public std::integral_constant::value && + std::is_integral::value) || + (std::is_floating_point::value && + std::is_floating_point::value)> { +}; + +template < + typename To, typename FromRep, typename FromPeriod, + FMT_ENABLE_IF(is_same_arithmetic_type::value)> +auto fmt_duration_cast(std::chrono::duration from) -> To { +#if FMT_SAFE_DURATION_CAST + // Throwing version of safe_duration_cast is only available for + // integer to integer or float to float casts. + int ec; + To to = safe_duration_cast::safe_duration_cast(from, ec); + if (ec) FMT_THROW(format_error("cannot format duration")); + return to; +#else + // Standard duration cast, may overflow. + return std::chrono::duration_cast(from); +#endif +} + +template < + typename To, typename FromRep, typename FromPeriod, + FMT_ENABLE_IF(!is_same_arithmetic_type::value)> +auto fmt_duration_cast(std::chrono::duration from) -> To { + // Mixed integer <-> float cast is not supported by safe_duration_cast. + return std::chrono::duration_cast(from); +} + +template +auto to_time_t( + std::chrono::time_point time_point) + -> std::time_t { + // Cannot use std::chrono::system_clock::to_time_t since this would first + // require a cast to std::chrono::system_clock::time_point, which could + // overflow. + return fmt_duration_cast>( + time_point.time_since_epoch()) + .count(); +} } // namespace detail -FMT_MODULE_EXPORT_BEGIN +FMT_BEGIN_EXPORT /** - Converts given time since epoch as ``std::time_t`` value into calendar time, - expressed in local time. Unlike ``std::localtime``, this function is - thread-safe on most platforms. + * Converts given time since epoch as `std::time_t` value into calendar time, + * expressed in local time. 
Unlike `std::localtime`, this function is + * thread-safe on most platforms. */ -inline std::tm localtime(std::time_t time) { +inline auto localtime(std::time_t time) -> std::tm { struct dispatcher { std::time_t time_; std::tm tm_; dispatcher(std::time_t t) : time_(t) {} - bool run() { + auto run() -> bool { using namespace fmt::detail; return handle(localtime_r(&time_, &tm_)); } - bool handle(std::tm* tm) { return tm != nullptr; } + auto handle(std::tm* tm) -> bool { return tm != nullptr; } - bool handle(detail::null<>) { + auto handle(detail::null<>) -> bool { using namespace fmt::detail; return fallback(localtime_s(&tm_, &time_)); } - bool fallback(int res) { return res == 0; } + auto fallback(int res) -> bool { return res == 0; } #if !FMT_MSC_VERSION - bool fallback(detail::null<>) { + auto fallback(detail::null<>) -> bool { using namespace fmt::detail; std::tm* tm = std::localtime(&time_); if (tm) tm_ = *tm; @@ -493,57 +552,62 @@ inline std::tm localtime(std::time_t time) { return lt.tm_; } -inline std::tm localtime( - std::chrono::time_point time_point) { - return localtime(std::chrono::system_clock::to_time_t(time_point)); +#if FMT_USE_LOCAL_TIME +template +inline auto localtime(std::chrono::local_time time) -> std::tm { + return localtime( + detail::to_time_t(std::chrono::current_zone()->to_sys(time))); } +#endif /** - Converts given time since epoch as ``std::time_t`` value into calendar time, - expressed in Coordinated Universal Time (UTC). Unlike ``std::gmtime``, this - function is thread-safe on most platforms. + * Converts given time since epoch as `std::time_t` value into calendar time, + * expressed in Coordinated Universal Time (UTC). Unlike `std::gmtime`, this + * function is thread-safe on most platforms. 
*/ -inline std::tm gmtime(std::time_t time) { +inline auto gmtime(std::time_t time) -> std::tm { struct dispatcher { std::time_t time_; std::tm tm_; dispatcher(std::time_t t) : time_(t) {} - bool run() { + auto run() -> bool { using namespace fmt::detail; return handle(gmtime_r(&time_, &tm_)); } - bool handle(std::tm* tm) { return tm != nullptr; } + auto handle(std::tm* tm) -> bool { return tm != nullptr; } - bool handle(detail::null<>) { + auto handle(detail::null<>) -> bool { using namespace fmt::detail; return fallback(gmtime_s(&tm_, &time_)); } - bool fallback(int res) { return res == 0; } + auto fallback(int res) -> bool { return res == 0; } #if !FMT_MSC_VERSION - bool fallback(detail::null<>) { + auto fallback(detail::null<>) -> bool { std::tm* tm = std::gmtime(&time_); if (tm) tm_ = *tm; return tm != nullptr; } #endif }; - dispatcher gt(time); + auto gt = dispatcher(time); // Too big time values may be unsupported. if (!gt.run()) FMT_THROW(format_error("time_t value out of range")); return gt.tm_; } -inline std::tm gmtime( - std::chrono::time_point time_point) { - return gmtime(std::chrono::system_clock::to_time_t(time_point)); +template +inline auto gmtime( + std::chrono::time_point time_point) + -> std::tm { + return gmtime(detail::to_time_t(time_point)); } -FMT_BEGIN_DETAIL_NAMESPACE +namespace detail { // Writes two-digit numbers a, b and c separated by sep to buf. 
// The method by Pavel Novikov based on @@ -579,7 +643,8 @@ inline void write_digit2_separated(char* buf, unsigned a, unsigned b, } } -template FMT_CONSTEXPR inline const char* get_units() { +template +FMT_CONSTEXPR inline auto get_units() -> const char* { if (std::is_same::value) return "as"; if (std::is_same::value) return "fs"; if (std::is_same::value) return "ps"; @@ -597,8 +662,9 @@ template FMT_CONSTEXPR inline const char* get_units() { if (std::is_same::value) return "Ts"; if (std::is_same::value) return "Ps"; if (std::is_same::value) return "Es"; - if (std::is_same>::value) return "m"; + if (std::is_same>::value) return "min"; if (std::is_same>::value) return "h"; + if (std::is_same>::value) return "d"; return nullptr; } @@ -608,13 +674,37 @@ enum class numeric_system { alternative }; +// Glibc extensions for formatting numeric values. +enum class pad_type { + // Pad a numeric result string with zeros (the default). + zero, + // Do not pad a numeric result string. + none, + // Pad a numeric result string with spaces. + space, +}; + +template +auto write_padding(OutputIt out, pad_type pad, int width) -> OutputIt { + if (pad == pad_type::none) return out; + return detail::fill_n(out, width, pad == pad_type::space ? ' ' : '0'); +} + +template +auto write_padding(OutputIt out, pad_type pad) -> OutputIt { + if (pad != pad_type::none) *out++ = pad == pad_type::space ? ' ' : '0'; + return out; +} + // Parses a put_time-like format string and invokes handler actions. 
template -FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, - const Char* end, - Handler&& handler) { +FMT_CONSTEXPR auto parse_chrono_format(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { + if (begin == end || *begin == '}') return begin; + if (*begin != '%') FMT_THROW(format_error("invalid format")); auto ptr = begin; while (ptr != end) { + pad_type pad = pad_type::zero; auto c = *ptr; if (c == '}') break; if (c != '%') { @@ -624,6 +714,18 @@ FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, if (begin != ptr) handler.on_text(begin, ptr); ++ptr; // consume '%' if (ptr == end) FMT_THROW(format_error("invalid format")); + c = *ptr; + switch (c) { + case '_': + pad = pad_type::space; + ++ptr; + break; + case '-': + pad = pad_type::none; + ++ptr; + break; + } + if (ptr == end) FMT_THROW(format_error("invalid format")); c = *ptr++; switch (c) { case '%': @@ -681,35 +783,35 @@ FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, break; // Day of the year/month: case 'U': - handler.on_dec0_week_of_year(numeric_system::standard); + handler.on_dec0_week_of_year(numeric_system::standard, pad); break; case 'W': - handler.on_dec1_week_of_year(numeric_system::standard); + handler.on_dec1_week_of_year(numeric_system::standard, pad); break; case 'V': - handler.on_iso_week_of_year(numeric_system::standard); + handler.on_iso_week_of_year(numeric_system::standard, pad); break; case 'j': handler.on_day_of_year(); break; case 'd': - handler.on_day_of_month(numeric_system::standard); + handler.on_day_of_month(numeric_system::standard, pad); break; case 'e': - handler.on_day_of_month_space(numeric_system::standard); + handler.on_day_of_month(numeric_system::standard, pad_type::space); break; // Hour, minute, second: case 'H': - handler.on_24_hour(numeric_system::standard); + handler.on_24_hour(numeric_system::standard, pad); break; case 'I': - handler.on_12_hour(numeric_system::standard); + 
handler.on_12_hour(numeric_system::standard, pad); break; case 'M': - handler.on_minute(numeric_system::standard); + handler.on_minute(numeric_system::standard, pad); break; case 'S': - handler.on_second(numeric_system::standard); + handler.on_second(numeric_system::standard, pad); break; // Other: case 'c': @@ -746,7 +848,7 @@ FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, handler.on_duration_unit(); break; case 'z': - handler.on_utc_offset(); + handler.on_utc_offset(numeric_system::standard); break; case 'Z': handler.on_tz_name(); @@ -774,6 +876,9 @@ FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, case 'X': handler.on_loc_time(numeric_system::alternative); break; + case 'z': + handler.on_utc_offset(numeric_system::alternative); + break; default: FMT_THROW(format_error("invalid format")); } @@ -790,19 +895,19 @@ FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, handler.on_dec_month(numeric_system::alternative); break; case 'U': - handler.on_dec0_week_of_year(numeric_system::alternative); + handler.on_dec0_week_of_year(numeric_system::alternative, pad); break; case 'W': - handler.on_dec1_week_of_year(numeric_system::alternative); + handler.on_dec1_week_of_year(numeric_system::alternative, pad); break; case 'V': - handler.on_iso_week_of_year(numeric_system::alternative); + handler.on_iso_week_of_year(numeric_system::alternative, pad); break; case 'd': - handler.on_day_of_month(numeric_system::alternative); + handler.on_day_of_month(numeric_system::alternative, pad); break; case 'e': - handler.on_day_of_month_space(numeric_system::alternative); + handler.on_day_of_month(numeric_system::alternative, pad_type::space); break; case 'w': handler.on_dec0_weekday(numeric_system::alternative); @@ -811,16 +916,19 @@ FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, handler.on_dec1_weekday(numeric_system::alternative); break; case 'H': - handler.on_24_hour(numeric_system::alternative); + 
handler.on_24_hour(numeric_system::alternative, pad); break; case 'I': - handler.on_12_hour(numeric_system::alternative); + handler.on_12_hour(numeric_system::alternative, pad); break; case 'M': - handler.on_minute(numeric_system::alternative); + handler.on_minute(numeric_system::alternative, pad); break; case 'S': - handler.on_second(numeric_system::alternative); + handler.on_second(numeric_system::alternative, pad); + break; + case 'z': + handler.on_utc_offset(numeric_system::alternative); break; default: FMT_THROW(format_error("invalid format")); @@ -852,12 +960,19 @@ template struct null_chrono_spec_handler { FMT_CONSTEXPR void on_abbr_month() { unsupported(); } FMT_CONSTEXPR void on_full_month() { unsupported(); } FMT_CONSTEXPR void on_dec_month(numeric_system) { unsupported(); } - FMT_CONSTEXPR void on_dec0_week_of_year(numeric_system) { unsupported(); } - FMT_CONSTEXPR void on_dec1_week_of_year(numeric_system) { unsupported(); } - FMT_CONSTEXPR void on_iso_week_of_year(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_dec0_week_of_year(numeric_system, pad_type) { + unsupported(); + } + FMT_CONSTEXPR void on_dec1_week_of_year(numeric_system, pad_type) { + unsupported(); + } + FMT_CONSTEXPR void on_iso_week_of_year(numeric_system, pad_type) { + unsupported(); + } FMT_CONSTEXPR void on_day_of_year() { unsupported(); } - FMT_CONSTEXPR void on_day_of_month(numeric_system) { unsupported(); } - FMT_CONSTEXPR void on_day_of_month_space(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_day_of_month(numeric_system, pad_type) { + unsupported(); + } FMT_CONSTEXPR void on_24_hour(numeric_system) { unsupported(); } FMT_CONSTEXPR void on_12_hour(numeric_system) { unsupported(); } FMT_CONSTEXPR void on_minute(numeric_system) { unsupported(); } @@ -873,7 +988,7 @@ template struct null_chrono_spec_handler { FMT_CONSTEXPR void on_am_pm() { unsupported(); } FMT_CONSTEXPR void on_duration_value() { unsupported(); } FMT_CONSTEXPR void on_duration_unit() { 
unsupported(); } - FMT_CONSTEXPR void on_utc_offset() { unsupported(); } + FMT_CONSTEXPR void on_utc_offset(numeric_system) { unsupported(); } FMT_CONSTEXPR void on_tz_name() { unsupported(); } }; @@ -895,16 +1010,15 @@ struct tm_format_checker : null_chrono_spec_handler { FMT_CONSTEXPR void on_abbr_month() {} FMT_CONSTEXPR void on_full_month() {} FMT_CONSTEXPR void on_dec_month(numeric_system) {} - FMT_CONSTEXPR void on_dec0_week_of_year(numeric_system) {} - FMT_CONSTEXPR void on_dec1_week_of_year(numeric_system) {} - FMT_CONSTEXPR void on_iso_week_of_year(numeric_system) {} + FMT_CONSTEXPR void on_dec0_week_of_year(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_dec1_week_of_year(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_iso_week_of_year(numeric_system, pad_type) {} FMT_CONSTEXPR void on_day_of_year() {} - FMT_CONSTEXPR void on_day_of_month(numeric_system) {} - FMT_CONSTEXPR void on_day_of_month_space(numeric_system) {} - FMT_CONSTEXPR void on_24_hour(numeric_system) {} - FMT_CONSTEXPR void on_12_hour(numeric_system) {} - FMT_CONSTEXPR void on_minute(numeric_system) {} - FMT_CONSTEXPR void on_second(numeric_system) {} + FMT_CONSTEXPR void on_day_of_month(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_24_hour(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_12_hour(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_minute(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_second(numeric_system, pad_type) {} FMT_CONSTEXPR void on_datetime(numeric_system) {} FMT_CONSTEXPR void on_loc_date(numeric_system) {} FMT_CONSTEXPR void on_loc_time(numeric_system) {} @@ -914,29 +1028,29 @@ struct tm_format_checker : null_chrono_spec_handler { FMT_CONSTEXPR void on_24_hour_time() {} FMT_CONSTEXPR void on_iso_time() {} FMT_CONSTEXPR void on_am_pm() {} - FMT_CONSTEXPR void on_utc_offset() {} + FMT_CONSTEXPR void on_utc_offset(numeric_system) {} FMT_CONSTEXPR void on_tz_name() {} }; -inline const char* tm_wday_full_name(int wday) { +inline auto 
tm_wday_full_name(int wday) -> const char* { static constexpr const char* full_name_list[] = { "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"}; return wday >= 0 && wday <= 6 ? full_name_list[wday] : "?"; } -inline const char* tm_wday_short_name(int wday) { +inline auto tm_wday_short_name(int wday) -> const char* { static constexpr const char* short_name_list[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; return wday >= 0 && wday <= 6 ? short_name_list[wday] : "???"; } -inline const char* tm_mon_full_name(int mon) { +inline auto tm_mon_full_name(int mon) -> const char* { static constexpr const char* full_name_list[] = { "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"}; return mon >= 0 && mon <= 11 ? full_name_list[mon] : "?"; } -inline const char* tm_mon_short_name(int mon) { +inline auto tm_mon_short_name(int mon) -> const char* { static constexpr const char* short_name_list[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", @@ -966,13 +1080,135 @@ inline void tzset_once() { } #endif -template class tm_writer { +// Converts value to Int and checks that it's in the range [0, upper). +template ::value)> +inline auto to_nonnegative_int(T value, Int upper) -> Int { + if (!std::is_unsigned::value && + (value < 0 || to_unsigned(value) > to_unsigned(upper))) { + FMT_THROW(fmt::format_error("chrono value is out of range")); + } + return static_cast(value); +} +template ::value)> +inline auto to_nonnegative_int(T value, Int upper) -> Int { + auto int_value = static_cast(value); + if (int_value < 0 || value > static_cast(upper)) + FMT_THROW(format_error("invalid value")); + return int_value; +} + +constexpr auto pow10(std::uint32_t n) -> long long { + return n == 0 ? 1 : 10 * pow10(n - 1); +} + +// Counts the number of fractional digits in the range [0, 18] according to the +// C++20 spec. 
If more than 18 fractional digits are required then returns 6 for +// microseconds precision. +template () / 10)> +struct count_fractional_digits { + static constexpr int value = + Num % Den == 0 ? N : count_fractional_digits::value; +}; + +// Base case that doesn't instantiate any more templates +// in order to avoid overflow. +template +struct count_fractional_digits { + static constexpr int value = (Num % Den == 0) ? N : 6; +}; + +// Format subseconds which are given as an integer type with an appropriate +// number of digits. +template +void write_fractional_seconds(OutputIt& out, Duration d, int precision = -1) { + constexpr auto num_fractional_digits = + count_fractional_digits::value; + + using subsecond_precision = std::chrono::duration< + typename std::common_type::type, + std::ratio<1, detail::pow10(num_fractional_digits)>>; + + const auto fractional = d - fmt_duration_cast(d); + const auto subseconds = + std::chrono::treat_as_floating_point< + typename subsecond_precision::rep>::value + ? 
fractional.count() + : fmt_duration_cast(fractional).count(); + auto n = static_cast>(subseconds); + const int num_digits = detail::count_digits(n); + + int leading_zeroes = (std::max)(0, num_fractional_digits - num_digits); + if (precision < 0) { + FMT_ASSERT(!std::is_floating_point::value, ""); + if (std::ratio_less::value) { + *out++ = '.'; + out = detail::fill_n(out, leading_zeroes, '0'); + out = format_decimal(out, n, num_digits).end; + } + } else if (precision > 0) { + *out++ = '.'; + leading_zeroes = (std::min)(leading_zeroes, precision); + int remaining = precision - leading_zeroes; + out = detail::fill_n(out, leading_zeroes, '0'); + if (remaining < num_digits) { + int num_truncated_digits = num_digits - remaining; + n /= to_unsigned(detail::pow10(to_unsigned(num_truncated_digits))); + if (n) { + out = format_decimal(out, n, remaining).end; + } + return; + } + if (n) { + out = format_decimal(out, n, num_digits).end; + remaining -= num_digits; + } + out = detail::fill_n(out, remaining, '0'); + } +} + +// Format subseconds which are given as a floating point type with an +// appropriate number of digits. We cannot pass the Duration here, as we +// explicitly need to pass the Rep value in the chrono_formatter. +template +void write_floating_seconds(memory_buffer& buf, Duration duration, + int num_fractional_digits = -1) { + using rep = typename Duration::rep; + FMT_ASSERT(std::is_floating_point::value, ""); + + auto val = duration.count(); + + if (num_fractional_digits < 0) { + // For `std::round` with fallback to `round`: + // On some toolchains `std::round` is not available (e.g. GCC 6). 
+ using namespace std; + num_fractional_digits = + count_fractional_digits::value; + if (num_fractional_digits < 6 && static_cast(round(val)) != val) + num_fractional_digits = 6; + } + + fmt::format_to(std::back_inserter(buf), FMT_STRING("{:.{}f}"), + std::fmod(val * static_cast(Duration::period::num) / + static_cast(Duration::period::den), + static_cast(60)), + num_fractional_digits); +} + +template +class tm_writer { private: static constexpr int days_per_week = 7; const std::locale& loc_; const bool is_classic_; OutputIt out_; + const Duration* subsecs_; const std::tm& tm_; auto tm_sec() const noexcept -> int { @@ -1021,8 +1257,7 @@ template class tm_writer { return static_cast(l); } - // Algorithm: - // https://en.wikipedia.org/wiki/ISO_week_date#Calculating_the_week_number_from_a_month_and_day_of_the_month_or_ordinal_date + // Algorithm: https://en.wikipedia.org/wiki/ISO_week_date. auto iso_year_weeks(long long curr_year) const noexcept -> int { const auto prev_year = curr_year - 1; const auto curr_p = @@ -1060,6 +1295,17 @@ template class tm_writer { *out_++ = *d++; *out_++ = *d; } + void write2(int value, pad_type pad) { + unsigned int v = to_unsigned(value) % 100; + if (v >= 10) { + const char* d = digits2(v); + *out_++ = *d++; + *out_++ = *d; + } else { + out_ = detail::write_padding(out_, pad); + *out_++ = static_cast('0' + v); + } + } void write_year_extended(long long year) { // At least 4 characters. 
@@ -1071,7 +1317,8 @@ template class tm_writer { } uint32_or_64_or_128_t n = to_unsigned(year); const int num_digits = count_digits(n); - if (width > num_digits) out_ = std::fill_n(out_, width - num_digits, '0'); + if (width > num_digits) + out_ = detail::fill_n(out_, width - num_digits, '0'); out_ = format_decimal(out_, n, num_digits).end; } void write_year(long long year) { @@ -1083,7 +1330,7 @@ template class tm_writer { } } - void write_utc_offset(long offset) { + void write_utc_offset(long offset, numeric_system ns) { if (offset < 0) { *out_++ = '-'; offset = -offset; @@ -1092,14 +1339,15 @@ template class tm_writer { } offset /= 60; write2(static_cast(offset / 60)); + if (ns != numeric_system::standard) *out_++ = ':'; write2(static_cast(offset % 60)); } template ::value)> - void format_utc_offset_impl(const T& tm) { - write_utc_offset(tm.tm_gmtoff); + void format_utc_offset_impl(const T& tm, numeric_system ns) { + write_utc_offset(tm.tm_gmtoff, ns); } template ::value)> - void format_utc_offset_impl(const T& tm) { + void format_utc_offset_impl(const T& tm, numeric_system ns) { #if defined(_WIN32) && defined(_UCRT) # if FMT_USE_TZSET tzset_once(); @@ -1111,10 +1359,17 @@ template class tm_writer { _get_dstbias(&dstbias); offset += dstbias; } - write_utc_offset(-offset); + write_utc_offset(-offset, ns); #else - ignore_unused(tm); - format_localized('z'); + if (ns == numeric_system::standard) return format_localized('z'); + + // Extract timezone offset from timezone conversion functions. 
+ std::tm gtm = tm; + std::time_t gt = std::mktime(>m); + std::tm ltm = gmtime(gt); + std::time_t lt = std::mktime(<m); + long offset = gt - lt; + write_utc_offset(offset, ns); #endif } @@ -1135,16 +1390,18 @@ template class tm_writer { } public: - tm_writer(const std::locale& loc, OutputIt out, const std::tm& tm) + tm_writer(const std::locale& loc, OutputIt out, const std::tm& tm, + const Duration* subsecs = nullptr) : loc_(loc), is_classic_(loc_ == get_classic_locale()), out_(out), + subsecs_(subsecs), tm_(tm) {} - OutputIt out() const { return out_; } + auto out() const -> OutputIt { return out_; } FMT_CONSTEXPR void on_text(const Char* begin, const Char* end) { - out_ = copy_str(begin, end, out_); + out_ = copy(begin, end, out_); } void on_abbr_weekday() { @@ -1191,7 +1448,7 @@ template class tm_writer { *out_++ = ' '; on_abbr_month(); *out_++ = ' '; - on_day_of_month_space(numeric_system::standard); + on_day_of_month(numeric_system::standard, pad_type::space); *out_++ = ' '; on_iso_time(); *out_++ = ' '; @@ -1217,7 +1474,7 @@ template class tm_writer { write_digit2_separated(buf, to_unsigned(tm_mon() + 1), to_unsigned(tm_mday()), to_unsigned(split_year_lower(tm_year())), '/'); - out_ = copy_str(std::begin(buf), std::end(buf), out_); + out_ = copy(std::begin(buf), std::end(buf), out_); } void on_iso_date() { auto year = tm_year(); @@ -1233,10 +1490,10 @@ template class tm_writer { write_digit2_separated(buf + 2, static_cast(year % 100), to_unsigned(tm_mon() + 1), to_unsigned(tm_mday()), '-'); - out_ = copy_str(std::begin(buf) + offset, std::end(buf), out_); + out_ = copy(std::begin(buf) + offset, std::end(buf), out_); } - void on_utc_offset() { format_utc_offset_impl(tm_); } + void on_utc_offset(numeric_system ns) { format_utc_offset_impl(tm_, ns); } void on_tz_name() { format_tz_name_impl(tm_); } void on_year(numeric_system ns) { @@ -1278,24 +1535,26 @@ template class tm_writer { format_localized('m', 'O'); } - void on_dec0_week_of_year(numeric_system ns) { + 
void on_dec0_week_of_year(numeric_system ns, pad_type pad) { if (is_classic_ || ns == numeric_system::standard) - return write2((tm_yday() + days_per_week - tm_wday()) / days_per_week); + return write2((tm_yday() + days_per_week - tm_wday()) / days_per_week, + pad); format_localized('U', 'O'); } - void on_dec1_week_of_year(numeric_system ns) { + void on_dec1_week_of_year(numeric_system ns, pad_type pad) { if (is_classic_ || ns == numeric_system::standard) { auto wday = tm_wday(); write2((tm_yday() + days_per_week - (wday == 0 ? (days_per_week - 1) : (wday - 1))) / - days_per_week); + days_per_week, + pad); } else { format_localized('W', 'O'); } } - void on_iso_week_of_year(numeric_system ns) { + void on_iso_week_of_year(numeric_system ns, pad_type pad) { if (is_classic_ || ns == numeric_system::standard) - return write2(tm_iso_week_of_year()); + return write2(tm_iso_week_of_year(), pad); format_localized('V', 'O'); } @@ -1309,37 +1568,47 @@ template class tm_writer { write1(yday / 100); write2(yday % 100); } - void on_day_of_month(numeric_system ns) { - if (is_classic_ || ns == numeric_system::standard) return write2(tm_mday()); + void on_day_of_month(numeric_system ns, pad_type pad) { + if (is_classic_ || ns == numeric_system::standard) + return write2(tm_mday(), pad); format_localized('d', 'O'); } - void on_day_of_month_space(numeric_system ns) { - if (is_classic_ || ns == numeric_system::standard) { - auto mday = to_unsigned(tm_mday()) % 100; - const char* d2 = digits2(mday); - *out_++ = mday < 10 ? 
' ' : d2[0]; - *out_++ = d2[1]; - } else { - format_localized('e', 'O'); - } - } - void on_24_hour(numeric_system ns) { - if (is_classic_ || ns == numeric_system::standard) return write2(tm_hour()); + void on_24_hour(numeric_system ns, pad_type pad) { + if (is_classic_ || ns == numeric_system::standard) + return write2(tm_hour(), pad); format_localized('H', 'O'); } - void on_12_hour(numeric_system ns) { + void on_12_hour(numeric_system ns, pad_type pad) { if (is_classic_ || ns == numeric_system::standard) - return write2(tm_hour12()); + return write2(tm_hour12(), pad); format_localized('I', 'O'); } - void on_minute(numeric_system ns) { - if (is_classic_ || ns == numeric_system::standard) return write2(tm_min()); + void on_minute(numeric_system ns, pad_type pad) { + if (is_classic_ || ns == numeric_system::standard) + return write2(tm_min(), pad); format_localized('M', 'O'); } - void on_second(numeric_system ns) { - if (is_classic_ || ns == numeric_system::standard) return write2(tm_sec()); - format_localized('S', 'O'); + + void on_second(numeric_system ns, pad_type pad) { + if (is_classic_ || ns == numeric_system::standard) { + write2(tm_sec(), pad); + if (subsecs_) { + if (std::is_floating_point::value) { + auto buf = memory_buffer(); + write_floating_seconds(buf, *subsecs_); + if (buf.size() > 1) { + // Remove the leading "0", write something like ".123". + out_ = std::copy(buf.begin() + 1, buf.end(), out_); + } + } else { + write_fractional_seconds(out_, *subsecs_); + } + } + } else { + // Currently no formatting of subseconds when a locale is set. 
+ format_localized('S', 'O'); + } } void on_12_hour_time() { @@ -1347,7 +1616,7 @@ template class tm_writer { char buf[8]; write_digit2_separated(buf, to_unsigned(tm_hour12()), to_unsigned(tm_min()), to_unsigned(tm_sec()), ':'); - out_ = copy_str(std::begin(buf), std::end(buf), out_); + out_ = copy(std::begin(buf), std::end(buf), out_); *out_++ = ' '; on_am_pm(); } else { @@ -1360,10 +1629,9 @@ template class tm_writer { write2(tm_min()); } void on_iso_time() { - char buf[8]; - write_digit2_separated(buf, to_unsigned(tm_hour()), to_unsigned(tm_min()), - to_unsigned(tm_sec()), ':'); - out_ = copy_str(std::begin(buf), std::end(buf), out_); + on_24_hour_time(); + *out_++ = ':'; + on_second(numeric_system::standard, pad_type::zero); } void on_am_pm() { @@ -1381,49 +1649,41 @@ template class tm_writer { }; struct chrono_format_checker : null_chrono_spec_handler { + bool has_precision_integral = false; + FMT_NORETURN void unsupported() { FMT_THROW(format_error("no date")); } template FMT_CONSTEXPR void on_text(const Char*, const Char*) {} - FMT_CONSTEXPR void on_24_hour(numeric_system) {} - FMT_CONSTEXPR void on_12_hour(numeric_system) {} - FMT_CONSTEXPR void on_minute(numeric_system) {} - FMT_CONSTEXPR void on_second(numeric_system) {} + FMT_CONSTEXPR void on_day_of_year() {} + FMT_CONSTEXPR void on_24_hour(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_12_hour(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_minute(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_second(numeric_system, pad_type) {} FMT_CONSTEXPR void on_12_hour_time() {} FMT_CONSTEXPR void on_24_hour_time() {} FMT_CONSTEXPR void on_iso_time() {} FMT_CONSTEXPR void on_am_pm() {} - FMT_CONSTEXPR void on_duration_value() {} + FMT_CONSTEXPR void on_duration_value() const { + if (has_precision_integral) { + FMT_THROW(format_error("precision not allowed for this argument type")); + } + } FMT_CONSTEXPR void on_duration_unit() {} }; -template ::value)> -inline bool isfinite(T) { +template 
::value&& has_isfinite::value)> +inline auto isfinite(T) -> bool { return true; } -// Converts value to Int and checks that it's in the range [0, upper). -template ::value)> -inline Int to_nonnegative_int(T value, Int upper) { - FMT_ASSERT(std::is_unsigned::value || - (value >= 0 && to_unsigned(value) <= to_unsigned(upper)), - "invalid value"); - (void)upper; - return static_cast(value); -} -template ::value)> -inline Int to_nonnegative_int(T value, Int upper) { - if (value < 0 || value > static_cast(upper)) - FMT_THROW(format_error("invalid value")); - return static_cast(value); -} - template ::value)> -inline T mod(T x, int y) { +inline auto mod(T x, int y) -> T { return x % static_cast(y); } template ::value)> -inline T mod(T x, int y) { +inline auto mod(T x, int y) -> T { return std::fmod(x, static_cast(y)); } @@ -1438,104 +1698,52 @@ template struct make_unsigned_or_unchanged { using type = typename std::make_unsigned::type; }; -#if FMT_SAFE_DURATION_CAST -// throwing version of safe_duration_cast -template -To fmt_safe_duration_cast(std::chrono::duration from) { - int ec; - To to = safe_duration_cast::safe_duration_cast(from, ec); - if (ec) FMT_THROW(format_error("cannot format duration")); - return to; -} -#endif - template ::value)> -inline std::chrono::duration get_milliseconds( - std::chrono::duration d) { +inline auto get_milliseconds(std::chrono::duration d) + -> std::chrono::duration { // this may overflow and/or the result may not fit in the // target type. 
#if FMT_SAFE_DURATION_CAST using CommonSecondsType = typename std::common_type::type; - const auto d_as_common = fmt_safe_duration_cast(d); + const auto d_as_common = fmt_duration_cast(d); const auto d_as_whole_seconds = - fmt_safe_duration_cast(d_as_common); + fmt_duration_cast(d_as_common); // this conversion should be nonproblematic const auto diff = d_as_common - d_as_whole_seconds; const auto ms = - fmt_safe_duration_cast>(diff); + fmt_duration_cast>(diff); return ms; #else - auto s = std::chrono::duration_cast(d); - return std::chrono::duration_cast(d - s); + auto s = fmt_duration_cast(d); + return fmt_duration_cast(d - s); #endif } -// Counts the number of fractional digits in the range [0, 18] according to the -// C++20 spec. If more than 18 fractional digits are required then returns 6 for -// microseconds precision. -template () / 10)> -struct count_fractional_digits { - static constexpr int value = - Num % Den == 0 ? N : count_fractional_digits::value; -}; - -// Base case that doesn't instantiate any more templates -// in order to avoid overflow. -template -struct count_fractional_digits { - static constexpr int value = (Num % Den == 0) ? N : 6; -}; - -constexpr long long pow10(std::uint32_t n) { - return n == 0 ? 1 : 10 * pow10(n - 1); -} - -template ::is_signed)> -constexpr std::chrono::duration abs( - std::chrono::duration d) { - // We need to compare the duration using the count() method directly - // due to a compiler bug in clang-11 regarding the spaceship operator, - // when -Wzero-as-null-pointer-constant is enabled. - // In clang-12 the bug has been fixed. See - // https://bugs.llvm.org/show_bug.cgi?id=46235 and the reproducible example: - // https://www.godbolt.org/z/Knbb5joYx. - return d.count() >= d.zero().count() ? 
d : -d; -} - -template ::is_signed)> -constexpr std::chrono::duration abs( - std::chrono::duration d) { - return d; -} - template ::value)> -OutputIt format_duration_value(OutputIt out, Rep val, int) { +auto format_duration_value(OutputIt out, Rep val, int) -> OutputIt { return write(out, val); } template ::value)> -OutputIt format_duration_value(OutputIt out, Rep val, int precision) { - auto specs = basic_format_specs(); +auto format_duration_value(OutputIt out, Rep val, int precision) -> OutputIt { + auto specs = format_specs(); specs.precision = precision; - specs.type = precision >= 0 ? presentation_type::fixed_lower - : presentation_type::general_lower; + specs.type = + precision >= 0 ? presentation_type::fixed : presentation_type::general; return write(out, val, specs); } template -OutputIt copy_unit(string_view unit, OutputIt out, Char) { +auto copy_unit(string_view unit, OutputIt out, Char) -> OutputIt { return std::copy(unit.begin(), unit.end(), out); } template -OutputIt copy_unit(string_view unit, OutputIt out, wchar_t) { +auto copy_unit(string_view unit, OutputIt out, wchar_t) -> OutputIt { // This works when wchar_t is UTF-32 because units only contain characters // that have the same representation in UTF-16 and UTF-32. utf8_to_utf16 u(unit); @@ -1543,7 +1751,7 @@ OutputIt copy_unit(string_view unit, OutputIt out, wchar_t) { } template -OutputIt format_duration_unit(OutputIt out) { +auto format_duration_unit(OutputIt out) -> OutputIt { if (const char* unit = get_units()) return copy_unit(string_view(unit), out, Char()); *out++ = '['; @@ -1566,8 +1774,10 @@ class get_locale { public: get_locale(bool localized, locale_ref loc) : has_locale_(localized) { +#ifndef FMT_STATIC_THOUSANDS_SEPARATOR if (localized) ::new (&locale_) std::locale(loc.template get()); +#endif } ~get_locale() { if (has_locale_) locale_.~locale(); @@ -1610,18 +1820,12 @@ struct chrono_formatter { // this may overflow and/or the result may not fit in the // target type. 
-#if FMT_SAFE_DURATION_CAST // might need checked conversion (rep!=Rep) - auto tmpval = std::chrono::duration(val); - s = fmt_safe_duration_cast(tmpval); -#else - s = std::chrono::duration_cast( - std::chrono::duration(val)); -#endif + s = fmt_duration_cast(std::chrono::duration(val)); } // returns true if nan or inf, writes to out. - bool handle_nan_inf() { + auto handle_nan_inf() -> bool { if (isfinite(val)) { return false; } @@ -1638,17 +1842,22 @@ struct chrono_formatter { return true; } - Rep hour() const { return static_cast(mod((s.count() / 3600), 24)); } + auto days() const -> Rep { return static_cast(s.count() / 86400); } + auto hour() const -> Rep { + return static_cast(mod((s.count() / 3600), 24)); + } - Rep hour12() const { + auto hour12() const -> Rep { Rep hour = static_cast(mod((s.count() / 3600), 12)); return hour <= 0 ? 12 : hour; } - Rep minute() const { return static_cast(mod((s.count() / 60), 60)); } - Rep second() const { return static_cast(mod(s.count(), 60)); } + auto minute() const -> Rep { + return static_cast(mod((s.count() / 60), 60)); + } + auto second() const -> Rep { return static_cast(mod(s.count(), 60)); } - std::tm time() const { + auto time() const -> std::tm { auto time = std::tm(); time.tm_hour = to_nonnegative_int(hour(), 24); time.tm_min = to_nonnegative_int(minute(), 60); @@ -1663,44 +1872,16 @@ struct chrono_formatter { } } - void write(Rep value, int width) { + void write(Rep value, int width, pad_type pad = pad_type::zero) { write_sign(); if (isnan(value)) return write_nan(); uint32_or_64_or_128_t n = to_unsigned(to_nonnegative_int(value, max_value())); int num_digits = detail::count_digits(n); - if (width > num_digits) out = std::fill_n(out, width - num_digits, '0'); - out = format_decimal(out, n, num_digits).end; - } - - template void write_fractional_seconds(Duration d) { - FMT_ASSERT(!std::is_floating_point::value, ""); - constexpr auto num_fractional_digits = - count_fractional_digits::value; - - using 
subsecond_precision = std::chrono::duration< - typename std::common_type::type, - std::ratio<1, detail::pow10(num_fractional_digits)>>; - if (std::ratio_less::value) { - *out++ = '.'; - auto fractional = - detail::abs(d) - std::chrono::duration_cast(d); - auto subseconds = - std::chrono::treat_as_floating_point< - typename subsecond_precision::rep>::value - ? fractional.count() - : std::chrono::duration_cast(fractional) - .count(); - uint32_or_64_or_128_t n = - to_unsigned(to_nonnegative_int(subseconds, max_value())); - int num_digits = detail::count_digits(n); - if (num_fractional_digits > num_digits) - out = std::fill_n(out, num_fractional_digits - num_digits, '0'); - out = format_decimal(out, n, num_digits).end; + if (width > num_digits) { + out = detail::write_padding(out, pad, width - num_digits); } + out = format_decimal(out, n, num_digits).end; } void write_nan() { std::copy_n("nan", 3, out); } @@ -1732,7 +1913,7 @@ struct chrono_formatter { void on_loc_time(numeric_system) {} void on_us_date() {} void on_iso_date() {} - void on_utc_offset() {} + void on_utc_offset(numeric_system) {} void on_tz_name() {} void on_year(numeric_system) {} void on_short_year(numeric_system) {} @@ -1741,65 +1922,66 @@ struct chrono_formatter { void on_iso_week_based_year() {} void on_iso_week_based_short_year() {} void on_dec_month(numeric_system) {} - void on_dec0_week_of_year(numeric_system) {} - void on_dec1_week_of_year(numeric_system) {} - void on_iso_week_of_year(numeric_system) {} - void on_day_of_year() {} - void on_day_of_month(numeric_system) {} - void on_day_of_month_space(numeric_system) {} - - void on_24_hour(numeric_system ns) { + void on_dec0_week_of_year(numeric_system, pad_type) {} + void on_dec1_week_of_year(numeric_system, pad_type) {} + void on_iso_week_of_year(numeric_system, pad_type) {} + void on_day_of_month(numeric_system, pad_type) {} + + void on_day_of_year() { if (handle_nan_inf()) return; + write(days(), 0); + } - if (ns == numeric_system::standard) 
return write(hour(), 2); + void on_24_hour(numeric_system ns, pad_type pad) { + if (handle_nan_inf()) return; + + if (ns == numeric_system::standard) return write(hour(), 2, pad); auto time = tm(); time.tm_hour = to_nonnegative_int(hour(), 24); - format_tm(time, &tm_writer_type::on_24_hour, ns); + format_tm(time, &tm_writer_type::on_24_hour, ns, pad); } - void on_12_hour(numeric_system ns) { + void on_12_hour(numeric_system ns, pad_type pad) { if (handle_nan_inf()) return; - if (ns == numeric_system::standard) return write(hour12(), 2); + if (ns == numeric_system::standard) return write(hour12(), 2, pad); auto time = tm(); time.tm_hour = to_nonnegative_int(hour12(), 12); - format_tm(time, &tm_writer_type::on_12_hour, ns); + format_tm(time, &tm_writer_type::on_12_hour, ns, pad); } - void on_minute(numeric_system ns) { + void on_minute(numeric_system ns, pad_type pad) { if (handle_nan_inf()) return; - if (ns == numeric_system::standard) return write(minute(), 2); + if (ns == numeric_system::standard) return write(minute(), 2, pad); auto time = tm(); time.tm_min = to_nonnegative_int(minute(), 60); - format_tm(time, &tm_writer_type::on_minute, ns); + format_tm(time, &tm_writer_type::on_minute, ns, pad); } - void on_second(numeric_system ns) { + void on_second(numeric_system ns, pad_type pad) { if (handle_nan_inf()) return; if (ns == numeric_system::standard) { if (std::is_floating_point::value) { - constexpr auto num_fractional_digits = - count_fractional_digits::value; auto buf = memory_buffer(); - format_to(std::back_inserter(buf), runtime("{:.{}f}"), - std::fmod(val * static_cast(Period::num) / - static_cast(Period::den), - static_cast(60)), - num_fractional_digits); + write_floating_seconds(buf, std::chrono::duration(val), + precision); if (negative) *out++ = '-'; - if (buf.size() < 2 || buf[1] == '.') *out++ = '0'; + if (buf.size() < 2 || buf[1] == '.') { + out = detail::write_padding(out, pad); + } out = std::copy(buf.begin(), buf.end(), out); } else { - 
write(second(), 2); - write_fractional_seconds(std::chrono::duration(val)); + write(second(), 2, pad); + write_fractional_seconds( + out, std::chrono::duration(val), precision); } return; } auto time = tm(); time.tm_sec = to_nonnegative_int(second(), 60); - format_tm(time, &tm_writer_type::on_second, ns); + format_tm(time, &tm_writer_type::on_second, ns, pad); } void on_12_hour_time() { @@ -1823,7 +2005,7 @@ struct chrono_formatter { on_24_hour_time(); *out++ = ':'; if (handle_nan_inf()) return; - on_second(numeric_system::standard); + on_second(numeric_system::standard, pad_type::zero); } void on_am_pm() { @@ -1842,168 +2024,279 @@ struct chrono_formatter { } }; -FMT_END_DETAIL_NAMESPACE +} // namespace detail #if defined(__cpp_lib_chrono) && __cpp_lib_chrono >= 201907 using weekday = std::chrono::weekday; +using day = std::chrono::day; +using month = std::chrono::month; +using year = std::chrono::year; +using year_month_day = std::chrono::year_month_day; #else // A fallback version of weekday. class weekday { private: - unsigned char value; + unsigned char value_; public: weekday() = default; - explicit constexpr weekday(unsigned wd) noexcept - : value(static_cast(wd != 7 ? wd : 0)) {} - constexpr unsigned c_encoding() const noexcept { return value; } + constexpr explicit weekday(unsigned wd) noexcept + : value_(static_cast(wd != 7 ? 
wd : 0)) {} + constexpr auto c_encoding() const noexcept -> unsigned { return value_; } }; -class year_month_day {}; +class day { + private: + unsigned char value_; + + public: + day() = default; + constexpr explicit day(unsigned d) noexcept + : value_(static_cast(d)) {} + constexpr explicit operator unsigned() const noexcept { return value_; } +}; + +class month { + private: + unsigned char value_; + + public: + month() = default; + constexpr explicit month(unsigned m) noexcept + : value_(static_cast(m)) {} + constexpr explicit operator unsigned() const noexcept { return value_; } +}; + +class year { + private: + int value_; + + public: + year() = default; + constexpr explicit year(int y) noexcept : value_(y) {} + constexpr explicit operator int() const noexcept { return value_; } +}; + +class year_month_day { + private: + fmt::year year_; + fmt::month month_; + fmt::day day_; + + public: + year_month_day() = default; + constexpr year_month_day(const year& y, const month& m, const day& d) noexcept + : year_(y), month_(m), day_(d) {} + constexpr auto year() const noexcept -> fmt::year { return year_; } + constexpr auto month() const noexcept -> fmt::month { return month_; } + constexpr auto day() const noexcept -> fmt::day { return day_; } +}; #endif -// A rudimentary weekday formatter. -template struct formatter { +template +struct formatter : private formatter { private: - bool localized = false; + bool localized_ = false; + bool use_tm_formatter_ = false; public: FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) -> decltype(ctx.begin()) { - auto begin = ctx.begin(), end = ctx.end(); - if (begin != end && *begin == 'L') { - ++begin; - localized = true; + auto it = ctx.begin(), end = ctx.end(); + if (it != end && *it == 'L') { + ++it; + localized_ = true; + return it; } - return begin; + use_tm_formatter_ = it != end && *it != '}'; + return use_tm_formatter_ ? 
formatter::parse(ctx) : it; } template auto format(weekday wd, FormatContext& ctx) const -> decltype(ctx.out()) { auto time = std::tm(); time.tm_wday = static_cast(wd.c_encoding()); - detail::get_locale loc(localized, ctx.locale()); + if (use_tm_formatter_) return formatter::format(time, ctx); + detail::get_locale loc(localized_, ctx.locale()); auto w = detail::tm_writer(loc, ctx.out(), time); w.on_abbr_weekday(); return w.out(); } }; -template -struct formatter, Char> { +template +struct formatter : private formatter { private: - basic_format_specs specs; - int precision = -1; - using arg_ref_type = detail::arg_ref; - arg_ref_type width_ref; - arg_ref_type precision_ref; - bool localized = false; - basic_string_view format_str; - using duration = std::chrono::duration; + bool use_tm_formatter_ = false; - struct spec_handler { - formatter& f; - basic_format_parse_context& context; - basic_string_view format_str; + public: + FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) + -> decltype(ctx.begin()) { + auto it = ctx.begin(), end = ctx.end(); + use_tm_formatter_ = it != end && *it != '}'; + return use_tm_formatter_ ? 
formatter::parse(ctx) : it; + } - template FMT_CONSTEXPR arg_ref_type make_arg_ref(Id arg_id) { - context.check_arg_id(arg_id); - return arg_ref_type(arg_id); - } + template + auto format(day d, FormatContext& ctx) const -> decltype(ctx.out()) { + auto time = std::tm(); + time.tm_mday = static_cast(static_cast(d)); + if (use_tm_formatter_) return formatter::format(time, ctx); + detail::get_locale loc(false, ctx.locale()); + auto w = detail::tm_writer(loc, ctx.out(), time); + w.on_day_of_month(detail::numeric_system::standard, detail::pad_type::zero); + return w.out(); + } +}; - FMT_CONSTEXPR arg_ref_type make_arg_ref(basic_string_view arg_id) { - context.check_arg_id(arg_id); - return arg_ref_type(arg_id); - } +template +struct formatter : private formatter { + private: + bool localized_ = false; + bool use_tm_formatter_ = false; - FMT_CONSTEXPR arg_ref_type make_arg_ref(detail::auto_id) { - return arg_ref_type(context.next_arg_id()); + public: + FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) + -> decltype(ctx.begin()) { + auto it = ctx.begin(), end = ctx.end(); + if (it != end && *it == 'L') { + ++it; + localized_ = true; + return it; } + use_tm_formatter_ = it != end && *it != '}'; + return use_tm_formatter_ ? 
formatter::parse(ctx) : it; + } - void on_error(const char* msg) { FMT_THROW(format_error(msg)); } - FMT_CONSTEXPR void on_fill(basic_string_view fill) { - f.specs.fill = fill; - } - FMT_CONSTEXPR void on_align(align_t align) { f.specs.align = align; } - FMT_CONSTEXPR void on_width(int width) { f.specs.width = width; } - FMT_CONSTEXPR void on_precision(int _precision) { - f.precision = _precision; - } - FMT_CONSTEXPR void end_precision() {} + template + auto format(month m, FormatContext& ctx) const -> decltype(ctx.out()) { + auto time = std::tm(); + time.tm_mon = static_cast(static_cast(m)) - 1; + if (use_tm_formatter_) return formatter::format(time, ctx); + detail::get_locale loc(localized_, ctx.locale()); + auto w = detail::tm_writer(loc, ctx.out(), time); + w.on_abbr_month(); + return w.out(); + } +}; - template FMT_CONSTEXPR void on_dynamic_width(Id arg_id) { - f.width_ref = make_arg_ref(arg_id); - } +template +struct formatter : private formatter { + private: + bool use_tm_formatter_ = false; - template FMT_CONSTEXPR void on_dynamic_precision(Id arg_id) { - f.precision_ref = make_arg_ref(arg_id); - } - }; + public: + FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) + -> decltype(ctx.begin()) { + auto it = ctx.begin(), end = ctx.end(); + use_tm_formatter_ = it != end && *it != '}'; + return use_tm_formatter_ ? 
formatter::parse(ctx) : it; + } - using iterator = typename basic_format_parse_context::iterator; - struct parse_range { - iterator begin; - iterator end; - }; + template + auto format(year y, FormatContext& ctx) const -> decltype(ctx.out()) { + auto time = std::tm(); + time.tm_year = static_cast(y) - 1900; + if (use_tm_formatter_) return formatter::format(time, ctx); + detail::get_locale loc(false, ctx.locale()); + auto w = detail::tm_writer(loc, ctx.out(), time); + w.on_year(detail::numeric_system::standard); + return w.out(); + } +}; - FMT_CONSTEXPR parse_range do_parse(basic_format_parse_context& ctx) { - auto begin = ctx.begin(), end = ctx.end(); - if (begin == end || *begin == '}') return {begin, begin}; - spec_handler handler{*this, ctx, format_str}; - begin = detail::parse_align(begin, end, handler); - if (begin == end) return {begin, begin}; - begin = detail::parse_width(begin, end, handler); - if (begin == end) return {begin, begin}; - if (*begin == '.') { - if (std::is_floating_point::value) - begin = detail::parse_precision(begin, end, handler); - else - handler.on_error("precision not allowed for this argument type"); - } - if (begin != end && *begin == 'L') { - ++begin; - localized = true; - } - end = detail::parse_chrono_format(begin, end, - detail::chrono_format_checker()); - return {begin, end}; +template +struct formatter : private formatter { + private: + bool use_tm_formatter_ = false; + + public: + FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) + -> decltype(ctx.begin()) { + auto it = ctx.begin(), end = ctx.end(); + use_tm_formatter_ = it != end && *it != '}'; + return use_tm_formatter_ ? 
formatter::parse(ctx) : it; } + template + auto format(year_month_day val, FormatContext& ctx) const + -> decltype(ctx.out()) { + auto time = std::tm(); + time.tm_year = static_cast(val.year()) - 1900; + time.tm_mon = static_cast(static_cast(val.month())) - 1; + time.tm_mday = static_cast(static_cast(val.day())); + if (use_tm_formatter_) return formatter::format(time, ctx); + detail::get_locale loc(true, ctx.locale()); + auto w = detail::tm_writer(loc, ctx.out(), time); + w.on_iso_date(); + return w.out(); + } +}; + +template +struct formatter, Char> { + private: + format_specs specs_; + detail::arg_ref width_ref_; + detail::arg_ref precision_ref_; + bool localized_ = false; + basic_string_view format_str_; + public: FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) -> decltype(ctx.begin()) { - auto range = do_parse(ctx); - format_str = basic_string_view( - &*range.begin, detail::to_unsigned(range.end - range.begin)); - return range.end; + auto it = ctx.begin(), end = ctx.end(); + if (it == end || *it == '}') return it; + + it = detail::parse_align(it, end, specs_); + if (it == end) return it; + + it = detail::parse_dynamic_spec(it, end, specs_.width, width_ref_, ctx); + if (it == end) return it; + + auto checker = detail::chrono_format_checker(); + if (*it == '.') { + checker.has_precision_integral = !std::is_floating_point::value; + it = detail::parse_precision(it, end, specs_.precision, precision_ref_, + ctx); + } + if (it != end && *it == 'L') { + localized_ = true; + ++it; + } + end = detail::parse_chrono_format(it, end, checker); + format_str_ = {it, detail::to_unsigned(end - it)}; + return end; } template - auto format(const duration& d, FormatContext& ctx) const + auto format(std::chrono::duration d, FormatContext& ctx) const -> decltype(ctx.out()) { - auto specs_copy = specs; - auto precision_copy = precision; - auto begin = format_str.begin(), end = format_str.end(); + auto specs = specs_; + auto precision = specs.precision; + specs.precision = 
-1; + auto begin = format_str_.begin(), end = format_str_.end(); // As a possible future optimization, we could avoid extra copying if width // is not specified. - basic_memory_buffer buf; + auto buf = basic_memory_buffer(); auto out = std::back_inserter(buf); - detail::handle_dynamic_spec(specs_copy.width, - width_ref, ctx); - detail::handle_dynamic_spec(precision_copy, - precision_ref, ctx); + detail::handle_dynamic_spec(specs.width, width_ref_, + ctx); + detail::handle_dynamic_spec(precision, + precision_ref_, ctx); if (begin == end || *begin == '}') { - out = detail::format_duration_value(out, d.count(), precision_copy); + out = detail::format_duration_value(out, d.count(), precision); detail::format_duration_unit(out); } else { - detail::chrono_formatter f( - ctx, out, d); - f.precision = precision_copy; - f.localized = localized; + using chrono_formatter = + detail::chrono_formatter; + auto f = chrono_formatter(ctx, out, d); + f.precision = precision; + f.localized = localized_; detail::parse_chrono_format(begin, end, f); } return detail::write( - ctx.out(), basic_string_view(buf.data(), buf.size()), specs_copy); + ctx.out(), basic_string_view(buf.data(), buf.size()), specs); } }; @@ -2011,87 +2304,129 @@ template struct formatter, Char> : formatter { FMT_CONSTEXPR formatter() { - basic_string_view default_specs = - detail::string_literal{}; - this->do_parse(default_specs.begin(), default_specs.end()); + this->format_str_ = detail::string_literal{}; } template - auto format(std::chrono::time_point val, + auto format(std::chrono::time_point val, FormatContext& ctx) const -> decltype(ctx.out()) { - return formatter::format(localtime(val), ctx); + std::tm tm = gmtime(val); + using period = typename Duration::period; + if (detail::const_check( + period::num == 1 && period::den == 1 && + !std::is_floating_point::value)) { + return formatter::format(tm, ctx); + } + Duration epoch = val.time_since_epoch(); + Duration subsecs = detail::fmt_duration_cast( + epoch - 
detail::fmt_duration_cast(epoch)); + if (subsecs.count() < 0) { + auto second = + detail::fmt_duration_cast(std::chrono::seconds(1)); + if (tm.tm_sec != 0) + --tm.tm_sec; + else + tm = gmtime(val - second); + subsecs += detail::fmt_duration_cast(std::chrono::seconds(1)); + } + return formatter::do_format(tm, ctx, &subsecs); } }; -#if FMT_USE_UTC_TIME +#if FMT_USE_LOCAL_TIME template -struct formatter, - Char> : formatter { +struct formatter, Char> + : formatter { FMT_CONSTEXPR formatter() { - basic_string_view default_specs = - detail::string_literal{}; - this->do_parse(default_specs.begin(), default_specs.end()); + this->format_str_ = detail::string_literal{}; } template - auto format(std::chrono::time_point val, + auto format(std::chrono::local_time val, FormatContext& ctx) const + -> decltype(ctx.out()) { + using period = typename Duration::period; + if (period::num != 1 || period::den != 1 || + std::is_floating_point::value) { + const auto epoch = val.time_since_epoch(); + const auto subsecs = detail::fmt_duration_cast( + epoch - detail::fmt_duration_cast(epoch)); + + return formatter::do_format(localtime(val), ctx, &subsecs); + } + + return formatter::format(localtime(val), ctx); + } +}; +#endif + +#if FMT_USE_UTC_TIME +template +struct formatter, + Char> + : formatter, + Char> { + template + auto format(std::chrono::time_point val, FormatContext& ctx) const -> decltype(ctx.out()) { - return formatter::format( - localtime(std::chrono::utc_clock::to_sys(val)), ctx); + return formatter< + std::chrono::time_point, + Char>::format(std::chrono::utc_clock::to_sys(val), ctx); } }; #endif template struct formatter { private: - enum class spec { - unknown, - year_month_day, - hh_mm_ss, - }; - spec spec_ = spec::unknown; - basic_string_view specs; + format_specs specs_; + detail::arg_ref width_ref_; protected: - template FMT_CONSTEXPR auto do_parse(It begin, It end) -> It { - if (begin != end && *begin == ':') ++begin; - end = detail::parse_chrono_format(begin, end, 
detail::tm_format_checker()); - // Replace default spec only if the new spec is not empty. - if (end != begin) specs = {begin, detail::to_unsigned(end - begin)}; - return end; + basic_string_view format_str_; + + template + auto do_format(const std::tm& tm, FormatContext& ctx, + const Duration* subsecs) const -> decltype(ctx.out()) { + auto specs = specs_; + auto buf = basic_memory_buffer(); + auto out = std::back_inserter(buf); + detail::handle_dynamic_spec(specs.width, width_ref_, + ctx); + + auto loc_ref = ctx.locale(); + detail::get_locale loc(static_cast(loc_ref), loc_ref); + auto w = + detail::tm_writer(loc, out, tm, subsecs); + detail::parse_chrono_format(format_str_.begin(), format_str_.end(), w); + return detail::write( + ctx.out(), basic_string_view(buf.data(), buf.size()), specs); } public: FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) -> decltype(ctx.begin()) { - auto end = this->do_parse(ctx.begin(), ctx.end()); - // basic_string_view<>::compare isn't constexpr before C++17. - if (specs.size() == 2 && specs[0] == Char('%')) { - if (specs[1] == Char('F')) - spec_ = spec::year_month_day; - else if (specs[1] == Char('T')) - spec_ = spec::hh_mm_ss; - } + auto it = ctx.begin(), end = ctx.end(); + if (it == end || *it == '}') return it; + + it = detail::parse_align(it, end, specs_); + if (it == end) return it; + + it = detail::parse_dynamic_spec(it, end, specs_.width, width_ref_, ctx); + if (it == end) return it; + + end = detail::parse_chrono_format(it, end, detail::tm_format_checker()); + // Replace the default format_str only if the new spec is not empty. 
+ if (end != it) format_str_ = {it, detail::to_unsigned(end - it)}; return end; } template auto format(const std::tm& tm, FormatContext& ctx) const -> decltype(ctx.out()) { - const auto loc_ref = ctx.locale(); - detail::get_locale loc(static_cast(loc_ref), loc_ref); - auto w = detail::tm_writer(loc, ctx.out(), tm); - if (spec_ == spec::year_month_day) - w.on_iso_date(); - else if (spec_ == spec::hh_mm_ss) - w.on_iso_time(); - else - detail::parse_chrono_format(specs.begin(), specs.end(), w); - return w.out(); + return do_format(tm, ctx, nullptr); } }; -FMT_MODULE_EXPORT_END +FMT_END_EXPORT FMT_END_NAMESPACE #endif // FMT_CHRONO_H_ diff --git a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/color.h b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/color.h index e9b880ad431c..f0e9dd94ef3a 100644 --- a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/color.h +++ b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/color.h @@ -11,7 +11,7 @@ #include "format.h" FMT_BEGIN_NAMESPACE -FMT_MODULE_EXPORT_BEGIN +FMT_BEGIN_EXPORT enum class color : uint32_t { alice_blue = 0xF0F8FF, // rgb(240,248,255) @@ -203,7 +203,7 @@ struct rgb { uint8_t b; }; -FMT_BEGIN_DETAIL_NAMESPACE +namespace detail { // color is a struct of either a rgb color or a terminal color. struct color_type { @@ -225,22 +225,21 @@ struct color_type { uint32_t rgb_color; } value; }; +} // namespace detail -FMT_END_DETAIL_NAMESPACE - -/** A text style consisting of foreground and background colors and emphasis. */ +/// A text style consisting of foreground and background colors and emphasis. 
class text_style { public: FMT_CONSTEXPR text_style(emphasis em = emphasis()) noexcept : set_foreground_color(), set_background_color(), ems(em) {} - FMT_CONSTEXPR text_style& operator|=(const text_style& rhs) { + FMT_CONSTEXPR auto operator|=(const text_style& rhs) -> text_style& { if (!set_foreground_color) { set_foreground_color = rhs.set_foreground_color; foreground_color = rhs.foreground_color; } else if (rhs.set_foreground_color) { if (!foreground_color.is_rgb || !rhs.foreground_color.is_rgb) - FMT_THROW(format_error("can't OR a terminal color")); + report_error("can't OR a terminal color"); foreground_color.value.rgb_color |= rhs.foreground_color.value.rgb_color; } @@ -249,7 +248,7 @@ class text_style { background_color = rhs.background_color; } else if (rhs.set_background_color) { if (!background_color.is_rgb || !rhs.background_color.is_rgb) - FMT_THROW(format_error("can't OR a terminal color")); + report_error("can't OR a terminal color"); background_color.value.rgb_color |= rhs.background_color.value.rgb_color; } @@ -258,29 +257,29 @@ class text_style { return *this; } - friend FMT_CONSTEXPR text_style operator|(text_style lhs, - const text_style& rhs) { + friend FMT_CONSTEXPR auto operator|(text_style lhs, const text_style& rhs) + -> text_style { return lhs |= rhs; } - FMT_CONSTEXPR bool has_foreground() const noexcept { + FMT_CONSTEXPR auto has_foreground() const noexcept -> bool { return set_foreground_color; } - FMT_CONSTEXPR bool has_background() const noexcept { + FMT_CONSTEXPR auto has_background() const noexcept -> bool { return set_background_color; } - FMT_CONSTEXPR bool has_emphasis() const noexcept { + FMT_CONSTEXPR auto has_emphasis() const noexcept -> bool { return static_cast(ems) != 0; } - FMT_CONSTEXPR detail::color_type get_foreground() const noexcept { + FMT_CONSTEXPR auto get_foreground() const noexcept -> detail::color_type { FMT_ASSERT(has_foreground(), "no foreground specified for this style"); return foreground_color; } - 
FMT_CONSTEXPR detail::color_type get_background() const noexcept { + FMT_CONSTEXPR auto get_background() const noexcept -> detail::color_type { FMT_ASSERT(has_background(), "no background specified for this style"); return background_color; } - FMT_CONSTEXPR emphasis get_emphasis() const noexcept { + FMT_CONSTEXPR auto get_emphasis() const noexcept -> emphasis { FMT_ASSERT(has_emphasis(), "no emphasis specified for this style"); return ems; } @@ -298,9 +297,11 @@ class text_style { } } - friend FMT_CONSTEXPR text_style fg(detail::color_type foreground) noexcept; + friend FMT_CONSTEXPR auto fg(detail::color_type foreground) noexcept + -> text_style; - friend FMT_CONSTEXPR text_style bg(detail::color_type background) noexcept; + friend FMT_CONSTEXPR auto bg(detail::color_type background) noexcept + -> text_style; detail::color_type foreground_color; detail::color_type background_color; @@ -309,21 +310,24 @@ class text_style { emphasis ems; }; -/** Creates a text style from the foreground (text) color. */ -FMT_CONSTEXPR inline text_style fg(detail::color_type foreground) noexcept { +/// Creates a text style from the foreground (text) color. +FMT_CONSTEXPR inline auto fg(detail::color_type foreground) noexcept + -> text_style { return text_style(true, foreground); } -/** Creates a text style from the background color. */ -FMT_CONSTEXPR inline text_style bg(detail::color_type background) noexcept { +/// Creates a text style from the background color. 
+FMT_CONSTEXPR inline auto bg(detail::color_type background) noexcept + -> text_style { return text_style(false, background); } -FMT_CONSTEXPR inline text_style operator|(emphasis lhs, emphasis rhs) noexcept { +FMT_CONSTEXPR inline auto operator|(emphasis lhs, emphasis rhs) noexcept + -> text_style { return text_style(lhs) | rhs; } -FMT_BEGIN_DETAIL_NAMESPACE +namespace detail { template struct ansi_color_escape { FMT_CONSTEXPR ansi_color_escape(detail::color_type text_color, @@ -385,9 +389,9 @@ template struct ansi_color_escape { } FMT_CONSTEXPR operator const Char*() const noexcept { return buffer; } - FMT_CONSTEXPR const Char* begin() const noexcept { return buffer; } - FMT_CONSTEXPR_CHAR_TRAITS const Char* end() const noexcept { - return buffer + std::char_traits::length(buffer); + FMT_CONSTEXPR auto begin() const noexcept -> const Char* { return buffer; } + FMT_CONSTEXPR20 auto end() const noexcept -> const Char* { + return buffer + basic_string_view(buffer).size(); } private: @@ -401,66 +405,45 @@ template struct ansi_color_escape { out[2] = static_cast('0' + c % 10); out[3] = static_cast(delimiter); } - static FMT_CONSTEXPR bool has_emphasis(emphasis em, emphasis mask) noexcept { + static FMT_CONSTEXPR auto has_emphasis(emphasis em, emphasis mask) noexcept + -> bool { return static_cast(em) & static_cast(mask); } }; template -FMT_CONSTEXPR ansi_color_escape make_foreground_color( - detail::color_type foreground) noexcept { +FMT_CONSTEXPR auto make_foreground_color(detail::color_type foreground) noexcept + -> ansi_color_escape { return ansi_color_escape(foreground, "\x1b[38;2;"); } template -FMT_CONSTEXPR ansi_color_escape make_background_color( - detail::color_type background) noexcept { +FMT_CONSTEXPR auto make_background_color(detail::color_type background) noexcept + -> ansi_color_escape { return ansi_color_escape(background, "\x1b[48;2;"); } template -FMT_CONSTEXPR ansi_color_escape make_emphasis(emphasis em) noexcept { +FMT_CONSTEXPR auto 
make_emphasis(emphasis em) noexcept + -> ansi_color_escape { return ansi_color_escape(em); } -template inline void fputs(const Char* chars, FILE* stream) { - int result = std::fputs(chars, stream); -#if !__NVCC__ - if (result < 0) - FMT_THROW(system_error(errno, FMT_STRING("cannot write to file"))); -#endif -} - -template <> inline void fputs(const wchar_t* chars, FILE* stream) { - int result = std::fputws(chars, stream); -#if !__NVCC__ - if (result < 0) - FMT_THROW(system_error(errno, FMT_STRING("cannot write to file"))); -#endif -} - -template inline void reset_color(FILE* stream) { - fputs("\x1b[0m", stream); -} - -template <> inline void reset_color(FILE* stream) { - fputs(L"\x1b[0m", stream); -} - template inline void reset_color(buffer& buffer) { auto reset_color = string_view("\x1b[0m"); buffer.append(reset_color.begin(), reset_color.end()); } -template struct styled_arg { +template struct styled_arg : detail::view { const T& value; text_style style; + styled_arg(const T& v, text_style s) : value(v), style(s) {} }; template -void vformat_to(buffer& buf, const text_style& ts, - basic_string_view format_str, - basic_format_args>> args) { +void vformat_to( + buffer& buf, const text_style& ts, basic_string_view format_str, + basic_format_args>> args) { bool has_style = false; if (ts.has_emphasis()) { has_style = true; @@ -481,118 +464,94 @@ void vformat_to(buffer& buf, const text_style& ts, if (has_style) detail::reset_color(buf); } -FMT_END_DETAIL_NAMESPACE - -template > -void vprint(std::FILE* f, const text_style& ts, const S& format, - basic_format_args>> args) { - basic_memory_buffer buf; - detail::vformat_to(buf, ts, detail::to_string_view(format), args); - if (detail::is_utf8()) { - detail::print(f, basic_string_view(buf.begin(), buf.size())); - } else { - buf.push_back(Char(0)); - detail::fputs(buf.data(), f); - } +} // namespace detail + +inline void vprint(FILE* f, const text_style& ts, string_view fmt, + format_args args) { + auto buf = 
memory_buffer(); + detail::vformat_to(buf, ts, fmt, args); + print(f, FMT_STRING("{}"), string_view(buf.begin(), buf.size())); } /** - \rst - Formats a string and prints it to the specified file stream using ANSI - escape sequences to specify text formatting. - - **Example**:: - - fmt::print(fmt::emphasis::bold | fg(fmt::color::red), - "Elapsed time: {0:.2f} seconds", 1.23); - \endrst + * Formats a string and prints it to the specified file stream using ANSI + * escape sequences to specify text formatting. + * + * **Example**: + * + * fmt::print(fmt::emphasis::bold | fg(fmt::color::red), + * "Elapsed time: {0:.2f} seconds", 1.23); */ -template ::value)> -void print(std::FILE* f, const text_style& ts, const S& format_str, - const Args&... args) { - vprint(f, ts, format_str, - fmt::make_format_args>>(args...)); +template +void print(FILE* f, const text_style& ts, format_string fmt, + T&&... args) { + vprint(f, ts, fmt, fmt::make_format_args(args...)); } /** - \rst - Formats a string and prints it to stdout using ANSI escape sequences to - specify text formatting. - - **Example**:: - - fmt::print(fmt::emphasis::bold | fg(fmt::color::red), - "Elapsed time: {0:.2f} seconds", 1.23); - \endrst + * Formats a string and prints it to stdout using ANSI escape sequences to + * specify text formatting. + * + * **Example**: + * + * fmt::print(fmt::emphasis::bold | fg(fmt::color::red), + * "Elapsed time: {0:.2f} seconds", 1.23); */ -template ::value)> -void print(const text_style& ts, const S& format_str, const Args&... args) { - return print(stdout, ts, format_str, args...); +template +void print(const text_style& ts, format_string fmt, T&&... 
args) { + return print(stdout, ts, fmt, std::forward(args)...); } -template > -inline std::basic_string vformat( - const text_style& ts, const S& format_str, - basic_format_args>> args) { - basic_memory_buffer buf; - detail::vformat_to(buf, ts, detail::to_string_view(format_str), args); +inline auto vformat(const text_style& ts, string_view fmt, format_args args) + -> std::string { + auto buf = memory_buffer(); + detail::vformat_to(buf, ts, fmt, args); return fmt::to_string(buf); } /** - \rst - Formats arguments and returns the result as a string using ANSI - escape sequences to specify text formatting. - - **Example**:: - - #include - std::string message = fmt::format(fmt::emphasis::bold | fg(fmt::color::red), - "The answer is {}", 42); - \endrst -*/ -template > -inline std::basic_string format(const text_style& ts, const S& format_str, - const Args&... args) { - return fmt::vformat(ts, detail::to_string_view(format_str), - fmt::make_format_args>(args...)); + * Formats arguments and returns the result as a string using ANSI escape + * sequences to specify text formatting. + * + * **Example**: + * + * ``` + * #include + * std::string message = fmt::format(fmt::emphasis::bold | fg(fmt::color::red), + * "The answer is {}", 42); + * ``` + */ +template +inline auto format(const text_style& ts, format_string fmt, T&&... args) + -> std::string { + return fmt::vformat(ts, fmt, fmt::make_format_args(args...)); } -/** - Formats a string with the given text_style and writes the output to ``out``. - */ -template ::value)> -OutputIt vformat_to( - OutputIt out, const text_style& ts, basic_string_view format_str, - basic_format_args>> args) { - auto&& buf = detail::get_buffer(out); - detail::vformat_to(buf, ts, format_str, args); +/// Formats a string with the given text_style and writes the output to `out`. 
+template ::value)> +auto vformat_to(OutputIt out, const text_style& ts, string_view fmt, + format_args args) -> OutputIt { + auto&& buf = detail::get_buffer(out); + detail::vformat_to(buf, ts, fmt, args); return detail::get_iterator(buf, out); } /** - \rst - Formats arguments with the given text_style, writes the result to the output - iterator ``out`` and returns the iterator past the end of the output range. - - **Example**:: - - std::vector out; - fmt::format_to(std::back_inserter(out), - fmt::emphasis::bold | fg(fmt::color::red), "{}", 42); - \endrst -*/ -template >::value&& - detail::is_string::value> -inline auto format_to(OutputIt out, const text_style& ts, const S& format_str, - Args&&... args) -> - typename std::enable_if::type { - return vformat_to(out, ts, detail::to_string_view(format_str), - fmt::make_format_args>>(args...)); + * Formats arguments with the given text style, writes the result to the output + * iterator `out` and returns the iterator past the end of the output range. + * + * **Example**: + * + * std::vector out; + * fmt::format_to(std::back_inserter(out), + * fmt::emphasis::bold | fg(fmt::color::red), "{}", 42); + */ +template ::value)> +inline auto format_to(OutputIt out, const text_style& ts, + format_string fmt, T&&... args) -> OutputIt { + return vformat_to(out, ts, fmt, fmt::make_format_args(args...)); } template @@ -632,16 +591,14 @@ struct formatter, Char> : formatter { }; /** - \rst - Returns an argument that will be formatted using ANSI escape sequences, - to be used in a formatting function. - - **Example**:: - - fmt::print("Elapsed time: {0:.2f} seconds", - fmt::styled(1.23, fmt::fg(fmt::color::green) | - fmt::bg(fmt::color::blue))); - \endrst + * Returns an argument that will be formatted using ANSI escape sequences, + * to be used in a formatting function. 
+ * + * **Example**: + * + * fmt::print("Elapsed time: {0:.2f} seconds", + * fmt::styled(1.23, fmt::fg(fmt::color::green) | + * fmt::bg(fmt::color::blue))); */ template FMT_CONSTEXPR auto styled(const T& value, text_style ts) @@ -649,7 +606,7 @@ FMT_CONSTEXPR auto styled(const T& value, text_style ts) return detail::styled_arg>{value, ts}; } -FMT_MODULE_EXPORT_END +FMT_END_EXPORT FMT_END_NAMESPACE #endif // FMT_COLOR_H_ diff --git a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/compile.h b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/compile.h index 933668c41c3e..b2afc2c309f4 100644 --- a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/compile.h +++ b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/compile.h @@ -8,117 +8,41 @@ #ifndef FMT_COMPILE_H_ #define FMT_COMPILE_H_ +#ifndef FMT_MODULE +# include // std::back_inserter +#endif + #include "format.h" FMT_BEGIN_NAMESPACE + +// A compile-time string which is compiled into fast formatting code. +FMT_EXPORT class compiled_string {}; + namespace detail { -template -FMT_CONSTEXPR inline counting_iterator copy_str(InputIt begin, InputIt end, - counting_iterator it) { +template +FMT_CONSTEXPR inline auto copy(InputIt begin, InputIt end, counting_iterator it) + -> counting_iterator { return it + (end - begin); } -template class truncating_iterator_base { - protected: - OutputIt out_; - size_t limit_; - size_t count_ = 0; - - truncating_iterator_base() : out_(), limit_(0) {} - - truncating_iterator_base(OutputIt out, size_t limit) - : out_(out), limit_(limit) {} - - public: - using iterator_category = std::output_iterator_tag; - using value_type = typename std::iterator_traits::value_type; - using difference_type = std::ptrdiff_t; - using pointer = void; - using reference = void; - FMT_UNCHECKED_ITERATOR(truncating_iterator_base); - - OutputIt base() const { return out_; } - size_t count() const { return count_; } -}; - -// An output iterator that truncates the output and counts the 
number of objects -// written to it. -template ::value_type>::type> -class truncating_iterator; - -template -class truncating_iterator - : public truncating_iterator_base { - mutable typename truncating_iterator_base::value_type blackhole_; - - public: - using value_type = typename truncating_iterator_base::value_type; - - truncating_iterator() = default; - - truncating_iterator(OutputIt out, size_t limit) - : truncating_iterator_base(out, limit) {} - - truncating_iterator& operator++() { - if (this->count_++ < this->limit_) ++this->out_; - return *this; - } - - truncating_iterator operator++(int) { - auto it = *this; - ++*this; - return it; - } - - value_type& operator*() const { - return this->count_ < this->limit_ ? *this->out_ : blackhole_; - } -}; - -template -class truncating_iterator - : public truncating_iterator_base { - public: - truncating_iterator() = default; - - truncating_iterator(OutputIt out, size_t limit) - : truncating_iterator_base(out, limit) {} - - template truncating_iterator& operator=(T val) { - if (this->count_++ < this->limit_) *this->out_++ = val; - return *this; - } - - truncating_iterator& operator++() { return *this; } - truncating_iterator& operator++(int) { return *this; } - truncating_iterator& operator*() { return *this; } -}; - -// A compile-time string which is compiled into fast formatting code. -class compiled_string {}; - template struct is_compiled_string : std::is_base_of {}; /** - \rst - Converts a string literal *s* into a format string that will be parsed at - compile time and converted into efficient formatting code. Requires C++17 - ``constexpr if`` compiler support. - - **Example**:: - - // Converts 42 into std::string using the most efficient method and no - // runtime format string processing. - std::string s = fmt::format(FMT_COMPILE("{}"), 42); - \endrst + * Converts a string literal `s` into a format string that will be parsed at + * compile time and converted into efficient formatting code. 
Requires C++17 + * `constexpr if` compiler support. + * + * **Example**: + * + * // Converts 42 into std::string using the most efficient method and no + * // runtime format string processing. + * std::string s = fmt::format(FMT_COMPILE("{}"), 42); */ #if defined(__cpp_if_constexpr) && defined(__cpp_return_type_deduction) -# define FMT_COMPILE(s) \ - FMT_STRING_IMPL(s, fmt::detail::compiled_string, explicit) +# define FMT_COMPILE(s) FMT_STRING_IMPL(s, fmt::compiled_string, explicit) #else # define FMT_COMPILE(s) FMT_STRING(s) #endif @@ -135,7 +59,7 @@ struct udl_compiled_string : compiled_string { #endif template -const T& first(const T& value, const Tail&...) { +auto first(const T& value, const Tail&...) -> const T& { return value; } @@ -196,7 +120,8 @@ template struct code_unit { template constexpr OutputIt format(OutputIt out, const Args&...) const { - return write(out, value); + *out++ = value; + return out; } }; @@ -220,7 +145,12 @@ template struct field { template constexpr OutputIt format(OutputIt out, const Args&... 
args) const { - return write(out, get_arg_checked(args...)); + const T& arg = get_arg_checked(args...); + if constexpr (std::is_convertible>::value) { + auto s = basic_string_view(arg); + return copy(s.begin(), s.end(), out); + } + return write(out, arg); } }; @@ -308,13 +238,12 @@ constexpr size_t parse_text(basic_string_view str, size_t pos) { } template -constexpr auto compile_format_string(S format_str); +constexpr auto compile_format_string(S fmt); template -constexpr auto parse_tail(T head, S format_str) { - if constexpr (POS != - basic_string_view(format_str).size()) { - constexpr auto tail = compile_format_string(format_str); +constexpr auto parse_tail(T head, S fmt) { + if constexpr (POS != basic_string_view(fmt).size()) { + constexpr auto tail = compile_format_string(fmt); if constexpr (std::is_same, unknown_format>()) return tail; @@ -331,14 +260,14 @@ template struct parse_specs_result { int next_arg_id; }; -constexpr int manual_indexing_id = -1; +enum { manual_indexing_id = -1 }; template constexpr parse_specs_result parse_specs(basic_string_view str, size_t pos, int next_arg_id) { str.remove_prefix(pos); - auto ctx = compile_parse_context(str, max_value(), nullptr, {}, - next_arg_id); + auto ctx = + compile_parse_context(str, max_value(), nullptr, next_arg_id); auto f = formatter(); auto end = f.parse(ctx); return {f, pos + fmt::detail::to_unsigned(end - str.data()), @@ -348,22 +277,18 @@ constexpr parse_specs_result parse_specs(basic_string_view str, template struct arg_id_handler { arg_ref arg_id; - constexpr int operator()() { + constexpr int on_auto() { FMT_ASSERT(false, "handler cannot be used with automatic indexing"); return 0; } - constexpr int operator()(int id) { + constexpr int on_index(int id) { arg_id = arg_ref(id); return 0; } - constexpr int operator()(basic_string_view id) { + constexpr int on_name(basic_string_view id) { arg_id = arg_ref(id); return 0; } - - constexpr void on_error(const char* message) { - 
FMT_THROW(format_error(message)); - } }; template struct parse_arg_id_result { @@ -389,14 +314,13 @@ struct field_type::value>> { template -constexpr auto parse_replacement_field_then_tail(S format_str) { +constexpr auto parse_replacement_field_then_tail(S fmt) { using char_type = typename S::char_type; - constexpr auto str = basic_string_view(format_str); + constexpr auto str = basic_string_view(fmt); constexpr char_type c = END_POS != str.size() ? str[END_POS] : char_type(); if constexpr (c == '}') { return parse_tail( - field::type, ARG_INDEX>(), - format_str); + field::type, ARG_INDEX>(), fmt); } else if constexpr (c != ':') { FMT_THROW(format_error("expected ':'")); } else { @@ -409,7 +333,7 @@ constexpr auto parse_replacement_field_then_tail(S format_str) { return parse_tail( spec_field::type, ARG_INDEX>{ result.fmt}, - format_str); + fmt); } } } @@ -417,22 +341,21 @@ constexpr auto parse_replacement_field_then_tail(S format_str) { // Compiles a non-empty format string and returns the compiled representation // or unknown_format() on unrecognized input. template -constexpr auto compile_format_string(S format_str) { +constexpr auto compile_format_string(S fmt) { using char_type = typename S::char_type; - constexpr auto str = basic_string_view(format_str); + constexpr auto str = basic_string_view(fmt); if constexpr (str[POS] == '{') { if constexpr (POS + 1 == str.size()) FMT_THROW(format_error("unmatched '{' in format string")); if constexpr (str[POS + 1] == '{') { - return parse_tail(make_text(str, POS, 1), format_str); + return parse_tail(make_text(str, POS, 1), fmt); } else if constexpr (str[POS + 1] == '}' || str[POS + 1] == ':') { static_assert(ID != manual_indexing_id, "cannot switch from manual to automatic argument indexing"); constexpr auto next_id = ID != manual_indexing_id ? 
ID + 1 : manual_indexing_id; return parse_replacement_field_then_tail, Args, - POS + 1, ID, next_id>( - format_str); + POS + 1, ID, next_id>(fmt); } else { constexpr auto arg_id_result = parse_arg_id(str.data() + POS + 1, str.data() + str.size()); @@ -448,60 +371,55 @@ constexpr auto compile_format_string(S format_str) { return parse_replacement_field_then_tail, Args, arg_id_end_pos, arg_index, manual_indexing_id>( - format_str); + fmt); } else if constexpr (arg_id_result.arg_id.kind == arg_id_kind::name) { constexpr auto arg_index = get_arg_index_by_name(arg_id_result.arg_id.val.name, Args{}); - if constexpr (arg_index != invalid_arg_index) { + if constexpr (arg_index >= 0) { constexpr auto next_id = ID != manual_indexing_id ? ID + 1 : manual_indexing_id; return parse_replacement_field_then_tail< decltype(get_type::value), Args, arg_id_end_pos, - arg_index, next_id>(format_str); - } else { - if constexpr (c == '}') { - return parse_tail( - runtime_named_field{arg_id_result.arg_id.val.name}, - format_str); - } else if constexpr (c == ':') { - return unknown_format(); // no type info for specs parsing - } + arg_index, next_id>(fmt); + } else if constexpr (c == '}') { + return parse_tail( + runtime_named_field{arg_id_result.arg_id.val.name}, + fmt); + } else if constexpr (c == ':') { + return unknown_format(); // no type info for specs parsing } } } } else if constexpr (str[POS] == '}') { if constexpr (POS + 1 == str.size()) FMT_THROW(format_error("unmatched '}' in format string")); - return parse_tail(make_text(str, POS, 1), format_str); + return parse_tail(make_text(str, POS, 1), fmt); } else { constexpr auto end = parse_text(str, POS + 1); if constexpr (end - POS > 1) { - return parse_tail(make_text(str, POS, end - POS), - format_str); + return parse_tail(make_text(str, POS, end - POS), fmt); } else { - return parse_tail(code_unit{str[POS]}, - format_str); + return parse_tail(code_unit{str[POS]}, fmt); } } } template ::value)> -constexpr auto compile(S format_str) 
{ - constexpr auto str = basic_string_view(format_str); +constexpr auto compile(S fmt) { + constexpr auto str = basic_string_view(fmt); if constexpr (str.size() == 0) { return detail::make_text(str, 0, 0); } else { constexpr auto result = - detail::compile_format_string, 0, 0>( - format_str); + detail::compile_format_string, 0, 0>(fmt); return result; } } #endif // defined(__cpp_if_constexpr) && defined(__cpp_return_type_deduction) } // namespace detail -FMT_MODULE_EXPORT_BEGIN +FMT_BEGIN_EXPORT #if defined(__cpp_if_constexpr) && defined(__cpp_return_type_deduction) @@ -566,33 +484,33 @@ FMT_CONSTEXPR OutputIt format_to(OutputIt out, const S&, Args&&... args) { template ::value)> -format_to_n_result format_to_n(OutputIt out, size_t n, - const S& format_str, Args&&... args) { - auto it = fmt::format_to(detail::truncating_iterator(out, n), - format_str, std::forward(args)...); - return {it.base(), it.count()}; +auto format_to_n(OutputIt out, size_t n, const S& fmt, Args&&... args) + -> format_to_n_result { + using traits = detail::fixed_buffer_traits; + auto buf = detail::iterator_buffer(out, n); + fmt::format_to(std::back_inserter(buf), fmt, std::forward(args)...); + return {buf.out(), buf.count()}; } template ::value)> -FMT_CONSTEXPR20 size_t formatted_size(const S& format_str, - const Args&... args) { - return fmt::format_to(detail::counting_iterator(), format_str, args...) - .count(); +FMT_CONSTEXPR20 auto formatted_size(const S& fmt, const Args&... args) + -> size_t { + return fmt::format_to(detail::counting_iterator(), fmt, args...).count(); } template ::value)> -void print(std::FILE* f, const S& format_str, const Args&... args) { +void print(std::FILE* f, const S& fmt, const Args&... args) { memory_buffer buffer; - fmt::format_to(std::back_inserter(buffer), format_str, args...); + fmt::format_to(std::back_inserter(buffer), fmt, args...); detail::print(f, {buffer.data(), buffer.size()}); } template ::value)> -void print(const S& format_str, const Args&... 
args) { - print(stdout, format_str, args...); +void print(const S& fmt, const Args&... args) { + print(stdout, fmt, args...); } #if FMT_USE_NONTYPE_TEMPLATE_ARGS @@ -605,7 +523,7 @@ template constexpr auto operator""_cf() { } // namespace literals #endif -FMT_MODULE_EXPORT_END +FMT_END_EXPORT FMT_END_NAMESPACE #endif // FMT_COMPILE_H_ diff --git a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/core.h b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/core.h index af61b22c44ec..8ca735f0c004 100644 --- a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/core.h +++ b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/core.h @@ -1,3343 +1,5 @@ -// Formatting library for C++ - the core API for char/UTF-8 -// -// Copyright (c) 2012 - present, Victor Zverovich -// All rights reserved. -// -// For the license information refer to format.h. +// This file is only provided for compatibility and may be removed in future +// versions. Use fmt/base.h if you don't need fmt::format and fmt/format.h +// otherwise. -#ifndef FMT_CORE_H_ -#define FMT_CORE_H_ - -#include // std::byte -#include // std::FILE -#include // std::strlen -#include -#include -#include -#include - -// The fmt library version in the form major * 10000 + minor * 100 + patch. -#define FMT_VERSION 90101 - -#define FMT_HEADER_ONLY - -#if defined(__clang__) && !defined(__ibmxl__) -# define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) -#else -# define FMT_CLANG_VERSION 0 -#endif - -#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && \ - !defined(__NVCOMPILER) -# define FMT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -#else -# define FMT_GCC_VERSION 0 -#endif - -#ifndef FMT_GCC_PRAGMA -// Workaround _Pragma bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59884. 
-# if FMT_GCC_VERSION >= 504 -# define FMT_GCC_PRAGMA(arg) _Pragma(arg) -# else -# define FMT_GCC_PRAGMA(arg) -# endif -#endif - -#ifdef __ICL -# define FMT_ICC_VERSION __ICL -#elif defined(__INTEL_COMPILER) -# define FMT_ICC_VERSION __INTEL_COMPILER -#else -# define FMT_ICC_VERSION 0 -#endif - -#ifdef _MSC_VER -# define FMT_MSC_VERSION _MSC_VER -# define FMT_MSC_WARNING(...) __pragma(warning(__VA_ARGS__)) -#else -# define FMT_MSC_VERSION 0 -# define FMT_MSC_WARNING(...) -#endif - -#ifdef _MSVC_LANG -# define FMT_CPLUSPLUS _MSVC_LANG -#else -# define FMT_CPLUSPLUS __cplusplus -#endif - -#ifdef __has_feature -# define FMT_HAS_FEATURE(x) __has_feature(x) -#else -# define FMT_HAS_FEATURE(x) 0 -#endif - -#if defined(__has_include) || FMT_ICC_VERSION >= 1600 || FMT_MSC_VERSION > 1900 -# define FMT_HAS_INCLUDE(x) __has_include(x) -#else -# define FMT_HAS_INCLUDE(x) 0 -#endif - -#ifdef __has_cpp_attribute -# define FMT_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) -#else -# define FMT_HAS_CPP_ATTRIBUTE(x) 0 -#endif - -#define FMT_HAS_CPP14_ATTRIBUTE(attribute) \ - (FMT_CPLUSPLUS >= 201402L && FMT_HAS_CPP_ATTRIBUTE(attribute)) - -#define FMT_HAS_CPP17_ATTRIBUTE(attribute) \ - (FMT_CPLUSPLUS >= 201703L && FMT_HAS_CPP_ATTRIBUTE(attribute)) - -// Check if relaxed C++14 constexpr is supported. -// GCC doesn't allow throw in constexpr until version 6 (bug 67371). 
-#ifndef FMT_USE_CONSTEXPR -# if (FMT_HAS_FEATURE(cxx_relaxed_constexpr) || FMT_MSC_VERSION >= 1912 || \ - (FMT_GCC_VERSION >= 600 && FMT_CPLUSPLUS >= 201402L)) && \ - !FMT_ICC_VERSION && !defined(__NVCC__) -# define FMT_USE_CONSTEXPR 1 -# else -# define FMT_USE_CONSTEXPR 0 -# endif -#endif -#if FMT_USE_CONSTEXPR -# define FMT_CONSTEXPR constexpr -#else -# define FMT_CONSTEXPR -#endif - -#if ((FMT_CPLUSPLUS >= 202002L) && \ - (!defined(_GLIBCXX_RELEASE) || _GLIBCXX_RELEASE > 9)) || \ - (FMT_CPLUSPLUS >= 201709L && FMT_GCC_VERSION >= 1002) -# define FMT_CONSTEXPR20 constexpr -#else -# define FMT_CONSTEXPR20 -#endif - -// Check if constexpr std::char_traits<>::{compare,length} are supported. -#if defined(__GLIBCXX__) -# if FMT_CPLUSPLUS >= 201703L && defined(_GLIBCXX_RELEASE) && \ - _GLIBCXX_RELEASE >= 7 // GCC 7+ libstdc++ has _GLIBCXX_RELEASE. -# define FMT_CONSTEXPR_CHAR_TRAITS constexpr -# endif -#elif defined(_LIBCPP_VERSION) && FMT_CPLUSPLUS >= 201703L && \ - _LIBCPP_VERSION >= 4000 -# define FMT_CONSTEXPR_CHAR_TRAITS constexpr -#elif FMT_MSC_VERSION >= 1914 && FMT_CPLUSPLUS >= 201703L -# define FMT_CONSTEXPR_CHAR_TRAITS constexpr -#endif -#ifndef FMT_CONSTEXPR_CHAR_TRAITS -# define FMT_CONSTEXPR_CHAR_TRAITS -#endif - -// Check if exceptions are disabled. -#ifndef FMT_EXCEPTIONS -# if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || \ - (FMT_MSC_VERSION && !_HAS_EXCEPTIONS) -# define FMT_EXCEPTIONS 0 -# else -# define FMT_EXCEPTIONS 1 -# endif -#endif - -#ifndef FMT_DEPRECATED -# if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VERSION >= 1900 -# define FMT_DEPRECATED [[deprecated]] -# else -# if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__) -# define FMT_DEPRECATED __attribute__((deprecated)) -# elif FMT_MSC_VERSION -# define FMT_DEPRECATED __declspec(deprecated) -# else -# define FMT_DEPRECATED /* deprecated */ -# endif -# endif -#endif - -// [[noreturn]] is disabled on MSVC and NVCC because of bogus unreachable code -// warnings. 
-#if FMT_EXCEPTIONS && FMT_HAS_CPP_ATTRIBUTE(noreturn) && !FMT_MSC_VERSION && \ - !defined(__NVCC__) -# define FMT_NORETURN [[noreturn]] -#else -# define FMT_NORETURN -#endif - -#if FMT_HAS_CPP17_ATTRIBUTE(fallthrough) -# define FMT_FALLTHROUGH [[fallthrough]] -#elif defined(__clang__) -# define FMT_FALLTHROUGH [[clang::fallthrough]] -#elif FMT_GCC_VERSION >= 700 && \ - (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 520) -# define FMT_FALLTHROUGH [[gnu::fallthrough]] -#else -# define FMT_FALLTHROUGH -#endif - -#ifndef FMT_NODISCARD -# if FMT_HAS_CPP17_ATTRIBUTE(nodiscard) -# define FMT_NODISCARD [[nodiscard]] -# else -# define FMT_NODISCARD -# endif -#endif - -#ifndef FMT_USE_FLOAT -# define FMT_USE_FLOAT 1 -#endif -#ifndef FMT_USE_DOUBLE -# define FMT_USE_DOUBLE 1 -#endif -#ifndef FMT_USE_LONG_DOUBLE -# define FMT_USE_LONG_DOUBLE 1 -#endif - -#ifndef FMT_INLINE -# if FMT_GCC_VERSION || FMT_CLANG_VERSION -# define FMT_INLINE inline __attribute__((always_inline)) -# else -# define FMT_INLINE inline -# endif -#endif - -// An inline std::forward replacement. -#define FMT_FORWARD(...) static_cast(__VA_ARGS__) - -#ifdef _MSC_VER -# define FMT_UNCHECKED_ITERATOR(It) \ - using _Unchecked_type = It // Mark iterator as checked. 
-#else -# define FMT_UNCHECKED_ITERATOR(It) using unchecked_type = It -#endif - -#ifndef FMT_BEGIN_NAMESPACE -# define FMT_BEGIN_NAMESPACE \ - namespace fmt { \ - inline namespace v9 { -# define FMT_END_NAMESPACE \ - } \ - } -#endif - -#ifndef FMT_MODULE_EXPORT -# define FMT_MODULE_EXPORT -# define FMT_MODULE_EXPORT_BEGIN -# define FMT_MODULE_EXPORT_END -# define FMT_BEGIN_DETAIL_NAMESPACE namespace detail { -# define FMT_END_DETAIL_NAMESPACE } -#endif - -#if !defined(FMT_HEADER_ONLY) && defined(_WIN32) -# define FMT_CLASS_API FMT_MSC_WARNING(suppress : 4275) -# ifdef FMT_EXPORT -# define FMT_API __declspec(dllexport) -# elif defined(FMT_SHARED) -# define FMT_API __declspec(dllimport) -# endif -#else -# define FMT_CLASS_API -# if defined(FMT_EXPORT) || defined(FMT_SHARED) -# if defined(__GNUC__) || defined(__clang__) -# define FMT_API __attribute__((visibility("default"))) -# endif -# endif -#endif -#ifndef FMT_API -# define FMT_API -#endif - -// libc++ supports string_view in pre-c++17. -#if FMT_HAS_INCLUDE() && \ - (FMT_CPLUSPLUS >= 201703L || defined(_LIBCPP_VERSION)) -# include -# define FMT_USE_STRING_VIEW -#elif FMT_HAS_INCLUDE("experimental/string_view") && FMT_CPLUSPLUS >= 201402L -# include -# define FMT_USE_EXPERIMENTAL_STRING_VIEW -#endif - -#ifndef FMT_UNICODE -# define FMT_UNICODE !FMT_MSC_VERSION -#endif - -#ifndef FMT_CONSTEVAL -# if ((FMT_GCC_VERSION >= 1000 || FMT_CLANG_VERSION >= 1101) && \ - FMT_CPLUSPLUS >= 202002L && !defined(__apple_build_version__)) || \ - (defined(__cpp_consteval) && \ - (!FMT_MSC_VERSION || _MSC_FULL_VER >= 193030704)) -// consteval is broken in MSVC before VS2022 and Apple clang 13. 
-# define FMT_CONSTEVAL consteval -# define FMT_HAS_CONSTEVAL -# else -# define FMT_CONSTEVAL -# endif -#endif - -#ifndef FMT_USE_NONTYPE_TEMPLATE_ARGS -# if defined(__cpp_nontype_template_args) && \ - ((FMT_GCC_VERSION >= 903 && FMT_CPLUSPLUS >= 201709L) || \ - __cpp_nontype_template_args >= 201911L) && \ - !defined(__NVCOMPILER) && !defined(__LCC__) -# define FMT_USE_NONTYPE_TEMPLATE_ARGS 1 -# else -# define FMT_USE_NONTYPE_TEMPLATE_ARGS 0 -# endif -#endif - -// Enable minimal optimizations for more compact code in debug mode. -FMT_GCC_PRAGMA("GCC push_options") -#if !defined(__OPTIMIZE__) && !defined(__NVCOMPILER) && !defined(__LCC__) -FMT_GCC_PRAGMA("GCC optimize(\"Og\")") -#endif - -FMT_BEGIN_NAMESPACE -FMT_MODULE_EXPORT_BEGIN - -// Implementations of enable_if_t and other metafunctions for older systems. -template -using enable_if_t = typename std::enable_if::type; -template -using conditional_t = typename std::conditional::type; -template using bool_constant = std::integral_constant; -template -using remove_reference_t = typename std::remove_reference::type; -template -using remove_const_t = typename std::remove_const::type; -template -using remove_cvref_t = typename std::remove_cv>::type; -template struct type_identity { using type = T; }; -template using type_identity_t = typename type_identity::type; -template -using underlying_t = typename std::underlying_type::type; - -template struct disjunction : std::false_type {}; -template struct disjunction

: P {}; -template -struct disjunction - : conditional_t> {}; - -template struct conjunction : std::true_type {}; -template struct conjunction

: P {}; -template -struct conjunction - : conditional_t, P1> {}; - -struct monostate { - constexpr monostate() {} -}; - -// An enable_if helper to be used in template parameters which results in much -// shorter symbols: https://godbolt.org/z/sWw4vP. Extra parentheses are needed -// to workaround a bug in MSVC 2019 (see #1140 and #1186). -#ifdef FMT_DOC -# define FMT_ENABLE_IF(...) -#else -# define FMT_ENABLE_IF(...) fmt::enable_if_t<(__VA_ARGS__), int> = 0 -#endif - -FMT_BEGIN_DETAIL_NAMESPACE - -// Suppresses "unused variable" warnings with the method described in -// https://herbsutter.com/2009/10/18/mailbag-shutting-up-compiler-warnings/. -// (void)var does not work on many Intel compilers. -template FMT_CONSTEXPR void ignore_unused(const T&...) {} - -constexpr FMT_INLINE auto is_constant_evaluated( - bool default_value = false) noexcept -> bool { -#ifdef __cpp_lib_is_constant_evaluated - ignore_unused(default_value); - return std::is_constant_evaluated(); -#else - return default_value; -#endif -} - -// Suppresses "conditional expression is constant" warnings. -template constexpr FMT_INLINE auto const_check(T value) -> T { - return value; -} - -FMT_NORETURN FMT_API void assert_fail(const char* file, int line, - const char* message); - -#ifndef FMT_ASSERT -# ifdef NDEBUG -// FMT_ASSERT is not empty to avoid -Wempty-body. -# define FMT_ASSERT(condition, message) \ - ::fmt::detail::ignore_unused((condition), (message)) -# else -# define FMT_ASSERT(condition, message) \ - ((condition) /* void() fails with -Winvalid-constexpr on clang 4.0.1 */ \ - ? (void)0 \ - : ::fmt::detail::assert_fail(__FILE__, __LINE__, (message))) -# endif -#endif - -#if defined(FMT_USE_STRING_VIEW) -template using std_string_view = std::basic_string_view; -#elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW) -template -using std_string_view = std::experimental::basic_string_view; -#else -template struct std_string_view {}; -#endif - -#ifdef FMT_USE_INT128 -// Do nothing. 
-#elif defined(__SIZEOF_INT128__) && !defined(__NVCC__) && \ - !(FMT_CLANG_VERSION && FMT_MSC_VERSION) -# define FMT_USE_INT128 1 -using int128_opt = __int128_t; // An optional native 128-bit integer. -using uint128_opt = __uint128_t; -template inline auto convert_for_visit(T value) -> T { - return value; -} -#else -# define FMT_USE_INT128 0 -#endif -#if !FMT_USE_INT128 -enum class int128_opt {}; -enum class uint128_opt {}; -// Reduce template instantiations. -template auto convert_for_visit(T) -> monostate { return {}; } -#endif - -// Casts a nonnegative integer to unsigned. -template -FMT_CONSTEXPR auto to_unsigned(Int value) -> - typename std::make_unsigned::type { - return static_cast::type>(value); -} - -FMT_MSC_WARNING(suppress : 4566) constexpr unsigned char micro[] = "\u00B5"; - -constexpr auto is_utf8() -> bool { - // Avoid buggy sign extensions in MSVC's constant evaluation mode (#2297). - using uchar = unsigned char; - return FMT_UNICODE || (sizeof(micro) == 3 && uchar(micro[0]) == 0xC2 && - uchar(micro[1]) == 0xB5); -} -FMT_END_DETAIL_NAMESPACE - -/** - An implementation of ``std::basic_string_view`` for pre-C++17. It provides a - subset of the API. ``fmt::basic_string_view`` is used for format strings even - if ``std::string_view`` is available to prevent issues when a library is - compiled with a different ``-std`` option than the client code (which is not - recommended). - */ -template class basic_string_view { - private: - const Char* data_; - size_t size_; - - public: - using value_type = Char; - using iterator = const Char*; - - constexpr basic_string_view() noexcept : data_(nullptr), size_(0) {} - - /** Constructs a string reference object from a C string and a size. */ - constexpr basic_string_view(const Char* s, size_t count) noexcept - : data_(s), size_(count) {} - - /** - \rst - Constructs a string reference object from a C string computing - the size with ``std::char_traits::length``. 
- \endrst - */ - FMT_CONSTEXPR_CHAR_TRAITS - FMT_INLINE - basic_string_view(const Char* s) - : data_(s), - size_(detail::const_check(std::is_same::value && - !detail::is_constant_evaluated(true)) - ? std::strlen(reinterpret_cast(s)) - : std::char_traits::length(s)) {} - - /** Constructs a string reference from a ``std::basic_string`` object. */ - template - FMT_CONSTEXPR basic_string_view( - const std::basic_string& s) noexcept - : data_(s.data()), size_(s.size()) {} - - template >::value)> - FMT_CONSTEXPR basic_string_view(S s) noexcept - : data_(s.data()), size_(s.size()) {} - - /** Returns a pointer to the string data. */ - constexpr auto data() const noexcept -> const Char* { return data_; } - - /** Returns the string size. */ - constexpr auto size() const noexcept -> size_t { return size_; } - - constexpr auto begin() const noexcept -> iterator { return data_; } - constexpr auto end() const noexcept -> iterator { return data_ + size_; } - - constexpr auto operator[](size_t pos) const noexcept -> const Char& { - return data_[pos]; - } - - FMT_CONSTEXPR void remove_prefix(size_t n) noexcept { - data_ += n; - size_ -= n; - } - - FMT_CONSTEXPR_CHAR_TRAITS bool starts_with( - basic_string_view sv) const noexcept { - return size_ >= sv.size_ && - std::char_traits::compare(data_, sv.data_, sv.size_) == 0; - } - FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(Char c) const noexcept { - return size_ >= 1 && std::char_traits::eq(*data_, c); - } - FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(const Char* s) const { - return starts_with(basic_string_view(s)); - } - - // Lexicographically compare this string reference to other. - FMT_CONSTEXPR_CHAR_TRAITS auto compare(basic_string_view other) const -> int { - size_t str_size = size_ < other.size_ ? size_ : other.size_; - int result = std::char_traits::compare(data_, other.data_, str_size); - if (result == 0) - result = size_ == other.size_ ? 0 : (size_ < other.size_ ? 
-1 : 1); - return result; - } - - FMT_CONSTEXPR_CHAR_TRAITS friend auto operator==(basic_string_view lhs, - basic_string_view rhs) - -> bool { - return lhs.compare(rhs) == 0; - } - friend auto operator!=(basic_string_view lhs, basic_string_view rhs) -> bool { - return lhs.compare(rhs) != 0; - } - friend auto operator<(basic_string_view lhs, basic_string_view rhs) -> bool { - return lhs.compare(rhs) < 0; - } - friend auto operator<=(basic_string_view lhs, basic_string_view rhs) -> bool { - return lhs.compare(rhs) <= 0; - } - friend auto operator>(basic_string_view lhs, basic_string_view rhs) -> bool { - return lhs.compare(rhs) > 0; - } - friend auto operator>=(basic_string_view lhs, basic_string_view rhs) -> bool { - return lhs.compare(rhs) >= 0; - } -}; - -using string_view = basic_string_view; - -/** Specifies if ``T`` is a character type. Can be specialized by users. */ -template struct is_char : std::false_type {}; -template <> struct is_char : std::true_type {}; - -FMT_BEGIN_DETAIL_NAMESPACE - -// A base class for compile-time strings. -struct compile_string {}; - -template -struct is_compile_string : std::is_base_of {}; - -// Returns a string view of `s`. -template ::value)> -FMT_INLINE auto to_string_view(const Char* s) -> basic_string_view { - return s; -} -template -inline auto to_string_view(const std::basic_string& s) - -> basic_string_view { - return s; -} -template -constexpr auto to_string_view(basic_string_view s) - -> basic_string_view { - return s; -} -template >::value)> -inline auto to_string_view(std_string_view s) -> basic_string_view { - return s; -} -template ::value)> -constexpr auto to_string_view(const S& s) - -> basic_string_view { - return basic_string_view(s); -} -void to_string_view(...); - -// Specifies whether S is a string type convertible to fmt::basic_string_view. -// It should be a constexpr function but MSVC 2017 fails to compile it in -// enable_if and MSVC 2015 fails to compile it as an alias template. 
-// ADL invocation of to_string_view is DEPRECATED! -template -struct is_string : std::is_class()))> { -}; - -template struct char_t_impl {}; -template struct char_t_impl::value>> { - using result = decltype(to_string_view(std::declval())); - using type = typename result::value_type; -}; - -enum class type { - none_type, - // Integer types should go first, - int_type, - uint_type, - long_long_type, - ulong_long_type, - int128_type, - uint128_type, - bool_type, - char_type, - last_integer_type = char_type, - // followed by floating-point types. - float_type, - double_type, - long_double_type, - last_numeric_type = long_double_type, - cstring_type, - string_type, - pointer_type, - custom_type -}; - -// Maps core type T to the corresponding type enum constant. -template -struct type_constant : std::integral_constant {}; - -#define FMT_TYPE_CONSTANT(Type, constant) \ - template \ - struct type_constant \ - : std::integral_constant {} - -FMT_TYPE_CONSTANT(int, int_type); -FMT_TYPE_CONSTANT(unsigned, uint_type); -FMT_TYPE_CONSTANT(long long, long_long_type); -FMT_TYPE_CONSTANT(unsigned long long, ulong_long_type); -FMT_TYPE_CONSTANT(int128_opt, int128_type); -FMT_TYPE_CONSTANT(uint128_opt, uint128_type); -FMT_TYPE_CONSTANT(bool, bool_type); -FMT_TYPE_CONSTANT(Char, char_type); -FMT_TYPE_CONSTANT(float, float_type); -FMT_TYPE_CONSTANT(double, double_type); -FMT_TYPE_CONSTANT(long double, long_double_type); -FMT_TYPE_CONSTANT(const Char*, cstring_type); -FMT_TYPE_CONSTANT(basic_string_view, string_type); -FMT_TYPE_CONSTANT(const void*, pointer_type); - -constexpr bool is_integral_type(type t) { - return t > type::none_type && t <= type::last_integer_type; -} - -constexpr bool is_arithmetic_type(type t) { - return t > type::none_type && t <= type::last_numeric_type; -} - -FMT_NORETURN FMT_API void throw_format_error(const char* message); - -struct error_handler { - constexpr error_handler() = default; - constexpr error_handler(const error_handler&) = default; - - // This 
function is intentionally not constexpr to give a compile-time error. - FMT_NORETURN void on_error(const char* message) { - throw_format_error(message); - } -}; -FMT_END_DETAIL_NAMESPACE - -/** String's character type. */ -template using char_t = typename detail::char_t_impl::type; - -/** - \rst - Parsing context consisting of a format string range being parsed and an - argument counter for automatic indexing. - You can use the ``format_parse_context`` type alias for ``char`` instead. - \endrst - */ -template -class basic_format_parse_context : private ErrorHandler { - private: - basic_string_view format_str_; - int next_arg_id_; - - FMT_CONSTEXPR void do_check_arg_id(int id); - - public: - using char_type = Char; - using iterator = typename basic_string_view::iterator; - - explicit constexpr basic_format_parse_context( - basic_string_view format_str, ErrorHandler eh = {}, - int next_arg_id = 0) - : ErrorHandler(eh), format_str_(format_str), next_arg_id_(next_arg_id) {} - - /** - Returns an iterator to the beginning of the format string range being - parsed. - */ - constexpr auto begin() const noexcept -> iterator { - return format_str_.begin(); - } - - /** - Returns an iterator past the end of the format string range being parsed. - */ - constexpr auto end() const noexcept -> iterator { return format_str_.end(); } - - /** Advances the begin iterator to ``it``. */ - FMT_CONSTEXPR void advance_to(iterator it) { - format_str_.remove_prefix(detail::to_unsigned(it - begin())); - } - - /** - Reports an error if using the manual argument indexing; otherwise returns - the next argument index and switches to the automatic indexing. - */ - FMT_CONSTEXPR auto next_arg_id() -> int { - if (next_arg_id_ < 0) { - on_error("cannot switch from manual to automatic argument indexing"); - return 0; - } - int id = next_arg_id_++; - do_check_arg_id(id); - return id; - } - - /** - Reports an error if using the automatic argument indexing; otherwise - switches to the manual indexing. 
- */ - FMT_CONSTEXPR void check_arg_id(int id) { - if (next_arg_id_ > 0) { - on_error("cannot switch from automatic to manual argument indexing"); - return; - } - next_arg_id_ = -1; - do_check_arg_id(id); - } - FMT_CONSTEXPR void check_arg_id(basic_string_view) {} - FMT_CONSTEXPR void check_dynamic_spec(int arg_id); - - FMT_CONSTEXPR void on_error(const char* message) { - ErrorHandler::on_error(message); - } - - constexpr auto error_handler() const -> ErrorHandler { return *this; } -}; - -using format_parse_context = basic_format_parse_context; - -FMT_BEGIN_DETAIL_NAMESPACE -// A parse context with extra data used only in compile-time checks. -template -class compile_parse_context - : public basic_format_parse_context { - private: - int num_args_; - const type* types_; - using base = basic_format_parse_context; - - public: - explicit FMT_CONSTEXPR compile_parse_context( - basic_string_view format_str, int num_args, const type* types, - ErrorHandler eh = {}, int next_arg_id = 0) - : base(format_str, eh, next_arg_id), num_args_(num_args), types_(types) {} - - constexpr auto num_args() const -> int { return num_args_; } - constexpr auto arg_type(int id) const -> type { return types_[id]; } - - FMT_CONSTEXPR auto next_arg_id() -> int { - int id = base::next_arg_id(); - if (id >= num_args_) this->on_error("argument not found"); - return id; - } - - FMT_CONSTEXPR void check_arg_id(int id) { - base::check_arg_id(id); - if (id >= num_args_) this->on_error("argument not found"); - } - using base::check_arg_id; - - FMT_CONSTEXPR void check_dynamic_spec(int arg_id) { - detail::ignore_unused(arg_id); -#if !defined(__LCC__) - if (arg_id < num_args_ && types_ && !is_integral_type(types_[arg_id])) - this->on_error("width/precision is not integer"); -#endif - } -}; -FMT_END_DETAIL_NAMESPACE - -template -FMT_CONSTEXPR void -basic_format_parse_context::do_check_arg_id(int id) { - // Argument id is only checked at compile-time during parsing because - // formatting has its own 
validation. - if (detail::is_constant_evaluated() && FMT_GCC_VERSION >= 1200) { - using context = detail::compile_parse_context; - if (id >= static_cast(this)->num_args()) - on_error("argument not found"); - } -} - -template -FMT_CONSTEXPR void -basic_format_parse_context::check_dynamic_spec(int arg_id) { - if (detail::is_constant_evaluated()) { - using context = detail::compile_parse_context; - static_cast(this)->check_dynamic_spec(arg_id); - } -} - -template class basic_format_arg; -template class basic_format_args; -template class dynamic_format_arg_store; - -// A formatter for objects of type T. -template -struct formatter { - // A deleted default constructor indicates a disabled formatter. - formatter() = delete; -}; - -// Specifies if T has an enabled formatter specialization. A type can be -// formattable even if it doesn't have a formatter e.g. via a conversion. -template -using has_formatter = - std::is_constructible>; - -// Checks whether T is a container with contiguous storage. -template struct is_contiguous : std::false_type {}; -template -struct is_contiguous> : std::true_type {}; - -class appender; - -FMT_BEGIN_DETAIL_NAMESPACE - -template -constexpr auto has_const_formatter_impl(T*) - -> decltype(typename Context::template formatter_type().format( - std::declval(), std::declval()), - true) { - return true; -} -template -constexpr auto has_const_formatter_impl(...) -> bool { - return false; -} -template -constexpr auto has_const_formatter() -> bool { - return has_const_formatter_impl(static_cast(nullptr)); -} - -// Extracts a reference to the container from back_insert_iterator. 
-template -inline auto get_container(std::back_insert_iterator it) - -> Container& { - using base = std::back_insert_iterator; - struct accessor : base { - accessor(base b) : base(b) {} - using base::container; - }; - return *accessor(it).container; -} - -template -FMT_CONSTEXPR auto copy_str(InputIt begin, InputIt end, OutputIt out) - -> OutputIt { - while (begin != end) *out++ = static_cast(*begin++); - return out; -} - -template , U>::value&& is_char::value)> -FMT_CONSTEXPR auto copy_str(T* begin, T* end, U* out) -> U* { - if (is_constant_evaluated()) return copy_str(begin, end, out); - auto size = to_unsigned(end - begin); - memcpy(out, begin, size * sizeof(U)); - return out + size; -} - -/** - \rst - A contiguous memory buffer with an optional growing ability. It is an internal - class and shouldn't be used directly, only via `~fmt::basic_memory_buffer`. - \endrst - */ -template class buffer { - private: - T* ptr_; - size_t size_; - size_t capacity_; - - protected: - // Don't initialize ptr_ since it is not accessed to save a few cycles. - FMT_MSC_WARNING(suppress : 26495) - buffer(size_t sz) noexcept : size_(sz), capacity_(sz) {} - - FMT_CONSTEXPR20 buffer(T* p = nullptr, size_t sz = 0, size_t cap = 0) noexcept - : ptr_(p), size_(sz), capacity_(cap) {} - - FMT_CONSTEXPR20 ~buffer() = default; - buffer(buffer&&) = default; - - /** Sets the buffer data and capacity. */ - FMT_CONSTEXPR void set(T* buf_data, size_t buf_capacity) noexcept { - ptr_ = buf_data; - capacity_ = buf_capacity; - } - - /** Increases the buffer capacity to hold at least *capacity* elements. 
*/ - virtual FMT_CONSTEXPR20 void grow(size_t capacity) = 0; - - public: - using value_type = T; - using const_reference = const T&; - - buffer(const buffer&) = delete; - void operator=(const buffer&) = delete; - - FMT_INLINE auto begin() noexcept -> T* { return ptr_; } - FMT_INLINE auto end() noexcept -> T* { return ptr_ + size_; } - - FMT_INLINE auto begin() const noexcept -> const T* { return ptr_; } - FMT_INLINE auto end() const noexcept -> const T* { return ptr_ + size_; } - - /** Returns the size of this buffer. */ - constexpr auto size() const noexcept -> size_t { return size_; } - - /** Returns the capacity of this buffer. */ - constexpr auto capacity() const noexcept -> size_t { return capacity_; } - - /** Returns a pointer to the buffer data. */ - FMT_CONSTEXPR auto data() noexcept -> T* { return ptr_; } - - /** Returns a pointer to the buffer data. */ - FMT_CONSTEXPR auto data() const noexcept -> const T* { return ptr_; } - - /** Clears this buffer. */ - void clear() { size_ = 0; } - - // Tries resizing the buffer to contain *count* elements. If T is a POD type - // the new elements may not be initialized. - FMT_CONSTEXPR20 void try_resize(size_t count) { - try_reserve(count); - size_ = count <= capacity_ ? count : capacity_; - } - - // Tries increasing the buffer capacity to *new_capacity*. It can increase the - // capacity by a smaller amount than requested but guarantees there is space - // for at least one additional element either by increasing the capacity or by - // flushing the buffer if it is full. - FMT_CONSTEXPR20 void try_reserve(size_t new_capacity) { - if (new_capacity > capacity_) grow(new_capacity); - } - - FMT_CONSTEXPR20 void push_back(const T& value) { - try_reserve(size_ + 1); - ptr_[size_++] = value; - } - - /** Appends data to the end of the buffer. 
*/ - template void append(const U* begin, const U* end); - - template FMT_CONSTEXPR auto operator[](Idx index) -> T& { - return ptr_[index]; - } - template - FMT_CONSTEXPR auto operator[](Idx index) const -> const T& { - return ptr_[index]; - } -}; - -struct buffer_traits { - explicit buffer_traits(size_t) {} - auto count() const -> size_t { return 0; } - auto limit(size_t size) -> size_t { return size; } -}; - -class fixed_buffer_traits { - private: - size_t count_ = 0; - size_t limit_; - - public: - explicit fixed_buffer_traits(size_t limit) : limit_(limit) {} - auto count() const -> size_t { return count_; } - auto limit(size_t size) -> size_t { - size_t n = limit_ > count_ ? limit_ - count_ : 0; - count_ += size; - return size < n ? size : n; - } -}; - -// A buffer that writes to an output iterator when flushed. -template -class iterator_buffer final : public Traits, public buffer { - private: - OutputIt out_; - enum { buffer_size = 256 }; - T data_[buffer_size]; - - protected: - FMT_CONSTEXPR20 void grow(size_t) override { - if (this->size() == buffer_size) flush(); - } - - void flush() { - auto size = this->size(); - this->clear(); - out_ = copy_str(data_, data_ + this->limit(size), out_); - } - - public: - explicit iterator_buffer(OutputIt out, size_t n = buffer_size) - : Traits(n), buffer(data_, 0, buffer_size), out_(out) {} - iterator_buffer(iterator_buffer&& other) - : Traits(other), buffer(data_, 0, buffer_size), out_(other.out_) {} - ~iterator_buffer() { flush(); } - - auto out() -> OutputIt { - flush(); - return out_; - } - auto count() const -> size_t { return Traits::count() + this->size(); } -}; - -template -class iterator_buffer final - : public fixed_buffer_traits, - public buffer { - private: - T* out_; - enum { buffer_size = 256 }; - T data_[buffer_size]; - - protected: - FMT_CONSTEXPR20 void grow(size_t) override { - if (this->size() == this->capacity()) flush(); - } - - void flush() { - size_t n = this->limit(this->size()); - if (this->data() 
== out_) { - out_ += n; - this->set(data_, buffer_size); - } - this->clear(); - } - - public: - explicit iterator_buffer(T* out, size_t n = buffer_size) - : fixed_buffer_traits(n), buffer(out, 0, n), out_(out) {} - iterator_buffer(iterator_buffer&& other) - : fixed_buffer_traits(other), - buffer(std::move(other)), - out_(other.out_) { - if (this->data() != out_) { - this->set(data_, buffer_size); - this->clear(); - } - } - ~iterator_buffer() { flush(); } - - auto out() -> T* { - flush(); - return out_; - } - auto count() const -> size_t { - return fixed_buffer_traits::count() + this->size(); - } -}; - -template class iterator_buffer final : public buffer { - protected: - FMT_CONSTEXPR20 void grow(size_t) override {} - - public: - explicit iterator_buffer(T* out, size_t = 0) : buffer(out, 0, ~size_t()) {} - - auto out() -> T* { return &*this->end(); } -}; - -// A buffer that writes to a container with the contiguous storage. -template -class iterator_buffer, - enable_if_t::value, - typename Container::value_type>> - final : public buffer { - private: - Container& container_; - - protected: - FMT_CONSTEXPR20 void grow(size_t capacity) override { - container_.resize(capacity); - this->set(&container_[0], capacity); - } - - public: - explicit iterator_buffer(Container& c) - : buffer(c.size()), container_(c) {} - explicit iterator_buffer(std::back_insert_iterator out, size_t = 0) - : iterator_buffer(get_container(out)) {} - - auto out() -> std::back_insert_iterator { - return std::back_inserter(container_); - } -}; - -// A buffer that counts the number of code units written discarding the output. 
-template class counting_buffer final : public buffer { - private: - enum { buffer_size = 256 }; - T data_[buffer_size]; - size_t count_ = 0; - - protected: - FMT_CONSTEXPR20 void grow(size_t) override { - if (this->size() != buffer_size) return; - count_ += this->size(); - this->clear(); - } - - public: - counting_buffer() : buffer(data_, 0, buffer_size) {} - - auto count() -> size_t { return count_ + this->size(); } -}; - -template -using buffer_appender = conditional_t::value, appender, - std::back_insert_iterator>>; - -// Maps an output iterator to a buffer. -template -auto get_buffer(OutputIt out) -> iterator_buffer { - return iterator_buffer(out); -} -template , Buf>::value)> -auto get_buffer(std::back_insert_iterator out) -> buffer& { - return get_container(out); -} - -template -FMT_INLINE auto get_iterator(Buf& buf, OutputIt) -> decltype(buf.out()) { - return buf.out(); -} -template -auto get_iterator(buffer&, OutputIt out) -> OutputIt { - return out; -} - -template -struct fallback_formatter { - fallback_formatter() = delete; -}; - -// Specifies if T has an enabled fallback_formatter specialization. -template -using has_fallback_formatter = -#ifdef FMT_DEPRECATED_OSTREAM - std::is_constructible>; -#else - std::false_type; -#endif - -struct view {}; - -template struct named_arg : view { - const Char* name; - const T& value; - named_arg(const Char* n, const T& v) : name(n), value(v) {} -}; - -template struct named_arg_info { - const Char* name; - int id; -}; - -template -struct arg_data { - // args_[0].named_args points to named_args_ to avoid bloating format_args. - // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning. - T args_[1 + (NUM_ARGS != 0 ? NUM_ARGS : +1)]; - named_arg_info named_args_[NUM_NAMED_ARGS]; - - template - arg_data(const U&... 
init) : args_{T(named_args_, NUM_NAMED_ARGS), init...} {} - arg_data(const arg_data& other) = delete; - auto args() const -> const T* { return args_ + 1; } - auto named_args() -> named_arg_info* { return named_args_; } -}; - -template -struct arg_data { - // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning. - T args_[NUM_ARGS != 0 ? NUM_ARGS : +1]; - - template - FMT_CONSTEXPR FMT_INLINE arg_data(const U&... init) : args_{init...} {} - FMT_CONSTEXPR FMT_INLINE auto args() const -> const T* { return args_; } - FMT_CONSTEXPR FMT_INLINE auto named_args() -> std::nullptr_t { - return nullptr; - } -}; - -template -inline void init_named_args(named_arg_info*, int, int) {} - -template struct is_named_arg : std::false_type {}; -template struct is_statically_named_arg : std::false_type {}; - -template -struct is_named_arg> : std::true_type {}; - -template ::value)> -void init_named_args(named_arg_info* named_args, int arg_count, - int named_arg_count, const T&, const Tail&... args) { - init_named_args(named_args, arg_count + 1, named_arg_count, args...); -} - -template ::value)> -void init_named_args(named_arg_info* named_args, int arg_count, - int named_arg_count, const T& arg, const Tail&... args) { - named_args[named_arg_count++] = {arg.name, arg_count}; - init_named_args(named_args, arg_count + 1, named_arg_count, args...); -} - -template -FMT_CONSTEXPR FMT_INLINE void init_named_args(std::nullptr_t, int, int, - const Args&...) {} - -template constexpr auto count() -> size_t { return B ? 1 : 0; } -template constexpr auto count() -> size_t { - return (B1 ? 
1 : 0) + count(); -} - -template constexpr auto count_named_args() -> size_t { - return count::value...>(); -} - -template -constexpr auto count_statically_named_args() -> size_t { - return count::value...>(); -} - -struct unformattable {}; -struct unformattable_char : unformattable {}; -struct unformattable_const : unformattable {}; -struct unformattable_pointer : unformattable {}; - -template struct string_value { - const Char* data; - size_t size; -}; - -template struct named_arg_value { - const named_arg_info* data; - size_t size; -}; - -template struct custom_value { - using parse_context = typename Context::parse_context_type; - void* value; - void (*format)(void* arg, parse_context& parse_ctx, Context& ctx); -}; - -// A formatting argument value. -template class value { - public: - using char_type = typename Context::char_type; - - union { - monostate no_value; - int int_value; - unsigned uint_value; - long long long_long_value; - unsigned long long ulong_long_value; - int128_opt int128_value; - uint128_opt uint128_value; - bool bool_value; - char_type char_value; - float float_value; - double double_value; - long double long_double_value; - const void* pointer; - string_value string; - custom_value custom; - named_arg_value named_args; - }; - - constexpr FMT_INLINE value() : no_value() {} - constexpr FMT_INLINE value(int val) : int_value(val) {} - constexpr FMT_INLINE value(unsigned val) : uint_value(val) {} - constexpr FMT_INLINE value(long long val) : long_long_value(val) {} - constexpr FMT_INLINE value(unsigned long long val) : ulong_long_value(val) {} - FMT_INLINE value(int128_opt val) : int128_value(val) {} - FMT_INLINE value(uint128_opt val) : uint128_value(val) {} - constexpr FMT_INLINE value(float val) : float_value(val) {} - constexpr FMT_INLINE value(double val) : double_value(val) {} - FMT_INLINE value(long double val) : long_double_value(val) {} - constexpr FMT_INLINE value(bool val) : bool_value(val) {} - constexpr FMT_INLINE value(char_type 
val) : char_value(val) {} - FMT_CONSTEXPR FMT_INLINE value(const char_type* val) { - string.data = val; - if (is_constant_evaluated()) string.size = {}; - } - FMT_CONSTEXPR FMT_INLINE value(basic_string_view val) { - string.data = val.data(); - string.size = val.size(); - } - FMT_INLINE value(const void* val) : pointer(val) {} - FMT_INLINE value(const named_arg_info* args, size_t size) - : named_args{args, size} {} - - template FMT_CONSTEXPR FMT_INLINE value(T& val) { - using value_type = remove_cvref_t; - custom.value = const_cast(&val); - // Get the formatter type through the context to allow different contexts - // have different extension points, e.g. `formatter` for `format` and - // `printf_formatter` for `printf`. - custom.format = format_custom_arg< - value_type, - conditional_t::value, - typename Context::template formatter_type, - fallback_formatter>>; - } - value(unformattable); - value(unformattable_char); - value(unformattable_const); - value(unformattable_pointer); - - private: - // Formats an argument of a custom type, such as a user-defined class. - template - static void format_custom_arg(void* arg, - typename Context::parse_context_type& parse_ctx, - Context& ctx) { - auto f = Formatter(); - parse_ctx.advance_to(f.parse(parse_ctx)); - using qualified_type = - conditional_t(), const T, T>; - ctx.advance_to(f.format(*static_cast(arg), ctx)); - } -}; - -template -FMT_CONSTEXPR auto make_arg(T&& value) -> basic_format_arg; - -// To minimize the number of types we need to deal with, long is translated -// either to int or to long long depending on its size. -enum { long_short = sizeof(long) == sizeof(int) }; -using long_type = conditional_t; -using ulong_type = conditional_t; - -#ifdef __cpp_lib_byte -inline auto format_as(std::byte b) -> unsigned char { - return static_cast(b); -} -#endif - -template struct has_format_as { - template ::value&& std::is_integral::value)> - static auto check(U*) -> std::true_type; - static auto check(...) 
-> std::false_type; - - enum { value = decltype(check(static_cast(nullptr)))::value }; -}; - -// Maps formatting arguments to core types. -// arg_mapper reports errors by returning unformattable instead of using -// static_assert because it's used in the is_formattable trait. -template struct arg_mapper { - using char_type = typename Context::char_type; - - FMT_CONSTEXPR FMT_INLINE auto map(signed char val) -> int { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(unsigned char val) -> unsigned { - return val; - } - FMT_CONSTEXPR FMT_INLINE auto map(short val) -> int { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(unsigned short val) -> unsigned { - return val; - } - FMT_CONSTEXPR FMT_INLINE auto map(int val) -> int { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(unsigned val) -> unsigned { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(long val) -> long_type { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(unsigned long val) -> ulong_type { - return val; - } - FMT_CONSTEXPR FMT_INLINE auto map(long long val) -> long long { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(unsigned long long val) - -> unsigned long long { - return val; - } - FMT_CONSTEXPR FMT_INLINE auto map(int128_opt val) -> int128_opt { - return val; - } - FMT_CONSTEXPR FMT_INLINE auto map(uint128_opt val) -> uint128_opt { - return val; - } - FMT_CONSTEXPR FMT_INLINE auto map(bool val) -> bool { return val; } - - template ::value || - std::is_same::value)> - FMT_CONSTEXPR FMT_INLINE auto map(T val) -> char_type { - return val; - } - template ::value || -#ifdef __cpp_char8_t - std::is_same::value || -#endif - std::is_same::value || - std::is_same::value) && - !std::is_same::value, - int> = 0> - FMT_CONSTEXPR FMT_INLINE auto map(T) -> unformattable_char { - return {}; - } - - FMT_CONSTEXPR FMT_INLINE auto map(float val) -> float { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(double val) -> double { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(long double val) -> long 
double { - return val; - } - - FMT_CONSTEXPR FMT_INLINE auto map(char_type* val) -> const char_type* { - return val; - } - FMT_CONSTEXPR FMT_INLINE auto map(const char_type* val) -> const char_type* { - return val; - } - template ::value && !std::is_pointer::value && - std::is_same>::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T& val) - -> basic_string_view { - return to_string_view(val); - } - template ::value && !std::is_pointer::value && - !std::is_same>::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T&) -> unformattable_char { - return {}; - } - template >::value && - !is_string::value && !has_formatter::value && - !has_fallback_formatter::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T& val) - -> basic_string_view { - return basic_string_view(val); - } - template >::value && - !std::is_convertible>::value && - !is_string::value && !has_formatter::value && - !has_fallback_formatter::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T& val) - -> basic_string_view { - return std_string_view(val); - } - - FMT_CONSTEXPR FMT_INLINE auto map(void* val) -> const void* { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(const void* val) -> const void* { - return val; - } - FMT_CONSTEXPR FMT_INLINE auto map(std::nullptr_t val) -> const void* { - return val; - } - - // We use SFINAE instead of a const T* parameter to avoid conflicting with - // the C array overload. 
- template < - typename T, - FMT_ENABLE_IF( - std::is_pointer::value || std::is_member_pointer::value || - std::is_function::type>::value || - (std::is_convertible::value && - !std::is_convertible::value && - !has_formatter::value))> - FMT_CONSTEXPR auto map(const T&) -> unformattable_pointer { - return {}; - } - - template ::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T (&values)[N]) -> const T (&)[N] { - return values; - } - - template ::value&& std::is_convertible::value && - !has_format_as::value && !has_formatter::value && - !has_fallback_formatter::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T& val) - -> decltype(std::declval().map( - static_cast>(val))) { - return map(static_cast>(val)); - } - - template ::value && - !has_formatter::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T& val) - -> decltype(std::declval().map(format_as(T()))) { - return map(format_as(val)); - } - - template > - struct formattable - : bool_constant() || - !std::is_const>::value || - has_fallback_formatter::value> {}; - -#if (FMT_MSC_VERSION != 0 && FMT_MSC_VERSION < 1910) || \ - FMT_ICC_VERSION != 0 || defined(__NVCC__) - // Workaround a bug in MSVC and Intel (Issue 2746). 
- template FMT_CONSTEXPR FMT_INLINE auto do_map(T&& val) -> T& { - return val; - } -#else - template ::value)> - FMT_CONSTEXPR FMT_INLINE auto do_map(T&& val) -> T& { - return val; - } - template ::value)> - FMT_CONSTEXPR FMT_INLINE auto do_map(T&&) -> unformattable_const { - return {}; - } -#endif - - template , - FMT_ENABLE_IF(!is_string::value && !is_char::value && - !std::is_array::value && - !std::is_pointer::value && - !has_format_as::value && - (has_formatter::value || - has_fallback_formatter::value))> - FMT_CONSTEXPR FMT_INLINE auto map(T&& val) - -> decltype(this->do_map(std::forward(val))) { - return do_map(std::forward(val)); - } - - template ::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T& named_arg) - -> decltype(std::declval().map(named_arg.value)) { - return map(named_arg.value); - } - - auto map(...) -> unformattable { return {}; } -}; - -// A type constant after applying arg_mapper. -template -using mapped_type_constant = - type_constant().map(std::declval())), - typename Context::char_type>; - -enum { packed_arg_bits = 4 }; -// Maximum number of arguments with packed types. -enum { max_packed_args = 62 / packed_arg_bits }; -enum : unsigned long long { is_unpacked_bit = 1ULL << 63 }; -enum : unsigned long long { has_named_args_bit = 1ULL << 62 }; - -FMT_END_DETAIL_NAMESPACE - -// An output iterator that appends to a buffer. -// It is used to reduce symbol sizes for the common case. -class appender : public std::back_insert_iterator> { - using base = std::back_insert_iterator>; - - public: - using std::back_insert_iterator>::back_insert_iterator; - appender(base it) noexcept : base(it) {} - FMT_UNCHECKED_ITERATOR(appender); - - auto operator++() noexcept -> appender& { return *this; } - auto operator++(int) noexcept -> appender { return *this; } -}; - -// A formatting argument. It is a trivially copyable/constructible type to -// allow storage in basic_memory_buffer. 
-template class basic_format_arg { - private: - detail::value value_; - detail::type type_; - - template - friend FMT_CONSTEXPR auto detail::make_arg(T&& value) - -> basic_format_arg; - - template - friend FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis, - const basic_format_arg& arg) - -> decltype(vis(0)); - - friend class basic_format_args; - friend class dynamic_format_arg_store; - - using char_type = typename Context::char_type; - - template - friend struct detail::arg_data; - - basic_format_arg(const detail::named_arg_info* args, size_t size) - : value_(args, size) {} - - public: - class handle { - public: - explicit handle(detail::custom_value custom) : custom_(custom) {} - - void format(typename Context::parse_context_type& parse_ctx, - Context& ctx) const { - custom_.format(custom_.value, parse_ctx, ctx); - } - - private: - detail::custom_value custom_; - }; - - constexpr basic_format_arg() : type_(detail::type::none_type) {} - - constexpr explicit operator bool() const noexcept { - return type_ != detail::type::none_type; - } - - auto type() const -> detail::type { return type_; } - - auto is_integral() const -> bool { return detail::is_integral_type(type_); } - auto is_arithmetic() const -> bool { - return detail::is_arithmetic_type(type_); - } -}; - -/** - \rst - Visits an argument dispatching to the appropriate visit method based on - the argument type. For example, if the argument type is ``double`` then - ``vis(value)`` will be called with the value of type ``double``. 
- \endrst - */ -#if FMT_ICC_VERSION != 0 -#pragma warning(disable : 1595) -#endif -template -FMT_CONSTEXPR FMT_INLINE auto visit_format_arg( - Visitor&& vis, const basic_format_arg& arg) -> decltype(vis(0)) { - switch (arg.type_) { - case detail::type::none_type: - break; - case detail::type::int_type: - return vis(arg.value_.int_value); - case detail::type::uint_type: - return vis(arg.value_.uint_value); - case detail::type::long_long_type: - return vis(arg.value_.long_long_value); - case detail::type::ulong_long_type: - return vis(arg.value_.ulong_long_value); - case detail::type::int128_type: - return vis(detail::convert_for_visit(arg.value_.int128_value)); - case detail::type::uint128_type: - return vis(detail::convert_for_visit(arg.value_.uint128_value)); - case detail::type::bool_type: - return vis(arg.value_.bool_value); - case detail::type::char_type: - return vis(arg.value_.char_value); - case detail::type::float_type: - return vis(arg.value_.float_value); - case detail::type::double_type: - return vis(arg.value_.double_value); - case detail::type::long_double_type: - return vis(arg.value_.long_double_value); - case detail::type::cstring_type: - return vis(arg.value_.string.data); - case detail::type::string_type: - using sv = basic_string_view; - return vis(sv(arg.value_.string.data, arg.value_.string.size)); - case detail::type::pointer_type: - return vis(arg.value_.pointer); - case detail::type::custom_type: - return vis(typename basic_format_arg::handle(arg.value_.custom)); - } - return vis(monostate()); -} - -FMT_BEGIN_DETAIL_NAMESPACE - -template -auto copy_str(InputIt begin, InputIt end, appender out) -> appender { - get_container(out).append(begin, end); - return out; -} - -template -FMT_CONSTEXPR auto copy_str(R&& rng, OutputIt out) -> OutputIt { - return detail::copy_str(rng.begin(), rng.end(), out); -} - -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 -// A workaround for gcc 4.8 to make void_t work in a SFINAE context. 
-template struct void_t_impl { using type = void; }; -template -using void_t = typename detail::void_t_impl::type; -#else -template using void_t = void; -#endif - -template -struct is_output_iterator : std::false_type {}; - -template -struct is_output_iterator< - It, T, - void_t::iterator_category, - decltype(*std::declval() = std::declval())>> - : std::true_type {}; - -template -struct is_back_insert_iterator : std::false_type {}; -template -struct is_back_insert_iterator> - : std::true_type {}; - -template -struct is_contiguous_back_insert_iterator : std::false_type {}; -template -struct is_contiguous_back_insert_iterator> - : is_contiguous {}; -template <> -struct is_contiguous_back_insert_iterator : std::true_type {}; - -// A type-erased reference to an std::locale to avoid a heavy include. -class locale_ref { - private: - const void* locale_; // A type-erased pointer to std::locale. - - public: - constexpr FMT_INLINE locale_ref() : locale_(nullptr) {} - template explicit locale_ref(const Locale& loc); - - explicit operator bool() const noexcept { return locale_ != nullptr; } - - template auto get() const -> Locale; -}; - -template constexpr auto encode_types() -> unsigned long long { - return 0; -} - -template -constexpr auto encode_types() -> unsigned long long { - return static_cast(mapped_type_constant::value) | - (encode_types() << packed_arg_bits); -} - -template -FMT_CONSTEXPR FMT_INLINE auto make_value(T&& val) -> value { - const auto& arg = arg_mapper().map(FMT_FORWARD(val)); - - constexpr bool formattable_char = - !std::is_same::value; - static_assert(formattable_char, "Mixing character types is disallowed."); - - constexpr bool formattable_const = - !std::is_same::value; - static_assert(formattable_const, "Cannot format a const argument."); - - // Formatting of arbitrary pointers is disallowed. If you want to output - // a pointer cast it to "void *" or "const void *". 
In particular, this - // forbids formatting of "[const] volatile char *" which is printed as bool - // by iostreams. - constexpr bool formattable_pointer = - !std::is_same::value; - static_assert(formattable_pointer, - "Formatting of non-void pointers is disallowed."); - - constexpr bool formattable = - !std::is_same::value; - static_assert( - formattable, - "Cannot format an argument. To make type T formattable provide a " - "formatter specialization: https://fmt.dev/latest/api.html#udt"); - return {arg}; -} - -template -FMT_CONSTEXPR auto make_arg(T&& value) -> basic_format_arg { - basic_format_arg arg; - arg.type_ = mapped_type_constant::value; - arg.value_ = make_value(value); - return arg; -} - -// The type template parameter is there to avoid an ODR violation when using -// a fallback formatter in one translation unit and an implicit conversion in -// another (not recommended). -template -FMT_CONSTEXPR FMT_INLINE auto make_arg(T&& val) -> value { - return make_value(val); -} - -template -FMT_CONSTEXPR inline auto make_arg(T&& value) -> basic_format_arg { - return make_arg(value); -} -FMT_END_DETAIL_NAMESPACE - -// Formatting context. -template class basic_format_context { - public: - /** The character type for the output. */ - using char_type = Char; - - private: - OutputIt out_; - basic_format_args args_; - detail::locale_ref loc_; - - public: - using iterator = OutputIt; - using format_arg = basic_format_arg; - using parse_context_type = basic_format_parse_context; - template using formatter_type = formatter; - - basic_format_context(basic_format_context&&) = default; - basic_format_context(const basic_format_context&) = delete; - void operator=(const basic_format_context&) = delete; - /** - Constructs a ``basic_format_context`` object. References to the arguments are - stored in the object so make sure they have appropriate lifetimes. 
- */ - constexpr basic_format_context( - OutputIt out, basic_format_args ctx_args, - detail::locale_ref loc = detail::locale_ref()) - : out_(out), args_(ctx_args), loc_(loc) {} - - constexpr auto arg(int id) const -> format_arg { return args_.get(id); } - FMT_CONSTEXPR auto arg(basic_string_view name) -> format_arg { - return args_.get(name); - } - FMT_CONSTEXPR auto arg_id(basic_string_view name) -> int { - return args_.get_id(name); - } - auto args() const -> const basic_format_args& { - return args_; - } - - FMT_CONSTEXPR auto error_handler() -> detail::error_handler { return {}; } - void on_error(const char* message) { error_handler().on_error(message); } - - // Returns an iterator to the beginning of the output range. - FMT_CONSTEXPR auto out() -> iterator { return out_; } - - // Advances the begin iterator to ``it``. - void advance_to(iterator it) { - if (!detail::is_back_insert_iterator()) out_ = it; - } - - FMT_CONSTEXPR auto locale() -> detail::locale_ref { return loc_; } -}; - -template -using buffer_context = - basic_format_context, Char>; -using format_context = buffer_context; - -// Workaround an alias issue: https://stackoverflow.com/q/62767544/471164. -#define FMT_BUFFER_CONTEXT(Char) \ - basic_format_context, Char> - -template -using is_formattable = bool_constant< - !std::is_base_of>().map( - std::declval()))>::value && - !detail::has_fallback_formatter::value>; - -/** - \rst - An array of references to arguments. It can be implicitly converted into - `~fmt::basic_format_args` for passing into type-erased formatting functions - such as `~fmt::vformat`. - \endrst - */ -template -class format_arg_store -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 - // Workaround a GCC template argument substitution bug. 
- : public basic_format_args -#endif -{ - private: - static const size_t num_args = sizeof...(Args); - static const size_t num_named_args = detail::count_named_args(); - static const bool is_packed = num_args <= detail::max_packed_args; - - using value_type = conditional_t, - basic_format_arg>; - - detail::arg_data - data_; - - friend class basic_format_args; - - static constexpr unsigned long long desc = - (is_packed ? detail::encode_types() - : detail::is_unpacked_bit | num_args) | - (num_named_args != 0 - ? static_cast(detail::has_named_args_bit) - : 0); - - public: - template - FMT_CONSTEXPR FMT_INLINE format_arg_store(T&&... args) - : -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 - basic_format_args(*this), -#endif - data_{detail::make_arg< - is_packed, Context, - detail::mapped_type_constant, Context>::value>( - FMT_FORWARD(args))...} { - detail::init_named_args(data_.named_args(), 0, 0, args...); - } -}; - -/** - \rst - Constructs a `~fmt::format_arg_store` object that contains references to - arguments and can be implicitly converted to `~fmt::format_args`. `Context` - can be omitted in which case it defaults to `~fmt::context`. - See `~fmt::arg` for lifetime considerations. - \endrst - */ -template -constexpr auto make_format_args(Args&&... args) - -> format_arg_store...> { - return {FMT_FORWARD(args)...}; -} - -/** - \rst - Returns a named argument to be used in a formatting function. - It should only be used in a call to a formatting function or - `dynamic_format_arg_store::push_back`. - - **Example**:: - - fmt::print("Elapsed time: {s:.2f} seconds", fmt::arg("s", 1.23)); - \endrst - */ -template -inline auto arg(const Char* name, const T& arg) -> detail::named_arg { - static_assert(!detail::is_named_arg(), "nested named arguments"); - return {name, arg}; -} - -/** - \rst - A view of a collection of formatting arguments. 
To avoid lifetime issues it - should only be used as a parameter type in type-erased functions such as - ``vformat``:: - - void vlog(string_view format_str, format_args args); // OK - format_args args = make_format_args(42); // Error: dangling reference - \endrst - */ -template class basic_format_args { - public: - using size_type = int; - using format_arg = basic_format_arg; - - private: - // A descriptor that contains information about formatting arguments. - // If the number of arguments is less or equal to max_packed_args then - // argument types are passed in the descriptor. This reduces binary code size - // per formatting function call. - unsigned long long desc_; - union { - // If is_packed() returns true then argument values are stored in values_; - // otherwise they are stored in args_. This is done to improve cache - // locality and reduce compiled code size since storing larger objects - // may require more code (at least on x86-64) even if the same amount of - // data is actually copied to stack. It saves ~10% on the bloat test. - const detail::value* values_; - const format_arg* args_; - }; - - constexpr auto is_packed() const -> bool { - return (desc_ & detail::is_unpacked_bit) == 0; - } - auto has_named_args() const -> bool { - return (desc_ & detail::has_named_args_bit) != 0; - } - - FMT_CONSTEXPR auto type(int index) const -> detail::type { - int shift = index * detail::packed_arg_bits; - unsigned int mask = (1 << detail::packed_arg_bits) - 1; - return static_cast((desc_ >> shift) & mask); - } - - constexpr FMT_INLINE basic_format_args(unsigned long long desc, - const detail::value* values) - : desc_(desc), values_(values) {} - constexpr basic_format_args(unsigned long long desc, const format_arg* args) - : desc_(desc), args_(args) {} - - public: - constexpr basic_format_args() : desc_(0), args_(nullptr) {} - - /** - \rst - Constructs a `basic_format_args` object from `~fmt::format_arg_store`. 
- \endrst - */ - template - constexpr FMT_INLINE basic_format_args( - const format_arg_store& store) - : basic_format_args(format_arg_store::desc, - store.data_.args()) {} - - /** - \rst - Constructs a `basic_format_args` object from - `~fmt::dynamic_format_arg_store`. - \endrst - */ - constexpr FMT_INLINE basic_format_args( - const dynamic_format_arg_store& store) - : basic_format_args(store.get_types(), store.data()) {} - - /** - \rst - Constructs a `basic_format_args` object from a dynamic set of arguments. - \endrst - */ - constexpr basic_format_args(const format_arg* args, int count) - : basic_format_args(detail::is_unpacked_bit | detail::to_unsigned(count), - args) {} - - /** Returns the argument with the specified id. */ - FMT_CONSTEXPR auto get(int id) const -> format_arg { - format_arg arg; - if (!is_packed()) { - if (id < max_size()) arg = args_[id]; - return arg; - } - if (id >= detail::max_packed_args) return arg; - arg.type_ = type(id); - if (arg.type_ == detail::type::none_type) return arg; - arg.value_ = values_[id]; - return arg; - } - - template - auto get(basic_string_view name) const -> format_arg { - int id = get_id(name); - return id >= 0 ? get(id) : format_arg(); - } - - template - auto get_id(basic_string_view name) const -> int { - if (!has_named_args()) return -1; - const auto& named_args = - (is_packed() ? values_[-1] : args_[-1].value_).named_args; - for (size_t i = 0; i < named_args.size; ++i) { - if (named_args.data[i].name == name) return named_args.data[i].id; - } - return -1; - } - - auto max_size() const -> int { - unsigned long long max_packed = detail::max_packed_args; - return static_cast(is_packed() ? max_packed - : desc_ & ~detail::is_unpacked_bit); - } -}; - -/** An alias to ``basic_format_args``. */ -// A separate type would result in shorter symbols but break ABI compatibility -// between clang and gcc on ARM (#1919). 
-using format_args = basic_format_args; - -// We cannot use enum classes as bit fields because of a gcc bug, so we put them -// in namespaces instead (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414). -// Additionally, if an underlying type is specified, older gcc incorrectly warns -// that the type is too small. Both bugs are fixed in gcc 9.3. -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 903 -# define FMT_ENUM_UNDERLYING_TYPE(type) -#else -# define FMT_ENUM_UNDERLYING_TYPE(type) : type -#endif -namespace align { -enum type FMT_ENUM_UNDERLYING_TYPE(unsigned char){none, left, right, center, - numeric}; -} -using align_t = align::type; -namespace sign { -enum type FMT_ENUM_UNDERLYING_TYPE(unsigned char){none, minus, plus, space}; -} -using sign_t = sign::type; - -FMT_BEGIN_DETAIL_NAMESPACE - -// Workaround an array initialization issue in gcc 4.8. -template struct fill_t { - private: - enum { max_size = 4 }; - Char data_[max_size] = {Char(' '), Char(0), Char(0), Char(0)}; - unsigned char size_ = 1; - - public: - FMT_CONSTEXPR void operator=(basic_string_view s) { - auto size = s.size(); - if (size > max_size) return throw_format_error("invalid fill"); - for (size_t i = 0; i < size; ++i) data_[i] = s[i]; - size_ = static_cast(size); - } - - constexpr auto size() const -> size_t { return size_; } - constexpr auto data() const -> const Char* { return data_; } - - FMT_CONSTEXPR auto operator[](size_t index) -> Char& { return data_[index]; } - FMT_CONSTEXPR auto operator[](size_t index) const -> const Char& { - return data_[index]; - } -}; -FMT_END_DETAIL_NAMESPACE - -enum class presentation_type : unsigned char { - none, - // Integer types should go first, - dec, // 'd' - oct, // 'o' - hex_lower, // 'x' - hex_upper, // 'X' - bin_lower, // 'b' - bin_upper, // 'B' - hexfloat_lower, // 'a' - hexfloat_upper, // 'A' - exp_lower, // 'e' - exp_upper, // 'E' - fixed_lower, // 'f' - fixed_upper, // 'F' - general_lower, // 'g' - general_upper, // 'G' - chr, // 'c' - string, // 's' 
- pointer, // 'p' - debug // '?' -}; - -// Format specifiers for built-in and string types. -template struct basic_format_specs { - int width; - int precision; - presentation_type type; - align_t align : 4; - sign_t sign : 3; - bool alt : 1; // Alternate form ('#'). - bool localized : 1; - detail::fill_t fill; - - constexpr basic_format_specs() - : width(0), - precision(-1), - type(presentation_type::none), - align(align::none), - sign(sign::none), - alt(false), - localized(false) {} -}; - -using format_specs = basic_format_specs; - -FMT_BEGIN_DETAIL_NAMESPACE - -enum class arg_id_kind { none, index, name }; - -// An argument reference. -template struct arg_ref { - FMT_CONSTEXPR arg_ref() : kind(arg_id_kind::none), val() {} - - FMT_CONSTEXPR explicit arg_ref(int index) - : kind(arg_id_kind::index), val(index) {} - FMT_CONSTEXPR explicit arg_ref(basic_string_view name) - : kind(arg_id_kind::name), val(name) {} - - FMT_CONSTEXPR auto operator=(int idx) -> arg_ref& { - kind = arg_id_kind::index; - val.index = idx; - return *this; - } - - arg_id_kind kind; - union value { - FMT_CONSTEXPR value(int id = 0) : index{id} {} - FMT_CONSTEXPR value(basic_string_view n) : name(n) {} - - int index; - basic_string_view name; - } val; -}; - -// Format specifiers with width and precision resolved at formatting rather -// than parsing time to allow re-using the same parsed specifiers with -// different sets of arguments (precompilation of format strings). -template -struct dynamic_format_specs : basic_format_specs { - arg_ref width_ref; - arg_ref precision_ref; -}; - -struct auto_id {}; - -// A format specifier handler that sets fields in basic_format_specs. 
-template class specs_setter { - protected: - basic_format_specs& specs_; - - public: - explicit FMT_CONSTEXPR specs_setter(basic_format_specs& specs) - : specs_(specs) {} - - FMT_CONSTEXPR specs_setter(const specs_setter& other) - : specs_(other.specs_) {} - - FMT_CONSTEXPR void on_align(align_t align) { specs_.align = align; } - FMT_CONSTEXPR void on_fill(basic_string_view fill) { - specs_.fill = fill; - } - FMT_CONSTEXPR void on_sign(sign_t s) { specs_.sign = s; } - FMT_CONSTEXPR void on_hash() { specs_.alt = true; } - FMT_CONSTEXPR void on_localized() { specs_.localized = true; } - - FMT_CONSTEXPR void on_zero() { - if (specs_.align == align::none) specs_.align = align::numeric; - specs_.fill[0] = Char('0'); - } - - FMT_CONSTEXPR void on_width(int width) { specs_.width = width; } - FMT_CONSTEXPR void on_precision(int precision) { - specs_.precision = precision; - } - FMT_CONSTEXPR void end_precision() {} - - FMT_CONSTEXPR void on_type(presentation_type type) { specs_.type = type; } -}; - -// Format spec handler that saves references to arguments representing dynamic -// width and precision to be resolved at formatting time. 
-template -class dynamic_specs_handler - : public specs_setter { - public: - using char_type = typename ParseContext::char_type; - - FMT_CONSTEXPR dynamic_specs_handler(dynamic_format_specs& specs, - ParseContext& ctx) - : specs_setter(specs), specs_(specs), context_(ctx) {} - - FMT_CONSTEXPR dynamic_specs_handler(const dynamic_specs_handler& other) - : specs_setter(other), - specs_(other.specs_), - context_(other.context_) {} - - template FMT_CONSTEXPR void on_dynamic_width(Id arg_id) { - specs_.width_ref = make_arg_ref(arg_id); - } - - template FMT_CONSTEXPR void on_dynamic_precision(Id arg_id) { - specs_.precision_ref = make_arg_ref(arg_id); - } - - FMT_CONSTEXPR void on_error(const char* message) { - context_.on_error(message); - } - - private: - dynamic_format_specs& specs_; - ParseContext& context_; - - using arg_ref_type = arg_ref; - - FMT_CONSTEXPR auto make_arg_ref(int arg_id) -> arg_ref_type { - context_.check_arg_id(arg_id); - context_.check_dynamic_spec(arg_id); - return arg_ref_type(arg_id); - } - - FMT_CONSTEXPR auto make_arg_ref(auto_id) -> arg_ref_type { - int arg_id = context_.next_arg_id(); - context_.check_dynamic_spec(arg_id); - return arg_ref_type(arg_id); - } - - FMT_CONSTEXPR auto make_arg_ref(basic_string_view arg_id) - -> arg_ref_type { - context_.check_arg_id(arg_id); - basic_string_view format_str( - context_.begin(), to_unsigned(context_.end() - context_.begin())); - return arg_ref_type(arg_id); - } -}; - -template constexpr bool is_ascii_letter(Char c) { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); -} - -// Converts a character to ASCII. Returns a number > 127 on conversion failure. 
-template ::value)> -constexpr auto to_ascii(Char c) -> Char { - return c; -} -template ::value)> -constexpr auto to_ascii(Char c) -> underlying_t { - return c; -} - -FMT_CONSTEXPR inline auto code_point_length_impl(char c) -> int { - return "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0\0\0\2\2\2\2\3\3\4" - [static_cast(c) >> 3]; -} - -template -FMT_CONSTEXPR auto code_point_length(const Char* begin) -> int { - if (const_check(sizeof(Char) != 1)) return 1; - int len = code_point_length_impl(static_cast(*begin)); - - // Compute the pointer to the next character early so that the next - // iteration can start working on the next character. Neither Clang - // nor GCC figure out this reordering on their own. - return len + !len; -} - -// Return the result via the out param to workaround gcc bug 77539. -template -FMT_CONSTEXPR auto find(Ptr first, Ptr last, T value, Ptr& out) -> bool { - for (out = first; out != last; ++out) { - if (*out == value) return true; - } - return false; -} - -template <> -inline auto find(const char* first, const char* last, char value, - const char*& out) -> bool { - out = static_cast( - std::memchr(first, value, to_unsigned(last - first))); - return out != nullptr; -} - -// Parses the range [begin, end) as an unsigned integer. This function assumes -// that the range is non-empty and the first character is a digit. -template -FMT_CONSTEXPR auto parse_nonnegative_int(const Char*& begin, const Char* end, - int error_value) noexcept -> int { - FMT_ASSERT(begin != end && '0' <= *begin && *begin <= '9', ""); - unsigned value = 0, prev = 0; - auto p = begin; - do { - prev = value; - value = value * 10 + unsigned(*p - '0'); - ++p; - } while (p != end && '0' <= *p && *p <= '9'); - auto num_digits = p - begin; - begin = p; - if (num_digits <= std::numeric_limits::digits10) - return static_cast(value); - // Check for overflow. 
- const unsigned max = to_unsigned((std::numeric_limits::max)()); - return num_digits == std::numeric_limits::digits10 + 1 && - prev * 10ull + unsigned(p[-1] - '0') <= max - ? static_cast(value) - : error_value; -} - -// Parses fill and alignment. -template -FMT_CONSTEXPR auto parse_align(const Char* begin, const Char* end, - Handler&& handler) -> const Char* { - FMT_ASSERT(begin != end, ""); - auto align = align::none; - auto p = begin + code_point_length(begin); - if (end - p <= 0) p = begin; - for (;;) { - switch (to_ascii(*p)) { - case '<': - align = align::left; - break; - case '>': - align = align::right; - break; - case '^': - align = align::center; - break; - default: - break; - } - if (align != align::none) { - if (p != begin) { - auto c = *begin; - if (c == '{') - return handler.on_error("invalid fill character '{'"), begin; - if (c == '}') return begin; - handler.on_fill(basic_string_view(begin, to_unsigned(p - begin))); - begin = p + 1; - } else - ++begin; - handler.on_align(align); - break; - } else if (p == begin) { - break; - } - p = begin; - } - return begin; -} - -template FMT_CONSTEXPR bool is_name_start(Char c) { - return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || '_' == c; -} - -template -FMT_CONSTEXPR auto do_parse_arg_id(const Char* begin, const Char* end, - IDHandler&& handler) -> const Char* { - FMT_ASSERT(begin != end, ""); - Char c = *begin; - if (c >= '0' && c <= '9') { - int index = 0; - if (c != '0') - index = - parse_nonnegative_int(begin, end, (std::numeric_limits::max)()); - else - ++begin; - if (begin == end || (*begin != '}' && *begin != ':')) - handler.on_error("invalid format string"); - else - handler(index); - return begin; - } - if (!is_name_start(c)) { - handler.on_error("invalid format string"); - return begin; - } - auto it = begin; - do { - ++it; - } while (it != end && (is_name_start(c = *it) || ('0' <= c && c <= '9'))); - handler(basic_string_view(begin, to_unsigned(it - begin))); - return it; -} - -template 
-FMT_CONSTEXPR FMT_INLINE auto parse_arg_id(const Char* begin, const Char* end, - IDHandler&& handler) -> const Char* { - Char c = *begin; - if (c != '}' && c != ':') return do_parse_arg_id(begin, end, handler); - handler(); - return begin; -} - -template -FMT_CONSTEXPR auto parse_width(const Char* begin, const Char* end, - Handler&& handler) -> const Char* { - using detail::auto_id; - struct width_adapter { - Handler& handler; - - FMT_CONSTEXPR void operator()() { handler.on_dynamic_width(auto_id()); } - FMT_CONSTEXPR void operator()(int id) { handler.on_dynamic_width(id); } - FMT_CONSTEXPR void operator()(basic_string_view id) { - handler.on_dynamic_width(id); - } - FMT_CONSTEXPR void on_error(const char* message) { - if (message) handler.on_error(message); - } - }; - - FMT_ASSERT(begin != end, ""); - if ('0' <= *begin && *begin <= '9') { - int width = parse_nonnegative_int(begin, end, -1); - if (width != -1) - handler.on_width(width); - else - handler.on_error("number is too big"); - } else if (*begin == '{') { - ++begin; - if (begin != end) begin = parse_arg_id(begin, end, width_adapter{handler}); - if (begin == end || *begin != '}') - return handler.on_error("invalid format string"), begin; - ++begin; - } - return begin; -} - -template -FMT_CONSTEXPR auto parse_precision(const Char* begin, const Char* end, - Handler&& handler) -> const Char* { - using detail::auto_id; - struct precision_adapter { - Handler& handler; - - FMT_CONSTEXPR void operator()() { handler.on_dynamic_precision(auto_id()); } - FMT_CONSTEXPR void operator()(int id) { handler.on_dynamic_precision(id); } - FMT_CONSTEXPR void operator()(basic_string_view id) { - handler.on_dynamic_precision(id); - } - FMT_CONSTEXPR void on_error(const char* message) { - if (message) handler.on_error(message); - } - }; - - ++begin; - auto c = begin != end ? 
*begin : Char(); - if ('0' <= c && c <= '9') { - auto precision = parse_nonnegative_int(begin, end, -1); - if (precision != -1) - handler.on_precision(precision); - else - handler.on_error("number is too big"); - } else if (c == '{') { - ++begin; - if (begin != end) - begin = parse_arg_id(begin, end, precision_adapter{handler}); - if (begin == end || *begin++ != '}') - return handler.on_error("invalid format string"), begin; - } else { - return handler.on_error("missing precision specifier"), begin; - } - handler.end_precision(); - return begin; -} - -template -FMT_CONSTEXPR auto parse_presentation_type(Char type) -> presentation_type { - switch (to_ascii(type)) { - case 'd': - return presentation_type::dec; - case 'o': - return presentation_type::oct; - case 'x': - return presentation_type::hex_lower; - case 'X': - return presentation_type::hex_upper; - case 'b': - return presentation_type::bin_lower; - case 'B': - return presentation_type::bin_upper; - case 'a': - return presentation_type::hexfloat_lower; - case 'A': - return presentation_type::hexfloat_upper; - case 'e': - return presentation_type::exp_lower; - case 'E': - return presentation_type::exp_upper; - case 'f': - return presentation_type::fixed_lower; - case 'F': - return presentation_type::fixed_upper; - case 'g': - return presentation_type::general_lower; - case 'G': - return presentation_type::general_upper; - case 'c': - return presentation_type::chr; - case 's': - return presentation_type::string; - case 'p': - return presentation_type::pointer; - case '?': - return presentation_type::debug; - default: - return presentation_type::none; - } -} - -// Parses standard format specifiers and sends notifications about parsed -// components to handler. 
-template -FMT_CONSTEXPR FMT_INLINE auto parse_format_specs(const Char* begin, - const Char* end, - SpecHandler&& handler) - -> const Char* { - if (1 < end - begin && begin[1] == '}' && is_ascii_letter(*begin) && - *begin != 'L') { - presentation_type type = parse_presentation_type(*begin++); - if (type == presentation_type::none) - handler.on_error("invalid type specifier"); - handler.on_type(type); - return begin; - } - - if (begin == end) return begin; - - begin = parse_align(begin, end, handler); - if (begin == end) return begin; - - // Parse sign. - switch (to_ascii(*begin)) { - case '+': - handler.on_sign(sign::plus); - ++begin; - break; - case '-': - handler.on_sign(sign::minus); - ++begin; - break; - case ' ': - handler.on_sign(sign::space); - ++begin; - break; - default: - break; - } - if (begin == end) return begin; - - if (*begin == '#') { - handler.on_hash(); - if (++begin == end) return begin; - } - - // Parse zero flag. - if (*begin == '0') { - handler.on_zero(); - if (++begin == end) return begin; - } - - begin = parse_width(begin, end, handler); - if (begin == end) return begin; - - // Parse precision. - if (*begin == '.') { - begin = parse_precision(begin, end, handler); - if (begin == end) return begin; - } - - if (*begin == 'L') { - handler.on_localized(); - ++begin; - } - - // Parse type. 
- if (begin != end && *begin != '}') { - presentation_type type = parse_presentation_type(*begin++); - if (type == presentation_type::none) - handler.on_error("invalid type specifier"); - handler.on_type(type); - } - return begin; -} - -template -FMT_CONSTEXPR auto parse_replacement_field(const Char* begin, const Char* end, - Handler&& handler) -> const Char* { - struct id_adapter { - Handler& handler; - int arg_id; - - FMT_CONSTEXPR void operator()() { arg_id = handler.on_arg_id(); } - FMT_CONSTEXPR void operator()(int id) { arg_id = handler.on_arg_id(id); } - FMT_CONSTEXPR void operator()(basic_string_view id) { - arg_id = handler.on_arg_id(id); - } - FMT_CONSTEXPR void on_error(const char* message) { - if (message) handler.on_error(message); - } - }; - - ++begin; - if (begin == end) return handler.on_error("invalid format string"), end; - if (*begin == '}') { - handler.on_replacement_field(handler.on_arg_id(), begin); - } else if (*begin == '{') { - handler.on_text(begin, begin + 1); - } else { - auto adapter = id_adapter{handler, 0}; - begin = parse_arg_id(begin, end, adapter); - Char c = begin != end ? *begin : Char(); - if (c == '}') { - handler.on_replacement_field(adapter.arg_id, begin); - } else if (c == ':') { - begin = handler.on_format_specs(adapter.arg_id, begin + 1, end); - if (begin == end || *begin != '}') - return handler.on_error("unknown format specifier"), end; - } else { - return handler.on_error("missing '}' in format string"), end; - } - } - return begin + 1; -} - -template -FMT_CONSTEXPR FMT_INLINE void parse_format_string( - basic_string_view format_str, Handler&& handler) { - // Workaround a name-lookup bug in MSVC's modules implementation. - using detail::find; - - auto begin = format_str.data(); - auto end = begin + format_str.size(); - if (end - begin < 32) { - // Use a simple loop instead of memchr for small strings. 
- const Char* p = begin; - while (p != end) { - auto c = *p++; - if (c == '{') { - handler.on_text(begin, p - 1); - begin = p = parse_replacement_field(p - 1, end, handler); - } else if (c == '}') { - if (p == end || *p != '}') - return handler.on_error("unmatched '}' in format string"); - handler.on_text(begin, p); - begin = ++p; - } - } - handler.on_text(begin, end); - return; - } - struct writer { - FMT_CONSTEXPR void operator()(const Char* from, const Char* to) { - if (from == to) return; - for (;;) { - const Char* p = nullptr; - if (!find(from, to, Char('}'), p)) - return handler_.on_text(from, to); - ++p; - if (p == to || *p != '}') - return handler_.on_error("unmatched '}' in format string"); - handler_.on_text(from, p); - from = p + 1; - } - } - Handler& handler_; - } write = {handler}; - while (begin != end) { - // Doing two passes with memchr (one for '{' and another for '}') is up to - // 2.5x faster than the naive one-pass implementation on big format strings. - const Char* p = begin; - if (*begin != '{' && !find(begin + 1, end, Char('{'), p)) - return write(begin, end); - write(begin, p); - begin = parse_replacement_field(p, end, handler); - } -} - -template ::value> struct strip_named_arg { - using type = T; -}; -template struct strip_named_arg { - using type = remove_cvref_t; -}; - -template -FMT_CONSTEXPR auto parse_format_specs(ParseContext& ctx) - -> decltype(ctx.begin()) { - using char_type = typename ParseContext::char_type; - using context = buffer_context; - using stripped_type = typename strip_named_arg::type; - using mapped_type = conditional_t< - mapped_type_constant::value != type::custom_type, - decltype(arg_mapper().map(std::declval())), - stripped_type>; - auto f = conditional_t::value, - formatter, - fallback_formatter>(); - return f.parse(ctx); -} - -template -FMT_CONSTEXPR void check_int_type_spec(presentation_type type, - ErrorHandler&& eh) { - if (type > presentation_type::bin_upper && type != presentation_type::chr) - 
eh.on_error("invalid type specifier"); -} - -// Checks char specs and returns true if the type spec is char (and not int). -template -FMT_CONSTEXPR auto check_char_specs(const basic_format_specs& specs, - ErrorHandler&& eh = {}) -> bool { - if (specs.type != presentation_type::none && - specs.type != presentation_type::chr && - specs.type != presentation_type::debug) { - check_int_type_spec(specs.type, eh); - return false; - } - if (specs.align == align::numeric || specs.sign != sign::none || specs.alt) - eh.on_error("invalid format specifier for char"); - return true; -} - -// A floating-point presentation format. -enum class float_format : unsigned char { - general, // General: exponent notation or fixed point based on magnitude. - exp, // Exponent notation with the default precision of 6, e.g. 1.2e-3. - fixed, // Fixed point with the default precision of 6, e.g. 0.0012. - hex -}; - -struct float_specs { - int precision; - float_format format : 8; - sign_t sign : 8; - bool upper : 1; - bool locale : 1; - bool binary32 : 1; - bool showpoint : 1; -}; - -template -FMT_CONSTEXPR auto parse_float_type_spec(const basic_format_specs& specs, - ErrorHandler&& eh = {}) - -> float_specs { - auto result = float_specs(); - result.showpoint = specs.alt; - result.locale = specs.localized; - switch (specs.type) { - case presentation_type::none: - result.format = float_format::general; - break; - case presentation_type::general_upper: - result.upper = true; - FMT_FALLTHROUGH; - case presentation_type::general_lower: - result.format = float_format::general; - break; - case presentation_type::exp_upper: - result.upper = true; - FMT_FALLTHROUGH; - case presentation_type::exp_lower: - result.format = float_format::exp; - result.showpoint |= specs.precision != 0; - break; - case presentation_type::fixed_upper: - result.upper = true; - FMT_FALLTHROUGH; - case presentation_type::fixed_lower: - result.format = float_format::fixed; - result.showpoint |= specs.precision != 0; - break; - 
case presentation_type::hexfloat_upper: - result.upper = true; - FMT_FALLTHROUGH; - case presentation_type::hexfloat_lower: - result.format = float_format::hex; - break; - default: - eh.on_error("invalid type specifier"); - break; - } - return result; -} - -template -FMT_CONSTEXPR auto check_cstring_type_spec(presentation_type type, - ErrorHandler&& eh = {}) -> bool { - if (type == presentation_type::none || type == presentation_type::string || - type == presentation_type::debug) - return true; - if (type != presentation_type::pointer) eh.on_error("invalid type specifier"); - return false; -} - -template -FMT_CONSTEXPR void check_string_type_spec(presentation_type type, - ErrorHandler&& eh = {}) { - if (type != presentation_type::none && type != presentation_type::string && - type != presentation_type::debug) - eh.on_error("invalid type specifier"); -} - -template -FMT_CONSTEXPR void check_pointer_type_spec(presentation_type type, - ErrorHandler&& eh) { - if (type != presentation_type::none && type != presentation_type::pointer) - eh.on_error("invalid type specifier"); -} - -// A parse_format_specs handler that checks if specifiers are consistent with -// the argument type. 
-template class specs_checker : public Handler { - private: - detail::type arg_type_; - - FMT_CONSTEXPR void require_numeric_argument() { - if (!is_arithmetic_type(arg_type_)) - this->on_error("format specifier requires numeric argument"); - } - - public: - FMT_CONSTEXPR specs_checker(const Handler& handler, detail::type arg_type) - : Handler(handler), arg_type_(arg_type) {} - - FMT_CONSTEXPR void on_align(align_t align) { - if (align == align::numeric) require_numeric_argument(); - Handler::on_align(align); - } - - FMT_CONSTEXPR void on_sign(sign_t s) { - require_numeric_argument(); - if (is_integral_type(arg_type_) && arg_type_ != type::int_type && - arg_type_ != type::long_long_type && arg_type_ != type::int128_type && - arg_type_ != type::char_type) { - this->on_error("format specifier requires signed argument"); - } - Handler::on_sign(s); - } - - FMT_CONSTEXPR void on_hash() { - require_numeric_argument(); - Handler::on_hash(); - } - - FMT_CONSTEXPR void on_localized() { - require_numeric_argument(); - Handler::on_localized(); - } - - FMT_CONSTEXPR void on_zero() { - require_numeric_argument(); - Handler::on_zero(); - } - - FMT_CONSTEXPR void end_precision() { - if (is_integral_type(arg_type_) || arg_type_ == type::pointer_type) - this->on_error("precision not allowed for this argument type"); - } -}; - -constexpr int invalid_arg_index = -1; - -#if FMT_USE_NONTYPE_TEMPLATE_ARGS -template -constexpr auto get_arg_index_by_name(basic_string_view name) -> int { - if constexpr (detail::is_statically_named_arg()) { - if (name == T::name) return N; - } - if constexpr (sizeof...(Args) > 0) - return get_arg_index_by_name(name); - (void)name; // Workaround an MSVC bug about "unused" parameter. 
- return invalid_arg_index; -} -#endif - -template -FMT_CONSTEXPR auto get_arg_index_by_name(basic_string_view name) -> int { -#if FMT_USE_NONTYPE_TEMPLATE_ARGS - if constexpr (sizeof...(Args) > 0) - return get_arg_index_by_name<0, Args...>(name); -#endif - (void)name; - return invalid_arg_index; -} - -template -class format_string_checker { - private: - // In the future basic_format_parse_context will replace compile_parse_context - // here and will use is_constant_evaluated and downcasting to access the data - // needed for compile-time checks: https://godbolt.org/z/GvWzcTjh1. - using parse_context_type = compile_parse_context; - static constexpr int num_args = sizeof...(Args); - - // Format specifier parsing function. - using parse_func = const Char* (*)(parse_context_type&); - - parse_context_type context_; - parse_func parse_funcs_[num_args > 0 ? static_cast(num_args) : 1]; - type types_[num_args > 0 ? static_cast(num_args) : 1]; - - public: - explicit FMT_CONSTEXPR format_string_checker( - basic_string_view format_str, ErrorHandler eh) - : context_(format_str, num_args, types_, eh), - parse_funcs_{&parse_format_specs...}, - types_{ - mapped_type_constant>::value...} { - } - - FMT_CONSTEXPR void on_text(const Char*, const Char*) {} - - FMT_CONSTEXPR auto on_arg_id() -> int { return context_.next_arg_id(); } - FMT_CONSTEXPR auto on_arg_id(int id) -> int { - return context_.check_arg_id(id), id; - } - FMT_CONSTEXPR auto on_arg_id(basic_string_view id) -> int { -#if FMT_USE_NONTYPE_TEMPLATE_ARGS - auto index = get_arg_index_by_name(id); - if (index == invalid_arg_index) on_error("named argument is not found"); - return context_.check_arg_id(index), index; -#else - (void)id; - on_error("compile-time checks for named arguments require C++20 support"); - return 0; -#endif - } - - FMT_CONSTEXPR void on_replacement_field(int, const Char*) {} - - FMT_CONSTEXPR auto on_format_specs(int id, const Char* begin, const Char*) - -> const Char* { - 
context_.advance_to(context_.begin() + (begin - &*context_.begin())); - // id >= 0 check is a workaround for gcc 10 bug (#2065). - return id >= 0 && id < num_args ? parse_funcs_[id](context_) : begin; - } - - FMT_CONSTEXPR void on_error(const char* message) { - context_.on_error(message); - } -}; - -// Reports a compile-time error if S is not a valid format string. -template ::value)> -FMT_INLINE void check_format_string(const S&) { -#ifdef FMT_ENFORCE_COMPILE_STRING - static_assert(is_compile_string::value, - "FMT_ENFORCE_COMPILE_STRING requires all format strings to use " - "FMT_STRING."); -#endif -} -template ::value)> -void check_format_string(S format_str) { - FMT_CONSTEXPR auto s = basic_string_view(format_str); - using checker = format_string_checker...>; - FMT_CONSTEXPR bool invalid_format = - (parse_format_string(s, checker(s, {})), true); - ignore_unused(invalid_format); -} - -// Don't use type_identity for args to simplify symbols. -template -void vformat_to(buffer& buf, basic_string_view fmt, - basic_format_args args, - locale_ref loc = {}); - -FMT_API void vprint_mojibake(std::FILE*, string_view, format_args); -#ifndef _WIN32 -inline void vprint_mojibake(std::FILE*, string_view, format_args) {} -#endif -FMT_END_DETAIL_NAMESPACE - -// A formatter specialization for the core types corresponding to detail::type -// constants. -template -struct formatter::value != - detail::type::custom_type>> { - private: - detail::dynamic_format_specs specs_; - - public: - // Parses format specifiers stopping either at the end of the range or at the - // terminating '}'. 
- template - FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) { - auto begin = ctx.begin(), end = ctx.end(); - if (begin == end) return begin; - using handler_type = detail::dynamic_specs_handler; - auto type = detail::type_constant::value; - auto checker = - detail::specs_checker(handler_type(specs_, ctx), type); - auto it = detail::parse_format_specs(begin, end, checker); - auto eh = ctx.error_handler(); - switch (type) { - case detail::type::none_type: - FMT_ASSERT(false, "invalid argument type"); - break; - case detail::type::bool_type: - if (specs_.type == presentation_type::none || - specs_.type == presentation_type::string) { - break; - } - FMT_FALLTHROUGH; - case detail::type::int_type: - case detail::type::uint_type: - case detail::type::long_long_type: - case detail::type::ulong_long_type: - case detail::type::int128_type: - case detail::type::uint128_type: - detail::check_int_type_spec(specs_.type, eh); - break; - case detail::type::char_type: - detail::check_char_specs(specs_, eh); - break; - case detail::type::float_type: - if (detail::const_check(FMT_USE_FLOAT)) - detail::parse_float_type_spec(specs_, eh); - else - FMT_ASSERT(false, "float support disabled"); - break; - case detail::type::double_type: - if (detail::const_check(FMT_USE_DOUBLE)) - detail::parse_float_type_spec(specs_, eh); - else - FMT_ASSERT(false, "double support disabled"); - break; - case detail::type::long_double_type: - if (detail::const_check(FMT_USE_LONG_DOUBLE)) - detail::parse_float_type_spec(specs_, eh); - else - FMT_ASSERT(false, "long double support disabled"); - break; - case detail::type::cstring_type: - detail::check_cstring_type_spec(specs_.type, eh); - break; - case detail::type::string_type: - detail::check_string_type_spec(specs_.type, eh); - break; - case detail::type::pointer_type: - detail::check_pointer_type_spec(specs_.type, eh); - break; - case detail::type::custom_type: - // Custom format specifiers are checked in parse functions of - // 
formatter specializations. - break; - } - return it; - } - - template ::value, - enable_if_t<(U == detail::type::string_type || - U == detail::type::cstring_type || - U == detail::type::char_type), - int> = 0> - FMT_CONSTEXPR void set_debug_format() { - specs_.type = presentation_type::debug; - } - - template - FMT_CONSTEXPR auto format(const T& val, FormatContext& ctx) const - -> decltype(ctx.out()); -}; - -#define FMT_FORMAT_AS(Type, Base) \ - template \ - struct formatter : formatter { \ - template \ - auto format(Type const& val, FormatContext& ctx) const \ - -> decltype(ctx.out()) { \ - return formatter::format(static_cast(val), ctx); \ - } \ - } - -FMT_FORMAT_AS(signed char, int); -FMT_FORMAT_AS(unsigned char, unsigned); -FMT_FORMAT_AS(short, int); -FMT_FORMAT_AS(unsigned short, unsigned); -FMT_FORMAT_AS(long, long long); -FMT_FORMAT_AS(unsigned long, unsigned long long); -FMT_FORMAT_AS(Char*, const Char*); -FMT_FORMAT_AS(std::basic_string, basic_string_view); -FMT_FORMAT_AS(std::nullptr_t, const void*); -FMT_FORMAT_AS(detail::std_string_view, basic_string_view); - -template struct basic_runtime { basic_string_view str; }; - -/** A compile-time format string. 
*/ -template class basic_format_string { - private: - basic_string_view str_; - - public: - template >::value)> - FMT_CONSTEVAL FMT_INLINE basic_format_string(const S& s) : str_(s) { - static_assert( - detail::count< - (std::is_base_of>::value && - std::is_reference::value)...>() == 0, - "passing views as lvalues is disallowed"); -#ifdef FMT_HAS_CONSTEVAL - if constexpr (detail::count_named_args() == - detail::count_statically_named_args()) { - using checker = detail::format_string_checker...>; - detail::parse_format_string(str_, checker(s, {})); - } -#else - detail::check_format_string(s); -#endif - } - basic_format_string(basic_runtime r) : str_(r.str) {} - - FMT_INLINE operator basic_string_view() const { return str_; } - FMT_INLINE basic_string_view get() const { return str_; } -}; - -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 -// Workaround broken conversion on older gcc. -template using format_string = string_view; -inline auto runtime(string_view s) -> string_view { return s; } -#else -template -using format_string = basic_format_string...>; -/** - \rst - Creates a runtime format string. - - **Example**:: - - // Check format string at runtime instead of compile-time. - fmt::print(fmt::runtime("{:d}"), "I am not a number"); - \endrst - */ -inline auto runtime(string_view s) -> basic_runtime { return {{s}}; } -#endif - -FMT_API auto vformat(string_view fmt, format_args args) -> std::string; - -/** - \rst - Formats ``args`` according to specifications in ``fmt`` and returns the result - as a string. - - **Example**:: - - #include - std::string message = fmt::format("The answer is {}.", 42); - \endrst -*/ -template -FMT_NODISCARD FMT_INLINE auto format(format_string fmt, T&&... args) - -> std::string { - return vformat(fmt, fmt::make_format_args(args...)); -} - -/** Formats a string and writes the output to ``out``. 
*/ -template ::value)> -auto vformat_to(OutputIt out, string_view fmt, format_args args) -> OutputIt { - auto&& buf = detail::get_buffer(out); - detail::vformat_to(buf, fmt, args, {}); - return detail::get_iterator(buf, out); -} - -/** - \rst - Formats ``args`` according to specifications in ``fmt``, writes the result to - the output iterator ``out`` and returns the iterator past the end of the output - range. `format_to` does not append a terminating null character. - - **Example**:: - - auto out = std::vector(); - fmt::format_to(std::back_inserter(out), "{}", 42); - \endrst - */ -template ::value)> -FMT_INLINE auto format_to(OutputIt out, format_string fmt, T&&... args) - -> OutputIt { - return vformat_to(out, fmt, fmt::make_format_args(args...)); -} - -template struct format_to_n_result { - /** Iterator past the end of the output range. */ - OutputIt out; - /** Total (not truncated) output size. */ - size_t size; -}; - -template ::value)> -auto vformat_to_n(OutputIt out, size_t n, string_view fmt, format_args args) - -> format_to_n_result { - using traits = detail::fixed_buffer_traits; - auto buf = detail::iterator_buffer(out, n); - detail::vformat_to(buf, fmt, args, {}); - return {buf.out(), buf.count()}; -} - -/** - \rst - Formats ``args`` according to specifications in ``fmt``, writes up to ``n`` - characters of the result to the output iterator ``out`` and returns the total - (not truncated) output size and the iterator past the end of the output range. - `format_to_n` does not append a terminating null character. - \endrst - */ -template ::value)> -FMT_INLINE auto format_to_n(OutputIt out, size_t n, format_string fmt, - T&&... args) -> format_to_n_result { - return vformat_to_n(out, n, fmt, fmt::make_format_args(args...)); -} - -/** Returns the number of chars in the output of ``format(fmt, args...)``. */ -template -FMT_NODISCARD FMT_INLINE auto formatted_size(format_string fmt, - T&&... 
args) -> size_t { - auto buf = detail::counting_buffer<>(); - detail::vformat_to(buf, string_view(fmt), - format_args(fmt::make_format_args(args...)), {}); - return buf.count(); -} - -FMT_API void vprint(string_view fmt, format_args args); -FMT_API void vprint(std::FILE* f, string_view fmt, format_args args); - -/** - \rst - Formats ``args`` according to specifications in ``fmt`` and writes the output - to ``stdout``. - - **Example**:: - - fmt::print("Elapsed time: {0:.2f} seconds", 1.23); - \endrst - */ -template -FMT_INLINE void print(format_string fmt, T&&... args) { - const auto& vargs = fmt::make_format_args(args...); - return detail::is_utf8() ? vprint(fmt, vargs) - : detail::vprint_mojibake(stdout, fmt, vargs); -} - -/** - \rst - Formats ``args`` according to specifications in ``fmt`` and writes the - output to the file ``f``. - - **Example**:: - - fmt::print(stderr, "Don't {}!", "panic"); - \endrst - */ -template -FMT_INLINE void print(std::FILE* f, format_string fmt, T&&... args) { - const auto& vargs = fmt::make_format_args(args...); - return detail::is_utf8() ? 
vprint(f, fmt, vargs) - : detail::vprint_mojibake(f, fmt, vargs); -} - -FMT_MODULE_EXPORT_END -FMT_GCC_PRAGMA("GCC pop_options") -FMT_END_NAMESPACE - -#ifdef FMT_HEADER_ONLY -# include "format.h" -#endif -#endif // FMT_CORE_H_ +#include "format.h" diff --git a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/format-inl.h b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/format-inl.h index 9ac55e47f22f..a887483b6f46 100644 --- a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/format-inl.h +++ b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/format-inl.h @@ -8,21 +8,19 @@ #ifndef FMT_FORMAT_INL_H_ #define FMT_FORMAT_INL_H_ -#include -#include -#include // errno -#include -#include -#include -#include // std::memmove -#include -#include - -#ifndef FMT_STATIC_THOUSANDS_SEPARATOR -# include +#ifndef FMT_MODULE +# include +# include // errno +# include +# include +# include + +# if !defined(FMT_STATIC_THOUSANDS_SEPARATOR) +# include +# endif #endif -#ifdef _WIN32 +#if defined(_WIN32) && !defined(FMT_USE_WRITE_CONSOLE) # include // _isatty #endif @@ -40,10 +38,6 @@ FMT_FUNC void assert_fail(const char* file, int line, const char* message) { std::terminate(); } -FMT_FUNC void throw_format_error(const char* message) { - FMT_THROW(format_error(message)); -} - FMT_FUNC void format_error_code(detail::buffer& out, int error_code, string_view message) noexcept { // Report error code making sure that the output fits into @@ -60,10 +54,10 @@ FMT_FUNC void format_error_code(detail::buffer& out, int error_code, ++error_code_size; } error_code_size += detail::to_unsigned(detail::count_digits(abs_value)); - auto it = buffer_appender(out); + auto it = appender(out); if (message.size() <= inline_buffer_size - error_code_size) - format_to(it, FMT_STRING("{}{}"), message, SEP); - format_to(it, FMT_STRING("{}{}"), ERROR_STR, error_code); + fmt::format_to(it, FMT_STRING("{}{}"), message, SEP); + fmt::format_to(it, FMT_STRING("{}{}"), ERROR_STR, error_code); 
FMT_ASSERT(out.size() <= inline_buffer_size, ""); } @@ -77,13 +71,10 @@ FMT_FUNC void report_error(format_func func, int error_code, } // A wrapper around fwrite that throws on error. -inline void fwrite_fully(const void* ptr, size_t size, size_t count, - FILE* stream) { - size_t written = std::fwrite(ptr, size, count, stream); -#if !__NVCC__ +inline void fwrite_fully(const void* ptr, size_t count, FILE* stream) { + size_t written = std::fwrite(ptr, 1, count, stream); if (written < count) FMT_THROW(system_error(errno, FMT_STRING("cannot write to file"))); -#endif } #ifndef FMT_STATIC_THOUSANDS_SEPARATOR @@ -92,7 +83,7 @@ locale_ref::locale_ref(const Locale& loc) : locale_(&loc) { static_assert(std::is_same::value, ""); } -template Locale locale_ref::get() const { +template auto locale_ref::get() const -> Locale { static_assert(std::is_same::value, ""); return locale_ ? *static_cast(locale_) : std::locale(); } @@ -104,7 +95,8 @@ FMT_FUNC auto thousands_sep_impl(locale_ref loc) -> thousands_sep_result { auto thousands_sep = grouping.empty() ? Char() : facet.thousands_sep(); return {std::move(grouping), thousands_sep}; } -template FMT_FUNC Char decimal_point_impl(locale_ref loc) { +template +FMT_FUNC auto decimal_point_impl(locale_ref loc) -> Char { return std::use_facet>(loc.get()) .decimal_point(); } @@ -120,7 +112,11 @@ template FMT_FUNC Char decimal_point_impl(locale_ref) { FMT_FUNC auto write_loc(appender out, loc_value value, const format_specs& specs, locale_ref loc) -> bool { -#ifndef FMT_STATIC_THOUSANDS_SEPARATOR +#ifdef FMT_STATIC_THOUSANDS_SEPARATOR + value.visit(loc_writer<>{ + out, specs, std::string(1, FMT_STATIC_THOUSANDS_SEPARATOR), "\3", "."}); + return true; +#else auto locale = loc.get(); // We cannot use the num_put facet because it may produce output in // a wrong encoding. 
@@ -129,10 +125,13 @@ FMT_FUNC auto write_loc(appender out, loc_value value, return std::use_facet(locale).put(out, value, specs); return facet(locale).put(out, value, specs); #endif - return false; } } // namespace detail +FMT_FUNC void report_error(const char* message) { + FMT_THROW(format_error(message)); +} + template typename Locale::id format_facet::id; #ifndef FMT_STATIC_THOUSANDS_SEPARATOR @@ -150,96 +149,41 @@ FMT_API FMT_FUNC auto format_facet::do_put( } #endif -#if !FMT_MSC_VERSION -FMT_API FMT_FUNC format_error::~format_error() noexcept = default; -#endif - -#if !__NVCC__ -FMT_FUNC std::system_error vsystem_error(int error_code, string_view format_str, - format_args args) { +FMT_FUNC auto vsystem_error(int error_code, string_view fmt, format_args args) + -> std::system_error { auto ec = std::error_code(error_code, std::generic_category()); - return std::system_error(ec, vformat(format_str, args)); + return std::system_error(ec, vformat(fmt, args)); } -#endif namespace detail { -template inline bool operator==(basic_fp x, basic_fp y) { +template +inline auto operator==(basic_fp x, basic_fp y) -> bool { return x.f == y.f && x.e == y.e; } // Compilers should be able to optimize this into the ror instruction. -FMT_CONSTEXPR inline uint32_t rotr(uint32_t n, uint32_t r) noexcept { +FMT_CONSTEXPR inline auto rotr(uint32_t n, uint32_t r) noexcept -> uint32_t { r &= 31; return (n >> r) | (n << (32 - r)); } -FMT_CONSTEXPR inline uint64_t rotr(uint64_t n, uint32_t r) noexcept { +FMT_CONSTEXPR inline auto rotr(uint64_t n, uint32_t r) noexcept -> uint64_t { r &= 63; return (n >> r) | (n << (64 - r)); } -// Computes 128-bit result of multiplication of two 64-bit unsigned integers. 
-inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept { -#if FMT_USE_INT128 - auto p = static_cast(x) * static_cast(y); - return {static_cast(p >> 64), static_cast(p)}; -#elif defined(_MSC_VER) && defined(_M_X64) - auto result = uint128_fallback(); - result.lo_ = _umul128(x, y, &result.hi_); - return result; -#else - const uint64_t mask = static_cast(max_value()); - - uint64_t a = x >> 32; - uint64_t b = x & mask; - uint64_t c = y >> 32; - uint64_t d = y & mask; - - uint64_t ac = a * c; - uint64_t bc = b * c; - uint64_t ad = a * d; - uint64_t bd = b * d; - - uint64_t intermediate = (bd >> 32) + (ad & mask) + (bc & mask); - - return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32), - (intermediate << 32) + (bd & mask)}; -#endif -} - // Implementation of Dragonbox algorithm: https://github.com/jk-jeon/dragonbox. namespace dragonbox { -// Computes upper 64 bits of multiplication of two 64-bit unsigned integers. -inline uint64_t umul128_upper64(uint64_t x, uint64_t y) noexcept { -#if FMT_USE_INT128 - auto p = static_cast(x) * static_cast(y); - return static_cast(p >> 64); -#elif defined(_MSC_VER) && defined(_M_X64) - return __umulh(x, y); -#else - return umul128(x, y).high(); -#endif -} - -// Computes upper 128 bits of multiplication of a 64-bit unsigned integer and a -// 128-bit unsigned integer. -inline uint128_fallback umul192_upper128(uint64_t x, - uint128_fallback y) noexcept { - uint128_fallback r = umul128(x, y.high()); - r += umul128_upper64(x, y.low()); - return r; -} - // Computes upper 64 bits of multiplication of a 32-bit unsigned integer and a // 64-bit unsigned integer. -inline uint64_t umul96_upper64(uint32_t x, uint64_t y) noexcept { +inline auto umul96_upper64(uint32_t x, uint64_t y) noexcept -> uint64_t { return umul128_upper64(static_cast(x) << 32, y); } // Computes lower 128 bits of multiplication of a 64-bit unsigned integer and a // 128-bit unsigned integer. 
-inline uint128_fallback umul192_lower128(uint64_t x, - uint128_fallback y) noexcept { +inline auto umul192_lower128(uint64_t x, uint128_fallback y) noexcept + -> uint128_fallback { uint64_t high = x * y.high(); uint128_fallback high_low = umul128(x, y.low()); return {high + high_low.high(), high_low.low()}; @@ -247,29 +191,17 @@ inline uint128_fallback umul192_lower128(uint64_t x, // Computes lower 64 bits of multiplication of a 32-bit unsigned integer and a // 64-bit unsigned integer. -inline uint64_t umul96_lower64(uint32_t x, uint64_t y) noexcept { +inline auto umul96_lower64(uint32_t x, uint64_t y) noexcept -> uint64_t { return x * y; } -// Computes floor(log10(pow(2, e))) for e in [-2620, 2620] using the method from -// https://fmt.dev/papers/Dragonbox.pdf#page=28, section 6.1. -inline int floor_log10_pow2(int e) noexcept { - FMT_ASSERT(e <= 2620 && e >= -2620, "too large exponent"); - static_assert((-1 >> 1) == -1, "right shift is not arithmetic"); - return (e * 315653) >> 20; -} - // Various fast log computations. -inline int floor_log2_pow10(int e) noexcept { - FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent"); - return (e * 1741647) >> 19; -} -inline int floor_log10_pow2_minus_log10_4_over_3(int e) noexcept { +inline auto floor_log10_pow2_minus_log10_4_over_3(int e) noexcept -> int { FMT_ASSERT(e <= 2936 && e >= -2985, "too large exponent"); return (e * 631305 - 261663) >> 21; } -static constexpr struct { +FMT_INLINE_VARIABLE constexpr struct { uint32_t divisor; int shift_amount; } div_small_pow10_infos[] = {{10, 16}, {100, 16}}; @@ -278,7 +210,7 @@ static constexpr struct { // divisible by pow(10, N). // Precondition: n <= pow(10, N + 1). template -bool check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept { +auto check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept -> bool { // The numbers below are chosen such that: // 1. floor(n/d) = floor(nm / 2^k) where d=10 or d=100, // 2. 
nm mod 2^k < m if and only if n is divisible by d, @@ -303,7 +235,7 @@ bool check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept { // Computes floor(n / pow(10, N)) for small n and N. // Precondition: n <= pow(10, N + 1). -template uint32_t small_division_by_pow10(uint32_t n) noexcept { +template auto small_division_by_pow10(uint32_t n) noexcept -> uint32_t { constexpr auto info = div_small_pow10_infos[N - 1]; FMT_ASSERT(n <= info.divisor * 10, "n is too large"); constexpr uint32_t magic_number = @@ -312,24 +244,24 @@ template uint32_t small_division_by_pow10(uint32_t n) noexcept { } // Computes floor(n / 10^(kappa + 1)) (float) -inline uint32_t divide_by_10_to_kappa_plus_1(uint32_t n) noexcept { +inline auto divide_by_10_to_kappa_plus_1(uint32_t n) noexcept -> uint32_t { // 1374389535 = ceil(2^37/100) return static_cast((static_cast(n) * 1374389535) >> 37); } // Computes floor(n / 10^(kappa + 1)) (double) -inline uint64_t divide_by_10_to_kappa_plus_1(uint64_t n) noexcept { +inline auto divide_by_10_to_kappa_plus_1(uint64_t n) noexcept -> uint64_t { // 2361183241434822607 = ceil(2^(64+7)/1000) return umul128_upper64(n, 2361183241434822607ull) >> 7; } // Various subroutines using pow10 cache -template struct cache_accessor; +template struct cache_accessor; template <> struct cache_accessor { using carrier_uint = float_info::carrier_uint; using cache_entry_type = uint64_t; - static uint64_t get_cached_power(int k) noexcept { + static auto get_cached_power(int k) noexcept -> uint64_t { FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, "k is out of range"); static constexpr const uint64_t pow10_significands[] = { @@ -371,20 +303,23 @@ template <> struct cache_accessor { bool is_integer; }; - static compute_mul_result compute_mul( - carrier_uint u, const cache_entry_type& cache) noexcept { + static auto compute_mul(carrier_uint u, + const cache_entry_type& cache) noexcept + -> compute_mul_result { auto r = umul96_upper64(u, cache); return 
{static_cast(r >> 32), static_cast(r) == 0}; } - static uint32_t compute_delta(const cache_entry_type& cache, - int beta) noexcept { + static auto compute_delta(const cache_entry_type& cache, int beta) noexcept + -> uint32_t { return static_cast(cache >> (64 - 1 - beta)); } - static compute_mul_parity_result compute_mul_parity( - carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept { + static auto compute_mul_parity(carrier_uint two_f, + const cache_entry_type& cache, + int beta) noexcept + -> compute_mul_parity_result { FMT_ASSERT(beta >= 1, ""); FMT_ASSERT(beta < 64, ""); @@ -393,22 +328,22 @@ template <> struct cache_accessor { static_cast(r >> (32 - beta)) == 0}; } - static carrier_uint compute_left_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return static_cast( (cache - (cache >> (num_significand_bits() + 2))) >> (64 - num_significand_bits() - 1 - beta)); } - static carrier_uint compute_right_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return static_cast( (cache + (cache >> (num_significand_bits() + 1))) >> (64 - num_significand_bits() - 1 - beta)); } - static carrier_uint compute_round_up_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return (static_cast( cache >> (64 - num_significand_bits() - 2 - beta)) + 1) / @@ -420,7 +355,7 @@ template <> struct cache_accessor { using carrier_uint = float_info::carrier_uint; using cache_entry_type = uint128_fallback; - static uint128_fallback get_cached_power(int k) noexcept { + static auto 
get_cached_power(int k) noexcept -> uint128_fallback { FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, "k is out of range"); @@ -1044,8 +979,22 @@ template <> struct cache_accessor { {0xfcf62c1dee382c42, 0x46729e03dd9ed7b6}, {0x9e19db92b4e31ba9, 0x6c07a2c26a8346d2}, {0xc5a05277621be293, 0xc7098b7305241886}, - { 0xf70867153aa2db38, - 0xb8cbee4fc66d1ea8 } + {0xf70867153aa2db38, 0xb8cbee4fc66d1ea8}, + {0x9a65406d44a5c903, 0x737f74f1dc043329}, + {0xc0fe908895cf3b44, 0x505f522e53053ff3}, + {0xf13e34aabb430a15, 0x647726b9e7c68ff0}, + {0x96c6e0eab509e64d, 0x5eca783430dc19f6}, + {0xbc789925624c5fe0, 0xb67d16413d132073}, + {0xeb96bf6ebadf77d8, 0xe41c5bd18c57e890}, + {0x933e37a534cbaae7, 0x8e91b962f7b6f15a}, + {0xb80dc58e81fe95a1, 0x723627bbb5a4adb1}, + {0xe61136f2227e3b09, 0xcec3b1aaa30dd91d}, + {0x8fcac257558ee4e6, 0x213a4f0aa5e8a7b2}, + {0xb3bd72ed2af29e1f, 0xa988e2cd4f62d19e}, + {0xe0accfa875af45a7, 0x93eb1b80a33b8606}, + {0x8c6c01c9498d8b88, 0xbc72f130660533c4}, + {0xaf87023b9bf0ee6a, 0xeb8fad7c7f8680b5}, + {0xdb68c2ca82ed2a05, 0xa67398db9f6820e2}, #else {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, @@ -1069,8 +1018,8 @@ template <> struct cache_accessor { {0x8da471a9de737e24, 0x5ceaecfed289e5d3}, {0xe4d5e82392a40515, 0x0fabaf3feaa5334b}, {0xb8da1662e7b00a17, 0x3d6a751f3b936244}, - { 0x95527a5202df0ccb, - 0x0f37801e0c43ebc9 } + {0x95527a5202df0ccb, 0x0f37801e0c43ebc9}, + {0xf13e34aabb430a15, 0x647726b9e7c68ff0} #endif }; @@ -1130,19 +1079,22 @@ template <> struct cache_accessor { bool is_integer; }; - static compute_mul_result compute_mul( - carrier_uint u, const cache_entry_type& cache) noexcept { + static auto compute_mul(carrier_uint u, + const cache_entry_type& cache) noexcept + -> compute_mul_result { auto r = umul192_upper128(u, cache); return {r.high(), r.low() == 0}; } - static uint32_t compute_delta(cache_entry_type const& cache, - int beta) noexcept { + static auto compute_delta(cache_entry_type const& cache, 
int beta) noexcept + -> uint32_t { return static_cast(cache.high() >> (64 - 1 - beta)); } - static compute_mul_parity_result compute_mul_parity( - carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept { + static auto compute_mul_parity(carrier_uint two_f, + const cache_entry_type& cache, + int beta) noexcept + -> compute_mul_parity_result { FMT_ASSERT(beta >= 1, ""); FMT_ASSERT(beta < 64, ""); @@ -1151,31 +1103,35 @@ template <> struct cache_accessor { ((r.high() << beta) | (r.low() >> (64 - beta))) == 0}; } - static carrier_uint compute_left_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return (cache.high() - (cache.high() >> (num_significand_bits() + 2))) >> (64 - num_significand_bits() - 1 - beta); } - static carrier_uint compute_right_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return (cache.high() + (cache.high() >> (num_significand_bits() + 1))) >> (64 - num_significand_bits() - 1 - beta); } - static carrier_uint compute_round_up_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return ((cache.high() >> (64 - num_significand_bits() - 2 - beta)) + 1) / 2; } }; +FMT_FUNC auto get_cached_power(int k) noexcept -> uint128_fallback { + return cache_accessor::get_cached_power(k); +} + // Various integer checks -template -bool is_left_endpoint_integer_shorter_interval(int exponent) noexcept { +template +auto is_left_endpoint_integer_shorter_interval(int exponent) noexcept -> bool { const int case_shorter_interval_left_endpoint_lower_threshold = 
2; const int case_shorter_interval_left_endpoint_upper_threshold = 3; return exponent >= case_shorter_interval_left_endpoint_lower_threshold && @@ -1183,12 +1139,12 @@ bool is_left_endpoint_integer_shorter_interval(int exponent) noexcept { } // Remove trailing zeros from n and return the number of zeros removed (float) -FMT_INLINE int remove_trailing_zeros(uint32_t& n) noexcept { +FMT_INLINE int remove_trailing_zeros(uint32_t& n, int s = 0) noexcept { FMT_ASSERT(n != 0, ""); - const uint32_t mod_inv_5 = 0xcccccccd; - const uint32_t mod_inv_25 = mod_inv_5 * mod_inv_5; + // Modular inverse of 5 (mod 2^32): (mod_inv_5 * 5) mod 2^32 = 1. + constexpr uint32_t mod_inv_5 = 0xcccccccd; + constexpr uint32_t mod_inv_25 = 0xc28f5c29; // = mod_inv_5 * mod_inv_5 - int s = 0; while (true) { auto q = rotr(n * mod_inv_25, 2); if (q > max_value() / 100) break; @@ -1200,7 +1156,6 @@ FMT_INLINE int remove_trailing_zeros(uint32_t& n) noexcept { n = q; s |= 1; } - return s; } @@ -1214,32 +1169,17 @@ FMT_INLINE int remove_trailing_zeros(uint64_t& n) noexcept { // Is n is divisible by 10^8? if ((nm.high() & ((1ull << (90 - 64)) - 1)) == 0 && nm.low() < magic_number) { - // If yes, work with the quotient. + // If yes, work with the quotient... auto n32 = static_cast(nm.high() >> (90 - 64)); - - const uint32_t mod_inv_5 = 0xcccccccd; - const uint32_t mod_inv_25 = mod_inv_5 * mod_inv_5; - - int s = 8; - while (true) { - auto q = rotr(n32 * mod_inv_25, 2); - if (q > max_value() / 100) break; - n32 = q; - s += 2; - } - auto q = rotr(n32 * mod_inv_5, 1); - if (q <= max_value() / 10) { - n32 = q; - s |= 1; - } - + // ... and use the 32 bit variant of the function + int s = remove_trailing_zeros(n32, 8); n = n32; return s; } // If n is not divisible by 10^8, work with n itself. 
- const uint64_t mod_inv_5 = 0xcccccccccccccccd; - const uint64_t mod_inv_25 = mod_inv_5 * mod_inv_5; + constexpr uint64_t mod_inv_5 = 0xcccccccccccccccd; + constexpr uint64_t mod_inv_25 = 0x8f5c28f5c28f5c29; // mod_inv_5 * mod_inv_5 int s = 0; while (true) { @@ -1258,7 +1198,7 @@ FMT_INLINE int remove_trailing_zeros(uint64_t& n) noexcept { } // The main algorithm for shorter interval case -template +template FMT_INLINE decimal_fp shorter_interval_case(int exponent) noexcept { decimal_fp ret_value; // Compute k and beta @@ -1305,7 +1245,7 @@ FMT_INLINE decimal_fp shorter_interval_case(int exponent) noexcept { return ret_value; } -template decimal_fp to_decimal(T x) noexcept { +template auto to_decimal(T x) noexcept -> decimal_fp { // Step 1: integer promotion & Schubfach multiplier calculation. using carrier_uint = typename float_info::carrier_uint; @@ -1429,17 +1369,6 @@ template decimal_fp to_decimal(T x) noexcept { return ret_value; } } // namespace dragonbox - -#ifdef _MSC_VER -FMT_FUNC auto fmt_snprintf(char* buf, size_t size, const char* fmt, ...) 
- -> int { - auto args = va_list(); - va_start(args, fmt); - int result = vsnprintf_s(buf, size, _TRUNCATE, fmt, args); - va_end(args); - return result; -} -#endif } // namespace detail template <> struct formatter { @@ -1455,15 +1384,15 @@ template <> struct formatter { for (auto i = n.bigits_.size(); i > 0; --i) { auto value = n.bigits_[i - 1u]; if (first) { - out = format_to(out, FMT_STRING("{:x}"), value); + out = fmt::format_to(out, FMT_STRING("{:x}"), value); first = false; continue; } - out = format_to(out, FMT_STRING("{:08x}"), value); + out = fmt::format_to(out, FMT_STRING("{:08x}"), value); } if (n.exp_ > 0) - out = format_to(out, FMT_STRING("p{}"), - n.exp_ * detail::bigint::bigit_bits); + out = fmt::format_to(out, FMT_STRING("p{}"), + n.exp_ * detail::bigint::bigit_bits); return out; } }; @@ -1485,14 +1414,12 @@ FMT_FUNC detail::utf8_to_utf16::utf8_to_utf16(string_view s) { FMT_FUNC void format_system_error(detail::buffer& out, int error_code, const char* message) noexcept { -#if !__NVCC__ FMT_TRY { auto ec = std::error_code(error_code, std::generic_category()); - write(std::back_inserter(out), std::system_error(ec, message).what()); + detail::write(appender(out), std::system_error(ec, message).what()); return; } FMT_CATCH(...) {} -#endif format_error_code(out, error_code, message); } @@ -1501,7 +1428,7 @@ FMT_FUNC void report_system_error(int error_code, report_error(format_system_error, error_code, message); } -FMT_FUNC std::string vformat(string_view fmt, format_args args) { +FMT_FUNC auto vformat(string_view fmt, format_args args) -> std::string { // Don't optimize the "{}" case to keep the binary size small and because it // can be better optimized in fmt::format anyway. 
auto buffer = memory_buffer(); @@ -1510,57 +1437,299 @@ FMT_FUNC std::string vformat(string_view fmt, format_args args) { } namespace detail { -#ifdef _WIN32 + +template struct span { + T* data; + size_t size; +}; + +template auto flockfile(F* f) -> decltype(_lock_file(f)) { + _lock_file(f); +} +template auto funlockfile(F* f) -> decltype(_unlock_file(f)) { + _unlock_file(f); +} + +#ifndef getc_unlocked +template auto getc_unlocked(F* f) -> decltype(_fgetc_nolock(f)) { + return _fgetc_nolock(f); +} +#endif + +template +struct has_flockfile : std::false_type {}; + +template +struct has_flockfile()))>> + : std::true_type {}; + +// A FILE wrapper. F is FILE defined as a template parameter to make system API +// detection work. +template class file_base { + public: + F* file_; + + public: + file_base(F* file) : file_(file) {} + operator F*() const { return file_; } + + // Reads a code unit from the stream. + auto get() -> int { + int result = getc_unlocked(file_); + if (result == EOF && ferror(file_) != 0) + FMT_THROW(system_error(errno, FMT_STRING("getc failed"))); + return result; + } + + // Puts the code unit back into the stream buffer. + void unget(char c) { + if (ungetc(c, file_) == EOF) + FMT_THROW(system_error(errno, FMT_STRING("ungetc failed"))); + } + + void flush() { fflush(this->file_); } +}; + +// A FILE wrapper for glibc. +template class glibc_file : public file_base { + private: + enum { + line_buffered = 0x200, // _IO_LINE_BUF + unbuffered = 2 // _IO_UNBUFFERED + }; + + public: + using file_base::file_base; + + auto is_buffered() const -> bool { + return (this->file_->_flags & unbuffered) == 0; + } + + void init_buffer() { + if (this->file_->_IO_write_ptr) return; + // Force buffer initialization by placing and removing a char in a buffer. + putc_unlocked(0, this->file_); + --this->file_->_IO_write_ptr; + } + + // Returns the file's read buffer. 
+ auto get_read_buffer() const -> span { + auto ptr = this->file_->_IO_read_ptr; + return {ptr, to_unsigned(this->file_->_IO_read_end - ptr)}; + } + + // Returns the file's write buffer. + auto get_write_buffer() const -> span { + auto ptr = this->file_->_IO_write_ptr; + return {ptr, to_unsigned(this->file_->_IO_buf_end - ptr)}; + } + + void advance_write_buffer(size_t size) { this->file_->_IO_write_ptr += size; } + + bool needs_flush() const { + if ((this->file_->_flags & line_buffered) == 0) return false; + char* end = this->file_->_IO_write_end; + return memchr(end, '\n', to_unsigned(this->file_->_IO_write_ptr - end)); + } + + void flush() { fflush_unlocked(this->file_); } +}; + +// A FILE wrapper for Apple's libc. +template class apple_file : public file_base { + private: + enum { + line_buffered = 1, // __SNBF + unbuffered = 2 // __SLBF + }; + + public: + using file_base::file_base; + + auto is_buffered() const -> bool { + return (this->file_->_flags & unbuffered) == 0; + } + + void init_buffer() { + if (this->file_->_p) return; + // Force buffer initialization by placing and removing a char in a buffer. + putc_unlocked(0, this->file_); + --this->file_->_p; + ++this->file_->_w; + } + + auto get_read_buffer() const -> span { + return {reinterpret_cast(this->file_->_p), + to_unsigned(this->file_->_r)}; + } + + auto get_write_buffer() const -> span { + return {reinterpret_cast(this->file_->_p), + to_unsigned(this->file_->_bf._base + this->file_->_bf._size - + this->file_->_p)}; + } + + void advance_write_buffer(size_t size) { + this->file_->_p += size; + this->file_->_w -= size; + } + + bool needs_flush() const { + if ((this->file_->_flags & line_buffered) == 0) return false; + return memchr(this->file_->_p + this->file_->_w, '\n', + to_unsigned(-this->file_->_w)); + } +}; + +// A fallback FILE wrapper. +template class fallback_file : public file_base { + private: + char next_; // The next unconsumed character in the buffer. 
+ bool has_next_ = false; + + public: + using file_base::file_base; + + auto is_buffered() const -> bool { return false; } + auto needs_flush() const -> bool { return false; } + void init_buffer() {} + + auto get_read_buffer() const -> span { + return {&next_, has_next_ ? 1u : 0u}; + } + + auto get_write_buffer() const -> span { return {nullptr, 0}; } + + void advance_write_buffer(size_t) {} + + auto get() -> int { + has_next_ = false; + return file_base::get(); + } + + void unget(char c) { + file_base::unget(c); + next_ = c; + has_next_ = true; + } +}; + +#ifndef FMT_USE_FALLBACK_FILE +# define FMT_USE_FALLBACK_FILE 1 +#endif + +template +auto get_file(F* f, int) -> apple_file { + return f; +} +template +inline auto get_file(F* f, int) -> glibc_file { + return f; +} + +inline auto get_file(FILE* f, ...) -> fallback_file { return f; } + +using file_ref = decltype(get_file(static_cast(nullptr), 0)); + +template +class file_print_buffer : public buffer { + public: + explicit file_print_buffer(F*) : buffer(nullptr, size_t()) {} +}; + +template +class file_print_buffer::value>> + : public buffer { + private: + file_ref file_; + + static void grow(buffer& base, size_t) { + auto& self = static_cast(base); + self.file_.advance_write_buffer(self.size()); + if (self.file_.get_write_buffer().size == 0) self.file_.flush(); + auto buf = self.file_.get_write_buffer(); + FMT_ASSERT(buf.size > 0, ""); + self.set(buf.data, buf.size); + self.clear(); + } + + public: + explicit file_print_buffer(F* f) : buffer(grow, size_t()), file_(f) { + flockfile(f); + file_.init_buffer(); + auto buf = file_.get_write_buffer(); + set(buf.data, buf.size); + } + ~file_print_buffer() { + file_.advance_write_buffer(size()); + bool flush = file_.needs_flush(); + F* f = file_; // Make funlockfile depend on the template parameter F + funlockfile(f); // for the system API detection to work. 
+ if (flush) fflush(file_); + } +}; + +#if !defined(_WIN32) || defined(FMT_USE_WRITE_CONSOLE) +FMT_FUNC auto write_console(int, string_view) -> bool { return false; } +#else using dword = conditional_t; extern "C" __declspec(dllimport) int __stdcall WriteConsoleW( // void*, const void*, dword, dword*, void*); -FMT_FUNC bool write_console(std::FILE* f, string_view text) { - auto fd = _fileno(f); - if (_isatty(fd)) { - detail::utf8_to_utf16 u16(string_view(text.data(), text.size())); - auto written = detail::dword(); - if (detail::WriteConsoleW(reinterpret_cast(_get_osfhandle(fd)), - u16.c_str(), static_cast(u16.size()), - &written, nullptr)) { - return true; - } - } - // We return false if the file descriptor was not TTY, or it was but - // SetConsoleW failed which can happen if the output has been redirected to - // NUL. In both cases when we return false, we should attempt to do regular - // write via fwrite or std::ostream::write. - return false; +FMT_FUNC bool write_console(int fd, string_view text) { + auto u16 = utf8_to_utf16(text); + return WriteConsoleW(reinterpret_cast(_get_osfhandle(fd)), u16.c_str(), + static_cast(u16.size()), nullptr, nullptr) != 0; } #endif -FMT_FUNC void print(std::FILE* f, string_view text) { #ifdef _WIN32 - if (write_console(f, text)) return; +// Print assuming legacy (non-Unicode) encoding. 
+FMT_FUNC void vprint_mojibake(std::FILE* f, string_view fmt, format_args args, + bool newline) { + auto buffer = memory_buffer(); + detail::vformat_to(buffer, fmt, args); + if (newline) buffer.push_back('\n'); + fwrite_fully(buffer.data(), buffer.size(), f); +} #endif - detail::fwrite_fully(text.data(), 1, text.size(), f); + +FMT_FUNC void print(std::FILE* f, string_view text) { +#if defined(_WIN32) && !defined(FMT_USE_WRITE_CONSOLE) + int fd = _fileno(f); + if (_isatty(fd)) { + std::fflush(f); + if (write_console(fd, text)) return; + } +#endif + fwrite_fully(text.data(), text.size(), f); } } // namespace detail -FMT_FUNC void vprint(std::FILE* f, string_view format_str, format_args args) { - memory_buffer buffer; - detail::vformat_to(buffer, format_str, args); +FMT_FUNC void vprint_buffered(std::FILE* f, string_view fmt, format_args args) { + auto buffer = memory_buffer(); + detail::vformat_to(buffer, fmt, args); detail::print(f, {buffer.data(), buffer.size()}); } -#ifdef _WIN32 -// Print assuming legacy (non-Unicode) encoding. 
-FMT_FUNC void detail::vprint_mojibake(std::FILE* f, string_view format_str, - format_args args) { - memory_buffer buffer; - detail::vformat_to(buffer, format_str, - basic_format_args>(args)); - fwrite_fully(buffer.data(), 1, buffer.size(), f); +FMT_FUNC void vprint(std::FILE* f, string_view fmt, format_args args) { + if (!detail::file_ref(f).is_buffered() || !detail::has_flockfile<>()) + return vprint_buffered(f, fmt, args); + auto&& buffer = detail::file_print_buffer<>(f); + return detail::vformat_to(buffer, fmt, args); +} + +FMT_FUNC void vprintln(std::FILE* f, string_view fmt, format_args args) { + auto buffer = memory_buffer(); + detail::vformat_to(buffer, fmt, args); + buffer.push_back('\n'); + detail::print(f, {buffer.data(), buffer.size()}); } -#endif -FMT_FUNC void vprint(string_view format_str, format_args args) { - vprint(stdout, format_str, args); +FMT_FUNC void vprint(string_view fmt, format_args args) { + vprint(stdout, fmt, args); } namespace detail { diff --git a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/format.h b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/format.h index 8dda88727d5c..67f0ab739b0d 100644 --- a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/format.h +++ b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/format.h @@ -33,31 +33,65 @@ #ifndef FMT_FORMAT_H_ #define FMT_FORMAT_H_ -#include // std::signbit -#include // uint32_t -#include // std::memcpy -#include // std::initializer_list -#include // std::numeric_limits -#include // std::uninitialized_copy -#include // std::runtime_error -#include // std::system_error - -#ifdef __cpp_lib_bit_cast -# include // std::bitcast +#ifndef _LIBCPP_REMOVE_TRANSITIVE_INCLUDES +# define _LIBCPP_REMOVE_TRANSITIVE_INCLUDES +# define FMT_REMOVE_TRANSITIVE_INCLUDES #endif -#include "core.h" +#include "base.h" + +#ifndef FMT_MODULE +# include // std::signbit +# include // uint32_t +# include // std::memcpy +# include // std::initializer_list +# include // 
std::numeric_limits +# if defined(__GLIBCXX__) && !defined(_GLIBCXX_USE_DUAL_ABI) +// Workaround for pre gcc 5 libstdc++. +# include // std::allocator_traits +# endif +# include // std::runtime_error +# include // std::string +# include // std::system_error + +// Checking FMT_CPLUSPLUS for warning suppression in MSVC. +# if FMT_HAS_INCLUDE() && FMT_CPLUSPLUS > 201703L +# include // std::bit_cast +# endif + +// libc++ supports string_view in pre-c++17. +# if FMT_HAS_INCLUDE() && \ + (FMT_CPLUSPLUS >= 201703L || defined(_LIBCPP_VERSION)) +# include +# define FMT_USE_STRING_VIEW +# endif +#endif // FMT_MODULE -#if FMT_GCC_VERSION -# define FMT_GCC_VISIBILITY_HIDDEN __attribute__((visibility("hidden"))) +#if defined __cpp_inline_variables && __cpp_inline_variables >= 201606L +# define FMT_INLINE_VARIABLE inline #else -# define FMT_GCC_VISIBILITY_HIDDEN +# define FMT_INLINE_VARIABLE +#endif + +#ifndef FMT_NO_UNIQUE_ADDRESS +# if FMT_CPLUSPLUS >= 202002L +# if FMT_HAS_CPP_ATTRIBUTE(no_unique_address) +# define FMT_NO_UNIQUE_ADDRESS [[no_unique_address]] +// VS2019 v16.10 and later except clang-cl (https://reviews.llvm.org/D110485). +# elif (FMT_MSC_VERSION >= 1929) && !FMT_CLANG_VERSION +# define FMT_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]] +# endif +# endif +#endif +#ifndef FMT_NO_UNIQUE_ADDRESS +# define FMT_NO_UNIQUE_ADDRESS #endif -#ifdef __NVCC__ -# define FMT_CUDA_VERSION (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__) +// Visibility when compiled as a shared library/object. 
+#if defined(FMT_LIB_EXPORT) || defined(FMT_SHARED) +# define FMT_SO_VISIBILITY(value) FMT_VISIBILITY(value) #else -# define FMT_CUDA_VERSION 0 +# define FMT_SO_VISIBILITY(value) #endif #ifdef __has_builtin @@ -72,11 +106,12 @@ # define FMT_NOINLINE #endif -#if FMT_MSC_VERSION -# define FMT_MSC_DEFAULT = default -#else -# define FMT_MSC_DEFAULT -#endif +namespace std { +template <> struct iterator_traits { + using iterator_category = output_iterator_tag; + using value_type = char; +}; +} // namespace std #ifndef FMT_THROW # if FMT_EXCEPTIONS @@ -96,21 +131,11 @@ FMT_END_NAMESPACE # define FMT_THROW(x) throw x # endif # else -# define FMT_THROW(x) \ - do { \ - FMT_ASSERT(false, (x).what()); \ - } while (false) +# define FMT_THROW(x) \ + ::fmt::detail::assert_fail(__FILE__, __LINE__, (x).what()) # endif #endif -#if FMT_EXCEPTIONS -# define FMT_TRY try -# define FMT_CATCH(x) catch (x) -#else -# define FMT_TRY if (true) -# define FMT_CATCH(x) if (false) -#endif - #ifndef FMT_MAYBE_UNUSED # if FMT_HAS_CPP17_ATTRIBUTE(maybe_unused) # define FMT_MAYBE_UNUSED [[maybe_unused]] @@ -121,7 +146,10 @@ FMT_END_NAMESPACE #ifndef FMT_USE_USER_DEFINED_LITERALS // EDG based compilers (Intel, NVIDIA, Elbrus, etc), GCC and MSVC support UDLs. -# if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 407 || \ +// +// GCC before 4.9 requires a space in `operator"" _a` which is invalid in later +// compiler versions. +# if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 409 || \ FMT_MSC_VERSION >= 1900) && \ (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= /* UDL feature */ 480) # define FMT_USE_USER_DEFINED_LITERALS 1 @@ -201,7 +229,8 @@ inline auto clzll(uint64_t x) -> int { _BitScanReverse64(&r, x); # else // Scan the high 32 bits. - if (_BitScanReverse(&r, static_cast(x >> 32))) return 63 ^ (r + 32); + if (_BitScanReverse(&r, static_cast(x >> 32))) + return 63 ^ static_cast(r + 32); // Scan the low 32 bits. 
_BitScanReverse(&r, static_cast(x)); # endif @@ -241,6 +270,11 @@ FMT_END_NAMESPACE #endif FMT_BEGIN_NAMESPACE + +template +struct is_contiguous> + : std::true_type {}; + namespace detail { FMT_CONSTEXPR inline void abort_fuzzing_if(bool condition) { @@ -250,49 +284,12 @@ FMT_CONSTEXPR inline void abort_fuzzing_if(bool condition) { #endif } -template struct string_literal { - static constexpr CharT value[sizeof...(C)] = {C...}; - constexpr operator basic_string_view() const { - return {value, sizeof...(C)}; - } -}; - -#if FMT_CPLUSPLUS < 201703L -template -constexpr CharT string_literal::value[sizeof...(C)]; +#if defined(FMT_USE_STRING_VIEW) +template using std_string_view = std::basic_string_view; +#else +template struct std_string_view {}; #endif -template class formatbuf : public Streambuf { - private: - using char_type = typename Streambuf::char_type; - using streamsize = decltype(std::declval().sputn(nullptr, 0)); - using int_type = typename Streambuf::int_type; - using traits_type = typename Streambuf::traits_type; - - buffer& buffer_; - - public: - explicit formatbuf(buffer& buf) : buffer_(buf) {} - - protected: - // The put area is always empty. This makes the implementation simpler and has - // the advantage that the streambuf and the buffer are always in sync and - // sputc never writes into uninitialized memory. A disadvantage is that each - // call to sputc always results in a (virtual) call to overflow. There is no - // disadvantage here for sputn since this always results in a call to xsputn. - - auto overflow(int_type ch) -> int_type override { - if (!traits_type::eq_int_type(ch, traits_type::eof())) - buffer_.push_back(static_cast(ch)); - return ch; - } - - auto xsputn(const char_type* s, streamsize count) -> streamsize override { - buffer_.append(s, s + count); - return count; - } -}; - // Implementation of std::bit_cast for pre-C++20. 
template FMT_CONSTEXPR20 auto bit_cast(const From& from) -> To { @@ -324,14 +321,12 @@ class uint128_fallback { private: uint64_t lo_, hi_; - friend uint128_fallback umul128(uint64_t x, uint64_t y) noexcept; - public: constexpr uint128_fallback(uint64_t hi, uint64_t lo) : lo_(lo), hi_(hi) {} constexpr uint128_fallback(uint64_t value = 0) : lo_(value), hi_(0) {} - constexpr uint64_t high() const noexcept { return hi_; } - constexpr uint64_t low() const noexcept { return lo_; } + constexpr auto high() const noexcept -> uint64_t { return hi_; } + constexpr auto low() const noexcept -> uint64_t { return lo_; } template ::value)> constexpr explicit operator T() const { @@ -360,6 +355,10 @@ class uint128_fallback { -> uint128_fallback { return {lhs.hi_ & rhs.hi_, lhs.lo_ & rhs.lo_}; } + friend constexpr auto operator~(const uint128_fallback& n) + -> uint128_fallback { + return {~n.hi_, ~n.lo_}; + } friend auto operator+(const uint128_fallback& lhs, const uint128_fallback& rhs) -> uint128_fallback { auto result = uint128_fallback(lhs); @@ -398,8 +397,12 @@ class uint128_fallback { lo_ = new_lo; hi_ = new_hi; } + FMT_CONSTEXPR void operator&=(uint128_fallback n) { + lo_ &= n.lo_; + hi_ &= n.hi_; + } - FMT_CONSTEXPR20 uint128_fallback& operator+=(uint64_t n) noexcept { + FMT_CONSTEXPR20 auto operator+=(uint64_t n) noexcept -> uint128_fallback& { if (is_constant_evaluated()) { lo_ += n; hi_ += (lo_ < n ? 1 : 0); @@ -443,7 +446,8 @@ template constexpr auto num_bits() -> int { } // std::numeric_limits::digits may return 0 for 128-bit ints. template <> constexpr auto num_bits() -> int { return 128; } -template <> constexpr auto num_bits() -> int { return 128; } +template <> constexpr auto num_bits() -> int { return 128; } +template <> constexpr auto num_bits() -> int { return 128; } // A heterogeneous bit_cast used for converting 96-bit long double to uint128_t // and 128-bit pointers to uint128_fallback. 
@@ -464,10 +468,34 @@ inline auto bit_cast(const From& from) -> To { return result; } +template +FMT_CONSTEXPR20 inline auto countl_zero_fallback(UInt n) -> int { + int lz = 0; + constexpr UInt msb_mask = static_cast(1) << (num_bits() - 1); + for (; (n & msb_mask) == 0; n <<= 1) lz++; + return lz; +} + +FMT_CONSTEXPR20 inline auto countl_zero(uint32_t n) -> int { +#ifdef FMT_BUILTIN_CLZ + if (!is_constant_evaluated()) return FMT_BUILTIN_CLZ(n); +#endif + return countl_zero_fallback(n); +} + +FMT_CONSTEXPR20 inline auto countl_zero(uint64_t n) -> int { +#ifdef FMT_BUILTIN_CLZLL + if (!is_constant_evaluated()) return FMT_BUILTIN_CLZLL(n); +#endif + return countl_zero_fallback(n); +} + FMT_INLINE void assume(bool condition) { (void)condition; #if FMT_HAS_BUILTIN(__builtin_assume) && !FMT_ICC_VERSION __builtin_assume(condition); +#elif FMT_GCC_VERSION + if (!condition) __builtin_unreachable(); #endif } @@ -486,37 +514,24 @@ inline auto get_data(Container& c) -> typename Container::value_type* { return c.data(); } -#if defined(_SECURE_SCL) && _SECURE_SCL -// Make a checked iterator to avoid MSVC warnings. -template using checked_ptr = stdext::checked_array_iterator; -template -constexpr auto make_checked(T* p, size_t size) -> checked_ptr { - return {p, size}; -} -#else -template using checked_ptr = T*; -template constexpr auto make_checked(T* p, size_t) -> T* { - return p; -} -#endif - // Attempts to reserve space for n extra characters in the output range. // Returns a pointer to the reserved range or a reference to it. 
-template ::value)> +template ::value&& + is_contiguous::value)> #if FMT_CLANG_VERSION >= 307 && !FMT_ICC_VERSION __attribute__((no_sanitize("undefined"))) #endif inline auto -reserve(std::back_insert_iterator it, size_t n) - -> checked_ptr { - Container& c = get_container(it); +reserve(OutputIt it, size_t n) -> typename OutputIt::value_type* { + auto& c = get_container(it); size_t size = c.size(); c.resize(size + n); - return make_checked(get_data(c) + size, n); + return get_data(c) + size; } template -inline auto reserve(buffer_appender it, size_t n) -> buffer_appender { +inline auto reserve(basic_appender it, size_t n) -> basic_appender { buffer& buf = get_container(it); buf.try_reserve(buf.size() + n); return it; @@ -535,18 +550,21 @@ template constexpr auto to_pointer(OutputIt, size_t) -> T* { return nullptr; } -template auto to_pointer(buffer_appender it, size_t n) -> T* { +template auto to_pointer(basic_appender it, size_t n) -> T* { buffer& buf = get_container(it); auto size = buf.size(); + buf.try_reserve(size + n); if (buf.capacity() < size + n) return nullptr; buf.try_resize(size + n); return buf.data() + size; } -template ::value)> -inline auto base_iterator(std::back_insert_iterator& it, - checked_ptr) - -> std::back_insert_iterator { +template ::value&& + is_contiguous::value)> +inline auto base_iterator(OutputIt it, + typename OutputIt::container_type::value_type*) + -> OutputIt { return it; } @@ -572,16 +590,10 @@ FMT_CONSTEXPR20 auto fill_n(T* out, Size count, char value) -> T* { return out + count; } -#ifdef __cpp_char8_t -using char8_type = char8_t; -#else -enum char8_type : unsigned char {}; -#endif - template -FMT_CONSTEXPR FMT_NOINLINE auto copy_str_noinline(InputIt begin, InputIt end, - OutputIt out) -> OutputIt { - return copy_str(begin, end, out); +FMT_CONSTEXPR FMT_NOINLINE auto copy_noinline(InputIt begin, InputIt end, + OutputIt out) -> OutputIt { + return copy(begin, end, out); } // A public domain branchless UTF-8 decoder by 
Christopher Wellons: @@ -608,7 +620,8 @@ FMT_CONSTEXPR inline auto utf8_decode(const char* s, uint32_t* c, int* e) constexpr const int shiftc[] = {0, 18, 12, 6, 0}; constexpr const int shifte[] = {0, 6, 4, 2, 0}; - int len = code_point_length_impl(*s); + int len = "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0\0\0\2\2\2\2\3\3\4" + [static_cast(*s) >> 3]; // Compute the pointer to the next character early so that the next // iteration can start working on the next character. Neither Clang // nor GCC figure out this reordering on their own. @@ -637,7 +650,7 @@ FMT_CONSTEXPR inline auto utf8_decode(const char* s, uint32_t* c, int* e) return next; } -constexpr uint32_t invalid_code_point = ~uint32_t(); +constexpr FMT_INLINE_VARIABLE uint32_t invalid_code_point = ~uint32_t(); // Invokes f(cp, sv) for every code point cp in s with sv being the string view // corresponding to the code point. cp is invalid_code_point on error. @@ -661,7 +674,7 @@ FMT_CONSTEXPR void for_each_codepoint(string_view s, F f) { } if (auto num_chars_left = s.data() + s.size() - p) { char buf[2 * block_size - 1] = {}; - copy_str(p, p + num_chars_left, buf); + copy(p, p + num_chars_left, buf); const char* buf_ptr = buf; do { auto end = decode(buf_ptr, p); @@ -678,7 +691,7 @@ inline auto compute_width(basic_string_view s) -> size_t { } // Computes approximate display width of a UTF-8 string. -FMT_CONSTEXPR inline size_t compute_width(string_view s) { +FMT_CONSTEXPR inline auto compute_width(string_view s) -> size_t { size_t num_code_points = 0; // It is not a lambda for compatibility with C++14. 
struct count_code_points { @@ -712,11 +725,6 @@ FMT_CONSTEXPR inline size_t compute_width(string_view s) { return num_code_points; } -inline auto compute_width(basic_string_view s) -> size_t { - return compute_width( - string_view(reinterpret_cast(s.data()), s.size())); -} - template inline auto code_point_index(basic_string_view s, size_t n) -> size_t { size_t size = s.size(); @@ -725,18 +733,17 @@ inline auto code_point_index(basic_string_view s, size_t n) -> size_t { // Calculates the index of the nth code point in a UTF-8 string. inline auto code_point_index(string_view s, size_t n) -> size_t { - const char* data = s.data(); - size_t num_code_points = 0; - for (size_t i = 0, size = s.size(); i != size; ++i) { - if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) return i; - } - return s.size(); -} - -inline auto code_point_index(basic_string_view s, size_t n) - -> size_t { - return code_point_index( - string_view(reinterpret_cast(s.data()), s.size()), n); + size_t result = s.size(); + const char* begin = s.begin(); + for_each_codepoint(s, [begin, &n, &result](uint32_t, string_view sv) { + if (n != 0) { + --n; + return true; + } + result = to_unsigned(sv.begin() - begin); + return false; + }); + return result; } template struct is_integral : std::is_integral {}; @@ -754,18 +761,32 @@ using is_integer = !std::is_same::value && !std::is_same::value>; -#ifndef FMT_USE_FLOAT128 -# ifdef __SIZEOF_FLOAT128__ -# define FMT_USE_FLOAT128 1 -# else -# define FMT_USE_FLOAT128 0 -# endif +#ifndef FMT_USE_FLOAT +# define FMT_USE_FLOAT 1 +#endif +#ifndef FMT_USE_DOUBLE +# define FMT_USE_DOUBLE 1 +#endif +#ifndef FMT_USE_LONG_DOUBLE +# define FMT_USE_LONG_DOUBLE 1 +#endif + +#if defined(FMT_USE_FLOAT128) +// Use the provided definition. 
+#elif FMT_CLANG_VERSION && FMT_HAS_INCLUDE() +# define FMT_USE_FLOAT128 1 +#elif FMT_GCC_VERSION && defined(_GLIBCXX_USE_FLOAT128) && \ + !defined(__STRICT_ANSI__) +# define FMT_USE_FLOAT128 1 +#else +# define FMT_USE_FLOAT128 0 #endif #if FMT_USE_FLOAT128 using float128 = __float128; #else using float128 = void; #endif + template using is_float128 = std::is_same; template @@ -784,61 +805,39 @@ using is_double_double = bool_constant::digits == 106>; # define FMT_USE_FULL_CACHE_DRAGONBOX 0 #endif -template -template -void buffer::append(const U* begin, const U* end) { - while (begin != end) { - auto count = to_unsigned(end - begin); - try_reserve(size_ + count); - auto free_cap = capacity_ - size_; - if (free_cap < count) count = free_cap; - std::uninitialized_copy_n(begin, count, make_checked(ptr_ + size_, count)); - size_ += count; - begin += count; - } -} - template struct is_locale : std::false_type {}; template struct is_locale> : std::true_type {}; } // namespace detail -FMT_MODULE_EXPORT_BEGIN +FMT_BEGIN_EXPORT // The number of characters to store in the basic_memory_buffer object itself // to avoid dynamic memory allocation. enum { inline_buffer_size = 500 }; /** - \rst - A dynamically growing memory buffer for trivially copyable/constructible types - with the first ``SIZE`` elements stored in the object itself. - - You can use the ``memory_buffer`` type alias for ``char`` instead. - - **Example**:: - - auto out = fmt::memory_buffer(); - format_to(std::back_inserter(out), "The answer is {}.", 42); - - This will append the following output to the ``out`` object: - - .. code-block:: none - - The answer is 42. - - The output can be converted to an ``std::string`` with ``to_string(out)``. - \endrst + * A dynamically growing memory buffer for trivially copyable/constructible + * types with the first `SIZE` elements stored in the object itself. Most + * commonly used via the `memory_buffer` alias for `char`. 
+ * + * **Example**: + * + * auto out = fmt::memory_buffer(); + * fmt::format_to(std::back_inserter(out), "The answer is {}.", 42); + * + * This will append "The answer is 42." to `out`. The buffer content can be + * converted to `std::string` with `to_string(out)`. */ template > -class basic_memory_buffer final : public detail::buffer { +class basic_memory_buffer : public detail::buffer { private: T store_[SIZE]; - // Don't inherit from Allocator avoid generating type_info for it. - Allocator alloc_; + // Don't inherit from Allocator to avoid generating type_info for it. + FMT_NO_UNIQUE_ADDRESS Allocator alloc_; // Deallocate memory allocated by the buffer. FMT_CONSTEXPR20 void deallocate() { @@ -846,8 +845,29 @@ class basic_memory_buffer final : public detail::buffer { if (data != store_) alloc_.deallocate(data, this->capacity()); } - protected: - FMT_CONSTEXPR20 void grow(size_t size) override; + static FMT_CONSTEXPR20 void grow(detail::buffer& buf, size_t size) { + detail::abort_fuzzing_if(size > 5000); + auto& self = static_cast(buf); + const size_t max_size = + std::allocator_traits::max_size(self.alloc_); + size_t old_capacity = buf.capacity(); + size_t new_capacity = old_capacity + old_capacity / 2; + if (size > new_capacity) + new_capacity = size; + else if (new_capacity > max_size) + new_capacity = size > max_size ? size : max_size; + T* old_data = buf.data(); + T* new_data = self.alloc_.allocate(new_capacity); + // Suppress a bogus -Wstringop-overflow in gcc 13.1 (#3481). + detail::assume(buf.size() <= new_capacity); + // The following code doesn't throw, so the raw pointer above doesn't leak. + memcpy(new_data, old_data, buf.size() * sizeof(T)); + self.set(new_data, new_capacity); + // deallocate must not throw according to the standard, but even if it does, + // the buffer already uses the new storage and will deallocate it in + // destructor. 
+ if (old_data != self.store_) self.alloc_.deallocate(old_data, old_capacity); + } public: using value_type = T; @@ -855,7 +875,7 @@ class basic_memory_buffer final : public detail::buffer { FMT_CONSTEXPR20 explicit basic_memory_buffer( const Allocator& alloc = Allocator()) - : alloc_(alloc) { + : detail::buffer(grow), alloc_(alloc) { this->set(store_, SIZE); if (detail::is_constant_evaluated()) detail::fill_n(store_, SIZE, T()); } @@ -869,8 +889,7 @@ class basic_memory_buffer final : public detail::buffer { size_t size = other.size(), capacity = other.capacity(); if (data == other.store_) { this->set(store_, capacity); - detail::copy_str(other.store_, other.store_ + size, - detail::make_checked(store_, capacity)); + detail::copy(other.store_, other.store_ + size, store_); } else { this->set(data, capacity); // Set pointer to the inline array so that delete is not called @@ -882,21 +901,14 @@ class basic_memory_buffer final : public detail::buffer { } public: - /** - \rst - Constructs a :class:`fmt::basic_memory_buffer` object moving the content - of the other object to it. - \endrst - */ - FMT_CONSTEXPR20 basic_memory_buffer(basic_memory_buffer&& other) noexcept { + /// Constructs a `basic_memory_buffer` object moving the content of the other + /// object to it. + FMT_CONSTEXPR20 basic_memory_buffer(basic_memory_buffer&& other) noexcept + : detail::buffer(grow) { move(other); } - /** - \rst - Moves the content of the other ``basic_memory_buffer`` object to this one. - \endrst - */ + /// Moves the content of the other `basic_memory_buffer` object to this one. auto operator=(basic_memory_buffer&& other) noexcept -> basic_memory_buffer& { FMT_ASSERT(this != &other, ""); deallocate(); @@ -907,16 +919,13 @@ class basic_memory_buffer final : public detail::buffer { // Returns a copy of the allocator associated with this buffer. auto get_allocator() const -> Allocator { return alloc_; } - /** - Resizes the buffer to contain *count* elements. 
If T is a POD type new - elements may not be initialized. - */ + /// Resizes the buffer to contain `count` elements. If T is a POD type new + /// elements may not be initialized. FMT_CONSTEXPR20 void resize(size_t count) { this->try_resize(count); } - /** Increases the buffer capacity to *new_capacity*. */ + /// Increases the buffer capacity to `new_capacity`. void reserve(size_t new_capacity) { this->try_reserve(new_capacity); } - // Directly append data into the buffer using detail::buffer::append; template void append(const ContiguousRange& range) { @@ -924,61 +933,37 @@ class basic_memory_buffer final : public detail::buffer { } }; -template -FMT_CONSTEXPR20 void basic_memory_buffer::grow( - size_t size) { - detail::abort_fuzzing_if(size > 5000); - const size_t max_size = std::allocator_traits::max_size(alloc_); - size_t old_capacity = this->capacity(); - size_t new_capacity = old_capacity + old_capacity / 2; - if (size > new_capacity) - new_capacity = size; - else if (new_capacity > max_size) - new_capacity = size > max_size ? size : max_size; - T* old_data = this->data(); - T* new_data = - std::allocator_traits::allocate(alloc_, new_capacity); - // The following code doesn't throw, so the raw pointer above doesn't leak. - std::uninitialized_copy(old_data, old_data + this->size(), - detail::make_checked(new_data, new_capacity)); - this->set(new_data, new_capacity); - // deallocate must not throw according to the standard, but even if it does, - // the buffer already uses the new storage and will deallocate it in - // destructor. 
- if (old_data != store_) alloc_.deallocate(old_data, old_capacity); -} - using memory_buffer = basic_memory_buffer; template struct is_contiguous> : std::true_type { }; +FMT_END_EXPORT namespace detail { -#ifdef _WIN32 -FMT_API bool write_console(std::FILE* f, string_view text); -#endif +FMT_API auto write_console(int fd, string_view text) -> bool; FMT_API void print(std::FILE*, string_view); } // namespace detail -/** An error reported from a formatting function. */ -FMT_CLASS_API -class FMT_API format_error : public std::runtime_error { +FMT_BEGIN_EXPORT + +// Suppress a misleading warning in older versions of clang. +#if FMT_CLANG_VERSION +# pragma clang diagnostic ignored "-Wweak-vtables" +#endif + +/// An error reported from a formatting function. +class FMT_SO_VISIBILITY("default") format_error : public std::runtime_error { public: using std::runtime_error::runtime_error; - format_error(const format_error&) = default; - format_error& operator=(const format_error&) = default; - format_error(format_error&&) = default; - format_error& operator=(format_error&&) = default; - ~format_error() noexcept override FMT_MSC_DEFAULT; }; namespace detail_exported { #if FMT_USE_NONTYPE_TEMPLATE_ARGS template struct fixed_string { constexpr fixed_string(const Char (&str)[N]) { - detail::copy_str(static_cast(str), - str + N, data); + detail::copy(static_cast(str), + str + N, data); } Char data[N] = {}; }; @@ -993,12 +978,57 @@ constexpr auto compile_string_to_view(const Char (&s)[N]) return {s, N - (std::char_traits::to_int_type(s[N - 1]) == 0 ? 1 : 0)}; } template -constexpr auto compile_string_to_view(detail::std_string_view s) +constexpr auto compile_string_to_view(basic_string_view s) -> basic_string_view { - return {s.data(), s.size()}; + return s; } } // namespace detail_exported +// A generic formatting context with custom output iterator and character +// (code unit) support. 
Char is the format string code unit type which can be +// different from OutputIt::value_type. +template class generic_context { + private: + OutputIt out_; + basic_format_args args_; + detail::locale_ref loc_; + + public: + using char_type = Char; + using iterator = OutputIt; + using parse_context_type = basic_format_parse_context; + template using formatter_type = formatter; + + constexpr generic_context(OutputIt out, + basic_format_args ctx_args, + detail::locale_ref loc = {}) + : out_(out), args_(ctx_args), loc_(loc) {} + generic_context(generic_context&&) = default; + generic_context(const generic_context&) = delete; + void operator=(const generic_context&) = delete; + + constexpr auto arg(int id) const -> basic_format_arg { + return args_.get(id); + } + auto arg(basic_string_view name) -> basic_format_arg { + return args_.get(name); + } + FMT_CONSTEXPR auto arg_id(basic_string_view name) -> int { + return args_.get_id(name); + } + auto args() const -> const basic_format_args& { + return args_; + } + + FMT_CONSTEXPR auto out() -> iterator { return out_; } + + void advance_to(iterator it) { + if (!detail::is_back_insert_iterator()) out_ = it; + } + + FMT_CONSTEXPR auto locale() -> detail::locale_ref { return loc_; } +}; + class loc_value { private: basic_format_arg value_; @@ -1011,7 +1041,7 @@ class loc_value { loc_value(T) {} template auto visit(Visitor&& vis) -> decltype(vis(0)) { - return visit_format_arg(vis, value_); + return value_.visit(vis); } }; @@ -1044,7 +1074,9 @@ template class format_facet : public Locale::facet { } }; -FMT_BEGIN_DETAIL_NAMESPACE +FMT_END_EXPORT + +namespace detail { // Returns true if value is negative, false otherwise. // Same as `value < 0` but doesn't produce warnings if T is an unsigned type. 
@@ -1075,13 +1107,13 @@ using uint32_or_64_or_128_t = template using uint64_or_128_t = conditional_t() <= 64, uint64_t, uint128_t>; -#define FMT_POWERS_OF_10(factor) \ - factor * 10, (factor)*100, (factor)*1000, (factor)*10000, (factor)*100000, \ - (factor)*1000000, (factor)*10000000, (factor)*100000000, \ - (factor)*1000000000 +#define FMT_POWERS_OF_10(factor) \ + factor * 10, (factor) * 100, (factor) * 1000, (factor) * 10000, \ + (factor) * 100000, (factor) * 1000000, (factor) * 10000000, \ + (factor) * 100000000, (factor) * 1000000000 // Converts value in the range [0, 100) to a string. -constexpr const char* digits2(size_t value) { +constexpr auto digits2(size_t value) -> const char* { // GCC generates slightly better code when value is pointer-size. return &"0001020304050607080910111213141516171819" "2021222324252627282930313233343536373839" @@ -1091,11 +1123,11 @@ constexpr const char* digits2(size_t value) { } // Sign is a template parameter to workaround a bug in gcc 4.8. -template constexpr Char sign(Sign s) { +template constexpr auto sign(Sign s) -> Char { #if !FMT_GCC_VERSION || FMT_GCC_VERSION >= 604 static_assert(std::is_same::value, ""); #endif - return static_cast("\0-+ "[s]); + return static_cast(((' ' << 24) | ('+' << 16) | ('-' << 8)) >> (s * 8)); } template FMT_CONSTEXPR auto count_digits_fallback(T n) -> int { @@ -1143,9 +1175,7 @@ inline auto do_count_digits(uint64_t n) -> int { // except for n == 0 in which case count_digits returns 1. FMT_CONSTEXPR20 inline auto count_digits(uint64_t n) -> int { #ifdef FMT_BUILTIN_CLZLL - if (!is_constant_evaluated()) { - return do_count_digits(n); - } + if (!is_constant_evaluated()) return do_count_digits(n); #endif return count_digits_fallback(n); } @@ -1173,7 +1203,7 @@ FMT_CONSTEXPR auto count_digits(UInt n) -> int { FMT_INLINE auto do_count_digits(uint32_t n) -> int { // An optimization by Kendall Willets from https://bit.ly/3uOIQrB. // This increments the upper 32 bits (log10(T) - 1) when >= T is added. 
-# define FMT_INC(T) (((sizeof(# T) - 1ull) << 32) - T) +# define FMT_INC(T) (((sizeof(#T) - 1ull) << 32) - T) static constexpr uint64_t table[] = { FMT_INC(0), FMT_INC(0), FMT_INC(0), // 8 FMT_INC(10), FMT_INC(10), FMT_INC(10), // 64 @@ -1291,7 +1321,7 @@ FMT_CONSTEXPR inline auto format_decimal(Iterator out, UInt value, int size) // Buffer is large enough to hold all digits (digits10 + 1). Char buffer[digits10() + 1] = {}; auto end = format_decimal(buffer, value, size).end; - return {out, detail::copy_str_noinline(buffer, end, out)}; + return {out, detail::copy_noinline(buffer, end, out)}; } template @@ -1309,16 +1339,16 @@ FMT_CONSTEXPR auto format_uint(Char* buffer, UInt value, int num_digits, } template -inline auto format_uint(It out, UInt value, int num_digits, bool upper = false) - -> It { +FMT_CONSTEXPR inline auto format_uint(It out, UInt value, int num_digits, + bool upper = false) -> It { if (auto ptr = to_pointer(out, to_unsigned(num_digits))) { format_uint(ptr, value, num_digits, upper); return out; } // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1). - char buffer[num_bits() / BASE_BITS + 1]; + char buffer[num_bits() / BASE_BITS + 1] = {}; format_uint(buffer, value, num_digits, upper); - return detail::copy_str_noinline(buffer, buffer + num_digits, out); + return detail::copy_noinline(buffer, buffer + num_digits, out); } // A converter from UTF-8 to UTF-16. @@ -1334,7 +1364,140 @@ class utf8_to_utf16 { auto str() const -> std::wstring { return {&buffer_[0], size()}; } }; +enum class to_utf8_error_policy { abort, replace }; + +// A converter from UTF-16/UTF-32 (host endian) to UTF-8. 
+template class to_utf8 { + private: + Buffer buffer_; + + public: + to_utf8() {} + explicit to_utf8(basic_string_view s, + to_utf8_error_policy policy = to_utf8_error_policy::abort) { + static_assert(sizeof(WChar) == 2 || sizeof(WChar) == 4, + "Expect utf16 or utf32"); + if (!convert(s, policy)) + FMT_THROW(std::runtime_error(sizeof(WChar) == 2 ? "invalid utf16" + : "invalid utf32")); + } + operator string_view() const { return string_view(&buffer_[0], size()); } + auto size() const -> size_t { return buffer_.size() - 1; } + auto c_str() const -> const char* { return &buffer_[0]; } + auto str() const -> std::string { return std::string(&buffer_[0], size()); } + + // Performs conversion returning a bool instead of throwing exception on + // conversion error. This method may still throw in case of memory allocation + // error. + auto convert(basic_string_view s, + to_utf8_error_policy policy = to_utf8_error_policy::abort) + -> bool { + if (!convert(buffer_, s, policy)) return false; + buffer_.push_back(0); + return true; + } + static auto convert(Buffer& buf, basic_string_view s, + to_utf8_error_policy policy = to_utf8_error_policy::abort) + -> bool { + for (auto p = s.begin(); p != s.end(); ++p) { + uint32_t c = static_cast(*p); + if (sizeof(WChar) == 2 && c >= 0xd800 && c <= 0xdfff) { + // Handle a surrogate pair. 
+ ++p; + if (p == s.end() || (c & 0xfc00) != 0xd800 || (*p & 0xfc00) != 0xdc00) { + if (policy == to_utf8_error_policy::abort) return false; + buf.append(string_view("\xEF\xBF\xBD")); + --p; + } else { + c = (c << 10) + static_cast(*p) - 0x35fdc00; + } + } else if (c < 0x80) { + buf.push_back(static_cast(c)); + } else if (c < 0x800) { + buf.push_back(static_cast(0xc0 | (c >> 6))); + buf.push_back(static_cast(0x80 | (c & 0x3f))); + } else if ((c >= 0x800 && c <= 0xd7ff) || (c >= 0xe000 && c <= 0xffff)) { + buf.push_back(static_cast(0xe0 | (c >> 12))); + buf.push_back(static_cast(0x80 | ((c & 0xfff) >> 6))); + buf.push_back(static_cast(0x80 | (c & 0x3f))); + } else if (c >= 0x10000 && c <= 0x10ffff) { + buf.push_back(static_cast(0xf0 | (c >> 18))); + buf.push_back(static_cast(0x80 | ((c & 0x3ffff) >> 12))); + buf.push_back(static_cast(0x80 | ((c & 0xfff) >> 6))); + buf.push_back(static_cast(0x80 | (c & 0x3f))); + } else { + return false; + } + } + return true; + } +}; + +// Computes 128-bit result of multiplication of two 64-bit unsigned integers. +inline auto umul128(uint64_t x, uint64_t y) noexcept -> uint128_fallback { +#if FMT_USE_INT128 + auto p = static_cast(x) * static_cast(y); + return {static_cast(p >> 64), static_cast(p)}; +#elif defined(_MSC_VER) && defined(_M_X64) + auto hi = uint64_t(); + auto lo = _umul128(x, y, &hi); + return {hi, lo}; +#else + const uint64_t mask = static_cast(max_value()); + + uint64_t a = x >> 32; + uint64_t b = x & mask; + uint64_t c = y >> 32; + uint64_t d = y & mask; + + uint64_t ac = a * c; + uint64_t bc = b * c; + uint64_t ad = a * d; + uint64_t bd = b * d; + + uint64_t intermediate = (bd >> 32) + (ad & mask) + (bc & mask); + + return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32), + (intermediate << 32) + (bd & mask)}; +#endif +} + namespace dragonbox { +// Computes floor(log10(pow(2, e))) for e in [-2620, 2620] using the method from +// https://fmt.dev/papers/Dragonbox.pdf#page=28, section 6.1. 
+inline auto floor_log10_pow2(int e) noexcept -> int { + FMT_ASSERT(e <= 2620 && e >= -2620, "too large exponent"); + static_assert((-1 >> 1) == -1, "right shift is not arithmetic"); + return (e * 315653) >> 20; +} + +inline auto floor_log2_pow10(int e) noexcept -> int { + FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent"); + return (e * 1741647) >> 19; +} + +// Computes upper 64 bits of multiplication of two 64-bit unsigned integers. +inline auto umul128_upper64(uint64_t x, uint64_t y) noexcept -> uint64_t { +#if FMT_USE_INT128 + auto p = static_cast(x) * static_cast(y); + return static_cast(p >> 64); +#elif defined(_MSC_VER) && defined(_M_X64) + return __umulh(x, y); +#else + return umul128(x, y).high(); +#endif +} + +// Computes upper 128 bits of multiplication of a 64-bit unsigned integer and a +// 128-bit unsigned integer. +inline auto umul192_upper128(uint64_t x, uint128_fallback y) noexcept + -> uint128_fallback { + uint128_fallback r = umul128(x, y.high()); + r += umul128_upper64(x, y.low()); + return r; +} + +FMT_API auto get_cached_power(int k) noexcept -> uint128_fallback; // Type-specific information that Dragonbox uses. template struct float_info; @@ -1358,7 +1521,7 @@ template <> struct float_info { static const int big_divisor = 1000; static const int small_divisor = 100; static const int min_k = -292; - static const int max_k = 326; + static const int max_k = 341; static const int shorter_interval_tie_lower_threshold = -77; static const int shorter_interval_tie_upper_threshold = -77; }; @@ -1388,14 +1551,14 @@ template FMT_API auto to_decimal(T x) noexcept -> decimal_fp; } // namespace dragonbox // Returns true iff Float has the implicit bit which is not stored. -template constexpr bool has_implicit_bit() { +template constexpr auto has_implicit_bit() -> bool { // An 80-bit FP number has a 64-bit significand an no implicit bit. return std::numeric_limits::digits != 64; } // Returns the number of significand bits stored in Float. 
The implicit bit is // not counted since it is not stored. -template constexpr int num_significand_bits() { +template constexpr auto num_significand_bits() -> int { // std::numeric_limits may not support __float128. return is_float128() ? 112 : (std::numeric_limits::digits - @@ -1405,8 +1568,8 @@ template constexpr int num_significand_bits() { template constexpr auto exponent_mask() -> typename dragonbox::float_info::carrier_uint { - using uint = typename dragonbox::float_info::carrier_uint; - return ((uint(1) << dragonbox::float_info::exponent_bits) - 1) + using float_uint = typename dragonbox::float_info::carrier_uint; + return ((float_uint(1) << dragonbox::float_info::exponent_bits) - 1) << num_significand_bits(); } template constexpr auto exponent_bias() -> int { @@ -1488,7 +1651,7 @@ using fp = basic_fp; // Normalizes the value converted from double and multiplied by (1 << SHIFT). template -FMT_CONSTEXPR basic_fp normalize(basic_fp value) { +FMT_CONSTEXPR auto normalize(basic_fp value) -> basic_fp { // Handle subnormals. const auto implicit_bit = F(1) << num_significand_bits(); const auto shifted_implicit_bit = implicit_bit << SHIFT; @@ -1505,7 +1668,7 @@ FMT_CONSTEXPR basic_fp normalize(basic_fp value) { } // Computes lhs * rhs / pow(2, 64) rounded to nearest with half-up tie breaking. -FMT_CONSTEXPR inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { +FMT_CONSTEXPR inline auto multiply(uint64_t lhs, uint64_t rhs) -> uint64_t { #if FMT_USE_INT128 auto product = static_cast<__uint128_t>(lhs) * rhs; auto f = static_cast(product >> 64); @@ -1522,188 +1685,36 @@ FMT_CONSTEXPR inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { #endif } -FMT_CONSTEXPR inline fp operator*(fp x, fp y) { +FMT_CONSTEXPR inline auto operator*(fp x, fp y) -> fp { return {multiply(x.f, y.f), x.e + y.e + 64}; } -template struct basic_data { - // Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340. - // These are generated by support/compute-powers.py. 
- static constexpr uint64_t pow10_significands[87] = { - 0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76, - 0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df, - 0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c, - 0x8dd01fad907ffc3c, 0xd3515c2831559a83, 0x9d71ac8fada6c9b5, - 0xea9c227723ee8bcb, 0xaecc49914078536d, 0x823c12795db6ce57, - 0xc21094364dfb5637, 0x9096ea6f3848984f, 0xd77485cb25823ac7, - 0xa086cfcd97bf97f4, 0xef340a98172aace5, 0xb23867fb2a35b28e, - 0x84c8d4dfd2c63f3b, 0xc5dd44271ad3cdba, 0x936b9fcebb25c996, - 0xdbac6c247d62a584, 0xa3ab66580d5fdaf6, 0xf3e2f893dec3f126, - 0xb5b5ada8aaff80b8, 0x87625f056c7c4a8b, 0xc9bcff6034c13053, - 0x964e858c91ba2655, 0xdff9772470297ebd, 0xa6dfbd9fb8e5b88f, - 0xf8a95fcf88747d94, 0xb94470938fa89bcf, 0x8a08f0f8bf0f156b, - 0xcdb02555653131b6, 0x993fe2c6d07b7fac, 0xe45c10c42a2b3b06, - 0xaa242499697392d3, 0xfd87b5f28300ca0e, 0xbce5086492111aeb, - 0x8cbccc096f5088cc, 0xd1b71758e219652c, 0x9c40000000000000, - 0xe8d4a51000000000, 0xad78ebc5ac620000, 0x813f3978f8940984, - 0xc097ce7bc90715b3, 0x8f7e32ce7bea5c70, 0xd5d238a4abe98068, - 0x9f4f2726179a2245, 0xed63a231d4c4fb27, 0xb0de65388cc8ada8, - 0x83c7088e1aab65db, 0xc45d1df942711d9a, 0x924d692ca61be758, - 0xda01ee641a708dea, 0xa26da3999aef774a, 0xf209787bb47d6b85, - 0xb454e4a179dd1877, 0x865b86925b9bc5c2, 0xc83553c5c8965d3d, - 0x952ab45cfa97a0b3, 0xde469fbd99a05fe3, 0xa59bc234db398c25, - 0xf6c69a72a3989f5c, 0xb7dcbf5354e9bece, 0x88fcf317f22241e2, - 0xcc20ce9bd35c78a5, 0x98165af37b2153df, 0xe2a0b5dc971f303a, - 0xa8d9d1535ce3b396, 0xfb9b7cd9a4a7443c, 0xbb764c4ca7a44410, - 0x8bab8eefb6409c1a, 0xd01fef10a657842c, 0x9b10a4e5e9913129, - 0xe7109bfba19c0c9d, 0xac2820d9623bf429, 0x80444b5e7aa7cf85, - 0xbf21e44003acdd2d, 0x8e679c2f5e44ff8f, 0xd433179d9c8cb841, - 0x9e19db92b4e31ba9, 0xeb96bf6ebadf77d9, 0xaf87023b9bf0ee6b, - }; - -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wnarrowing" -#endif - // 
Binary exponents of pow(10, k), for k = -348, -340, ..., 340, corresponding - // to significands above. - static constexpr int16_t pow10_exponents[87] = { - -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, - -927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661, - -635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369, - -343, -316, -289, -263, -236, -210, -183, -157, -130, -103, -77, - -50, -24, 3, 30, 56, 83, 109, 136, 162, 189, 216, - 242, 269, 295, 322, 348, 375, 402, 428, 455, 481, 508, - 534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800, - 827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066}; -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 -# pragma GCC diagnostic pop -#endif - - static constexpr uint64_t power_of_10_64[20] = { - 1, FMT_POWERS_OF_10(1ULL), FMT_POWERS_OF_10(1000000000ULL), - 10000000000000000000ULL}; -}; - -#if FMT_CPLUSPLUS < 201703L -template constexpr uint64_t basic_data::pow10_significands[]; -template constexpr int16_t basic_data::pow10_exponents[]; -template constexpr uint64_t basic_data::power_of_10_64[]; -#endif - -// This is a struct rather than an alias to avoid shadowing warnings in gcc. -struct data : basic_data<> {}; - -// Returns a cached power of 10 `c_k = c_k.f * pow(2, c_k.e)` such that its -// (binary) exponent satisfies `min_exponent <= c_k.e <= min_exponent + 28`. -FMT_CONSTEXPR inline fp get_cached_power(int min_exponent, - int& pow10_exponent) { - const int shift = 32; - // log10(2) = 0x0.4d104d427de7fbcc... - const int64_t significand = 0x4d104d427de7fbcc; - int index = static_cast( - ((min_exponent + fp::num_significand_bits - 1) * (significand >> shift) + - ((int64_t(1) << shift) - 1)) // ceil - >> 32 // arithmetic shift - ); - // Decimal exponent of the first (smallest) cached power of 10. - const int first_dec_exp = -348; - // Difference between 2 consecutive decimal exponents in cached powers of 10. 
- const int dec_exp_step = 8; - index = (index - first_dec_exp - 1) / dec_exp_step + 1; - pow10_exponent = first_dec_exp + index * dec_exp_step; - // Using *(x + index) instead of x[index] avoids an issue with some compilers - // using the EDG frontend (e.g. nvhpc/22.3 in C++17 mode). - return {*(data::pow10_significands + index), - *(data::pow10_exponents + index)}; -} - -#ifndef _MSC_VER -# define FMT_SNPRINTF snprintf -#else -FMT_API auto fmt_snprintf(char* buf, size_t size, const char* fmt, ...) -> int; -# define FMT_SNPRINTF fmt_snprintf -#endif // _MSC_VER - -// Formats a floating-point number with snprintf using the hexfloat format. -template -auto snprintf_float(T value, int precision, float_specs specs, - buffer& buf) -> int { - // Buffer capacity must be non-zero, otherwise MSVC's vsnprintf_s will fail. - FMT_ASSERT(buf.capacity() > buf.size(), "empty buffer"); - FMT_ASSERT(specs.format == float_format::hex, ""); - static_assert(!std::is_same::value, ""); - - // Build the format string. - char format[7]; // The longest format is "%#.*Le". - char* format_ptr = format; - *format_ptr++ = '%'; - if (specs.showpoint) *format_ptr++ = '#'; - if (precision >= 0) { - *format_ptr++ = '.'; - *format_ptr++ = '*'; - } - if (std::is_same()) *format_ptr++ = 'L'; - *format_ptr++ = specs.upper ? 'A' : 'a'; - *format_ptr = '\0'; - - // Format using snprintf. - auto offset = buf.size(); - for (;;) { - auto begin = buf.data() + offset; - auto capacity = buf.capacity() - offset; - abort_fuzzing_if(precision > 100000); - // Suppress the warning about a nonliteral format string. - // Cannot use auto because of a bug in MinGW (#1532). - int (*snprintf_ptr)(char*, size_t, const char*, ...) = FMT_SNPRINTF; - int result = precision >= 0 - ? snprintf_ptr(begin, capacity, format, precision, value) - : snprintf_ptr(begin, capacity, format, value); - if (result < 0) { - // The buffer will grow exponentially. 
- buf.try_reserve(buf.capacity() + 1); - continue; - } - auto size = to_unsigned(result); - // Size equal to capacity means that the last character was truncated. - if (size < capacity) { - buf.try_resize(size + offset); - return 0; - } - buf.try_reserve(size + offset + 1); // Add 1 for the terminating '\0'. - } -} - -template +template () == num_bits()> using convert_float_result = - conditional_t::value || - std::numeric_limits::digits == - std::numeric_limits::digits, - double, T>; + conditional_t::value || doublish, double, T>; template constexpr auto convert_float(T value) -> convert_float_result { return static_cast>(value); } -template -FMT_NOINLINE FMT_CONSTEXPR auto fill(OutputIt it, size_t n, - const fill_t& fill) -> OutputIt { +template +FMT_NOINLINE FMT_CONSTEXPR auto fill(OutputIt it, size_t n, const fill_t& fill) + -> OutputIt { auto fill_size = fill.size(); - if (fill_size == 1) return detail::fill_n(it, n, fill[0]); - auto data = fill.data(); - for (size_t i = 0; i < n; ++i) - it = copy_str(data, data + fill_size, it); + if (fill_size == 1) return detail::fill_n(it, n, fill.template get()); + if (const Char* data = fill.template data()) { + for (size_t i = 0; i < n; ++i) it = copy(data, data + fill_size, it); + } return it; } // Writes the output of f, padded according to format specifications in specs. // size: output size in code units. // width: output display width in (terminal) column positions. 
-template -FMT_CONSTEXPR auto write_padded(OutputIt out, - const basic_format_specs& specs, +FMT_CONSTEXPR auto write_padded(OutputIt out, const format_specs& specs, size_t size, size_t width, F&& f) -> OutputIt { static_assert(align == align::left || align == align::right, ""); unsigned spec_width = to_unsigned(specs.width); @@ -1714,33 +1725,32 @@ FMT_CONSTEXPR auto write_padded(OutputIt out, size_t left_padding = padding >> shifts[specs.align]; size_t right_padding = padding - left_padding; auto it = reserve(out, size + padding * specs.fill.size()); - if (left_padding != 0) it = fill(it, left_padding, specs.fill); + if (left_padding != 0) it = fill(it, left_padding, specs.fill); it = f(it); - if (right_padding != 0) it = fill(it, right_padding, specs.fill); + if (right_padding != 0) it = fill(it, right_padding, specs.fill); return base_iterator(out, it); } -template -constexpr auto write_padded(OutputIt out, const basic_format_specs& specs, +constexpr auto write_padded(OutputIt out, const format_specs& specs, size_t size, F&& f) -> OutputIt { - return write_padded(out, specs, size, size, f); + return write_padded(out, specs, size, size, f); } -template +template FMT_CONSTEXPR auto write_bytes(OutputIt out, string_view bytes, - const basic_format_specs& specs) - -> OutputIt { - return write_padded( + const format_specs& specs = {}) -> OutputIt { + return write_padded( out, specs, bytes.size(), [bytes](reserve_iterator it) { const char* data = bytes.data(); - return copy_str(data, data + bytes.size(), it); + return copy(data, data + bytes.size(), it); }); } template -auto write_ptr(OutputIt out, UIntPtr value, - const basic_format_specs* specs) -> OutputIt { +auto write_ptr(OutputIt out, UIntPtr value, const format_specs* specs) + -> OutputIt { int num_digits = count_digits<4>(value); auto size = to_unsigned(num_digits) + size_t(2); auto write = [=](reserve_iterator it) { @@ -1748,7 +1758,7 @@ auto write_ptr(OutputIt out, UIntPtr value, *it++ = static_cast('x'); 
return format_uint<4, Char>(it, value, num_digits); }; - return specs ? write_padded(out, *specs, size, write) + return specs ? write_padded(out, *specs, size, write) : base_iterator(out, write(reserve(out, size))); } @@ -1766,17 +1776,11 @@ template struct find_escape_result { uint32_t cp; }; -template -using make_unsigned_char = - typename conditional_t::value, - std::make_unsigned, - type_identity>::type; - template auto find_escape(const Char* begin, const Char* end) -> find_escape_result { for (; begin != end; ++begin) { - uint32_t cp = static_cast>(*begin); + uint32_t cp = static_cast>(*begin); if (const_check(sizeof(Char) == 1) && cp >= 0x80) continue; if (needs_escape(cp)) return {begin, begin + 1, cp}; } @@ -1785,7 +1789,7 @@ auto find_escape(const Char* begin, const Char* end) inline auto find_escape(const char* begin, const char* end) -> find_escape_result { - if (!is_utf8()) return find_escape(begin, end); + if (!use_utf8()) return find_escape(begin, end); auto result = find_escape_result{end, nullptr, 0}; for_each_codepoint(string_view(begin, to_unsigned(end - begin)), [&](uint32_t cp, string_view sv) { @@ -1802,7 +1806,7 @@ inline auto find_escape(const char* begin, const char* end) [] { \ /* Use the hidden visibility as a workaround for a GCC bug (#1973). */ \ /* Use a macro-like name to avoid shadowing warnings. */ \ - struct FMT_GCC_VISIBILITY_HIDDEN FMT_COMPILE_STRING : base { \ + struct FMT_VISIBILITY("hidden") FMT_COMPILE_STRING : base { \ using char_type FMT_MAYBE_UNUSED = fmt::remove_cvref_t; \ FMT_MAYBE_UNUSED FMT_CONSTEXPR explicit \ operator fmt::basic_string_view() const { \ @@ -1813,14 +1817,12 @@ inline auto find_escape(const char* begin, const char* end) }() /** - \rst - Constructs a compile-time format string from a string literal *s*. - - **Example**:: - - // A compile-time error because 'd' is an invalid specifier for strings. 
- std::string s = fmt::format(FMT_STRING("{:d}"), "foo"); - \endrst + * Constructs a compile-time format string from a string literal `s`. + * + * **Example**: + * + * // A compile-time error because 'd' is an invalid specifier for strings. + * std::string s = fmt::format(FMT_STRING("{:d}"), "foo"); */ #define FMT_STRING(s) FMT_STRING_IMPL(s, fmt::detail::compile_string, ) @@ -1831,7 +1833,7 @@ auto write_codepoint(OutputIt out, char prefix, uint32_t cp) -> OutputIt { Char buf[width]; fill_n(buf, width, static_cast('0')); format_uint<4>(buf, cp, width); - return copy_str(buf, buf + width, out); + return copy(buf, buf + width, out); } template @@ -1859,17 +1861,11 @@ auto write_escaped_cp(OutputIt out, const find_escape_result& escape) *out++ = static_cast('\\'); break; default: - if (is_utf8()) { - if (escape.cp < 0x100) { - return write_codepoint<2, Char>(out, 'x', escape.cp); - } - if (escape.cp < 0x10000) { - return write_codepoint<4, Char>(out, 'u', escape.cp); - } - if (escape.cp < 0x110000) { - return write_codepoint<8, Char>(out, 'U', escape.cp); - } - } + if (escape.cp < 0x100) return write_codepoint<2, Char>(out, 'x', escape.cp); + if (escape.cp < 0x10000) + return write_codepoint<4, Char>(out, 'u', escape.cp); + if (escape.cp < 0x110000) + return write_codepoint<8, Char>(out, 'U', escape.cp); for (Char escape_char : basic_string_view( escape.begin, to_unsigned(escape.end - escape.begin))) { out = write_codepoint<2, Char>(out, 'x', @@ -1888,7 +1884,7 @@ auto write_escaped_string(OutputIt out, basic_string_view str) auto begin = str.begin(), end = str.end(); do { auto escape = find_escape(begin, end); - out = copy_str(begin, escape.begin, out); + out = copy(begin, escape.begin, out); begin = escape.end; if (!begin) break; out = write_escaped_cp(out, escape); @@ -1899,11 +1895,13 @@ auto write_escaped_string(OutputIt out, basic_string_view str) template auto write_escaped_char(OutputIt out, Char v) -> OutputIt { + Char v_array[1] = {v}; *out++ = 
static_cast('\''); if ((needs_escape(static_cast(v)) && v != static_cast('"')) || v == static_cast('\'')) { - out = write_escaped_cp( - out, find_escape_result{&v, &v + 1, static_cast(v)}); + out = write_escaped_cp(out, + find_escape_result{v_array, v_array + 1, + static_cast(v)}); } else { *out++ = v; } @@ -1913,22 +1911,23 @@ auto write_escaped_char(OutputIt out, Char v) -> OutputIt { template FMT_CONSTEXPR auto write_char(OutputIt out, Char value, - const basic_format_specs& specs) - -> OutputIt { + const format_specs& specs) -> OutputIt { bool is_debug = specs.type == presentation_type::debug; - return write_padded(out, specs, 1, [=](reserve_iterator it) { + return write_padded(out, specs, 1, [=](reserve_iterator it) { if (is_debug) return write_escaped_char(it, value); *it++ = value; return it; }); } template -FMT_CONSTEXPR auto write(OutputIt out, Char value, - const basic_format_specs& specs, +FMT_CONSTEXPR auto write(OutputIt out, Char value, const format_specs& specs, locale_ref loc = {}) -> OutputIt { + // char is formatted as unsigned char for consistency across platforms. + using unsigned_type = + conditional_t::value, unsigned char, unsigned>; return check_char_specs(specs) - ? write_char(out, value, specs) - : write(out, static_cast(value), specs, loc); + ? write_char(out, value, specs) + : write(out, static_cast(value), specs, loc); } // Data for write_int that doesn't depend on output iterator type. It is used to @@ -1938,7 +1937,7 @@ template struct write_int_data { size_t padding; FMT_CONSTEXPR write_int_data(int num_digits, unsigned prefix, - const basic_format_specs& specs) + const format_specs& specs) : size((prefix >> 24) + to_unsigned(num_digits)), padding(0) { if (specs.align == align::numeric) { auto width = to_unsigned(specs.width); @@ -1957,10 +1956,10 @@ template struct write_int_data { // // where are written by write_digits(it). // prefix contains chars in three lower bytes and the size in the fourth byte. 
-template +template FMT_CONSTEXPR FMT_INLINE auto write_int(OutputIt out, int num_digits, unsigned prefix, - const basic_format_specs& specs, + const format_specs& specs, W write_digits) -> OutputIt { // Slightly faster check for specs.width == 0 && specs.precision == -1. if ((specs.width | (specs.precision + 1)) == 0) { @@ -1972,7 +1971,7 @@ FMT_CONSTEXPR FMT_INLINE auto write_int(OutputIt out, int num_digits, return base_iterator(out, write_digits(it)); } auto data = write_int_data(num_digits, prefix, specs); - return write_padded( + return write_padded( out, specs, data.size, [=](reserve_iterator it) { for (unsigned p = prefix & 0xffffff; p != 0; p >>= 8) *it++ = static_cast(p & 0xff); @@ -1990,10 +1989,10 @@ template class digit_grouping { std::string::const_iterator group; int pos; }; - next_state initial_state() const { return {grouping_.begin(), 0}; } + auto initial_state() const -> next_state { return {grouping_.begin(), 0}; } // Returns the next digit group separator position. - int next(next_state& state) const { + auto next(next_state& state) const -> int { if (thousands_sep_.empty()) return max_value(); if (state.group == grouping_.end()) return state.pos += grouping_.back(); if (*state.group <= 0 || *state.group == max_value()) @@ -2012,9 +2011,9 @@ template class digit_grouping { digit_grouping(std::string grouping, std::basic_string sep) : grouping_(std::move(grouping)), thousands_sep_(std::move(sep)) {} - bool has_separator() const { return !thousands_sep_.empty(); } + auto has_separator() const -> bool { return !thousands_sep_.empty(); } - int count_separators(int num_digits) const { + auto count_separators(int num_digits) const -> int { int count = 0; auto state = initial_state(); while (num_digits > next(state)) ++count; @@ -2023,7 +2022,7 @@ template class digit_grouping { // Applies grouping to digits and write the output to out. 
template - Out apply(Out out, basic_string_view digits) const { + auto apply(Out out, basic_string_view digits) const -> Out { auto num_digits = static_cast(digits.size()); auto separators = basic_memory_buffer(); separators.push_back(0); @@ -2035,9 +2034,8 @@ template class digit_grouping { for (int i = 0, sep_index = static_cast(separators.size() - 1); i < num_digits; ++i) { if (num_digits - i == separators[sep_index]) { - out = - copy_str(thousands_sep_.data(), - thousands_sep_.data() + thousands_sep_.size(), out); + out = copy(thousands_sep_.data(), + thousands_sep_.data() + thousands_sep_.size(), out); --sep_index; } *out++ = static_cast(digits[to_unsigned(i)]); @@ -2046,41 +2044,71 @@ template class digit_grouping { } }; +FMT_CONSTEXPR inline void prefix_append(unsigned& prefix, unsigned value) { + prefix |= prefix != 0 ? value << 8 : value; + prefix += (1u + (value > 0xff ? 1 : 0)) << 24; +} + // Writes a decimal integer with digit grouping. template auto write_int(OutputIt out, UInt value, unsigned prefix, - const basic_format_specs& specs, - const digit_grouping& grouping) -> OutputIt { + const format_specs& specs, const digit_grouping& grouping) + -> OutputIt { static_assert(std::is_same, UInt>::value, ""); - int num_digits = count_digits(value); - char digits[40]; - format_decimal(digits, value, num_digits); - unsigned size = to_unsigned((prefix != 0 ? 1 : 0) + num_digits + - grouping.count_separators(num_digits)); - return write_padded( + int num_digits = 0; + auto buffer = memory_buffer(); + switch (specs.type) { + default: + FMT_ASSERT(false, ""); + FMT_FALLTHROUGH; + case presentation_type::none: + case presentation_type::dec: + num_digits = count_digits(value); + format_decimal(appender(buffer), value, num_digits); + break; + case presentation_type::hex: + if (specs.alt) + prefix_append(prefix, unsigned(specs.upper ? 
'X' : 'x') << 8 | '0'); + num_digits = count_digits<4>(value); + format_uint<4, char>(appender(buffer), value, num_digits, specs.upper); + break; + case presentation_type::oct: + num_digits = count_digits<3>(value); + // Octal prefix '0' is counted as a digit, so only add it if precision + // is not greater than the number of digits. + if (specs.alt && specs.precision <= num_digits && value != 0) + prefix_append(prefix, '0'); + format_uint<3, char>(appender(buffer), value, num_digits); + break; + case presentation_type::bin: + if (specs.alt) + prefix_append(prefix, unsigned(specs.upper ? 'B' : 'b') << 8 | '0'); + num_digits = count_digits<1>(value); + format_uint<1, char>(appender(buffer), value, num_digits); + break; + case presentation_type::chr: + return write_char(out, static_cast(value), specs); + } + + unsigned size = (prefix != 0 ? prefix >> 24 : 0) + to_unsigned(num_digits) + + to_unsigned(grouping.count_separators(num_digits)); + return write_padded( out, specs, size, size, [&](reserve_iterator it) { - if (prefix != 0) { - char sign = static_cast(prefix); - *it++ = static_cast(sign); - } - return grouping.apply(it, string_view(digits, to_unsigned(num_digits))); + for (unsigned p = prefix & 0xffffff; p != 0; p >>= 8) + *it++ = static_cast(p & 0xff); + return grouping.apply(it, string_view(buffer.data(), buffer.size())); }); } // Writes a localized value. FMT_API auto write_loc(appender out, loc_value value, const format_specs& specs, locale_ref loc) -> bool; -template -inline auto write_loc(OutputIt, loc_value, const basic_format_specs&, - locale_ref) -> bool { +template +inline auto write_loc(OutputIt, loc_value, const format_specs&, locale_ref) + -> bool { return false; } -FMT_CONSTEXPR inline void prefix_append(unsigned& prefix, unsigned value) { - prefix |= prefix != 0 ? value << 8 : value; - prefix += (1u + (value > 0xff ? 
1 : 0)) << 24; -} - template struct write_int_arg { UInt abs_value; unsigned prefix; @@ -2103,8 +2131,8 @@ FMT_CONSTEXPR auto make_write_int_arg(T value, sign_t sign) } template struct loc_writer { - buffer_appender out; - const basic_format_specs& specs; + basic_appender out; + const format_specs& specs; std::basic_string sep; std::string grouping; std::basic_string decimal_point; @@ -2117,97 +2145,94 @@ template struct loc_writer { return true; } - template ::value)> + template ::value)> auto operator()(T) -> bool { return false; } - - auto operator()(...) -> bool { return false; } }; template FMT_CONSTEXPR FMT_INLINE auto write_int(OutputIt out, write_int_arg arg, - const basic_format_specs& specs, - locale_ref) -> OutputIt { + const format_specs& specs, locale_ref) + -> OutputIt { static_assert(std::is_same>::value, ""); auto abs_value = arg.abs_value; auto prefix = arg.prefix; switch (specs.type) { + default: + FMT_ASSERT(false, ""); + FMT_FALLTHROUGH; case presentation_type::none: case presentation_type::dec: { - auto num_digits = count_digits(abs_value); - return write_int( + int num_digits = count_digits(abs_value); + return write_int( out, num_digits, prefix, specs, [=](reserve_iterator it) { return format_decimal(it, abs_value, num_digits).end; }); } - case presentation_type::hex_lower: - case presentation_type::hex_upper: { - bool upper = specs.type == presentation_type::hex_upper; + case presentation_type::hex: { if (specs.alt) - prefix_append(prefix, unsigned(upper ? 'X' : 'x') << 8 | '0'); + prefix_append(prefix, unsigned(specs.upper ? 
'X' : 'x') << 8 | '0'); int num_digits = count_digits<4>(abs_value); - return write_int( + return write_int( out, num_digits, prefix, specs, [=](reserve_iterator it) { - return format_uint<4, Char>(it, abs_value, num_digits, upper); + return format_uint<4, Char>(it, abs_value, num_digits, specs.upper); }); } - case presentation_type::bin_lower: - case presentation_type::bin_upper: { - bool upper = specs.type == presentation_type::bin_upper; - if (specs.alt) - prefix_append(prefix, unsigned(upper ? 'B' : 'b') << 8 | '0'); - int num_digits = count_digits<1>(abs_value); - return write_int(out, num_digits, prefix, specs, - [=](reserve_iterator it) { - return format_uint<1, Char>(it, abs_value, num_digits); - }); - } case presentation_type::oct: { int num_digits = count_digits<3>(abs_value); // Octal prefix '0' is counted as a digit, so only add it if precision // is not greater than the number of digits. if (specs.alt && specs.precision <= num_digits && abs_value != 0) prefix_append(prefix, '0'); - return write_int(out, num_digits, prefix, specs, - [=](reserve_iterator it) { - return format_uint<3, Char>(it, abs_value, num_digits); - }); + return write_int( + out, num_digits, prefix, specs, [=](reserve_iterator it) { + return format_uint<3, Char>(it, abs_value, num_digits); + }); + } + case presentation_type::bin: { + if (specs.alt) + prefix_append(prefix, unsigned(specs.upper ? 
'B' : 'b') << 8 | '0'); + int num_digits = count_digits<1>(abs_value); + return write_int( + out, num_digits, prefix, specs, [=](reserve_iterator it) { + return format_uint<1, Char>(it, abs_value, num_digits); + }); } case presentation_type::chr: - return write_char(out, static_cast(abs_value), specs); - default: - throw_format_error("invalid type specifier"); + return write_char(out, static_cast(abs_value), specs); } - return out; } template -FMT_CONSTEXPR FMT_NOINLINE auto write_int_noinline( - OutputIt out, write_int_arg arg, const basic_format_specs& specs, - locale_ref loc) -> OutputIt { - return write_int(out, arg, specs, loc); +FMT_CONSTEXPR FMT_NOINLINE auto write_int_noinline(OutputIt out, + write_int_arg arg, + const format_specs& specs, + locale_ref loc) -> OutputIt { + return write_int(out, arg, specs, loc); } -template ::value && !std::is_same::value && - std::is_same>::value)> -FMT_CONSTEXPR FMT_INLINE auto write(OutputIt out, T value, - const basic_format_specs& specs, - locale_ref loc) -> OutputIt { + !std::is_same::value)> +FMT_CONSTEXPR FMT_INLINE auto write(basic_appender out, T value, + const format_specs& specs, locale_ref loc) + -> basic_appender { if (specs.localized && write_loc(out, value, specs, loc)) return out; - return write_int_noinline(out, make_write_int_arg(value, specs.sign), specs, - loc); + return write_int_noinline(out, make_write_int_arg(value, specs.sign), + specs, loc); } // An inlined version of write used in format string compilation. 
template ::value && !std::is_same::value && - !std::is_same>::value)> + !std::is_same::value && + !std::is_same>::value)> FMT_CONSTEXPR FMT_INLINE auto write(OutputIt out, T value, - const basic_format_specs& specs, - locale_ref loc) -> OutputIt { + const format_specs& specs, locale_ref loc) + -> OutputIt { if (specs.localized && write_loc(out, value, specs, loc)) return out; - return write_int(out, make_write_int_arg(value, specs.sign), specs, loc); + return write_int(out, make_write_int_arg(value, specs.sign), specs, + loc); } // An output iterator that counts the number of objects written to it and @@ -2229,63 +2254,64 @@ class counting_iterator { FMT_CONSTEXPR counting_iterator() : count_(0) {} - FMT_CONSTEXPR size_t count() const { return count_; } + FMT_CONSTEXPR auto count() const -> size_t { return count_; } - FMT_CONSTEXPR counting_iterator& operator++() { + FMT_CONSTEXPR auto operator++() -> counting_iterator& { ++count_; return *this; } - FMT_CONSTEXPR counting_iterator operator++(int) { + FMT_CONSTEXPR auto operator++(int) -> counting_iterator { auto it = *this; ++*this; return it; } - FMT_CONSTEXPR friend counting_iterator operator+(counting_iterator it, - difference_type n) { + FMT_CONSTEXPR friend auto operator+(counting_iterator it, difference_type n) + -> counting_iterator { it.count_ += static_cast(n); return it; } - FMT_CONSTEXPR value_type operator*() const { return {}; } + FMT_CONSTEXPR auto operator*() const -> value_type { return {}; } }; template FMT_CONSTEXPR auto write(OutputIt out, basic_string_view s, - const basic_format_specs& specs) -> OutputIt { + const format_specs& specs) -> OutputIt { auto data = s.data(); auto size = s.size(); if (specs.precision >= 0 && to_unsigned(specs.precision) < size) size = code_point_index(s, to_unsigned(specs.precision)); bool is_debug = specs.type == presentation_type::debug; size_t width = 0; + + if (is_debug) size = write_escaped_string(counting_iterator{}, s).count(); + if (specs.width != 0) { if 
(is_debug) - width = write_escaped_string(counting_iterator{}, s).count(); + width = size; else width = compute_width(basic_string_view(data, size)); } - return write_padded(out, specs, size, width, - [=](reserve_iterator it) { - if (is_debug) return write_escaped_string(it, s); - return copy_str(data, data + size, it); - }); + return write_padded(out, specs, size, width, + [=](reserve_iterator it) { + if (is_debug) return write_escaped_string(it, s); + return copy(data, data + size, it); + }); } template FMT_CONSTEXPR auto write(OutputIt out, basic_string_view> s, - const basic_format_specs& specs, locale_ref) - -> OutputIt { - check_string_type_spec(specs.type); - return write(out, s, specs); + const format_specs& specs, locale_ref) -> OutputIt { + return write(out, s, specs); } template -FMT_CONSTEXPR auto write(OutputIt out, const Char* s, - const basic_format_specs& specs, locale_ref) - -> OutputIt { - return check_cstring_type_spec(specs.type) - ? write(out, basic_string_view(s), specs, {}) - : write_ptr(out, bit_cast(s), &specs); +FMT_CONSTEXPR auto write(OutputIt out, const Char* s, const format_specs& specs, + locale_ref) -> OutputIt { + if (specs.type == presentation_type::pointer) + return write_ptr(out, bit_cast(s), &specs); + if (!s) report_error("string pointer is null"); + return write(out, basic_string_view(s), specs, {}); } template OutputIt { if (negative) abs_value = ~abs_value + 1; int num_digits = count_digits(abs_value); auto size = (negative ? 1 : 0) + static_cast(num_digits); - auto it = reserve(out, size); - if (auto ptr = to_pointer(it, size)) { + if (auto ptr = to_pointer(out, size)) { if (negative) *ptr++ = static_cast('-'); format_decimal(ptr, abs_value, num_digits); return out; } - if (negative) *it++ = static_cast('-'); - it = format_decimal(it, abs_value, num_digits).end; - return base_iterator(out, it); + if (negative) *out++ = static_cast('-'); + return format_decimal(out, abs_value, num_digits).end; +} + +// DEPRECATED! 
+template +FMT_CONSTEXPR auto parse_align(const Char* begin, const Char* end, + format_specs& specs) -> const Char* { + FMT_ASSERT(begin != end, ""); + auto align = align::none; + auto p = begin + code_point_length(begin); + if (end - p <= 0) p = begin; + for (;;) { + switch (to_ascii(*p)) { + case '<': + align = align::left; + break; + case '>': + align = align::right; + break; + case '^': + align = align::center; + break; + } + if (align != align::none) { + if (p != begin) { + auto c = *begin; + if (c == '}') return begin; + if (c == '{') { + report_error("invalid fill character '{'"); + return begin; + } + specs.fill = basic_string_view(begin, to_unsigned(p - begin)); + begin = p + 1; + } else { + ++begin; + } + break; + } else if (p == begin) { + break; + } + p = begin; + } + specs.align = align; + return begin; +} + +// A floating-point presentation format. +enum class float_format : unsigned char { + general, // General: exponent notation or fixed point based on magnitude. + exp, // Exponent notation with the default precision of 6, e.g. 1.2e-3. + fixed // Fixed point with the default precision of 6, e.g. 0.0012. +}; + +struct float_specs { + int precision; + float_format format : 8; + sign_t sign : 8; + bool locale : 1; + bool binary32 : 1; + bool showpoint : 1; +}; + +// DEPRECATED! 
+FMT_CONSTEXPR inline auto parse_float_type_spec(const format_specs& specs) + -> float_specs { + auto result = float_specs(); + result.showpoint = specs.alt; + result.locale = specs.localized; + switch (specs.type) { + default: + FMT_FALLTHROUGH; + case presentation_type::none: + result.format = float_format::general; + break; + case presentation_type::exp: + result.format = float_format::exp; + result.showpoint |= specs.precision != 0; + break; + case presentation_type::fixed: + result.format = float_format::fixed; + result.showpoint |= specs.precision != 0; + break; + case presentation_type::general: + result.format = float_format::general; + break; + } + return result; } template FMT_CONSTEXPR20 auto write_nonfinite(OutputIt out, bool isnan, - basic_format_specs specs, - const float_specs& fspecs) -> OutputIt { + format_specs specs, sign_t sign) + -> OutputIt { auto str = - isnan ? (fspecs.upper ? "NAN" : "nan") : (fspecs.upper ? "INF" : "inf"); + isnan ? (specs.upper ? "NAN" : "nan") : (specs.upper ? "INF" : "inf"); constexpr size_t str_size = 3; - auto sign = fspecs.sign; auto size = str_size + (sign ? 1 : 0); // Replace '0'-padding with space for non-finite values. const bool is_zero_fill = - specs.fill.size() == 1 && *specs.fill.data() == static_cast('0'); - if (is_zero_fill) specs.fill[0] = static_cast(' '); - return write_padded(out, specs, size, [=](reserve_iterator it) { - if (sign) *it++ = detail::sign(sign); - return copy_str(str, str + str_size, it); - }); + specs.fill.size() == 1 && specs.fill.template get() == '0'; + if (is_zero_fill) specs.fill = ' '; + return write_padded(out, specs, size, + [=](reserve_iterator it) { + if (sign) *it++ = detail::sign(sign); + return copy(str, str + str_size, it); + }); } // A decimal floating-point number significand * pow(10, exp). 
@@ -2347,7 +2457,7 @@ inline auto get_significand_size(const dragonbox::decimal_fp& f) -> int { template constexpr auto write_significand(OutputIt out, const char* significand, int significand_size) -> OutputIt { - return copy_str(significand, significand + significand_size, out); + return copy(significand, significand + significand_size, out); } template inline auto write_significand(OutputIt out, UInt significand, @@ -2400,19 +2510,19 @@ inline auto write_significand(OutputIt out, UInt significand, Char buffer[digits10() + 2]; auto end = write_significand(buffer, significand, significand_size, integral_size, decimal_point); - return detail::copy_str_noinline(buffer, end, out); + return detail::copy_noinline(buffer, end, out); } template FMT_CONSTEXPR auto write_significand(OutputIt out, const char* significand, int significand_size, int integral_size, Char decimal_point) -> OutputIt { - out = detail::copy_str_noinline(significand, - significand + integral_size, out); + out = detail::copy_noinline(significand, significand + integral_size, + out); if (!decimal_point) return out; *out++ = decimal_point; - return detail::copy_str_noinline(significand + integral_size, - significand + significand_size, out); + return detail::copy_noinline(significand + integral_size, + significand + significand_size, out); } template @@ -2425,18 +2535,18 @@ FMT_CONSTEXPR20 auto write_significand(OutputIt out, T significand, decimal_point); } auto buffer = basic_memory_buffer(); - write_significand(buffer_appender(buffer), significand, - significand_size, integral_size, decimal_point); + write_significand(basic_appender(buffer), significand, significand_size, + integral_size, decimal_point); grouping.apply( out, basic_string_view(buffer.data(), to_unsigned(integral_size))); - return detail::copy_str_noinline(buffer.data() + integral_size, - buffer.end(), out); + return detail::copy_noinline(buffer.data() + integral_size, + buffer.end(), out); } -template > FMT_CONSTEXPR20 auto 
do_write_float(OutputIt out, const DecimalFP& f, - const basic_format_specs& specs, + const format_specs& specs, float_specs fspecs, locale_ref loc) -> OutputIt { auto significand = f.significand; @@ -2473,7 +2583,7 @@ FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& f, if (abs_output_exp >= 100) exp_digits = abs_output_exp >= 1000 ? 4 : 3; size += to_unsigned((decimal_point ? 1 : 0) + 2 + exp_digits); - char exp_char = fspecs.upper ? 'E' : 'e'; + char exp_char = specs.upper ? 'E' : 'e'; auto write = [=](iterator it) { if (sign) *it++ = detail::sign(sign); // Insert a decimal point after the first digit and add an exponent. @@ -2483,8 +2593,9 @@ FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& f, *it++ = static_cast(exp_char); return write_exponent(output_exp, it); }; - return specs.width > 0 ? write_padded(out, specs, size, write) - : base_iterator(out, write(reserve(out, size))); + return specs.width > 0 + ? write_padded(out, specs, size, write) + : base_iterator(out, write(reserve(out, size))); } int exp = f.exponent + significand_size; @@ -2495,12 +2606,12 @@ FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& f, abort_fuzzing_if(num_zeros > 5000); if (fspecs.showpoint) { ++size; - if (num_zeros <= 0 && fspecs.format != float_format::fixed) num_zeros = 1; + if (num_zeros <= 0 && fspecs.format != float_format::fixed) num_zeros = 0; if (num_zeros > 0) size += to_unsigned(num_zeros); } auto grouping = Grouping(loc, fspecs.locale); size += to_unsigned(grouping.count_separators(exp)); - return write_padded(out, specs, size, [&](iterator it) { + return write_padded(out, specs, size, [&](iterator it) { if (sign) *it++ = detail::sign(sign); it = write_significand(it, significand, significand_size, f.exponent, grouping); @@ -2513,8 +2624,8 @@ FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& f, int num_zeros = fspecs.showpoint ? 
fspecs.precision - significand_size : 0; size += 1 + to_unsigned(num_zeros > 0 ? num_zeros : 0); auto grouping = Grouping(loc, fspecs.locale); - size += to_unsigned(grouping.count_separators(significand_size)); - return write_padded(out, specs, size, [&](iterator it) { + size += to_unsigned(grouping.count_separators(exp)); + return write_padded(out, specs, size, [&](iterator it) { if (sign) *it++ = detail::sign(sign); it = write_significand(it, significand, significand_size, exp, decimal_point, grouping); @@ -2529,7 +2640,7 @@ FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& f, } bool pointy = num_zeros != 0 || significand_size != 0 || fspecs.showpoint; size += 1 + (pointy ? 1 : 0) + to_unsigned(num_zeros); - return write_padded(out, specs, size, [&](iterator it) { + return write_padded(out, specs, size, [&](iterator it) { if (sign) *it++ = detail::sign(sign); *it++ = zero; if (!pointy) return it; @@ -2543,32 +2654,31 @@ template class fallback_digit_grouping { public: constexpr fallback_digit_grouping(locale_ref, bool) {} - constexpr bool has_separator() const { return false; } + constexpr auto has_separator() const -> bool { return false; } - constexpr int count_separators(int) const { return 0; } + constexpr auto count_separators(int) const -> int { return 0; } template - constexpr Out apply(Out out, basic_string_view) const { + constexpr auto apply(Out out, basic_string_view) const -> Out { return out; } }; -template +template FMT_CONSTEXPR20 auto write_float(OutputIt out, const DecimalFP& f, - const basic_format_specs& specs, - float_specs fspecs, locale_ref loc) - -> OutputIt { + const format_specs& specs, float_specs fspecs, + locale_ref loc) -> OutputIt { if (is_constant_evaluated()) { - return do_write_float>(out, f, specs, fspecs, loc); } else { - return do_write_float(out, f, specs, fspecs, loc); + return do_write_float(out, f, specs, fspecs, loc); } } -template constexpr bool isnan(T value) { - return !(value >= value); // std::isnan 
doesn't support __float128. +template constexpr auto isnan(T value) -> bool { + return value != value; // std::isnan doesn't support __float128. } template @@ -2580,14 +2690,14 @@ struct has_isfinite> template ::value&& has_isfinite::value)> -FMT_CONSTEXPR20 bool isfinite(T value) { +FMT_CONSTEXPR20 auto isfinite(T value) -> bool { constexpr T inf = T(std::numeric_limits::infinity()); if (is_constant_evaluated()) return !detail::isnan(value) && value < inf && value > -inf; return std::isfinite(value); } template ::value)> -FMT_CONSTEXPR bool isfinite(T value) { +FMT_CONSTEXPR auto isfinite(T value) -> bool { T inf = T(std::numeric_limits::infinity()); // std::isfinite doesn't support __float128. return !detail::isnan(value) && value < inf && value > -inf; @@ -2606,78 +2716,6 @@ FMT_INLINE FMT_CONSTEXPR bool signbit(T value) { return std::signbit(static_cast(value)); } -enum class round_direction { unknown, up, down }; - -// Given the divisor (normally a power of 10), the remainder = v % divisor for -// some number v and the error, returns whether v should be rounded up, down, or -// whether the rounding direction can't be determined due to error. -// error should be less than divisor / 2. -FMT_CONSTEXPR inline round_direction get_round_direction(uint64_t divisor, - uint64_t remainder, - uint64_t error) { - FMT_ASSERT(remainder < divisor, ""); // divisor - remainder won't overflow. - FMT_ASSERT(error < divisor, ""); // divisor - error won't overflow. - FMT_ASSERT(error < divisor - error, ""); // error * 2 won't overflow. - // Round down if (remainder + error) * 2 <= divisor. - if (remainder <= divisor - remainder && error * 2 <= divisor - remainder * 2) - return round_direction::down; - // Round up if (remainder - error) * 2 >= divisor. - if (remainder >= error && - remainder - error >= divisor - (remainder - error)) { - return round_direction::up; - } - return round_direction::unknown; -} - -namespace digits { -enum result { - more, // Generate more digits. 
- done, // Done generating digits. - error // Digit generation cancelled due to an error. -}; -} - -struct gen_digits_handler { - char* buf; - int size; - int precision; - int exp10; - bool fixed; - - FMT_CONSTEXPR digits::result on_digit(char digit, uint64_t divisor, - uint64_t remainder, uint64_t error, - bool integral) { - FMT_ASSERT(remainder < divisor, ""); - buf[size++] = digit; - if (!integral && error >= remainder) return digits::error; - if (size < precision) return digits::more; - if (!integral) { - // Check if error * 2 < divisor with overflow prevention. - // The check is not needed for the integral part because error = 1 - // and divisor > (1 << 32) there. - if (error >= divisor || error >= divisor - error) return digits::error; - } else { - FMT_ASSERT(error == 1 && divisor > 2, ""); - } - auto dir = get_round_direction(divisor, remainder, error); - if (dir != round_direction::up) - return dir == round_direction::down ? digits::done : digits::error; - ++buf[size - 1]; - for (int i = size - 1; i > 0 && buf[i] > '9'; --i) { - buf[i] = '0'; - ++buf[i - 1]; - } - if (buf[0] > '9') { - buf[0] = '1'; - if (fixed) - buf[size++] = '0'; - else - ++exp10; - } - return digits::done; - } -}; - inline FMT_CONSTEXPR20 void adjust_precision(int& precision, int exp10) { // Adjust fixed precision by exponent because it is relative to decimal // point. @@ -2686,101 +2724,6 @@ inline FMT_CONSTEXPR20 void adjust_precision(int& precision, int exp10) { precision += exp10; } -// Generates output using the Grisu digit-gen algorithm. -// error: the size of the region (lower, upper) outside of which numbers -// definitely do not round to value (Delta in Grisu3). -FMT_INLINE FMT_CONSTEXPR20 auto grisu_gen_digits(fp value, uint64_t error, - int& exp, - gen_digits_handler& handler) - -> digits::result { - const fp one(1ULL << -value.e, value.e); - // The integral part of scaled value (p1 in Grisu) = value / one. 
It cannot be - // zero because it contains a product of two 64-bit numbers with MSB set (due - // to normalization) - 1, shifted right by at most 60 bits. - auto integral = static_cast(value.f >> -one.e); - FMT_ASSERT(integral != 0, ""); - FMT_ASSERT(integral == value.f >> -one.e, ""); - // The fractional part of scaled value (p2 in Grisu) c = value % one. - uint64_t fractional = value.f & (one.f - 1); - exp = count_digits(integral); // kappa in Grisu. - // Non-fixed formats require at least one digit and no precision adjustment. - if (handler.fixed) { - adjust_precision(handler.precision, exp + handler.exp10); - // Check if precision is satisfied just by leading zeros, e.g. - // format("{:.2f}", 0.001) gives "0.00" without generating any digits. - if (handler.precision <= 0) { - if (handler.precision < 0) return digits::done; - // Divide by 10 to prevent overflow. - uint64_t divisor = data::power_of_10_64[exp - 1] << -one.e; - auto dir = get_round_direction(divisor, value.f / 10, error * 10); - if (dir == round_direction::unknown) return digits::error; - handler.buf[handler.size++] = dir == round_direction::up ? '1' : '0'; - return digits::done; - } - } - // Generate digits for the integral part. This can produce up to 10 digits. - do { - uint32_t digit = 0; - auto divmod_integral = [&](uint32_t divisor) { - digit = integral / divisor; - integral %= divisor; - }; - // This optimization by Milo Yip reduces the number of integer divisions by - // one per iteration. 
- switch (exp) { - case 10: - divmod_integral(1000000000); - break; - case 9: - divmod_integral(100000000); - break; - case 8: - divmod_integral(10000000); - break; - case 7: - divmod_integral(1000000); - break; - case 6: - divmod_integral(100000); - break; - case 5: - divmod_integral(10000); - break; - case 4: - divmod_integral(1000); - break; - case 3: - divmod_integral(100); - break; - case 2: - divmod_integral(10); - break; - case 1: - digit = integral; - integral = 0; - break; - default: - FMT_ASSERT(false, "invalid number of digits"); - } - --exp; - auto remainder = (static_cast(integral) << -one.e) + fractional; - auto result = handler.on_digit(static_cast('0' + digit), - data::power_of_10_64[exp] << -one.e, - remainder, error, true); - if (result != digits::more) return result; - } while (exp > 0); - // Generate digits for the fractional part. - for (;;) { - fractional *= 10; - error *= 10; - char digit = static_cast('0' + (fractional >> -one.e)); - fractional &= one.f - 1; - --exp; - auto result = handler.on_digit(digit, one.f, fractional, error, false); - if (result != digits::more) return result; - } -} - class bigint { private: // A bigint is stored as an array of bigits (big digits), with bigit at index @@ -2791,10 +2734,10 @@ class bigint { basic_memory_buffer bigits_; int exp_; - FMT_CONSTEXPR20 bigit operator[](int index) const { + FMT_CONSTEXPR20 auto operator[](int index) const -> bigit { return bigits_[to_unsigned(index)]; } - FMT_CONSTEXPR20 bigit& operator[](int index) { + FMT_CONSTEXPR20 auto operator[](int index) -> bigit& { return bigits_[to_unsigned(index)]; } @@ -2881,7 +2824,7 @@ class bigint { auto size = other.bigits_.size(); bigits_.resize(size); auto data = other.bigits_.data(); - std::copy(data, data + size, make_checked(bigits_.data(), size)); + copy(data, data + size, bigits_.data()); exp_ = other.exp_; } @@ -2890,11 +2833,11 @@ class bigint { assign(uint64_or_128_t(n)); } - FMT_CONSTEXPR20 int num_bigits() const { + 
FMT_CONSTEXPR20 auto num_bigits() const -> int { return static_cast(bigits_.size()) + exp_; } - FMT_NOINLINE FMT_CONSTEXPR20 bigint& operator<<=(int shift) { + FMT_NOINLINE FMT_CONSTEXPR20 auto operator<<=(int shift) -> bigint& { FMT_ASSERT(shift >= 0, ""); exp_ += shift / bigit_bits; shift %= bigit_bits; @@ -2909,13 +2852,15 @@ class bigint { return *this; } - template FMT_CONSTEXPR20 bigint& operator*=(Int value) { + template + FMT_CONSTEXPR20 auto operator*=(Int value) -> bigint& { FMT_ASSERT(value > 0, ""); multiply(uint32_or_64_or_128_t(value)); return *this; } - friend FMT_CONSTEXPR20 int compare(const bigint& lhs, const bigint& rhs) { + friend FMT_CONSTEXPR20 auto compare(const bigint& lhs, const bigint& rhs) + -> int { int num_lhs_bigits = lhs.num_bigits(), num_rhs_bigits = rhs.num_bigits(); if (num_lhs_bigits != num_rhs_bigits) return num_lhs_bigits > num_rhs_bigits ? 1 : -1; @@ -2932,8 +2877,9 @@ class bigint { } // Returns compare(lhs1 + lhs2, rhs). - friend FMT_CONSTEXPR20 int add_compare(const bigint& lhs1, const bigint& lhs2, - const bigint& rhs) { + friend FMT_CONSTEXPR20 auto add_compare(const bigint& lhs1, + const bigint& lhs2, const bigint& rhs) + -> int { auto minimum = [](int a, int b) { return a < b ? a : b; }; auto maximum = [](int a, int b) { return a > b ? a : b; }; int max_lhs_bigits = maximum(lhs1.num_bigits(), lhs2.num_bigits()); @@ -3014,13 +2960,13 @@ class bigint { bigits_.resize(to_unsigned(num_bigits + exp_difference)); for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j) bigits_[j] = bigits_[i]; - std::uninitialized_fill_n(bigits_.data(), exp_difference, 0); + memset(bigits_.data(), 0, to_unsigned(exp_difference) * sizeof(bigit)); exp_ -= exp_difference; } // Divides this bignum by divisor, assigning the remainder to this and // returning the quotient. 
- FMT_CONSTEXPR20 int divmod_assign(const bigint& divisor) { + FMT_CONSTEXPR20 auto divmod_assign(const bigint& divisor) -> int { FMT_ASSERT(this != &divisor, ""); if (compare(*this, divisor) < 0) return 0; FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, ""); @@ -3095,6 +3041,7 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp value, } int even = static_cast((value.f & 1) == 0); if (!upper) upper = &lower; + bool shortest = num_digits < 0; if ((flags & dragon::fixup) != 0) { if (add_compare(numerator, *upper, denominator) + even <= 0) { --exp10; @@ -3107,7 +3054,7 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp value, if ((flags & dragon::fixed) != 0) adjust_precision(num_digits, exp10 + 1); } // Invariant: value == (numerator / denominator) * pow(10, exp10). - if (num_digits < 0) { + if (shortest) { // Generate the shortest representation. num_digits = 0; char* data = buf.data(); @@ -3137,9 +3084,12 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp value, } // Generate the given number of digits. exp10 -= num_digits - 1; - if (num_digits == 0) { - denominator *= 10; - auto digit = add_compare(numerator, numerator, denominator) > 0 ? '1' : '0'; + if (num_digits <= 0) { + auto digit = '0'; + if (num_digits == 0) { + denominator *= 10; + digit = add_compare(numerator, numerator, denominator) > 0 ? '1' : '0'; + } buf.push_back(digit); return; } @@ -3162,7 +3112,10 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp value, } if (buf[0] == overflow) { buf[0] = '1'; - ++exp10; + if ((flags & dragon::fixed) != 0) + buf.push_back('0'); + else + ++exp10; } return; } @@ -3171,6 +3124,105 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp value, buf[num_digits - 1] = static_cast('0' + digit); } +// Formats a floating-point number using the hexfloat format. 
+template ::value)> +FMT_CONSTEXPR20 void format_hexfloat(Float value, format_specs specs, + buffer& buf) { + // float is passed as double to reduce the number of instantiations and to + // simplify implementation. + static_assert(!std::is_same::value, ""); + + using info = dragonbox::float_info; + + // Assume Float is in the format [sign][exponent][significand]. + using carrier_uint = typename info::carrier_uint; + + constexpr auto num_float_significand_bits = + detail::num_significand_bits(); + + basic_fp f(value); + f.e += num_float_significand_bits; + if (!has_implicit_bit()) --f.e; + + constexpr auto num_fraction_bits = + num_float_significand_bits + (has_implicit_bit() ? 1 : 0); + constexpr auto num_xdigits = (num_fraction_bits + 3) / 4; + + constexpr auto leading_shift = ((num_xdigits - 1) * 4); + const auto leading_mask = carrier_uint(0xF) << leading_shift; + const auto leading_xdigit = + static_cast((f.f & leading_mask) >> leading_shift); + if (leading_xdigit > 1) f.e -= (32 - countl_zero(leading_xdigit) - 1); + + int print_xdigits = num_xdigits - 1; + if (specs.precision >= 0 && print_xdigits > specs.precision) { + const int shift = ((print_xdigits - specs.precision - 1) * 4); + const auto mask = carrier_uint(0xF) << shift; + const auto v = static_cast((f.f & mask) >> shift); + + if (v >= 8) { + const auto inc = carrier_uint(1) << (shift + 4); + f.f += inc; + f.f &= ~(inc - 1); + } + + // Check long double overflow + if (!has_implicit_bit()) { + const auto implicit_bit = carrier_uint(1) << num_float_significand_bits; + if ((f.f & implicit_bit) == implicit_bit) { + f.f >>= 4; + f.e += 4; + } + } + + print_xdigits = specs.precision; + } + + char xdigits[num_bits() / 4]; + detail::fill_n(xdigits, sizeof(xdigits), '0'); + format_uint<4>(xdigits, f.f, num_xdigits, specs.upper); + + // Remove zero tail + while (print_xdigits > 0 && xdigits[print_xdigits] == '0') --print_xdigits; + + buf.push_back('0'); + buf.push_back(specs.upper ? 
'X' : 'x'); + buf.push_back(xdigits[0]); + if (specs.alt || print_xdigits > 0 || print_xdigits < specs.precision) + buf.push_back('.'); + buf.append(xdigits + 1, xdigits + 1 + print_xdigits); + for (; print_xdigits < specs.precision; ++print_xdigits) buf.push_back('0'); + + buf.push_back(specs.upper ? 'P' : 'p'); + + uint32_t abs_e; + if (f.e < 0) { + buf.push_back('-'); + abs_e = static_cast(-f.e); + } else { + buf.push_back('+'); + abs_e = static_cast(f.e); + } + format_decimal(appender(buf), abs_e, detail::count_digits(abs_e)); +} + +template ::value)> +FMT_CONSTEXPR20 void format_hexfloat(Float value, format_specs specs, + buffer& buf) { + format_hexfloat(static_cast(value), specs, buf); +} + +constexpr auto fractional_part_rounding_thresholds(int index) -> uint32_t { + // For checking rounding thresholds. + // The kth entry is chosen to be the smallest integer such that the + // upper 32-bits of 10^(k+1) times it is strictly bigger than 5 * 10^k. + // It is equal to ceil(2^31 + 2^32/10^(k + 1)). + // These are stored in a string literal because we cannot have static arrays + // in constexpr functions and non-static ones are poorly optimized. + return U"\x9999999a\x828f5c29\x80418938\x80068db9\x8000a7c6\x800010c7" + U"\x800001ae\x8000002b"[index]; +} + template FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, buffer& buf) -> int { @@ -3193,7 +3245,7 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, int exp = 0; bool use_dragon = true; unsigned dragon_flags = 0; - if (!is_fast_float()) { + if (!is_fast_float() || is_constant_evaluated()) { const auto inv_log2_10 = 0.3010299956639812; // 1 / log2(10) using info = dragonbox::float_info; const auto f = basic_fp(converted_value); @@ -3201,37 +3253,259 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, // 10^(exp - 1) <= value < 10^exp or 10^exp <= value < 10^(exp + 1). 
// This is based on log10(value) == log2(value) / log2(10) and approximation // of log2(value) by e + num_fraction_bits idea from double-conversion. - exp = static_cast( - std::ceil((f.e + count_digits<1>(f.f) - 1) * inv_log2_10 - 1e-10)); + auto e = (f.e + count_digits<1>(f.f) - 1) * inv_log2_10 - 1e-10; + exp = static_cast(e); + if (e > exp) ++exp; // Compute ceil. dragon_flags = dragon::fixup; - } else if (!is_constant_evaluated() && precision < 0) { + } else if (precision < 0) { // Use Dragonbox for the shortest format. if (specs.binary32) { auto dec = dragonbox::to_decimal(static_cast(value)); - write(buffer_appender(buf), dec.significand); + write(appender(buf), dec.significand); return dec.exponent; } auto dec = dragonbox::to_decimal(static_cast(value)); - write(buffer_appender(buf), dec.significand); + write(appender(buf), dec.significand); return dec.exponent; } else { - // Use Grisu + Dragon4 for the given precision: - // https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf. - const int min_exp = -60; // alpha in Grisu. - int cached_exp10 = 0; // K in Grisu. - fp normalized = normalize(fp(converted_value)); - const auto cached_pow = get_cached_power( - min_exp - (normalized.e + fp::num_significand_bits), cached_exp10); - normalized = normalized * cached_pow; - gen_digits_handler handler{buf.data(), 0, precision, -cached_exp10, fixed}; - if (grisu_gen_digits(normalized, 1, exp, handler) != digits::error && - !is_constant_evaluated()) { - exp += handler.exp10; - buf.try_resize(to_unsigned(handler.size)); - use_dragon = false; + // Extract significand bits and exponent bits. + using info = dragonbox::float_info; + auto br = bit_cast(static_cast(value)); + + const uint64_t significand_mask = + (static_cast(1) << num_significand_bits()) - 1; + uint64_t significand = (br & significand_mask); + int exponent = static_cast((br & exponent_mask()) >> + num_significand_bits()); + + if (exponent != 0) { // Check if normal. 
+ exponent -= exponent_bias() + num_significand_bits(); + significand |= + (static_cast(1) << num_significand_bits()); + significand <<= 1; } else { - exp += handler.size - cached_exp10 - 1; - precision = handler.precision; + // Normalize subnormal inputs. + FMT_ASSERT(significand != 0, "zeros should not appear here"); + int shift = countl_zero(significand); + FMT_ASSERT(shift >= num_bits() - num_significand_bits(), + ""); + shift -= (num_bits() - num_significand_bits() - 2); + exponent = (std::numeric_limits::min_exponent - + num_significand_bits()) - + shift; + significand <<= shift; + } + + // Compute the first several nonzero decimal significand digits. + // We call the number we get the first segment. + const int k = info::kappa - dragonbox::floor_log10_pow2(exponent); + exp = -k; + const int beta = exponent + dragonbox::floor_log2_pow10(k); + uint64_t first_segment; + bool has_more_segments; + int digits_in_the_first_segment; + { + const auto r = dragonbox::umul192_upper128( + significand << beta, dragonbox::get_cached_power(k)); + first_segment = r.high(); + has_more_segments = r.low() != 0; + + // The first segment can have 18 ~ 19 digits. + if (first_segment >= 1000000000000000000ULL) { + digits_in_the_first_segment = 19; + } else { + // When it is of 18-digits, we align it to 19-digits by adding a bogus + // zero at the end. + digits_in_the_first_segment = 18; + first_segment *= 10; + } + } + + // Compute the actual number of decimal digits to print. + if (fixed) adjust_precision(precision, exp + digits_in_the_first_segment); + + // Use Dragon4 only when there might be not enough digits in the first + // segment. + if (digits_in_the_first_segment > precision) { + use_dragon = false; + + if (precision <= 0) { + exp += digits_in_the_first_segment; + + if (precision < 0) { + // Nothing to do, since all we have are just leading zeros. + buf.try_resize(0); + } else { + // We may need to round-up. 
+ buf.try_resize(1); + if ((first_segment | static_cast(has_more_segments)) > + 5000000000000000000ULL) { + buf[0] = '1'; + } else { + buf[0] = '0'; + } + } + } // precision <= 0 + else { + exp += digits_in_the_first_segment - precision; + + // When precision > 0, we divide the first segment into three + // subsegments, each with 9, 9, and 0 ~ 1 digits so that each fits + // in 32-bits which usually allows faster calculation than in + // 64-bits. Since some compiler (e.g. MSVC) doesn't know how to optimize + // division-by-constant for large 64-bit divisors, we do it here + // manually. The magic number 7922816251426433760 below is equal to + // ceil(2^(64+32) / 10^10). + const uint32_t first_subsegment = static_cast( + dragonbox::umul128_upper64(first_segment, 7922816251426433760ULL) >> + 32); + const uint64_t second_third_subsegments = + first_segment - first_subsegment * 10000000000ULL; + + uint64_t prod; + uint32_t digits; + bool should_round_up; + int number_of_digits_to_print = precision > 9 ? 9 : precision; + + // Print a 9-digits subsegment, either the first or the second. + auto print_subsegment = [&](uint32_t subsegment, char* buffer) { + int number_of_digits_printed = 0; + + // If we want to print an odd number of digits from the subsegment, + if ((number_of_digits_to_print & 1) != 0) { + // Convert to 64-bit fixed-point fractional form with 1-digit + // integer part. The magic number 720575941 is a good enough + // approximation of 2^(32 + 24) / 10^8; see + // https://jk-jeon.github.io/posts/2022/12/fixed-precision-formatting/#fixed-length-case + // for details. + prod = ((subsegment * static_cast(720575941)) >> 24) + 1; + digits = static_cast(prod >> 32); + *buffer = static_cast('0' + digits); + number_of_digits_printed++; + } + // If we want to print an even number of digits from the + // first_subsegment, + else { + // Convert to 64-bit fixed-point fractional form with 2-digits + // integer part. 
The magic number 450359963 is a good enough + // approximation of 2^(32 + 20) / 10^7; see + // https://jk-jeon.github.io/posts/2022/12/fixed-precision-formatting/#fixed-length-case + // for details. + prod = ((subsegment * static_cast(450359963)) >> 20) + 1; + digits = static_cast(prod >> 32); + copy2(buffer, digits2(digits)); + number_of_digits_printed += 2; + } + + // Print all digit pairs. + while (number_of_digits_printed < number_of_digits_to_print) { + prod = static_cast(prod) * static_cast(100); + digits = static_cast(prod >> 32); + copy2(buffer + number_of_digits_printed, digits2(digits)); + number_of_digits_printed += 2; + } + }; + + // Print first subsegment. + print_subsegment(first_subsegment, buf.data()); + + // Perform rounding if the first subsegment is the last subsegment to + // print. + if (precision <= 9) { + // Rounding inside the subsegment. + // We round-up if: + // - either the fractional part is strictly larger than 1/2, or + // - the fractional part is exactly 1/2 and the last digit is odd. + // We rely on the following observations: + // - If fractional_part >= threshold, then the fractional part is + // strictly larger than 1/2. + // - If the MSB of fractional_part is set, then the fractional part + // must be at least 1/2. + // - When the MSB of fractional_part is set, either + // second_third_subsegments being nonzero or has_more_segments + // being true means there are further digits not printed, so the + // fractional part is strictly larger than 1/2. + if (precision < 9) { + uint32_t fractional_part = static_cast(prod); + should_round_up = + fractional_part >= fractional_part_rounding_thresholds( + 8 - number_of_digits_to_print) || + ((fractional_part >> 31) & + ((digits & 1) | (second_third_subsegments != 0) | + has_more_segments)) != 0; + } + // Rounding at the subsegment boundary. 
+ // In this case, the fractional part is at least 1/2 if and only if + // second_third_subsegments >= 5000000000ULL, and is strictly larger + // than 1/2 if we further have either second_third_subsegments > + // 5000000000ULL or has_more_segments == true. + else { + should_round_up = second_third_subsegments > 5000000000ULL || + (second_third_subsegments == 5000000000ULL && + ((digits & 1) != 0 || has_more_segments)); + } + } + // Otherwise, print the second subsegment. + else { + // Compilers are not aware of how to leverage the maximum value of + // second_third_subsegments to find out a better magic number which + // allows us to eliminate an additional shift. 1844674407370955162 = + // ceil(2^64/10) < ceil(2^64*(10^9/(10^10 - 1))). + const uint32_t second_subsegment = + static_cast(dragonbox::umul128_upper64( + second_third_subsegments, 1844674407370955162ULL)); + const uint32_t third_subsegment = + static_cast(second_third_subsegments) - + second_subsegment * 10; + + number_of_digits_to_print = precision - 9; + print_subsegment(second_subsegment, buf.data() + 9); + + // Rounding inside the subsegment. + if (precision < 18) { + // The condition third_subsegment != 0 implies that the segment was + // of 19 digits, so in this case the third segment should be + // consisting of a genuine digit from the input. + uint32_t fractional_part = static_cast(prod); + should_round_up = + fractional_part >= fractional_part_rounding_thresholds( + 8 - number_of_digits_to_print) || + ((fractional_part >> 31) & + ((digits & 1) | (third_subsegment != 0) | + has_more_segments)) != 0; + } + // Rounding at the subsegment boundary. + else { + // In this case, the segment must be of 19 digits, thus + // the third subsegment should be consisting of a genuine digit from + // the input. + should_round_up = third_subsegment > 5 || + (third_subsegment == 5 && + ((digits & 1) != 0 || has_more_segments)); + } + } + + // Round-up if necessary. 
+ if (should_round_up) { + ++buf[precision - 1]; + for (int i = precision - 1; i > 0 && buf[i] > '9'; --i) { + buf[i] = '0'; + ++buf[i - 1]; + } + if (buf[0] > '9') { + buf[0] = '1'; + if (fixed) + buf[precision++] = '0'; + else + ++exp; + } + } + buf.try_resize(to_unsigned(precision)); + } + } // if (digits_in_the_first_segment > precision) + else { + // Adjust the exponent for its use in Dragon4. + exp += digits_in_the_first_segment - 1; } } if (use_dragon) { @@ -3258,100 +3532,102 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, } return exp; } + template -FMT_CONSTEXPR20 auto write_float(OutputIt out, T value, - basic_format_specs specs, locale_ref loc) - -> OutputIt { - float_specs fspecs = parse_float_type_spec(specs); - fspecs.sign = specs.sign; +FMT_CONSTEXPR20 auto write_float(OutputIt out, T value, format_specs specs, + locale_ref loc) -> OutputIt { + sign_t sign = specs.sign; if (detail::signbit(value)) { // value < 0 is false for NaN so use signbit. 
- fspecs.sign = sign::minus; + sign = sign::minus; value = -value; - } else if (fspecs.sign == sign::minus) { - fspecs.sign = sign::none; + } else if (sign == sign::minus) { + sign = sign::none; } if (!detail::isfinite(value)) - return write_nonfinite(out, detail::isnan(value), specs, fspecs); + return write_nonfinite(out, detail::isnan(value), specs, sign); - if (specs.align == align::numeric && fspecs.sign) { + if (specs.align == align::numeric && sign) { auto it = reserve(out, 1); - *it++ = detail::sign(fspecs.sign); + *it++ = detail::sign(sign); out = base_iterator(out, it); - fspecs.sign = sign::none; + sign = sign::none; if (specs.width != 0) --specs.width; } memory_buffer buffer; - if (fspecs.format == float_format::hex) { - if (fspecs.sign) buffer.push_back(detail::sign(fspecs.sign)); - snprintf_float(convert_float(value), specs.precision, fspecs, buffer); - return write_bytes(out, {buffer.data(), buffer.size()}, - specs); + if (specs.type == presentation_type::hexfloat) { + if (sign) buffer.push_back(detail::sign(sign)); + format_hexfloat(convert_float(value), specs, buffer); + return write_bytes(out, {buffer.data(), buffer.size()}, + specs); } + int precision = specs.precision >= 0 || specs.type == presentation_type::none ? 
specs.precision : 6; - if (fspecs.format == float_format::exp) { + if (specs.type == presentation_type::exp) { if (precision == max_value()) - throw_format_error("number is too big"); + report_error("number is too big"); else ++precision; - } else if (fspecs.format != float_format::fixed && precision == 0) { + } else if (specs.type != presentation_type::fixed && precision == 0) { precision = 1; } + float_specs fspecs = parse_float_type_spec(specs); + fspecs.sign = sign; if (const_check(std::is_same())) fspecs.binary32 = true; int exp = format_float(convert_float(value), precision, fspecs, buffer); fspecs.precision = precision; auto f = big_decimal_fp{buffer.data(), static_cast(buffer.size()), exp}; - return write_float(out, f, specs, fspecs, loc); + return write_float(out, f, specs, fspecs, loc); } template ::value)> -FMT_CONSTEXPR20 auto write(OutputIt out, T value, - basic_format_specs specs, locale_ref loc = {}) - -> OutputIt { +FMT_CONSTEXPR20 auto write(OutputIt out, T value, format_specs specs, + locale_ref loc = {}) -> OutputIt { if (const_check(!is_supported_floating_point(value))) return out; return specs.localized && write_loc(out, value, specs, loc) ? 
out - : write_float(out, value, specs, loc); + : write_float(out, value, specs, loc); } template ::value)> FMT_CONSTEXPR20 auto write(OutputIt out, T value) -> OutputIt { - if (is_constant_evaluated()) - return write(out, value, basic_format_specs()); + if (is_constant_evaluated()) return write(out, value, format_specs()); if (const_check(!is_supported_floating_point(value))) return out; - auto fspecs = float_specs(); + auto sign = sign_t::none; if (detail::signbit(value)) { - fspecs.sign = sign::minus; + sign = sign::minus; value = -value; } - constexpr auto specs = basic_format_specs(); + constexpr auto specs = format_specs(); using floaty = conditional_t::value, double, T>; - using uint = typename dragonbox::float_info::carrier_uint; - uint mask = exponent_mask(); - if ((bit_cast(value) & mask) == mask) - return write_nonfinite(out, std::isnan(value), specs, fspecs); + using floaty_uint = typename dragonbox::float_info::carrier_uint; + floaty_uint mask = exponent_mask(); + if ((bit_cast(value) & mask) == mask) + return write_nonfinite(out, std::isnan(value), specs, sign); + auto fspecs = float_specs(); + fspecs.sign = sign; auto dec = dragonbox::to_decimal(static_cast(value)); - return write_float(out, dec, specs, fspecs, {}); + return write_float(out, dec, specs, fspecs, {}); } template ::value && !is_fast_float::value)> inline auto write(OutputIt out, T value) -> OutputIt { - return write(out, value, basic_format_specs()); + return write(out, value, format_specs()); } template -auto write(OutputIt out, monostate, basic_format_specs = {}, - locale_ref = {}) -> OutputIt { +auto write(OutputIt out, monostate, format_specs = {}, locale_ref = {}) + -> OutputIt { FMT_ASSERT(false, ""); return out; } @@ -3359,13 +3635,11 @@ auto write(OutputIt out, monostate, basic_format_specs = {}, template FMT_CONSTEXPR auto write(OutputIt out, basic_string_view value) -> OutputIt { - auto it = reserve(out, value.size()); - it = copy_str_noinline(value.begin(), value.end(), it); - 
return base_iterator(out, it); + return copy_noinline(value.begin(), value.end(), out); } template ::value)> + FMT_ENABLE_IF(has_to_string_view::value)> constexpr auto write(OutputIt out, const T& value) -> OutputIt { return write(out, to_string_view(value)); } @@ -3384,13 +3658,12 @@ FMT_CONSTEXPR auto write(OutputIt out, T value) -> OutputIt { template ::value)> -FMT_CONSTEXPR auto write(OutputIt out, T value, - const basic_format_specs& specs = {}, +FMT_CONSTEXPR auto write(OutputIt out, T value, const format_specs& specs = {}, locale_ref = {}) -> OutputIt { return specs.type != presentation_type::none && specs.type != presentation_type::string - ? write(out, value ? 1 : 0, specs, {}) - : write_bytes(out, value ? "true" : "false", specs); + ? write(out, value ? 1 : 0, specs, {}) + : write_bytes(out, value ? "true" : "false", specs); } template @@ -3401,22 +3674,16 @@ FMT_CONSTEXPR auto write(OutputIt out, Char value) -> OutputIt { } template -FMT_CONSTEXPR_CHAR_TRAITS auto write(OutputIt out, const Char* value) - -> OutputIt { - if (!value) { - throw_format_error("string pointer is null"); - } else { - out = write(out, basic_string_view(value)); - } +FMT_CONSTEXPR20 auto write(OutputIt out, const Char* value) -> OutputIt { + if (value) return write(out, basic_string_view(value)); + report_error("string pointer is null"); return out; } template ::value)> -auto write(OutputIt out, const T* value, - const basic_format_specs& specs = {}, locale_ref = {}) - -> OutputIt { - check_pointer_type_spec(specs.type, error_handler()); +auto write(OutputIt out, const T* value, const format_specs& specs = {}, + locale_ref = {}) -> OutputIt { return write_ptr(out, bit_cast(value), &specs); } @@ -3424,7 +3691,7 @@ auto write(OutputIt out, const T* value, template > FMT_CONSTEXPR auto write(OutputIt out, const T& value) -> enable_if_t< - std::is_class::value && !is_string::value && + std::is_class::value && !has_to_string_view::value && !is_floating_point::value && 
!std::is_same::value && !std::is_same().map( value))>>::value, @@ -3435,21 +3702,22 @@ FMT_CONSTEXPR auto write(OutputIt out, const T& value) -> enable_if_t< template > FMT_CONSTEXPR auto write(OutputIt out, const T& value) - -> enable_if_t::value == type::custom_type, + -> enable_if_t::value == + type::custom_type && + !std::is_fundamental::value, OutputIt> { - using formatter_type = - conditional_t::value, - typename Context::template formatter_type, - fallback_formatter>; + auto formatter = typename Context::template formatter_type(); + auto parse_ctx = typename Context::parse_context_type({}); + formatter.parse(parse_ctx); auto ctx = Context(out, {}, {}); - return formatter_type().format(value, ctx); + return formatter.format(value, ctx); } // An argument visitor that formats the argument and writes it via the output // iterator. It's a class and not a generic lambda for compatibility with C++11. template struct default_arg_formatter { - using iterator = buffer_appender; - using context = buffer_context; + using iterator = basic_appender; + using context = buffered_context; iterator out; basic_format_args args; @@ -3467,16 +3735,16 @@ template struct default_arg_formatter { }; template struct arg_formatter { - using iterator = buffer_appender; - using context = buffer_context; + using iterator = basic_appender; + using context = buffered_context; iterator out; - const basic_format_specs& specs; + const format_specs& specs; locale_ref locale; template FMT_CONSTEXPR FMT_INLINE auto operator()(T value) -> iterator { - return detail::write(out, value, specs, locale); + return detail::write(out, value, specs, locale); } auto operator()(typename basic_format_arg::handle) -> iterator { // User-defined types are handled separately because they require access @@ -3485,116 +3753,49 @@ template struct arg_formatter { } }; -template struct custom_formatter { - basic_format_parse_context& parse_ctx; - buffer_context& ctx; - - void operator()( - typename 
basic_format_arg>::handle h) const { - h.format(parse_ctx, ctx); - } - template void operator()(T) const {} -}; - -template class width_checker { - public: - explicit FMT_CONSTEXPR width_checker(ErrorHandler& eh) : handler_(eh) {} - +struct width_checker { template ::value)> FMT_CONSTEXPR auto operator()(T value) -> unsigned long long { - if (is_negative(value)) handler_.on_error("negative width"); + if (is_negative(value)) report_error("negative width"); return static_cast(value); } template ::value)> FMT_CONSTEXPR auto operator()(T) -> unsigned long long { - handler_.on_error("width is not integer"); + report_error("width is not integer"); return 0; } - - private: - ErrorHandler& handler_; }; -template class precision_checker { - public: - explicit FMT_CONSTEXPR precision_checker(ErrorHandler& eh) : handler_(eh) {} - +struct precision_checker { template ::value)> FMT_CONSTEXPR auto operator()(T value) -> unsigned long long { - if (is_negative(value)) handler_.on_error("negative precision"); + if (is_negative(value)) report_error("negative precision"); return static_cast(value); } template ::value)> FMT_CONSTEXPR auto operator()(T) -> unsigned long long { - handler_.on_error("precision is not integer"); + report_error("precision is not integer"); return 0; } - - private: - ErrorHandler& handler_; }; -template