@@ -169,6 +169,7 @@ enum StorageMode {
169
169
defaultMajor = columnMajor
170
170
};
171
171
172
+
172
173
/* *
173
174
* This library uses tensors to store and manipulate data on a GPU device.
174
175
* A tensor has three axes: [rows (m) x columns (n) x matrices (k)].
@@ -256,13 +257,16 @@ public:
256
257
*
257
258
* This static function reads data from a file, creates a DTensor and uploads the data to the device.
258
259
*
260
+ * The data may be stored in a text file or a binary file. Binary files must have the extension .bt.
261
+ *
259
262
* @param path_to_file path to file as string
260
263
* @param mode storage mode (default: StorageMode::defaultMajor)
261
264
* @return instance of DTensor
262
265
*
263
266
* @throws std::invalid_argument if the file is not found
264
267
*/
265
- static DTensor<T> parseFromTextFile (std::string path_to_file, StorageMode mode = StorageMode::defaultMajor);
268
+ static DTensor<T> parseFromFile (std::string path_to_file,
269
+ StorageMode mode = StorageMode::defaultMajor);
266
270
267
271
/* *
268
272
* Constructs a DTensor object.
@@ -504,7 +508,12 @@ public:
504
508
/* *
505
509
* Saves the current instance of DTensor to a (text or binary) file
506
510
*
507
- * @param pathToFile
511
+ * If the file extension is .bt, the data will be stored in a binary file.
512
+ * Writing to and reading from a binary file is significantly faster and
513
+ * the generated binary files tend to have a smaller size (about 40% of the
514
+ * size of text files for data of type double and float).
515
+ *
516
+ * @param pathToFile path to file
508
517
*/
509
518
void saveToFile (std::string pathToFile);
510
519
@@ -595,7 +604,7 @@ struct data_t {
595
604
};
596
605
597
606
template <typename T>
598
- data_t <T> vectorFromFile (std::string path_to_file) {
607
+ data_t <T> vectorFromTextFile (std::string path_to_file) {
599
608
data_t <T> dataStruct;
600
609
std::ifstream file;
601
610
file.open (path_to_file, std::ios::in);
@@ -641,24 +650,70 @@ data_t<T> vectorFromFile(std::string path_to_file) {
641
650
}
642
651
643
652
template <typename T>
644
- DTensor<T> DTensor<T>::parseFromTextFile(std::string path_to_file,
645
- StorageMode mode) {
646
- auto parsedData = vectorFromFile<T>(path_to_file);
653
+ data_t <T> vectorFromBinaryFile (std::string path_to_file) {
654
+ data_t <T> dataStruct;
655
+ /* Read from binary file */
656
+ std::ifstream inFile;
657
+ inFile.open (path_to_file, std::ios::binary);
658
+ inFile.read (reinterpret_cast <char *>(&(dataStruct.numRows )), sizeof (uint64_t ));
659
+ inFile.read (reinterpret_cast <char *>(&(dataStruct.numCols )), sizeof (uint64_t ));
660
+ inFile.read (reinterpret_cast <char *>(&(dataStruct.numMats )), sizeof (uint64_t ));
661
+ uint64_t numElements = dataStruct.numRows * dataStruct.numCols * dataStruct.numMats ;
662
+ std::vector<T> vecDataFromFile (numElements);
663
+ for (size_t i = 0 ; i < numElements; i++) {
664
+ T el;
665
+ inFile.read (reinterpret_cast <char *>(&el), sizeof (T));
666
+ vecDataFromFile[i] = el;
667
+ }
668
+ inFile.close ();
669
+ dataStruct.data = vecDataFromFile;
670
+ return dataStruct;
671
+ }
672
+
673
+ template <typename T>
674
+ DTensor<T> DTensor<T>::parseFromFile(std::string path_to_file,
675
+ StorageMode mode) {
676
+ // Figure out file extension
677
+ size_t pathToFileLength = path_to_file.length () ;
678
+ std::string fileNameExtension = path_to_file.substr (pathToFileLength-3 );
679
+ typedef data_t <T> (*PARSER)(std::string);
680
+ PARSER parser = (fileNameExtension == " .bt" ) ? vectorFromBinaryFile<T> : vectorFromTextFile<T>;
681
+ auto parsedData = parser (path_to_file);
647
682
DTensor<T> tensorFromData (parsedData.data , parsedData.numRows , parsedData.numCols , parsedData.numMats , mode);
648
683
return tensorFromData;
649
684
}
650
685
651
686
template <typename T>
652
687
void DTensor<T>::saveToFile(std::string pathToFile) {
653
- std::ofstream file (pathToFile);
654
- file << numRows () << std::endl << numCols () << std::endl << numMats () << std::endl;
655
- std::vector<T> myData (numEl ()); download (myData);
656
- if constexpr (std::is_floating_point<T>::value) {
657
- file << std::setprecision (std::numeric_limits<T>::max_digits10);
658
- }
659
- for (const T& el : myData) file << el << std::endl;
688
+ std::vector<T> myData (numEl ());
689
+ download (myData);
690
+
691
+ // Figure out file extension
692
+ size_t pathToFileLength = pathToFile.length () ;
693
+ std::string fileNameExtension = pathToFile.substr (pathToFileLength-3 );
694
+ // If the extension is .bt...
695
+ if (fileNameExtension == " .bt" ) {
696
+ uint64_t nr = (uint64_t ) numRows (),
697
+ nc = (uint64_t ) numCols (),
698
+ nm = (uint64_t ) numMats ();
699
+ std::ofstream outFile;
700
+ outFile.open (pathToFile, std::ios::binary);
701
+ outFile.write (reinterpret_cast <const char *>(&nr), sizeof (uint64_t ));
702
+ outFile.write (reinterpret_cast <const char *>(&nc), sizeof (uint64_t ));
703
+ outFile.write (reinterpret_cast <const char *>(&nm), sizeof (uint64_t ));
704
+ for (const T &el: myData) outFile.write (reinterpret_cast <const char *>(&el), sizeof (T));
705
+ outFile.close ();
706
+ } else {
707
+ std::ofstream file (pathToFile);
708
+ file << numRows () << std::endl << numCols () << std::endl << numMats () << std::endl;
709
+ if constexpr (std::is_floating_point<T>::value) {
710
+ file << std::setprecision (std::numeric_limits<T>::max_digits10);
711
+ }
712
+ for (const T &el: myData) file << el << std::endl;
713
+ }
660
714
}
661
715
716
+
662
717
template <typename T>
663
718
void DTensor<T>::reshape(size_t newNumRows, size_t newNumCols, size_t newNumMats) {
664
719
if (m_numRows == newNumRows && m_numCols == newNumCols && m_numMats == newNumMats) return ;
0 commit comments