Fixed an overflow problem in the workload definition, and added a check that the kernel is fast enough

tmrlvi · tmrlvi · commit 30ee8f062eb6 · 2022-04-21T02:10:14.000+03:00
diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml
@@ -88,7 +88,6 @@ jobs:
           asset_name="kaspa-miner-${{ github.event.release.tag_name }}-cpu-only-osx-amd64"
           mkdir ${asset_name}
           mv ./target/x86_64-apple-darwin/release/kaspa-miner ${asset_name}/${asset_name}
-          mv ./target/x86_64-apple-darwin/release/libkaspa*.so ${asset_name}/
           tar czvf ${asset_name}.tgz ${asset_name}
           echo "archive=${asset_name}.tgz" >> $GITHUB_ENV
           echo "asset_name=${asset_name}.tgz" >> $GITHUB_ENV
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -49,7 +49,7 @@ num = "0.4"
 nix = "0.23"
 hex = "0.4"
 semver = "1.0"
-chrono = "0.4"
+time = { version = "0.3", features = ["formatting", "macros"] }
 
 [features]
 default = ["parking_lot"]
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 [![Build status](https://github.com/tmrlvi/kaspa-miner/workflows/ci/badge.svg)](https://github.com/tmrlvi/kaspa-miner/actions)
 [![Latest version](https://img.shields.io/crates/v/kaspa-miner.svg)](https://crates.io/crates/kaspa-miner)
 ![License](https://img.shields.io/crates/l/kaspa-miner.svg)
-[![dependency status](https://deps.rs/repo/github/tmrlvi/kaspa-miner/status.svg)](https://deps.rs/repo/github/elichai/kaspa-miner)
+[![dependency status](https://deps.rs/repo/github/tmrlvi/kaspa-miner/status.svg)](https://deps.rs/repo/github/tmrlvi/kaspa-miner)
 
 [![Discord](https://discordapp.com/api/guilds/599153230659846165/embed.png)](https://discord.gg/kS3SK5F36R)
 [![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white)](https://t.me/Kaspaenglish)
diff --git a/integrations/windows/create_bat.sh b/integrations/windows/create_bat.sh
@@ -1,3 +1,7 @@
-echo :start > ${1}/mine.bat
+echo REM When mining to a local node, you can drop the -s option. > ${1}/mine.bat
+echo echo =========================================================== >> ${1}/mine.bat
+echo echo = Running Kaspa Miner With Defualt Bat. Edit to configure = >> ${1}/mine.bat
+echo echo =========================================================== >> ${1}/mine.bat
+echo :start >> ${1}/mine.bat
 echo ${1}.exe -a kaspa:qz4jdyu04hv4hpyy00pl6trzw4gllnhnwy62xattejv2vaj5r0p5quvns058f -s n.seeder1.kaspad.net >> ${1}/mine.bat
 echo goto start >> ${1}/mine.bat
diff --git a/plugins/cuda/src/worker.rs b/plugins/cuda/src/worker.rs
@@ -11,6 +11,8 @@ use rand::{Fill, RngCore};
 use std::ffi::CString;
 use std::sync::{Arc, Weak};
 
+static BPS: f32 = 1.;
+
 static PTX_86: &str = include_str!("../resources/kaspa-cuda-sm86.ptx");
 static PTX_75: &str = include_str!("../resources/kaspa-cuda-sm75.ptx");
 static PTX_61: &str = include_str!("../resources/kaspa-cuda-sm61.ptx");
@@ -53,6 +55,8 @@ pub struct CudaGPUWorker<'gpu> {
     // NOTE: The order is important! context must be closed last
     heavy_hash_kernel: Kernel<'gpu>,
     stream: Stream,
+    start_event: Event,
+    stop_event: Event,
     _module: Arc<Module>,
 
     rand_state: DeviceBuffer<u64>,
@@ -95,6 +99,7 @@ impl<'gpu> Worker for CudaGPUWorker<'gpu> {
             NonceGenEnum::Xoshiro => 1,
         };
 
+        self.start_event.record(stream).unwrap();
         unsafe {
             launch!(
                 func<<<
@@ -110,11 +115,16 @@ impl<'gpu> Worker for CudaGPUWorker<'gpu> {
             )
             .unwrap(); // We see errors in sync
         }
+        self.stop_event.record(stream).unwrap();
     }
 
     #[inline(always)]
     fn sync(&self) -> Result<(), Error> {
-        self.stream.synchronize()?;
+        //self.stream.synchronize()?;
+        self.stop_event.synchronize()?;
+        if self.stop_event.elapsed_time_f32(&self.start_event)? > 1000. / BPS {
+            return Err("Cuda takes longer then block rate. Please reduce your workload.".into());
+        }
         Ok(())
     }
 
@@ -183,32 +193,32 @@ impl<'gpu> CudaGPUWorker<'gpu> {
 
         let mut heavy_hash_kernel = Kernel::new(Arc::downgrade(&_module), "heavy_hash")?;
 
-        let mut chosen_workload = 0usize;
+        let mut chosen_workload = 0u32;
         if is_absolute {
             chosen_workload = 1;
         } else {
             let cur_workload = heavy_hash_kernel.get_workload();
-            if chosen_workload == 0 || chosen_workload < cur_workload as usize {
-                chosen_workload = cur_workload as usize;
+            if chosen_workload == 0 || chosen_workload < cur_workload {
+                chosen_workload = cur_workload;
             }
         }
-        chosen_workload = (chosen_workload as f32 * workload) as usize;
+        chosen_workload = (chosen_workload as f32 * workload) as u32;
         info!("GPU #{} Chosen workload: {}", device_id, chosen_workload);
-        heavy_hash_kernel.set_workload(chosen_workload as u32);
+        heavy_hash_kernel.set_workload(chosen_workload);
 
         let final_nonce_buff = vec![0u64; 1].as_slice().as_dbuf()?;
 
         let rand_state: DeviceBuffer<u64> = match random {
             NonceGenEnum::Xoshiro => {
                 info!("Using xoshiro for nonce-generation");
-                let mut buffer = DeviceBuffer::<u64>::zeroed(4 * chosen_workload).unwrap();
+                let mut buffer = DeviceBuffer::<u64>::zeroed(4 * (chosen_workload as usize)).unwrap();
                 info!("GPU #{} is generating initial seed. This may take some time.", device_id);
                 let mut seed = [1u64; 4];
                 seed.try_fill(&mut rand::thread_rng())?;
                 buffer.copy_from(
                     Xoshiro256StarStar::new(&seed)
                         .iter_jump_state()
-                        .take(chosen_workload)
+                        .take(chosen_workload as usize)
                         .flatten()
                         .collect::<Vec<u64>>()
                         .as_slice(),
@@ -228,7 +238,9 @@ impl<'gpu> CudaGPUWorker<'gpu> {
             device_id,
             _context,
             _module,
-            workload: chosen_workload,
+            start_event: Event::new(EventFlags::DEFAULT)?,
+            stop_event: Event::new(EventFlags::DEFAULT)?,
+            workload: chosen_workload as usize,
             stream,
             rand_state,
             final_nonce_buff,
diff --git a/src/main.rs b/src/main.rs
@@ -135,6 +135,9 @@ async fn main() -> Result<(), Error> {
     let mut opt: Opt = Opt::from_arg_matches(&matches)?;
     opt.process()?;
     env_logger::builder().filter_level(opt.log_level()).parse_default_env().init();
+    info!("==============================");
+    info!("       Kaspa-Miner GPU {}", env!("CARGO_PKG_VERSION"));
+    info!("==============================");
     info!("Found plugins: {:?}", plugins);
     info!("Plugins found {} workers", worker_count);
     if worker_count == 0 && opt.num_threads.unwrap_or(0) == 0 {
diff --git a/src/miner.rs b/src/miner.rs
@@ -258,7 +258,10 @@ impl MinerManager {
                         None => continue,
                     };
                     state_ref.pow_gpu(gpu_work);
-                    gpu_work.sync().unwrap();
+                    if let Err(e) = gpu_work.sync() {
+                        warn!("CUDA run ignored: {}", e);
+                        continue
+                    }
 
                     gpu_work.copy_output_to(&mut nonces)?;
                     if nonces[0] != 0 {
@@ -418,19 +421,29 @@ impl MinerManager {
                 "Current hashrate is".into(),
                 "Workers stalled or crashed. Considered reducing workload and check that your node is synced",
                 duration,
+                false,
             );
             for (device, rate) in &*hashes_by_worker.lock().unwrap() {
-                Self::log_single_hashrate(rate, format!("Device {}:", device), "0 hash/s", duration);
+                Self::log_single_hashrate(rate, format!("Device {}:", device), "0 hash/s", duration, true);
             }
             last_instant = now;
         }
     }
 
-    fn log_single_hashrate(counter: &Arc<AtomicU64>, prefix: String, warn_message: &str, duration: f64) {
+    fn log_single_hashrate(
+        counter: &Arc<AtomicU64>,
+        prefix: String,
+        warn_message: &str,
+        duration: f64,
+        keep_prefix: bool,
+    ) {
         let hashes = counter.swap(0, Ordering::AcqRel);
         let rate = (hashes as f64) / duration;
         if hashes == 0 {
-            warn!("{}{}", prefix, warn_message)
+            match keep_prefix {
+                true => warn!("{}{}", prefix, warn_message),
+                false => warn!("{}", warn_message),
+            };
         } else if hashes != 0 {
             let (rate, suffix) = Self::hash_suffix(rate);
             info!("{} {:.2} {}", prefix, rate, suffix);
diff --git a/src/pow.rs b/src/pow.rs
@@ -1,7 +1,7 @@
-use chrono::{DateTime, Utc};
 use log::info;
 use std::sync::Arc;
 use std::time::{Duration, UNIX_EPOCH};
+use time::{macros::format_description, OffsetDateTime};
 
 pub use crate::pow::hasher::HeaderHasher;
 use crate::{
@@ -41,13 +41,14 @@ impl BlockSeed {
             BlockSeed::FullBlock(block) => {
                 let block_hash =
                     block.block_hash().expect("We just got it from the state, we should be able to hash it");
-                let block_time = DateTime::<Utc>::from(
+                let format = format_description!("[year]-[month]-[day] [hour]:[minute]:[second]");
+                let block_time = OffsetDateTime::from(
                     UNIX_EPOCH + Duration::from_millis(block.header.as_ref().unwrap().timestamp as u64),
                 );
                 info!(
                     "Found a block: {:x} (Timestamp: {})",
                     block_hash,
-                    block_time.format("%Y-%m-%d %H:%M:%S").to_string()
+                    block_time.format(format).unwrap_or_else(|_| "unknown".to_string())
                 );
             }
             BlockSeed::PartialBlock { .. } => info!("Found a share!"),