Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add restart modules #170

Merged
merged 9 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions commands/station.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import * as bacalhau from '../lib/bacalhau.js'
import fs from 'node:fs/promises'
import { metrics } from '../lib/metrics.js'
import { paths } from '../lib/paths.js'
import pRetry from 'p-retry'

const { FIL_WALLET_ADDRESS } = process.env

Expand Down Expand Up @@ -50,7 +51,7 @@ export const station = async ({ json, experimental }) => {
})

const modules = [
zinniaRuntime.start({
pRetry(() => zinniaRuntime.run({
FIL_WALLET_ADDRESS,
STATE_ROOT: join(paths.moduleState, 'zinnia'),
CACHE_ROOT: join(paths.moduleCache, 'zinnia'),
Expand All @@ -64,18 +65,18 @@ export const station = async ({ json, experimental }) => {
})
},
onMetrics: m => metrics.submit('zinnia', m)
})
}), { retries: 1000 })
]

if (experimental) {
modules.push(bacalhau.start({
modules.push(pRetry(() => bacalhau.run({
FIL_WALLET_ADDRESS,
storagePath: join(paths.moduleCache, 'bacalhau'),
onActivity: activity => {
activities.submit({ source: 'Bacalhau', ...activity })
},
onMetrics: m => metrics.submit('bacalhau', m)
}))
}), { retries: 1000 }))
}

await Promise.all(modules)
Expand Down
122 changes: 68 additions & 54 deletions lib/bacalhau.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { fetch } from 'undici'
import Sentry from '@sentry/node'
import { installBinaryModule, getBinaryModuleExecutable } from './modules.js'
import os from 'node:os'
import { once } from 'node:events'

const DIST_TAG = 'v1.0.3'
const { TARGET_ARCH = os.arch() } = process.env
Expand All @@ -25,7 +26,42 @@ export async function install () {
})
}

export async function start ({
/**
 * Wait until the child process announces its API endpoint on stdout.
 *
 * Stdout chunks are accumulated and searched for a complete line of the form
 * `API: http...`. The `data` listener installed here is removed as soon as the
 * promise settles, whether that is by finding the URL or by the process
 * failing.
 *
 * @param {object} childProcess - Object exposing a `stdout` event emitter and
 *   a promise-style `catch()` method.
 * @returns {Promise<string>} Resolves with the URL captured after `API: `.
 *   Rejects with the reason passed to the `childProcess.catch()` handler.
 */
const getApiUrl = childProcess => new Promise((resolve, reject) => {
  let output = ''

  const readyHandler = data => {
    output += data.toString()

    // Require the line terminator: chunk boundaries are arbitrary, and with
    // the `m` flag a bare `$` also matches at the end of the buffer, which
    // would yield a truncated URL when a chunk ends in the middle of the line.
    const apiMatch = output.match(/^API: (http.*)\r?\n/m)
    if (!apiMatch) return

    childProcess.stdout.off('data', readyHandler)
    resolve(apiMatch[1])
  }

  childProcess.stdout.on('data', readyHandler)
  childProcess.catch(err => {
    // Stop buffering output once we know the URL will never be delivered.
    childProcess.stdout.off('data', readyHandler)
    reject(err)
  })
})

/**
 * Repeatedly refresh the module stats for as long as the child process has
 * neither an exit code nor a terminating signal recorded.
 *
 * Each iteration calls `updateStats` and then waits one second. A failure of
 * `updateStats` is logged to stderr and does not stop the loop.
 *
 * @param {object} options
 * @param {object} options.childProcess - Object whose `exitCode` and
 *   `signalCode` are both `null` while the process is still running.
 * @param {string} options.apiUrl - Forwarded to `updateStats`.
 * @param {Function} options.onMetrics - Forwarded to `updateStats`.
 * @returns {Promise<void>} Resolves once the process is seen as exited.
 */
const runMetricsLoop = async ({ childProcess, apiUrl, onMetrics }) => {
  const hasExited = () =>
    childProcess.exitCode !== null || childProcess.signalCode !== null

  while (!hasExited()) {
    try {
      await updateStats({ apiUrl, onMetrics })
    } catch (err) {
      const errString = err.stack || err.message || err
      console.error(`Cannot fetch Bacalhau module stats. ${errString}`)
    }
    // Pause between polls.
    await timers.setTimeout(1000)
  }
}

export async function run ({
FIL_WALLET_ADDRESS,
storagePath,
onActivity,
Expand Down Expand Up @@ -57,68 +93,46 @@ export async function start ({
}
)

const readyPromise = new Promise((resolve, reject) => {
childProcess.stdout.setEncoding('utf-8')
childProcess.stdout.on('data', data => {
if (data.includes('/compute/debug') && data.includes(200)) {
// Ignore noisy lines
return
}
handleActivityLogs({ onActivity, text: data })
})
childProcess.stderr.pipe(process.stderr, { end: false })

let output = ''

const readyHandler = data => {
output += data.toString()

const apiMatch = output.match(/^API: (http.*)$/m)
if (apiMatch) {
const apiUrl = apiMatch[1]

childProcess.stdout.off('data', readyHandler)
onActivity({ type: 'info', message: 'Bacalhau module started.' })
setInterval(() => {
updateStats({ apiUrl, onMetrics })
.catch(err => {
console.error(
`Cannot fetch Bacalhau module stats. ${err.stack || err.message || err}`
)
})
}, 1000).unref()
resolve()
}
childProcess.stdout.setEncoding('utf-8')
childProcess.stdout.on('data', data => {
if (data.includes('/compute/debug') && data.includes(200)) {
// Ignore noisy lines
return
}
childProcess.stdout.on('data', readyHandler)
childProcess.catch(reject)
})

childProcess.on('close', code => {
console.error(
`Bacalhau closed all stdio with code ${code ?? '<no code>'}`
)
childProcess.stderr.removeAllListeners()
childProcess.stdout.removeAllListeners()
Sentry.captureException('Bacalhau exited')
handleActivityLogs({ onActivity, text: data })
})
childProcess.stderr.pipe(process.stderr, { end: false })

childProcess.on('exit', (code, signal) => {
const reason = signal ? `via signal ${signal}` : `with code: ${code}`
const msg = `Bacalhau exited ${reason}`
onActivity({ type: 'info', message: msg })
})

try {
await Promise.race([
readyPromise,
timers.setTimeout(500)
])
} catch (err) {
const errorMsg = err instanceof Error ? err.message : '' + err
const message = `Cannot start Bacalhau: ${errorMsg}`
onActivity({ type: 'error', message })
}
await Promise.all([
(async () => {
let apiUrl
try {
apiUrl = await getApiUrl(childProcess)
} catch (err) {
const errorMsg = err instanceof Error ? err.message : '' + err
const message = `Cannot start Bacalhau: ${errorMsg}`
onActivity({ type: 'error', message })
throw err
}

onActivity({ type: 'info', message: 'Bacalhau module started.' })
await runMetricsLoop({ childProcess, apiUrl, onMetrics })
})(),
(async () => {
const [code] = await once(childProcess, 'close')
console.error(`Bacalhau closed all stdio with code ${code ?? '<no code>'}`)
childProcess.stderr.removeAllListeners()
childProcess.stdout.removeAllListeners()
Sentry.captureException('Bacalhau exited')
throw new Error('Bacalhau exited')
})()
])
}

function handleActivityLogs ({ onActivity, text }) {
Expand Down
58 changes: 26 additions & 32 deletions lib/zinnia.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import timers from 'node:timers/promises'
import { execa } from 'execa'
import Sentry from '@sentry/node'
import { installBinaryModule, downloadSourceFiles, getBinaryModuleExecutable } from './modules.js'
import { moduleBinaries } from './paths.js'
import os from 'node:os'
import { once } from 'node:events'

const ZINNIA_DIST_TAG = 'v0.13.0'
const ZINNIA_MODULES = [
Expand Down Expand Up @@ -36,7 +36,7 @@ export async function install () {
])
}

export async function start ({
export async function run ({
FIL_WALLET_ADDRESS,
STATE_ROOT,
CACHE_ROOT,
Expand All @@ -57,44 +57,38 @@ export async function start ({
}
})

const readyPromise = new Promise((resolve, reject) => {
childProcess.stdout.setEncoding('utf-8')
childProcess.stdout.on('data', data => {
handleEvents({ onActivity, onMetrics, text: data })
})
childProcess.stderr.pipe(process.stderr, { end: false })

childProcess.stdout.once('data', _data => {
// This is based on an implicit assumption that zinniad reports an info activity
// after it starts
resolve()
})
childProcess.catch(reject)
})

childProcess.on('close', code => {
console.error(`Zinnia closed all stdio with code ${code ?? '<no code>'}`)
childProcess.stderr.removeAllListeners()
childProcess.stdout.removeAllListeners()
Sentry.captureException('Zinnia exited')
childProcess.stdout.setEncoding('utf-8')
childProcess.stdout.on('data', data => {
handleEvents({ onActivity, onMetrics, text: data })
})
childProcess.stderr.pipe(process.stderr, { end: false })

childProcess.on('exit', (code, signal) => {
const reason = signal ? `via signal ${signal}` : `with code: ${code}`
const msg = `Zinnia exited ${reason}`
onActivity({ type: 'info', message: msg })
})

try {
await Promise.race([
readyPromise,
timers.setTimeout(500)
])
} catch (err) {
const errorMsg = err instanceof Error ? err.message : '' + err
const message = `Cannot start Zinnia: ${errorMsg}`
onActivity({ type: 'error', message })
}
await Promise.all([
(async () => {
try {
await childProcess
} catch (err) {
const errorMsg = err instanceof Error ? err.message : '' + err
const message = `Cannot start Zinnia: ${errorMsg}`
onActivity({ type: 'error', message })
throw err
}
})(),
(async () => {
const [code] = await once(childProcess, 'close')
console.error(`Zinnia closed all stdio with code ${code ?? '<no code>'}`)
childProcess.stderr.removeAllListeners()
childProcess.stdout.removeAllListeners()
Sentry.captureException('Zinnia exited')
throw new Error('Zinnia exited')
})()
])
}

function handleEvents ({ onActivity, onMetrics, text }) {
Expand Down
48 changes: 48 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
"@sentry/node": "^7.41.0",
"execa": "^8.0.1",
"gunzip-maybe": "^1.4.2",
"p-retry": "^5.1.2",
"tar-fs": "^3.0.3",
"undici": "^5.20.0",
"unzip-stream": "^0.3.1",
Expand Down