diff --git a/nbval/plugin.py b/nbval/plugin.py
index 4200ec8..104e0ca 100644
--- a/nbval/plugin.py
+++ b/nbval/plugin.py
@@ -428,12 +428,22 @@ def compare_outputs(self, test, ref, skip_compare=None):
         test_keys = set(testing_outs.keys())
 
         if ref_keys - test_keys:
-            self.comparison_traceback.append(
-                cc.FAIL
-                + "Missing output fields from running code: %s"
-                % (ref_keys - test_keys)
-                + cc.ENDC
-            )
+            if ref_keys == {'evalue', 'ename'}:
+                self.comparison_traceback.append(
+                    cc.FAIL
+                    + "Expected error:\n %s: %r" % (
+                        '\n'.join(reference_outs['ename']),
+                        '\n'.join(reference_outs['evalue'])
+                    )
+                    + cc.ENDC
+                )
+            else:
+                self.comparison_traceback.append(
+                    cc.FAIL
+                    + "Missing output fields from running code: %s"
+                    % (ref_keys - test_keys)
+                    + cc.ENDC
+                )
             return False
         elif test_keys - ref_keys:
             self.comparison_traceback.append(
@@ -570,7 +580,7 @@ def runtest(self):
 
         # Poll the shell channel to get a message
         try:
-            self.parent.kernel.await_reply(msg_id, timeout=timeout)
+            kernel.await_reply(msg_id, timeout=timeout)
         except Empty:  # Timeout reached
             # Try to interrupt kernel, as this will give us traceback:
             kernel.interrupt()
@@ -582,6 +592,13 @@ def runtest(self):
         # TODO: Only store if comparing with nbdime, to save on memory usage
         self.test_outputs = outs
 
+        # Cells where the reference is not run, will not check outputs:
+        unrun = self.cell.execution_count is None
+        if unrun and self.cell.outputs:
+            self.raise_cell_error('Unrun reference cell has outputs')
+
+        cell_has_error = False
+
         # Now get the outputs from the iopub channel
         while True:
             # The iopub channel broadcasts a range of messages. We keep reading
@@ -692,6 +709,7 @@ def runtest(self):
             # cell execution. Therefore raise a cell error and pass the
             # traceback information.
             elif msg_type == 'error':
+                cell_has_error = True
                 # Store error in output first
                 out['ename'] = reply['ename']
                 out['evalue'] = reply['evalue']
@@ -700,9 +718,9 @@ def runtest(self):
                 if not self.options['check_exception']:
                     # Ensure we flush iopub before raising error
                     try:
-                        self.parent.kernel.await_idle(msg_id, self.output_timeout)
+                        kernel.await_idle(msg_id, self.output_timeout)
                     except Empty:
-                        self.stop()
+                        kernel.stop()
                         raise RuntimeError('Timed out waiting for idle kernel!')
                     traceback = '\n' + '\n'.join(reply['traceback'])
                     if out['ename'] == 'KeyboardInterrupt' and self.parent.timed_out:
@@ -718,10 +736,11 @@ def runtest(self):
 
         outs[:] = coalesce_streams(outs)
 
-        # Cells where the reference is not run, will not check outputs:
-        unrun = self.cell.execution_count is None
-        if unrun and self.cell.outputs:
-            self.raise_cell_error('Unrun reference cell has outputs')
+        if self.options['check_exception'] and unrun and not cell_has_error:
+            # If unrun, we cannot rely on output comparison for checking errors
+            self.raise_cell_error(
+                "Expected error",
+                "Expected cell to produce an error, but none was produced!")
 
         # Compare if the outputs have the same number of lines
         # and throw an error if it fails
diff --git a/tests/test_expected_exceptions.py b/tests/test_expected_exceptions.py
new file mode 100644
index 0000000..d949d3b
--- /dev/null
+++ b/tests/test_expected_exceptions.py
@@ -0,0 +1,89 @@
+import os
+
+import nbformat
+import pytest
+
+from utils import build_nb
+
+
+pytest_plugins = "pytester"
+
+
+def test_run_raises(testdir):
+    # This test uses the testdir fixture from pytester, which is useful for
+    # testing pytest plugins. It writes a notebook to a temporary dir
+    # and then runs pytest.
+
+    # Setup notebook to test:
+    sources = [
+        # In [1]:
+        "",  # No error produced, when one is expected
+        # In [2]:
+        "raise ValueError('foo')",  # Wrong ename
+        # In [3]:
+        "raise ValueError('foo')",  # Wrong evalue
+    ]
+    # Build notebook marked as run:
+    nb = build_nb(sources, mark_run=True)
+
+    nb.cells[0].metadata.tags = ['raises-exception']
+    nb.cells[0].outputs.append(
+        nbformat.v4.new_output(
+            'error',
+            ename='ValueError',
+            evalue='foo',
+            traceback=['foobar', 'bob'],  # Should be ignored
+        )
+    )
+
+    nb.cells[1].metadata.tags = ['raises-exception']
+    nb.cells[1].outputs.append(
+        nbformat.v4.new_output(
+            'error',
+            ename='TypeError',  # Expected TypeError, got ValueError
+            evalue='foo',
+            traceback=['foobar', 'bob'],  # Should be ignored
+        )
+    )
+
+    nb.cells[2].metadata.tags = ['raises-exception']
+    nb.cells[2].outputs.append(
+        nbformat.v4.new_output(
+            'error',
+            ename='ValueError',
+            evalue='bar',  # Expected bar, got foo
+            traceback=['foobar', 'bob'],  # Should be ignored
+        )
+    )
+
+    # Write notebook to test dir
+    nbformat.write(nb, os.path.join(
+        str(testdir.tmpdir), 'test_expected_exceptions.ipynb'))
+
+    # Run tests
+    result = testdir.runpytest_subprocess('--nbval', '--current-env', '-s')
+    result.assert_outcomes(failed=3)
+
+
+
+def test_unrun_raises(testdir):
+    # This test uses the testdir fixture from pytester, which is useful for
+    # testing pytest plugins. It writes a notebook to a temporary dir
+    # and then runs pytest.
+
+    # Setup notebook to test:
+    sources = [
+        # In [1]:
+        "pass",
+    ]
+    # Build unrun notebook:
+    nb = build_nb(sources, mark_run=False)
+    nb.cells[0].metadata.tags = ['raises-exception']
+
+    # Write notebook to test dir
+    nbformat.write(nb, os.path.join(
+        str(testdir.tmpdir), 'test_expected_exceptions.ipynb'))
+
+    # Run tests
+    result = testdir.runpytest_subprocess('--nbval', '--current-env', '-s')
+    result.assert_outcomes(failed=1)
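For context, the behaviour these tests exercise can be reproduced by hand with a small notebook built via nbformat: a run code cell tagged 'raises-exception' whose stored error output carries the expected ename/evalue, with the stored traceback ignored by the comparison. The sketch below mirrors the first test case; the notebook file name and the final pytest invocation are illustrative and not part of this diff.

    import nbformat

    # Minimal sketch: one cell that is expected to raise, with the reference
    # error stored in its outputs so nbval can compare ename/evalue.
    nb = nbformat.v4.new_notebook()
    cell = nbformat.v4.new_code_cell("raise ValueError('foo')", execution_count=1)
    cell.metadata.tags = ['raises-exception']   # tells nbval an error is expected here
    cell.outputs.append(nbformat.v4.new_output(
        'error',
        ename='ValueError',   # compared against the error raised when the cell is run
        evalue='foo',
        traceback=[],         # tracebacks are not compared
    ))
    nb.cells.append(cell)

    nbformat.write(nb, 'expected_exception_demo.ipynb')   # illustrative file name
    # Run with: pytest --nbval --current-env expected_exception_demo.ipynb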