Skip to content

Commit

Permalink
Implement the DSP primitive. (#239)
Browse files Browse the repository at this point in the history
* Implement the DSP primitive.

For chips that have these capabilities, a DSP implementation has been
added in the form of all the primitives described in the Gowin
documentation (UG287-1.3.3E_Gowin Digital Signal Processing (DSP) User
Guide), namely:

  - PADD9
  - PADD18
  - MULT9X9
  - MULT18X18
  - MULT36X36
  - MULTALU18X18
  - MULTALU36X18
  - MULTADDALU18X18
  - ALU54D

The most complex but also the most useful is the MULTADDALU18X18
primitive - it allows you to easily make a typical FIR filter, while all
connections between these primitives in the chain will be implemented by
direct fixed wires with minimal delay.

MULT36X36 primitives are not combined into chains, but they serve a
different purpose - this primitive can be found in Linux SoCs.

Added examples (in the examples/himbaechel directory) that are based on
a tiny RISC-V core that outputs its calculation results over UART. Only
the TXD pin is used (it can be found in the specific .CST file for each
board), so on the host computer side only GND and RXD need to be
connected. Port settings: 115200 baud, no parity, 8 data bits, 1 stop
bit, linefeed only.

Picocom launch example:

``` shell
picocom -l --imap lfcrlf -b 115200 /dev/ttyU0
```

The source code for the RISC-V test programs is provided along with the
assembly instructions, but the programs are not built when the examples
are compiled, because building them requires additional compilers.

Implemented the combination of primitives into chains using wires
CASO-CASI, SO(A, B)-SI(A, B), as well as SBO-SBI for PADD.

Signed-off-by: YRabbit <[email protected]>

* I forgot to remove the debugging part. Fixed.

Signed-off-by: YRabbit <[email protected]>

---------

Signed-off-by: YRabbit <[email protected]>
  • Loading branch information
yrabbit authored Mar 31, 2024
1 parent 1ff94b7 commit 91807b0
Show file tree
Hide file tree
Showing 37 changed files with 8,885 additions and 72 deletions.
479 changes: 479 additions & 0 deletions apycula/attrids.py

Large diffs are not rendered by default.

882 changes: 833 additions & 49 deletions apycula/chipdb.py

Large diffs are not rendered by default.

1,360 changes: 1,357 additions & 3 deletions apycula/gowin_pack.py

Large diffs are not rendered by default.

34 changes: 32 additions & 2 deletions apycula/gowin_unpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@
'GW2A-18C' : 'PBGA256S'
}

def print_sorted_dict(start, d):
    """Print ``start`` and the items of ``d`` on one line, in sorted-key order.

    The output is ``start`` immediately followed by ``{``, then one
    ``key:value, `` fragment per key (keys taken from ``sorted(d)``), and
    finally ``}`` with a trailing newline.
    """
    fragments = [f'{key}:{d[key]}, ' for key in sorted(d)]
    # sep/end supply the braces, so the line is "<start>{<fragments>}\n".
    print(start, ''.join(fragments), sep='{', end='}\n')

# bank iostandards
# XXX default io standard may be board-dependent!
_banks = {'0': "LVCMOS18", '1': "LVCMOS18", '2': "LVCMOS18", '3': "LVCMOS18"}
Expand Down Expand Up @@ -59,6 +65,7 @@ def get_attr_name(attrname_table, code):
for name, cod in attrname_table.items():
if cod == code:
return name
print(f'Unknown attr name for {code}/0x{code:x}.')
return ''

# fix names and types of the PLL attributes
Expand Down Expand Up @@ -293,7 +300,7 @@ def get_pll_A(db, row, col, typ):
'IDDRX8': 'IDES16',
}

# BSRAM have 3 cells: BSRAM, BSRAM0 and BSRAM1
# BSRAM has 3 cells: BSRAM, BSRAM0 and BSRAM1
# { (row, col) : idx }
_bsram_cells = {}
def get_bsram_main_cell(db, row, col, typ):
Expand All @@ -303,6 +310,12 @@ def get_bsram_main_cell(db, row, col, typ):
col -= 2
return row, col

# The DSP has 9 cells: the main one and a group of auxiliary ones.
def get_dsp_main_cell(db, row, col, typ):
    """Return the (row, col) of the main DSP cell for the given cell.

    Args:
        db: chip database; accepted for signature parity with the other
            ``get_*_main_cell`` helpers but not used here.
        row: row of the cell being examined.
        col: column of the cell being examined.
        typ: bel name of the cell. When the four characters that precede
            its last two characters are ``'_AUX'``, the cell is treated as
            auxiliary and its column is remapped.

    Returns:
        A ``(row, col)`` tuple. ``row`` is always returned unchanged;
        ``col`` becomes ``1 + (col - 1) // 9`` for auxiliary cells and is
        returned unchanged otherwise.
    """
    # Bug fix: the original tested the builtin ``type`` instead of the
    # ``typ`` parameter, so the '_AUX' comparison could never succeed
    # (or raised TypeError on interpreters without ``type[...]`` support).
    if typ[-6:-2] == '_AUX':
        col = 1 + (col - 1) // 9
    return row, col

# noiostd --- this is the case when the function is called
# with iostd by default, e.g. from the clock fuzzer
# With normal gowin_unpack io standard is determined first and it is known.
Expand Down Expand Up @@ -361,6 +374,23 @@ def parse_tile_(db, row, col, tile, default=True, noalias=False, noiostd = True)
#print(row, col, name, idx, tiledata.ttyp, attrvals)
bels[f'{name}'] = {}
continue
if name.startswith("ALU54D"):
continue
if name.startswith("DSP") or name.startswith("DSP_AUX"):
modes = set()
idx = name[-1]
#print(row, col, name, idx, tiledata.ttyp)
if name.startswith("DSP_AUX"):
row, col = get_dsp_main_cell(db, row, col, name)

if f'DSP{idx}' in db.shortval[tiledata.ttyp]:
attrvals = parse_attrvals(tile, db.logicinfo['DSP'], db.shortval[tiledata.ttyp][f'DSP{idx}'], attrids.dsp_attrids)
#print_sorted_dict(f'{row}, {col}, {name}, {idx}, {tiledata.ttyp} - ', attrvals)
for attrval in attrvals:
modes.add(attrval)
if modes and not name.startswith("DSP_AUX"):
bels[f'{name}{idx}'] = modes
continue
if name.startswith("IOLOGIC"):
idx = name[-1]
attrvals = parse_attrvals(tile, db.logicinfo['IOLOGIC'], db.shortval[tiledata.ttyp][f'IOLOGIC{idx}'], attrids.iologic_attrids)
Expand Down Expand Up @@ -802,7 +832,7 @@ def tile2verilog(dbrow, dbcol, bels, pips, clock_pips, mod, cst, db):
mod.wires.update({srcg, destg})
mod.assigns.append((destg, srcg))

belre = re.compile(r"(IOB|LUT|DFF|BANK|CFG|ALU|RAM16|ODDR|OSC[ZFHWO]?|BUFS|RPLL[AB]|PLLVR|IOLOGIC|BSRAM)(\w*)")
belre = re.compile(r"(IOB|LUT|DFF|BANK|CFG|ALU|RAM16|ODDR|OSC[ZFHWO]?|BUFS|RPLL[AB]|PLLVR|IOLOGIC|BSRAM|DSP)(\w*)")
bels_items = move_iologic(bels)

iologic_detected = set()
Expand Down
40 changes: 32 additions & 8 deletions examples/himbaechel/Makefile.himbaechel
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ all: \
bsram-pROM-tangnano20k.fs bsram-SDPB-tangnano20k.fs bsram-SP-tangnano20k.fs \
bsram-DPB-tangnano20k.fs bsram-pROMX9-tangnano20k.fs bsram-SDPX9B-tangnano20k.fs \
bsram-SPX9-tangnano20k.fs bsram-DPX9B-tangnano20k.fs \
femto-riscv-15-tangnano20k.fs femto-riscv-16-tangnano20k.fs femto-riscv-16-tangnano20k.fs \
femto-riscv-15-tangnano20k.fs femto-riscv-16-tangnano20k.fs femto-riscv-18-tangnano20k.fs \
dsp-mult36x36-tangnano20k.fs dsp-padd9-tangnano20k.fs dsp-padd18-tangnano20k.fs \
dsp-mult9x9-tangnano20k.fs dsp-alu54d-tangnano20k.fs dsp-multalu18x18-tangnano20k.fs \
dsp-multalu36x18-tangnano20k.fs dsp-multaddalu18x18-tangnano20k.fs \
\
blinky-primer20k.fs shift-primer20k.fs blinky-tbuf-primer20k.fs blinky-oddr-primer20k.fs \
blinky-osc-primer20k.fs tlvds-primer20k.fs elvds-primer20k.fs oddr-tlvds-primer20k.fs \
Expand All @@ -22,6 +25,10 @@ all: \
bsram-pROM-primer20k.fs bsram-SDPB-primer20k.fs bsram-SP-primer20k.fs \
bsram-DPB-primer20k.fs bsram-pROMX9-primer20k.fs bsram-SDPX9B-primer20k.fs \
bsram-SPX9-primer20k.fs bsram-DPX9B-primer20k.fs \
femto-riscv-15-primer20k.fs femto-riscv-16-primer20k.fs femto-riscv-18-primer20k.fs \
dsp-mult36x36-primer20k.fs dsp-padd9-primer20k.fs dsp-padd18-primer20k.fs \
dsp-mult9x9-primer20k.fs dsp-alu54d-primer20k.fs dsp-multalu18x18-primer20k.fs \
dsp-multalu36x18-primer20k.fs dsp-multaddalu18x18-primer20k.fs \
\
blinky-tangnano.fs shift-tangnano.fs blinky-tbuf-tangnano.fs blinky-oddr-tangnano.fs \
blinky-osc-tangnano.fs elvds-tangnano.fs oddr-elvds-tangnano.fs pll-nanolcd-tangnano.fs \
Expand All @@ -46,7 +53,10 @@ all: \
ides16-tangnano4k.fs \
ides4-tangnano4k.fs ivideo-tangnano4k.fs ides8-tangnano4k.fs ides10-tangnano4k.fs \
oser10-tlvds-tangnano4k.fs \
femto-riscv-15-tangnano4k.fs femto-riscv-16-tangnano4k.fs femto-riscv-16-tangnano4k.fs \
femto-riscv-15-tangnano4k.fs femto-riscv-16-tangnano4k.fs femto-riscv-18-tangnano4k.fs \
dsp-mult36x36-tangnano4k.fs dsp-padd9-tangnano4k.fs dsp-padd18-tangnano4k.fs \
dsp-mult9x9-tangnano4k.fs dsp-alu54d-tangnano4k.fs dsp-multalu18x18-tangnano4k.fs \
dsp-multalu36x18-tangnano4k.fs dsp-multaddalu18x18-tangnano4k.fs \
\
blinky-tangnano9k.fs shift-tangnano9k.fs blinky-tbuf-tangnano9k.fs blinky-oddr-tangnano9k.fs \
blinky-osc-tangnano9k.fs tlvds-tangnano9k.fs elvds-tangnano9k.fs oddr-tlvds-tangnano9k.fs \
Expand All @@ -58,6 +68,9 @@ all: \
bsram-SPX9-tangnano9k.fs bsram-DPX9B-tangnano9k.fs \
oser10-elvds-tangnano9k.fs \
femto-riscv-15-tangnano9k.fs femto-riscv-16-tangnano9k.fs femto-riscv-18-tangnano9k.fs \
dsp-mult36x36-tangnano9k.fs dsp-padd9-tangnano9k.fs dsp-padd18-tangnano9k.fs \
dsp-mult9x9-tangnano9k.fs dsp-alu54d-tangnano9k.fs dsp-multalu18x18-tangnano9k.fs \
dsp-multalu36x18-tangnano9k.fs dsp-multaddalu18x18-tangnano9k.fs \
\
blinky-szfpga.fs shift-szfpga.fs blinky-tbuf-szfpga.fs blinky-oddr-szfpga.fs \
blinky-osc-szfpga.fs tlvds-szfpga.fs elvds-szfpga.fs oddr-tlvds-szfpga.fs \
Expand All @@ -68,19 +81,29 @@ all: \
bsram-pROM-szfpga.fs bsram-SDPB-szfpga.fs bsram-SP-szfpga.fs \
bsram-pROMX9-szfpga.fs bsram-SDPX9B-szfpga.fs \
bsram-SPX9-szfpga.fs \
femto-riscv-15-szfpga.fs femto-riscv-16-szfpga.fs femto-riscv-18-szfpga.fs \
dsp-mult36x36-szfpga.fs dsp-padd9-szfpga.fs dsp-padd18-szfpga.fs \
dsp-mult9x9-szfpga.fs dsp-alu54d-szfpga.fs dsp-multalu18x18-szfpga.fs \
dsp-multalu36x18-szfpga.fs dsp-multaddalu18x18-szfpga.fs \
\
blinky-tec0117.fs shift-tec0117.fs blinky-tbuf-tec0117.fs blinky-oddr-tec0117.fs \
blinky-osc-tec0117.fs tlvds-tec0117.fs elvds-tec0117.fs oddr-tlvds-tec0117.fs \
oddr-elvds-tec0117.fs blinky-pll-tec0117.fs oser16-tec0117.fs attosoc-tec0117.fs \
oser4-tec0117.fs ovideo-tec0117.fs oser8-tec0117.fs oser10-tec0117.fs \
ides16-tec0117.fs \
ides4-tec0117.fs ivideo-tec0117.fs ides8-tec0117.fs ides10-tec0117.fs \
dsp-mult36x36-tec0117.fs dsp-padd9-tec0117.fs dsp-padd18-tec0117.fs \
dsp-mult9x9-tec0117.fs dsp-alu54d-tec0117.fs dsp-multalu18x18-tec0117.fs \
dsp-multalu36x18-tec0117.fs dsp-multaddalu18x18-tec0117.fs \
\
blinky-runber.fs shift-runber.fs blinky-tbuf-runber.fs blinky-oddr-runber.fs \
blinky-osc-runber.fs tlvds-runber.fs elvds-runber.fs oddr-tlvds-runber.fs \
oddr-elvds-runber.fs blinky-pll-runber.fs \
oser4-runber.fs ovideo-runber.fs oser8-runber.fs oser10-runber.fs \
ides4-runber.fs ivideo-runber.fs ides8-runber.fs ides10-runber.fs
ides4-runber.fs ivideo-runber.fs ides8-runber.fs ides10-runber.fs \
dsp-mult36x36-runber.fs dsp-padd9-runber.fs dsp-padd18-runber.fs \
dsp-mult9x9-runber.fs dsp-alu54d-runber.fs dsp-multalu18x18-runber.fs \
dsp-multalu36x18-runber.fs dsp-multaddalu18x18-runber.fs

unpacked:\
blinky-tangnano20k-unpacked.v shift-tangnano20k-unpacked.v \
Expand Down Expand Up @@ -173,7 +196,8 @@ unpacked:\
elvds-runber-unpacked.v oddr-tlvds-runber-unpacked.v oddr-elvds-runber-unpacked.v \
blinky-pll-runber-unpacked.v oser4-runber-unpacked.v ovideo-runber-unpacked.v \
oser8-runber-unpacked.v oser10-runber-unpacked.v ides4-runber-unpacked.v \
ivideo-runber-unpacked.v ides8-runber-unpacked.v ides10-runber-unpacked.v
ivideo-runber-unpacked.v ides8-runber-unpacked.v ides10-runber-unpacked.v

clean:
rm -f *.json *.fs *-unpacked.v

Expand Down Expand Up @@ -208,7 +232,7 @@ attosoc-tangnano20k-synth.json: attosoc/attosoc.v attosoc/picorv32.v
$(NEXTPNR) --json $< --write $@ --device GW2A-LV18PG256C8/I7 --vopt family=GW2A-18 --vopt cst=primer20k.cst

%-primer20k-synth.json: %.v
$(YOSYS) -D LEDS_NR=6 -D OSC_TYPE_OSC -D INV_BTN=0 -p "read_verilog $^; synth_gowin -json $@"
$(YOSYS) -D LEDS_NR=6 -D OSC_TYPE_OSC -D INV_BTN=0 -D CPU_FREQ=27 -D BAUD_RATE=115200 -p "read_verilog $^; synth_gowin -json $@"

pll-nanolcd-primer20k-synth.json: pll/GW2A-18-dyn.vh pll-nanolcd/TOP.v pll-nanolcd/VGAMod.v
$(YOSYS) -D INV_BTN=0 -p "read_verilog $^; synth_gowin -json $@"
Expand Down Expand Up @@ -299,7 +323,7 @@ bsram-%-tangnano9k-synth.json: pll/GW1N-9C-dyn.vh %-image-rom.v %-video-ram.v %.
$(NEXTPNR) --json $< --write $@ --device GW1NR-LV9LQ144PC6/I5 --vopt family=GW1N-9 --vopt cst=szfpga.cst

%-szfpga-synth.json: %.v
$(YOSYS) -D LEDS_NR=4 -D OSC_TYPE_OSC -D INV_BTN=0 -p "read_verilog $^; synth_gowin -json $@"
$(YOSYS) -D LEDS_NR=4 -D OSC_TYPE_OSC -D INV_BTN=0 -D CPU_FREQ=50 -D BAUD_RATE=115200 -p "read_verilog $^; synth_gowin -json $@"

blinky-pll-szfpga-synth.json: pll/GW1N-9-dyn.vh blinky-pll.v
$(YOSYS) -D INV_BTN=0 -D LEDS_NR=4 -p "read_verilog $^; synth_gowin -json $@"
Expand All @@ -316,7 +340,7 @@ bsram-%-szfpga-synth.json: pll/GW1N-9-dyn.vh %-image-rom.v %-video-ram.v %.v
$(NEXTPNR) --json $< --write $@ --device GW1NR-LV9QN88C6/I5 --vopt family=GW1N-9 --vopt cst=tec0117.cst

%-tec0117-synth.json: %.v
$(YOSYS) -D LEDS_NR=8 -D OSC_TYPE_OSC -D INV_BTN=0 -p "read_verilog $^; synth_gowin -json $@"
$(YOSYS) -D LEDS_NR=8 -D OSC_TYPE_OSC -D INV_BTN=0 -D CPU_FREQ=12 -D BAUD_RATE=115200 -p "read_verilog $^; synth_gowin -json $@"

blinky-pll-tec0117-synth.json: pll/GW1N-9-dyn.vh blinky-pll.v
$(YOSYS) -D INV_BTN=0 -D LEDS_NR=8 -p "read_verilog $^; synth_gowin -json $@"
Expand All @@ -330,7 +354,7 @@ blinky-pll-tec0117-synth.json: pll/GW1N-9-dyn.vh blinky-pll.v
$(NEXTPNR) --json $< --write $@ --device GW1N-UV4LQ144C6/I5 --vopt cst=runber.cst

%-runber-synth.json: %.v
$(YOSYS) -D LEDS_NR=8 -D OSC_TYPE_OSC -D INV_BTN=0 -p "read_verilog $^; synth_gowin -json $@"
$(YOSYS) -D LEDS_NR=8 -D OSC_TYPE_OSC -D INV_BTN=0 -D FORCE_BRAM -D CPU_FREQ=12 -D BAUD_RATE=115200 -p "read_verilog $^; synth_gowin -json $@"

blinky-pll-runber-synth.json: pll/GW1N-4-dyn.vh blinky-pll.v
$(YOSYS) -D INV_BTN=0 -D LEDS_NR=8 -p "read_verilog $^; synth_gowin -json $@"
Expand Down
101 changes: 101 additions & 0 deletions examples/himbaechel/dsp-alu54d.v
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
`default_nettype none
// alu0 mode 0 - simple subtraction with accumulator in C
// alu1 mode 1 - addition with CASI and accumulator in A
// alu2 mode 2 - addition with CASI
// idsp: test fixture that instantiates three ALU54D primitives (alu0, alu1,
// alu2) with constant operands and chains them through their cascade ports:
// alu0.CASO -> alu1.CASI (wire caso) and alu1.CASO -> alu2.CASI (wire caso0).
// Only bits [53:0] of product, product1 and product2 are driven inside this
// module; product3, product4 and the upper bits of the other outputs are not
// assigned here.
module idsp(input wire clk, input wire reset,
output wire [63:0] product,
output wire [63:0] product1,
output wire [63:0] product2,
output wire [63:0] product3,
output wire [63:0] product4
);

// NOTE(review): the soa*/sob* wires below are declared but never connected
// in this module - presumably kept for symmetry with the other DSP
// examples; confirm before removing.
wire [17:0] soa;
wire [17:0] sob;
wire [17:0] soa0;
wire [17:0] sob0;
wire [17:0] soa1;
wire [17:0] sob1;
wire [17:0] soa2;
wire [17:0] sob2;
// Constant zero used to tie off unused control inputs.
wire gnd = 1'b0;

// Cascade links between consecutive ALU54D instances.
wire [54:0]caso;
wire [54:0]caso0;

// alu0: first element of the chain. CASI is tied to gnd, ACCLOAD is held
// high, and the output register is enabled (OUT_REG=1). ALUD_MODE is 0.
// NOTE(review): B_ADD_SUB=1 presumably selects subtraction of B, matching
// the "mode 0" description in the file header - confirm against the Gowin
// DSP user guide.
ALU54D alu0(
.A(54'hde1ec7ab1e),
.B(54'hcad),
.DOUT(product[53:0]),
.CASI(gnd),
.CASO(caso),
.ASIGN(gnd),
.BSIGN(gnd),
.ACCLOAD(1'b1),
.CLK(clk),
.CE(1'b1),
.RESET(reset)
);
defparam alu0.AREG=1'b0;
defparam alu0.BREG=1'b0;
defparam alu0.ASIGN_REG=1'b0;
defparam alu0.BSIGN_REG=1'b0;
defparam alu0.ACCLOAD_REG=1'b0;
defparam alu0.OUT_REG=1'b1;
defparam alu0.B_ADD_SUB=1'b1;
defparam alu0.C_ADD_SUB=1'b0;
defparam alu0.ALUD_MODE=2'b0;
defparam alu0.ALU_RESET_MODE="SYNC";

// alu1: takes its cascade input from alu0 (caso) and forwards its own
// cascade output on caso0. Only the A input register is enabled (AREG=1);
// ACCLOAD is tied low. ALUD_MODE is 1.
ALU54D alu1(
.A(54'h1111),
.B(54'h2),
.DOUT(product1[53:0]),
.CASI(caso),
.CASO(caso0),
.ASIGN(gnd),
.BSIGN(gnd),
.ACCLOAD(gnd),
.CLK(clk),
.CE(1'b1),
.RESET(reset)
);
defparam alu1.AREG=1'b1;
defparam alu1.BREG=1'b0;
defparam alu1.ASIGN_REG=1'b0;
defparam alu1.BSIGN_REG=1'b0;
defparam alu1.ACCLOAD_REG=1'b0;
defparam alu1.OUT_REG=1'b0;
defparam alu1.B_ADD_SUB=1'b0;
defparam alu1.C_ADD_SUB=1'b0;
defparam alu1.ALUD_MODE=1;
defparam alu1.ALU_RESET_MODE="SYNC";

// alu2: last element of the chain. Cascade input comes from alu1 (caso0)
// and CASO is left unconnected. All input-side registers (A, B, ASIGN,
// BSIGN, ACCLOAD) are enabled; the output register is not. ALUD_MODE is 2.
ALU54D alu2(
.A(54'h100000000),
.B(54'h00000f000),
.DOUT(product2[53:0]),
.CASI(caso0),
.CASO(),
.ASIGN(gnd),
.BSIGN(gnd),
.ACCLOAD(gnd),
.CLK(clk),
.CE(1'b1),
.RESET(reset)
);
defparam alu2.AREG=1'b1;
defparam alu2.BREG=1'b1;
defparam alu2.ASIGN_REG=1'b1;
defparam alu2.BSIGN_REG=1'b1;
defparam alu2.ACCLOAD_REG=1'b1;
defparam alu2.OUT_REG=1'b0;
defparam alu2.B_ADD_SUB=1'b0;
defparam alu2.C_ADD_SUB=1'b0;
defparam alu2.ALUD_MODE=2;
defparam alu2.ALU_RESET_MODE="SYNC";
endmodule

`define FIRMWARE "riscv-dsp-firmware/alu54d.hex"
`include "dsp-riscv.v"

Loading

0 comments on commit 91807b0

Please sign in to comment.