From f5abb118aa3b897eabe45f21c380e064c56a7829 Mon Sep 17 00:00:00 2001 From: David Garske Date: Mon, 23 Feb 2026 17:23:34 -0800 Subject: [PATCH 1/3] Add VORAGO VA416xx Ethernet port (PEB1 EVK) --- .gitignore | 2 + Makefile | 2 + src/port/va416xx/Makefile | 130 +++++ src/port/va416xx/README.md | 241 +++++++++ src/port/va416xx/board.h | 28 ++ src/port/va416xx/config.h | 74 +++ src/port/va416xx/flash.jlink | 16 + src/port/va416xx/hal_config.h | 30 ++ src/port/va416xx/ivt.c | 60 +++ src/port/va416xx/main.c | 758 ++++++++++++++++++++++++++++ src/port/va416xx/startup.c | 53 ++ src/port/va416xx/syscalls.c | 147 ++++++ src/port/va416xx/target.ld | 90 ++++ src/port/va416xx/va416xx_eth.c | 885 +++++++++++++++++++++++++++++++++ src/port/va416xx/va416xx_eth.h | 37 ++ 15 files changed, 2553 insertions(+) create mode 100644 src/port/va416xx/Makefile create mode 100644 src/port/va416xx/README.md create mode 100644 src/port/va416xx/board.h create mode 100644 src/port/va416xx/config.h create mode 100644 src/port/va416xx/flash.jlink create mode 100644 src/port/va416xx/hal_config.h create mode 100644 src/port/va416xx/ivt.c create mode 100644 src/port/va416xx/main.c create mode 100644 src/port/va416xx/startup.c create mode 100644 src/port/va416xx/syscalls.c create mode 100644 src/port/va416xx/target.ld create mode 100644 src/port/va416xx/va416xx_eth.c create mode 100644 src/port/va416xx/va416xx_eth.h diff --git a/.gitignore b/.gitignore index 9aea4cc..e4878b3 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,5 @@ build/* test/unit/unit tags cppcheck_results.xml +src/port/stm32h563/app.elf +src/port/va416xx/app.elf diff --git a/Makefile b/Makefile index d6ab7b3..d9f0ce2 100644 --- a/Makefile +++ b/Makefile @@ -119,6 +119,8 @@ CPPCHECK_FLAGS=--enable=warning,performance,portability,missingInclude \ --suppress=comparePointers:src/port/stm32h563/syscalls.c \ --suppress=comparePointers:src/port/stm32h753/startup.c \ --suppress=comparePointers:src/port/stm32h753/syscalls.c \ + 
--suppress=comparePointers:src/port/va416xx/startup.c \ + --suppress=comparePointers:src/port/va416xx/syscalls.c \ --disable=style \ --std=c99 --language=c \ --platform=unix64 \ diff --git a/src/port/va416xx/Makefile b/src/port/va416xx/Makefile new file mode 100644 index 0000000..4925a6c --- /dev/null +++ b/src/port/va416xx/Makefile @@ -0,0 +1,130 @@ +CC ?= arm-none-eabi-gcc +OBJCOPY ?= arm-none-eabi-objcopy +SIZE ?= arm-none-eabi-size + +ROOT := ../../.. + +# VA416xx SDK path (default: sibling directory to wolfip repo) +SDK_ROOT ?= ../../../../VA416xx_SDK + +# Base compiler flags (Cortex-M4, no FPU) +CFLAGS := -mcpu=cortex-m4 -mthumb -mfloat-abi=soft -Os -ffreestanding +CFLAGS += -fdata-sections -ffunction-sections +CFLAGS += -g -ggdb -Wall -Wextra -Werror + +# Optional extra flags (e.g., EXTRA_CFLAGS=-DDEBUG_ETH for verbose ETH diagnostics) +CFLAGS += $(EXTRA_CFLAGS) + +# Include paths +CFLAGS += -I. -I$(ROOT) -I$(ROOT)/src +CFLAGS += -I$(SDK_ROOT)/common/mcu/hdr +CFLAGS += -I$(SDK_ROOT)/common/drivers/hdr +CFLAGS += -I$(SDK_ROOT)/common/BSP/evk/hdr + +# Relaxed warnings for SDK and external sources +CFLAGS_EXT := $(filter-out -Werror,$(CFLAGS)) +CFLAGS_EXT += -Wno-unused-variable -Wno-unused-function -Wno-unused-parameter +CFLAGS_EXT += -Wno-sign-compare -Wno-missing-field-initializers +# Workaround SDK typo: en_iocfg_dir_input vs en_iocfg_dir__input +CFLAGS_EXT += -Den_iocfg_dir_input=en_iocfg_dir__input +# HBO (Heart Beat Oscillator) not exposed in a shared header +CFLAGS_EXT += -DHBO=20000000UL + +LDFLAGS := -nostdlib -T target.ld -Wl,-gc-sections + +# Application sources (compiled with strict flags) +APP_SRCS := startup.c ivt.c syscalls.c main.c va416xx_eth.c +APP_OBJS := $(patsubst %.c,%.o,$(APP_SRCS)) + +# wolfIP core source +WOLFIP_SRC := $(ROOT)/src/wolfip.c +WOLFIP_OBJ := wolfip.o + +# SDK sources (compiled with relaxed flags, objects go in local sdk/ dir) +SDK_SRC_DIR := $(SDK_ROOT)/common +SDK_SRCS := \ + $(SDK_SRC_DIR)/mcu/src/system_va416xx.c \ + 
$(SDK_SRC_DIR)/drivers/src/va416xx_hal.c \ + $(SDK_SRC_DIR)/drivers/src/va416xx_hal_ethernet.c \ + $(SDK_SRC_DIR)/drivers/src/va416xx_hal_uart.c \ + $(SDK_SRC_DIR)/drivers/src/va416xx_hal_ioconfig.c \ + $(SDK_SRC_DIR)/drivers/src/va416xx_hal_irqrouter.c \ + $(SDK_SRC_DIR)/drivers/src/va416xx_hal_clkgen.c + +# Build SDK objects locally (avoids polluting SDK tree) +SDK_OBJS := $(notdir $(patsubst %.c,%.o,$(SDK_SRCS))) + +ALL_OBJS := $(APP_OBJS) $(WOLFIP_OBJ) $(SDK_OBJS) + +all: app.bin + @echo "Built VA416xx wolfIP port" + @$(SIZE) app.elf + +app.elf: $(ALL_OBJS) target.ld + $(CC) $(CFLAGS) $(ALL_OBJS) $(LDFLAGS) \ + -Wl,--start-group -lc -lm -lgcc -lnosys -Wl,--end-group -o $@ + +app.bin: app.elf + $(OBJCOPY) -O binary $< $@ + +# Application objects (strict warnings) +%.o: %.c + $(CC) $(CFLAGS) -c $< -o $@ + +# wolfIP core (relaxed warnings) +$(WOLFIP_OBJ): $(WOLFIP_SRC) + $(CC) $(CFLAGS_EXT) -c $< -o $@ + +# SDK objects (relaxed warnings, built locally) +system_va416xx.o: $(SDK_SRC_DIR)/mcu/src/system_va416xx.c + $(CC) $(CFLAGS_EXT) -c $< -o $@ + +va416xx_hal.o: $(SDK_SRC_DIR)/drivers/src/va416xx_hal.c + $(CC) $(CFLAGS_EXT) -c $< -o $@ + +va416xx_hal_ethernet.o: $(SDK_SRC_DIR)/drivers/src/va416xx_hal_ethernet.c + $(CC) $(CFLAGS_EXT) -c $< -o $@ + +va416xx_hal_uart.o: $(SDK_SRC_DIR)/drivers/src/va416xx_hal_uart.c + $(CC) $(CFLAGS_EXT) -c $< -o $@ + +va416xx_hal_ioconfig.o: $(SDK_SRC_DIR)/drivers/src/va416xx_hal_ioconfig.c + $(CC) $(CFLAGS_EXT) -c $< -o $@ + +va416xx_hal_irqrouter.o: $(SDK_SRC_DIR)/drivers/src/va416xx_hal_irqrouter.c + $(CC) $(CFLAGS_EXT) -c $< -o $@ + +va416xx_hal_clkgen.o: $(SDK_SRC_DIR)/drivers/src/va416xx_hal_clkgen.c + $(CC) $(CFLAGS_EXT) -c $< -o $@ + +clean: + rm -f *.o app.elf app.bin + +# Show memory usage +size: app.elf + @echo "=== Memory Usage ===" + @$(SIZE) app.elf + @echo "" + @echo "Flash usage: $$($(SIZE) app.elf | awk 'NR==2{printf "%.1f%% (%d / %d bytes)", ($$1+$$2)*100/262144, $$1+$$2, 262144}')" + @echo "RAM usage (static): 
$$($(SIZE) app.elf | awk 'NR==2{printf "%.1f%% (%d / %d bytes)", ($$2+$$3)*100/65536, $$2+$$3, 65536}')" + +.PHONY: all clean size help + +help: + @echo "VA416xx wolfIP Build System" + @echo "" + @echo "Usage: make [target] [options]" + @echo "" + @echo "Targets:" + @echo " all Build app.bin (default)" + @echo " clean Remove build artifacts" + @echo " size Show memory usage statistics" + @echo " help Show this help" + @echo "" + @echo "Options:" + @echo " SDK_ROOT= Path to VA416xx SDK (default: ../../../../VA416xx_SDK)" + @echo " CC= C compiler (default: arm-none-eabi-gcc)" + @echo "" + @echo "Testing:" + @echo " ping # ICMP ping" + @echo " echo 'hello' | nc 7 # TCP echo" diff --git a/src/port/va416xx/README.md b/src/port/va416xx/README.md new file mode 100644 index 0000000..6ba8d9d --- /dev/null +++ b/src/port/va416xx/README.md @@ -0,0 +1,241 @@ +# wolfIP VA416xx Port + +Bare-metal port of wolfIP for the VORAGO VA416xx Cortex-M4 microcontroller, targeting the PEB1 EVK board with KSZ8041TL Ethernet PHY over MII. + +## Hardware + +- **MCU:** VA416xx (Cortex-M4, no FPU, 100 MHz via 40 MHz crystal × PLL 2.5×) +- **Flash:** 256 KB @ 0x00000000 +- **SRAM:** 64 KB (RAM0 32 KB @ 0x1FFF8000 + RAM1 32 KB @ 0x20000000, contiguous) +- **Ethernet:** Synopsys DesignWare GMAC (normal/legacy descriptor format), MII +- **PHY:** KSZ8041TL via MII (PORTA[8-15], PORTB[0-10], funsel=1) +- **Debug UART:** UART0 on PORTG[0] TX / PORTG[1] RX, 115200 8N1 +- **LED:** PORTG pin 5 (heartbeat) +- **Board:** PEB1 VA416xx EVK + +## Prerequisites + +- ARM GCC toolchain: `arm-none-eabi-gcc` +- VA416xx SDK at `../../../../VA416xx_SDK` (sibling directory to the wolfip repo) +- Serial terminal (minicom, screen, picocom) at 115200 baud +- JTAG/SWD debugger (Segger J-Link, OpenOCD, etc.) + +### Installing ARM Toolchain (Ubuntu/Debian) + +```bash +sudo apt install gcc-arm-none-eabi +``` + +## Quick Start + +```bash +# 1. Build +cd src/port/va416xx +make CC=arm-none-eabi-gcc + +# 2. 
Flash app.bin to the EVK via your debugger + +# 3. Monitor UART (115200 baud) +screen /dev/ttyUSB0 115200 + +# 4. Test +ping <device-ip> +echo "Hello" | nc <device-ip> 7 +``` + +## Build Variants + +| Command | Description | +|---------|-------------| +| `make CC=arm-none-eabi-gcc` | Production build (48 KB) | +| `make CC=arm-none-eabi-gcc EXTRA_CFLAGS=-DDEBUG_ETH` | Per-frame ETH diagnostics on UART | +| `make CC=arm-none-eabi-gcc EXTRA_CFLAGS=-DTX_SELFTEST` | Startup TX self-test (sends gratuitous ARPs, checks hw_tx, runs loopback) | +| `make CC=arm-none-eabi-gcc EXTRA_CFLAGS="-DTX_SELFTEST -DDEBUG_ETH"` | Full debug output | +| `make CC=arm-none-eabi-gcc EXTRA_CFLAGS=-DSPEED_TEST` | Replace echo server with throughput test service on port 9 | + +## Memory Usage (production build) + +``` + text data bss dec hex filename + 48028 1740 52920 102688 19120 app.elf + +Flash: 48 KB / 256 KB (19%) +RAM (static BSS): 52 KB / 64 KB (81%) +``` + +## Example Output + +Normal boot with DHCP and echo server: + +``` +=== wolfIP VA416xx Echo Server === +Build: Feb 23 2026 13:30:13 +Initializing Ethernet... + PHY link: UP + PHY: 10M Half Duplex (negotiated) + MAC: 10M Full Duplex (MAC_CONFIG=0x00018C80) +Starting DHCP... +Creating TCP echo server on port 7... +Ready! Test with: + ping <device-ip> + echo 'hello' | nc <device-ip> 7 + +Entering main loop... +[46] rx=0 tx=1/0 hw_tx=0 cfg=0x8C8C dbg=0x01100000 dma=0x00260400 TS=2 +DHCP bound: + IP: 10.0.4.184 + Mask: 255.255.255.0 + GW: 10.0.4.1 +[56] rx=12 tx=4/0 hw_tx=4 cfg=0x8C8C dbg=0x00000000 dma=0x00660445 TS=6 +Echo: client connected (fd=257) +[196] rx=208 tx=14/0 hw_tx=14 cfg=0x8C8C dbg=0x00000000 dma=0x00660445 TS=6 +Echo: client disconnected +``` + +Periodic diagnostic fields: `[time_s] rx=<n> tx=<n>/<n> hw_tx=<n> cfg=<hex> dbg=<hex> dma=<hex> TS=<n>` + +## Testing + +### ICMP Ping + +```bash +ping <device-ip> +``` + +### TCP Echo (Port 7) + +```bash +echo "Hello wolfIP!" 
| nc <device-ip> 7 +nc <device-ip> 7 # interactive +``` + +### Throughput Test (Port 9, with `-DSPEED_TEST`) + +```bash +# RX throughput: host → device; Ctrl+C nc after dd finishes +dd if=/dev/zero bs=1460 count=700 | nc <device-ip> 9 +^C + +# TX throughput: device → host; Ctrl+C after desired duration +nc <device-ip> 9 < /dev/null | pv > /dev/null +^C +``` + +#### Measured Results (PEB1 EVK, 10M Full Duplex) + +| Direction | Host-measured | Device-measured | Notes | +|-----------|--------------|-----------------|-------| +| RX (host→device) | **1.2 MB/s** | 1,022,000 bytes received | dd: 1,022,000 B in 0.86 s; ≈98% of 10 Mbps theoretical max | +| TX (device→host) | ~736 KB/s peak | **136 KB/s avg** | Peak early; average limited by wolfIP TCP window cycling | + +Theoretical maximum for 10 Mbps MII (1460-byte segments, ~4% Ethernet overhead): +`10 Mbps × 0.96 / 8 ≈ 1,200 KB/s` + +**Device UART output (RX test then TX test):** +``` +Speed: client connected (fd=257) +Speed: 1549643 ms, RX 1022000 bytes (~659 B/s), TX 3336 bytes (~2 B/s) +Speed: client connected (fd=257) +Speed: 170096 ms, RX 0 bytes (~0 B/s), TX 23195968 bytes (~136369 B/s) +``` + +> **RX elapsed time** reflects how long `nc` held the connection open after `dd` +> finished — TCP has no application-level EOF, so the connection stays alive until +> `nc` is killed. The device-reported rate for RX is therefore meaningless; use +> host `dd` timing instead (0.86 s for 1 MB → 1.2 MB/s). +> +> **TX average** (136 KB/s) is lower than the peak visible in `pv` (~736 KB/s) +> because wolfIP's TCP send window cycles: the device transmits until the remote +> receive window fills, then waits for ACKs to reopen it before sending more. + +## Architecture + +### Ethernet Driver (`va416xx_eth.c`) + +The driver uses the Synopsys DesignWare GMAC with the **normal (legacy) descriptor format**. 
+ +#### Descriptor Layout + +| Word | TX (TDES) | RX (RDES) | +|------|-----------|-----------| +| des0 | OWN (bit 31) doorbell; TX status bits written back by DMA | OWN (bit 31); RX status + frame length written back by DMA | +| des1 | IC/LS/FS/TCH control bits + TBS1 buffer size | DIC/RER/RCH bits + RBS1 buffer size | +| des2 | Buffer 1 address | Buffer 1 address | +| des3 | Next descriptor address (chain mode) | Unused | + +**Critical:** In the normal format, TX frame control bits (FS/LS/IC/TCH) belong in **TDES1**, not TDES0. TDES0 is a status-only word — the CPU must set only OWN=1 as a doorbell. Setting control bits in TDES0 causes the DMA to advance linearly (ignoring des3) and never transmit. + +TX uses chain mode (TCH in TDES1, des3 = next descriptor pointer). RX uses ring mode (RER in RDES1 on the last descriptor). Ring mode (TER in TDES0) is not used for TX because the DMA overwrites des0 on writeback, clearing the TER bit. + +#### DMA Configuration + +- 3 RX + 3 TX descriptors (16-byte aligned, in `.dma_bss` / RAM1) +- 1536-byte per-descriptor buffers (`.dma_bss`) +- TX: threshold mode, TTC=16B (starts MAC TX as soon as 16 bytes in FIFO) +- RX: store-and-forward (RSF=1) +- PBL=8 (programmable burst length), no Fixed Burst (AAL/FB omitted for safety) +- Polling mode (DMA interrupts disabled) +- FTF (Flush TX FIFO) applied once before ST=1 during init; self-clears in ~30 AHB cycles + +#### PHY (KSZ8041TL) + +Auto-negotiation is restricted to **10M only** (10M-FD + 10M-HD in AN advertisement) because the FES bit (MAC_CONFIG bit 14) is read-only=0 on this silicon variant — the MAC is permanently configured for 10 Mbps (TXCLK = 2.5 MHz). Advertising 100M would cause speed mismatch if the link partner selected 100M. + +The MAC is forced to Full Duplex (DM=1) regardless of PHY negotiation result. Half-duplex mode checks CRS before transmitting; CRS is unreliable on this silicon causing indefinite TX deferral. 
+ +#### SDK Workarounds + +| Issue | Fix | +|-------|-----| +| MDIO HAL busy-wait has inverted polarity (exits immediately instead of waiting) | 50 µs software settle after every `HAL_ReadPhyReg`/`HAL_WritePhyReg` call | +| PHY reset completes in 100–300 ms (KSZ8041TL datasheet) | 500 ms wait after `HAL_ResetPHY()` | +| `en_iocfg_dir_input` typo in SDK IOCONFIG driver | `-Den_iocfg_dir_input=en_iocfg_dir__input` in Makefile | +| `HBO` oscillator constant not exported from `system_va416xx.c` | `-DHBO=20000000UL` in Makefile | + +### vs. STM32H5 Port + +| Feature | VA416xx (Normal) | STM32H5 (Enhanced) | +|---------|-----------------|-------------------| +| TX control bits | TDES1 | TDES0 | +| Buffer address | des2 | des0 | +| Ring wrap | TER/RER in des1 | Tail pointer register | +| DMA kick | `DMA_TX_POLL_DEMAND` | Tail pointer update | +| MTL layer | None (`DMA_OPER_MODE`) | Separate MTL registers | +| Speed | 10M (FES read-only=0) | 100M/1G | + +### Memory Budget (64 KB SRAM) + +| Component | Size | +|-----------|------| +| DMA TX descriptors (3 × 16 B) | 48 B | +| DMA RX descriptors (3 × 16 B) | 48 B | +| DMA TX buffers (3 × 1536 B) | 4,608 B | +| DMA RX buffers (3 × 1536 B) | 4,608 B | +| RX staging buffer | 1,536 B | +| wolfIP stack + sockets | ~42 KB | +| Stack + stack frame | ~4 KB | +| **Static BSS total** | **~52 KB** | + +All DMA descriptors and buffers are placed in `.dma_bss` (RAM1, 0x20000000+). The Ethernet DMA is an AHB system bus master and cannot access RAM0 (code bus / D-Code bus). 
+ +## Files + +| File | Description | +|------|-------------| +| `main.c` | Application: HAL init, UART, ETH GPIO, wolfIP, DHCP, TCP echo/speed-test | +| `va416xx_eth.c` | Ethernet MAC/DMA driver (normal descriptor format, chain TX, ring RX) | +| `va416xx_eth.h` | Ethernet driver public API | +| `config.h` | wolfIP configuration (memory-optimized for 64 KB SRAM) | +| `startup.c` | Cortex-M4 reset handler (.data copy, .bss clear, SysTick enable) | +| `ivt.c` | Interrupt vector table (16 system + 64 external IRQs) | +| `syscalls.c` | Newlib stubs (`_write` routes to UART0) | +| `target.ld` | Linker script (Flash 256 KB, RAM 64 KB, `.dma_bss` in RAM1) | +| `hal_config.h` | SDK HAL configuration (SysTick 1 ms tick) | +| `board.h` | Board selection (includes PEB1 EVK header) | +| `Makefile` | Build system with SDK integration | + +## License + +This code is part of wolfIP and is licensed under GPLv3. See the LICENSE file in the repository root for details. + +Copyright (C) 2026 wolfSSL Inc. diff --git a/src/port/va416xx/board.h b/src/port/va416xx/board.h new file mode 100644 index 0000000..6b8d6ca --- /dev/null +++ b/src/port/va416xx/board.h @@ -0,0 +1,28 @@ +/* board.h + * + * Board configuration for VA416xx EVK (PEB1) + * + * Copyright (C) 2026 wolfSSL Inc. + * + * This file is part of wolfIP TCP/IP stack. + * + * wolfIP is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfIP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ +#ifndef BOARD_H +#define BOARD_H + +#include "peb1_va416xx_evk.h" + +#endif /* BOARD_H */ diff --git a/src/port/va416xx/config.h b/src/port/va416xx/config.h new file mode 100644 index 0000000..266c9c7 --- /dev/null +++ b/src/port/va416xx/config.h @@ -0,0 +1,74 @@ +/* config.h + * + * wolfIP configuration for VA416xx (memory-optimized for 64KB SRAM) + * + * Copyright (C) 2026 wolfSSL Inc. + * + * This file is part of wolfIP TCP/IP stack. + * + * wolfIP is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfIP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ +#ifndef WOLF_CONFIG_H +#define WOLF_CONFIG_H + +#ifndef CONFIG_IPFILTER +#define CONFIG_IPFILTER 0 +#endif + +#define ETHERNET +#define LINK_MTU 1536 + +/* Memory-constrained: 64KB SRAM total + * Each TCP socket: ~9KB (TX/RX bufs + OOO reassembly 4*1460) + * DMA buffers: 6 * 1536 + 1536 staging = 10,752 bytes + * ARP pending: 2 * 1536 = 3,072 bytes + * TCP state, timers, misc: ~4KB + * Total estimated: ~42KB (fits in 64KB with stack room) + */ +#define MAX_TCPSOCKETS 2 /* listen + 1 client */ +#define MAX_UDPSOCKETS 1 /* for DHCP */ +#define MAX_ICMPSOCKETS 1 +#define RXBUF_SIZE LINK_MTU /* 1536 per socket */ +#define TXBUF_SIZE LINK_MTU /* 1536 per socket */ + +#define MAX_NEIGHBORS 4 +#define WOLFIP_ARP_PENDING_MAX 2 + +#ifndef WOLFIP_MAX_INTERFACES +#define WOLFIP_MAX_INTERFACES 1 +#endif + +#ifndef WOLFIP_ENABLE_FORWARDING +#define WOLFIP_ENABLE_FORWARDING 0 +#endif + +#ifndef WOLFIP_ENABLE_LOOPBACK +#define WOLFIP_ENABLE_LOOPBACK 0 +#endif + +#ifndef WOLFIP_ENABLE_DHCP +#define WOLFIP_ENABLE_DHCP 1 +#endif + +#if WOLFIP_ENABLE_DHCP +#define DHCP +#else +#define WOLFIP_IP "10.0.4.90" +#define WOLFIP_NETMASK "255.255.255.0" +#define WOLFIP_GW "10.0.4.1" +#define WOLFIP_STATIC_DNS_IP "8.8.8.8" +#endif + +#endif /* WOLF_CONFIG_H */ diff --git a/src/port/va416xx/flash.jlink b/src/port/va416xx/flash.jlink new file mode 100644 index 0000000..1a07056 --- /dev/null +++ b/src/port/va416xx/flash.jlink @@ -0,0 +1,16 @@ +device VA416XX +si 1 +speed 2000 +r +h +write4 0x40010010 0x1 +exec SetCompareMode = 0 +loadbin app.bin 0x0 +loadfile ../../../../VA416xx_SDK/loader.elf +go +sleep 2000 +h +write4 0x40010010 0x0 +r +g +exit diff --git a/src/port/va416xx/hal_config.h b/src/port/va416xx/hal_config.h new file mode 100644 index 0000000..a650e1d --- 
/dev/null +++ b/src/port/va416xx/hal_config.h @@ -0,0 +1,30 @@ +/* hal_config.h + * + * VA416xx HAL configuration for wolfIP port + * + * Copyright (C) 2026 wolfSSL Inc. + * + * This file is part of wolfIP TCP/IP stack. + * + * wolfIP is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfIP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ +#ifndef HAL_CONFIG_H +#define HAL_CONFIG_H + +/* SysTick: 1ms interval for wolfIP timing */ +#define SYSTICK_INTERVAL_MS (1U) +#define SYSTICK_PRIORITY (7U) + +#endif /* HAL_CONFIG_H */ diff --git a/src/port/va416xx/ivt.c b/src/port/va416xx/ivt.c new file mode 100644 index 0000000..de54143 --- /dev/null +++ b/src/port/va416xx/ivt.c @@ -0,0 +1,60 @@ +/* ivt.c + * + * VA416xx Cortex-M4 Interrupt Vector Table + * + * Copyright (C) 2026 wolfSSL Inc. + * + * This file is part of wolfIP TCP/IP stack. + * + * wolfIP is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfIP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ +#include + +extern void Reset_Handler(void); +extern unsigned long _estack; + +static void default_handler(void) +{ + while (1) { } +} + +void NMI_Handler(void) __attribute__((weak, alias("default_handler"))); +void HardFault_Handler(void) __attribute__((weak, alias("default_handler"))); +void MemManage_Handler(void) __attribute__((weak, alias("default_handler"))); +void BusFault_Handler(void) __attribute__((weak, alias("default_handler"))); +void UsageFault_Handler(void)__attribute__((weak, alias("default_handler"))); +void SVC_Handler(void) __attribute__((weak, alias("default_handler"))); +void DebugMon_Handler(void) __attribute__((weak, alias("default_handler"))); +void PendSV_Handler(void) __attribute__((weak, alias("default_handler"))); +void SysTick_Handler(void) __attribute__((weak, alias("default_handler"))); + +/* VA416xx has 64 external interrupts (IRQ 0-63) */ +__attribute__((section(".isr_vector"))) +const uint32_t vector_table[16 + 64] = { + [0] = (uint32_t)&_estack, + [1] = (uint32_t)&Reset_Handler, + [2] = (uint32_t)&NMI_Handler, + [3] = (uint32_t)&HardFault_Handler, + [4] = (uint32_t)&MemManage_Handler, + [5] = (uint32_t)&BusFault_Handler, + [6] = (uint32_t)&UsageFault_Handler, + [7] = 0, [8] = 0, [9] = 0, [10] = 0, + [11] = (uint32_t)&SVC_Handler, + [12] = (uint32_t)&DebugMon_Handler, + [13] = 0, + [14] = (uint32_t)&PendSV_Handler, + [15] = (uint32_t)&SysTick_Handler, + [16 ... 79] = (uint32_t)&default_handler +}; diff --git a/src/port/va416xx/main.c b/src/port/va416xx/main.c new file mode 100644 index 0000000..d488ee2 --- /dev/null +++ b/src/port/va416xx/main.c @@ -0,0 +1,758 @@ +/* main.c + * + * VA416xx wolfIP Echo Server Test Application + * + * Copyright (C) 2026 wolfSSL Inc. 
+ * + * This file is part of wolfIP TCP/IP stack. + * + * wolfIP is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfIP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ +#include +#include +#include +#include "config.h" +#include "wolfip.h" +#include "va416xx_eth.h" +#include "va416xx_hal_ethernet.h" + +#include "va416xx.h" +#include "va416xx_hal.h" +#include "va416xx_hal_uart.h" +#include "va416xx_hal_ioconfig.h" +#include "va416xx_hal_clkgen.h" +#include "board.h" + +/* HAL_time_ms: millisecond tick counter maintained by SysTick ISR (10ms + * resolution by default). Used as the wolfIP `now` parameter so that all + * stack timers (DHCP, ARP, TCP retransmit, etc.) run in real wall-clock + * time rather than depending on CPU loop speed. */ +extern volatile uint64_t HAL_time_ms; + +#define RX_BUF_SIZE 1024 + +/* DHCP timeout: total time to wait for DHCP before static IP fallback. + * wolfIP's internal DHCP state machine only retries for ~8 seconds + * (DHCP_DISCOVER_RETRIES=3 × 2s timeout). After that it sets state to + * DHCP_OFF and the UDP socket stops accepting unicast DHCP responses + * (because DHCP_IS_RUNNING becomes false). We re-init periodically to + * keep trying, but must space re-inits apart to avoid socket churn + * (close/reopen loses in-flight responses). 
*/ +#define DHCP_TIMEOUT_MS 120000U /* 120s total before static fallback */ +#define DHCP_REINIT_MS 15000U /* 15s between DHCP re-init attempts */ + +static struct wolfIP *IPStack; +static uint8_t rx_buf[RX_BUF_SIZE]; + +#ifdef SPEED_TEST + +/* Combined speed test service (port 9) + * - RX test: host sends data, device counts bytes (discard) + * - TX test: device sends data as fast as possible (chargen) + * Both directions measured simultaneously on one connection. */ +#define SPEED_PORT 9 +static int speed_listen_fd = -1; +static int speed_client_fd = -1; +static uint32_t speed_rx_bytes; +static uint32_t speed_tx_bytes; +static uint64_t speed_start_ms; + +#else + +/* Echo server (port 7) */ +#define ECHO_PORT 7 +static int listen_fd = -1; +static int client_fd = -1; + +#endif /* SPEED_TEST */ + +/* ========================================================================= */ +/* wolfIP random number generator (required by stack) */ +/* ========================================================================= */ + +uint32_t wolfIP_getrandom(void) +{ + static uint32_t lfsr = 0x1A2B3C4DU; + lfsr ^= lfsr << 13; + lfsr ^= lfsr >> 17; + lfsr ^= lfsr << 5; + return lfsr; +} + +/* ========================================================================= */ +/* LED on EVK top board (PORTG pin 5) */ +/* ========================================================================= */ + +static void led_init(void) +{ + /* Enable PORTG clock (HAL_Init already does this, but be safe) */ + VOR_SYSCONFIG->PERIPHERAL_CLK_ENABLE |= + SYSCONFIG_PERIPHERAL_CLK_ENABLE_PORTG_Msk; + + /* Set PORTG pin 5 as output */ + EVK_LED_BANK.DIR |= (1U << EVK_LED_PIN); +} + +static void led_on(void) +{ + EVK_LED_BANK.SETOUT = (1U << EVK_LED_PIN); +} + +static void led_toggle(void) +{ + EVK_LED_BANK.TOGOUT = (1U << EVK_LED_PIN); +} + +/* ========================================================================= */ +/* UART0 Debug Output (PORTG pins 0=TX, 1=RX, funsel=1) */ +/* 
========================================================================= */ + +static void uart_init(void) +{ + /* Configure UART0 pins: PORTG[0]=TX, PORTG[1]=RX, funsel=1 + * (matches PEB1 EVK routing / wolfBoot configuration) */ + HAL_Iocfg_PinMux(VOR_PORTG, 0, 1); + HAL_Iocfg_PinMux(VOR_PORTG, 1, 1); + + /* Initialize UART0 at 115200 8N1 */ + HAL_Uart_Init(VOR_UART0, UART_CFG_115K_8N1); +} + +/* ========================================================================= */ +/* Ethernet MII Pin Configuration */ +/* PORTA[8-15] and PORTB[0-10], all funsel=1 */ +/* ========================================================================= */ + +static void eth_gpio_init(void) +{ + uint32_t pin; + + /* Enable PORTA and PORTB clocks */ + VOR_SYSCONFIG->PERIPHERAL_CLK_ENABLE |= + SYSCONFIG_PERIPHERAL_CLK_ENABLE_PORTA_Msk | + SYSCONFIG_PERIPHERAL_CLK_ENABLE_PORTB_Msk; + + /* PORTA pins 8-15: MII signals */ + for (pin = 8; pin <= 15; pin++) { + HAL_Iocfg_PinMux(VOR_PORTA, pin, 1); + } + + /* PORTB pins 0-10: MII signals */ + for (pin = 0; pin <= 10; pin++) { + HAL_Iocfg_PinMux(VOR_PORTB, pin, 1); + } +} + +/* ========================================================================= */ +/* Ethernet Peripheral Clock and Reset */ +/* ========================================================================= */ + +static void eth_clk_init(void) +{ + /* Enable ETH peripheral clock */ + VOR_SYSCONFIG->PERIPHERAL_CLK_ENABLE |= + SYSCONFIG_PERIPHERAL_CLK_ENABLE_ETH_Msk; + + /* Assert ETH reset (clear bit), then release (set bit) + * All SDK peripheral drivers use this clear-then-set pattern */ + VOR_SYSCONFIG->PERIPHERAL_RESET &= + ~SYSCONFIG_PERIPHERAL_RESET_ETH_Msk; + for (volatile uint32_t i = 0; i < 1000; i++) { } + VOR_SYSCONFIG->PERIPHERAL_RESET |= + SYSCONFIG_PERIPHERAL_RESET_ETH_Msk; + + /* Brief delay for clock to stabilize */ + for (volatile uint32_t i = 0; i < 10000; i++) { } +} + +/* ========================================================================= */ +/* UART 
Debug Helpers */ +/* ========================================================================= */ + +static void uart_putip4(ip4 ip) +{ + printf("%u.%u.%u.%u", + (unsigned)((ip >> 24) & 0xFF), + (unsigned)((ip >> 16) & 0xFF), + (unsigned)((ip >> 8) & 0xFF), + (unsigned)(ip & 0xFF)); +} + +#ifdef SPEED_TEST + +/* ========================================================================= */ +/* Combined Speed Test Callback (port 9) */ +/* Measures RX (discard incoming) and TX (chargen outgoing) simultaneously. */ +/* RX test: dd if=/dev/zero bs=1460 count=700 | nc 9 */ +/* TX test: nc 9 /dev/null */ +/* ========================================================================= */ + +static void speed_cb(int fd, uint16_t event, void *arg) +{ + struct wolfIP *s = (struct wolfIP *)arg; + int ret; + + /* Accept new connection */ + if ((fd == speed_listen_fd) && (event & CB_EVENT_READABLE) && + (speed_client_fd == -1)) { + speed_client_fd = wolfIP_sock_accept(s, speed_listen_fd, NULL, NULL); + if (speed_client_fd > 0) { + printf("Speed: client connected (fd=%d)\n", speed_client_fd); + wolfIP_register_callback(s, speed_client_fd, speed_cb, s); + speed_rx_bytes = 0; + speed_tx_bytes = 0; + speed_start_ms = HAL_time_ms; + } + return; + } + + if (fd != speed_client_fd) + return; + + /* RX: read and discard incoming data */ + if (event & CB_EVENT_READABLE) { + ret = wolfIP_sock_recvfrom(s, speed_client_fd, rx_buf, sizeof(rx_buf), + 0, NULL, NULL); + if (ret > 0) { + speed_rx_bytes += (uint32_t)ret; + } else if (ret == 0) { + goto speed_done; + } + } + + /* TX: send pattern data when buffer has space */ + if (event & CB_EVENT_WRITABLE) { + ret = wolfIP_sock_send(s, speed_client_fd, rx_buf, sizeof(rx_buf), 0); + if (ret > 0) { + speed_tx_bytes += (uint32_t)ret; + } + } + + if (event & CB_EVENT_CLOSED) { +speed_done: + { + uint32_t elapsed = (uint32_t)(HAL_time_ms - speed_start_ms); + uint32_t rx_bps = 0, tx_bps = 0; + if (elapsed > 0) { + rx_bps = 
(uint32_t)((uint64_t)speed_rx_bytes * 1000U / elapsed); + tx_bps = (uint32_t)((uint64_t)speed_tx_bytes * 1000U / elapsed); + } + printf("Speed: %lu ms, RX %lu bytes (~%lu B/s), " + "TX %lu bytes (~%lu B/s)\n", + (unsigned long)elapsed, + (unsigned long)speed_rx_bytes, (unsigned long)rx_bps, + (unsigned long)speed_tx_bytes, (unsigned long)tx_bps); + } + wolfIP_sock_close(s, speed_client_fd); + speed_client_fd = -1; + } +} + +#else /* !SPEED_TEST */ + +/* ========================================================================= */ +/* TCP Echo Server Callback */ +/* ========================================================================= */ + +static void echo_cb(int fd, uint16_t event, void *arg) +{ + struct wolfIP *s = (struct wolfIP *)arg; + int ret; + + if ((fd == listen_fd) && (event & CB_EVENT_READABLE) && (client_fd == -1)) { + client_fd = wolfIP_sock_accept(s, listen_fd, NULL, NULL); + if (client_fd > 0) { + printf("Echo: client connected (fd=%d)\n", client_fd); + wolfIP_register_callback(s, client_fd, echo_cb, s); + } + return; + } + + if ((fd == client_fd) && (event & CB_EVENT_READABLE)) { + ret = wolfIP_sock_recvfrom(s, client_fd, rx_buf, sizeof(rx_buf), + 0, NULL, NULL); + if (ret > 0) { + (void)wolfIP_sock_sendto(s, client_fd, rx_buf, (uint32_t)ret, + 0, NULL, 0); + } else if (ret == 0) { + printf("Echo: client disconnected\n"); + wolfIP_sock_close(s, client_fd); + client_fd = -1; + } + } + + if ((fd == client_fd) && (event & CB_EVENT_CLOSED)) { + printf("Echo: connection closed\n"); + wolfIP_sock_close(s, client_fd); + client_fd = -1; + } +} + +#endif /* SPEED_TEST */ + +/* ========================================================================= */ +/* Main */ +/* ========================================================================= */ + +int main(void) +{ + struct wolfIP_ll_dev *ll; + struct wolfIP_sockaddr_in addr; + int ret; + + /* 1. HAL init: clocks (GPIO, IOCONFIG, CLKGEN), SysTick, IRQ router */ + HAL_Init(); + + /* 2. 
Update SystemCoreClock, then configure PLL for 100MHz + * PEB1 EVK has 40MHz crystal, * 2.5 = 100MHz */ + SystemCoreClockUpdate(); + (void)HAL_Clkgen_PLL(CLK_CTRL0_XTAL_N_PLL2P5X); + + /* 3. Disable Watchdog (should be disabled out of reset, but be safe) */ + VOR_WATCH_DOG->WDOGLOCK = 0x1ACCE551; + VOR_WATCH_DOG->WDOGCONTROL = 0x0; + NVIC_ClearPendingIRQ(WATCHDOG_IRQn); + + /* 4. LED on immediately to confirm code is running */ + led_init(); + led_on(); + + /* 5. UART0 for debug output */ + uart_init(); + + printf("\n\n=== wolfIP VA416xx Echo Server ===\n"); + printf("Build: " __DATE__ " " __TIME__ "\n"); + + /* 6. Configure ETH GPIO pins (MII) */ + eth_gpio_init(); + + /* 7. Enable ETH peripheral clock and release reset */ + eth_clk_init(); + + /* 8. Initialize wolfIP stack */ + wolfIP_init_static(&IPStack); + + /* 9. Initialize Ethernet driver */ + printf("Initializing Ethernet...\n"); + ll = wolfIP_getdev(IPStack); + ret = va416xx_eth_init(ll, NULL); + if (ret < 0) { + printf(" ERROR: va416xx_eth_init failed (%d)\n", ret); + } + + /* 8. IP configuration: DHCP (non-blocking) or static */ +#ifdef DHCP + printf("Starting DHCP...\n"); + /* Prime wolfIP's last_tick before starting DHCP. Without this, + * last_tick=0 but HAL_time_ms is already ~2000 (boot time elapsed), + * so the first DHCP timer expires immediately. 
*/ + (void)wolfIP_poll(IPStack, HAL_time_ms); + (void)dhcp_client_init(IPStack); +#else + { + ip4 ip = atoip4(WOLFIP_IP); + ip4 nm = atoip4(WOLFIP_NETMASK); + ip4 gw = atoip4(WOLFIP_GW); + printf("Static IP configuration:\n"); + printf(" IP: "); uart_putip4(ip); printf("\n"); + printf(" Mask: "); uart_putip4(nm); printf("\n"); + printf(" GW: "); uart_putip4(gw); printf("\n"); + wolfIP_ipconfig_set(IPStack, ip, nm, gw); + } +#endif + + /* Create TCP services */ + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = 0; + +#ifdef SPEED_TEST + printf("=== Speed Test Mode ===\n"); + + /* Speed test service on port 9 (RX + TX throughput) */ + printf("Creating TCP speed test service on port %d...\n", SPEED_PORT); + speed_listen_fd = wolfIP_sock_socket(IPStack, AF_INET, + IPSTACK_SOCK_STREAM, 0); + wolfIP_register_callback(IPStack, speed_listen_fd, speed_cb, IPStack); + addr.sin_port = ee16(SPEED_PORT); + (void)wolfIP_sock_bind(IPStack, speed_listen_fd, + (struct wolfIP_sockaddr *)&addr, sizeof(addr)); + (void)wolfIP_sock_listen(IPStack, speed_listen_fd, 1); + + printf("Ready! Test with:\n"); + printf(" ping \n"); + printf(" dd if=/dev/zero bs=1460 count=700 | nc 9 (RX test)\n"); + printf(" nc 9 /dev/null (TX test)\n"); +#else + /* Echo server on port 7 */ + printf("Creating TCP echo server on port %d...\n", ECHO_PORT); + listen_fd = wolfIP_sock_socket(IPStack, AF_INET, IPSTACK_SOCK_STREAM, 0); + wolfIP_register_callback(IPStack, listen_fd, echo_cb, IPStack); + addr.sin_port = ee16(ECHO_PORT); + (void)wolfIP_sock_bind(IPStack, listen_fd, + (struct wolfIP_sockaddr *)&addr, sizeof(addr)); + (void)wolfIP_sock_listen(IPStack, listen_fd, 1); + + printf("Ready! Test with:\n"); + printf(" ping \n"); + printf(" echo 'hello' | nc 7\n"); +#endif + printf("\nEntering main loop...\n"); + +#ifdef TX_SELFTEST + /* TX Self-Test: send gratuitous ARP frames directly via ll->send() to + * exercise the TX path at startup before any external traffic arrives. 
+ * The per-TX diagnostic in eth_send() will print TXFSTS (TX FIFO fill + * level) at 3 time points: pre-kick, +10µs, +500µs. + * If hw_tx > 0 after this, the MAC is transmitting to the wire. + * If hw_tx == 0, the diagnostic TXFSTS values tell us where it breaks. */ + { + uint8_t garp[42]; + ip4 self_ip = 0, dummy_nm = 0, dummy_gw = 0; + int i; + + wolfIP_ipconfig_get(IPStack, &self_ip, &dummy_nm, &dummy_gw); + + /* Ethernet header: broadcast dst, our src MAC, ARP ethertype */ + memset(garp, 0xFF, 6); /* dst: broadcast */ + memcpy(garp + 6, ll->mac, 6); /* src: our MAC */ + garp[12] = 0x08; garp[13] = 0x06; /* ethertype: ARP (0x0806) */ + + /* ARP payload (28 bytes, gratuitous request) */ + garp[14] = 0x00; garp[15] = 0x01; /* htype = Ethernet */ + garp[16] = 0x08; garp[17] = 0x00; /* ptype = IPv4 */ + garp[18] = 6; /* hlen = 6 */ + garp[19] = 4; /* plen = 4 */ + garp[20] = 0x00; garp[21] = 0x01; /* op = ARP request */ + memcpy(garp + 22, ll->mac, 6); /* sha = our MAC */ + garp[28] = (uint8_t)((self_ip >> 24) & 0xFF); /* spa = our IP */ + garp[29] = (uint8_t)((self_ip >> 16) & 0xFF); + garp[30] = (uint8_t)((self_ip >> 8) & 0xFF); + garp[31] = (uint8_t)( self_ip & 0xFF); + memset(garp + 32, 0, 6); /* tha = 0:0:0:0:0:0 */ + garp[38] = garp[28]; garp[39] = garp[29]; /* tpa = our IP */ + garp[40] = garp[30]; garp[41] = garp[31]; + + printf("TX Self-Test: sending 3 gratuitous ARP frames via ll->send()\n"); + printf(" self_ip=%lu.%lu.%lu.%lu src_mac=%02X:%02X:%02X:%02X:%02X:%02X\n", + (unsigned long)((self_ip >> 24) & 0xFF), + (unsigned long)((self_ip >> 16) & 0xFF), + (unsigned long)((self_ip >> 8) & 0xFF), + (unsigned long)( self_ip & 0xFF), + ll->mac[0], ll->mac[1], ll->mac[2], + ll->mac[3], ll->mac[4], ll->mac[5]); + + for (i = 0; i < 3; i++) { + int r = ll->send(ll, garp, 42); + printf(" send[%d] = %d\n", i, r); + /* ~50ms delay (at 100MHz: 5M cycles) */ + for (volatile uint32_t d = 0; d < 5000000U; d++) { } + } + + /* Wait ~200ms then sample MAC MMC counters */ + 
for (volatile uint32_t d = 0; d < 20000000U; d++) { } + { + uint32_t mac_cfg2, mac_dbg2, hw_tx2; + uint32_t dma_st2; + va416xx_eth_get_mac_diag(&mac_cfg2, &mac_dbg2, &hw_tx2); + dma_st2 = va416xx_eth_get_dma_status(); + printf(" Post self-test: hw_tx=%lu dbg=0x%08lX dma=0x%08lX TS=%lu\n", + (unsigned long)hw_tx2, + (unsigned long)mac_dbg2, + (unsigned long)dma_st2, + (unsigned long)((dma_st2 >> 20) & 0x7U)); + if (hw_tx2 > 0) + printf(" *** TX OK: MAC IS TRANSMITTING - issue is MII/PHY ***\n"); + else + printf(" *** TX FAIL: hw_tx=0 - DMA->MAC TX FIFO path broken ***\n"); + } + + /* --- MMC Counter Sanity Check --- + * RXFRAMECOUNT_GB should be > 0 because we received external ARP/etc + * traffic (rx_pkt_count > 0). If RXFRAMECOUNT_GB is also 0, the MMC + * counters are frozen/broken and TXFRAMECOUNT_GB (hw_tx) cannot be + * trusted either. If RXFRAMECOUNT_GB > 0, MMC counters work and TX + * is truly silent. */ + { + uint32_t rx_frames_gb = VOR_ETH->RXFRAMECOUNT_GB; + uint32_t tx_octs_gb = VOR_ETH->TXOCTETCOUNT_GB; + uint32_t tx_under = VOR_ETH->TXUNDERERR; + uint32_t tx_carrier = VOR_ETH->TXCARRIERERROR; + uint32_t tx_latecol = VOR_ETH->TXLATECOL; + uint32_t mmc_ctrl = VOR_ETH->MMC_CNTRL; + printf(" MMC_CNTRL=0x%02lX RXFRAMES_GB=%lu TXOCTS_GB=%lu " + "TXUNDERERR=%lu TXCARRIER=%lu TXLATECOL=%lu\n", + (unsigned long)mmc_ctrl, + (unsigned long)rx_frames_gb, + (unsigned long)tx_octs_gb, + (unsigned long)tx_under, + (unsigned long)tx_carrier, + (unsigned long)tx_latecol); + if (rx_frames_gb == 0) + printf(" *** WARNING: RXFRAMES_GB=0 - MMC counters may be frozen ***\n"); + else + printf(" MMC: RXFRAMES_GB=%lu TXOCTS_GB=%lu\n", + (unsigned long)rx_frames_gb, (unsigned long)tx_octs_gb); + } + + /* --- MAC Internal Loopback Test --- + * Enable MAC_CONFIG.LM (bit 12): TX data loops internally back to RX + * without going through the MII/PHY. In loopback mode the DWC GMAC + * uses an internal clock, so missing TXCLK from the PHY is irrelevant. 
+ * + * If rx_pkt_count increases after ll->send() in loopback mode: + * TX works internally => problem is TXCLK/MII/PHY (external path) + * If rx_pkt_count stays flat: + * MAC TX engine broken even internally (DMA->FIFO or FIFO->MAC TX) */ + { + uint32_t dummy_p, rx_before = 0, dummy_tx, dummy_e; + int lbk_i; + + va416xx_eth_get_stats(&dummy_p, &rx_before, &dummy_tx, &dummy_e); + + /* Enable MAC loopback */ + VOR_ETH->MAC_CONFIG |= ETH_MAC_CONFIG_LM_Msk; + __DSB(); + { volatile uint32_t _d; for (_d = 0; _d < 100000U; _d++) { } } /* ~1ms */ + + printf(" MAC Loopback test (LM=1): rx_before=%lu\n", + (unsigned long)rx_before); + for (lbk_i = 0; lbk_i < 3; lbk_i++) { + ll->send(ll, garp, 42); + { volatile uint32_t _d; for (_d = 0; _d < 1000000U; _d++) { } } /* ~10ms */ + (void)wolfIP_poll(IPStack, HAL_time_ms); + } + /* Extra settle + poll */ + { volatile uint32_t _d; for (_d = 0; _d < 5000000U; _d++) { } } /* ~50ms */ + (void)wolfIP_poll(IPStack, HAL_time_ms); + + { + uint32_t rx_after = 0; + va416xx_eth_get_stats(NULL, &rx_after, NULL, NULL); + printf(" MAC Loopback result: rx_after=%lu delta=%lu\n", + (unsigned long)rx_after, + (unsigned long)(rx_after - rx_before)); + if (rx_after > rx_before) + printf(" *** LOOPBACK OK: MAC TX works! Problem is TXCLK/MII/PHY ***\n"); + else + printf(" *** LOOPBACK FAIL: MAC TX broken even in internal mode ***\n"); + } + + /* Disable MAC loopback */ + VOR_ETH->MAC_CONFIG &= ~ETH_MAC_CONFIG_LM_Msk; + __DSB(); + } + + /* === PHY Register Dump === + * Read key PHY registers post-AN to confirm speed and link state. + * BMSR bit2=link, bit5=AN_done. BMCR bit13=speed100, bit8=FD. + * PHY_CTRL2 bits[6:4]=OpMode: 1=10HD 2=100HD 5=10FD 6=100FD. + * Isolate (BMCR bit10) or PowerDown (BMCR bit11) would suppress TXCLK. 
*/ + { +#define MDIO_S() do { volatile uint32_t _s; for (_s=0; _s<5000U; _s++) {} } while(0) + uint16_t bmcr, bmsr, an_adv, an_lpa, ctrl1, ctrl2; + HAL_ReadPhyReg(PHY_CONTROL_REG, &bmcr); MDIO_S(); + HAL_ReadPhyReg(PHY_CONTROL_REG, &bmcr); MDIO_S(); + HAL_ReadPhyReg(PHY_STATUS_REG, &bmsr); MDIO_S(); + HAL_ReadPhyReg(PHY_STATUS_REG, &bmsr); MDIO_S(); + HAL_ReadPhyReg(PHY_AN_ADV_REG, &an_adv); MDIO_S(); + HAL_ReadPhyReg(PHY_AN_ADV_REG, &an_adv); MDIO_S(); + HAL_ReadPhyReg(PHY_LNK_PART_ABl_REG, &an_lpa); MDIO_S(); + HAL_ReadPhyReg(PHY_LNK_PART_ABl_REG, &an_lpa); MDIO_S(); + HAL_ReadPhyReg(PHY_CONTROL_ONE, &ctrl1); MDIO_S(); + HAL_ReadPhyReg(PHY_CONTROL_ONE, &ctrl1); MDIO_S(); + HAL_ReadPhyReg(PHY_CONTROL_TWO, &ctrl2); MDIO_S(); + HAL_ReadPhyReg(PHY_CONTROL_TWO, &ctrl2); MDIO_S(); +#undef MDIO_S + printf(" PHY BMCR=0x%04X BMSR=0x%04X AN_ADV=0x%04X AN_LPA=0x%04X\n", + bmcr, bmsr, an_adv, an_lpa); + printf(" PHY CTRL1=0x%04X CTRL2=0x%04X OpMode[6:4]=%u\n", + ctrl1, ctrl2, (unsigned)((ctrl2 >> 4) & 7U)); + printf(" PHY: link=%u AN_done=%u isolate=%u pwrdn=%u speed100=%u FD=%u\n", + (unsigned)((bmsr >> 2) & 1), + (unsigned)((bmsr >> 5) & 1), + (unsigned)((bmcr >> 10) & 1), + (unsigned)((bmcr >> 11) & 1), + (unsigned)((bmcr >> 13) & 1), + (unsigned)((bmcr >> 8) & 1)); + } + + /* === GPIO Pin Activity Scan === + * Identify which ETH pins carry PHY-driven clocks (TXCLK, RXCLK). + * Each pin is briefly switched to GPIO input (funsel=0), sampled + * 64 times in a tight loop (~2µs at 100MHz), then restored to + * funsel=1. At 2.5MHz TXCLK (10Mbps), 64 samples span ~5 clock + * periods, enough to see both high and low states. + * Pins marked <> have both high and low samples. 
*/ + printf(" GPIO Pin Activity Scan (detecting TXCLK/RXCLK):\n"); + { + uint32_t pin, i, ones, zeros; + /* PORTA[8:15] */ + for (pin = 8; pin <= 15; pin++) { + HAL_Iocfg_PinMux(PORTA, pin, 0); + PORTA->DIR &= ~(1U << pin); + { volatile uint32_t _s; for (_s = 0; _s < 20U; _s++) {} } + ones = 0; zeros = 0; + for (i = 0; i < 64U; i++) { + if (PORTA->DATAIN & (1U << pin)) ones++; else zeros++; + } + HAL_Iocfg_PinMux(PORTA, pin, 1); + { volatile uint32_t _s; for (_s = 0; _s < 20U; _s++) {} } + printf(" PA%02lu: hi=%02lu lo=%02lu%s\n", + (unsigned long)pin, (unsigned long)ones, (unsigned long)zeros, + (ones > 0 && zeros > 0) ? " <>" : ""); + } + /* PORTB[0:10] */ + for (pin = 0; pin <= 10; pin++) { + HAL_Iocfg_PinMux(PORTB, pin, 0); + PORTB->DIR &= ~(1U << pin); + { volatile uint32_t _s; for (_s = 0; _s < 20U; _s++) {} } + ones = 0; zeros = 0; + for (i = 0; i < 64U; i++) { + if (PORTB->DATAIN & (1U << pin)) ones++; else zeros++; + } + HAL_Iocfg_PinMux(PORTB, pin, 1); + { volatile uint32_t _s; for (_s = 0; _s < 20U; _s++) {} } + printf(" PB%02lu: hi=%02lu lo=%02lu%s\n", + (unsigned long)pin, (unsigned long)ones, (unsigned long)zeros, + (ones > 0 && zeros > 0) ? " <>" : ""); + } + } + + /* === FES=1 TX Attempt === + * If PHY negotiated 100M (TXCLK=25MHz) but MAC has FES=0 (expects + * 2.5MHz), the TX clock domain is mismatched. Try forcing FES=1 + * (100M) and see if a frame exits the MAC (TXFRAMECOUNT_GB delta). 
*/ + { + uint32_t hw_tx_before = 0, hw_tx_after = 0; + uint32_t cfg_save; + va416xx_eth_get_mac_diag(NULL, NULL, &hw_tx_before); + cfg_save = VOR_ETH->MAC_CONFIG; + VOR_ETH->MAC_CONFIG = cfg_save | ETH_MAC_CONFIG_FES_Msk; + __DSB(); + printf(" FES=1 TX attempt: MAC_CONFIG=0x%08lX (FES=%lu)\n", + (unsigned long)VOR_ETH->MAC_CONFIG, + (unsigned long)!!(VOR_ETH->MAC_CONFIG & ETH_MAC_CONFIG_FES_Msk)); + ll->send(ll, garp, 42); + { volatile uint32_t _d; for (_d = 0; _d < 20000000U; _d++) {} } + va416xx_eth_get_mac_diag(NULL, NULL, &hw_tx_after); + printf(" FES=1: hw_tx before=%lu after=%lu delta=%lu%s\n", + (unsigned long)hw_tx_before, (unsigned long)hw_tx_after, + (unsigned long)(hw_tx_after - hw_tx_before), + (hw_tx_after > hw_tx_before) ? " *** TX WORKS AT FES=1 ***" : " (still silent)"); + VOR_ETH->MAC_CONFIG = cfg_save; /* restore original speed */ + __DSB(); + } + } +#endif /* TX_SELFTEST */ + + /* 10. Main loop — use HAL_time_ms (SysTick-based, 10ms resolution) + * so wolfIP timers (TCP, ARP, etc.) run in real wall-clock time. */ + { + uint64_t last_led_ms = 0; + uint64_t last_diag_ms = 0; +#ifdef DHCP + uint64_t dhcp_start_ms = HAL_time_ms; + uint64_t dhcp_reinit_ms = HAL_time_ms; + int dhcp_done = 0; +#endif + + for (;;) { + uint64_t now = HAL_time_ms; + (void)wolfIP_poll(IPStack, now); + +#ifdef DHCP + /* Non-blocking DHCP handling. + * + * wolfIP's internal DHCP state machine gives up after ~8s + * (3 retries × 2s). When state goes to DHCP_OFF, the UDP + * socket stops accepting unicast DHCP responses (the + * DHCP_IS_RUNNING check in udp_process fails). + * + * We periodically re-init DHCP (every 15s) to restart the + * state machine and keep the UDP socket accepting responses. + * Must space re-inits apart to avoid socket churn (the + * close/reopen cycle can lose in-flight responses). 
*/ + if (!dhcp_done) { + if (dhcp_bound(IPStack)) { + ip4 ip = 0, nm = 0, gw = 0; + wolfIP_ipconfig_get(IPStack, &ip, &nm, &gw); + printf("DHCP bound:\n"); + printf(" IP: "); uart_putip4(ip); printf("\n"); + printf(" Mask: "); uart_putip4(nm); printf("\n"); + printf(" GW: "); uart_putip4(gw); printf("\n"); + dhcp_done = 1; + } else if ((now - dhcp_start_ms) > DHCP_TIMEOUT_MS) { + /* Final timeout: check for partial IP from DHCP offer */ + ip4 ip = 0, nm = 0, gw = 0; + wolfIP_ipconfig_get(IPStack, &ip, &nm, &gw); + if (ip != 0) { + printf("DHCP assigned IP:\n"); + } else { + printf("DHCP timeout, using static IP\n"); + ip = atoip4("10.0.4.90"); + nm = atoip4("255.255.255.0"); + gw = atoip4("10.0.4.1"); + wolfIP_ipconfig_set(IPStack, ip, nm, gw); + } + printf(" IP: "); uart_putip4(ip); printf("\n"); + printf(" Mask: "); uart_putip4(nm); printf("\n"); + printf(" GW: "); uart_putip4(gw); printf("\n"); + dhcp_done = 1; + } else if ((now - dhcp_reinit_ms) > DHCP_REINIT_MS) { + /* Re-init DHCP if internal state machine expired. + * dhcp_client_init only succeeds when state==DHCP_OFF, + * so this is a no-op while DHCP is still active. */ + (void)dhcp_client_init(IPStack); + dhcp_reinit_ms = now; + } + } +#endif + + /* LED heartbeat: toggle every ~2 seconds */ + if ((now - last_led_ms) >= 2000U) { + led_toggle(); + last_led_ms = now; + } + + /* Periodic diagnostics every ~10 seconds */ + if ((now - last_diag_ms) >= 10000U) { + uint32_t polls, pkts, tx_pkts, tx_errs; + uint32_t mac_cfg, mac_dbg, hw_tx, dma_st; + va416xx_eth_get_stats(&polls, &pkts, &tx_pkts, &tx_errs); + va416xx_eth_get_mac_diag(&mac_cfg, &mac_dbg, &hw_tx); + dma_st = va416xx_eth_get_dma_status(); + /* mac_dbg full 32-bit: TX bits are in [22:16] (TWCSTS=22, + * TRCSTS=[21:20], TFCSTS=[18:17], TPESTS=16). + * Masking with 0xFFFF would hide all TX activity. 
*/ + printf("[%lu] rx=%lu tx=%lu/%lu hw_tx=%lu " + "cfg=0x%04lX dbg=0x%08lX " + "dma=0x%08lX TS=%lu\n", + (unsigned long)(now / 1000U), + (unsigned long)pkts, + (unsigned long)tx_pkts, + (unsigned long)tx_errs, + (unsigned long)hw_tx, + (unsigned long)(mac_cfg & 0xFFFF), + (unsigned long)mac_dbg, + (unsigned long)dma_st, + (unsigned long)((dma_st >> 20) & 0x7U)); + last_diag_ms = now; + } + } + } + + return 0; +} diff --git a/src/port/va416xx/startup.c b/src/port/va416xx/startup.c new file mode 100644 index 0000000..f1649a5 --- /dev/null +++ b/src/port/va416xx/startup.c @@ -0,0 +1,53 @@ +/* startup.c + * + * Copyright (C) 2026 wolfSSL Inc. + * + * This file is part of wolfIP TCP/IP stack. + * + * wolfIP is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfIP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ +#include + +extern uint32_t _sidata; +extern uint32_t _sdata; +extern uint32_t _edata; +extern uint32_t _sbss; +extern uint32_t _ebss; +extern uint32_t _sdma_bss; +extern uint32_t _edma_bss; +extern void __libc_init_array(void); + +int main(void); + +void Reset_Handler(void) +{ + uint32_t *src; + uint32_t *dst; + + src = &_sidata; + for (dst = &_sdata; dst < &_edata; ) { + *dst++ = *src++; + } + for (dst = &_sbss; dst < &_ebss; ) { + *dst++ = 0u; + } + /* Zero DMA buffers in RAM1 (system bus, 0x20000000+) */ + for (dst = &_sdma_bss; dst < &_edma_bss; ) { + *dst++ = 0u; + } + __libc_init_array(); + (void)main(); + while (1) { } +} diff --git a/src/port/va416xx/syscalls.c b/src/port/va416xx/syscalls.c new file mode 100644 index 0000000..3739893 --- /dev/null +++ b/src/port/va416xx/syscalls.c @@ -0,0 +1,147 @@ +/* syscalls.c + * + * Minimal newlib stubs for VA416xx bare-metal. + * _write routes to UART0 via SDK HAL for debug output. + * + * Copyright (C) 2026 wolfSSL Inc. + * + * This file is part of wolfIP TCP/IP stack. + * + * wolfIP is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfIP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ +#include +#include +#include +#include +#include +#include + +#include "va416xx.h" +#include "va416xx_hal_uart.h" + +extern uint32_t _ebss; +extern uint32_t _estack; + +static char *heap_end; + +int _write(int file, const char *ptr, int len) +{ + (void)file; + HAL_Uart_TxStrN(VOR_UART0, ptr, len); + return len; +} + +int _close(int file) +{ + (void)file; + return -1; +} + +int _fstat(int file, struct stat *st) +{ + (void)file; + if (st == 0) { + errno = EINVAL; + return -1; + } + st->st_mode = S_IFCHR; + return 0; +} + +int _isatty(int file) +{ + (void)file; + return 1; +} + +int _lseek(int file, int ptr, int dir) +{ + (void)file; + (void)ptr; + (void)dir; + return 0; +} + +int _read(int file, char *ptr, int len) +{ + (void)file; + (void)ptr; + (void)len; + return 0; +} + +void *_sbrk(ptrdiff_t incr) +{ + char *prev; + if (heap_end == 0) { + heap_end = (char *)&_ebss; + } + prev = heap_end; + if ((heap_end + incr) >= (char *)&_estack) { + errno = ENOMEM; + return (void *)-1; + } + heap_end += incr; + return prev; +} + +int _gettimeofday(struct timeval *tv, void *tzvp) +{ + (void)tzvp; + if (tv == 0) { + errno = EINVAL; + return -1; + } + tv->tv_sec = 0; + tv->tv_usec = 0; + return 0; +} + +time_t time(time_t *t) +{ + if (t != 0) { + *t = 0; + } + return 0; +} + +void _exit(int status) +{ + (void)status; + while (1) { + __asm volatile("wfi"); + } +} + +int _kill(int pid, int sig) +{ + (void)pid; + (void)sig; + errno = EINVAL; + return -1; +} + +int _getpid(void) +{ + return 1; +} + +void _init(void) +{ +} + +void _fini(void) +{ +} diff --git a/src/port/va416xx/target.ld b/src/port/va416xx/target.ld new file mode 100644 index 0000000..f17f832 --- /dev/null +++ b/src/port/va416xx/target.ld @@ -0,0 +1,90 @@ +/* VA416xx Linker Script + * Flash: 
256KB at 0x00000000 + * SRAM: 64KB total, split across two bus domains: + * RAM0: 32KB @ 0x1FFF8000 (code bus / D-Code bus) - CPU only + * RAM1: 32KB @ 0x20000000 (system bus / AHB) - CPU + DMA + * + * Both regions are contiguous from the CPU's perspective (64KB total). + * The Ethernet DMA is an AHB system bus master and can ONLY access + * memory at 0x20000000+. All DMA descriptors and buffers are placed + * in the .dma_bss section which is forced to start at 0x20000000. + */ +MEMORY +{ + FLASH (rx) : ORIGIN = 0x00000000, LENGTH = 256K + RAM (rwx) : ORIGIN = 0x1FFF8000, LENGTH = 64K +} + +_estack = ORIGIN(RAM) + LENGTH(RAM); +_sidata = LOADADDR(.data); + +SECTIONS +{ + .isr_vector : + { + KEEP(*(.isr_vector)) + } > FLASH + + .text : + { + *(.text*) + *(.rodata*) + *(.ARM.extab* .gnu.linkonce.armextab.*) + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + *(.glue_7) + *(.glue_7t) + *(.eh_frame) + } > FLASH + + .preinit_array : + { + __preinit_array_start = .; + KEEP(*(.preinit_array*)) + __preinit_array_end = .; + } > FLASH + + .init_array : + { + __init_array_start = .; + KEEP(*(.init_array*)) + __init_array_end = .; + } > FLASH + + .fini_array : + { + __fini_array_start = .; + KEEP(*(.fini_array*)) + __fini_array_end = .; + } > FLASH + + .data : + { + _sdata = .; + *(.data*) + _edata = .; + } > RAM AT > FLASH + + .bss (NOLOAD) : + { + _sbss = .; + *(.bss*) + *(COMMON) + _ebss = .; + } > RAM + + /* Force location counter to system bus boundary (0x20000000). + * DMA descriptors and buffers MUST be at 0x20000000+ for the + * Ethernet DMA controller (AHB system bus master) to access them. */ + . = MAX(., 0x20000000); + + .dma_bss (NOLOAD) : + { + . 
= ALIGN(16); + _sdma_bss = .; + *(.dma_bss*) + _edma_bss = .; + } > RAM + + /* Verify DMA buffers are in system bus region */ + ASSERT(_sdma_bss >= 0x20000000, "ERROR: .dma_bss must be at 0x20000000+ for Ethernet DMA access") +} diff --git a/src/port/va416xx/va416xx_eth.c b/src/port/va416xx/va416xx_eth.c new file mode 100644 index 0000000..7dec275 --- /dev/null +++ b/src/port/va416xx/va416xx_eth.c @@ -0,0 +1,885 @@ +/* va416xx_eth.c + * + * VA416xx Ethernet driver for wolfIP + * Synopsys DesignWare GMAC with normal (legacy) descriptor format + * + * Copyright (C) 2026 wolfSSL Inc. + * + * This file is part of wolfIP TCP/IP stack. + * + * wolfIP is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfIP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ +#include +#include +#include +#include "config.h" +#include "va416xx_eth.h" +#include "va416xx.h" +#include "va416xx_hal_ethernet.h" + +/* Define DEBUG_ETH to enable verbose hardware diagnostic output. + * Without it, only essential status messages (link up/down, errors, + * speed/duplex result) are printed. */ +#ifdef DEBUG_ETH +#define ETH_DEBUG(...) printf(__VA_ARGS__) +#else +#define ETH_DEBUG(...) 
do {} while(0) +#endif + +/* ========================================================================= */ +/* Normal (Legacy) DMA Descriptor Bit Definitions */ +/* */ +/* The VA416xx uses the Synopsys DesignWare GMAC with normal descriptors. */ +/* This is fundamentally different from the STM32H5 enhanced/QoS format: */ +/* - TX: OWN in des0 (doorbell); FS/LS/IC/TCH/size in des1 (control) */ +/* - RX: OWN/FL/status in des0; RER/size in des1 */ +/* - Buffer address is in des2 (not des0) */ +/* - Ring wraps via TER/RER bits (not tail pointer registers) */ +/* - DMA kicked via poll demand registers (not tail pointer updates) */ +/* ========================================================================= */ + +/* --- TX Descriptor TDES0 bits --- + * Normal (non-enhanced) DWC GMAC descriptor format: + * TDES0 contains ONLY the OWN bit (bit 31) for CPU→DMA handoff. + * All other TDES0 bits are TX STATUS written back by DMA on completion + * (error flags, CC, UF, etc.). They must NOT be pre-loaded by the CPU. + * + * NOTE: The "ALTDESCRIPTOR" (enhanced) format puts FS/LS/IC/TCH/TER + * in TDES0 instead. This hardware uses the normal format — confirmed + * by observing that setting those bits in TDES0 causes the DMA to + * advance linearly (ignoring des3) and never transmit any frames. */ +#define TDES0_OWN (1U << 31) /* DMA owns descriptor (doorbell) */ +/* Bits [30:0] are TX status written back by DMA — do not set on TX submit */ + +/* --- TX Descriptor TDES1 bits (Control, normal descriptor format) --- + * In the normal DWC GMAC descriptor format, TX frame control belongs + * in TDES1 (dmamac_cntl). This matches the u-boot designware.c driver + * (non-ALTDESCRIPTOR path). 
*/ +#define TDES1_IC (1U << 31) /* Interrupt on Completion */ +#define TDES1_LS (1U << 30) /* Last Segment */ +#define TDES1_FS (1U << 29) /* First Segment */ +#define TDES1_DC (1U << 28) /* Disable CRC */ +#define TDES1_DP (1U << 27) /* Disable Padding */ +#define TDES1_TER (1U << 25) /* Transmit End of Ring */ +#define TDES1_TCH (1U << 24) /* Second Address Chained (chain mode) */ +#define TDES1_TBS2_SHIFT 11 +#define TDES1_TBS2_MASK 0x003FF800U /* Buffer 2 Size [21:11] */ +#define TDES1_TBS1_MASK 0x000007FFU /* Buffer 1 Size [10:0] */ + +/* --- RX Descriptor RDES0 bits --- */ +#define RDES0_OWN (1U << 31) /* DMA owns descriptor */ +#define RDES0_AFM (1U << 30) /* DA Filter Fail */ +#define RDES0_FL_SHIFT 16 /* Frame Length shift */ +#define RDES0_FL_MASK 0x3FFF0000U /* Frame Length [29:16] */ +#define RDES0_ES (1U << 15) /* Error Summary */ +#define RDES0_FS (1U << 9) /* First Descriptor */ +#define RDES0_LS (1U << 8) /* Last Descriptor */ + +/* --- RX Descriptor RDES1 bits --- */ +#define RDES1_DIC (1U << 31) /* Disable Interrupt on Completion */ +#define RDES1_RER (1U << 25) /* Receive End of Ring */ +#define RDES1_RCH (1U << 24) /* Second Address Chained */ +#define RDES1_RBS2_SHIFT 11 +#define RDES1_RBS2_MASK 0x003FF800U /* Buffer 2 Size [21:11] */ +#define RDES1_RBS1_MASK 0x000007FFU /* Buffer 1 Size [10:0] */ + +/* --- KSZ8041TL PHY-specific registers --- */ +/* PHY Control 2 register (reg 0x1F), encoded for SDK HAL (reg_num << 6). 
+ * Bits [6:4] = Operation Mode Indication after AN:
+ *   001 = 10BASE-T HD, 010 = 100BASE-TX HD,
+ *   101 = 10BASE-T FD, 110 = 100BASE-TX FD */
+#define PHY_CTRL2_REG (0x1FU << 6) /* reg 0x1F, GMII encoding */
+#define PHY_CTRL2_OPMODE_SHIFT 4
+#define PHY_CTRL2_OPMODE_MASK 0x7U
+#define PHY_CTRL2_OPMODE_10HD 1U
+#define PHY_CTRL2_OPMODE_100HD 2U
+#define PHY_CTRL2_OPMODE_10FD 5U
+#define PHY_CTRL2_OPMODE_100FD 6U
+
+/* ========================================================================= */
+/* DMA Descriptor Structure (4 x 32-bit words, 16 bytes) */
+/* ========================================================================= */
+
+/* One descriptor in the normal (legacy) layout described by the TDESx/RDESx
+ * bit definitions above. Every word is volatile because the DMA engine
+ * updates descriptors behind the CPU's back. */
+struct eth_desc {
+    volatile uint32_t des0; /* TX: OWN + status written back by DMA  RX: OWN/FL/status */
+    volatile uint32_t des1; /* TX: IC/LS/FS/TER/TCH + TBS1/TBS2  RX: RER/RCH + RBS1/RBS2 */
+    volatile uint32_t des2; /* Buffer 1 address */
+    volatile uint32_t des3; /* Buffer 2 / next descriptor address */
+};
+
+/* ========================================================================= */
+/* Static Buffers and Descriptor Rings */
+/* ========================================================================= */
+
+#define RX_DESC_COUNT 3U
+#define TX_DESC_COUNT 3U
+#define RX_BUF_SIZE LINK_MTU
+#define TX_BUF_SIZE LINK_MTU
+#define FRAME_MIN_LEN 60U
+
+/* All DMA descriptors and buffers MUST be in .dma_bss (RAM1, 0x20000000+).
+ * The Ethernet DMA is an AHB system bus master and cannot access RAM0
+ * (0x1FFF8000, code bus / D-Code bus). 
*/ +static struct eth_desc rx_ring[RX_DESC_COUNT] + __attribute__((aligned(16), section(".dma_bss"))); +static struct eth_desc tx_ring[TX_DESC_COUNT] + __attribute__((aligned(16), section(".dma_bss"))); +static uint8_t rx_buffers[RX_DESC_COUNT][RX_BUF_SIZE] + __attribute__((aligned(4), section(".dma_bss"))); +static uint8_t tx_buffers[TX_DESC_COUNT][TX_BUF_SIZE] + __attribute__((aligned(4), section(".dma_bss"))); +static uint8_t rx_staging_buffer[RX_BUF_SIZE] + __attribute__((aligned(4), section(".dma_bss"))); + +static uint32_t rx_idx; +static uint32_t tx_idx; + +static uint32_t rx_poll_count; +static uint32_t rx_pkt_count; +static uint32_t tx_pkt_count; +static uint32_t tx_err_count; + +/* ========================================================================= */ +/* Hardware Reset */ +/* ========================================================================= */ + +static int eth_hw_reset(void) +{ + uint32_t timeout = 1000000U; + + /* DMA software reset */ + VOR_ETH->DMA_BUS_MODE |= ETH_DMA_BUS_MODE_SWR_Msk; + while ((VOR_ETH->DMA_BUS_MODE & ETH_DMA_BUS_MODE_SWR_Msk) && (timeout > 0U)) { + timeout--; + } + return (timeout > 0U) ? 0 : -1; +} + +/* ========================================================================= */ +/* MAC Configuration */ +/* ========================================================================= */ + +static void eth_config_mac(const uint8_t *mac) +{ + /* Set MAC address via SDK HAL */ + HAL_SetMacAddr((uint8_t *)mac); + + /* Initial MAC config: PS=1 (MII port select), ACS (Auto Pad/CRC Strip), + * IPC (IP Checksum). Speed (FES) and duplex (DM) are configured later + * by eth_config_speed_duplex() based on actual PHY negotiation result. 
*/
+    VOR_ETH->MAC_CONFIG = ETH_MAC_CONFIG_PS_Msk |
+                          ETH_MAC_CONFIG_ACS_Msk |
+                          ETH_MAC_CONFIG_IPC_Msk;
+
+    /* Frame filter: promiscuous for initial bring-up */
+    VOR_ETH->MAC_FRAME_FLTR = ETH_MAC_FRAME_FLTR_PR_Msk;
+}
+
+/* ========================================================================= */
+/* MDIO Settling Delay */
+/* */
+/* The SDK HAL's MDIO busy-wait loop has inverted polarity: it checks */
+/* !(GB) instead of (GB), so it exits immediately after setting GB=1 */
+/* rather than waiting for the MDIO transaction to complete (~27 µs). */
+/* Back-to-back HAL_ReadPhyReg / HAL_WritePhyReg calls overlap on the */
+/* wire: a new transaction starts while the previous one is still in */
+/* progress. This can silently corrupt reads and lose writes entirely. */
+/* */
+/* Workaround: insert a ~50 µs delay after every MDIO operation to */
+/* guarantee the previous transaction has finished before starting the */
+/* next one. At 100 MHz, 5000 loop iterations ≈ 50 µs. */
+/* ========================================================================= */
+
+/* Fixed busy-wait; the volatile counter keeps the empty loop from being
+ * optimised away. */
+static void mdio_settle(void)
+{
+    volatile uint32_t d;
+    for (d = 0; d < 5000U; d++) { }
+}
+
+/* ========================================================================= */
+/* Speed/Duplex Configuration from PHY */
+/* ========================================================================= */
+
+/* Read the PHY's negotiated operating mode and rewrite MAC_CONFIG to match.
+ * Only the speed (FES) follows the PHY; the MAC is always programmed for
+ * full duplex, and the negotiated duplex is used for the log line only.
+ * OpMode values other than 100HD/100FD select 10M. The function then reads
+ * MAC_CONFIG back and prints what the hardware actually retained. */
+static void eth_config_speed_duplex(void)
+{
+    uint32_t maccr, readback;
+    uint16_t ctrl2;
+    uint32_t opmode;
+    int full_duplex, speed_100;
+    int actual_100;
+
+    /* Read KSZ8041TL PHY Control 2 (reg 0x1F) to determine the actual
+     * negotiated speed/duplex. The BMCR speed/duplex bits are unreliable
+     * after AN; PHY Ctrl2 OpMode[6:4] is the authoritative source. */
+    HAL_ReadPhyReg(PHY_CTRL2_REG, &ctrl2);
+    mdio_settle();
+    HAL_ReadPhyReg(PHY_CTRL2_REG, &ctrl2); /* double-read */
+    mdio_settle();
+    opmode = (ctrl2 >> PHY_CTRL2_OPMODE_SHIFT) & PHY_CTRL2_OPMODE_MASK;
+    full_duplex = (opmode == PHY_CTRL2_OPMODE_10FD ||
+                   opmode == PHY_CTRL2_OPMODE_100FD);
+    speed_100 = (opmode == PHY_CTRL2_OPMODE_100HD ||
+                 opmode == PHY_CTRL2_OPMODE_100FD);
+    ETH_DEBUG(" PHY Ctrl2: 0x%04X OpMode=%lu\n",
+              ctrl2, (unsigned long)opmode);
+
+    /* Configure MAC speed and duplex.
+     * PS=1: MII port select (required for VA416xx 10/100 MAC)
+     * FES: 1=100Mbps, 0=10Mbps (may be read-only on some variants)
+     * DM: Always set to Full Duplex. In Half Duplex mode the DWC GMAC
+     *     checks CRS before transmitting; CRS is unreliable on this
+     *     silicon so MAC defers indefinitely in HD mode.
+     * DCRS=1: Explicitly disable CRS check. This is a no-op in FD mode
+     *     but provides defense-in-depth in case DM is somehow ignored.
+     * ACS: Auto pad/CRC strip (RX)
+     * IPC: IP checksum offload (RX)
+     * BE: Frame Burst Enable (always set; see the inline note below)
+     * LM: Loopback mode — compile with -DMAC_LOOPBACK_TEST to enable.
+     *     Loops TX back to RX internally (bypasses MII). Used to verify
+     *     whether the DMA→MAC TX path works independently of the PHY. */
+    maccr = ETH_MAC_CONFIG_PS_Msk |
+            ETH_MAC_CONFIG_DM_Msk |
+            ETH_MAC_CONFIG_DCRS_Msk |
+            ETH_MAC_CONFIG_ACS_Msk |
+            ETH_MAC_CONFIG_IPC_Msk |
+            ETH_MAC_CONFIG_BE_Msk; /* Frame Burst Enable — u-boot reference always sets this */
+    if (speed_100)
+        maccr |= ETH_MAC_CONFIG_FES_Msk;
+#ifdef MAC_LOOPBACK_TEST
+    maccr |= ETH_MAC_CONFIG_LM_Msk;
+    printf(" *** MAC LOOPBACK MODE ENABLED ***\n");
+#endif
+
+    VOR_ETH->MAC_CONFIG = maccr;
+
+    /* Read back to verify — FES/DM may be read-only on some variants */
+    readback = VOR_ETH->MAC_CONFIG;
+    actual_100 = !!(readback & ETH_MAC_CONFIG_FES_Msk);
+    printf(" PHY: %s %s Duplex (negotiated)\n",
+           speed_100 ? "100M" : "10M",
+           full_duplex ? "Full" : "Half");
+    printf(" MAC: %s %s Duplex (MAC_CONFIG=0x%08lX)\n",
+           actual_100 ? "100M" : "10M",
+           (readback & ETH_MAC_CONFIG_DM_Msk) ? "Full" : "Half",
+           (unsigned long)readback);
+    if (speed_100 && !actual_100)
+        printf(" NOTE: FES read-only, MAC limited to 10M\n");
+    if (!(readback & ETH_MAC_CONFIG_DM_Msk))
+        printf(" WARNING: DM bit not retained! MAC stuck in Half Duplex\n");
+}
+
+/* ========================================================================= */
+/* DMA Configuration */
+/* ========================================================================= */
+
+static void eth_config_dma(void)
+{
+    /* DMA Bus Mode: PBL=8 only.
+     * FB (Fixed Burst) and AAL (Address-Aligned Beats) are intentionally
+     * omitted: they require 32-byte-aligned buffers and can silently cause
+     * AHB bus errors when the DMA tries to issue misaligned fixed-length
+     * bursts. Plain INCR bursts (no FB) work with any 4-byte-aligned
+     * address and are the safest default for bring-up. */
+    VOR_ETH->DMA_BUS_MODE = (8U << ETH_DMA_BUS_MODE_PBL_Pos);
+
+    /* Operation Mode:
+     * - TX: Threshold mode TTC=7 (16-byte threshold). Previous testing with
+     *   TSF (Store-and-Forward) showed TXFSTS=0 at ALL times including the
+     *   10µs post-kick sample window, proving the DMA was NOT writing frame
+     *   data to the TX FIFO in TSF mode. TTC threshold mode starts MAC TX
+     *   as soon as 16 bytes are in the FIFO rather than waiting for the
+     *   complete frame, which may work if TSF's "frame complete" gating
+     *   is the issue on this silicon.
+     * - RX: Store-and-Forward (RSF=1) - keeps working as before. 
*/ + VOR_ETH->DMA_OPER_MODE = ETH_DMA_OPER_MODE_RSF_Msk | + (7U << ETH_DMA_OPER_MODE_TTC_Pos); /* TTC=7 = 16B threshold */ + + /* Disable all DMA interrupts (polling mode) */ + VOR_ETH->DMA_INTR_EN = 0; +} + +/* ========================================================================= */ +/* Descriptor Ring Initialization */ +/* ========================================================================= */ + +static void eth_init_desc(void) +{ + uint32_t i; + + /* Clear all TX descriptors - CPU owns (OWN=0). + * + * Use CHAIN mode (TCH=1 in TDES0, des3 = next descriptor pointer). + * + * Why NOT ring mode (TER): + * TER lives in TDES0 bit 21. The DWC GMAC TX DMA overwrites the + * entire des0 word when it writes back completion status (making + * des0 = 0x00000000 on success). If the DMA then re-reads des0 to + * decide the next address, TER=0 → no wrap → the DMA runs linearly + * past the last descriptor into adjacent memory (RX ring), corrupting + * it and processing garbage as TX frames. + * + * Why chain mode works: + * In TCH mode the DMA follows des3 (the chain pointer) to find the + * next descriptor. The DMA ONLY writes back to des0; des1/des2/des3 + * are never touched. So des3 chain pointers survive indefinitely and + * the ring wraps reliably without re-reading des0. 
*/ + for (i = 0; i < TX_DESC_COUNT; i++) { + tx_ring[i].des0 = 0; + tx_ring[i].des1 = 0; + tx_ring[i].des2 = (uint32_t)tx_buffers[i]; + /* Chain: each descriptor points to the next; last wraps to first */ + tx_ring[i].des3 = (uint32_t)&tx_ring[(i + 1U) % TX_DESC_COUNT]; + } + /* No TDES0_TER needed - chain pointers in des3 replace ring-mode wrap */ + + /* Initialize RX descriptors - DMA owns (OWN=1) */ + for (i = 0; i < RX_DESC_COUNT; i++) { + rx_ring[i].des0 = RDES0_OWN; + rx_ring[i].des1 = (RX_BUF_SIZE & RDES1_RBS1_MASK); + rx_ring[i].des2 = (uint32_t)rx_buffers[i]; + rx_ring[i].des3 = 0; + } + /* Set RER on last RX descriptor for ring wrap */ + rx_ring[RX_DESC_COUNT - 1U].des1 |= RDES1_RER; + + rx_idx = 0; + tx_idx = 0; + + __DSB(); + + /* Program descriptor list base addresses */ + VOR_ETH->DMA_RX_DESC_LIST_ADDR = (uint32_t)&rx_ring[0]; + VOR_ETH->DMA_TX_DESC_LIST_ADDR = (uint32_t)&tx_ring[0]; + + __DSB(); +} + +/* ========================================================================= */ +/* Start / Stop */ +/* ========================================================================= */ + +static void eth_start(void) +{ + /* Ensure MMC counters are not frozen (bit 3 = freeze) */ + VOR_ETH->MMC_CNTRL &= ~(1U << 3); + + /* Enable MAC TX and RX */ + VOR_ETH->MAC_CONFIG |= ETH_MAC_CONFIG_TE_Msk | ETH_MAC_CONFIG_RE_Msk; + + /* Settling delay after TE/RE assertion (~100µs). + * Gives the TX/RX FIFO controllers time to initialize before DMA starts. */ + { volatile uint32_t _d; for (_d = 0; _d < 10000U; _d++) { } } + + /* Flush TX FIFO (FTF) BEFORE starting DMA. + * u-boot DWC GMAC reference driver always applies FTF before ST. + * FTF is self-clearing (~8 AHB cycles). Must be applied while ST=0: + * if applied after ST=1, it latches and flushes DMA-written data. 
*/ + VOR_ETH->DMA_OPER_MODE |= ETH_DMA_OPER_MODE_FTF_Msk; + { + uint32_t _ftf; + for (_ftf = 0; _ftf < 100000U; _ftf++) { + if (!(VOR_ETH->DMA_OPER_MODE & ETH_DMA_OPER_MODE_FTF_Msk)) + break; + } + ETH_DEBUG(" FTF flush: cleared=%lu (t=%lu)\n", + (unsigned long)!(VOR_ETH->DMA_OPER_MODE & ETH_DMA_OPER_MODE_FTF_Msk), + (unsigned long)_ftf); + } + + /* Start DMA TX and RX via DMA_OPER_MODE */ + VOR_ETH->DMA_OPER_MODE |= ETH_DMA_OPER_MODE_ST_Msk | + ETH_DMA_OPER_MODE_SR_Msk; + + __DSB(); + + ETH_DEBUG(" DMA_OPER_MODE=0x%08lX (FTF=%lu TSF=%lu TTC=%lu ST=%lu)\n", + (unsigned long)VOR_ETH->DMA_OPER_MODE, + (unsigned long)!!(VOR_ETH->DMA_OPER_MODE & ETH_DMA_OPER_MODE_FTF_Msk), + (unsigned long)!!(VOR_ETH->DMA_OPER_MODE & ETH_DMA_OPER_MODE_TSF_Msk), + (unsigned long)((VOR_ETH->DMA_OPER_MODE & ETH_DMA_OPER_MODE_TTC_Msk) >> ETH_DMA_OPER_MODE_TTC_Pos), + (unsigned long)!!(VOR_ETH->DMA_OPER_MODE & ETH_DMA_OPER_MODE_ST_Msk)); + + /* Kick RX DMA to start processing descriptors */ + VOR_ETH->DMA_RX_POLL_DEMAND = 0; + + ETH_DEBUG(" MAC_CONFIG: 0x%08lX\n", + (unsigned long)VOR_ETH->MAC_CONFIG); + ETH_DEBUG(" DMA_BUS_MODE: 0x%08lX\n", + (unsigned long)VOR_ETH->DMA_BUS_MODE); + ETH_DEBUG(" DMA_OPER_MODE: 0x%08lX\n", + (unsigned long)VOR_ETH->DMA_OPER_MODE); +} + +static void eth_stop(void) +{ + /* Stop DMA TX and RX */ + VOR_ETH->DMA_OPER_MODE &= ~(ETH_DMA_OPER_MODE_ST_Msk | + ETH_DMA_OPER_MODE_SR_Msk); + + /* Disable MAC TX and RX */ + VOR_ETH->MAC_CONFIG &= ~(ETH_MAC_CONFIG_TE_Msk | ETH_MAC_CONFIG_RE_Msk); +} + +/* ========================================================================= */ +/* PHY Initialization (uses SDK HAL) */ +/* ========================================================================= */ + +static int eth_phy_init(void) +{ + uint16_t phy_status; + uint16_t phy_id_hi; + uint32_t timeout; + + /* Set GMII clock divider in MAC_GMII_ADDR.CR field + * PEB1 EVK: 40MHz crystal * PLL 2.5x = 100MHz -> DIV42 (60-100MHz) + * MDIO clock: 100MHz / 42 = 2.38MHz (within 
1-2.5MHz spec) + */ + VOR_ETH->MAC_GMII_ADDR = (VOR_ETH->MAC_GMII_ADDR & + ~ETH_MAC_GMII_ADDR_CR_Msk) | + (PHY_MACMII_CR_DIV42 << ETH_MAC_GMII_ADDR_CR_Pos); + + /* Read PHY ID to verify communication */ + if (HAL_ReadPhyReg(PHY_ID_HI_REG, &phy_id_hi) != hal_status_ok) { + printf(" PHY: MDIO read failed\n"); + return -1; /* MDIO communication failed */ + } + mdio_settle(); + HAL_ReadPhyReg(PHY_ID_HI_REG, &phy_id_hi); /* double-read */ + mdio_settle(); + ETH_DEBUG(" PHY ID: 0x%04X\n", phy_id_hi); + if (phy_id_hi == 0xFFFFU || phy_id_hi == 0x0000U) { + printf(" PHY: not found (ID=0x%04X)\n", phy_id_hi); + return -1; /* PHY not found */ + } + + /* Reset PHY */ + HAL_ResetPHY(); + + /* Wait for PHY reset to complete. + * KSZ8041TL datasheet: reset completes in ~100-300 ms. + * Previous wait of 100K cycles (~1 ms) was far too short — the PHY + * was still resetting when we started configuring it, so the AN + * advertisement write was likely lost. + * 50M iterations at 100 MHz ≈ 500 ms — conservative but safe. */ + ETH_DEBUG(" PHY: waiting for reset (~500ms)...\n"); + for (volatile uint32_t i = 0; i < 50000000U; i++) { } + + /* Verify PHY reset completed: bit 15 (RESET) in BMCR should auto-clear */ + { + uint16_t cr; + HAL_ReadPhyReg(PHY_CONTROL_REG, &cr); + mdio_settle(); + HAL_ReadPhyReg(PHY_CONTROL_REG, &cr); + mdio_settle(); + ETH_DEBUG(" PHY BMCR after reset: 0x%04X (bit15=%u)\n", + cr, (unsigned)((cr >> 15) & 1)); + } + + /* Configure Auto-Negotiation advertisement. + * + * Why AN instead of forced mode: + * With AN disabled and the PHY forced to a specific speed/duplex, + * the link partner (switch) still runs auto-negotiation. Without + * an AN exchange it detects speed (from link pulses) but defaults + * to Half Duplex. Result: duplex mismatch → switch drops our FD + * TX frames as "collisions" while RX still works. + * + * With AN enabled, both sides agree on speed and duplex — no + * mismatch. 
+ * + * PHY AN Advertisement Register (reg 4) bit layout: + * [8] 100BASE-TX FD [7] 100BASE-TX HD [6] 10BASE-T FD + * [5] 10BASE-T HD [4:0] selector (00001 = 802.3) + * + * Advertise only 10BASE-T speeds (10M FD + 10M HD). + * + * The FES bit in MAC_CONFIG is read-only=0 on this VA416xx silicon + * variant — the MAC is permanently at 10Mbps internal timing and + * expects TXCLK = 2.5MHz from the PHY. If we advertise 100M and AN + * negotiates 100M, the PHY outputs 25MHz TXCLK which the MAC cannot + * use: TX is silently broken (confirmed by GPIO clock scan and FES + * read-only test). Restricting AN to 10M ensures TXCLK = 2.5MHz. */ + { + uint16_t an_adv, an_readback; + HAL_ReadPhyReg(PHY_AN_ADV_REG, &an_adv); + mdio_settle(); + HAL_ReadPhyReg(PHY_AN_ADV_REG, &an_adv); /* double-read */ + mdio_settle(); + ETH_DEBUG(" PHY AN adv (before): 0x%04X\n", an_adv); + an_adv &= ~0x01E0U; /* clear 100M-FD, 100M-HD, 10M-FD, 10M-HD */ + an_adv |= 0x0060U; /* advertise only 10M-FD (bit6) + 10M-HD (bit5) */ + HAL_WritePhyReg(PHY_AN_ADV_REG, an_adv); + mdio_settle(); + + /* Readback verification: confirm the write actually reached the PHY. + * If this doesn't match, the MDIO write was lost. */ + HAL_ReadPhyReg(PHY_AN_ADV_REG, &an_readback); + mdio_settle(); + HAL_ReadPhyReg(PHY_AN_ADV_REG, &an_readback); /* double-read */ + mdio_settle(); + ETH_DEBUG(" PHY AN adv: wrote=0x%04X read=0x%04X %s\n", + an_adv, an_readback, + (an_readback == an_adv) ? "OK" : "MISMATCH!"); + } + + /* Enable AN and restart it */ + HAL_SetPhyAutoNegotiate(PHYAUTONEGEN); + mdio_settle(); + { + uint16_t cr; + HAL_ReadPhyReg(PHY_CONTROL_REG, &cr); + mdio_settle(); + HAL_ReadPhyReg(PHY_CONTROL_REG, &cr); /* double-read */ + mdio_settle(); + cr |= 0x0200U; /* restart AN (bit 9) */ + HAL_WritePhyReg(PHY_CONTROL_REG, cr); + mdio_settle(); + } + ETH_DEBUG(" PHY AN: enabled, restart issued\n"); + + /* Wait for link up. 
+ * + * With MDIO settling delays (~50 µs each), each loop iteration now + * takes real wall time. With double-read + 2x settle = ~100 µs per + * iteration, 50K iterations ≈ 5 seconds — enough for AN to complete + * (typically 1-2 seconds). */ + timeout = 50000U; + do { + HAL_ReadPhyReg(PHY_STATUS_REG, &phy_status); + mdio_settle(); + HAL_ReadPhyReg(PHY_STATUS_REG, &phy_status); /* double-read: latch */ + mdio_settle(); + } while (!(phy_status & MIISTATUS_PHY_LINK) && --timeout); + + printf(" PHY link: %s\n", + (phy_status & MIISTATUS_PHY_LINK) ? "UP" : "DOWN"); + + /* Read KSZ8041TL PHY Control 2 (reg 0x1F) to confirm actual + * negotiated speed/duplex. Bits [6:4] = Operation Mode: + * 001=10HD 010=100HD 101=10FD 110=100FD */ + { + uint16_t ctrl2; + HAL_ReadPhyReg(PHY_CTRL2_REG, &ctrl2); + mdio_settle(); + HAL_ReadPhyReg(PHY_CTRL2_REG, &ctrl2); /* double-read */ + mdio_settle(); + ETH_DEBUG(" PHY Ctrl2: 0x%04X OpMode[6:4]=%u\n", + ctrl2, (unsigned)((ctrl2 >> 4) & 0x7)); + } + + /* Read BMCR and BMSR for diagnostics */ + { + uint16_t cr, sr; + HAL_ReadPhyReg(PHY_CONTROL_REG, &cr); + mdio_settle(); + HAL_ReadPhyReg(PHY_CONTROL_REG, &cr); + mdio_settle(); + HAL_ReadPhyReg(PHY_STATUS_REG, &sr); + mdio_settle(); + HAL_ReadPhyReg(PHY_STATUS_REG, &sr); + mdio_settle(); + ETH_DEBUG(" PHY BMCR=0x%04X BMSR=0x%04X\n", cr, sr); + ETH_DEBUG(" AN_complete=%u link=%u speed100=%u duplex=%u\n", + (unsigned)((sr >> 5) & 1), + (unsigned)((sr >> 2) & 1), + (unsigned)((cr >> 13) & 1), + (unsigned)((cr >> 8) & 1)); + } + + return (phy_status & MIISTATUS_PHY_LINK) ? 
0 : -2; +} + +/* ========================================================================= */ +/* Poll (RX) - Called from wolfIP_poll() */ +/* ========================================================================= */ + +static int eth_poll(struct wolfIP_ll_dev *dev, void *frame, uint32_t len) +{ + struct eth_desc *desc; + uint32_t status; + uint32_t frame_len = 0; + + (void)dev; + rx_poll_count++; + + desc = &rx_ring[rx_idx]; + + /* Check if DMA still owns this descriptor */ + if (desc->des0 & RDES0_OWN) + return 0; + + rx_pkt_count++; + status = desc->des0; + + /* Check for complete single-frame (FS + LS) with no errors */ + if (((status & (RDES0_FS | RDES0_LS)) == (RDES0_FS | RDES0_LS)) && + !(status & RDES0_ES)) { + /* Extract frame length from RDES0[29:16] */ + frame_len = (status & RDES0_FL_MASK) >> RDES0_FL_SHIFT; + if (frame_len > len) + frame_len = len; + + /* Copy via staging buffer (avoids DMA/CPU bus contention) */ + memcpy(rx_staging_buffer, rx_buffers[rx_idx], frame_len); + memcpy(frame, rx_staging_buffer, frame_len); + +#ifdef DEBUG_ETH + /* Print header of first 10 received frames for diagnostics */ + if (rx_pkt_count <= 10 && frame_len >= 14) { + uint8_t *f = (uint8_t *)frame; + ETH_DEBUG("RX[%lu] len=%lu dst=%02X:%02X:%02X:%02X:%02X:%02X " + "src=%02X:%02X:%02X:%02X:%02X:%02X type=%02X%02X\n", + (unsigned long)rx_pkt_count, (unsigned long)frame_len, + f[0], f[1], f[2], f[3], f[4], f[5], + f[6], f[7], f[8], f[9], f[10], f[11], + f[12], f[13]); + if (f[12] == 0x08 && f[13] == 0x06 && frame_len >= 42) { + ETH_DEBUG(" ARP op=%u target=%u.%u.%u.%u\n", + (unsigned)((f[20] << 8) | f[21]), + f[38], f[39], f[40], f[41]); + } + } +#endif + } + + /* Re-arm descriptor: give back to DMA */ + desc->des0 = RDES0_OWN; + desc->des1 = (RX_BUF_SIZE & RDES1_RBS1_MASK); + /* Preserve RER on last descriptor */ + if (rx_idx == (RX_DESC_COUNT - 1U)) + desc->des1 |= RDES1_RER; + + __DSB(); + + /* Clear RU (Receive Buffer Unavailable) if set */ + if (VOR_ETH->DMA_STATUS 
& ETH_DMA_STATUS_RU_Msk) { + VOR_ETH->DMA_STATUS = ETH_DMA_STATUS_RU_Msk; + } + + /* Kick RX DMA to resume polling */ + VOR_ETH->DMA_RX_POLL_DEMAND = 0; + + rx_idx = (rx_idx + 1U) % RX_DESC_COUNT; + return (int)frame_len; +} + +/* ========================================================================= */ +/* Send (TX) - Called from wolfIP stack */ +/* ========================================================================= */ + +static int eth_send(struct wolfIP_ll_dev *dev, void *frame, uint32_t len) +{ + struct eth_desc *desc; + uint32_t dma_len; + + (void)dev; + if (len == 0 || len > TX_BUF_SIZE) { + tx_err_count++; + return -1; + } + + desc = &tx_ring[tx_idx]; + + /* Check if CPU owns this descriptor (OWN=0) */ + if (desc->des0 & TDES0_OWN) { + tx_err_count++; + return -2; + } + + tx_pkt_count++; + +#ifdef DEBUG_ETH + /* Print frame header for first few packets */ + if (tx_pkt_count <= 8) { + uint8_t *f = (uint8_t *)frame; + ETH_DEBUG("TX[%lu] len=%lu dst=%02X:%02X:%02X:%02X:%02X:%02X " + "src=%02X:%02X:%02X:%02X:%02X:%02X type=%02X%02X\n", + (unsigned long)tx_idx, (unsigned long)len, + f[0], f[1], f[2], f[3], f[4], f[5], + f[6], f[7], f[8], f[9], f[10], f[11], + f[12], f[13]); + if (f[12] == 0x08 && f[13] == 0x06 && len >= 42) { + ETH_DEBUG(" ARP op=%u sender=%u.%u.%u.%u target=%u.%u.%u.%u\n", + (unsigned)((f[20] << 8) | f[21]), + f[28], f[29], f[30], f[31], + f[38], f[39], f[40], f[41]); + } + } +#endif + + /* Copy frame to DMA buffer */ + memcpy(tx_buffers[tx_idx], frame, len); + + /* Pad to minimum frame size */ + dma_len = (len < FRAME_MIN_LEN) ? FRAME_MIN_LEN : len; + if (dma_len > len) + memset(tx_buffers[tx_idx] + len, 0, dma_len - len); + + /* Set buffer address in des2 */ + desc->des2 = (uint32_t)tx_buffers[tx_idx]; + + /* Set des1: frame control + buffer size. + * Normal DWC GMAC format: FS/LS/IC/TCH live in TDES1, not TDES0. + * TCH=1: DMA follows des3 (chain pointer) to find the next descriptor. 
+ * des3 was set permanently in eth_init_desc() and is never overwritten + * by DMA writeback, so the ring wraps correctly. + * Set des1 BEFORE des0 so the descriptor is complete when OWN is given. */ + desc->des1 = TDES1_IC | TDES1_LS | TDES1_FS | + TDES1_TCH | (dma_len & TDES1_TBS1_MASK); + + __DSB(); + + /* Set des0: OWN only — this is the doorbell that transfers ownership to + * the DMA. All other TDES0 bits are TX status written back by DMA. */ + desc->des0 = TDES0_OWN; + + __DSB(); + + /* Clear TU (Transmit Buffer Unavailable) if set, then kick TX DMA */ + if (VOR_ETH->DMA_STATUS & ETH_DMA_STATUS_TU_Msk) + VOR_ETH->DMA_STATUS = ETH_DMA_STATUS_TU_Msk; + VOR_ETH->DMA_TX_POLL_DEMAND = 0; + +#ifdef DEBUG_ETH + /* Per-TX diagnostic: sample MAC_DEBUG at 3 time points to verify + * data actually enters the TX FIFO (TXFSTS≠0 at 10µs → MAC transmitted). */ + if (tx_pkt_count <= 5U) { + volatile uint32_t _d; + uint32_t mac_dbg_before, mac_dbg_imm, mac_dbg_10us, mac_dbg_500us; + uint32_t dma_st, des0_wb, cur_desc; + + mac_dbg_before = VOR_ETH->MAC_DEBUG; + mac_dbg_imm = VOR_ETH->MAC_DEBUG; + for (_d = 0; _d < 1000U; _d++) { } + mac_dbg_10us = VOR_ETH->MAC_DEBUG; + for (_d = 0; _d < 50000U; _d++) { } + mac_dbg_500us = VOR_ETH->MAC_DEBUG; + + dma_st = VOR_ETH->DMA_STATUS; + des0_wb = desc->des0; + cur_desc = VOR_ETH->DMA_CURR_TX_DESC; + + ETH_DEBUG(" TX#%lu des1=0x%08lX des0_wb=0x%08lX curr=0x%08lX TS=%lu\n", + (unsigned long)tx_pkt_count, + (unsigned long)desc->des1, (unsigned long)des0_wb, + (unsigned long)cur_desc, + (unsigned long)((dma_st >> 20) & 0x7U)); + ETH_DEBUG(" TXFSTS: bef=%lu imm=%lu @10us=%lu @500us=%lu " + "TRCSTS: @10us=%lu hw_tx=%lu\n", + (unsigned long)((mac_dbg_before >> 24) & 3U), + (unsigned long)((mac_dbg_imm >> 24) & 3U), + (unsigned long)((mac_dbg_10us >> 24) & 3U), + (unsigned long)((mac_dbg_500us >> 24) & 3U), + (unsigned long)((mac_dbg_10us >> 20) & 3U), + (unsigned long)VOR_ETH->TXFRAMECOUNT_GB); + if (dma_st & (1U << 13)) + ETH_DEBUG(" *** 
FATAL BUS ERROR (FBI) ***\n"); + } +#endif /* DEBUG_ETH */ + + tx_idx = (tx_idx + 1U) % TX_DESC_COUNT; + return (int)len; +} + +/* ========================================================================= */ +/* Default MAC Address */ +/* ========================================================================= */ + +static void va416xx_eth_generate_mac(uint8_t mac[6]) +{ + mac[0] = 0x02; /* locally administered */ + mac[1] = 0x11; + mac[2] = 0xAA; + mac[3] = 0xBB; + mac[4] = 0x44; /* '4' for VA416xx */ + mac[5] = 0x16; +} + +/* ========================================================================= */ +/* Statistics */ +/* ========================================================================= */ + +void va416xx_eth_get_stats(uint32_t *polls, uint32_t *pkts, uint32_t *tx_pkts, + uint32_t *tx_errs) +{ + if (polls) *polls = rx_poll_count; + if (pkts) *pkts = rx_pkt_count; + if (tx_pkts) *tx_pkts = tx_pkt_count; + if (tx_errs) *tx_errs = tx_err_count; +} + +uint32_t va416xx_eth_get_dma_status(void) +{ + return VOR_ETH->DMA_STATUS; +} + +void va416xx_eth_get_mac_diag(uint32_t *mac_cfg, uint32_t *mac_dbg, + uint32_t *tx_frames_gb) +{ + if (mac_cfg) *mac_cfg = VOR_ETH->MAC_CONFIG; + if (mac_dbg) *mac_dbg = VOR_ETH->MAC_DEBUG; + if (tx_frames_gb) *tx_frames_gb = VOR_ETH->TXFRAMECOUNT_GB; +} + +/* ========================================================================= */ +/* Initialization */ +/* ========================================================================= */ + +int va416xx_eth_init(struct wolfIP_ll_dev *ll, const uint8_t *mac) +{ + uint8_t local_mac[6]; + int ret; + + if (ll == NULL) + return -1; + + if (mac == NULL) { + va416xx_eth_generate_mac(local_mac); + mac = local_mac; + } + + memcpy(ll->mac, mac, 6); + strncpy(ll->ifname, "eth0", sizeof(ll->ifname) - 1); + ll->ifname[sizeof(ll->ifname) - 1] = '\0'; + ll->poll = eth_poll; + ll->send = eth_send; + + /* 1. Stop any running DMA/MAC */ + eth_stop(); + + /* 2. 
DMA Software Reset */ + if (eth_hw_reset() != 0) + return -2; + + /* 3. Configure DMA (bus mode, operation mode) */ + eth_config_dma(); + + /* 4. Initialize descriptor rings */ + eth_init_desc(); + + /* 5. Configure MAC (address, speed, duplex, filter) */ + eth_config_mac(mac); + + /* 6. Initialize PHY via SDK HAL */ + ret = eth_phy_init(); + + /* 7. Configure MAC speed/duplex. Always run this regardless of link + * state so MAC_CONFIG is correct even if link came up late. */ + eth_config_speed_duplex(); + + /* 8. Start MAC and DMA */ + eth_start(); + + return ret; +} diff --git a/src/port/va416xx/va416xx_eth.h b/src/port/va416xx/va416xx_eth.h new file mode 100644 index 0000000..6809465 --- /dev/null +++ b/src/port/va416xx/va416xx_eth.h @@ -0,0 +1,37 @@ +/* va416xx_eth.h + * + * VA416xx Ethernet driver for wolfIP + * Synopsys DesignWare GMAC with normal (legacy) descriptor format + * + * Copyright (C) 2026 wolfSSL Inc. + * + * This file is part of wolfIP TCP/IP stack. + * + * wolfIP is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfIP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ +#ifndef WOLFIP_VA416XX_ETH_H +#define WOLFIP_VA416XX_ETH_H + +#include <stdint.h> +#include "wolfip.h" + +int va416xx_eth_init(struct wolfIP_ll_dev *ll, const uint8_t *mac); +void va416xx_eth_get_stats(uint32_t *polls, uint32_t *pkts, uint32_t *tx_pkts, + uint32_t *tx_errs); +uint32_t va416xx_eth_get_dma_status(void); +void va416xx_eth_get_mac_diag(uint32_t *mac_cfg, uint32_t *mac_dbg, + uint32_t *tx_frames_gb); + +#endif /* WOLFIP_VA416XX_ETH_H */ From 9855970ad8310a35b53f8ec38b53a787d52419cc Mon Sep 17 00:00:00 2001 From: David Garske Date: Tue, 24 Feb 2026 09:42:41 -0800 Subject: [PATCH 2/3] Peer review fixes --- src/port/va416xx/README.md | 2 +- src/port/va416xx/main.c | 39 ++++++++++++------- src/port/va416xx/syscalls.c | 4 +- src/port/va416xx/va416xx_eth.c | 68 ++++++++++++++++++---------------- 4 files changed, 65 insertions(+), 48 deletions(-) diff --git a/src/port/va416xx/README.md b/src/port/va416xx/README.md index 6ba8d9d..3823116 100644 --- a/src/port/va416xx/README.md +++ b/src/port/va416xx/README.md @@ -230,7 +230,7 @@ All DMA descriptors and buffers are placed in `.dma_bss` (RAM1, 0x20000000+). 
Th | `ivt.c` | Interrupt vector table (16 system + 64 external IRQs) | | `syscalls.c` | Newlib stubs (`_write` routes to UART0) | | `target.ld` | Linker script (Flash 256 KB, RAM 64 KB, `.dma_bss` in RAM1) | -| `hal_config.h` | SDK HAL configuration (SysTick 10 ms tick) | +| `hal_config.h` | SDK HAL configuration (`SYSTICK_INTERVAL_MS=1`, 1 ms SysTick tick) | | `board.h` | Board selection (includes PEB1 EVK header) | | `Makefile` | Build system with SDK integration | diff --git a/src/port/va416xx/main.c b/src/port/va416xx/main.c index d488ee2..d2b3dac 100644 --- a/src/port/va416xx/main.c +++ b/src/port/va416xx/main.c @@ -35,10 +35,10 @@ #include "va416xx_hal_clkgen.h" #include "board.h" -/* HAL_time_ms: millisecond tick counter maintained by SysTick ISR (10ms - * resolution by default). Used as the wolfIP `now` parameter so that all - * stack timers (DHCP, ARP, TCP retransmit, etc.) run in real wall-clock - * time rather than depending on CPU loop speed. */ +/* HAL_time_ms: millisecond tick counter maintained by SysTick ISR (1ms + * resolution; SYSTICK_INTERVAL_MS=1 in hal_config.h). Used as the wolfIP + * `now` parameter so that all stack timers (DHCP, ARP, TCP retransmit, etc.) + * run in real wall-clock time rather than depending on CPU loop speed. */ extern volatile uint64_t HAL_time_ms; #define RX_BUF_SIZE 1024 @@ -84,7 +84,18 @@ static int client_fd = -1; uint32_t wolfIP_getrandom(void) { - static uint32_t lfsr = 0x1A2B3C4DU; + static uint32_t lfsr; + static int seeded = 0; + + if (!seeded) { + /* Seed from boot time so ISNs and ephemeral ports vary per power-up. + * HAL_time_ms at first wolfIP call is typically 1-5 s into boot. + * Note: not cryptographically secure; suitable for embedded demo use. 
*/ + lfsr = (uint32_t)HAL_time_ms; + if (lfsr == 0U) + lfsr = 0x1A2B3C4DU; /* LFSR must never be zero */ + seeded = 1; + } lfsr ^= lfsr << 13; lfsr ^= lfsr >> 17; lfsr ^= lfsr << 5; @@ -601,14 +612,14 @@ int main(void) uint32_t pin, i, ones, zeros; /* PORTA[8:15] */ for (pin = 8; pin <= 15; pin++) { - HAL_Iocfg_PinMux(PORTA, pin, 0); - PORTA->DIR &= ~(1U << pin); + HAL_Iocfg_PinMux(VOR_PORTA, pin, 0); + VOR_PORTA->DIR &= ~(1U << pin); { volatile uint32_t _s; for (_s = 0; _s < 20U; _s++) {} } ones = 0; zeros = 0; for (i = 0; i < 64U; i++) { - if (PORTA->DATAIN & (1U << pin)) ones++; else zeros++; + if (VOR_PORTA->DATAIN & (1U << pin)) ones++; else zeros++; } - HAL_Iocfg_PinMux(PORTA, pin, 1); + HAL_Iocfg_PinMux(VOR_PORTA, pin, 1); { volatile uint32_t _s; for (_s = 0; _s < 20U; _s++) {} } printf(" PA%02lu: hi=%02lu lo=%02lu%s\n", (unsigned long)pin, (unsigned long)ones, (unsigned long)zeros, @@ -616,14 +627,14 @@ int main(void) } /* PORTB[0:10] */ for (pin = 0; pin <= 10; pin++) { - HAL_Iocfg_PinMux(PORTB, pin, 0); - PORTB->DIR &= ~(1U << pin); + HAL_Iocfg_PinMux(VOR_PORTB, pin, 0); + VOR_PORTB->DIR &= ~(1U << pin); { volatile uint32_t _s; for (_s = 0; _s < 20U; _s++) {} } ones = 0; zeros = 0; for (i = 0; i < 64U; i++) { - if (PORTB->DATAIN & (1U << pin)) ones++; else zeros++; + if (VOR_PORTB->DATAIN & (1U << pin)) ones++; else zeros++; } - HAL_Iocfg_PinMux(PORTB, pin, 1); + HAL_Iocfg_PinMux(VOR_PORTB, pin, 1); { volatile uint32_t _s; for (_s = 0; _s < 20U; _s++) {} } printf(" PB%02lu: hi=%02lu lo=%02lu%s\n", (unsigned long)pin, (unsigned long)ones, (unsigned long)zeros, @@ -658,7 +669,7 @@ int main(void) } #endif /* TX_SELFTEST */ - /* 10. Main loop — use HAL_time_ms (SysTick-based, 10ms resolution) + /* 10. Main loop — use HAL_time_ms (SysTick-based, 1ms resolution) * so wolfIP timers (TCP, ARP, etc.) run in real wall-clock time. 
*/ { uint64_t last_led_ms = 0; diff --git a/src/port/va416xx/syscalls.c b/src/port/va416xx/syscalls.c index 3739893..a33721b 100644 --- a/src/port/va416xx/syscalls.c +++ b/src/port/va416xx/syscalls.c @@ -32,7 +32,7 @@ #include "va416xx_hal_uart.h" extern uint32_t _ebss; -extern uint32_t _estack; +extern uint32_t _sdma_bss; /* start of .dma_bss: hard upper limit for heap */ static char *heap_end; @@ -89,7 +89,7 @@ void *_sbrk(ptrdiff_t incr) heap_end = (char *)&_ebss; } prev = heap_end; - if ((heap_end + incr) >= (char *)&_estack) { + if ((heap_end + incr) >= (char *)&_sdma_bss) { errno = ENOMEM; return (void *)-1; } diff --git a/src/port/va416xx/va416xx_eth.c b/src/port/va416xx/va416xx_eth.c index 7dec275..b60e5e8 100644 --- a/src/port/va416xx/va416xx_eth.c +++ b/src/port/va416xx/va416xx_eth.c @@ -183,8 +183,15 @@ static void eth_config_mac(const uint8_t *mac) ETH_MAC_CONFIG_ACS_Msk | ETH_MAC_CONFIG_IPC_Msk; - /* Frame filter: promiscuous for initial bring-up */ + /* Frame filter. + * PR=0: perfect DA filtering — accepts unicast to our MAC + broadcast. + * DBF=0: broadcast NOT disabled, so DHCP (broadcast) and ARP work. + * In DEBUG_ETH builds, enable promiscuous to capture all traffic. */ +#ifdef DEBUG_ETH VOR_ETH->MAC_FRAME_FLTR = ETH_MAC_FRAME_FLTR_PR_Msk; +#else + VOR_ETH->MAC_FRAME_FLTR = 0U; +#endif } /* ========================================================================= */ @@ -439,7 +446,6 @@ static int eth_phy_init(void) { uint16_t phy_status; uint16_t phy_id_hi; - uint32_t timeout; /* Set GMII clock divider in MAC_GMII_ADDR.CR field * PEB1 EVK: 40MHz crystal * PLL 2.5x = 100MHz -> DIV42 (60-100MHz) @@ -466,24 +472,24 @@ static int eth_phy_init(void) /* Reset PHY */ HAL_ResetPHY(); - /* Wait for PHY reset to complete. - * KSZ8041TL datasheet: reset completes in ~100-300 ms. - * Previous wait of 100K cycles (~1 ms) was far too short — the PHY - * was still resetting when we started configuring it, so the AN - * advertisement write was likely lost. 
- * 50M iterations at 100 MHz ≈ 500 ms — conservative but safe. */ - ETH_DEBUG(" PHY: waiting for reset (~500ms)...\n"); - for (volatile uint32_t i = 0; i < 50000000U; i++) { } - - /* Verify PHY reset completed: bit 15 (RESET) in BMCR should auto-clear */ + /* Wait for PHY reset to complete by polling BMCR bit 15 (RESET). + * KSZ8041TL datasheet: reset completes in 100-300 ms. Hard deadline + * of 500 ms as a safety net. Using HAL_time_ms for real wall-clock + * timing rather than cycle counting (which is sensitive to -Os inlining + * and actual loop overhead). */ + ETH_DEBUG(" PHY: waiting for reset (max 500ms)...\n"); { uint16_t cr; - HAL_ReadPhyReg(PHY_CONTROL_REG, &cr); - mdio_settle(); - HAL_ReadPhyReg(PHY_CONTROL_REG, &cr); - mdio_settle(); - ETH_DEBUG(" PHY BMCR after reset: 0x%04X (bit15=%u)\n", - cr, (unsigned)((cr >> 15) & 1)); + uint64_t deadline = HAL_time_ms + 500U; + do { + HAL_ReadPhyReg(PHY_CONTROL_REG, &cr); + mdio_settle(); + HAL_ReadPhyReg(PHY_CONTROL_REG, &cr); /* double-read */ + mdio_settle(); + } while ((cr & (1U << 15)) && (HAL_time_ms < deadline)); + ETH_DEBUG(" PHY BMCR after reset: 0x%04X (bit15=%u, t=%lums)\n", + cr, (unsigned)((cr >> 15) & 1), + (unsigned long)(HAL_time_ms - (deadline - 500U))); } /* Configure Auto-Negotiation advertisement. @@ -548,19 +554,19 @@ static int eth_phy_init(void) } ETH_DEBUG(" PHY AN: enabled, restart issued\n"); - /* Wait for link up. - * - * With MDIO settling delays (~50 µs each), each loop iteration now - * takes real wall time. With double-read + 2x settle = ~100 µs per - * iteration, 50K iterations ≈ 5 seconds — enough for AN to complete - * (typically 1-2 seconds). */ - timeout = 50000U; - do { - HAL_ReadPhyReg(PHY_STATUS_REG, &phy_status); - mdio_settle(); - HAL_ReadPhyReg(PHY_STATUS_REG, &phy_status); /* double-read: latch */ - mdio_settle(); - } while (!(phy_status & MIISTATUS_PHY_LINK) && --timeout); + /* Wait for link up, 5-second deadline. 
+ * The BMSR link-status bit is latching-low (a link loss stays latched + * until the register is read), so we double-read: the first read clears + * the latch, the second read returns the current state. */ + { + uint64_t an_deadline = HAL_time_ms + 5000U; + do { + HAL_ReadPhyReg(PHY_STATUS_REG, &phy_status); + mdio_settle(); + HAL_ReadPhyReg(PHY_STATUS_REG, &phy_status); /* double-read: latch */ + mdio_settle(); + } while (!(phy_status & MIISTATUS_PHY_LINK) && (HAL_time_ms < an_deadline)); + } printf(" PHY link: %s\n", (phy_status & MIISTATUS_PHY_LINK) ? "UP" : "DOWN"); From bd13a7e5b35e6926d09407cc4b945163cca87632 Mon Sep 17 00:00:00 2001 From: David Garske Date: Tue, 24 Feb 2026 12:56:04 -0800 Subject: [PATCH 3/3] Further peer review fixes --- src/port/va416xx/README.md | 26 ++++++++++++++++++++++++++ src/port/va416xx/main.c | 22 +++++++++++++++++++--- 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/src/port/va416xx/README.md b/src/port/va416xx/README.md index 3823116..ed92cb5 100644 --- a/src/port/va416xx/README.md +++ b/src/port/va416xx/README.md @@ -148,6 +148,32 @@ Speed: 170096 ms, RX 0 bytes (~0 B/s), TX 23195968 bytes (~136369 B/s) > because wolfIP's TCP send window cycles: the device transmits until the remote > receive window fills, then waits for ACKs to reopen it before sending more. +## Known Limitations + +### DHCP Subnet Mask Display + +wolfIP's internal DHCP client stores the subnet mask correctly in the stack, but `wolfIP_ipconfig_get()` returns the gateway address in the netmask slot on some DHCP server responses. Connectivity is unaffected (ARP, ping, TCP echo, and the throughput test all work correctly). Only the UART diagnostic print is wrong: + +``` +DHCP bound: + IP: 10.0.4.184 + Mask: 10.0.4.1 ← displayed incorrectly; actual mask is 255.255.255.0 + GW: 10.0.4.1 +``` + +This is a wolfIP core issue, not a driver bug.
+ +### PHY Link Down at Startup + +If the Ethernet cable is not connected (or the switch is powering up) when the board boots, auto-negotiation will time out after 5 seconds and the UART log shows the following (the first line is printed by the Ethernet driver, the `NOTE` line by `main()`): + +``` + PHY link: DOWN + NOTE: PHY link down at startup (cable disconnected?) — continuing +``` + +The MAC and DMA are fully initialized and running. The device will respond to traffic once the link comes up — no reboot required. + ## Architecture ### Ethernet Driver (`va416xx_eth.c`) diff --git a/src/port/va416xx/main.c b/src/port/va416xx/main.c index d2b3dac..387dc03 100644 --- a/src/port/va416xx/main.c +++ b/src/port/va416xx/main.c @@ -359,7 +359,14 @@ int main(void) printf("Initializing Ethernet...\n"); ll = wolfIP_getdev(IPStack); ret = va416xx_eth_init(ll, NULL); - if (ret < 0) { + if (ret == -2) { + /* PHY link was down when auto-negotiation timed out. MAC/DMA + * are fully initialized and running; the device will respond + * to traffic once the link comes up (e.g. cable or switch + * powered on after the board). */ + printf(" NOTE: PHY link down at startup (cable disconnected?)" + " — continuing\n"); + } else if (ret < 0) { printf(" ERROR: va416xx_eth_init failed (%d)\n", ret); } @@ -600,8 +607,14 @@ int main(void) (unsigned)((bmcr >> 8) & 1)); } +#ifdef DEBUG_ETH /* === GPIO Pin Activity Scan === - * Identify which ETH pins carry PHY-driven clocks (TXCLK, RXCLK). + * One-time bring-up tool: identifies which ETH pins carry PHY-driven + * clocks (TXCLK, RXCLK) by briefly sampling them as GPIO inputs. + * Hardware is now characterized (TXCLK=PB02, RXCLK=PA15 at 2.5 MHz + * for 10M; FES read-only=0 confirmed), so this section is only + * compiled in with DEBUG_ETH to keep TX_SELFTEST UART output concise. + * * Each pin is briefly switched to GPIO input (funsel=0), sampled * 64 times in a tight loop (~2µs at 100MHz), then restored to * funsel=1.
At 2.5MHz TXCLK (10Mbps), 64 samples span ~5 clock @@ -645,7 +658,9 @@ int main(void) /* === FES=1 TX Attempt === * If PHY negotiated 100M (TXCLK=25MHz) but MAC has FES=0 (expects * 2.5MHz), the TX clock domain is mismatched. Try forcing FES=1 - * (100M) and see if a frame exits the MAC (TXFRAMECOUNT_GB delta). */ + * (100M) and see if a frame exits the MAC (TXFRAMECOUNT_GB delta). + * Result: FES is confirmed read-only=0 on this silicon; MAC always + * runs at 10M. Gated under DEBUG_ETH as silicon is characterized. */ { uint32_t hw_tx_before = 0, hw_tx_after = 0; uint32_t cfg_save; @@ -666,6 +681,7 @@ int main(void) VOR_ETH->MAC_CONFIG = cfg_save; /* restore original speed */ __DSB(); } +#endif /* DEBUG_ETH */ } #endif /* TX_SELFTEST */