Bug in NetBSD compiler or "bus.h" header file crashes my ISDN4BSD


Summed up:

It appears that because the bus_space_xxx functions are implemented as
macros, errors can end up in the generated code! The following code is
executed when filtering on the "%cx" register:


0xcd71796a:     movw    0xffffffda(%ebp),%cx //0x8
0xcd71796e:     shrw    $0x2,%cx //0x2



0xcd7179df:     lodsl   (%esi) // valid address
0xcd7179e0:     movl    %eax,0(%edx) //valid address
0xcd7179e2:     loop    0xcd7179df // uses %ecx, but only %cx was initialised

I use a 16-bit length variable which I pass to "bus_space_write_multi_4" as
the last argument. This is actually the "count". I suspect that somehow the
upper 16 bits of the %ecx register never get initialised --- oops. Is this a
bug in the NetBSD compiler or in the "bus.h" header file?

Full details:

First of all, I wish the NetBSD kernel would load the symbols from kernel
modules and resolve them when the kernel panics, since my software is a
kernel module. Anyway, here goes:

db> bt                                   
_prop_dictionary_keysym32_pool(c312a000,c312ad94,c2a90938,8,c312a000) at 
at 0
at 0
_prop_dictionary_keysym32_pool(c312a000,c312ad94,cd73f828,341,c312a93c) at 
_prop_dictionary_keysym32_pool(c312a000,8,3,7,0) at 0xcd715516
_prop_dictionary_keysym32_pool(c312a000,22bf98,cd3f2b5c,c04390ca,c09c3ac0) at 
_prop_dictionary_keysym32_pool(c3116000,0,dd310010,30,c0970010) at 0xcd700075
Xintr_ioapic_level20() at netbsd:Xintr_ioapic_level20+0x9b
--- interrupt ---
Xspllower(0,c09993c4,7,6,c04338c1) at netbsd:Xspllower+0xe
mpidle(cc8ac2e4,0,0,cc8ac2e4,103) at netbsd:mpidle+0xd7
preempt(0,4,cc8bde88,8062000,3ff0) at netbsd:preempt+0x5d
trap() at netbsd:trap+0x6bb
--- trap (number 3) ---

The topmost function translates to the following function:



static void
hfc4s8s_fifo_write FIFO_WRITE_T(sc,f,ptr,len)
        u_int16_t temp;

            f->last_byte = ptr[len-1];

            /* pre increment Z-counter, before "len" is changed */
            f->Z_drvr += len;

            if(f->Z_drvr >= f->fm.h.Zend)
                f->Z_drvr -= f->fm.h.Zsize;

            temp = len & 3;
                ptr += temp;

            len /= 4;

u_int32_t *)ptr,len);


0xcd717915:     leal    0(%esi),%esi
0xcd717918:     pushl   %ebp
0xcd717919:     movl    %esp,%ebp
0xcd71791b:     pushl   %edi
0xcd71791c:     pushl   %esi
0xcd71791d:     pushl   %ebx
0xcd71791e:     subl    $0x1c,%esp
0xcd717921:     movl    0x8(%ebp),%eax //0xc312a000
0xcd717924:     movl    0xc(%ebp),%ecx //0xc312ad94
0xcd717927:     movl    0x10(%ebp),%edi //0xc2a90938
0xcd71792a:     movl    0x14(%ebp),%edx //0x8
0xcd71792d:     movw    %dx,0xffffffda(%ebp) //0x8
0xcd717931:     movl    0x3a4(%eax),%esi //0x1
0xcd717937:     movl    %esi,0xffffffdc(%ebp) //0x1
0xcd71793a:     movl    0x3b0(%eax),%eax //0xcd427000
0xcd717940:     movl    %eax,0xffffffe0(%ebp) //0xcd427000
0xcd717943:     testw   %dx,%dx
0xcd717946:     jz      0xcd71798c //not jumped

0xcd717948:     movzwl  %edx,%eax //0x8
0xcd71794b:     movb    0xffffffff(%eax,%edi,1),%al //0xff
0xcd71794f:     movb    %al,0x14(%ecx)
0xcd717952:     movl    %edx,%eax //0x8
0xcd717954:     addl    0x74(%ecx),%eax
0xcd717957:     movw    %ax,0x74(%ecx)
0xcd71795b:     cmpw    0xc(%ecx),%ax
0xcd71795f:     jnb     0xcd7179c0 //not jumped
0xcd717961:     movw    0xffffffda(%ebp),%bx //0x8
0xcd717965:     andl    $0x3,%ebx //0x0
0xcd717968:     jnz     0xcd717994 //not jumped
0xcd71796a:     movw    0xffffffda(%ebp),%cx //0x8
0xcd71796e:     shrw    $0x2,%cx //0x2
0xcd717972:     jz      0xcd71798c //not jumped
0xcd717974:     movl    0xffffffdc(%ebp),%eax //0x1
0xcd717977:     testl   %eax,%eax
0xcd717979:     jnz     0xcd7179d4 // jumped
0xcd71797b:     movl    0xffffffe0(%ebp),%edx
0xcd71797e:     subl    $-0x80,%edx
0xcd717981:     movzwl  %ecx,%ecx
0xcd717984:     movl    %edi,%esi
0xcd717986:     cld
0xcd717987:     repe outsl      (%esi),%dx
0xcd717989:     leal    0(%esi),%esi
0xcd71798c:     addl    $0x1c,%esp
0xcd71798f:     popl    %ebx
0xcd717990:     popl    %esi
0xcd717991:     popl    %edi
0xcd717992:     leave
0xcd717993:     ret
0xcd717994:     movl    0xffffffdc(%ebp),%eax
0xcd717997:     testl   %eax,%eax
0xcd717999:     jz      0xcd7179ec
0xcd71799b:     movl    0xffffffe0(%ebp),%eax
// continues here

0xcd7179d4:     movl    0xffffffe0(%ebp),%eax //0xcd427000
0xcd7179d7:     subl    $-0x80,%eax //0xcd427080
0xcd7179da:     movl    %edi,%esi //0xc2a90938
0xcd7179dc:     movl    %eax,%edx //0xcd427080
0xcd7179de:     cld
0xcd7179df:     lodsl   (%esi)
0xcd7179e0:     movl    %eax,0(%edx)
// The "loop" will decrement "%ecx" and not "%cx" ???

0xcd7179e2:     loop    0xcd7179df
0xcd7179e4:     addl    $0x1c,%esp
0xcd7179e7:     popl    %ebx
0xcd7179e8:     popl    %esi
0xcd7179e9:     popl    %edi
0xcd7179ea:     leave
0xcd7179eb:     ret

550 #define bus_space_write_multi_4(t, h, o, ptr, cnt)                      \
551 do {                                                                    \
552         __BUS_SPACE_ADDRESS_SANITY((ptr), u_int32_t, "buffer");         \
553         __BUS_SPACE_ADDRESS_SANITY((h) + (o), u_int32_t, "bus addr");   \
554         if ((t) == X86_BUS_SPACE_IO) {                                  \
555                 outsl((h) + (o), (ptr), (cnt));                         \
556         } else {                                                        \
557                 void *dummy1;                                           \
558                 int dummy2;                                             \
559                 void *dummy3;                                           \
560                 int __x;                                                \
561                 __asm volatile("                                        \
562                         cld                                     ;       \
563                 1:      lodsl                                   ;       \
564                         movl %%eax,(%2)                         ;       \
565                         loop 1b"                                :       \
566                     "=S" (dummy1), "=c" (dummy2), "=r" (dummy3), "=&a" 
(__x) : \
567                     "" ((ptr)), "1" ((cnt)), "2" ((h) + (o)));         \
568         }                                                               \
569 } while (/* CONSTCOND */ 0)

db> show registers
ds          0x10
es          0x10
fs          0x30
gs          0x10
edi         0xc2a90938
esi         0xc2a9a000
ebp         0xcd3f2a5c
ebx         0
edx         0xcd427080
ecx         0xc311da50
eax         0x6d666900
eip         0xcd7179df
cs          0x8
eflags      0x10283
esp         0xcd3f2a34
ss          0x10
0xcd7179df:     lodsl   (%esi)

On FreeBSD it works and they use the following bus_space implementation:

static __inline void
bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh,
                        bus_size_t offset, const u_int32_t *addr, size_t 

        if (tag == I386_BUS_SPACE_IO)
                outsl(bsh + offset, addr, count);
        else {
                __asm __volatile("                              \n\
                        cld                                     \n\
                1:      lodsl                                   \n\
                        movl %%eax,(%2)                         \n\
                        loop 1b"                                :
                    "=S" (addr), "=c" (count)                   :
                    "r" (bsh + offset), "0" (addr), "1" (count) :
                    "%eax", "memory", "cc");
# ifndef lint
#  error "no assembler code for your compiler"
# endif


