Skip to content
This repository has been archived by the owner on Dec 1, 2021. It is now read-only.

go BLAS #17

Open
zmonoid opened this issue Oct 24, 2018 · 2 comments
Open

go BLAS #17

zmonoid opened this issue Oct 24, 2018 · 2 comments

Comments

@zmonoid
Copy link

zmonoid commented Oct 24, 2018

Hi,

I am trying to convert OpenBLAS to Go assembly with this project.

Here is a simple C program:

// degmm.c
#include <cblas.h>

/*
 * Run one small double-precision GEMM through CBLAS:
 *
 *     C = alpha * A * B^T + beta * C      (alpha = 1.0, beta = 2.0)
 *
 * All matrices are column-major. A and B are both stored as 3x2 with a
 * leading dimension of 3; B is passed with CblasTrans, so the product is
 * (3x2) * (2x3) and C is 3x3 with a leading dimension of 3.
 * The result is left in the local array C and is not returned.
 */
void degmm(void)
{
  double A[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
  double B[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
  double C[9] = {.5,.5,.5,.5,.5,.5,.5,.5,.5};

  /* M = 3, N = 3, K = 2; lda = ldb = ldc = 3. */
  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,
              3, 3, 2,
              1.0, A, 3,
              B, 3,
              2.0, C, 3);
}

/* Program entry point: run the GEMM example once, then exit with status 0. */
int main()
{
    degmm();
    return 0;
}

I compile it with the following command:

clang-6.0 -O3 -mavx -mfma -masm=intel -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -S degmm.c

This produces the following assembly code in degmm.s:

	.text
	.intel_syntax noprefix
	.file	"degmm.c"
	.section	.rodata.cst32,"aM",@progbits,32
	.p2align	5               # -- Begin function degmm
# 32-byte vector constant {1, 2, 1, -3}: the first four elements of the
# A and B initializers in degmm.c, fetched below with a single ymm load.
.LCPI0_0:
	.quad	4607182418800017408     # double 1
	.quad	4611686018427387904     # double 2
	.quad	4607182418800017408     # double 1
	.quad	-4609434218613702656    # double -3
	.section	.rodata.cst8,"aM",@progbits,8
	.p2align	3
# Scalar double constants loaded into xmm0 and xmm1 right before the call.
.LCPI0_1:
	.quad	4607182418800017408     # double 1
.LCPI0_2:
	.quad	4611686018427387904     # double 2
	.text
	.globl	degmm
	.p2align	4, 0x90
	.type	degmm,@function
#-----------------------------------------------------------------------
# void degmm()  -- compiled from degmm.c
# Builds three local double arrays on the stack and passes them to
# cblas_dgemm. Reads no argument registers; nothing is returned.
#
# Frame after `sub rsp, 168` (offsets relative to rsp before the pushes):
#   [rsp +  0, rsp +  48)  six doubles  -> third stack argument  (B)
#   [rsp + 48, rsp +  96)  six doubles  -> first stack argument  (A)
#   [rsp + 96, rsp + 168)  nine doubles -> fifth stack argument  (C),
#                          copied from .Ldegmm.C
# Six 8-byte pushes follow, so 168 + 48 = 216 bytes are released at exit.
#-----------------------------------------------------------------------
degmm:                                  # @degmm
# %bb.0:
	sub	rsp, 168                # 48 + 48 + 72 bytes for the three arrays
	vmovaps	ymm0, ymmword ptr [rip + .LCPI0_0] # ymm0 = [1.000000e+00,2.000000e+00,1.000000e+00,-3.000000e+00]
	vmovups	ymmword ptr [rsp + 48], ymm0    # A[0..3]
	movabs	rax, 4616189618054758400        # bit pattern of double 4.0
	mov	qword ptr [rsp + 80], rax       # A[4] = 4.0
	movabs	rcx, -4616189618054758400       # bit pattern of double -1.0
	mov	qword ptr [rsp + 88], rcx       # A[5] = -1.0
	vmovups	ymmword ptr [rsp], ymm0         # B[0..3], same values as A
	mov	qword ptr [rsp + 32], rax       # B[4] = 4.0
	mov	qword ptr [rsp + 40], rcx       # B[5] = -1.0
# Copy the 72-byte image at .Ldegmm.C to [rsp + 96]: one qword (bytes 64..71)
# plus two 32-byte vectors (bytes 32..63 and 0..31).
	mov	rax, qword ptr [rip + .Ldegmm.C+64]
	mov	qword ptr [rsp + 160], rax
	vmovups	ymm0, ymmword ptr [rip + .Ldegmm.C+32]
	vmovups	ymmword ptr [rsp + 128], ymm0
	vmovups	ymm0, ymmword ptr [rip + .Ldegmm.C]
	vmovups	ymmword ptr [rsp + 96], ymm0
# Take the array addresses now, before the pushes below move rsp.
	lea	rax, [rsp + 96]                 # rax = &C[0]
	mov	r10, rsp                        # r10 = &B[0]
	lea	r11, [rsp + 48]                 # r11 = &A[0]
	vmovsd	xmm0, qword ptr [rip + .LCPI0_1] # xmm0 = mem[0],zero
	vmovsd	xmm1, qword ptr [rip + .LCPI0_2] # xmm1 = mem[0],zero
# The six integer register arguments, in the order written in degmm.c:
# CblasColMajor, CblasNoTrans, CblasTrans, 3, 3, 2.
	mov	edi, 102
	mov	esi, 111
	mov	edx, 112
	mov	ecx, 3
	mov	r8d, 3
	mov	r9d, 2
# Remaining arguments go on the stack, pushed last-to-first so that the
# first stack argument ends up at the lowest address: A, 3, B, 3, C, 3.
	push	3
	push	rax
	push	3
	push	r10
	push	3
	push	r11
	vzeroupper                              # clear upper ymm halves before calling out
	call	cblas_dgemm
	add	rsp, 216                        # 168 bytes of locals + 6 * 8 bytes of pushed arguments
	ret
.Lfunc_end0:
	.size	degmm, .Lfunc_end0-degmm
                                        # -- End function
	.section	.rodata.cst32,"aM",@progbits,32
	.p2align	5               # -- Begin function main
# 32-byte vector constant {1, 2, 1, -3} used to fill the first four
# elements of both six-element stack arrays in main.
.LCPI1_0:
	.quad	4607182418800017408     # double 1
	.quad	4611686018427387904     # double 2
	.quad	4607182418800017408     # double 1
	.quad	-4609434218613702656    # double -3
	.section	.rodata.cst8,"aM",@progbits,8
	.p2align	3
# Scalar double constants loaded into xmm0 and xmm1 right before the call.
.LCPI1_1:
	.quad	4607182418800017408     # double 1
.LCPI1_2:
	.quad	4611686018427387904     # double 2
	.text
	.globl	main
	.p2align	4, 0x90
	.type	main,@function
#-----------------------------------------------------------------------
# int main()  -- compiled from degmm.c
# There is no `call degmm` here: the body sets up the same three stack
# arrays and the same cblas_dgemm call directly, then returns 0 in eax.
#
# Frame after `sub rsp, 168` (offsets relative to rsp before the pushes):
#   [rsp +  0, rsp +  48)  six doubles  -> third stack argument
#   [rsp + 48, rsp +  96)  six doubles  -> first stack argument
#   [rsp + 96, rsp + 168)  nine doubles -> fifth stack argument,
#                          copied from .Ldegmm.C
#-----------------------------------------------------------------------
main:                                   # @main
# %bb.0:
	sub	rsp, 168                # 48 + 48 + 72 bytes for the three arrays
	vmovaps	ymm0, ymmword ptr [rip + .LCPI1_0] # ymm0 = [1.000000e+00,2.000000e+00,1.000000e+00,-3.000000e+00]
	vmovups	ymmword ptr [rsp + 48], ymm0    # elements 0..3 of the array at rsp + 48
	movabs	rax, 4616189618054758400        # bit pattern of double 4.0
	mov	qword ptr [rsp + 80], rax       # element 4 = 4.0
	movabs	rcx, -4616189618054758400       # bit pattern of double -1.0
	mov	qword ptr [rsp + 88], rcx       # element 5 = -1.0
	vmovups	ymmword ptr [rsp], ymm0         # elements 0..3 of the array at rsp
	mov	qword ptr [rsp + 32], rax       # element 4 = 4.0
	mov	qword ptr [rsp + 40], rcx       # element 5 = -1.0
# Copy the 72-byte image at .Ldegmm.C to [rsp + 96]: one qword (bytes 64..71)
# plus two 32-byte vectors (bytes 32..63 and 0..31).
	mov	rax, qword ptr [rip + .Ldegmm.C+64]
	mov	qword ptr [rsp + 160], rax
	vmovups	ymm0, ymmword ptr [rip + .Ldegmm.C+32]
	vmovups	ymmword ptr [rsp + 128], ymm0
	vmovups	ymm0, ymmword ptr [rip + .Ldegmm.C]
	vmovups	ymmword ptr [rsp + 96], ymm0
# Take the array addresses now, before the pushes below move rsp.
	lea	rax, [rsp + 96]
	mov	r10, rsp
	lea	r11, [rsp + 48]
	vmovsd	xmm0, qword ptr [rip + .LCPI1_1] # xmm0 = mem[0],zero
	vmovsd	xmm1, qword ptr [rip + .LCPI1_2] # xmm1 = mem[0],zero
# The six integer register arguments of the cblas_dgemm call.
	mov	edi, 102
	mov	esi, 111
	mov	edx, 112
	mov	ecx, 3
	mov	r8d, 3
	mov	r9d, 2
# Remaining arguments go on the stack, pushed last-to-first so that the
# first stack argument ends up at the lowest address.
	push	3
	push	rax
	push	3
	push	r10
	push	3
	push	r11
	vzeroupper                              # clear upper ymm halves before calling out
	call	cblas_dgemm
	add	rsp, 48                         # drop the six pushed arguments (6 * 8 bytes)
	xor	eax, eax                        # return value 0
	add	rsp, 168                        # release the local arrays
	ret
.Lfunc_end1:
	.size	main, .Lfunc_end1-main
                                        # -- End function
	.type	.Ldegmm.C,@object       # @degmm.C
	.section	.rodata,"a",@progbits
	.p2align	4
# Read-only initializer image for the nine-element array C in degmm.c:
# nine doubles, each 0.5 (9 * 8 = 72 bytes). Both degmm and main copy it
# onto their stack frame before calling cblas_dgemm.
.Ldegmm.C:
	.quad	4602678819172646912     # double 0.5
	.quad	4602678819172646912     # double 0.5
	.quad	4602678819172646912     # double 0.5
	.quad	4602678819172646912     # double 0.5
	.quad	4602678819172646912     # double 0.5
	.quad	4602678819172646912     # double 0.5
	.quad	4602678819172646912     # double 0.5
	.quad	4602678819172646912     # double 0.5
	.quad	4602678819172646912     # double 0.5
	.size	.Ldegmm.C, 72


	.ident	"clang version 6.0.0-1ubuntu2 (tags/RELEASE_600/final)"
	.section	".note.GNU-stack","",@progbits

Then I convert it to Go assembly:

c2goasm -a degmm.s Degmm.s

c2goasm reports the following error:

Processing cpp/degmm.s
panic: 'sub rsp' found but in unexpected scenario:      sub     rsp, 168

goroutine 1 [running]:
main.(*Epilogue).isPrologueInstruction(0xc0000575b8, 0xc0000171a0, 0xd, 0xd)
        /home/bzhou/go/src/github.com/minio/c2goasm/epilogue.go:205 +0x3d4
main.getPrologueLines(0xc0000d0150, 0x26, 0xeb, 0xc0000575b8, 0x0)
        /home/bzhou/go/src/github.com/minio/c2goasm/subroutine.go:231 +0xb4
main.(*Subroutine).removePrologueLines(0xc000057590, 0xc0000d0000, 0x8d, 0x100, 0x15, 0x3b)
        /home/bzhou/go/src/github.com/minio/c2goasm/subroutine.go:134 +0x87
main.extractSubroutine(0x3a, 0xc0000d0000, 0x8d, 0x100, 0x11, 0xc000017188, 0x5, 0x14, 0x0, 0x0, ...)
        /home/bzhou/go/src/github.com/minio/c2goasm/subroutine.go:127 +0x2cd
main.segmentSource(0xc0000d0000, 0x8d, 0x100, 0x20, 0xf, 0xc)
        /home/bzhou/go/src/github.com/minio/c2goasm/subroutine.go:85 +0x20e
main.process(0xc0000d0000, 0x8d, 0x100, 0xc000017058, 0x8, 0x0, 0x0, 0x0, 0x0, 0x0)
        /home/bzhou/go/src/github.com/minio/c2goasm/c2goasm.go:78 +0x80
main.main()
        /home/bzhou/go/src/github.com/minio/c2goasm/c2goasm.go:264 +0x37e

Any suggestions?

@fwessels
Copy link
Contributor

Maybe experiment with -mno-red-zone and/or -mstackrealign options?

@zmonoid
Copy link
Author

zmonoid commented Oct 25, 2018

@fwessels I still get the same error. I suspect it may be a compiler issue: the assembly I generated is different from the one given in the example. May I ask which compiler version you are using?

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants