Sign Extension in GCC Inline Assembler


Inline assembler is a kind of magic. It lets you take advantage of both hand-written assembly and GCC's code optimization.

For example, imagine system calls on ARM.

Inline Assembly:

/*
 * open - issue a system call directly via "svc #0", passing path and
 * flags as its arguments.
 *
 * path:  handed to the kernel in r0.
 * flags: handed to the kernel in r1.
 *
 * Returns whatever value the kernel leaves in r0.
 *
 * NOTE(review): no system call number is loaded here (ARM EABI Linux
 * expects it in r7) -- presumably omitted to keep the example short;
 * confirm before using this outside the article.
 */
static inline int open(const char *path, int flags)
{
	/* Pin each variable to the register the trap reads or writes. */
	register const char *r0 __asm__("r0");
	register int r1 __asm__("r1");
	register int res __asm__("r0");

	r0 = path;
	r1 = flags;

	/*
	 * volatile: the trap has side effects, so GCC must not delete the
	 * statement when the result is unused or merge two identical calls.
	 * "memory": the kernel reads the string behind path, so any pending
	 * stores to it have to be completed before the trap.
	 */
	__asm__ volatile ("svc #0" : "=r"(res) : "r"(r0), "r"(r1) : "memory");

	return res;
}

Assembly:

	/* Out-of-line version of open: a real function symbol that callers
	   must reach with a call, so it can never be inlined. */
	.global	open
	.type	open, %function
open:
	/* Trap into the kernel; the arguments are used exactly as the caller
	   placed them, nothing is set up here. */
	svc	#0
	/* Return to the caller. */
	bx	lr
	.size	open, . - open

Can you see the difference? The inline assembly version has the inline qualifier in the function declaration, so the compiler can inline it into the caller. The pure assembly version, however, can't be inlined. It is always called as a function, so the caller must follow the ABI: it saves registers before the call and restores them afterwards. That can be a large overhead for system calls, which are used very often.

Now you should understand why inline assembly is useful. However, you should also know its dangers: inline assembly can behave in unexpected ways.

Here is MIPS code.

   /* 16-bit signed inputs; each one is handed to an asm statement below
      as an "r" (general register) input operand. */
   static short C4WFXVal;
   static short C4WFYVal;
   static short C4WFZVal;
   static short C4WFX2Val;
   static short C4WFY2Val;
   static short C4WFDist;
   static short C4WFScale;

    /* Pass four of the shorts to mtv, targeting S000..S003.  The operands
       have type short, so the asm only asks for "a register holding this
       short"; it does not say how the upper register bits are filled.
       NOTE(review): mtv/vi2f/vscl/vcos/vsin look like vector-unit
       instructions (mtv = move to vector register, vi2f = int to float)
       -- confirm against the target's ISA manual. */
    __asm__ volatile ("mtv %0, S000\n" :: "r"(C4WFX2Val));
    __asm__ volatile ("mtv %0, S001\n" :: "r"(C4WFY2Val));
    __asm__ volatile ("mtv %0, S002\n" :: "r"(C4WFDist));
    __asm__ volatile ("mtv %0, S003\n" :: "r"(C4WFScale));
    /* Operates only on the vector registers named in the template; no C
       operands are involved in this statement. */
    __asm__ volatile ("vi2f.q C000, C000, 0\n"
                      "vfim.s S010, -0.03125\n"
                      "vscl.t C000, C000, S010\n"
                      "vcos.t C010, C000\n"
                      "vsin.t C000, C000\n");

    /* Same pattern for the remaining three shorts, targeting S020..S022. */
    __asm__ volatile ("mtv    %0, S020\n" :: "r"(C4WFXVal));
    __asm__ volatile ("mtv    %0, S021\n" :: "r"(C4WFYVal));
    __asm__ volatile ("mtv    %0, S022\n" :: "r"(C4WFZVal));
    __asm__ volatile ("vi2f.t C021, C020, 0\n");

Usually short values are loaded with the lh instruction, which sign-extends the value. However, that's not always true with inline assembly. I got the following code.

	/* Generated code for the first two mtv statements.  lhu is "load
	   halfword unsigned": the upper 16 bits of $v0 become zero, so a
	   negative short is NOT sign-extended before mtv sees it. */
	lui	$v0, %hi(C4WFX2Val)
	lhu	$v0, $v0(%lo(C4WFX2Val))
	mtv	$v0, S000
	lui	$v0, %hi(C4WFY2Val)
	lhu	$v0, $v0(%lo(C4WFY2Val))
	mtv	$v0, S001
	# ... continues

See? This code doesn't sign-extend the values! (lhu zero-extends them instead.) Then, how about the following code?

   /* Same 16-bit signed inputs as in the previous example. */
   static short C4WFXVal;
   static short C4WFYVal;
   static short C4WFZVal;
   static short C4WFX2Val;
   static short C4WFY2Val;
   static short C4WFDist;
   static short C4WFScale;

    /* Empty template: the only effect is to force C4WFX2Val into a
       register, which exposes how GCC chooses to load it. */
    __asm__ volatile ("" :: "r"(C4WFX2Val));
	/* Generated code: this time the load is lh (load halfword, signed),
	   so the value is sign-extended. */
	lui	$v0, %hi(C4WFX2Val)
	lh	$v0, $v0(%lo(C4WFX2Val))

LOL, now it uses lh and the value is sign-extended. Let's try another experiment.

   /* Same 16-bit signed inputs as in the previous examples. */
   static short C4WFXVal;
   static short C4WFYVal;
   static short C4WFZVal;
   static short C4WFX2Val;
   static short C4WFY2Val;
   static short C4WFDist;
   static short C4WFScale;

    /* An empty asm followed by two mtv statements, all with short operands. */
    __asm__ volatile ("" :: "r"(C4WFX2Val));
    __asm__ volatile ("mtv %0, S000\n" :: "r"(C4WFX2Val));
    __asm__ volatile ("mtv %0, S001\n" :: "r"(C4WFY2Val));
	/* Generated code: C4WFY2Val is loaded with lh (sign-extended) while
	   C4WFX2Val is loaded with lhu (zero-extended) -- the two operands
	   are treated differently within the same sequence. */
	lui	$v0, %hi(C4WFY2Val)
	lh	$v0, $v0(%lo(C4WFY2Val))
	lui	$v1, %hi(C4WFX2Val)
	lhu	$v1, $v1(%lo(C4WFX2Val))
	mtv	$v1, S000
	mtv	$v0, S001

As you can see, it's unspecified whether the values get sign-extended. The problem is solved by explicitly casting each operand to int, as in the following code.

   /* 16-bit signed inputs; each one is widened to int at the point where
      it is handed to an asm statement below. */
   static short C4WFXVal;
   static short C4WFYVal;
   static short C4WFZVal;
   static short C4WFX2Val;
   static short C4WFY2Val;
   static short C4WFDist;
   static short C4WFScale;

    /* The (int) cast makes the operand an int whose value equals the
       short's value, so the whole register must hold the sign-extended
       number rather than just a valid low halfword. */
    __asm__ volatile ("mtv %0, S000\n" :: "r"((int)C4WFX2Val));
    __asm__ volatile ("mtv %0, S001\n" :: "r"((int)C4WFY2Val));
    __asm__ volatile ("mtv %0, S002\n" :: "r"((int)C4WFDist));
    __asm__ volatile ("mtv %0, S003\n" :: "r"((int)C4WFScale));
    /* Operates only on the vector registers named in the template; no C
       operands are involved in this statement. */
    __asm__ volatile ("vi2f.q C000, C000, 0\n"
                      "vfim.s S010, -0.03125\n"
                      "vscl.t C000, C000, S010\n"
                      "vcos.t C010, C000\n"
                      "vsin.t C000, C000\n");

    /* Same cast applied to the remaining three shorts (S020..S022). */
    __asm__ volatile ("mtv    %0, S020\n" :: "r"((int)C4WFXVal));
    __asm__ volatile ("mtv    %0, S021\n" :: "r"((int)C4WFYVal));
    __asm__ volatile ("mtv    %0, S022\n" :: "r"((int)C4WFZVal));
    __asm__ volatile ("vi2f.t C021, C020, 0\n");

Be careful!

[top]