Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/lib/libnvmm Micro optimizations:



details:   https://anonhg.NetBSD.org/src/rev/3c7b47af9866
branches:  trunk
changeset: 449451:3c7b47af9866
user:      maxv <maxv%NetBSD.org@localhost>
date:      Thu Mar 07 15:47:34 2019 +0000

description:
Micro optimizations:

 - Shrink x86_rexpref, x86_regmodrm, x86_opcode and x86_instr by using
   bit-fields and narrower integer types.
 - Align the register, opcode and group tables to cache lines.
 - Give the opcode tables 256 entries, indexed directly by the opcode
   byte, to avoid a lookup.

diffstat:

 lib/libnvmm/libnvmm_x86.c |  314 ++++++++++++++++++---------------------------
 1 files changed, 127 insertions(+), 187 deletions(-)

diffs (truncated from 786 to 300 lines):

diff -r b6eef6d64a5b -r 3c7b47af9866 lib/libnvmm/libnvmm_x86.c
--- a/lib/libnvmm/libnvmm_x86.c Thu Mar 07 15:22:21 2019 +0000
+++ b/lib/libnvmm/libnvmm_x86.c Thu Mar 07 15:47:34 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: libnvmm_x86.c,v 1.26 2019/02/26 12:23:12 maxv Exp $    */
+/*     $NetBSD: libnvmm_x86.c,v 1.27 2019/03/07 15:47:34 maxv Exp $    */
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -46,6 +46,7 @@
 #include "nvmm.h"
 
 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+#define __cacheline_aligned __attribute__((__aligned__(64)))
 
 #include <x86/specialreg.h>
 
@@ -904,15 +905,15 @@
        bool adr_ovr:1;
        bool rep:1;
        bool repn:1;
-       int seg;
+       int8_t seg;
 };
 
 struct x86_rexpref {
-       bool present;
-       bool w;
-       bool r;
-       bool x;
-       bool b;
+       bool b:1;
+       bool x:1;
+       bool r:1;
+       bool w:1;
+       bool present:1;
 };
 
 struct x86_reg {
@@ -962,10 +963,9 @@
 };
 
 struct x86_regmodrm {
-       bool present;
-       enum REGMODRM__Mod mod;
-       enum REGMODRM__Reg reg;
-       enum REGMODRM__Rm rm;
+       uint8_t mod:2;
+       uint8_t reg:3;
+       uint8_t rm:3;
 };
 
 struct x86_immediate {
@@ -999,22 +999,20 @@
 };
 
 struct x86_instr {
-       size_t len;
+       uint8_t len;
        struct x86_legpref legpref;
        struct x86_rexpref rexpref;
-       size_t operand_size;
-       size_t address_size;
+       struct x86_regmodrm regmodrm;
+       uint8_t operand_size;
+       uint8_t address_size;
        uint64_t zeroextend_mask;
 
-       struct x86_regmodrm regmodrm;
-
        const struct x86_opcode *opcode;
+       const struct x86_emul *emul;
 
        struct x86_store src;
        struct x86_store dst;
        struct x86_store *strm;
-
-       const struct x86_emul *emul;
 };
 
 struct x86_decode_fsm {
@@ -1030,22 +1028,21 @@
 };
 
 struct x86_opcode {
-       uint8_t byte;
-       bool regmodrm;
-       bool regtorm;
-       bool dmo;
-       bool todmo;
-       bool movs;
-       bool stos;
-       bool lods;
-       bool szoverride;
-       int defsize;
-       int allsize;
-       bool group1;
-       bool group3;
-       bool group11;
-       bool immediate;
-       int flags;
+       bool valid:1;
+       bool regmodrm:1;
+       bool regtorm:1;
+       bool dmo:1;
+       bool todmo:1;
+       bool movs:1;
+       bool stos:1;
+       bool lods:1;
+       bool szoverride:1;
+       bool group1:1;
+       bool group3:1;
+       bool group11:1;
+       bool immediate:1;
+       uint8_t defsize;
+       uint8_t flags;
        const struct x86_emul *emul;
 };
 
@@ -1062,59 +1059,56 @@
 #define FLAG_immz      0x02
 #define FLAG_ze                0x04
 
-static const struct x86_group_entry group1[8] = {
+static const struct x86_group_entry group1[8] __cacheline_aligned = {
        [1] = { .emul = &x86_emul_or },
        [4] = { .emul = &x86_emul_and },
        [6] = { .emul = &x86_emul_xor },
        [7] = { .emul = &x86_emul_cmp }
 };
 
-static const struct x86_group_entry group3[8] = {
+static const struct x86_group_entry group3[8] __cacheline_aligned = {
        [0] = { .emul = &x86_emul_test },
        [1] = { .emul = &x86_emul_test }
 };
 
-static const struct x86_group_entry group11[8] = {
+static const struct x86_group_entry group11[8] __cacheline_aligned = {
        [0] = { .emul = &x86_emul_mov }
 };
 
-static const struct x86_opcode primary_opcode_table[] = {
+static const struct x86_opcode primary_opcode_table[256] __cacheline_aligned = {
        /*
         * Group1
         */
-       {
+       [0x80] = {
                /* Eb, Ib */
-               .byte = 0x80,
+               .valid = true,
                .regmodrm = true,
                .regtorm = true,
                .szoverride = false,
                .defsize = OPSIZE_BYTE,
-               .allsize = -1,
                .group1 = true,
                .immediate = true,
                .emul = NULL /* group1 */
        },
-       {
+       [0x81] = {
                /* Ev, Iz */
-               .byte = 0x81,
+               .valid = true,
                .regmodrm = true,
                .regtorm = true,
                .szoverride = true,
                .defsize = -1,
-               .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
                .group1 = true,
                .immediate = true,
                .flags = FLAG_immz,
                .emul = NULL /* group1 */
        },
-       {
+       [0x83] = {
                /* Ev, Ib */
-               .byte = 0x83,
+               .valid = true,
                .regmodrm = true,
                .regtorm = true,
                .szoverride = true,
                .defsize = -1,
-               .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
                .group1 = true,
                .immediate = true,
                .flags = FLAG_imm8,
@@ -1124,26 +1118,24 @@
        /*
         * Group3
         */
-       {
+       [0xF6] = {
                /* Eb, Ib */
-               .byte = 0xF6,
+               .valid = true,
                .regmodrm = true,
                .regtorm = true,
                .szoverride = false,
                .defsize = OPSIZE_BYTE,
-               .allsize = -1,
                .group3 = true,
                .immediate = true,
                .emul = NULL /* group3 */
        },
-       {
+       [0xF7] = {
                /* Ev, Iz */
-               .byte = 0xF7,
+               .valid = true,
                .regmodrm = true,
                .regtorm = true,
                .szoverride = true,
                .defsize = -1,
-               .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
                .group3 = true,
                .immediate = true,
                .flags = FLAG_immz,
@@ -1153,26 +1145,24 @@
        /*
         * Group11
         */
-       {
+       [0xC6] = {
                /* Eb, Ib */
-               .byte = 0xC6,
+               .valid = true,
                .regmodrm = true,
                .regtorm = true,
                .szoverride = false,
                .defsize = OPSIZE_BYTE,
-               .allsize = -1,
                .group11 = true,
                .immediate = true,
                .emul = NULL /* group11 */
        },
-       {
+       [0xC7] = {
                /* Ev, Iz */
-               .byte = 0xC7,
+               .valid = true,
                .regmodrm = true,
                .regtorm = true,
                .szoverride = true,
                .defsize = -1,
-               .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
                .group11 = true,
                .immediate = true,
                .flags = FLAG_immz,
@@ -1182,353 +1172,321 @@
        /*
         * OR
         */
-       {
+       [0x08] = {
                /* Eb, Gb */
-               .byte = 0x08,
+               .valid = true,
                .regmodrm = true,
                .regtorm = true,
                .szoverride = false,
                .defsize = OPSIZE_BYTE,
-               .allsize = -1,
                .emul = &x86_emul_or
        },
-       {
+       [0x09] = {
                /* Ev, Gv */
-               .byte = 0x09,
+               .valid = true,
                .regmodrm = true,
                .regtorm = true,
                .szoverride = true,
                .defsize = -1,
-               .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
                .emul = &x86_emul_or
        },
-       {
+       [0x0A] = {
                /* Gb, Eb */
-               .byte = 0x0A,
+               .valid = true,
                .regmodrm = true,
                .regtorm = false,
                .szoverride = false,
                .defsize = OPSIZE_BYTE,
-               .allsize = -1,
                .emul = &x86_emul_or
        },
-       {
+       [0x0B] = {
                /* Gv, Ev */
-               .byte = 0x0B,
+               .valid = true,
                .regmodrm = true,
                .regtorm = false,
                .szoverride = true,
                .defsize = -1,



Home | Main Index | Thread Index | Old Index