diff --git a/amogus.h b/amogus.h index 0983a12..17c7319 100644 --- a/amogus.h +++ b/amogus.h @@ -1,14 +1,3 @@ -// int entry(...) -// -// Parse regex. Find matching string. -// If found: -// call callback( -// matched string (only valid until callback return), -// length of the string including NUL delimiter, -// ) -// return 0 -// Else: -// return ERR typedef void(entry_callback)(char *match, int size, void *data); int entry(char *regex, entry_callback *callback, void *data); diff --git a/build b/build index 5193d3e..3c9e2c8 100755 --- a/build +++ b/build @@ -1,7 +1,7 @@ #!/bin/sh mkdir -p target ./charjmpt.lua > target/charjmpt.s -gcc -c meow.s -o target/amogus.o || exit +gcc -O2 -Os -c meow.s -o target/amogus.o || exit if command -v ~/stuff/zig/zig >/dev/null; then ~/stuff/zig/zig cc -shared -nostdlib target/amogus.o -o target/amogus || exit elif command -v ld.lld >/dev/null; then diff --git a/ctest b/ctest index 8f890a8..ba4e6ab 100755 --- a/ctest +++ b/ctest @@ -1,5 +1,5 @@ -#!/bin/sh -./build || exit +#!/bin/bash +{ ./build || exit; } 2>&1>/dev/null export C_INCLUDE_PATH="$PWD:$C_INCLUDE_PATH" export LD_LIBRARY_PATH="$PWD/target:$LD_LIBRARY_PATH" export LIBRARY_PATH="$PWD/target:$LIBRARY_PATH" @@ -8,9 +8,9 @@ if [ -z "$NODEBUG" ]; then gdb -q \ -iex 'set confirm no' \ - -ex 'layout asm' \ -ex 'start' \ target/cmogus + #-ex 'layout asm' \ else target/cmogus fi diff --git a/meow.s b/meow.s index f0aea36..c9e0f9a 100644 --- a/meow.s +++ b/meow.s @@ -51,22 +51,29 @@ strlen: // rdi - buf // rsi - len write: + push %rdx push %rdi push %rsi pop %rdx pop %rsi mov $1,%rax mov $1,%rdi + push %rcx + push %r11 syscall + pop %r11 + pop %rcx + pop %rdx ret help0: .ascii "Usage: " .set help0l, .-help0 + .ascii "./amogus" help1: - .ascii " regex0 [regex1 ...]\n" - .ascii "writes NUL-separated list of matching strings, one per regex to stdout\n" - .ascii "uses stack memory, consider disabling the limit if it segfaults\n" + .ascii "\nRead from standard input, write to standard 
output." + .ascii "\nFind a string matched by a regular expression.\n" + .ascii "\nAssumes unlimited call stack space.\n" .set help1l, .-help1 // rdi - exit code @@ -75,7 +82,26 @@ _exit: syscall ret -.include "target/charjmpt.s" +.macro print str="" + jmp printps\@ + print\@: +.altmacro + .ascii "\str" +.noaltmacro + .set print\@l , . - print\@ + printps\@: + push %rdi + push %rsi + push %rdx + push %rax + lea "print\@" (%rip),%rdi + mov $"print\@l" , %rsi + call write + pop %rax + pop %rdx + pop %rsi + pop %rdi +.endm .macro eaptrdiff32 ptr=%rax, out=%rax, off=0, tmp=%r11 eaptrdiff32.\@: @@ -88,12 +114,17 @@ _exit: push \tmp mov \ptr,\tmp movslq (\tmp),\tmp + test \tmp,\tmp + jnz eaptrdiffnz.\@ + xor \ptr,\ptr + eaptrdiffnz.\@: add \tmp,\ptr pop \tmp .if \ptr != \out mov \ptr,\out pop \ptr .endif + eaptrdiff32o.\@: .endm .macro mkptrdiff32 to=%r11, to_low=%r11d, ptr=%rax, off=0 @@ -111,96 +142,6 @@ _exit: pop \to .endm -// rdi - &state -// rsi - char -// rcx, r8, r9, r10, r11 - scratch -// r12 - real stack (rsp - fake stack) -// -> rax - status -//parse_...: - -append_tnode_recurse: - - -// rdi - &group_alt -// rsi - type -// -> rax - text_node ptr -push_tnode: - movslq (%rdi),%rcx - sub $(5*4),%r12 - and $-8,%r12 - movl %esi,0(%r12) - movl $0,4(%r12) - mov %r12,%rax - mkptrdiff32 %rax,%eax,%rdi - test %rcx,%rcx - jz ptn_ptout - add %rdi,%rcx - mkptrdiff32 %rax,%eax,%rcx,4 - ptn_ptout: - movslq 4(%rdi),%rcx - test %rcx,%rcx - jnz ptn_jout - mkptrdiff32 %rax,%eax,%rdi,4 - ptn_jout: - ret - -parse_escape: - incq (2*8)(%rdi) - mov (2*8)(%rdi),%rsi - movzbq (%rsi),%rsi - test %sil,%sil - jz parse_exit - jmp parse_self - -parse_self: - push %rdi - push %rsi - sub $2,%rsp - movw $0,(%rsp) - movb %sil,(%rsp) - mov %rsp,%rdi - mov $1,%rsi - call write - add $2,%rsp - pop %rsi - pop %rdi - push %rsi - movq (%rdi),%rax # rax - group - eaptrdiff32 %rax # rax - group_alt - mov %rax,%rdi - movslq (%rax),%r9 - test %r9,%r9 - jz ps_alloc - eaptrdiff32 %rax # rax - text_node - 
movl (%rax),%r9d - cmp $3,%r9d - je ps_noalloc - ps_alloc: - mov $3,%rsi - call push_tnode - dec %r12 - mkptrdiff32 %r12,%r12d,%rax,(8+0) # text_chars.text - inc %r12 - movl $0,(8+4)(%rax) # text_chars.len - ps_noalloc: - eaptrdiff32 %rax,%r9,(8+0) - pop %rsi - movl (8+4)(%rax),%r8d - sub %r8,%r9 - movb %sil,(%r9) - incl (8+4)(%rax) - xor %rax,%rax - ret - -parse_grbegin: -parse_grend: -parse_murder: -parse_nextalt: -parse_erase: -parse_exit: - mov $1,%rax - ret - // rdi - regex // rsi - callback // rdx - cb data @@ -233,6 +174,19 @@ entry: entry_parse_begin: mov (-3*8-5*8 + 2*8)(%rbp),%rsi movzbq (%rsi),%rsi # char to sil + + print "\t[[ char=" + push %rdi + push %rsi + push %rax + mov (-3*8-5*8 + 2*8)(%rbp),%rdi + mov $1,%rsi + call write + pop %rax + pop %rsi + pop %rdi + print " ]]\n" + lea charjmpt(%rip),%r11 movzx %sil,%r10 shl $1,%r10 @@ -242,12 +196,28 @@ entry: lea charjmpt_prej(%rip),%r10 add %r10,%r11 lea (-3*8-5*8)(%rbp),%rdi # &state + epl_call: call *%r11 + cmp $1,%rax + jg entry_parse_fail test %rax,%rax jnz entry_parse_end incq (-3*8-5*8+2*8)(%rbp) jmp entry_parse_begin entry_parse_end: + xchg %r12,%rsp + #mov $0,%rax + lea (-3*8-5*8)(%rbp),%rdi # &state + mov 8(%rdi),%rsi # mother_gr + eaptrdiff32 %rsi,%rsi,4 # alts_head + xor %rdx,%rdx # len + xor %rcx,%rcx # tnode + mov %rsp,%r12 + call traverse_ast + mov $1,%rax + entry_untraverse: + entry_parse_fail: + mov %r12,%rsp mov -8(%rbp),%r12 mov -16(%rbp),%r13 mov -24(%rbp),%r14 @@ -255,6 +225,352 @@ entry: pop %rbp ret +// rdi - &state +// rsi - option<&group_alt> +// rdx - len +// rcx - option<&tnode> +traverse_ast: + print "trav {\n" + test %rsi,%rsi + jz tast_bye + push %rsi + push %rdx + push %rcx + eaptrdiff32 %rsi,%rsi,4 + call traverse_galt + pop %rcx + pop %rdx + pop %rsi + eaptrdiff32 %rsi,%rsi,(2*4) + jmp traverse_ast + tast_bye: + print "} trav\n" + ret + +// rdi - &state +// rsi - option<&tnode> +// rdx - len +// rcx - option<&tnode> +traverse_galt: + print "travgalt {\n" + test %rsi,%rsi 
+ jnz tgand + tgfin: + test %rcx,%rcx + jz tgad + print "Stringing\n" + dec %rsp + movb $0,(%rsp) + tgalb: + movl (2*4+1*4)(%rcx),%r8d + eaptrdiff32 %rcx,%r9,(2*4) + tgscpb: + test %r8,%r8 + jz tgscpe + print "char=" + dec %rsp + mover: + movb (%r9),%r10b + movb %r10b,(%rsp) + + push %rdi + push %rsi + push %rax + mov %rsp,%rdi + mov $1,%rsi + call write + pop %rax + pop %rsi + pop %rdi + print "\n" + + dec %r9 + dec %r8 + jmp tgscpb + tgscpe: + eaptrdiff32 %rcx,%rcx,(2*4+2*4) + test %rcx,%rcx + jnz tgalb + tgad: + mov %rdi,%rcx + mov %rsp,%rdi + mov %rdx,%rsi + mov (4*8)(%rcx),%rdx + mov (3*8)(%rcx),%rcx + and $-16,%rsp + call *%rcx + xor %rax,%rax + jmp entry_untraverse + tgand: + movl (%rsi),%r9d + cmp $1,%r9 + je tgt_murder + cmp $2,%r9 + je tgt_bye + cmp $3,%r9 + je tgt_chars + cmp $4,%r9 + je tgt_group + movb $0,0 + tgt_murder: + print "(murder)\n" + ret + tgt_wiped: + print "(wiped)\n" + jmp tgt_bye + tgt_chars: + print "(chars)\n" + movl (2*4+1*4)(%rsi),%r8d + add %r8,%rdx + movl $0,(2*4+2*4)(%rsi) + test %rcx,%rcx + jz nosetptr + mkptrdiff32 %rcx,%ecx,%rsi,(2*4+2*4) + nosetptr: + mov %rsi,%rcx + jmp tgt_bye + tgt_group: + print "(group)\n" + push %rsi + eaptrdiff32 %rsi,%rsi,(2*4+1*4) + call traverse_ast + pop %rsi + tgt_bye: + print "bye\n" + eaptrdiff32 %rsi,%rsi,4 + jmp traverse_galt + +// rdi - &state +// rsi - char +// rcx, r8, r9, r10, r11 - scratch +// r12 - real stack (rsp - fake stack) +// -> rax - status +//parse_...: + +parse_escape: + incq (2*8)(%rdi) + mov (2*8)(%rdi),%rsi + movzbq (%rsi),%rsi + test %sil,%sil + jz parse_exit + jmp parse_self + +parse_self: + /* + push %rdi # DBG{ + push %rsi + mov %rsp,%rdi + mov $1,%rsi + call write + pop %rsi + pop %rdi # DBG} + //*/ + + push %rsi + movq (%rdi),%rax # rax - group + eaptrdiff32 %rax # rax - group_alt + mov %rax,%rdi + movslq (%rax),%r9 + test %r9,%r9 + jz ps_alloc + eaptrdiff32 %rax # rax - text_node + movl (%rax),%r9d + cmp $3,%r9d + je ps_noalloc + ps_alloc: + mov $3,%rsi + print "(alloc)" 
+ call push_tnode + dec %r12 + mkptrdiff32 %r12,%r12d,%rax,(8+0) # text_chars.text + inc %r12 + movl $0,(8+4)(%rax) # text_chars.len + ps_noalloc: + eaptrdiff32 %rax,%r9,(8+0) + pop %rsi + movl (8+4)(%rax),%r8d + movsil: + dec %r12 + sub %r8,%r9 + movb %sil,(%r9) + + push %rdi + push %rsi + push %rax + mov %r12,%rdi + mov $1,%rsi + call write + pop %rax + pop %rsi + pop %rdi + + incl (8+4)(%rax) + xor %rax,%rax + ret + +.include "target/charjmpt.s" + +parse_grbegin: + push %rdi + movq (%rdi),%rax # rax - group + push %rax + eaptrdiff32 %rax,%rdi # rdi - group_alt + mov $4,%rsi + call push_tnode # rax - ng_text_node + pop %rsi # rsi - group + pop %rdi # rdi - state + lea (2*4)(%rax),%rax # rax - newgroup + mkptrdiff32 %rsi,%esi,%rax,(2*4) + mov %rax,(%rdi) + sub $(3*4),%r12 + and $-4,%r12 + mkptrdiff32 %r12,%r12d,%rax + mkptrdiff32 %r12,%r12d,%rax,4 + movl $0,(0*4)(%r12) + movl $0,(1*4)(%r12) + movl $0,(2*4)(%r12) + xor %rax,%rax + ret + +parse_grend: + movq (%rdi),%rax # rax - group + movl (2*4)(%rax),%esi # rsi - upgroup off + pgrt: + test %rsi,%rsi + jnz pgre_nodie + mov $2,%rax + ret + pgre_nodie: + eaptrdiff32 %rax,%rax,(2*4) # rax - upgroup + mov %rax,(%rdi) + xor %rax,%rax + ret + +parse_nextalt: + mov (%rdi),%rdi # group + eaptrdiff32 %rdi,%rsi,(0*4) # group_alt orig + sub $(3*4),%r12 + and $-4,%r12 + movl $0,(0*4)(%r12) + movl $0,(1*4)(%r12) + movl $0,(2*4)(%r12) + mkptrdiff32 %r12,%r12d,%rsi,(2*4) + mkptrdiff32 %r12,%r12d,%rdi + xor %rax,%rax # rax - status 0: entry branches on rax after every handler + ret + +parse_murder: + incq (2*8)(%rdi) + mov (2*8)(%rdi),%rsi + movzbq (%rsi),%rsi + cmp $'],%sil + je pm_succ + mov $3,%rax + ret + pm_succ: + mov (%rdi),%rdi + eaptrdiff32 %rdi + mov $1,%rsi + call push_tnode + xor %rax,%rax + ret + +parse_erase: + mov (%rdi),%rsi # group + eaptrdiff32 %rsi # group_alt + movl (%rsi),%ecx + test %rcx,%rcx + je pers_push + eaptrdiff32 %rsi,%rcx # text_node + movl (%rcx),%r8d # type + cmp $3,%r8d + je pers_text + jmp pers_set + pers_text: + movl (2*4+1*4)(%rcx),%r8d # text_chars.len (offset 2*4 is text_chars.text) + test %r8d,%r8d + jz pers_set + 
decl (2*4+1*4)(%rcx) # text_chars.len - drop the last char + inc %r12 + xor %rax,%rax + ret + pers_set: + movl $2,(%rcx) + mov %rcx,%r12 + xor %rax,%rax + ret + pers_push: + mov %rsi,%rdi + mov $2,%rsi + call push_tnode + xor %rax,%rax + ret + +parse_exit: + mov $1,%rax + ret + +// rdi - &tnode to +// rsi - &tnode ptr +append_tnode_recurse: + mkptrdiff32 %rdi,%edi,%rsi,4 + movl (%rsi),%r8d + cmp $4,%r8d + jne atnr_exit + lea (2*4+1*4)(%rsi),%rsi + atnr_nn: + movl (%rsi),%r11d + test %r11d,%r11d + jz atnr_exit + eaptrdiff32 %rsi + push %rsi + movl (%rsi),%r11d + test %r11d,%r11d + jz atnr_rej + eaptrdiff32 %rsi + call append_tnode_recurse + jmp atrnr_norej + atnr_rej: + mkptrdiff32 %rdi,%edi,%rsi + mkptrdiff32 %rdi,%edi,%rsi,4 + atrnr_norej: + pop %rsi + lea (2*4)(%rsi),%rsi + jmp atnr_nn + atnr_exit: + ret + +// rdi - &group_alt +// rsi - type +// -> rax - text_node ptr +push_tnode: + movslq (%rdi),%rcx + sub $(5*4),%r12 + and $-4,%r12 + movl %esi,0(%r12) + movl $0,4(%r12) + mov %r12,%rax + mkptrdiff32 %rax,%eax,%rdi + test %rcx,%rcx + jz ptn_ptout + add %rdi,%rcx + push %rax + push %rdi + mov %rax,%rdi + mov %rcx,%rsi + xchg %r12,%rsp + call append_tnode_recurse + xchg %r12,%rsp + pop %rdi + pop %rax + ptn_ptout: + movslq 4(%rdi),%rcx + test %rcx,%rcx + jnz ptn_jout + mkptrdiff32 %rax,%eax,%rdi,4 + ptn_jout: + ret + .bss argc: .zero 8 diff --git a/structs.txt b/structs.txt index 09a8d64..5a26b12 100644 --- a/structs.txt +++ b/structs.txt @@ -26,6 +26,7 @@ struct text_node { struct text_chars { text: char[]rev ptrdiff32 len: u32 + prevtmp: text_chars ptrdiff32 } struct parse_state { @@ -36,5 +37,12 @@ struct parse_state { cbdata: void ptr64 ...? 
} + +enum err { + OK = 0 + NO_MATCH = 1 + BAD_GRCLOSE = 2 + BAD_MURDER = 3 +} */ diff --git a/test b/test index 7a5f01e..30d689b 100755 --- a/test +++ b/test @@ -1,2 +1,2 @@ #!/bin/sh -./build && gdb -iex 'set confirm no' -ex 'run' target/amogus +./build && gdb -iex 'set confirm no' -ex 'b _start' -ex 'start' target/amogus diff --git a/test.c b/test.c index 809dc23..9d24b4b 100644 --- a/test.c +++ b/test.c @@ -13,7 +13,11 @@ void callback(char* str, int size, void* data) { int main() { char* match; - if(!entry("me\\|\\|ow.*|m(r(r(p..*)))",callback,&match)) + int err; +// if((err=entry("me\\|\\|ow.(nya|sin|cos)XX,[]*(||)..*|m(r(r(p..*)))",callback,&match))) { + if((err=entry("meow()*ww|mrrp",callback,&match))) { + printf("error: %i\n",err); return 1; + } printf("%s",match); }