diff --git a/amogus.h b/amogus.h index 4cab2c3..0983a12 100644 --- a/amogus.h +++ b/amogus.h @@ -3,13 +3,12 @@ // Parse regex. Find matching string. // If found: // call callback( -// matched string (only valid before callback return), +// matched string (only valid until callback return), // length of the string including NUL delimiter, -// data passed in the data argument // ) // return 0 // Else: -// return 1 +// return ERR -typedef void (entry_callback)(char* match, int size, void* data); -int entry(char* regex, entry_callback *callback, void *data); +typedef void(entry_callback)(char *match, int size, void *data); +int entry(char *regex, entry_callback *callback, void *data); diff --git a/amogus.png b/amogus.png new file mode 100644 index 0000000..1890982 Binary files /dev/null and b/amogus.png differ diff --git a/build b/build index 42be51a..180aece 100755 --- a/build +++ b/build @@ -2,9 +2,17 @@ mkdir -p target ./charjmpt.lua > target/charjmpt.s gcc -c meow.s -o target/amogus.o || exit -gnu() { shift 1 && clang "$@"; } -"$(command -v ~/stuff/zig/zig || echo gnu)" \ -cc -O3 -shared -static -nostdlib -nodefaultlibs target/amogus.o -o target/amogus || exit +if command -v ~/stuff/zig/zig >/dev/null; then + ~/stuff/zig/zig cc -shared -nostdlib target/amogus.o -o target/amogus || exit +elif command -v ld.lld >/dev/null; then + ld.lld -shared -nostdlib target/amogus.o -o target/amogus || exit +else + gcc -shared -nostdlib target/amogus.o -o target/amogus || exit +fi +#gnu() { shift 1 && gcc "$@"; } +#"$(command -v ~/stuff/zig/zig || echo gnu)" \ +#cc -shared -nostdlib target/amogus.o -o target/amogus || exit +#ld.lld -shared -nostdlib target/amogus.o -o target/amogus || exit objdump -d target/amogus cp target/amogus target/amogstrip strip -K entry target/amogstrip @@ -14,4 +22,5 @@ rm -rf target/cg/ mkdir -p target/cg cp target/amogstrip target/cg/amogus cp amogus.h target/cg/ +cp amogus.png target/cg/ diff --git a/charjmpt.lua b/charjmpt.lua index c9d7d41..dc44c22 100755 --- a/charjmpt.lua +++ b/charjmpt.lua @@ -2,9 +2,15 @@ print("charjmpt:") chars = { ['\0'] = "parse_exit", + ['('] = "parse_grbegin", + [')'] = "parse_grend", + ['\\'] = "parse_escape", + ['|'] = "parse_nextalt", + ['['] = "parse_murder", + ['*'] = "parse_erase", } local def = "parse_self" for n=0,255 do local c = string.char(n) - print("\t.4byte "..(chars[c] or "parse_self").." - charjmpt_prej") + print("\t.2byte "..(chars[c] or "parse_self").." - charjmpt_prej") end diff --git a/ctest b/ctest index 5c3b0db..8f890a8 100755 --- a/ctest +++ b/ctest @@ -4,8 +4,13 @@ export C_INCLUDE_PATH="$PWD:$C_INCLUDE_PATH" export LD_LIBRARY_PATH="$PWD/target:$LD_LIBRARY_PATH" export LIBRARY_PATH="$PWD/target:$LIBRARY_PATH" gcc -O2 test.c -o target/cmogus -L"$LIBRARY_PATH" -l:amogus || exit -gdb -q \ - -iex 'set confirm no' \ - -ex 'layout asm' \ - -ex 'start' \ - target/cmogus +if [ -z "$NODEBUG" ]; +then + gdb -q \ + -iex 'set confirm no' \ + -ex 'layout asm' \ + -ex 'start' \ + target/cmogus +else + target/cmogus +fi diff --git a/meow.s b/meow.s index dd01cfc..f0aea36 100644 --- a/meow.s +++ b/meow.s @@ -2,6 +2,7 @@ .type _start, @function .globl entry .type entry, @function + _start: pop %rax mov %rax,argc(%rip) @@ -36,18 +37,19 @@ usage: ret -# rdi - buf -# -> rax - len +// rdi - buf +// -> rax - len strlen: mov %rdi,%rsi xor %rax,%rax repne scasb sub %rsi,%rdi mov %rdi,%rax + dec %rax ret -# rdi - buf -# rsi - len +// rdi - buf +// rsi - len write: push %rdi push %rsi @@ -62,10 +64,12 @@ help0: .ascii "Usage: " .set help0l, .-help0 help1: - .ascii " regex0 [regex1 ...]\nstdout: NUL-separated list of matching strings\n" + .ascii " regex0 [regex1 ...]\n" + .ascii "writes NUL-separated list of matching strings, one per regex to stdout\n" + .ascii "uses stack memory, consider disabling the limit if it segfaults\n" .set help1l, .-help1 -# rdi - exit code +// rdi - exit code _exit: mov $0x3c,%rax syscall @@ -73,55 +77,180 @@ _exit: .include "target/charjmpt.s" -# rdi - where -# rsi - char -#parse_...: +.macro eaptrdiff32 ptr=%rax, out=%rax, off=0, tmp=%r11 + eaptrdiff32.\@: + .if \ptr != \out + push \ptr + .endif + .if \off + add $\off,\ptr + .endif + push \tmp + mov \ptr,\tmp + movslq (\tmp),\tmp + add \tmp,\ptr + pop \tmp + .if \ptr != \out + mov \ptr,\out + pop \ptr + .endif +.endm + +.macro mkptrdiff32 to=%r11, to_low=%r11d, ptr=%rax, off=0 + mkptrdiff32.\@: + push \to + .if \off + push \ptr + add $\off,\ptr + .endif + sub \ptr,\to + movl \to_low,(\ptr) + .if \off + pop \ptr + .endif + pop \to +.endm + +// rdi - &state +// rsi - char +// rcx, r8, r9, r10, r11 - scratch +// r12 - real stack (rsp - fake stack) +// -> rax - status +//parse_...: + +append_tnode_recurse: + + +// rdi - &group_alt +// rsi - type +// -> rax - text_node ptr +push_tnode: + movslq (%rdi),%rcx + sub $(5*4),%r12 + and $-8,%r12 + movl %esi,0(%r12) + movl $0,4(%r12) + mov %r12,%rax + mkptrdiff32 %rax,%eax,%rdi + test %rcx,%rcx + jz ptn_ptout + add %rdi,%rcx + mkptrdiff32 %rax,%eax,%rcx,4 + ptn_ptout: + movslq 4(%rdi),%rcx + test %rcx,%rcx + jnz ptn_jout + mkptrdiff32 %rax,%eax,%rdi,4 + ptn_jout: + ret + +parse_escape: + incq (2*8)(%rdi) + mov (2*8)(%rdi),%rsi + movzbq (%rsi),%rsi + test %sil,%sil + jz parse_exit + jmp parse_self parse_self: + push %rdi + push %rsi + sub $2,%rsp + movw $0,(%rsp) + movb %sil,(%rsp) + mov %rsp,%rdi + mov $1,%rsi + call write + add $2,%rsp + pop %rsi + pop %rdi + push %rsi + movq (%rdi),%rax # rax - group + eaptrdiff32 %rax # rax - group_alt + mov %rax,%rdi + movslq (%rax),%r9 + test %r9,%r9 + jz ps_alloc + eaptrdiff32 %rax # rax - text_node + movl (%rax),%r9d + cmp $3,%r9d + je ps_noalloc + ps_alloc: + mov $3,%rsi + call push_tnode + dec %r12 + mkptrdiff32 %r12,%r12d,%rax,(8+0) # text_chars.text + inc %r12 + movl $0,(8+4)(%rax) # text_chars.len + ps_noalloc: + eaptrdiff32 %rax,%r9,(8+0) + pop %rsi + movl (8+4)(%rax),%r8d + sub %r8,%r9 + movb %sil,(%r9) + incl (8+4)(%rax) xor %rax,%rax ret +parse_grbegin: +parse_grend: +parse_murder: +parse_nextalt: +parse_erase: parse_exit: mov $1,%rax ret - -# rdi - regex -# rsi - callback -# rdx - cb data +// rdi - regex +// rsi - callback +// rdx - cb data entry: push %rbp mov %rsp,%rbp - sub $48,%rsp - mov %rsi,-8(%rbp) - mov %rdx,-16(%rbp) - mov %rdi,-24(%rbp) + // svregs state group group_alt + // (r12-r14) + sub $(3*8 + 5*8 + 3*4 + 3*4),%rsp + mov %r12,-8(%rbp) + mov %r13,-16(%rbp) + mov %r14,-24(%rbp) + mov %rsp,%r12 + mov %rdi,(-3*8-5*8 + 2*8)(%rbp) # state.regchar + mov %rsi,(-3*8-5*8 + 3*8)(%rbp) # state.callback + mov %rdx,(-3*8-5*8 + 4*8)(%rbp) # state.cbdata + lea (-3*8-5*8-3*4)(%rbp),%r9 + mov %r9,(-3*8-5*8 + 0*8)(%rbp) # state.mother_gr + mov %r9,(-3*8-5*8 + 1*8)(%rbp) # state.current_gr + lea (-3*8-5*8-3*4)(%rbp),%r9 + lea (-3*8-5*8-3*4-3*4)(%rbp),%r8 + mkptrdiff32 %r8,%r8d,%r9 # group.alts_tail + mkptrdiff32 %r8,%r8d,%r9,4 # group.alts_head + movl $0,(-3*8-5*8-3*4 + 2*4)(%rbp) # group.up + movl $0,(-3*8-5*8-3*4-3*4 + 0*4)(%rbp) # group_alt.text_tail + movl $0,(-3*8-5*8-3*4-3*4 + 1*4)(%rbp) # group_alt.text_head + movl $0,(-3*8-5*8-3*4-3*4 + 2*4)(%rbp) # group_alt.next + xchg %r12,%rsp + sub $512,%r12 entry_parse_begin: - movzbl (%rdi),%ecx + mov (-3*8-5*8 + 2*8)(%rbp),%rsi + movzbq (%rsi),%rsi # char to sil lea charjmpt(%rip),%r11 - movzx %cl,%r10 - shl $2,%r10 + movzx %sil,%r10 + shl $1,%r10 add %r10,%r11 - movsxd (%r11),%r11 + movswq (%r11),%r11 charjmpt_prej: lea charjmpt_prej(%rip),%r10 add %r10,%r11 - mov %rdi,-32(%rbp) + lea (-3*8-5*8)(%rbp),%rdi # &state call *%r11 test %rax,%rax jnz entry_parse_end - mov -32(%rbp),%rdi - inc %rdi + incq (-3*8-5*8+2*8)(%rbp) jmp entry_parse_begin entry_parse_end: - mov %rdi,%rsi - mov -24(%rbp),%rdi - sub %rdi,%rsi - inc %rsi - mov -16(%rbp),%rdx - mov -8(%rbp),%r9 - entry_pre_call: - call *%r9 + mov -8(%rbp),%r12 + mov -16(%rbp),%r13 + mov -24(%rbp),%r14 mov %rbp,%rsp pop %rbp ret diff --git a/regen.lua b/regen.lua new file mode 100755 index 0000000..4dbca58 --- /dev/null +++ b/regen.lua @@ -0,0 +1,101 @@ +#!/usr/bin/env lua +local chs={} +local function dchar(n) + local ch=string.char(n) + local cho=ch + if ch:match("[^%w ]") then + ch="\\"..ch + end + chs[#chs+1]=function() + return ch,ch,cho + end +end +local recurse=0; +local function dren() + return asterisked==0 and badl==0 +end +local out="" +for n=32,126 do + dchar(n) +end +chs[#chs+1]=function() return "([]*)","","" end +local function charf() + local chl=chs + return function() + local c,b,o=chl[math.random(1,#chl)]() + return c,b,o + end +end +local str +local char=charf() +function str(n,bad) + local t={} + local ot={} + local ob={} + for x=1,n do + t[#t+1],ob[#ob+1],ot[#ot+1]=char() + end + if bad then + if n>0 then + t[math.random(1,#t)]=math.random()>0.5 and t[n].."[]" or "[]"..t[n] + else + t[#t+1]='[]' + end + end + return table.concat(t),bad and '' or table.concat(ob),bad and '' or table.concat(ot) +end +str=(function(str) return function(min,man,c) + local t={} + local goods={} + for n=1,1 do + local nn + repeat + nn=math.random(1,c) + until not goods[nn] + goods[nn]=true + end + local ot + local ob + for n=1,c do + local o + local oc + t[n],oc,o=str(math.random(min,man),not goods[n]) + if goods[n] then + ot=o + ob=oc + end + end + return table.concat(t,'|'),ob,ot +end end)(str); + +local mrecurse=0 +char=(function(char) return function() + local c,o + local b + local ast=math.random()<0.2 + if math.random()<0.05*(1-math.atan(recurse*(1.5))/math.atan(math.huge)) then + local s + recurse=recurse+1 + if recurse>mrecurse then + mrecurse=math.max(mrecurse,recurse) + io.stderr:write(mrecurse,' rec\n') + end + s,b,o=str(0,16,math.random(1,64)) + recurse=recurse-1 + c='('..s..')' + else + c,b,o=char() + end + if math.random()<0.2 then + c=c.."*" + b=c + o='' + end + return c,b,o +end end)(char) + +local s,b,o=str(0,16,256) +print(s) +io.stderr:write(mrecurse,' rec\n') +io.stderr:write(b,'\n') +io.stderr:write(o,'\n') diff --git a/structs.txt b/structs.txt new file mode 100644 index 0000000..09a8d64 --- /dev/null +++ b/structs.txt @@ -0,0 +1,40 @@ +/* +struct group { + alts_tail: group_alt ptrdiff32 + alts_head: group_alt ptrdiff32 + up: group ptrdiff32 +} + +struct group_alt { + text_tail: text_node ptrdiff32 + text_head: text_node ptrdiff32 + next: group_alt ptrdiff32 +} + +struct text_node { + type: i32 enum { + // null = 0 + murder = 1 // union: void[0] + wiped = 2 // union: void[0] + chars = 3 // union: text_chars + group = 4 // union: group + } + next: text_node ptrdiff32 + data: union ... +} + +struct text_chars { + text: char[]rev ptrdiff32 + len: u32 +} + +struct parse_state { + current_gr: group ptr64 + mother_gr: group ptr64 + regchar: char ptr64 + callback: fn ptr64 + cbdata: void ptr64 + ...? +} +*/ +