This commit is contained in:
Kimapr 2024-04-09 20:17:19 +05:00
parent 27a3c79c46
commit acceb0e114
7 changed files with 427 additions and 111 deletions

View file

@ -1,14 +1,3 @@
// int entry(...)
//
// Parse regex. Find matching string.
// If found:
// call callback(
// matched string (only valid until callback return),
// length of the string including NUL delimiter,
// )
// return 0
// Else:
// return ERR
typedef void(entry_callback)(char *match, int size, void *data); typedef void(entry_callback)(char *match, int size, void *data);
int entry(char *regex, entry_callback *callback, void *data); int entry(char *regex, entry_callback *callback, void *data);

2
build
View file

@ -1,7 +1,7 @@
#!/bin/sh #!/bin/sh
mkdir -p target mkdir -p target
./charjmpt.lua > target/charjmpt.s ./charjmpt.lua > target/charjmpt.s
gcc -c meow.s -o target/amogus.o || exit gcc -O2 -Os -c meow.s -o target/amogus.o || exit
if command -v ~/stuff/zig/zig >/dev/null; then if command -v ~/stuff/zig/zig >/dev/null; then
~/stuff/zig/zig cc -shared -nostdlib target/amogus.o -o target/amogus || exit ~/stuff/zig/zig cc -shared -nostdlib target/amogus.o -o target/amogus || exit
elif command -v ld.lld >/dev/null; then elif command -v ld.lld >/dev/null; then

6
ctest
View file

@ -1,5 +1,5 @@
#!/bin/sh #!/bin/bash
./build || exit { ./build || exit; } 2>&1>/dev/null
export C_INCLUDE_PATH="$PWD:$C_INCLUDE_PATH" export C_INCLUDE_PATH="$PWD:$C_INCLUDE_PATH"
export LD_LIBRARY_PATH="$PWD/target:$LD_LIBRARY_PATH" export LD_LIBRARY_PATH="$PWD/target:$LD_LIBRARY_PATH"
export LIBRARY_PATH="$PWD/target:$LIBRARY_PATH" export LIBRARY_PATH="$PWD/target:$LIBRARY_PATH"
@ -8,9 +8,9 @@ if [ -z "$NODEBUG" ];
then then
gdb -q \ gdb -q \
-iex 'set confirm no' \ -iex 'set confirm no' \
-ex 'layout asm' \
-ex 'start' \ -ex 'start' \
target/cmogus target/cmogus
#-ex 'layout asm' \
else else
target/cmogus target/cmogus
fi fi

503
meow.s
View file

@ -51,22 +51,29 @@ strlen:
// rdi - buf // rdi - buf
// rsi - len // rsi - len
write: write:
push %rdx
push %rdi push %rdi
push %rsi push %rsi
pop %rdx pop %rdx
pop %rsi pop %rsi
mov $1,%rax mov $1,%rax
mov $1,%rdi mov $1,%rdi
push %rcx
push %r11
syscall syscall
pop %r11
pop %rcx
pop %rdx
ret ret
help0: help0:
.ascii "Usage: " .ascii "Usage: "
.set help0l, .-help0 .set help0l, .-help0
.ascii "./amogus"
help1: help1:
.ascii " regex0 [regex1 ...]\n" .ascii "\nRead from standard input, write to standard output."
.ascii "writes NUL-separated list of matching strings, one per regex to stdout\n" .ascii "\nFind a string matched by a regular expression.\n"
.ascii "uses stack memory, consider disabling the limit if it segfaults\n" .ascii "\nAsssumes unlimited call stack space.\n"
.set help1l, .-help1 .set help1l, .-help1
// rdi - exit code // rdi - exit code
@ -75,7 +82,26 @@ _exit:
syscall syscall
ret ret
.include "target/charjmpt.s" .macro print str=""
jmp printps\@
print\@:
.altmacro
.ascii "\str"
.noaltmacro
.set print\@l , . - print\@
printps\@:
push %rdi
push %rsi
push %rdx
push %rax
lea "print\@" (%rip),%rdi
mov $"print\@l" , %rsi
call write
pop %rax
pop %rdx
pop %rsi
pop %rdi
.endm
.macro eaptrdiff32 ptr=%rax, out=%rax, off=0, tmp=%r11 .macro eaptrdiff32 ptr=%rax, out=%rax, off=0, tmp=%r11
eaptrdiff32.\@: eaptrdiff32.\@:
@ -88,12 +114,17 @@ _exit:
push \tmp push \tmp
mov \ptr,\tmp mov \ptr,\tmp
movslq (\tmp),\tmp movslq (\tmp),\tmp
test \tmp,\tmp
jnz eaptrdiffnz.\@
xor \ptr,\ptr
eaptrdiffnz.\@:
add \tmp,\ptr add \tmp,\ptr
pop \tmp pop \tmp
.if \ptr != \out .if \ptr != \out
mov \ptr,\out mov \ptr,\out
pop \ptr pop \ptr
.endif .endif
eaptrdiff32o.\@:
.endm .endm
.macro mkptrdiff32 to=%r11, to_low=%r11d, ptr=%rax, off=0 .macro mkptrdiff32 to=%r11, to_low=%r11d, ptr=%rax, off=0
@ -111,96 +142,6 @@ _exit:
pop \to pop \to
.endm .endm
// rdi - &state
// rsi - char
// rcx, r8, r9, r10, r11 - scratch
// r12 - real stack (rsp - fake stack)
// -> rax - status
//parse_...:
// append_tnode_recurse has no body of its own here: the label sits directly
// in front of push_tnode, so both names refer to the same code.
append_tnode_recurse:
// push_tnode - carve a new text_node out of the r12 area and append it to an
// alternative.
//
// rdi - &group_alt
// rsi - type
// -> rax - text_node ptr
// Modifies r12 (moved down to the new node) and rcx.
// mkptrdiff32 is assumed to store a 32-bit relative link to its first
// operand at off(ptr); its body is not fully visible here - TODO confirm.
push_tnode:
// rcx = link stored in the first dword of the alternative (0 = none yet)
movslq (%rdi),%rcx
// reserve 5 dwords for the node, then round r12 down to a multiple of 8
sub $(5*4),%r12
and $-8,%r12
// node+0 = type, node+4 = 0
movl %esi,0(%r12)
movl $0,4(%r12)
mov %r12,%rax
// first dword of the alternative now links to the new node
mkptrdiff32 %rax,%eax,%rdi
test %rcx,%rcx
jz ptn_ptout
// rcx = address of the node that was linked before; its dword at +4 is
// pointed at the new node
add %rdi,%rcx
mkptrdiff32 %rax,%eax,%rcx,4
ptn_ptout:
// if the second dword of the alternative is still 0, link it to the new node
movslq 4(%rdi),%rcx
test %rcx,%rcx
jnz ptn_jout
mkptrdiff32 %rax,%eax,%rdi,4
ptn_jout:
ret
// parse_escape - skip to the next pattern character and treat it as a literal.
// rdi - &state; the qword at state+16 is the pointer to the current pattern
// character.
// Returns through parse_exit when the next character is NUL, otherwise
// through parse_self with the new character in sil.
parse_escape:
// advance the pattern pointer and load the character it now points at
incq (2*8)(%rdi)
mov (2*8)(%rdi),%rsi
movzbq (%rsi),%rsi
// the pattern ended right after the escape character
test %sil,%sil
jz parse_exit
jmp parse_self
// parse_self - append the literal character in sil to the text node at the
// end of the current alternative, creating that node first when needed.
// rdi - &state, rsi - char
// -> rax - 0
// Modifies rdi, rsi, r8, r9, r12 and whatever push_tnode modifies.
parse_self:
// debug output: write the character to fd 1 from a 2-byte stack scratch
// area, preserving rdi and rsi around the call
push %rdi
push %rsi
sub $2,%rsp
movw $0,(%rsp)
movb %sil,(%rsp)
mov %rsp,%rdi
mov $1,%rsi
call write
add $2,%rsp
pop %rsi
pop %rdi
// keep the character while rsi is reused for the push_tnode type argument
push %rsi
movq (%rdi),%rax # rax - group
eaptrdiff32 %rax # rax - group_alt
mov %rax,%rdi
// a zero first dword means the alternative has no node yet
movslq (%rax),%r9
test %r9,%r9
jz ps_alloc
eaptrdiff32 %rax # rax - text_node
// reuse the node only when its type is 3 (a character run)
movl (%rax),%r9d
cmp $3,%r9d
je ps_noalloc
ps_alloc:
mov $3,%rsi
call push_tnode
// the text link is made to r12-1, i.e. one byte below the new node
dec %r12
mkptrdiff32 %r12,%r12d,%rax,(8+0) # text_chars.text
inc %r12
movl $0,(8+4)(%rax) # text_chars.len
ps_noalloc:
// r9 = text address; the new character is stored len bytes below it
eaptrdiff32 %rax,%r9,(8+0)
pop %rsi
movl (8+4)(%rax),%r8d
sub %r8,%r9
movb %sil,(%r9)
incl (8+4)(%rax)
xor %rax,%rax
ret
// Placeholder handlers: all of these labels share the body of parse_exit,
// which returns 1 in rax.
parse_grbegin:
parse_grend:
parse_murder:
parse_nextalt:
parse_erase:
parse_exit:
mov $1,%rax
ret
// rdi - regex // rdi - regex
// rsi - callback // rsi - callback
// rdx - cb data // rdx - cb data
@ -233,6 +174,19 @@ entry:
entry_parse_begin: entry_parse_begin:
mov (-3*8-5*8 + 2*8)(%rbp),%rsi mov (-3*8-5*8 + 2*8)(%rbp),%rsi
movzbq (%rsi),%rsi # char to sil movzbq (%rsi),%rsi # char to sil
print "\t[[ char="
push %rdi
push %rsi
push %rax
mov (-3*8-5*8 + 2*8)(%rbp),%rdi
mov $1,%rsi
call write
pop %rax
pop %rsi
pop %rdi
print " ]]\n"
lea charjmpt(%rip),%r11 lea charjmpt(%rip),%r11
movzx %sil,%r10 movzx %sil,%r10
shl $1,%r10 shl $1,%r10
@ -242,12 +196,28 @@ entry:
lea charjmpt_prej(%rip),%r10 lea charjmpt_prej(%rip),%r10
add %r10,%r11 add %r10,%r11
lea (-3*8-5*8)(%rbp),%rdi # &state lea (-3*8-5*8)(%rbp),%rdi # &state
epl_call:
call *%r11 call *%r11
cmp $1,%rax
jg entry_parse_fail
test %rax,%rax test %rax,%rax
jnz entry_parse_end jnz entry_parse_end
incq (-3*8-5*8+2*8)(%rbp) incq (-3*8-5*8+2*8)(%rbp)
jmp entry_parse_begin jmp entry_parse_begin
entry_parse_end: entry_parse_end:
xchg %r12,%rsp
#mov $0,%rax
lea (-3*8-5*8)(%rbp),%rdi # &state
mov 8(%rdi),%rsi # mother_gr
eaptrdiff32 %rsi,%rsi,4 # alts_head
xor %rdx,%rdx # len
xor %rcx,%rcx # tnode
mov %rsp,%r12
call traverse_ast
mov $1,%rax
entry_untraverse:
entry_parse_fail:
mov %r12,%rsp
mov -8(%rbp),%r12 mov -8(%rbp),%r12
mov -16(%rbp),%r13 mov -16(%rbp),%r13
mov -24(%rbp),%r14 mov -24(%rbp),%r14
@ -255,6 +225,351 @@ entry:
pop %rbp pop %rbp
ret ret
// traverse_ast - walk a chain of alternatives, running traverse_galt on each.
//
// rdi - &state
// rsi - option<&group_alt> (0 ends the walk)
// rdx - len
// rcx - option<&tnode>
//
// rsi, rdx and rcx are saved around every traverse_galt call, so each
// alternative starts from the same len / tnode values.
// traverse_galt may not return at all: on a match it jumps to
// entry_untraverse instead.
traverse_ast:
// debug trace; emitted again on every loop iteration because the loop jumps
// back to the function label
print "trav {\n"
test %rsi,%rsi
jz tast_bye
push %rsi
push %rdx
push %rcx
// follow the link at offset 4 of the alternative and traverse from there
eaptrdiff32 %rsi,%rsi,4
call traverse_galt
pop %rcx
pop %rdx
pop %rsi
// follow the link at offset 8 to the next alternative (0 when there is none)
eaptrdiff32 %rsi,%rsi,(2*4)
jmp traverse_ast
tast_bye:
print "} trav\n"
ret
// traverse_galt - walk the node chain of one alternative; at the end of the
// chain build the matched string on the stack and hand it to the callback.
//
// rdi - &state
// rsi - option<&tnode> (current node; 0 = end of chain)
// rdx - len (sum of the lengths of the character nodes seen so far)
// rcx - option<&tnode> (most recent character node seen so far)
//
// Returns to the caller only for a type-1 node. Reaching the end of the
// chain calls the callback and then jumps to entry_untraverse with rax = 0.
traverse_galt:
print "travgalt {\n"
test %rsi,%rsi
jnz tgand
tgfin:
// end of chain: with no character node recorded, skip string building
// NOTE(review): on that path nothing is pushed, so the callback appears to
// receive rsp as the string pointer without a terminator - confirm
test %rcx,%rcx
jz tgad
print "Stringing\n"
// push the terminating NUL byte first
dec %rsp
movb $0,(%rsp)
tgalb:
// r8 = length of this character node, r9 = address of its text
movl (2*4+1*4)(%rcx),%r8d
eaptrdiff32 %rcx,%r9,(2*4)
tgscpb:
test %r8,%r8
jz tgscpe
print "char="
// copy one byte from the text onto the stack; both pointers move downwards
// NOTE(review): parse_self also stores text downwards, so this may emit each
// run in reverse order - confirm
dec %rsp
mover:
movb (%r9),%r10b
movb %r10b,(%rsp)
// debug output of one byte at rsp (after the three pushes below)
push %rdi
push %rsi
push %rax
mov %rsp,%rdi
mov $1,%rsi
call write
pop %rax
pop %rsi
pop %rdi
print "\n"
dec %r9
dec %r8
jmp tgscpb
tgscpe:
// continue with the node linked at offset 16 (set up in tgt_chars below)
eaptrdiff32 %rcx,%rcx,(2*4+2*4)
test %rcx,%rcx
jnz tgalb
tgad:
// callback(rdi = string at rsp, rsi = len, rdx = qword at state+32);
// the callback address is the qword at state+24
mov %rdi,%rcx
mov %rsp,%rdi
mov %rdx,%rsi
mov (4*8)(%rcx),%rdx
mov (3*8)(%rcx),%rcx
// align the stack to 16 bytes for the call
and $-16,%rsp
call *%rcx
xor %rax,%rax
// non-local exit: entry_untraverse reloads rsp from r12
jmp entry_untraverse
tgand:
// dispatch on the node type stored in the first dword
movl (%rsi),%r9d
cmp $1,%r9
je tgt_murder
cmp $2,%r9
je tgt_bye
cmp $3,%r9
je tgt_chars
cmp $4,%r9
je tgt_group
// unknown type: store to absolute address 0 (presumably a deliberate crash)
movb $0,0
tgt_murder:
print "(murder)\n"
ret
// not referenced by any jump visible in this chunk
tgt_wiped:
print "(wiped)\n"
jmp tgt_bye
tgt_chars:
print "(chars)\n"
// add this node's length to the running total
movl (2*4+1*4)(%rsi),%r8d
add %r8,%rdx
// reset the link at offset 16, then point it at the previous character node
// when there is one
movl $0,(2*4+2*4)(%rsi)
test %rcx,%rcx
jz nosetptr
mkptrdiff32 %rcx,%ecx,%rsi,(2*4+2*4)
nosetptr:
// this node becomes the most recent character node
mov %rsi,%rcx
jmp tgt_bye
tgt_group:
print "(group)\n"
// traverse the alternatives reached through the link at offset 12
push %rsi
eaptrdiff32 %rsi,%rsi,(2*4+1*4)
call traverse_ast
pop %rsi
tgt_bye:
print "bye\n"
// step to the node linked at offset 4 and continue
eaptrdiff32 %rsi,%rsi,4
jmp traverse_galt
// rdi - &state
// rsi - char
// rcx, r8, r9, r10, r11 - scratch
// r12 - real stack (rsp - fake stack)
// -> rax - status
//parse_...:
// parse_escape - skip to the next pattern character and treat it as a literal.
// rdi - &state; the qword at state+16 is the pointer to the current pattern
// character.
// Returns through parse_exit (rax = 1) when the next character is NUL,
// otherwise through parse_self with the new character in sil.
parse_escape:
// advance the pattern pointer and load the character it now points at
incq (2*8)(%rdi)
mov (2*8)(%rdi),%rsi
movzbq (%rsi),%rsi
// the pattern ended right after the escape character
test %sil,%sil
jz parse_exit
jmp parse_self
// parse_self - append the literal character in sil to the text node at the
// end of the current alternative, creating that node first when needed.
// rdi - &state, rsi - char
// -> rax - 0
// Modifies rdi, rsi, r8, r9, r12 and whatever push_tnode modifies.
parse_self:
// disabled debug output, kept inside a block comment
/*
push %rdi # DBG{
push %rsi
mov %rsp,%rdi
mov $1,%rsi
call write
pop %rsi
pop %rdi # DBG}
//*/
// keep the character while rsi is reused for the push_tnode type argument
push %rsi
movq (%rdi),%rax # rax - group
eaptrdiff32 %rax # rax - group_alt
mov %rax,%rdi
// a zero first dword means the alternative has no node yet
movslq (%rax),%r9
test %r9,%r9
jz ps_alloc
eaptrdiff32 %rax # rax - text_node
// reuse the node only when its type is 3 (a character run)
movl (%rax),%r9d
cmp $3,%r9d
je ps_noalloc
ps_alloc:
mov $3,%rsi
print "(alloc)"
call push_tnode
// the text link is made to r12-1, i.e. one byte below the new node
dec %r12
mkptrdiff32 %r12,%r12d,%rax,(8+0) # text_chars.text
inc %r12
movl $0,(8+4)(%rax) # text_chars.len
ps_noalloc:
// r9 = text address
eaptrdiff32 %rax,%r9,(8+0)
pop %rsi
movl (8+4)(%rax),%r8d
movsil:
// claim one more byte from the r12 area and store the character len bytes
// below the text address
dec %r12
sub %r8,%r9
movb %sil,(%r9)
// debug output: write the byte at r12, preserving rdi, rsi and rax
push %rdi
push %rsi
push %rax
mov %r12,%rdi
mov $1,%rsi
call write
pop %rax
pop %rsi
pop %rdi
incl (8+4)(%rax)
xor %rax,%rax
ret
.include "target/charjmpt.s"
// parse_grbegin - open a nested group: append a type-4 node to the current
// alternative, make the group embedded in that node the current group and
// give it a first, empty alternative.
// rdi - &state
// -> rax - 0
// Modifies rsi, r12 and whatever push_tnode modifies.
parse_grbegin:
push %rdi
movq (%rdi),%rax # rax - group
push %rax
eaptrdiff32 %rax,%rdi # rdi - group_alt
mov $4,%rsi
call push_tnode # rax - ng_text_node
pop %rsi # rsi - group
pop %rdi # rdi - state
// the group record starts 8 bytes into the node, after type and link
lea (2*4)(%rax),%rax # rax - newgroup
// the dword at newgroup+8 links back to the enclosing group
mkptrdiff32 %rsi,%esi,%rax,(2*4)
// state+0 now points at the new group
mov %rax,(%rdi)
// carve a 3-dword alternative record, dword-aligned
sub $(3*4),%r12
and $-4,%r12
// both link dwords of the new group (+0 and +4) point at this alternative
mkptrdiff32 %r12,%r12d,%rax
mkptrdiff32 %r12,%r12d,%rax,4
// the alternative itself starts with all three dwords zero
movl $0,(0*4)(%r12)
movl $0,(1*4)(%r12)
movl $0,(2*4)(%r12)
xor %rax,%rax
ret
// parse_grend - close the current group by making its enclosing group
// current again.
// rdi - &state
// -> rax - 0 on success, 2 when the current group has no enclosing group
//          (the dword at group+8 is zero)
// Modifies rsi.
parse_grend:
movq (%rdi),%rax # rax - group
movl (2*4)(%rax),%esi # rsi - upgroup off
pgrt:
test %rsi,%rsi
jnz pgre_nodie
// no enclosing group to return to
mov $2,%rax
ret
pgre_nodie:
eaptrdiff32 %rax,%rax,(2*4) # rax - upgroup
// state+0 now points at the enclosing group
mov %rax,(%rdi)
xor %rax,%rax
ret
// parse_nextalt - start a new, empty alternative in the current group
// (presumably dispatched for the alternation character; the jump table is
// not visible here).
//
// In:    rdi = &state (qword at state+0 = current group)
//        r12 = allocation pointer; lowered by one 3-dword group_alt record
// Out:   rax = 0
// Clobb: rdi, rsi, flags
//
// The dispatcher inspects rax after every handler, so the status is set
// explicitly instead of returning whatever rax happened to hold.
parse_nextalt:
    mov     (%rdi),%rdi                     # rdi = group
    eaptrdiff32 %rdi,%rsi,(0*4)             # rsi = alternative currently linked at group+0
    sub     $(3*4),%r12                     # carve the new alternative record
    and     $-4,%r12                        # keep it dword-aligned
    movl    $0,(0*4)(%r12)                  # all three dwords start out zero
    movl    $0,(1*4)(%r12)
    movl    $0,(2*4)(%r12)
    mkptrdiff32 %r12,%r12d,%rsi,(2*4)       # previous alternative +8 -> new alternative
    mkptrdiff32 %r12,%r12d,%rdi             # group +0 -> new alternative
    xor     %rax,%rax                       # status 0: keep parsing
    ret
// parse_murder - consume the character after the current one, require it to
// be ']' and append a type-1 node to the current alternative.
//
// In:    rdi = &state (state+0 = current group, state+16 = pattern pointer)
// Out:   rax = 0 on success, 3 when the following character is not ']'
// Clobb: rdi, rsi, flags and whatever push_tnode modifies
parse_murder:
    incq    (2*8)(%rdi)                     # advance the pattern pointer
    mov     (2*8)(%rdi),%rsi
    movzbq  (%rsi),%rsi                     # sil = character now pointed at
    cmp     $'],%sil
    je      pm_succ
    mov     $3,%rax
    ret
pm_succ:
    mov     (%rdi),%rdi                     # rdi = group
    # Resolve in place: with a single argument the macro writes its result to
    # rax (its default `out`) and leaves rdi pointing at the group, but
    # push_tnode expects &group_alt in rdi.
    eaptrdiff32 %rdi,%rdi                   # rdi = group_alt
    mov     $1,%rsi                         # node type 1
    call    push_tnode
    xor     %rax,%rax
    ret
// parse_erase - undo the most recent item of the current alternative.
//
//   * alternative has no node yet      -> append a type-2 node
//   * last node is a non-empty type-3  -> drop its last character
//   * anything else                    -> retype the last node to 2 and
//                                         reset r12 to that node's address
//
// In:    rdi = &state (state+0 = current group)
// Out:   rax = 0
// Clobb: rcx, rsi, rdi, r8, r12, flags and whatever push_tnode modifies
parse_erase:
    mov     (%rdi),%rsi                     # rsi = group
    # Resolve in place: with a single argument the macro writes its result to
    # rax (its default `out`) and leaves rsi unchanged, while everything below
    # uses rsi as the group_alt.
    eaptrdiff32 %rsi,%rsi                   # rsi = group_alt
    movl    (%rsi),%ecx                     # link to the last node, 0 if none
    test    %rcx,%rcx
    je      pers_push
    eaptrdiff32 %rsi,%rcx                   # rcx = last text_node
    movl    (%rcx),%r8d                     # node type
    cmp     $3,%r8d
    je      pers_text
    jmp     pers_set
pers_text:
    # text_chars.len is the dword at node+12 (node+8 is text_chars.text), the
    # same offset parse_self and traverse_galt use for the length.
    movl    (2*4+1*4)(%rcx),%r8d
    test    %r8d,%r8d
    jz      pers_set                        # empty run: treat like any other node
    decl    (2*4+1*4)(%rcx)                 # one character fewer
    inc     %r12                            # give its byte back to the r12 area
    xor     %rax,%rax
    ret
pers_set:
    movl    $2,(%rcx)                       # node type becomes 2
    mov     %rcx,%r12                       # r12 is reset to the node's address
    xor     %rax,%rax
    ret
pers_push:
    mov     %rsi,%rdi                       # rdi = group_alt
    mov     $2,%rsi                         # node type 2
    call    push_tnode
    xor     %rax,%rax
    ret
// parse_exit - return status 1 in rax. The dispatcher stops its parse loop
// on any non-zero status.
parse_exit:
mov $1,%rax
ret
// append_tnode_recurse - link node `ptr` to node `to`; when `ptr` is a
// type-4 (group) node, do the same for the last node of every alternative
// inside it.
//
// rdi - &tnode to
// rsi - &tnode ptr
// Clobb: rsi, r8, r11, flags (rdi is expected to survive the macros)
//
// Alternatives that contain no node at all get both of their link dwords
// pointed at `to` instead.
append_tnode_recurse:
    mkptrdiff32 %rdi,%edi,%rsi,4            # ptr +4 -> to
    movl    (%rsi),%r8d                     # node type
    cmp     $4,%r8d
    jne     atnr_exit
    lea     (2*4+1*4)(%rsi),%rsi            # rsi = address of the link at node+12
atnr_nn:
    movl    (%rsi),%r11d
    test    %r11d,%r11d
    jz      atnr_exit                       # no (further) alternative
    # Resolve in place: with a single argument the macro writes its result to
    # rax (its default `out`) and leaves rsi unchanged, but the code below
    # keeps working through rsi.
    eaptrdiff32 %rsi,%rsi                   # rsi = alternative
    push    %rsi
    movl    (%rsi),%r11d                    # link to the alternative's last node
    test    %r11d,%r11d
    jz      atnr_rej
    eaptrdiff32 %rsi,%rsi                   # rsi = last node of the alternative
    call    append_tnode_recurse
    jmp     atrnr_norej
atnr_rej:
    mkptrdiff32 %rdi,%edi,%rsi              # empty alternative: +0 -> to
    mkptrdiff32 %rdi,%edi,%rsi,4            #                    +4 -> to
atrnr_norej:
    pop     %rsi
    lea     (2*4)(%rsi),%rsi                # rsi = address of the link at alternative+8
    jmp     atnr_nn
atnr_exit:
    ret
// push_tnode - carve a new text_node out of the r12 area and append it to an
// alternative.
//
// rdi - &group_alt
// rsi - type
// -> rax - text_node ptr
// Modifies r12 (moved down to the new node), rcx, rsi and whatever
// append_tnode_recurse modifies; rdi and rax are saved around that call.
// mkptrdiff32 is assumed to store a 32-bit relative link to its first
// operand at off(ptr); its body is not fully visible here - TODO confirm.
push_tnode:
// rcx = link stored in the first dword of the alternative (0 = none yet)
movslq (%rdi),%rcx
// reserve 5 dwords for the node, dword-aligned
sub $(5*4),%r12
and $-4,%r12
// node+0 = type, node+4 = 0
movl %esi,0(%r12)
movl $0,4(%r12)
mov %r12,%rax
// first dword of the alternative now links to the new node
mkptrdiff32 %rax,%eax,%rdi
test %rcx,%rcx
jz ptn_ptout
// rcx = address of the node that was linked before
add %rdi,%rcx
push %rax
push %rdi
// append_tnode_recurse(to = new node, ptr = previous node)
mov %rax,%rdi
mov %rcx,%rsi
// the call runs with rsp and r12 swapped, so its return addresses are
// written below the node that was just carved
xchg %r12,%rsp
call append_tnode_recurse
xchg %r12,%rsp
pop %rdi
pop %rax
ptn_ptout:
// if the second dword of the alternative is still 0, link it to the new node
movslq 4(%rdi),%rcx
test %rcx,%rcx
jnz ptn_jout
mkptrdiff32 %rax,%eax,%rdi,4
ptn_jout:
ret
.bss .bss
argc: argc:
.zero 8 .zero 8

View file

@ -26,6 +26,7 @@ struct text_node {
struct text_chars { struct text_chars {
text: char[]rev ptrdiff32 text: char[]rev ptrdiff32
len: u32 len: u32
prevtmp: text_chars ptrdiff32
} }
struct parse_state { struct parse_state {
@ -36,5 +37,12 @@ struct parse_state {
cbdata: void ptr64 cbdata: void ptr64
...? ...?
} }
enum err {
OK = 0
NO_MATCH = 1
BAD_GRCLOSE = 2
BAD_MURDER = 3
}
*/ */

2
test
View file

@ -1,2 +1,2 @@
#!/bin/sh #!/bin/sh
./build && gdb -iex 'set confirm no' -ex 'run' target/amogus ./build && gdb -iex 'set confirm no' -ex 'b _start' -ex 'start' target/amogus

6
test.c
View file

@ -13,7 +13,11 @@ void callback(char* str, int size, void* data) {
int main() { int main() {
char* match; char* match;
if(!entry("me\\|\\|ow.*|m(r(r(p..*)))",callback,&match)) int err;
// if((err=entry("me\\|\\|ow.(nya|sin|cos)XX,[]*(||)..*|m(r(r(p..*)))",callback,&match))) {
if((err=entry("meow()*ww|mrrp",callback,&match))) {
printf("error: %i\n",err);
return 1; return 1;
}
printf("%s",match); printf("%s",match);
} }