This commit is contained in:
Kimapr 2024-04-08 05:25:15 +05:00
parent e1efc5acaf
commit 879d7c3172
8 changed files with 335 additions and 46 deletions

View file

@ -3,13 +3,12 @@
// Parse regex. Find matching string.
// If found:
// call callback(
// matched string (only valid before callback return),
// matched string (only valid until callback return),
// length of the string including NUL delimiter,
// data passed in the data argument
// )
// return 0
// Else:
// return 1
// return ERR
typedef void (entry_callback)(char* match, int size, void* data);
int entry(char* regex, entry_callback *callback, void *data);
typedef void(entry_callback)(char *match, int size, void *data);
int entry(char *regex, entry_callback *callback, void *data);

BIN
amogus.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.7 KiB

15
build
View file

@ -2,9 +2,17 @@
mkdir -p target
./charjmpt.lua > target/charjmpt.s
gcc -c meow.s -o target/amogus.o || exit
gnu() { shift 1 && clang "$@"; }
"$(command -v ~/stuff/zig/zig || echo gnu)" \
cc -O3 -shared -static -nostdlib -nodefaultlibs target/amogus.o -o target/amogus || exit
if command -v ~/stuff/zig/zig >/dev/null; then
~/stuff/zig/zig cc -shared -nostdlib target/amogus.o -o target/amogus || exit
elif command -v ld.lld >/dev/null; then
ld.lld -shared -nostdlib target/amogus.o -o target/amogus || exit
else
gcc -shared -nostdlib target/amogus.o -o target/amogus || exit
fi
#gnu() { shift 1 && gcc "$@"; }
#"$(command -v ~/stuff/zig/zig || echo gnu)" \
#cc -shared -nostdlib target/amogus.o -o target/amogus || exit
#ld.lld -shared -nostdlib target/amogus.o -o target/amogus || exit
objdump -d target/amogus
cp target/amogus target/amogstrip
strip -K entry target/amogstrip
@ -14,4 +22,5 @@ rm -rf target/cg/
mkdir -p target/cg
cp target/amogstrip target/cg/amogus
cp amogus.h target/cg/
cp amogus.png target/cg/

View file

@ -2,9 +2,15 @@
-- Emits the `charjmpt` jump table as assembler text on stdout (the build
-- script redirects it to target/charjmpt.s, which meow.s includes).
-- One 2-byte entry per byte value 0..255, each holding the distance from the
-- `charjmpt_prej` label to the handler for that byte.
print("charjmpt:")

-- Bytes that have a dedicated handler; every other byte uses `def`.
local chars = {
	['\0'] = "parse_exit",
	['('] = "parse_grbegin",
	[')'] = "parse_grend",
	['\\'] = "parse_escape",
	['|'] = "parse_nextalt",
	['['] = "parse_murder",
	['*'] = "parse_erase",
}

-- Handler for bytes without an entry in `chars`.
local def = "parse_self"

for n = 0, 255 do
	local handler = chars[string.char(n)] or def
	-- Entries are 2 bytes wide: the table reader scales the index by 2 and
	-- sign-extends a 16-bit word.
	print("\t.2byte " .. handler .. " - charjmpt_prej")
end

7
ctest
View file

@ -4,8 +4,13 @@ export C_INCLUDE_PATH="$PWD:$C_INCLUDE_PATH"
export LD_LIBRARY_PATH="$PWD/target:$LD_LIBRARY_PATH"
export LIBRARY_PATH="$PWD/target:$LIBRARY_PATH"
gcc -O2 test.c -o target/cmogus -L"$LIBRARY_PATH" -l:amogus || exit
gdb -q \
if [ -z "$NODEBUG" ];
then
gdb -q \
-iex 'set confirm no' \
-ex 'layout asm' \
-ex 'start' \
target/cmogus
else
target/cmogus
fi

193
meow.s
View file

@ -2,6 +2,7 @@
.type _start, @function
.globl entry
.type entry, @function
_start:
pop %rax
mov %rax,argc(%rip)
@ -36,18 +37,19 @@ usage:
ret
# rdi - buf
# -> rax - len
// rdi - buf
// -> rax - len
strlen:
// Length of the NUL-terminated string at %rdi, returned in %rax.
// Clobbers %rcx, %rsi and %rdi.
// Keep the start address so the distance scanned can be computed afterwards.
mov %rdi,%rsi
// scasb compares against %al, so %al must hold the terminator (0).
xor %rax,%rax
// repne stops when %rcx reaches zero as well as on a match; load the maximum
// count so the scan is bounded only by the terminator, not by whatever the
// caller happened to leave in %rcx.
mov $-1,%rcx
repne scasb
// %rdi now points one past the NUL: (end - start) counts the NUL too.
sub %rsi,%rdi
mov %rdi,%rax
// Drop the terminator from the count.
dec %rax
ret
# rdi - buf
# rsi - len
// rdi - buf
// rsi - len
write:
push %rdi
push %rsi
@ -62,10 +64,12 @@ help0:
.ascii "Usage: "
.set help0l, .-help0
help1:
.ascii " regex0 [regex1 ...]\nstdout: NUL-separated list of matching strings\n"
.ascii " regex0 [regex1 ...]\n"
.ascii "writes NUL-separated list of matching strings, one per regex to stdout\n"
.ascii "uses stack memory, consider disabling the limit if it segfaults\n"
.set help1l, .-help1
# rdi - exit code
// rdi - exit code
_exit:
mov $0x3c,%rax
syscall
@ -73,55 +77,180 @@ _exit:
.include "target/charjmpt.s"
# rdi - where
# rsi - char
#parse_...:
// eaptrdiff32 - turn a self-relative 32-bit offset into an absolute address.
//   ptr - register holding the base address
//   out - register that receives the result
//   off - constant added to ptr first; the field at ptr+off is the one read
//   tmp - temporary register, saved and restored around its use
// Result: out = (ptr+off) + sign-extended 32-bit value stored at (ptr+off),
// i.e. the offset is relative to the address of the field that holds it.
// When out is a different argument than ptr, ptr is saved and restored;
// otherwise ptr itself ends up holding the result.
// Uses push/pop, so %rsp must point at usable stack wherever this expands.
.macro eaptrdiff32 ptr=%rax, out=%rax, off=0, tmp=%r11
// \@ makes the label distinct for each expansion.
eaptrdiff32.\@:
// Preserve ptr only when the result goes to another register.
.if \ptr != \out
push \ptr
.endif
// Step to the field that stores the offset.
.if \off
add $\off,\ptr
.endif
push \tmp
mov \ptr,\tmp
// Load the 32-bit offset, sign-extended to 64 bits.
movslq (\tmp),\tmp
// field address + offset = target address
add \tmp,\ptr
pop \tmp
// Hand the result to out and restore the caller's ptr.
.if \ptr != \out
mov \ptr,\out
pop \ptr
.endif
.endm
// mkptrdiff32 - store a self-relative 32-bit offset.
//   to     - register holding the target address
//   to_low - 32-bit name of the register passed as `to` (call sites pass
//            pairs such as %rax,%eax or %r12,%r12d)
//   ptr    - register holding the base address of the field
//   off    - constant added to ptr to reach the field
// Writes the low 32 bits of (to - (ptr+off)) to address ptr+off, which is the
// form eaptrdiff32 reads back. `to` is restored afterwards, and ptr is
// restored when off is non-zero (with off=0 it is never modified).
// Uses push/pop, so %rsp must point at usable stack wherever this expands.
.macro mkptrdiff32 to=%r11, to_low=%r11d, ptr=%rax, off=0
// \@ makes the label distinct for each expansion.
mkptrdiff32.\@:
// `to` is overwritten by the subtraction below; keep the original.
push \to
.if \off
push \ptr
add $\off,\ptr
.endif
// to = target - field address
sub \ptr,\to
// Only the low 32 bits are stored.
movl \to_low,(\ptr)
.if \off
pop \ptr
.endif
pop \to
.endm
// rdi - &state
// rsi - char
// rcx, r8, r9, r10, r11 - scratch
// r12 - real stack (rsp - fake stack)
// -> rax - status
//parse_...:
// No instructions of its own in this view; execution reaching this label
// continues into push_tnode.
append_tnode_recurse:
// rdi - &group_alt
// rsi - type
// -> rax - text_node ptr
// Allocates a text_node below %r12 and appends it to the group_alt at %rdi:
// text_tail is pointed at the new node, the previous tail (if any) gets its
// `next` field pointed at it, and text_head is set when it was still zero.
// Clobbers %rcx; lowers %r12.
push_tnode:
// rcx = previous text_tail offset (0 means the list is empty)
movslq (%rdi),%rcx
// Reserve 5 32-bit words below %r12 and round the address down to 8 bytes.
sub $(5*4),%r12
and $-8,%r12
// text_node.type = requested type
movl %esi,0(%r12)
// text_node.next = 0 (no successor yet)
movl $0,4(%r12)
mov %r12,%rax
// group_alt.text_tail = new node (stored relative to the field itself)
mkptrdiff32 %rax,%eax,%rdi
test %rcx,%rcx
jz ptn_ptout
// rcx = address of the previous tail node
add %rdi,%rcx
// previous_tail.next (offset 4) = new node
mkptrdiff32 %rax,%eax,%rcx,4
ptn_ptout:
// rcx = group_alt.text_head offset (offset 4)
movslq 4(%rdi),%rcx
test %rcx,%rcx
jnz ptn_jout
// First node in this alternative: it is also the head.
mkptrdiff32 %rax,%eax,%rdi,4
ptn_jout:
ret
// Handler for '\\' (per the jump table generator).
// rdi - &state
// Advances the pointer stored at offset 2*8 of the state (labelled
// state.regchar where entry fills it in) to the byte after the backslash and
// loads that byte into %rsi. A NUL there goes to parse_exit; any other byte is
// handed to parse_self, so it is treated as a literal character.
parse_escape:
// Skip the backslash itself.
incq (2*8)(%rdi)
mov (2*8)(%rdi),%rsi
// rsi = escaped byte, zero-extended
movzbq (%rsi),%rsi
test %sil,%sil
// Backslash was the last byte of the regex.
jz parse_exit
jmp parse_self
parse_self:
push %rdi
push %rsi
sub $2,%rsp
movw $0,(%rsp)
movb %sil,(%rsp)
mov %rsp,%rdi
mov $1,%rsi
call write
add $2,%rsp
pop %rsi
pop %rdi
push %rsi
movq (%rdi),%rax # rax - group
eaptrdiff32 %rax # rax - group_alt
mov %rax,%rdi
movslq (%rax),%r9
test %r9,%r9
jz ps_alloc
eaptrdiff32 %rax # rax - text_node
movl (%rax),%r9d
cmp $3,%r9d
je ps_noalloc
ps_alloc:
mov $3,%rsi
call push_tnode
dec %r12
mkptrdiff32 %r12,%r12d,%rax,(8+0) # text_chars.text
inc %r12
movl $0,(8+4)(%rax) # text_chars.len
ps_noalloc:
eaptrdiff32 %rax,%r9,(8+0)
pop %rsi
movl (8+4)(%rax),%r8d
sub %r8,%r9
movb %sil,(%r9)
incl (8+4)(%rax)
xor %rax,%rax
ret
// All of the handlers below currently share one body: they return 1 in %rax.
// The parse loop in entry tests %rax after each handler call and leaves the
// loop on a non-zero value.
parse_grbegin:
parse_grend:
parse_murder:
parse_nextalt:
parse_erase:
parse_exit:
// Non-zero status.
mov $1,%rax
ret
# rdi - regex
# rsi - callback
# rdx - cb data
// rdi - regex
// rsi - callback
// rdx - cb data
entry:
push %rbp
mov %rsp,%rbp
sub $48,%rsp
mov %rsi,-8(%rbp)
mov %rdx,-16(%rbp)
mov %rdi,-24(%rbp)
// svregs state group group_alt
// (r12-r14)
sub $(3*8 + 5*8 + 3*4 + 3*4),%rsp
mov %r12,-8(%rbp)
mov %r13,-16(%rbp)
mov %r14,-24(%rbp)
mov %rsp,%r12
mov %rdi,(-3*8-5*8 + 2*8)(%rbp) # state.regchar
mov %rsi,(-3*8-5*8 + 3*8)(%rbp) # state.callback
mov %rdx,(-3*8-5*8 + 4*8)(%rbp) # state.cbdata
lea (-3*8-5*8-3*4)(%rbp),%r9
mov %r9,(-3*8-5*8 + 0*8)(%rbp) # state.mother_gr
mov %r9,(-3*8-5*8 + 1*8)(%rbp) # state.current_gr
lea (-3*8-5*8-3*4)(%rbp),%r9
lea (-3*8-5*8-3*4-3*4)(%rbp),%r8
mkptrdiff32 %r8,%r8d,%r9 # group.alts_tail
mkptrdiff32 %r8,%r8d,%r9,4 # group.alts_head
movl $0,(-3*8-5*8-3*4 + 2*4)(%rbp) # group.up
movl $0,(-3*8-5*8-3*4-3*4 + 0*4)(%rbp) # group_alt.text_tail
movl $0,(-3*8-5*8-3*4-3*4 + 1*4)(%rbp) # group_alt.text_head
movl $0,(-3*8-5*8-3*4-3*4 + 2*4)(%rbp) # group_alt.next
xchg %r12,%rsp
sub $512,%r12
entry_parse_begin:
movzbl (%rdi),%ecx
mov (-3*8-5*8 + 2*8)(%rbp),%rsi
movzbq (%rsi),%rsi # char to sil
lea charjmpt(%rip),%r11
movzx %cl,%r10
shl $2,%r10
movzx %sil,%r10
shl $1,%r10
add %r10,%r11
movsxd (%r11),%r11
movswq (%r11),%r11
charjmpt_prej:
lea charjmpt_prej(%rip),%r10
add %r10,%r11
mov %rdi,-32(%rbp)
lea (-3*8-5*8)(%rbp),%rdi # &state
call *%r11
test %rax,%rax
jnz entry_parse_end
mov -32(%rbp),%rdi
inc %rdi
incq (-3*8-5*8+2*8)(%rbp)
jmp entry_parse_begin
entry_parse_end:
mov %rdi,%rsi
mov -24(%rbp),%rdi
sub %rdi,%rsi
inc %rsi
mov -16(%rbp),%rdx
mov -8(%rbp),%r9
entry_pre_call:
call *%r9
mov -8(%rbp),%r12
mov -16(%rbp),%r13
mov -24(%rbp),%r14
mov %rbp,%rsp
pop %rbp
ret

101
regen.lua Executable file
View file

@ -0,0 +1,101 @@
#!/usr/bin/env lua
-- Pool of generator functions. Each one returns three strings when called.
local chs = {}

--- Add a generator for the single byte with code `n` to the pool.
-- Bytes other than alphanumerics and the space get a backslash prefix.
-- The generator returns the (possibly escaped) text twice, followed by the
-- unescaped byte.
local function dchar(n)
	local raw = string.char(n)
	local escaped = raw
	if raw:find("[^%w ]") then
		escaped = "\\" .. raw
	end
	table.insert(chs, function()
		return escaped, escaped, raw
	end)
end
local recurse=0;
-- NOTE(review): `asterisked` and `badl` are not assigned anywhere in the
-- visible file, so both are read as nil globals and this always returns
-- false. Nothing visible calls dren either; confirm whether it is a leftover
-- that can be removed.
local function dren()
return asterisked==0 and badl==0
end
local out = ""

-- One generator per byte code from 32 through 126.
for code = 32, 126 do
	dchar(code)
end

-- One extra generator: the fixed fragment "([]*)" paired with two empty
-- strings.
table.insert(chs, function()
	return "([]*)", "", ""
end)
--- Build a function that calls a uniformly chosen generator from `chs`.
-- The returned function yields exactly the three values of that generator.
local function charf()
	local pool = chs
	return function()
		local generator = pool[math.random(1, #pool)]
		local c, b, o = generator()
		return c, b, o
	end
end
local str
local char=charf()
--- Concatenate `n` fragments produced by `char()`.
-- Returns three strings: the joined first values, the joined second values and
-- the joined third values of the fragments. When `bad` is truthy a "[]" is
-- planted in the first string and the other two results are empty strings.
function str(n, bad)
	local t, ob, ot = {}, {}, {}
	for _ = 1, n do
		local c, b, o = char()
		t[#t + 1] = c
		ob[#ob + 1] = b
		ot[#ot + 1] = o
	end

	if bad then
		if n > 0 then
			local slot = math.random(1, #t)
			-- NOTE(review): the text planted at `slot` is built from the LAST
			-- fragment (t[n]), not from t[slot] - confirm this is intended.
			local last = t[n]
			if math.random() > 0.5 then
				t[slot] = last .. "[]"
			else
				t[slot] = "[]" .. last
			end
		else
			t[#t + 1] = '[]'
		end
	end

	local text = table.concat(t)
	if bad then
		return text, '', ''
	end
	return text, table.concat(ob), table.concat(ot)
end
-- Replace `str` with a version that builds `c` alternatives joined by '|'.
-- Exactly one randomly chosen alternative is generated with bad=false; all
-- others are generated with bad=true. Each alternative has a random fragment
-- count between `min` and `man`. Returns the joined text followed by the
-- second and third results of the one non-bad alternative.
do
	local single = str
	str = function(min, man, c)
		local alts = {}
		local good = math.random(1, c)
		local ob, ot
		for i = 1, c do
			local is_good = (i == good)
			local text, b, o = single(math.random(min, man), not is_good)
			alts[i] = text
			if is_good then
				ob, ot = b, o
			end
		end
		return table.concat(alts, '|'), ob, ot
	end
end
local mrecurse=0
-- Replace `char` with a version that can also produce a parenthesised group
-- (built by `str`) and can append "*" to whatever it produced.
-- Returns three strings like the generators in `chs`. When a "*" is appended
-- the second result becomes the starred text and the third becomes ''.
char = (function(pick_plain)
	return function()
		local c, b, o
		-- Decide once, up front, whether this fragment gets a trailing "*".
		local ast = math.random() < 0.2
		-- Chance of opening a nested group: 0.05 at depth 0, shrinking towards
		-- 0 as the nesting depth `recurse` grows.
		local group_chance = 0.05 * (1 - math.atan(recurse * (1.5)) / math.atan(math.huge))
		if math.random() < group_chance then
			recurse = recurse + 1
			if recurse > mrecurse then
				-- New deepest nesting level seen so far; report it.
				mrecurse = recurse
				io.stderr:write(mrecurse, ' rec\n')
			end
			local s
			s, b, o = str(0, 16, math.random(1, 64))
			recurse = recurse - 1
			c = '(' .. s .. ')'
		else
			c, b, o = pick_plain()
		end
		if ast then
			c = c .. "*"
			b = c
			o = ''
		end
		return c, b, o
	end
end)(char)
-- Generate one expression with 256 alternatives of 0..16 fragments each.
-- The expression goes to stdout; the deepest nesting level and the two
-- companion strings go to stderr.
local regex, second, third = str(0, 16, 256)
print(regex)
io.stderr:write(mrecurse, ' rec\n', second, '\n', third, '\n')

40
structs.txt Normal file
View file

@ -0,0 +1,40 @@
/*
struct group {
alts_tail: group_alt ptrdiff32
alts_head: group_alt ptrdiff32
up: group ptrdiff32
}
struct group_alt {
text_tail: text_node ptrdiff32
text_head: text_node ptrdiff32
next: group_alt ptrdiff32
}
struct text_node {
type: i32 enum {
// null = 0
murder = 1 // union: void[0]
wiped = 2 // union: void[0]
chars = 3 // union: text_chars
group = 4 // union: group
}
next: text_node ptrdiff32
data: union ...
}
struct text_chars {
text: char[]rev ptrdiff32
len: u32
}
struct parse_state {
current_gr: group ptr64
mother_gr: group ptr64
regchar: char ptr64
callback: fn ptr64
cbdata: void ptr64
...?
}
*/