From 879d7c317251b3fafc653d319ee5a6ec71202ffc Mon Sep 17 00:00:00 2001 From: Kimapr Date: Mon, 8 Apr 2024 05:25:15 +0500 Subject: [PATCH] mrrrrrp --- amogus.h | 9 ++- amogus.png | Bin 0 -> 4858 bytes build | 15 +++- charjmpt.lua | 8 ++- ctest | 15 ++-- meow.s | 193 ++++++++++++++++++++++++++++++++++++++++++--------- regen.lua | 101 +++++++++++++++++++++++++++ structs.txt | 40 +++++++++++ 8 files changed, 335 insertions(+), 46 deletions(-) create mode 100644 amogus.png create mode 100755 regen.lua create mode 100644 structs.txt diff --git a/amogus.h b/amogus.h index 4cab2c3..0983a12 100644 --- a/amogus.h +++ b/amogus.h @@ -3,13 +3,12 @@ // Parse regex. Find matching string. // If found: // call callback( -// matched string (only valid before callback return), +// matched string (only valid until callback return), // length of the string including NUL delimiter, -// data passed in the data argument // ) // return 0 // Else: -// return 1 +// return ERR -typedef void (entry_callback)(char* match, int size, void* data); -int entry(char* regex, entry_callback *callback, void *data); +typedef void(entry_callback)(char *match, int size, void *data); +int entry(char *regex, entry_callback *callback, void *data); diff --git a/amogus.png b/amogus.png new file mode 100644 index 0000000000000000000000000000000000000000..189098252b050683e8fe7d94cdb85f6f9de3e183 GIT binary patch literal 4858 zcmb_giCYuNwoeF)3JC%V5;lcVM%mOvWJw^=L>7@n+zFzNN)Qn-QAm&_WE4;cE+B%T z>4`XuB5?2EfWUw=IKqqq-9ZJ1RYd4^1Y+z~1OZ=r-}k=n{Q+5^83l#Y0Na5(8`*QSL#32xy@|Ggp-$E?{mk}KDteJe>dUU4iYPPAj6z3KlV*K0a@GZ1ic)y;BE|?wj`5 zTzS0s@-EkQo>S4$vI0ZKv&ZF@F{3R(_pZd8-}m=ghf1+Bd(1LM9dz%&s(nm{%73n3 zZY>gObMBp7wU5Pc`Tv|5wI-|8@@wCPlc`Pfb0NG~MrZX<-5(y79#cnVZBMCwd4W}k zc(Eo(&u>|~=Z~zO>R2_Okza(x zJ|&o|h{j+3y=aa4+m-li;9|80qMpr-QJP9pM=uD0%d5ODBD-6&2pd%i`uh&D9-Jan z-fqaMdOck*h@B>Pi0nIi{K}?VJgX36o>^@vvfBol;ycvdKz6T&c$mPNsrHtn7GTGD zR!}{Q7>X-IjP?l@r@pVXwqmLWsXx9q60e7UR*3gjh_9XvAq2uo2v zJEzydfU{W-lIH!HXIe#TU>;^YSYr~`c=-uK%|hNA;mu%_8~DI(qK$ z{ilZ*DyvWQPWglZH_FE9uL>79B&BMNm{0-Hd|bCU8kFeM(d)$1>oQ^iov3^#fEciM zQfgDWelc^4<`Z8eQmEJxXks#;CCL)4I5d=AyDfWs`qeZo*jw|bH(GJrF5^T;@?Rij=FWh@ zdfz!4IV1Ug%ioP%xhr|! ztJ*y7(K~;x!6@7ke@`BG&%|!!Mm+NZ4>s3_I!(Ph!mrl$cWwN%Qz1AE9;8W!{{^Cd z0b$DI71roMV6{NhW{R4CmV(iGCAtt?D0owU9UYDk&hJW|N55GHGuu80{P90eA9l+@ zLOS(e=gbd+XkKQ0XxT|kI}sE_@CRSCF?b?l5BYj?%q)7b0ka*cZCt@gXxdNQ_ULsS z1-5|Wt5(KEBsEJ^|ImcJ?pmclT>kv~I6G*na~B`}DejSH9a`OU_1+w7$Sd}dJ~PH! zsxNKVAZmLkF<7tyR>r;ctP}zlrTX--9YBSA%(DowLe}~UTTPRQaNPl1z0myNPS#4T z(iERKveG#>8cYZtot}GIWX?RWfm^2pwK`rV!u#*DtBmNCk5!CR_keTNB2wj!-6~nP zxeAL@#jJMmvJvje(%&cHQ7U(+Y9>4{99m9C7m0F(;8x|Ac9qOE#o>^P7_5*r@2VSm zOQDaRyDtl-&h&z)o2P{^zs{iveP<-hpIRKZ>83J}39l_Tr7}Bj^C%tfPjzwP7tTC> zK34ZZc3z?sf>Wj`D_vfhlDCO;)lc&l3YAlfsmv$0&)9slURz$Q=8Pzh@f;~BzX{Ir zD7gei)mpFhGogx?!4D(8V3E&8T8sXdQELiYC+uC6_OMo@$F40S^QA`+NNv z=Gc4c`0t+Da~9x61Dg^|+_QqV!5DoRw5IIpN0DN0V+B=z==O0x&M#LM{Cy59&PP5GM~b>5ZiZ*_*b)3))6$NyhvYa09$Uz zvu_#~ZcnEr^!9b@*;8?om-%%Qh7NS}1JZcw+bkityL9~8EhCj#daI}Nn`r}edTn3U zmfm!CVC0Iv*D~~&l9HuQNB%66(MG1%{8L{!edD&2BsXuCD$wnqV?t_LfoW1|l-M7pqE&#MW$_9xU9=2HcIhe&U_A(-qb}USky)QN zNh;ao*!1NJwaOq8AvHjzM@D9=P$4Lptl{P5$^MkJR_P~Bb2W)%=`0@jQx|- z6|o+DIp{60X56FUuJ}NQA>U1JAf4s~Mr691G?|L8#Yge&;L4Cu&PI?zL%XY8c$5+; zr>-ha+x)M->{ve6;A6kEFqYSsVAOYV{CCN7S=HaC?y11!yYF3`6~ma!KQ^CE|ASK<_TlZZ!vEEef#Oy9(||pBLUjOk?B`%cX{}0lfKpCP+m?xNTT} zF93ed*7q_&-EZlsqU9$xa1F42I{cPn1(%$@C+VE8*{V6^*wO& zq2+NtH);i^ZYkMj2&2Ohr)VjwXFi9&NZ-E!^k%Rj-DCwHG>$;y4~l!vHXApL>XjVv z(YS+s3!uwH)Ms;zyM7hmc;dMxRy3(;(q(vUdm=A-KvbqZdwd;epcr!oA9!D^bVl_k z4clJM)(Sz{l05B)-U^*yxLGpKvr2)9A2g z;n-0IopFCl zbQ}yfLK^*|==fzip1Jks4ahLCS?;r4J{XBbBlp&$4ja&cDLRz^1V@<;j*EihVxfy1yT7x<`NIbTK=cAEIET0vwend|#mZu{wn@;znpbYkksIKvrj{Lw-R=+Ag$ z&Hs+oxZUq847rj%CP{;#X$|eEF*>q*CK`sSUd%)d)4l>S1>NY8wmH6Qqd~Xa9jw!t zyWJcUWwXcC>krxtX(k2My#4$c3Ym3nKcsfIHsL3d=JLL%w&~8fzgY@cK~5asw7V4K z^3X0LL+RsmADT*&G2hgJL;!xNb7u-ra}qJpnPA##ZB*K;lJdRS?Jgzd>)84+t4i`5 z?TykKm7_m^LlS_oPzZc?A$T1)w3yu|su|v6FP%vb)+t7MLBF)U+B)ALG8bb7;y9VT z2@in=?ZoNi6l-3d-oP$dYOujTw=rSMU9j{icO%8?E;{Y4nB!ss9sD!2{tt`=yO0Re zrEAgJ1-OXVq-@PWBSL;=pxxIVIIw77PloNMWNh$5e+%ITh4IWs5kAH;6q6v1pYGiF z#PnYb^&GF7Jg1~K$fKG=toGMO6PkG2<~#&32Rab;7QE1SP=>+efkR0? zcibbynQimeFsKb~jkTRKQOHrtGH>>Ki|c+gv{kfH9cecu>UhYh1AsV>qw*7*J%*BA z7_T$#P3Yh613b)bmc>7V{z_@r*mfM~WM%3O%p_0FHe$`}Vv}o`QD42(c6mRpm!a^( zy?b(z!(iIhlfxSiHQ0u0`j_Xp%oLhuYMxB#bzrN&hXs-6dS09T(VUxfh-_SG+~}HP`n@R35CYZbxR5v@IgX~0js%D ztiq)wFt(we0VnW#K8&9a3{Pur_+%%m z+`Q=l5~Gy3daB|#E2E2|9i%VM&hMjl^772*n%WLtlT_Jz%CWZaw6mr*1-@eyU(%%Dv(?cXbX!y^fXW6+Dwg zds*L?@UtH-g>s`+lGY+L9lW8<5!0c_yel56Lh0n${9jF)VA0ggGHQ%+XD)7DEZHKx zjI8s^UZ!?p<#`4EmIaJUUUR$nxS;d9yelqct0$gR0FRunUw!-W2ivphy{9h-`euIQ+_-nuvR zDZynB+_oBUZ12)A@1D@7&Sf#?&zXkDl{+uoGrR44w}d@8N-Uw?<`JU R`OhI%06&;lwdp{{{{TYZb; target/charjmpt.s gcc -c meow.s -o target/amogus.o || exit -gnu() { shift 1 && clang "$@"; } -"$(command -v ~/stuff/zig/zig || echo gnu)" \ -cc -O3 -shared -static -nostdlib -nodefaultlibs target/amogus.o -o target/amogus || exit +if command -v ~/stuff/zig/zig >/dev/null; then + ~/stuff/zig/zig cc -shared -nostdlib target/amogus.o -o target/amogus || exit +elif command -v ld.lld >/dev/null; then + ld.lld -shared -nostdlib target/amogus.o -o target/amogus || exit +else + gcc -shared -nostdlib target/amogus.o -o target/amogus || exit +fi +#gnu() { shift 1 && gcc "$@"; } +#"$(command -v ~/stuff/zig/zig || echo gnu)" \ +#cc -shared -nostdlib target/amogus.o -o target/amogus || exit +#ld.lld -shared -nostdlib target/amogus.o -o target/amogus || exit objdump -d target/amogus cp target/amogus target/amogstrip strip -K entry target/amogstrip @@ -14,4 +22,5 @@ rm -rf target/cg/ mkdir -p target/cg cp target/amogstrip target/cg/amogus cp amogus.h target/cg/ +cp amogus.png target/cg/ diff --git a/charjmpt.lua b/charjmpt.lua index c9d7d41..dc44c22 100755 --- a/charjmpt.lua +++ b/charjmpt.lua @@ -2,9 +2,15 @@ print("charjmpt:") chars = { ['\0'] = "parse_exit", + ['('] = "parse_grbegin", + [')'] = "parse_grend", + ['\\'] = "parse_escape", + ['|'] = "parse_nextalt", + ['['] = "parse_murder", + ['*'] = "parse_erase", } local def = "parse_self" for n=0,255 do local c = string.char(n) - print("\t.4byte "..(chars[c] or "parse_self").." - charjmpt_prej") + print("\t.2byte "..(chars[c] or "parse_self").." - charjmpt_prej") end diff --git a/ctest b/ctest index 5c3b0db..8f890a8 100755 --- a/ctest +++ b/ctest @@ -4,8 +4,13 @@ export C_INCLUDE_PATH="$PWD:$C_INCLUDE_PATH" export LD_LIBRARY_PATH="$PWD/target:$LD_LIBRARY_PATH" export LIBRARY_PATH="$PWD/target:$LIBRARY_PATH" gcc -O2 test.c -o target/cmogus -L"$LIBRARY_PATH" -l:amogus || exit -gdb -q \ - -iex 'set confirm no' \ - -ex 'layout asm' \ - -ex 'start' \ - target/cmogus +if [ -z "$NODEBUG" ]; +then + gdb -q \ + -iex 'set confirm no' \ + -ex 'layout asm' \ + -ex 'start' \ + target/cmogus +else + target/cmogus +fi diff --git a/meow.s b/meow.s index dd01cfc..f0aea36 100644 --- a/meow.s +++ b/meow.s @@ -2,6 +2,7 @@ .type _start, @function .globl entry .type entry, @function + _start: pop %rax mov %rax,argc(%rip) @@ -36,18 +37,19 @@ usage: ret -# rdi - buf -# -> rax - len +// rdi - buf +// -> rax - len strlen: mov %rdi,%rsi xor %rax,%rax repne scasb sub %rsi,%rdi mov %rdi,%rax + dec %rax ret -# rdi - buf -# rsi - len +// rdi - buf +// rsi - len write: push %rdi push %rsi @@ -62,10 +64,12 @@ help0: .ascii "Usage: " .set help0l, .-help0 help1: - .ascii " regex0 [regex1 ...]\nstdout: NUL-separated list of matching strings\n" + .ascii " regex0 [regex1 ...]\n" + .ascii "writes NUL-separated list of matching strings, one per regex to stdout\n" + .ascii "uses stack memory, consider disabling the limit if it segfaults\n" .set help1l, .-help1 -# rdi - exit code +// rdi - exit code _exit: mov $0x3c,%rax syscall @@ -73,55 +77,180 @@ _exit: .include "target/charjmpt.s" -# rdi - where -# rsi - char -#parse_...: +.macro eaptrdiff32 ptr=%rax, out=%rax, off=0, tmp=%r11 + eaptrdiff32.\@: + .if \ptr != \out + push \ptr + .endif + .if \off + add $\off,\ptr + .endif + push \tmp + mov \ptr,\tmp + movslq (\tmp),\tmp + add \tmp,\ptr + pop \tmp + .if \ptr != \out + mov \ptr,\out + pop \ptr + .endif +.endm + +.macro mkptrdiff32 to=%r11, to_low=%r11d, ptr=%rax, off=0 + mkptrdiff32.\@: + push \to + .if \off + push \ptr + add $\off,\ptr + .endif + sub \ptr,\to + movl \to_low,(\ptr) + .if \off + pop \ptr + .endif + pop \to +.endm + +// rdi - &state +// rsi - char +// rcx, r8, r9, r10, r11 - scratch +// r12 - real stack (rsp - fake stack) +// -> rax - status +//parse_...: + +append_tnode_recurse: + + +// rdi - &group_alt +// rsi - type +// -> rax - text_node ptr +push_tnode: + movslq (%rdi),%rcx + sub $(5*4),%r12 + and $-8,%r12 + movl %esi,0(%r12) + movl $0,4(%r12) + mov %r12,%rax + mkptrdiff32 %rax,%eax,%rdi + test %rcx,%rcx + jz ptn_ptout + add %rdi,%rcx + mkptrdiff32 %rax,%eax,%rcx,4 + ptn_ptout: + movslq 4(%rdi),%rcx + test %rcx,%rcx + jnz ptn_jout + mkptrdiff32 %rax,%eax,%rdi,4 + ptn_jout: + ret + +parse_escape: + incq (2*8)(%rdi) + mov (2*8)(%rdi),%rsi + movzbq (%rsi),%rsi + test %sil,%sil + jz parse_exit + jmp parse_self parse_self: + push %rdi + push %rsi + sub $2,%rsp + movw $0,(%rsp) + movb %sil,(%rsp) + mov %rsp,%rdi + mov $1,%rsi + call write + add $2,%rsp + pop %rsi + pop %rdi + push %rsi + movq (%rdi),%rax # rax - group + eaptrdiff32 %rax # rax - group_alt + mov %rax,%rdi + movslq (%rax),%r9 + test %r9,%r9 + jz ps_alloc + eaptrdiff32 %rax # rax - text_node + movl (%rax),%r9d + cmp $3,%r9d + je ps_noalloc + ps_alloc: + mov $3,%rsi + call push_tnode + dec %r12 + mkptrdiff32 %r12,%r12d,%rax,(8+0) # text_chars.text + inc %r12 + movl $0,(8+4)(%rax) # text_chars.len + ps_noalloc: + eaptrdiff32 %rax,%r9,(8+0) + pop %rsi + movl (8+4)(%rax),%r8d + sub %r8,%r9 + movb %sil,(%r9) + incl (8+4)(%rax) xor %rax,%rax ret +parse_grbegin: +parse_grend: +parse_murder: +parse_nextalt: +parse_erase: parse_exit: mov $1,%rax ret - -# rdi - regex -# rsi - callback -# rdx - cb data +// rdi - regex +// rsi - callback +// rdx - cb data entry: push %rbp mov %rsp,%rbp - sub $48,%rsp - mov %rsi,-8(%rbp) - mov %rdx,-16(%rbp) - mov %rdi,-24(%rbp) + // svregs state group group_alt + // (r12-r14) + sub $(3*8 + 5*8 + 3*4 + 3*4),%rsp + mov %r12,-8(%rbp) + mov %r13,-16(%rbp) + mov %r14,-24(%rbp) + mov %rsp,%r12 + mov %rdi,(-3*8-5*8 + 2*8)(%rbp) # state.regchar + mov %rsi,(-3*8-5*8 + 3*8)(%rbp) # state.callback + mov %rdx,(-3*8-5*8 + 4*8)(%rbp) # state.cbdata + lea (-3*8-5*8-3*4)(%rbp),%r9 + mov %r9,(-3*8-5*8 + 0*8)(%rbp) # state.mother_gr + mov %r9,(-3*8-5*8 + 1*8)(%rbp) # state.current_gr + lea (-3*8-5*8-3*4)(%rbp),%r9 + lea (-3*8-5*8-3*4-3*4)(%rbp),%r8 + mkptrdiff32 %r8,%r8d,%r9 # group.alts_tail + mkptrdiff32 %r8,%r8d,%r9,4 # group.alts_head + movl $0,(-3*8-5*8-3*4 + 2*4)(%rbp) # group.up + movl $0,(-3*8-5*8-3*4-3*4 + 0*4)(%rbp) # group_alt.text_tail + movl $0,(-3*8-5*8-3*4-3*4 + 1*4)(%rbp) # group_alt.text_head + movl $0,(-3*8-5*8-3*4-3*4 + 2*4)(%rbp) # group_alt.next + xchg %r12,%rsp + sub $512,%r12 entry_parse_begin: - movzbl (%rdi),%ecx + mov (-3*8-5*8 + 2*8)(%rbp),%rsi + movzbq (%rsi),%rsi # char to sil lea charjmpt(%rip),%r11 - movzx %cl,%r10 - shl $2,%r10 + movzx %sil,%r10 + shl $1,%r10 add %r10,%r11 - movsxd (%r11),%r11 + movswq (%r11),%r11 charjmpt_prej: lea charjmpt_prej(%rip),%r10 add %r10,%r11 - mov %rdi,-32(%rbp) + lea (-3*8-5*8)(%rbp),%rdi # &state call *%r11 test %rax,%rax jnz entry_parse_end - mov -32(%rbp),%rdi - inc %rdi + incq (-3*8-5*8+2*8)(%rbp) jmp entry_parse_begin entry_parse_end: - mov %rdi,%rsi - mov -24(%rbp),%rdi - sub %rdi,%rsi - inc %rsi - mov -16(%rbp),%rdx - mov -8(%rbp),%r9 - entry_pre_call: - call *%r9 + mov -8(%rbp),%r12 + mov -16(%rbp),%r13 + mov -24(%rbp),%r14 mov %rbp,%rsp pop %rbp ret diff --git a/regen.lua b/regen.lua new file mode 100755 index 0000000..4dbca58 --- /dev/null +++ b/regen.lua @@ -0,0 +1,101 @@ +#!/usr/bin/env lua +local chs={} +local function dchar(n) + local ch=string.char(n) + local cho=ch + if ch:match("[^%w ]") then + ch="\\"..ch + end + chs[#chs+1]=function() + return ch,ch,cho + end +end +local recurse=0; +local function dren() + return asterisked==0 and badl==0 +end +local out="" +for n=32,126 do + dchar(n) +end +chs[#chs+1]=function() return "([]*)","","" end +local function charf() + local chl=chs + return function() + local c,b,o=chl[math.random(1,#chl)]() + return c,b,o + end +end +local str +local char=charf() +function str(n,bad) + local t={} + local ot={} + local ob={} + for x=1,n do + t[#t+1],ob[#ob+1],ot[#ot+1]=char() + end + if bad then + if n>0 then + t[math.random(1,#t)]=math.random()>0.5 and t[n].."[]" or "[]"..t[n] + else + t[#t+1]='[]' + end + end + return table.concat(t),bad and '' or table.concat(ob),bad and '' or table.concat(ot) +end +str=(function(str) return function(min,man,c) + local t={} + local goods={} + for n=1,1 do + local nn + repeat + nn=math.random(1,c) + until not goods[nn] + goods[nn]=true + end + local ot + local ob + for n=1,c do + local o + local oc + t[n],oc,o=str(math.random(min,man),not goods[n]) + if goods[n] then + ot=o + ob=oc + end + end + return table.concat(t,'|'),ob,ot +end end)(str); + +local mrecurse=0 +char=(function(char) return function() + local c,o + local b + local ast=math.random()<0.2 + if math.random()<0.05*(1-math.atan(recurse*(1.5))/math.atan(math.huge)) then + local s + recurse=recurse+1 + if recurse>mrecurse then + mrecurse=math.max(mrecurse,recurse) + io.stderr:write(mrecurse,' rec\n') + end + s,b,o=str(0,16,math.random(1,64)) + recurse=recurse-1 + c='('..s..')' + else + c,b,o=char() + end + if math.random()<0.2 then + c=c.."*" + b=c + o='' + end + return c,b,o +end end)(char) + +local s,b,o=str(0,16,256) +print(s) +io.stderr:write(mrecurse,' rec\n') +io.stderr:write(b,'\n') +io.stderr:write(o,'\n') diff --git a/structs.txt b/structs.txt new file mode 100644 index 0000000..09a8d64 --- /dev/null +++ b/structs.txt @@ -0,0 +1,40 @@ +/* +struct group { + alts_tail: group_alt ptrdiff32 + alts_head: group_alt ptrdiff32 + up: group ptrdiff32 +} + +struct group_alt { + text_tail: text_node ptrdiff32 + text_head: text_node ptrdiff32 + next: group_alt ptrdiff32 +} + +struct text_node { + type: i32 enum { + // null = 0 + murder = 1 // union: void[0] + wiped = 2 // union: void[0] + chars = 3 // union: text_chars + group = 4 // union: group + } + next: text_node ptrdiff32 + data: union ... +} + +struct text_chars { + text: char[]rev ptrdiff32 + len: u32 +} + +struct parse_state { + current_gr: group ptr64 + mother_gr: group ptr64 + regchar: char ptr64 + callback: fn ptr64 + cbdata: void ptr64 + ...? +} +*/ +