From a7171681f481285f07a205a7dfcfafd7aa78f3b3 Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Sat, 28 Oct 2023 15:59:30 +0200 Subject: [PATCH] [erts] Remove unnecessary regex lib --- erts/lib_src/yielding_c_fun/GNUmakefile | 4 +- erts/lib_src/yielding_c_fun/Makefile | 27 +- .../lib/tiny_regex_c/GIT_VERSION | 468 ----------------- .../yielding_c_fun/lib/tiny_regex_c/LICENSE | 24 - .../yielding_c_fun/lib/tiny_regex_c/Makefile | 106 ---- .../yielding_c_fun/lib/tiny_regex_c/README.md | 138 ----- .../yielding_c_fun/lib/tiny_regex_c/re.c | 470 ------------------ .../yielding_c_fun/lib/tiny_regex_c/re.h | 54 -- erts/lib_src/yielding_c_fun/main_target.mk | 5 - erts/lib_src/yielding_c_fun/ycf_lexer.c | 155 ++++-- 10 files changed, 115 insertions(+), 1336 deletions(-) delete mode 100644 erts/lib_src/yielding_c_fun/lib/tiny_regex_c/GIT_VERSION delete mode 100644 erts/lib_src/yielding_c_fun/lib/tiny_regex_c/LICENSE delete mode 100644 erts/lib_src/yielding_c_fun/lib/tiny_regex_c/Makefile delete mode 100644 erts/lib_src/yielding_c_fun/lib/tiny_regex_c/README.md delete mode 100644 erts/lib_src/yielding_c_fun/lib/tiny_regex_c/re.c delete mode 100644 erts/lib_src/yielding_c_fun/lib/tiny_regex_c/re.h diff --git a/erts/lib_src/yielding_c_fun/GNUmakefile b/erts/lib_src/yielding_c_fun/GNUmakefile index c87a945cf67b..2706571d06fa 100644 --- a/erts/lib_src/yielding_c_fun/GNUmakefile +++ b/erts/lib_src/yielding_c_fun/GNUmakefile @@ -194,13 +194,13 @@ cmake_compile: CMakeLists.txt cd cmake_mkdir && cmake .. 
clang_tidy: - (ls *.c ; echo lib/tiny_regex_c/re.c ; echo lib/simple_c_gc/simple_c_gc.c) | xargs -I{} -n1 clang-tidy -warnings-as-errors=* {} -- $(YCF_INCLUDE_DIRS) $(YCF_CFLAGS) + (ls *.c ; echo lib/simple_c_gc/simple_c_gc.c) | xargs -I{} -n1 clang-tidy -warnings-as-errors=* {} -- $(YCF_INCLUDE_DIRS) $(YCF_CFLAGS) clang_format: clang-format -style="{BasedOnStyle: LLVM}" -i *.c *.h clean: - rm -f lib/simple_c_gc/*.o lib/tiny_regex_c/*.o ./*.o ./*~ core trap parse $(YCF_EXECUTABLE) CMakeLists.txt + rm -f lib/simple_c_gc/*.o ./*.o ./*~ core trap parse $(YCF_EXECUTABLE) CMakeLists.txt # Produce a CMakeLists.txt to build with cmake CMakeLists.txt: $(YCF_SOURCES) diff --git a/erts/lib_src/yielding_c_fun/Makefile b/erts/lib_src/yielding_c_fun/Makefile index 71cef1b3208b..c3bcc7a45237 100644 --- a/erts/lib_src/yielding_c_fun/Makefile +++ b/erts/lib_src/yielding_c_fun/Makefile @@ -16,39 +16,36 @@ # building YCF independently of Erlang/OTP. # -./bin/yielding_c_fun.bin: simple_c_gc.o re.o ycf_lexer.o ycf_main.o ycf_node.o ycf_parser.o ycf_printers.o ycf_string.o ycf_symbol.o ycf_utils.o ycf_yield_fun.o - $(CC) -I. -I./lib/tiny_regex_c -I./lib/simple_c_gc $(CFLAGS) -o ./bin/yielding_c_fun.bin simple_c_gc.o re.o ycf_lexer.o ycf_main.o ycf_node.o ycf_parser.o ycf_printers.o ycf_string.o ycf_symbol.o ycf_utils.o ycf_yield_fun.o +./bin/yielding_c_fun.bin: simple_c_gc.o ycf_lexer.o ycf_main.o ycf_node.o ycf_parser.o ycf_printers.o ycf_string.o ycf_symbol.o ycf_utils.o ycf_yield_fun.o + $(CC) -I. -I./lib/simple_c_gc $(CFLAGS) -o ./bin/yielding_c_fun.bin simple_c_gc.o ycf_lexer.o ycf_main.o ycf_node.o ycf_parser.o ycf_printers.o ycf_string.o ycf_symbol.o ycf_utils.o ycf_yield_fun.o simple_c_gc.o: ./lib/simple_c_gc/simple_c_gc.c - $(CC) -I. -I./lib/tiny_regex_c -I./lib/simple_c_gc $(CFLAGS) -c -o simple_c_gc.o ./lib/simple_c_gc/simple_c_gc.c - -re.o: ./lib/tiny_regex_c/re.c - $(CC) -I. -I./lib/tiny_regex_c -I./lib/simple_c_gc $(CFLAGS) -c -o re.o ./lib/tiny_regex_c/re.c + $(CC) -I. 
-I./lib/simple_c_gc $(CFLAGS) -c -o simple_c_gc.o ./lib/simple_c_gc/simple_c_gc.c ycf_lexer.o: ./ycf_lexer.c - $(CC) -I. -I./lib/tiny_regex_c -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_lexer.o ./ycf_lexer.c + $(CC) -I. -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_lexer.o ./ycf_lexer.c ycf_main.o: ./ycf_main.c - $(CC) -I. -I./lib/tiny_regex_c -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_main.o ./ycf_main.c + $(CC) -I. -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_main.o ./ycf_main.c ycf_node.o: ./ycf_node.c - $(CC) -I. -I./lib/tiny_regex_c -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_node.o ./ycf_node.c + $(CC) -I. -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_node.o ./ycf_node.c ycf_parser.o: ./ycf_parser.c - $(CC) -I. -I./lib/tiny_regex_c -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_parser.o ./ycf_parser.c + $(CC) -I. -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_parser.o ./ycf_parser.c ycf_printers.o: ./ycf_printers.c - $(CC) -I. -I./lib/tiny_regex_c -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_printers.o ./ycf_printers.c + $(CC) -I. -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_printers.o ./ycf_printers.c ycf_string.o: ./ycf_string.c - $(CC) -I. -I./lib/tiny_regex_c -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_string.o ./ycf_string.c + $(CC) -I. -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_string.o ./ycf_string.c ycf_symbol.o: ./ycf_symbol.c - $(CC) -I. -I./lib/tiny_regex_c -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_symbol.o ./ycf_symbol.c + $(CC) -I. -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_symbol.o ./ycf_symbol.c ycf_utils.o: ./ycf_utils.c - $(CC) -I. -I./lib/tiny_regex_c -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_utils.o ./ycf_utils.c + $(CC) -I. -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_utils.o ./ycf_utils.c ycf_yield_fun.o: ./ycf_yield_fun.c - $(CC) -I. -I./lib/tiny_regex_c -I./lib/simple_c_gc $(CFLAGS) -c -o ycf_yield_fun.o ./ycf_yield_fun.c + $(CC) -I. 
-I./lib/simple_c_gc $(CFLAGS) -c -o ycf_yield_fun.o ./ycf_yield_fun.c diff --git a/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/GIT_VERSION b/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/GIT_VERSION deleted file mode 100644 index 0bbde9c80d10..000000000000 --- a/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/GIT_VERSION +++ /dev/null @@ -1,468 +0,0 @@ -origin https://github.com/kokke/tiny-regex-c.git (fetch) -origin https://github.com/kokke/tiny-regex-c.git (push) -commit d3058f271f7a06ff298dff0a6a9a1e0753a5fa17 -Merge: 28882c4 c2ed772 -Author: kokke -Date: Fri Oct 26 23:20:07 2018 +0200 - - Merge pull request #22 from monolifed/master - - Update re.c for #20 - -commit c2ed77267c86e30aa342f48528e5ffce6ab4d103 -Author: monolifed <6624464+monolifed@users.noreply.github.com> -Date: Thu Oct 25 19:19:24 2018 +0300 - - Update re.c - -commit 28882c4a39fbc9ddd44e69caa22aac4c4a208934 -Author: kokke -Date: Tue Oct 23 11:27:23 2018 +0200 - - Update README.md - -commit 4583018febd2d28277b512f09427e2ba01b4cbd5 -Author: kokke -Date: Tue Oct 23 11:26:47 2018 +0200 - - Update README.md - -commit 2211111107da75f5574d5f0140e4396ae701d947 -Author: kokke -Date: Mon Oct 22 16:04:02 2018 +0200 - - Update test1.c - - Adding failing test-case for question-mark '?', brought to my attention by @tobermory in https://github.com/kokke/tiny-regex-c/issues/20 - -commit 679aebd38a245afb9f9d107d066b68765b94865b -Author: kokke -Date: Mon Oct 22 15:41:33 2018 +0200 - - Update re.c - - fixing typo, noticed by @tobermory -> https://github.com/kokke/tiny-regex-c/issues/19 - -commit 2f225fa5e355ad3a99cdd5e953768399fe0b6607 -Author: kokke -Date: Wed Jun 6 18:15:48 2018 +0200 - - Update test1.c - -commit b587a65abf0f1347a3b7c7050b73c5dbb94d9cb7 -Merge: 89a479f 96a8f77 -Author: kokke -Date: Wed Jun 6 18:10:57 2018 +0200 - - Merge pull request #17 from monolifed/patch-1 - - Update re.c - -commit 96a8f770c2922505699c9a4d6ba9b9584be5ee29 -Author: monolifed <6624464+monolifed@users.noreply.github.com> -Date: Thu 
May 31 02:06:49 2018 +0300 - - Update re.c - - hopefully fixes #12 - -commit 89a479f985cb25284c4e11870c1531c299255790 -Merge: bf9b2f0 e5f3564 -Author: kokke -Date: Tue May 15 11:25:53 2018 +0200 - - Merge pull request #16 from TermoSINteZ/master - - Fix pattern ".?" issues - -commit e5f3564a1de7230cec207cb6aec3866b0b7931e0 -Author: TermoSINteZ -Date: Tue May 15 10:41:17 2018 +0300 - - Remove tabs - -commit acb0a441470808c99a08ecc8d8716d258866835c -Author: TermoSINteZ -Date: Tue May 15 00:10:55 2018 +0300 - - Fix pattern ".?" issues - -commit bf9b2f0c5e91dd12e1fea8cbc7ae7a6193e7b4ed -Merge: cb80dee 84af23d -Author: kokke -Date: Tue Apr 17 14:09:03 2018 +0200 - - Merge pull request #14 from roflcopter4/master - - Check for correct python2 binary in Makefile - -commit 84af23dde1c6785ca680d5aced93e20e484efa8d -Author: roflcopter4 -Date: Mon Apr 16 14:45:34 2018 -0600 - - Fix dumb typos - -commit 0cb0b1348392b795971be9472d8dd2854403a2cb -Author: roflcopter4 -Date: Mon Apr 16 14:42:13 2018 -0600 - - Add back '@' signs I accidentally removed - -commit 81d12dfd3de805d0969e66650731e4df9158169c -Author: roflcopter4 -Date: Mon Apr 16 14:26:28 2018 -0600 - - Check for correct python2 binry in Makefile - -commit cb80dee0644f41df67b1740fefd8573d18d84a53 -Author: kokke -Date: Fri Mar 23 15:39:17 2018 +0100 - - Update README.md - -commit 005de160fa2d8796eb2bce75b52eeaac3ac13d8d -Author: kokke -Date: Fri Mar 23 15:32:37 2018 +0100 - - Update Makefile - -commit 9ec0029e83e7cba718f6b6f8b107a0133d22b4a7 -Author: kokke -Date: Fri Mar 23 15:31:40 2018 +0100 - - Create regex_test_neg.py - -commit 960dd3ebec78d2ac61969840d4e23c97d443bdc9 -Author: kokke -Date: Fri Mar 23 15:31:11 2018 +0100 - - Create test_rand_neg.c - -commit 3d472f3d78d9702ffcdf4439ac842e3250da7c49 -Author: kokke -Date: Fri Mar 23 14:02:23 2018 +0100 - - Update README.md - -commit cdf61829adc1c94a9f2d019b2683c34c7732ca60 -Author: kokke -Date: Fri Mar 23 13:06:49 2018 +0100 - - Update Makefile - -commit 
98812bdcafa58c7f35fc40fae9ba64d6d2a9eac1 -Author: kokke -Date: Fri Mar 23 12:52:34 2018 +0100 - - Update README.md - -commit 9c192d4199e4e1e6a764f1b1699deb5b159b161e -Author: kokke -Date: Fri Mar 23 12:47:27 2018 +0100 - - Update re.c - -commit fb677f315fd159e13c2c2fc5b40f199148c0fb0f -Author: kokke -Date: Fri Mar 23 12:45:20 2018 +0100 - - Update test1.c - -commit dc1b3ee8fc4354e5dfa1846ee6b778452609d50e -Author: kokke -Date: Fri Mar 23 12:08:34 2018 +0100 - - Update README.md - -commit eac0cef080a32c4b606bd10994ddcf8a72249d78 -Author: kokke -Date: Mon Dec 11 23:26:53 2017 +0100 - - Update README.md - -commit dc9f34d74b6ac80cd0bed17fca76ef35fca3b101 -Author: kokke -Date: Mon Dec 11 23:23:40 2017 +0100 - - Update re.c - -commit d76301fa18f3575cca94816cff291a01fda58ad7 -Author: kokke -Date: Mon Dec 11 23:23:22 2017 +0100 - - Update test1.c - -commit b72898ef7a67a0650c7d00b7a09f36abc1fa57a1 -Author: kokke -Date: Mon Dec 11 21:35:55 2017 +0100 - - Update re.c - -commit 3cd275c9c55ec51a01ad01c317232247aeec9bab -Merge: baf3a15 881f634 -Author: kokke -Date: Mon Dec 11 21:32:26 2017 +0100 - - Merge pull request #9 from mrigger/out-of-bounds-fixes - - Out of bounds fixes. - -commit 881f634e9a905933d1889a4e0b9b09920337478a -Author: Manuel Rigger -Date: Sun Dec 10 11:42:17 2017 +0100 - - Fix out-of-bunds access found by AFL (input: [00000000000000000000000000000000000000][). - -commit e6c91ab986f1ed7d7e39d58e6fdfae44d3110c99 -Author: Manuel Rigger -Date: Sun Dec 10 10:30:06 2017 +0100 - - Fix out-of-bounds accesses found by AFL (input: [00000000000000000000000000000000000000). - -commit 619a9c654df6a471d7e043081b3fb4bf0e1cd642 -Author: Manuel Rigger -Date: Sat Dec 9 22:56:12 2017 +0100 - - Fix out-of-bounds access found by AFL. - -commit 43051e257141740e99c32f1cffea78d2a72fe10e -Author: Manuel Rigger -Date: Fri Dec 8 21:25:01 2017 +0100 - - Fix out-of-bounds access found by AFL. 
- -commit baf3a15d7b99b856e82d3dd69555d3bac6750049 -Author: kokke -Date: Thu Oct 12 00:54:16 2017 +0200 - - Update re.h - - To make the C++ crowd and their compilers happy ;) - -commit 107352174172b05f25d153c651b327890ab3b574 -Author: kokke -Date: Sat Jul 8 03:32:29 2017 +0200 - - Update re.c - -commit ef6b2416b17388da413d3b9cce95ef6897255970 -Author: kokke -Date: Sat Jul 8 03:26:59 2017 +0200 - - Update test1.c - -commit b8446ecba1c59b7a9f29e7d8174661deb38a74a5 -Author: kokke -Date: Wed May 3 22:55:11 2017 +0200 - - Update README.md - -commit 1600de0a66b11610183c02c4f778866dce6927af -Author: kokke -Date: Mon May 1 21:43:47 2017 +0200 - - Update regex_test.py - -commit e5dafc83fe1672de45ab1bc40e7f34876e2d1d15 -Author: kokke -Date: Mon May 1 21:34:22 2017 +0200 - - Update test_rand.c - -commit a39262a534a628d7c07bbb753a66d031a4b4a43a -Author: kokke -Date: Mon May 1 21:27:17 2017 +0200 - - Update test_print.c - -commit 96b9af356129a263ef60355426de0c339308cd26 -Author: kokke -Date: Mon May 1 21:25:11 2017 +0200 - - Update re.c - -commit 529889acae614c91697af87feba5abe5334a4274 -Author: kokke -Date: Mon May 1 21:18:40 2017 +0200 - - Update README.md - -commit 1afc07dc2936f17dc0df5178ef61caeb19d2c5b1 -Author: kokke -Date: Mon May 1 21:17:57 2017 +0200 - - Update README.md - -commit 407f4fe08fb219d68026817d47d1a453a0b6c1ba -Author: kokke -Date: Fri Apr 28 17:27:12 2017 +0200 - - Update README.md - -commit 2d2ebe66d55349ba6add2a4335b24b0a13fb26d2 -Author: kokke -Date: Mon Apr 24 23:48:18 2017 +0200 - - Update README.md - -commit 5c76b8cc4e77af9c4a3cb1bbd1cfda3e73e1be56 -Author: kokke -Date: Mon Apr 24 23:33:34 2017 +0200 - - Update README.md - -commit e4c7cb9d63d1b284f92053f535402738ee2c3a6a -Author: kokke -Date: Mon Apr 24 09:28:24 2017 +0200 - - Update README.md - -commit 1e694fe184be1261e9f684ea3556103e45649c3b -Author: kokke -Date: Thu Apr 20 17:58:33 2017 +0200 - - Update README.md - -commit 52810460dd877f337e23d58e627b5ae3fbbee430 -Author: kokke -Date: Wed Apr 19 
02:38:00 2017 +0200 - - Update Makefile - -commit d44fb891007d56e0344f7372521f3ff583630d61 -Author: kokke -Date: Wed Apr 19 01:19:38 2017 +0200 - - Update README.md - -commit af6e7b642b75255bc2a608383a1b0969816bbd83 -Author: kokke -Date: Wed Apr 19 01:16:23 2017 +0200 - - Update README.md - -commit 858d217db95390b6e7bd1fd1ba7be950142852c9 -Author: kokke -Date: Wed Apr 19 00:32:20 2017 +0200 - - Update README.md - -commit 28fffd4d32b56cd9b318091cacc8a5c71fbfffed -Author: kokke -Date: Wed Apr 19 00:31:44 2017 +0200 - - Update README.md - -commit bd874728041ad0444744a9e739dd5d078f249150 -Author: kokke -Date: Wed Apr 19 00:28:26 2017 +0200 - - Update README.md - -commit 02110e89252e76c1a2b2a44430f0f402e523f50a -Author: kokke -Date: Wed Apr 19 00:26:41 2017 +0200 - - Update test1.c - -commit 939bc7572148c77d398b22892857b769ec63ac54 -Author: kokke -Date: Tue Apr 18 23:31:14 2017 +0200 - - Update README.md - -commit 5fef3901aabb6100b28460dccf3154d3e1cc18e7 -Author: kokke -Date: Tue Apr 18 23:09:12 2017 +0200 - - Update README.md - -commit cae4b96ced1679ea703890eff00deefb0b80cc16 -Author: kokke -Date: Tue Apr 18 23:08:34 2017 +0200 - - Update README.md - -commit 72398075ee66c49e3e804be94146956dc8bafff6 -Author: kokke -Date: Tue Apr 18 22:58:10 2017 +0200 - - Update Makefile - -commit 2994559b99506b35e71c8e18aa99dd706f3b7e38 -Author: kokke -Date: Tue Apr 18 22:53:05 2017 +0200 - - Update re.c - -commit 124052b32f302c5a570e79089f951a7d1d56cb38 -Author: kokke -Date: Tue Apr 18 22:52:01 2017 +0200 - - Update Makefile - -commit ba8caa931b36b6c274c82214b636770857f7872d -Author: kokke -Date: Tue Apr 18 22:50:11 2017 +0200 - - Create regex_test.py - -commit 9630b56d1faffea799575bb9a81f9c0278906a16 -Author: kokke -Date: Tue Apr 18 22:49:39 2017 +0200 - - Create exrex.py - -commit 7da49b443622deaaa472d233a80c58a378fb8646 -Author: kokke -Date: Tue Apr 18 22:47:37 2017 +0200 - - Create test_rand.c - -commit 29384927296062ecf0fec81a1c7e35d315b5d84a -Author: kokke -Date: Tue Apr 18 
22:47:09 2017 +0200 - - Create test_print.c - -commit 475c199a1b1af4fdd8422b8639117ec97f53b350 -Author: kokke -Date: Tue Apr 18 22:46:34 2017 +0200 - - Create test2.c - -commit 2e6e69622061daa740fa03c726f4cc66c5fbc38c -Author: kokke -Date: Tue Apr 18 22:45:08 2017 +0200 - - Create test1.c - -commit e9e4ce1b91609d839a2996bb918397b502f908b7 -Author: kokke -Date: Tue Apr 18 22:44:33 2017 +0200 - - Create Makefile - -commit 5a8f0e60718fbfdca7c1fc079a77051af68bdc87 -Author: kokke -Date: Tue Apr 18 22:41:01 2017 +0200 - - Create re.c - -commit 52b0aeb459cf1682b636fabe01a97de17579b036 -Author: kokke -Date: Tue Apr 18 22:38:55 2017 +0200 - - Create re.h - -commit d061985f504096a59e320a4760784a4090eaf1e4 -Author: kokke -Date: Tue Apr 18 22:38:01 2017 +0200 - - Update README.md - -commit 32cbf08728415efb96a3f562f31b780c024502ff -Author: kokke -Date: Tue Apr 18 22:37:02 2017 +0200 - - Initial commit diff --git a/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/LICENSE b/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/LICENSE deleted file mode 100644 index cf1ab25da034..000000000000 --- a/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/LICENSE +++ /dev/null @@ -1,24 +0,0 @@ -This is free and unencumbered software released into the public domain. - -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. - -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. - -For more information, please refer to <http://unlicense.org> diff --git a/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/Makefile b/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/Makefile deleted file mode 100644 index deb9fea33a9b..000000000000 --- a/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/Makefile +++ /dev/null @@ -1,106 +0,0 @@ -# Compiler to use - can be replaced by clang for instance -CC := gcc - -# Number of random text expressions to generate, for random testing -NRAND_TESTS := 1000 - -PYTHON != if (python --version 2>&1 | grep -q 'Python 2\..*'); then \ - echo 'python'; \ - elif command -v python2 >/dev/null 2>&1; then \ - echo 'python2'; \ - else \ - echo 'Error: no compatible python version found.' >&2; \ - exit 1; \ - fi - -# Flags to pass to compiler -CFLAGS := -O3 -Wall -Wextra -std=c99 -I.
- -all: - @$(CC) $(CFLAGS) re.c tests/test1.c -o tests/test1 - @$(CC) $(CFLAGS) re.c tests/test2.c -o tests/test2 - @$(CC) $(CFLAGS) re.c tests/test_rand.c -o tests/test_rand - @$(CC) $(CFLAGS) re.c tests/test_rand_neg.c -o tests/test_rand_neg - -clean: - @rm -f tests/test1 tests/test2 tests/test_rand - @#@$(foreach test_bin,$(TEST_BINS), rm -f $(test_bin) ; ) - @rm -f a.out - @rm -f *.o - - -test: all - @$(test $(PYTHON)) - @echo - @echo Testing hand-picked regex\'s: - @./tests/test1 - @echo Testing patterns against $(NRAND_TESTS) random strings matching the Python implementation and comparing: - @echo - @$(PYTHON) ./scripts/regex_test.py \\d+\\w?\\D\\d $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py \\s+[a-zA-Z0-9?]* $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py \\w*\\d?\\w\\? $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [^\\d]+\\\\?\\s $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [^\\w][^-1-4] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [^\\w] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [^1-4] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [^-1-4] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [^\\d]+\\s?[\\w]* $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py a+b*[ac]*.+.*.[\\.]. $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py a?b[ac*]*.?[\\]+[?]? $(NRAND_TESTS) - @#python ./scripts/regex_test.py [1-5-]+[-1-2]-[-] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [-1-3]-[-]+ $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [1-5]+[-1-2]-[\\-] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [-1-2]* $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py \\s?[a-fKL098]+-? 
$(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [\\-]* $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [\\\\]+ $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [0-9a-fA-F]+ $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [1379][2468][abcdef] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [012345-9]?[0123-789] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [012345-9] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [0-56789] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [abc-zABC-Z] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [a\d]?1234 $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py .*123faerdig $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py .?\\w+jsj$ $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [?to][+to][?ta][*ta] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py \\d+ $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [a-z]+ $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py \\s+[a-zA-Z0-9?]* $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py \\w $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py \\d $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [\\d] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test.py [^\\d] $(NRAND_TESTS) - @#python ./scripts/regex_test.py [^-1-4] $(NRAND_TESTS) - @echo - @echo - @echo - @echo Testing rejection of patterns against $(NRAND_TESTS) random strings also rejected by the Python implementation: - @echo - @$(PYTHON) ./scripts/regex_test_neg.py \\d+ $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py [a-z]+ $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py \\s+[a-zA-Z0-9?]* $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py ^\\w $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py ^\\d $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py [\\d] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py ^[^\\d] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py [^\\w]+ $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py ^[\\w]+ $(NRAND_TESTS) 
- @$(PYTHON) ./scripts/regex_test_neg.py ^[^0-9] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py [a-z].[A-Z] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py [-1-3]-[-]+ $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py [1-5]+[-1-2]-[\\-] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py [-0-9]+ $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py [\\-]+ $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py [\\\\]+ $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py [0-9a-fA-F]+ $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py [1379][2468][abcdef] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py [012345-9] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py [0-56789] $(NRAND_TESTS) - @$(PYTHON) ./scripts/regex_test_neg.py .*123faerdig $(NRAND_TESTS) - @echo - @echo - @./tests/test2 - @echo - @echo - diff --git a/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/README.md b/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/README.md deleted file mode 100644 index ab89576f085c..000000000000 --- a/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/README.md +++ /dev/null @@ -1,138 +0,0 @@ -# tiny-regex-c -# A small regex implementation in C -### Description -Small and portable [Regular Expression](https://en.wikipedia.org/wiki/Regular_expression) (regex) library written in C. - -Design is inspired by Rob Pike's regex-code for the book *"Beautiful Code"* [available online here](http://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html). - -Supports a subset of the syntax and semantics of the Python standard library implementation (the `re`-module). - -### Current status -All supported regex-operators seem to work properly according to the test-set, with the following exception: - -There is a problem with ranges (e.g. `[0-9]` for a digit 0-9) combined with inverted character-cases, e.g. `[^ab]` for anything but 'a' or 'b' - like `[^-0-9]` for anything not '-' or a digit 0-9. 
I think the code mathces too broadly in that case. - -I think you should test the patterns you are going to use. You can easily modify the test-harness to generate tests for your intended patterns to check for compliance. - -**I will gladly accept patches correcting bugs.** - -### Design goals -The main design goal of this library is to be small, correct, self contained and use few resources while retaining acceptable performance and feature completeness. Clarity of the code is also highly valued. - -### Notable features and omissions -- Small code and binary size: <500 SLOC, ~3kb binary for x86. Statically #define'd memory usage / allocation. -- No use of dynamic memory allocation (i.e. no calls to `malloc` / `free`). -- To avoid call-stack exhaustion, iterative searching is preferred over recursive by default (can be changed with a pre-processor flag). -- No support for capturing groups or named capture: `(^P<name>group)` etc. -- Thorough testing : [exrex](https://github.com/asciimoo/exrex) is used to randomly generate test-cases from regex patterns, which are fed into the regex code for verification. Try `make test` to generate a few thousand tests cases yourself. -- Compiled for x86 using GCC 4.7.4 and optimizing for size, the binary takes up ~2-3kb code space and allocates ~0.5kb RAM : - ``` - > gcc -Os -c re.c - > size re.o - text data bss dec hex filename - 2319 0 544 2863 b2f re.o - - ``` - For ARM/Thumb using GCC 4.8.1 it's around 1.5kb code and less RAM : - ``` - > arm-none-eabi-gcc -Os -mthumb -c re.c - > size re.o - text data bss dec hex filename - 1418 0 280 1698 6a2 re.o - - ``` - For 8-bit AVR using AVR-GCC 4.8.1 it's around 2kb code and less RAM : - ``` - > avr-gcc -Os -c re.c - > size re.o - text data bss dec hex filename - 2128 0 130 2258 8d2 re.o - ``` - - - -### API -This is the public / exported API: -```C -/* Typedef'd pointer to hide implementation details. */ -typedef struct regex_t* re_t; - -/* Compiles regex string pattern to a regex_t-array.
*/ -re_t re_compile(const char* pattern); - -/* Finds matches of the compiled pattern inside text. */ -int re_matchp(re_t pattern, const char* text); - -/* Finds matches of pattern inside text (compiles first automatically). */ -int re_match(const char* pattern, const char* text); -``` - -### Supported regex-operators -The following features / regex-operators are supported by this library. - -NOTE: inverted character classes are buggy - see the test harness for concrete examples. - - - - `.` Dot, matches any character - - `^` Start anchor, matches beginning of string - - `$` End anchor, matches end of string - - `*` Asterisk, match zero or more (greedy) - - `+` Plus, match one or more (greedy) - - `?` Question, match zero or one (non-greedy) - - `[abc]` Character class, match if one of {'a', 'b', 'c'} - - `[^abc]` Inverted class, match if NOT one of {'a', 'b', 'c'} - **`NOTE: This feature is currently broken for some usage of character ranges!`** - - `[a-zA-Z]` Character ranges, the character set of the ranges { a-z | A-Z } - - `\s` Whitespace, \t \f \r \n \v and spaces - - `\S` Non-whitespace - - `\w` Alphanumeric, [a-zA-Z0-9_] - - `\W` Non-alphanumeric - - `\d` Digits, [0-9] - - `\D` Non-digits - -### Usage -Compile a regex from ASCII-string (char-array) to a custom pattern structure using `re_compile()`. - -Search a text-string for a regex and get an index into the string, using `re_match()` or `re_matchp()`. - -The returned index points to the first place in the string, where the regex pattern matches. - -If the regular expression doesn't match, the matching function returns an index of -1 to indicate failure. - -### Examples -Example of usage: -```C -/* Standard null-terminated C-string to search: */ -const char* string_to_search = "ahem.. 'hello world !' 
.."; - -/* Compile a simple regular expression using character classes, meta-char and greedy + non-greedy quantifiers: */ -re_t pattern = re_compile("[Hh]ello [Ww]orld\\s*[!]?"); - -/* Check if the regex matches the text: */ -int match_idx = re_matchp(pattern, string_to_search); -if (match_idx != -1) -{ - printf("match at idx %d.\n", match_idx); -} -``` - -For more usage examples I encourage you to look at the code in the `tests`-folder. - -### TODO -- Fix the implementation of inverted character classes. -- Fix implementation of branches (`|`), and see if that can lead us closer to groups as well, e.g. `(a|b)+`. -- Add `example.c` that demonstrates usage. -- Add `tests/test_perf.c` for performance and time measurements. -- Testing: Improve pattern rejection testing. - -### FAQ -- *Q: What differentiates this library from other C regex implementations?* - - A: Well, the small size for one. <500 lines of C-code compiling to 2-3kb ROM, using very little RAM. - -### License -All material in this repository is in the public domain. - - - - diff --git a/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/re.c b/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/re.c deleted file mode 100644 index 76d406624746..000000000000 --- a/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/re.c +++ /dev/null @@ -1,470 +0,0 @@ -/* - * - * Mini regex-module inspired by Rob Pike's regex code described in: - * - * http://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html - * - * - * - * Supports: - * --------- - * '.' Dot, matches any character - * '^' Start anchor, matches beginning of string - * '$' End anchor, matches end of string - * '*' Asterisk, match zero or more (greedy) - * '+' Plus, match one or more (greedy) - * '?' Question, match zero or one (non-greedy) - * '[abc]' Character class, match if one of {'a', 'b', 'c'} - * '[^abc]' Inverted class, match if NOT one of {'a', 'b', 'c'} -- NOTE: feature is currently broken! 
- * '[a-zA-Z]' Character ranges, the character set of the ranges { a-z | A-Z } - * '\s' Whitespace, \t \f \r \n \v and spaces - * '\S' Non-whitespace - * '\w' Alphanumeric, [a-zA-Z0-9_] - * '\W' Non-alphanumeric - * '\d' Digits, [0-9] - * '\D' Non-digits - * - * - */ - - - -#include "re.h" -#include - -/* Definitions: */ - -#define MAX_REGEXP_OBJECTS 30 /* Max number of regex symbols in expression. */ -#define MAX_CHAR_CLASS_LEN 40 /* Max length of character-class buffer in. */ - - -enum { UNUSED, DOT, BEGIN, END, QUESTIONMARK, STAR, PLUS, CHAR, CHAR_CLASS, INV_CHAR_CLASS, DIGIT, NOT_DIGIT, ALPHA, NOT_ALPHA, WHITESPACE, NOT_WHITESPACE, /* BRANCH */ }; - -typedef struct regex_t -{ - unsigned char type; /* CHAR, STAR, etc. */ - union - { - unsigned char ch; /* the character itself */ - unsigned char* ccl; /* OR a pointer to characters in class */ - } u; -} regex_t; - - - -/* Private function declarations: */ -static int matchpattern(regex_t* pattern, const char* text); -static int matchcharclass(char c, const char* str); -static int matchstar(regex_t p, regex_t* pattern, const char* text); -static int matchplus(regex_t p, regex_t* pattern, const char* text); -static int matchone(regex_t p, char c); -static int matchdigit(char c); -static int matchalpha(char c); -static int matchwhitespace(char c); -static int matchmetachar(char c, const char* str); -static int matchrange(char c, const char* str); -static int ismetachar(char c); - - - -/* Public functions: */ -int re_match(const char* pattern, const char* text) -{ - return re_matchp(re_compile(pattern), text); -} - -int re_matchp(re_t pattern, const char* text) -{ - if (pattern != 0) - { - if (pattern[0].type == BEGIN) - { - return ((matchpattern(&pattern[1], text)) ? 
0 : -1); - } - else - { - int idx = -1; - - do - { - idx += 1; - - if (matchpattern(pattern, text)) - { - if (text[0] == '\0') - return -1; - - return idx; - } - } - while (*text++ != '\0'); - } - } - return -1; -} - -re_t re_compile(const char* pattern) -{ - /* The sizes of the two static arrays below substantiates the static RAM usage of this module. - MAX_REGEXP_OBJECTS is the max number of symbols in the expression. - MAX_CHAR_CLASS_LEN determines the size of buffer for chars in all char-classes in the expression. */ - static regex_t re_compiled[MAX_REGEXP_OBJECTS]; - static unsigned char ccl_buf[MAX_CHAR_CLASS_LEN]; - int ccl_bufidx = 1; - - char c; /* current char in pattern */ - int i = 0; /* index into pattern */ - int j = 0; /* index into re_compiled */ - - while (pattern[i] != '\0' && (j+1 < MAX_REGEXP_OBJECTS)) - { - c = pattern[i]; - - switch (c) - { - /* Meta-characters: */ - case '^': { re_compiled[j].type = BEGIN; } break; - case '$': { re_compiled[j].type = END; } break; - case '.': { re_compiled[j].type = DOT; } break; - case '*': { re_compiled[j].type = STAR; } break; - case '+': { re_compiled[j].type = PLUS; } break; - case '?': { re_compiled[j].type = QUESTIONMARK; } break; -/* case '|': { re_compiled[j].type = BRANCH; } break; <-- not working properly */ - - /* Escaped character-classes (\s \w ...): */ - case '\\': - { - if (pattern[i+1] != '\0') - { - /* Skip the escape-char '\\' */ - i += 1; - /* ... and check the next */ - switch (pattern[i]) - { - /* Meta-character: */ - case 'd': { re_compiled[j].type = DIGIT; } break; - case 'D': { re_compiled[j].type = NOT_DIGIT; } break; - case 'w': { re_compiled[j].type = ALPHA; } break; - case 'W': { re_compiled[j].type = NOT_ALPHA; } break; - case 's': { re_compiled[j].type = WHITESPACE; } break; - case 'S': { re_compiled[j].type = NOT_WHITESPACE; } break; - - /* Escaped character, e.g. '.' 
or '$' */ - default: - { - re_compiled[j].type = CHAR; - re_compiled[j].u.ch = pattern[i]; - } break; - } - } - /* '\\' as last char in pattern -> invalid regular expression. */ -/* - else - { - re_compiled[j].type = CHAR; - re_compiled[j].ch = pattern[i]; - } -*/ - } break; - - /* Character class: */ - case '[': - { - /* Remember where the char-buffer starts. */ - int buf_begin = ccl_bufidx; - - /* Look-ahead to determine if negated */ - if (pattern[i+1] == '^') - { - re_compiled[j].type = INV_CHAR_CLASS; - i += 1; /* Increment i to avoid including '^' in the char-buffer */ - } - else - { - re_compiled[j].type = CHAR_CLASS; - } - - /* Copy characters inside [..] to buffer */ - while ( (pattern[++i] != ']') - && (pattern[i] != '\0')) /* Missing ] */ - { - if (pattern[i] == '\\') - { - if (ccl_bufidx >= MAX_CHAR_CLASS_LEN - 1) - { - //fputs("exceeded internal buffer!\n", stderr); - return 0; - } - ccl_buf[ccl_bufidx++] = pattern[i++]; - } - else if (ccl_bufidx >= MAX_CHAR_CLASS_LEN) - { - //fputs("exceeded internal buffer!\n", stderr); - return 0; - } - ccl_buf[ccl_bufidx++] = pattern[i]; - } - if (ccl_bufidx >= MAX_CHAR_CLASS_LEN) - { - /* Catches cases such as [00000000000000000000000000000000000000][ */ - //fputs("exceeded internal buffer!\n", stderr); - return 0; - } - /* Null-terminate string end */ - ccl_buf[ccl_bufidx++] = 0; - re_compiled[j].u.ccl = &ccl_buf[buf_begin]; - } break; - - /* Other characters: */ - default: - { - re_compiled[j].type = CHAR; - re_compiled[j].u.ch = c; - } break; - } - i += 1; - j += 1; - } - /* 'UNUSED' is a sentinel used to indicate end-of-pattern */ - re_compiled[j].type = UNUSED; - - return (re_t) re_compiled; -} - -void re_print(regex_t* pattern) -{ - const char* types[] = { "UNUSED", "DOT", "BEGIN", "END", "QUESTIONMARK", "STAR", "PLUS", "CHAR", "CHAR_CLASS", "INV_CHAR_CLASS", "DIGIT", "NOT_DIGIT", "ALPHA", "NOT_ALPHA", "WHITESPACE", "NOT_WHITESPACE", "BRANCH" }; - - int i; - for (i = 0; i < MAX_REGEXP_OBJECTS; ++i) - { - if 
(pattern[i].type == UNUSED) - { - break; - } - - printf("type: %s", types[pattern[i].type]); - if (pattern[i].type == CHAR_CLASS || pattern[i].type == INV_CHAR_CLASS) - { - printf(" ["); - int j; - char c; - for (j = 0; j < MAX_CHAR_CLASS_LEN; ++j) - { - c = pattern[i].u.ccl[j]; - if ((c == '\0') || (c == ']')) - { - break; - } - printf("%c", c); - } - printf("]"); - } - else if (pattern[i].type == CHAR) - { - printf(" '%c'", pattern[i].u.ch); - } - printf("\n"); - } -} - - - -/* Private functions: */ -static int matchdigit(char c) -{ - return ((c >= '0') && (c <= '9')); -} -static int matchalpha(char c) -{ - return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')); -} -static int matchwhitespace(char c) -{ - return ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\r') || (c == '\f') || (c == '\v')); -} -static int matchalphanum(char c) -{ - return ((c == '_') || matchalpha(c) || matchdigit(c)); -} -static int matchrange(char c, const char* str) -{ - return ((c != '-') && (str[0] != '\0') && (str[0] != '-') && - (str[1] == '-') && (str[1] != '\0') && - (str[2] != '\0') && ((c >= str[0]) && (c <= str[2]))); -} -static int ismetachar(char c) -{ - return ((c == 's') || (c == 'S') || (c == 'w') || (c == 'W') || (c == 'd') || (c == 'D')); -} - -static int matchmetachar(char c, const char* str) -{ - switch (str[0]) - { - case 'd': return matchdigit(c); - case 'D': return !matchdigit(c); - case 'w': return matchalphanum(c); - case 'W': return !matchalphanum(c); - case 's': return matchwhitespace(c); - case 'S': return !matchwhitespace(c); - default: return (c == str[0]); - } -} - -static int matchcharclass(char c, const char* str) -{ - do - { - if (matchrange(c, str)) - { - return 1; - } - else if (str[0] == '\\') - { - /* Escape-char: increment str-ptr and match on next char */ - str += 1; - if (matchmetachar(c, str)) - { - return 1; - } - else if ((c == str[0]) && !ismetachar(c)) - { - return 1; - } - } - else if (c == str[0]) - { - if (c == '-') - { - return 
((str[-1] == '\0') || (str[1] == '\0')); - } - else - { - return 1; - } - } - } - while (*str++ != '\0'); - - return 0; -} - -static int matchone(regex_t p, char c) -{ - switch (p.type) - { - case DOT: return 1; - case CHAR_CLASS: return matchcharclass(c, (const char*)p.u.ccl); - case INV_CHAR_CLASS: return !matchcharclass(c, (const char*)p.u.ccl); - case DIGIT: return matchdigit(c); - case NOT_DIGIT: return !matchdigit(c); - case ALPHA: return matchalphanum(c); - case NOT_ALPHA: return !matchalphanum(c); - case WHITESPACE: return matchwhitespace(c); - case NOT_WHITESPACE: return !matchwhitespace(c); - default: return (p.u.ch == c); - } -} - -static int matchstar(regex_t p, regex_t* pattern, const char* text) -{ - do - { - if (matchpattern(pattern, text)) - return 1; - } - while ((text[0] != '\0') && matchone(p, *text++)); - - return 0; -} - -static int matchplus(regex_t p, regex_t* pattern, const char* text) -{ - while ((text[0] != '\0') && matchone(p, *text++)) - { - if (matchpattern(pattern, text)) - return 1; - } - return 0; -} - -static int matchquestion(regex_t p, regex_t* pattern, const char* text) -{ - if (p.type == UNUSED) - return 1; - if (matchpattern(pattern, text)) - return 1; - if (*text && matchone(p, *text++)) - return matchpattern(pattern, text); - return 0; -} - - -#if 0 - -/* Recursive matching */ -static int matchpattern(regex_t* pattern, const char* text) -{ - if ((pattern[0].type == UNUSED) || (pattern[1].type == QUESTIONMARK)) - { - return matchquestion(pattern[1], &pattern[2], text); - } - else if (pattern[1].type == STAR) - { - return matchstar(pattern[0], &pattern[2], text); - } - else if (pattern[1].type == PLUS) - { - return matchplus(pattern[0], &pattern[2], text); - } - else if ((pattern[0].type == END) && pattern[1].type == UNUSED) - { - return text[0] == '\0'; - } - else if ((text[0] != '\0') && matchone(pattern[0], text[0])) - { - return matchpattern(&pattern[1], text+1); - } - else - { - return 0; - } -} - -#else - -/* Iterative 
matching */ -static int matchpattern(regex_t* pattern, const char* text) -{ - do - { - if ((pattern[0].type == UNUSED) || (pattern[1].type == QUESTIONMARK)) - { - return matchquestion(pattern[0], &pattern[2], text); - } - else if (pattern[1].type == STAR) - { - return matchstar(pattern[0], &pattern[2], text); - } - else if (pattern[1].type == PLUS) - { - return matchplus(pattern[0], &pattern[2], text); - } - else if ((pattern[0].type == END) && pattern[1].type == UNUSED) - { - return (text[0] == '\0'); - } -/* Branching is not working properly - else if (pattern[1].type == BRANCH) - { - return (matchpattern(pattern, text) || matchpattern(&pattern[2], text)); - } -*/ - } - while ((text[0] != '\0') && matchone(*pattern++, *text++)); - - return 0; -} - -#endif diff --git a/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/re.h b/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/re.h deleted file mode 100644 index fd364121001a..000000000000 --- a/erts/lib_src/yielding_c_fun/lib/tiny_regex_c/re.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * - * Mini regex-module inspired by Rob Pike's regex code described in: - * - * http://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html - * - * - * - * Supports: - * --------- - * '.' Dot, matches any character - * '^' Start anchor, matches beginning of string - * '$' End anchor, matches end of string - * '*' Asterisk, match zero or more (greedy) - * '+' Plus, match one or more (greedy) - * '?' Question, match zero or one (non-greedy) - * '[abc]' Character class, match if one of {'a', 'b', 'c'} - * '[^abc]' Inverted class, match if NOT one of {'a', 'b', 'c'} -- NOTE: feature is currently broken! 
- * '[a-zA-Z]' Character ranges, the character set of the ranges { a-z | A-Z } - * '\s' Whitespace, \t \f \r \n \v and spaces - * '\S' Non-whitespace - * '\w' Alphanumeric, [a-zA-Z0-9_] - * '\W' Non-alphanumeric - * '\d' Digits, [0-9] - * '\D' Non-digits - * - * - */ - -#ifdef __cplusplus -extern "C"{ -#endif - - - -/* Typedef'd pointer to get abstract datatype. */ -typedef struct regex_t* re_t; - - -/* Compile regex string pattern to a regex_t-array. */ -re_t re_compile(const char* pattern); - - -/* Find matches of the compiled pattern inside text. */ -int re_matchp(re_t pattern, const char* text); - - -/* Find matches of the txt pattern inside text (will compile automatically first). */ -int re_match(const char* pattern, const char* text); - - -#ifdef __cplusplus -} -#endif diff --git a/erts/lib_src/yielding_c_fun/main_target.mk b/erts/lib_src/yielding_c_fun/main_target.mk index 4c97d4d9cf90..3a66432c6589 100644 --- a/erts/lib_src/yielding_c_fun/main_target.mk +++ b/erts/lib_src/yielding_c_fun/main_target.mk @@ -13,13 +13,11 @@ endif YCF_INCLUDE_DIRS = \ -I$(YCF_SOURCE_DIR) \ - -I$(YCF_SOURCE_DIR)/lib/tiny_regex_c \ -I$(YCF_SOURCE_DIR)/lib/simple_c_gc YCF_HEADERS = $(sort $(shell find $(YCF_SOURCE_DIR) -name '*.h')) YCF_EXTRA_SOURCES = \ - $(YCF_SOURCE_DIR)/lib/tiny_regex_c/re.c \ $(YCF_SOURCE_DIR)/lib/simple_c_gc/simple_c_gc.c YCF_SOURCES = $(sort $(wildcard $(YCF_SOURCE_DIR)/*.c) $(YCF_EXTRA_SOURCES)) @@ -31,9 +29,6 @@ YCF_CFLAGS = $(filter-out -Wstrict-prototypes -Wdeclaration-after-statement -Wmi $(YCF_EXECUTABLE): $(YCF_OBJECTS) $(V_LD) $(YCF_CFLAGS) $(LDFLAGS) $(YCF_OBJECTS) -o $@ -$(YCF_OBJ_DIR)/%.o: $(YCF_SOURCE_DIR)/lib/tiny_regex_c/%.c $(YCF_HEADERS) - $(V_CC) $(YCF_CFLAGS) $(LDFLAGS) $(YCF_INCLUDE_DIRS) -c $< -o $@ - $(YCF_OBJ_DIR)/%.o: $(YCF_SOURCE_DIR)/lib/simple_c_gc/%.c $(YCF_HEADERS) $(V_CC) $(YCF_CFLAGS) $(LDFLAGS) $(YCF_INCLUDE_DIRS) -c $< -o $@ diff --git a/erts/lib_src/yielding_c_fun/ycf_lexer.c b/erts/lib_src/yielding_c_fun/ycf_lexer.c index 
91426b88c287..be4112dca1dd 100644 --- a/erts/lib_src/yielding_c_fun/ycf_lexer.c +++ b/erts/lib_src/yielding_c_fun/ycf_lexer.c @@ -23,7 +23,6 @@ */ -#include "lib/tiny_regex_c/re.h" #include "ycf_yield_fun.h" #include "ycf_utils.h" @@ -102,36 +101,99 @@ typedef struct symbol_finder { char *str_2; } symbol_finder; +static int is_whitespace(char c) +{ + return c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\f'; +} + +static int is_alphanum(char c) +{ + if ('a' <= c && c <= 'z') { + return !0; + } + if ('A' <= c && c <= 'Z') { + return !0; + } + if ('0' <= c && c <= '9') { + return !0; + } + return c == '_'; +} + +static int is_alpha(char c) +{ + if ('a' <= c && c <= 'z') { + return !0; + } + return ('A' <= c && c <= 'Z'); +} + +static int is_digit(char c) +{ + return ('0' <= c && c <= '9'); +} + int starts_with(char *str, char *prefix) { return strncmp(str, prefix, strlen(prefix)) == 0; } -int until_no_match(symbol_finder* f, char* text){ - int pos = 0; - while(re_match(f->str_1, &(text[pos])) == 0){ - pos++; - } - return pos; +int until_not_whitespace(symbol_finder *f, char *text) +{ + int pos = 0; + while (is_whitespace(text[pos])) { + pos++; + } + return pos; } -int string_litteral_finder(symbol_finder* f, char* text){ - int pos = 0; - if (starts_with(text, "\"")){ - pos++; - //\"(\\.|[^"\\])*\" - while(re_match("\\.", &(text[pos])) == 0 || - re_match("[^\"]", &(text[pos])) == 0){ - pos++; +int until_not_digit(symbol_finder *f, char *text) +{ + int pos = 0; + while (is_digit(text[pos])) { + pos++; } - if(starts_with(&(text[pos]), "\"")){ - return pos + 1; - }else { - printf("Broken string litteral\n"); - exit(1); + return pos; +} + +int starts_with_alpha_until_not_alphanum(symbol_finder *f, char *text) +{ + int pos = 0; + + if (is_alpha(text[pos])) { + pos++; + while (is_alphanum(text[pos])) { + pos++; + } + } + return pos; +} + +int string_litteral_finder(symbol_finder* f, char* text) +{ + int pos, quoted; + if (text[0] != '"') { + return 0; + } + pos = 
1; + quoted = 0; + while (!0) { + char c = text[pos++]; + if (c == '\0') { + printf("Broken string literal\n"); + exit(1); + } + if (quoted) { + quoted = 0; + continue; + } + if (c == '"') { + return pos; + } + if (c == '\\') { + quoted = !0; + } } - } - return pos; } int macro_define_finder(symbol_finder* f, char* text){ @@ -151,17 +213,6 @@ int macro_define_finder(symbol_finder* f, char* text){ return pos; } - -int starts_with_until_no_match(symbol_finder* f, char* text){ - int pos = 0; - if(re_match(f->str_1, text) == 0){ - while(re_match(f->str_2, &(text[pos])) == 0){ - pos++; - } - } - return pos; -} - int starts_with_ends_with(symbol_finder* f, char* text){ if(starts_with(text, f->str_1)){ int pos = 1; @@ -180,19 +231,20 @@ int fixed_string(symbol_finder* f, char* text){ return 0; } -int fixed_alpha_string(symbol_finder* f, char* text){ - if(starts_with(text, f->str_1) && - re_match("[^\\W]", &text[strlen(f->str_1)])){ - return strlen(f->str_1); - } - return 0; +int fixed_alpha_string(symbol_finder* f, char* text) +{ + if (starts_with(text, f->str_1)) { + int len = (int) strlen(f->str_1); + if (!is_alphanum(text[len])) { + return len; + } + } + return 0; } -int regex_char(symbol_finder* f, char* text){ - if(re_match(f->str_1, text) == 0){ - return 1; - } - return 0; +int is_char(symbol_finder* f, char* text) +{ + return text[0] != '\0'; } void fold_whitespace_and_comments(ycf_symbol_list* symbols){ @@ -267,8 +319,7 @@ ycf_symbol_list ycf_symbol_list_from_text(char* text){ }, { .type = ycf_symbol_type_whitespace, - .str_1 = "\\s", - .finder = until_no_match + .finder = until_not_whitespace }, { .type = ycf_symbol_type_void, @@ -347,14 +398,11 @@ ycf_symbol_list ycf_symbol_list_from_text(char* text){ }, { .type = ycf_symbol_type_identifier, - .str_1 = "[a-zA-Z]", - .str_2 = "\\w", - .finder = starts_with_until_no_match - }, + .finder = starts_with_alpha_until_not_alphanum + }, { .type = ycf_symbol_type_number, - .str_1 = "\\d", - .finder = until_no_match + 
.finder = until_not_digit }, { .type = ycf_symbol_type_open_parenthesis, @@ -433,8 +481,7 @@ ycf_symbol_list ycf_symbol_list_from_text(char* text){ }, { .type = ycf_symbol_type_something_else, - .str_1 = ".", - .finder = regex_char + .finder = is_char } }; while(text[pos] != 0){