Skip to content

Commit 14675b3

Browse files
committed
Merge branch 'master' into dev
2 parents ffe2550 + 7164df6 commit 14675b3

File tree

12 files changed

+10026
-138
lines changed

12 files changed

+10026
-138
lines changed

README.md

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,74 @@
1-
# libpll-2
1+
# Libpll-2
2+
3+
libpll-2 is the new official fork of libpll (https://github.com/xflouris/libpll/). It implements site repeats to speed up computations.
4+
5+
6+
Please read the wiki for more information.
7+
8+
9+
10+
# Projects that are already using libpll-2
11+
List of projects already using libpll-2 and site repeats, and reported speedups compared with the tip pattern optimization:
12+
* [RAxML-NG](https://github.com/amkozlov/raxml-ng): speedup ranges between 1.2 and 1.5
13+
* [ModelTest-NG](https://github.com/ddarriba/modeltest): speedup around 1.3
14+
* [EPA-ng](https://github.com/Pbdas/epa-ng): memory footprint reduced by 30%.
15+
16+
17+
# Compilation instructions
18+
19+
Currently, `libpll` requires that [GNU Bison](http://www.gnu.org/software/bison/)
20+
and [Flex](http://flex.sourceforge.net/) are installed on the target system. On
21+
a Debian-based Linux system, the two packages can be installed using the command
22+
23+
`apt-get install flex bison`
24+
25+
The library also requires that a GNU system is available as it uses several
26+
functions (e.g. `asprintf`) which are not present in the POSIX standard.
27+
This, however, will change in the future in order to have a more portable
28+
and cross-platform library.
29+
30+
The library can be compiled using either of the following two ways.
31+
32+
**Cloning the repo** Clone the repo and build the executable and documentation
33+
using the following commands.
34+
35+
```bash
36+
git clone https://github.com/xflouris/libpll.git
37+
cd libpll
38+
./autogen.sh
39+
./configure
40+
make
41+
make install # as root, otherwise run: sudo make install
42+
```
43+
44+
When using the cloned repository version, you will also need
45+
[autoconf](https://www.gnu.org/software/autoconf/autoconf.html),
46+
[automake](https://www.gnu.org/software/automake/) and
47+
[libtool](https://www.gnu.org/software/libtool/) installed. On a Debian-based
48+
Linux system, the packages can be installed using the command
49+
50+
```bash
51+
sudo apt-get install autotools-dev autoconf libtool
52+
```
53+
54+
The library will be installed on the operating system's standard paths. For
55+
some GNU/Linux distributions it might be necessary to add that standard path
56+
(typically `/usr/local/lib`) to `/etc/ld.so.conf` and run `ldconfig`.
57+
58+
Microsoft Windows compatibility was tested with a cross-compiler and seems to
59+
work out-of-the-box using [MinGW](http://www.mingw.org/).
60+
61+
# libpll-2 license and third party licenses
62+
63+
The libpll-2 code is currently licensed under the
64+
[GNU Affero General Public License version 3](http://www.gnu.org/licenses/agpl-3.0.en.html).
65+
Please see LICENSE.txt for details.
66+
67+
libpll-2 includes code from several other projects. We would like to thank the
68+
authors for making their source code available.
69+
70+
libpll includes code from GNU Compiler Collection distributed under the GNU
71+
General Public License.
72+
73+
274

3-
The future line of libpll...

src/CMakeLists.txt

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@ endif()
1111
set (SSE_FLAGS "-msse3")
1212
set (AVX_FLAGS "-mavx")
1313
set (AVX2_FLAGS "-mfma -mavx2")
14+
set (NEON_FLAGS "-march=armv8-a+fp+simd")
1415

1516
find_package(BISON)
1617
find_package(FLEX)
17-
set(LIBPLL_BISON_FLAGS "-y -d -p pll_utree_")
18+
set(LIBPLL_BISON_FLAGS "-d -p pll_utree_")
1819
set(LIBPLL_FLEX_FLAGS "-P pll_utree_")
1920
BISON_TARGET(parse_utree_t
2021
${CMAKE_CURRENT_SOURCE_DIR}/parse_utree.y ${CMAKE_CURRENT_BINARY_DIR}/parse_utree.c
@@ -23,7 +24,7 @@ FLEX_TARGET(lex_utree_t
2324
${CMAKE_CURRENT_SOURCE_DIR}/lex_utree.l ${CMAKE_CURRENT_BINARY_DIR}/lex_utree.c
2425
COMPILE_FLAGS ${LIBPLL_FLEX_FLAGS})
2526
ADD_FLEX_BISON_DEPENDENCY(lex_utree_t parse_utree_t)
26-
set(LIBPLL_BISON_FLAGS "-y -d -p pll_rtree_")
27+
set(LIBPLL_BISON_FLAGS "-d -p pll_rtree_")
2728
set(LIBPLL_FLEX_FLAGS "-P pll_rtree_")
2829
BISON_TARGET(parse_rtree_t
2930
${CMAKE_CURRENT_SOURCE_DIR}/parse_rtree.y ${CMAKE_CURRENT_BINARY_DIR}/parse_rtree.c
@@ -104,8 +105,11 @@ endif ()
104105
if (NOT DEFINED ENABLE_AVX2)
105106
SET(ENABLE_AVX2 "True")
106107
endif ()
108+
if (NOT DEFINED ENABLE_SSE2NEON)
109+
SET(ENABLE_SSE2NEON "True")
110+
endif ()
107111

108-
# check simd installed
112+
# check simd supported by the compiler
109113
if (ENABLE_SSE)
110114
SET(_code " #include <immintrin.h>
111115
int main() {__m128d a = _mm_setzero_pd(); return 1;}")
@@ -142,7 +146,18 @@ if (ENABLE_AVX2)
142146
set(ENABLE_AVX2 "False")
143147
endif()
144148
endif()
145-
149+
if (ENABLE_SSE2NEON)
150+
SET(_code " #include <sse2neon.h>
151+
int main() {__m128d a = _mm_setzero_pd(); return 1;}")
152+
SET(_file ${CMAKE_CURRENT_BINARY_DIR}/testsse2neon.c)
153+
FILE(WRITE "${_file}" "${_code}")
154+
TRY_COMPILE(SSE2NEON_COMPILED ${CMAKE_CURRENT_BINARY_DIR} ${_file}
155+
COMPILE_DEFINITIONS "${NEON_FLAGS} -I${CMAKE_CURRENT_SOURCE_DIR}")
156+
if (NOT SSE2NEON_COMPILED)
157+
message(STATUS "Disable sse2neon simd, because not supported")
158+
set(ENABLE_SSE2NEON "False")
159+
endif()
160+
endif()
146161

147162
# set simd flags
148163
if (ENABLE_SSE)
@@ -151,7 +166,15 @@ if (ENABLE_SSE)
151166
message(STATUS "SSE enabled. To disable it, run cmake with -DENABLE_SSE=false")
152167
set(LIBPLL_SOURCES ${LIBPLL_SOURCES} ${LIBPLL_SSE_SOURCES})
153168
SET_SOURCE_FILES_PROPERTIES( ${LIBPLL_SSE_SOURCES} PROPERTIES COMPILE_FLAGS ${SSE_FLAGS} )
169+
elseif (ENABLE_SSE2NEON)
170+
# atm either SSE or SSE2NEON kernels can be built, since they share the same function and object names
171+
add_definitions(-DHAVE_SSE3 -DHAVE_SSE2NEON)
172+
set(SIMD_FLAGS "${SIMD_FLAGS} ${NEON_FLAGS}")
173+
message(STATUS "SSE2NEON enabled. To disable it, run cmake with -DENABLE_SSE2NEON=false")
174+
set(LIBPLL_SOURCES ${LIBPLL_SOURCES} ${LIBPLL_SSE_SOURCES})
175+
SET_SOURCE_FILES_PROPERTIES( ${LIBPLL_SSE_SOURCES} PROPERTIES COMPILE_FLAGS ${NEON_FLAGS} )
154176
endif ()
177+
155178
if (ENABLE_AVX)
156179
add_definitions(-DHAVE_AVX)
157180
set(SIMD_FLAGS "${SIMD_FLAGS} ${AVX_FLAGS}")
@@ -167,7 +190,9 @@ if (ENABLE_AVX2)
167190
SET_SOURCE_FILES_PROPERTIES( ${LIBPLL_AVX2_SOURCES} PROPERTIES COMPILE_FLAGS ${AVX2_FLAGS} )
168191
endif ()
169192

170-
add_definitions(-DHAVE_X86INTRIN_H)
193+
if (ENABLE_SSE OR ENABLE_AVX OR ENABLE_AVX2)
194+
add_definitions(-DHAVE_X86INTRIN_H)
195+
endif()
171196

172197
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${LIBPLL_BASE_FLAGS}")
173198

src/hardware.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
https://github.com/xflouris/libpll/issues/138
2929
3030
*/
31-
#if (defined(__APPLE__)) || \
31+
#if (defined(__APPLE__) && !defined(__aarch64__)) || \
3232
(!defined(__clang__) && defined(__GNUC__) && (__GNUC__ < 4 || \
3333
(__GNUC__ == 4 && __GNUC_MINOR__ < 8))) || \
3434
(defined(__clang__) && (__clang_major__ < 3 || \
@@ -112,6 +112,13 @@ static void cpu_features_detect()
112112
pll_hardware.init = 1;
113113
#if defined(__PPC__)
114114
pll_hardware.altivec_present = __builtin_cpu_supports("altivec");
115+
#elif defined(__aarch64__) && defined(HAVE_SSE2NEON)
116+
pll_hardware.sse_present = 1;
117+
pll_hardware.sse2_present = 1;
118+
pll_hardware.sse3_present = 1;
119+
pll_hardware.ssse3_present = 1;
120+
pll_hardware.sse41_present = 1;
121+
pll_hardware.sse42_present = 1;
115122
#elif defined(__x86_64__) || defined(__i386__)
116123
pll_hardware.mmx_present = __builtin_cpu_supports("mmx");
117124
pll_hardware.sse_present = __builtin_cpu_supports("sse");

0 commit comments

Comments
 (0)