Skip to content

Commit 4079020

Browse files
authored
dockerfile cleanup; enforce text LF line endings (Mintplex-Labs#81)
1 parent 3945a77 commit 4079020

File tree

5 files changed

+46
-52
lines changed

5 files changed

+46
-52
lines changed

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
* text=auto eol=lf

collector/scripts/sitemap.py

Lines changed: 38 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -1,39 +1,39 @@
1-
import requests
2-
import xml.etree.ElementTree as ET
3-
from scripts.link import parse_links
4-
import re
5-
6-
def parse_sitemap(url):
7-
response = requests.get(url)
8-
root = ET.fromstring(response.content)
9-
10-
urls = []
11-
for element in root.iter('{http://www.sitemaps.org/schemas/sitemap/0.9}url'):
12-
for loc in element.iter('{http://www.sitemaps.org/schemas/sitemap/0.9}loc'):
13-
if not has_extension_to_ignore(loc.text):
14-
urls.append(loc.text)
15-
else:
16-
print(f"Skipping filetype: {loc.text}")
17-
18-
return urls
19-
20-
# Example sitemap URL https://www.nerdwallet.com/blog/wp-sitemap-news-articles-1.xml
21-
def sitemap():
22-
sitemap_url = input("Enter the URL of the sitemap: ")
23-
24-
if(len(sitemap_url) == 0):
25-
print("No valid sitemap provided!")
26-
exit(1)
27-
28-
url_array = parse_sitemap(sitemap_url)
29-
30-
#parse links from array
31-
parse_links(url_array)
32-
33-
def has_extension_to_ignore(string):
34-
image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.pdf']
35-
36-
pattern = r'\b(' + '|'.join(re.escape(ext) for ext in image_extensions) + r')\b'
37-
match = re.search(pattern, string, re.IGNORECASE)
38-
1+
import requests
2+
import xml.etree.ElementTree as ET
3+
from scripts.link import parse_links
4+
import re
5+
6+
def parse_sitemap(url):
7+
response = requests.get(url)
8+
root = ET.fromstring(response.content)
9+
10+
urls = []
11+
for element in root.iter('{http://www.sitemaps.org/schemas/sitemap/0.9}url'):
12+
for loc in element.iter('{http://www.sitemaps.org/schemas/sitemap/0.9}loc'):
13+
if not has_extension_to_ignore(loc.text):
14+
urls.append(loc.text)
15+
else:
16+
print(f"Skipping filetype: {loc.text}")
17+
18+
return urls
19+
20+
# Example sitemap URL https://www.nerdwallet.com/blog/wp-sitemap-news-articles-1.xml
21+
def sitemap():
22+
sitemap_url = input("Enter the URL of the sitemap: ")
23+
24+
if(len(sitemap_url) == 0):
25+
print("No valid sitemap provided!")
26+
exit(1)
27+
28+
url_array = parse_sitemap(sitemap_url)
29+
30+
#parse links from array
31+
parse_links(url_array)
32+
33+
def has_extension_to_ignore(string):
34+
image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.pdf']
35+
36+
pattern = r'\b(' + '|'.join(re.escape(ext) for ext in image_extensions) + r')\b'
37+
match = re.search(pattern, string, re.IGNORECASE)
38+
3939
return match is not None

docker/Dockerfile

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -34,12 +34,10 @@ RUN groupadd -g $ARG_GID anythingllm && \
3434
# Copy docker helper scripts
3535
COPY ./docker/docker-entrypoint.sh /usr/local/bin/
3636
COPY ./docker/docker-healthcheck.sh /usr/local/bin/
37-
COPY ./docker/dual_boot.sh /usr/local/bin/
3837

3938
# Ensure the scripts are executable
4039
RUN chmod +x /usr/local/bin/docker-entrypoint.sh && \
41-
chmod +x /usr/local/bin/docker-healthcheck.sh && \
42-
chmod 777 /usr/local/bin/dual_boot.sh
40+
chmod +x /usr/local/bin/docker-healthcheck.sh
4341

4442
USER anythingllm
4543

@@ -91,6 +89,4 @@ HEALTHCHECK --interval=1m --timeout=10s --start-period=1m \
9189
CMD /bin/bash /usr/local/bin/docker-healthcheck.sh || exit 1
9290

9391
# Run the server
94-
ENTRYPOINT ["docker-entrypoint.sh"]
95-
96-
CMD /bin/bash /usr/local/bin/dual_boot.sh
92+
ENTRYPOINT ["/bin/bash", "/usr/local/bin/docker-entrypoint.sh"]

docker/docker-entrypoint.sh

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1-
#!/usr/bin/env bash
2-
3-
exec "$@"
1+
#!/bin/bash
2+
node /app/server/index.js &
3+
{ FLASK_ENV=production FLASK_APP=wsgi.py cd collector && gunicorn --workers 4 --bind 0.0.0.0:8888 wsgi:api; } &
4+
wait -n
5+
exit $?

docker/dual_boot.sh

Lines changed: 0 additions & 5 deletions
This file was deleted.

0 commit comments

Comments
 (0)