Skip to content

Commit

Permalink
also re-run html modifier
Browse files Browse the repository at this point in the history
  • Loading branch information
svbaelen committed Nov 19, 2024
1 parent 1be9d2e commit 3c88494
Show file tree
Hide file tree
Showing 6 changed files with 200 additions and 15 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ COPY . /data/example/
# move executables to PATH
RUN mv /data/example/utils/watcher_docker.sh /usr/local/bin/watcher.sh
RUN mv /data/example/utils/entrypoint.sh /usr/local/bin/cowkit.sh
RUN mv /data/example/utils/html_parser.js /usr/local/bin/html_parser.js
RUN mv /data/example/utils/html_* /usr/local/bin/

## install other latex engine
#RUN curl --proto '=https' --tlsv1.2 -fsSL https://drop-sh.fullyjustified.net | sh
Expand Down
9 changes: 7 additions & 2 deletions src/scripts/subfigs.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,13 @@ function addSubfigCaption() {
figcap.textContent = `(${img.alt})`;
figure.style.width = img.style.width;
img.style.width = "100%";
subfig.insertBefore(figure, img);
figure.appendChild(img);
let oldNode = img;
// use parentNode on img in case of "hyperlinked" img
if (img.parentNode.tagName == "A"){
oldNode = img.parentNode
}
subfig.insertBefore(figure, oldNode);
figure.appendChild(oldNode);
figure.appendChild(figcap);
})
});
Expand Down
14 changes: 11 additions & 3 deletions utils/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -197,11 +197,15 @@ run_pandoc () {
cp -rf $SCRIPTS_DIR $html_chunkdir

# create search index
# html_parsers <file-in> <file-out> <jstemplate>
node /usr/local/bin/html_parser.js \
# html_search_index <file-in> <file-out> <jstemplate>
node /usr/local/bin/html_search_index.js \
"$html_chunkdir/index.html" \
"$ASSETS_DIR/search.json"

# html_modify <file-in>  (rewrites the HTML files linked from the TOC of <file-in> in place)
node /usr/local/bin/html_modify.js \
"$html_chunkdir/index.html"

cp -rf "$ASSETS_DIR/search.json" $html_assetsdir/

#find $html_chunkdir -name "*.html" | while read file; do
Expand Down Expand Up @@ -343,7 +347,11 @@ else
cd /app
echo "[INFO - main] launching file watcher..."
# run ./utils/watcher_docker.sh in docker container
watcher.sh "$CONFIG_MAIN" "$CONFIG_FORMAT" "$TEMPLATE" "$servedir" "$HTTP_PORT"
watcher.sh "$CONFIG_MAIN" "$CONFIG_FORMAT" "$TEMPLATE" \
"$OUTPUT_DIR" $CHUNKED_HTML "$SUBDIR_CHUNKED_HTML" \
"$ASSETS_DIR" "$CONFIG_MAIN_DIR" "$SCRIPTS_DIR" \
"$servedir"

fi
fi
fi
115 changes: 115 additions & 0 deletions utils/html_modify.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// SPDX-FileCopyrightText: 2024 Senne Van Baelen
//
// SPDX-License-Identifier: Apache-2.0

const jsdom = require("jsdom");
const { JSDOM } = jsdom;
var path = require('path');
const fs = require('node:fs/promises');

/********************************************************************/
/* Config + Args */
/********************************************************************/

// Command line: node html_modify.js <path/to/index.html>
// Only the first positional argument is used.
const ARGS = process.argv.slice(2);

// Path of the main HTML file whose "#toc-content" links are followed.
const MAIN_FPATH = ARGS[0];
if (typeof MAIN_FPATH !== 'string' || MAIN_FPATH === '') {
  // Fail with a usage hint instead of the opaque ERR_INVALID_ARG_TYPE that
  // path.dirname(undefined) would raise.
  throw new Error('usage: node html_modify.js <path/to/index.html>');
}

// Directory of the main file; linked file names are resolved against it.
const MAIN_DIRNAME = path.dirname(MAIN_FPATH);


/********************************************************************/
/* Helpers */
/********************************************************************/

/**
 * Escape every colon in an element id by prefixing it with a backslash
 * (e.g. "fig:a:b" becomes "fig\:a\:b").
 *
 * @param {string|null|undefined} id - Raw id, typically the fragment part
 *   of a link. `null`/`undefined` (a link without a fragment) is returned
 *   unchanged instead of throwing.
 * @returns {string|null|undefined} The id with all colons escaped.
 */
function escapeId(id) {
  if (id == null) {
    return id;
  }
  // A string pattern would only replace the first occurrence; the global
  // regex escapes all of them.
  return id.replace(/:/g, '\\:');
}

/**
 * Read a file as UTF-8 text.
 *
 * Read errors are logged to stdout and not rethrown; in that case the
 * returned promise resolves to `undefined`.
 *
 * @param {string|URL} fpath - Location of the file to read.
 * @returns {Promise<string|undefined>} File contents, or `undefined` when
 *   reading failed.
 */
async function readFile(fpath) {
  let contents;
  try {
    contents = await fs.readFile(fpath, { encoding: 'utf8' });
  } catch (readError) {
    console.log(readError);
  }
  return contents;
}

/**
 * Parse an HTML string and wrap each <img> in an <a target="_blank"> whose
 * href is the image source, so clicking an image opens it in a new tab.
 *
 * Images that already sit inside an <a> element are left untouched:
 * nested anchors are invalid HTML, and skipping them makes the function
 * safe to run more than once on the same markup.
 *
 * @param {string} sHTML - HTML markup to process.
 * @returns {Promise<JSDOM>} The parsed, modified document; the caller is
 *   expected to serialize it.
 */
async function clickable_images(sHTML) {
  const dom = new JSDOM(sHTML);
  const { document } = dom.window;

  for (const img of document.querySelectorAll('img')) {
    if (img.closest('a')) {
      continue;
    }

    const link = document.createElement('a');
    link.setAttribute('href', img.src);
    link.setAttribute('target', '_blank');

    // Put the link where the image was, then move the image node itself
    // into it (no re-parsing through innerHTML/outerHTML).
    img.parentNode.insertBefore(link, img);
    link.appendChild(img);
  }

  return dom;
}

/**
 * Make the images clickable in every HTML file referenced from the table
 * of contents of the main document.
 *
 * The links inside the "#toc-content" element are collected; the part of
 * each href before "#" is taken as a file name relative to MAIN_DIRNAME.
 * Each distinct file is read, passed through clickable_images() and
 * written back in place.
 *
 * @param {string} sHTML - Markup of the main HTML file.
 * @returns {Promise<void>}
 * @throws {Error} When the document has no "#toc-content" element.
 */
async function process_chuncked_html(sHTML) {
  const mainDom = new JSDOM(sHTML);
  const tocContent = mainDom.window.document.querySelector('#toc-content');
  if (!tocContent) {
    throw new Error(`no "#toc-content" element found in ${MAIN_FPATH}`);
  }

  const tocLinks = Array.from(
    tocContent.getElementsByTagName('a'),
    (anchor) => anchor.href,
  );

  // Several TOC entries usually point into the same file; a Set ensures
  // each file is rewritten once even when its entries are not adjacent.
  const processedFiles = new Set();

  for (const link of tocLinks) {
    const [fileName] = link.split('#');

    // An empty name means a same-page link ("#id") or a missing href;
    // there is no separate file to process.
    if (!fileName || processedFiles.has(fileName)) {
      continue;
    }
    processedFiles.add(fileName);

    const filePath = `${MAIN_DIRNAME}/${fileName}`;
    const fileContent = await readFile(filePath);
    if (fileContent == null) {
      // readFile() already logged the error; do not write an empty
      // document to a path that could not be read.
      continue;
    }

    const fileDom = await clickable_images(fileContent);
    await fs.writeFile(filePath, fileDom.serialize(), 'utf8');
  }
}


/********************************************************************/
/* Main */
/********************************************************************/

/**
 * Entry point: read the main HTML file given on the command line and
 * process the files linked from its table of contents.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the main file cannot be read.
 */
async function main() {
  const mainFileContent = await readFile(MAIN_FPATH);
  if (mainFileContent == null) {
    // readFile() logs and returns undefined on failure; stop here with a
    // clear message rather than failing later on an empty document.
    throw new Error(`could not read main HTML file: ${MAIN_FPATH}`);
  }

  await process_chuncked_html(mainFileContent);
}

// Report failures explicitly and signal them through the exit code
// instead of leaving the promise rejection unhandled.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
File renamed without changes.
75 changes: 66 additions & 9 deletions utils/watcher_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,22 +40,76 @@ CONFIG_MAIN=$1
CONFIG_FORMAT=$2
TEMPLATE=$3
OUTPUT_DIR=$4
CHUNKED_HTML=$5
SUBDIR_CHUNKED_HTML=$6
ASSETS_DIR=$7
CONFIG_MAIN_DIR=$8
SCRIPTS_DIR=$9
SERVER_DIR=${10}

#=========================================================
# Functions
#=========================================================

run_pandoc () {

#rm -rf "$OUTPUT_DIR"

pandoc \
--defaults=${CONFIG_MAIN} \
--defaults=${CONFIG_FORMAT} \
--template=${TEMPLATE} \
--output="${OUTPUT_DIR}_tmp"
--output="${SERVER_DIR}_tmp"

# html_modify <file-in>  (rewrites the HTML files linked from the TOC of <file-in> in place)
node /usr/local/bin/html_modify.js \
"${SERVER_DIR}_tmp/index.html"
}

#run_pandoc () {
#config_format=$1
#template=$2

#if [ $CHUNKED_HTML = 1 ]; then
#html_chunkdir="$OUTPUT_DIR/$SUBDIR_CHUNKED_HTML"
#echo $html_chunkdir
## make sure it's empty
#rm -rf $html_chunkdir
#fi

#pandoc \
#--defaults=${CONFIG_MAIN} \
#--defaults=${config_format} \
#--template=${template}

## check if unzipping chunked HTML is necessary
## unzipping is not necessary as it can be done through pandoc too, however,
## the copying still is
#if [ $CHUNKED_HTML = 1 ]; then

## unzip if chunked html
#html_chunkdir="$OUTPUT_DIR/$SUBDIR_CHUNKED_HTML"
#htmlcnfdir="$html_chunkdir/$CONFIG_MAIN_DIR/html"
#html_assetsdir="$html_chunkdir/assets"
#mkdir -p $htmlcnfdir
#mkdir -p $html_assetsdir

#cp -rf $CONFIG_MAIN_DIR/html/* $htmlcnfdir
#cp -rf $SCRIPTS_DIR $html_chunkdir

## create search index
## html_search_index <file-in> <file-out> <jstemplate>
#node /usr/local/bin/html_search_index.js \
#"$html_chunkdir/index.html" \
#"$ASSETS_DIR/search.json"

## html_modify <file-in> <file-out> <jstemplate>
#node /usr/local/bin/html_modify.js \
#"$html_chunkdir/index.html"

#cp -rf "$ASSETS_DIR/search.json" $html_assetsdir/
#fi
#}


#=========================================================
# Main
#=========================================================
Expand Down Expand Up @@ -102,12 +156,14 @@ do

LAST_EVENT_TIME=$EVENT_TIME
echo "[INFO - watcher] running pandoc"
run_pandoc
#run_pandoc
run_pandoc $CONFIG_FORMAT $TEMPLATE

echo "[INFO - watcher] done - build output updated"

# next two lines are to avoid pandoc error "createDirectory" failed (already exists)
rsync --remove-source-files --include "*.html" -a ${OUTPUT_DIR}_tmp/* $OUTPUT_DIR/
rm -rf ${OUTPUT_DIR}_tmp
rsync --remove-source-files --include "*.html" -a ${SERVER_DIR}_tmp/* $SERVER_DIR/
rm -rf ${SERVER_DIR}_tmp

# reload
# first arg, or default to firefox if not set
Expand All @@ -123,9 +179,10 @@ do
sleep $TIME_DIFF
echo "[INFO - watcher] running pandoc"
# next two lines are to avoid pandoc error "createDirectory" failed (already exists)
run_pandoc
rsync --remove-source-files --include "*.html" -a ${OUTPUT_DIR}_tmp/* $OUTPUT_DIR/
rm -rf ${OUTPUT_DIR}_tmp
#run_pandoc
run_pandoc $CONFIG_FORMAT $TEMPLATE
rsync --remove-source-files --include "*.html" -a ${SERVER_DIR}_tmp/* $SERVER_DIR/
rm -rf ${SERVER_DIR}_tmp
else
echo "[INFO - watcher] skipping rerun (timediff set to $TIME_UNTIL_RERUN sec)"
fi
Expand Down

0 comments on commit 3c88494

Please sign in to comment.