From e2bf1282b0ff6a6f4d03eb51bfffa22edc1b6a9b Mon Sep 17 00:00:00 2001
From: Keith Suderman <suderman@jhu.edu>
Date: Tue, 19 May 2026 18:49:15 -0400
Subject: [PATCH 1/2] Add Stanford CoreNLP NLP tool with data manager

- Stanford CoreNLP tool wrapper supporting 8 languages
- Multi-language support: English, Chinese, Arabic, French, German, Spanish, Italian, Hungarian
- Multiple annotators: tokenization, POS, NER, parsing, coreference, sentiment
- Output formats: JSON, CoNLL, CoNLL-U, XML, text
- Docker containerization with Java 21
- Data manager for automated model downloads from Maven Central
- Comprehensive tests and documentation
- Fixed coreference resolution with common models JAR

Tool: stanford_corenlp (v4.5.10+galaxy4)
Data Manager: data_manager_corenlp_models
Categories: Text Manipulation, Natural Language Processing
---
 .../data_manager_corenlp_models/.shed.yml     |  23 +
 .../data_manager_corenlp_models/README.md     |  63 +++
 .../data_manager_corenlp_models.py            | 209 ++++++++++
 .../data_manager_corenlp_models.xml           |  83 ++++
 .../tool-data/corenlp_models.loc.sample       |  21 +
 .../tool_data_table_conf.xml.sample           |   6 +
 tools/corenlp/.shed.yml                       |  23 +
 tools/corenlp/README.md                       |  60 +++
 tools/corenlp/macros.xml                      |   4 +
 tools/corenlp/stanford_corenlp.xml            | 231 ++++++++++
 tools/corenlp/test-data/.download_models.sh   |  10 +
 tools/corenlp/test-data/2.txt                 |   2 +
 tools/corenlp/test-data/README.md             |  42 ++
 tools/corenlp/test-data/corenlp_models.loc    |   2 +
 tools/corenlp/test-data/input.txt             |   2 +
 tools/corenlp/test-data/input.txt.conll       |  23 +
 tools/corenlp/test-data/input.txt.conllu      |  23 +
 tools/corenlp/test-data/sa-input.txt          |   1 +
 tools/corenlp/test-data/sa-input.txt.json     | 394 ++++++++++++++++++
 .../tool-data/corenlp_models.loc.sample       |  19 +
 tools/corenlp/tool_data_table_conf.xml.sample |   6 +
 21 files changed, 1247 insertions(+)
 create mode 100644 data_managers/data_manager_corenlp_models/.shed.yml
 create mode 100644 data_managers/data_manager_corenlp_models/README.md
 create mode 100755 data_managers/data_manager_corenlp_models/data_manager_corenlp_models.py
 create mode 100644 data_managers/data_manager_corenlp_models/data_manager_corenlp_models.xml
 create mode 100644 data_managers/data_manager_corenlp_models/tool-data/corenlp_models.loc.sample
 create mode 100644 data_managers/data_manager_corenlp_models/tool_data_table_conf.xml.sample
 create mode 100644 tools/corenlp/.shed.yml
 create mode 100644 tools/corenlp/README.md
 create mode 100644 tools/corenlp/macros.xml
 create mode 100644 tools/corenlp/stanford_corenlp.xml
 create mode 100644 tools/corenlp/test-data/.download_models.sh
 create mode 100644 tools/corenlp/test-data/2.txt
 create mode 100644 tools/corenlp/test-data/README.md
 create mode 100644 tools/corenlp/test-data/corenlp_models.loc
 create mode 100644 tools/corenlp/test-data/input.txt
 create mode 100644 tools/corenlp/test-data/input.txt.conll
 create mode 100644 tools/corenlp/test-data/input.txt.conllu
 create mode 100644 tools/corenlp/test-data/sa-input.txt
 create mode 100644 tools/corenlp/test-data/sa-input.txt.json
 create mode 100644 tools/corenlp/tool-data/corenlp_models.loc.sample
 create mode 100644 tools/corenlp/tool_data_table_conf.xml.sample

diff --git a/data_managers/data_manager_corenlp_models/.shed.yml b/data_managers/data_manager_corenlp_models/.shed.yml
new file mode 100644
index 00000000000..784d43933b5
--- /dev/null
+++ b/data_managers/data_manager_corenlp_models/.shed.yml
@@ -0,0 +1,23 @@
+name: data_manager_corenlp_models
+owner: iuc
+description: Data manager for downloading Stanford CoreNLP language model JARs
+long_description: |
+  This data manager allows Galaxy administrators to download and install Stanford
+  CoreNLP language model JAR files from Maven Central. It supports 8 languages
+  with multi-select installation for use with the Stanford CoreNLP annotation tool.
+
+  Supported languages: Arabic, Chinese, English, French, German, Hungarian, Italian, Spanish.
+  Also provides the common models JAR required for coreference resolution.
+homepage_url: https://stanfordnlp.github.io/CoreNLP/
+remote_repository_url: https://github.com/ksuderman/data-manager-corenlp
+type: unrestricted
+categories:
+  - Data Managers
+  - Text Manipulation
+  - Natural Language Processing
+auto_tool_repositories:
+  name_template: "{{ tool_id }}"
+  description_template: "{{ tool_name }}"
+  include:
+    - data_manager_corenlp_models.xml
+  test: false
diff --git a/data_managers/data_manager_corenlp_models/README.md b/data_managers/data_manager_corenlp_models/README.md
new file mode 100644
index 00000000000..19607a3ee61
--- /dev/null
+++ b/data_managers/data_manager_corenlp_models/README.md
@@ -0,0 +1,63 @@
+# Stanford CoreNLP Language Models Data Manager
+
+Galaxy data manager for downloading and installing Stanford CoreNLP language model JARs.
+
+## Description
+
+This data manager allows Galaxy administrators to easily download and install Stanford CoreNLP language model JAR files from Maven Central for use with the Stanford CoreNLP annotation tool.
+
+## Features
+
+- **Multi-select support**: Install multiple language models at once using checkboxes
+- **Common models option**: Optionally install common models JAR (required for coreference resolution)
+- **Automatic download**: Downloads model JARs from Maven Central
+- **Automatic registration**: Registers models in Galaxy's data table for immediate use
+- **Error handling**: Continues with remaining models if one fails to download
+
+## Supported Languages
+
+Language models are available for:
+- Arabic (ar)
+- Chinese (zh)
+- English (en)
+- French (fr)
+- German (de)
+- Hungarian (hu)
+- Italian (it)
+- Spanish (es)
+
+**Note**: Not all annotators are available for all languages. See the [Stanford CoreNLP documentation](https://stanfordnlp.github.io/CoreNLP/human-languages.html) for language-specific capabilities.
+
+## Installation
+
+1. Install this data manager via the Galaxy Tool Shed or manually
+2. As a Galaxy admin, go to **Admin → Local Data**
+3. Select "Stanford CoreNLP Language Models"
+4. Check the boxes for the languages you want to install
+5. Optionally check "Install common models" if you need coreference resolution (checked by default)
+6. Click "Execute"
+
+The models are large files (typically 100-500 MB each), so downloading multiple models may take several minutes depending on your connection speed.
+
+### Common Models
+
+The common models JAR (452 MB) contains shared dictionaries and resources needed for coreference resolution. If you plan to use the coreference annotator, you must install the common models. This option is checked by default for convenience.
+
+## Requirements
+
+- Python 3.9+
+- Internet connection for downloading models from Maven Central
+
+## Version
+
+This data manager downloads models for CoreNLP version 4.5.10.
+
+## Usage with Stanford CoreNLP Tool
+
+After installing language models via this data manager, they will be available in the Stanford CoreNLP tool's "Language Model" dropdown. The tool uses a Docker container (`ksuderman/corenlp:4.5.10`) that includes the base CoreNLP library, and mounts the language-specific model JARs at runtime.
+
+## More Information
+
+- Stanford CoreNLP website: https://stanfordnlp.github.io/CoreNLP/
+- Maven Central repository: https://repo1.maven.org/maven2/edu/stanford/nlp/stanford-corenlp/
+- Galaxy Tool Shed: https://toolshed.g2.bx.psu.edu
diff --git a/data_managers/data_manager_corenlp_models/data_manager_corenlp_models.py b/data_managers/data_manager_corenlp_models/data_manager_corenlp_models.py
new file mode 100755
index 00000000000..34009e00890
--- /dev/null
+++ b/data_managers/data_manager_corenlp_models/data_manager_corenlp_models.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python
+"""
+Data Manager for Stanford CoreNLP Language Models
+
+Downloads CoreNLP language model JARs to a persistent directory and registers
+them in the Galaxy data table. Each JAR is registered by its absolute path so
+the CoreNLP tool can symlink it at runtime.
+"""
+
+import argparse
+import json
+import sys
+import urllib.request
+from pathlib import Path
+
+
+# CoreNLP release to download; every JAR name and Maven Central URL below embeds it
+CORENLP_VERSION = "4.5.10"
+
+# Common models JAR (contains dcoref dictionaries and common models); registered under the value "common"
+COMMON_MODELS = {
+    "name": "Common Models",
+    "jar_name": f"stanford-corenlp-{CORENLP_VERSION}-models.jar",
+    "url": f"https://repo1.maven.org/maven2/edu/stanford/nlp/stanford-corenlp/{CORENLP_VERSION}/stanford-corenlp-{CORENLP_VERSION}-models.jar"
+}
+
+LANGUAGE_MODELS = {  # keyed by the language code accepted by --language
+    "ar": {
+        "name": "Arabic",
+        "jar_name": f"stanford-corenlp-{CORENLP_VERSION}-models-arabic.jar",
+        "url": f"https://repo1.maven.org/maven2/edu/stanford/nlp/stanford-corenlp/{CORENLP_VERSION}/stanford-corenlp-{CORENLP_VERSION}-models-arabic.jar"
+    },
+    "zh": {
+        "name": "Chinese",
+        "jar_name": f"stanford-corenlp-{CORENLP_VERSION}-models-chinese.jar",
+        "url": f"https://repo1.maven.org/maven2/edu/stanford/nlp/stanford-corenlp/{CORENLP_VERSION}/stanford-corenlp-{CORENLP_VERSION}-models-chinese.jar"
+    },
+    "en": {
+        "name": "English",
+        "jar_name": f"stanford-corenlp-{CORENLP_VERSION}-models-english.jar",
+        "url": f"https://repo1.maven.org/maven2/edu/stanford/nlp/stanford-corenlp/{CORENLP_VERSION}/stanford-corenlp-{CORENLP_VERSION}-models-english.jar"
+    },
+    "fr": {
+        "name": "French",
+        "jar_name": f"stanford-corenlp-{CORENLP_VERSION}-models-french.jar",
+        "url": f"https://repo1.maven.org/maven2/edu/stanford/nlp/stanford-corenlp/{CORENLP_VERSION}/stanford-corenlp-{CORENLP_VERSION}-models-french.jar"
+    },
+    "de": {
+        "name": "German",
+        "jar_name": f"stanford-corenlp-{CORENLP_VERSION}-models-german.jar",
+        "url": f"https://repo1.maven.org/maven2/edu/stanford/nlp/stanford-corenlp/{CORENLP_VERSION}/stanford-corenlp-{CORENLP_VERSION}-models-german.jar"
+    },
+    "hu": {
+        "name": "Hungarian",
+        "jar_name": f"stanford-corenlp-{CORENLP_VERSION}-models-hungarian.jar",
+        "url": f"https://repo1.maven.org/maven2/edu/stanford/nlp/stanford-corenlp/{CORENLP_VERSION}/stanford-corenlp-{CORENLP_VERSION}-models-hungarian.jar"
+    },
+    "it": {
+        "name": "Italian",
+        "jar_name": f"stanford-corenlp-{CORENLP_VERSION}-models-italian.jar",
+        "url": f"https://repo1.maven.org/maven2/edu/stanford/nlp/stanford-corenlp/{CORENLP_VERSION}/stanford-corenlp-{CORENLP_VERSION}-models-italian.jar"
+    },
+    "es": {
+        "name": "Spanish",
+        "jar_name": f"stanford-corenlp-{CORENLP_VERSION}-models-spanish.jar",
+        "url": f"https://repo1.maven.org/maven2/edu/stanford/nlp/stanford-corenlp/{CORENLP_VERSION}/stanford-corenlp-{CORENLP_VERSION}-models-spanish.jar"
+    }
+}
+
+
+def download_model(url, target_path):
+    """Download url to target_path through a temporary ".part" file; return True on success, False on error."""
+    print(f"Downloading from {url}")
+    print(f"Saving to {target_path}")
+    part_path, shown = Path(f"{target_path}.part"), [-1]  # shown[0]: last percentage logged
+
+    def report_progress(block_num, block_size, total_size):
+        done = block_num * block_size
+        if total_size > 0 and done * 100 // total_size > shown[0]:  # hook runs per block; log once per percent
+            shown[0] = done * 100 // total_size
+            print(f"  {min(100, shown[0])}% ({done / 1024 / 1024:.0f}/{total_size / 1024 / 1024:.0f} MB)", flush=True)
+    try:
+        urllib.request.urlretrieve(url, part_path, reporthook=report_progress)
+        part_path.replace(target_path)  # publish the JAR only once it is complete
+        print("Download complete!")
+        return True
+    except Exception as e:
+        part_path.unlink(missing_ok=True)  # never leave a truncated download behind
+        print(f"Error downloading file: {e}", file=sys.stderr)
+        return False
+
+
+def load_existing_models(data_table_path):
+    """Return the set of first-column values found in the .loc file at data_table_path.
+
+    Blank lines and lines starting with '#' are ignored; a missing or unset path
+    yields an empty set.
+    """
+    if not data_table_path or not Path(data_table_path).exists():
+        return set()
+    with open(data_table_path) as handle:
+        rows = (raw.strip() for raw in handle)
+        # The first tab-separated column is the entry's unique value
+        return {row.split('\t')[0] for row in rows if row and not row.startswith('#')}
+
+
+def _install_model(value, model_info, label, target_dir, existing_models):
+    """Make sure one model JAR is in target_dir and build its data table row.
+
+    Parameters:
+        value: data table key, "common" or a language code.
+        model_info: COMMON_MODELS or one LANGUAGE_MODELS entry.
+        label: wording used for this model in log messages.
+        target_dir: pathlib.Path of the directory holding the JARs.
+        existing_models: values already present in the data table.
+
+    Returns the row as a dict, or None when the model is already registered
+    or its download failed.
+    """
+    banner = "=" * 60
+    if value in existing_models:
+        print(f"\n{banner}")
+        print(f"Skipping {model_info['name']} - already in data table")
+        print(banner)
+        return None
+
+    print(f"\n{banner}")
+    print(f"Processing {label}...")
+    print(banner)
+
+    jar_path = target_dir / model_info["jar_name"]
+    if jar_path.exists():
+        print(f"Model already exists at {jar_path}")
+    elif not download_model(model_info["url"], str(jar_path)):
+        print(f"WARNING: Failed to download {label}", file=sys.stderr)
+        # An interrupted transfer can leave a truncated file behind. Remove it so
+        # it is neither registered now nor treated as "already exists" next run.
+        jar_path.unlink(missing_ok=True)
+        return None
+
+    print(f"Registered {label}")
+    print(f"  Path: {jar_path.absolute()}")
+    return {
+        "value": value,
+        "name": model_info["name"],
+        "lang_code": value,
+        "models_path": str(jar_path.absolute())
+    }
+
+
+def main():
+    """Install the requested models and write the Galaxy data manager JSON.
+
+    A failed download is reported on stderr and skipped so that the remaining
+    models are still processed.
+    """
+    parser = argparse.ArgumentParser(description="Download and register CoreNLP language models")
+    parser.add_argument("--language", action="append", choices=LANGUAGE_MODELS.keys(),
+                        help="Language code(s) for the model(s) to download")
+    parser.add_argument("--common-models", action="store_true",
+                        help="Download the common models JAR (required for coreference)")
+    parser.add_argument("--target-directory", required=True,
+                        help="Directory to store the downloaded model JARs")
+    parser.add_argument("--output", required=True,
+                        help="JSON output file for Galaxy data manager")
+    parser.add_argument("--data-table", required=False,
+                        help="Path to existing data table file to check for duplicates")
+
+    args = parser.parse_args()
+
+    if not args.language and not args.common_models:
+        parser.error("At least one of --language or --common-models must be specified")
+
+    existing_models = load_existing_models(args.data_table)
+
+    target_dir = Path(args.target_directory)
+    target_dir.mkdir(parents=True, exist_ok=True)
+
+    # Common models first (when requested), then each language in the order given.
+    requested = []
+    if args.common_models:
+        requested.append(("common", COMMON_MODELS, COMMON_MODELS["name"]))
+    for lang_code in args.language or []:
+        model_info = LANGUAGE_MODELS[lang_code]
+        requested.append((lang_code, model_info, f"{model_info['name']} model"))
+
+    data_table_entries = []
+    for value, model_info, label in requested:
+        entry = _install_model(value, model_info, label, target_dir, existing_models)
+        if entry is not None:
+            data_table_entries.append(entry)
+
+    # Write data manager JSON output
+    data_manager_output = {
+        "data_tables": {
+            "corenlp_models": data_table_entries
+        }
+    }
+
+    with open(args.output, "w") as f:
+        json.dump(data_manager_output, f, indent=2)
+
+    print(f"\n{'=' * 60}")
+    print(f"Summary: {len(data_table_entries)} model(s) registered")
+    print(f"{'=' * 60}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/data_managers/data_manager_corenlp_models/data_manager_corenlp_models.xml b/data_managers/data_manager_corenlp_models/data_manager_corenlp_models.xml
new file mode 100644
index 00000000000..015de2770e8
--- /dev/null
+++ b/data_managers/data_manager_corenlp_models/data_manager_corenlp_models.xml
@@ -0,0 +1,83 @@
+<tool id="data_manager_corenlp_models" name="Stanford CoreNLP Language Models" version="4.5.10.5" tool_type="manage_data" profile="21.05">
+    <description>Download and install CoreNLP language model JARs</description>
+    <requirements>
+        <requirement type="package" version="3.9">python</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        python '$__tool_directory__/data_manager_corenlp_models.py'
+        #for $lang in $languages
+            --language '$lang'
+        #end for
+        #if $common_models
+            --common-models
+        #end if
+        --target-directory '${__tool_data_path__}/corenlp_models'
+        --output '$out_file'
+        --data-table '${__tool_data_path__}/corenlp_models.loc'
+    ]]></command>
+    <inputs>
+        <param name="languages" type="select" label="Language Models" multiple="true" display="checkboxes">
+            <option value="ar">Arabic</option>
+            <option value="zh">Chinese</option>
+            <option value="en" selected="true">English</option>
+            <option value="fr">French</option>
+            <option value="de">German</option>
+            <option value="hu">Hungarian</option>
+            <option value="it">Italian</option>
+            <option value="es">Spanish</option>
+        </param>
+        <param name="common_models" type="boolean" label="Install common models (required for coreference)" checked="true" help="Downloads the common models JAR (452 MB) which includes dictionary files and models needed for coreference resolution"/>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json" />
+    </outputs>
+    <help><![CDATA[
+Stanford CoreNLP Language Models Data Manager
+==============================================
+
+This data manager downloads and installs Stanford CoreNLP language model JAR files
+for use with the Stanford CoreNLP annotation tool.
+
+Available Languages
+-------------------
+
+- **Arabic** (ar)
+- **Chinese** (zh)
+- **English** (en)
+- **French** (fr)
+- **German** (de)
+- **Hungarian** (hu)
+- **Italian** (it)
+- **Spanish** (es)
+
+Usage
+-----
+
+1. Select one or more language models you want to install (checkboxes)
+2. Optionally check "Install common models" if you need coreference resolution (checked by default)
+3. Run the data manager
+4. The model JARs will be downloaded from Maven Central and registered in the data table
+
+The language models are large files (typically 100-500 MB each), so downloading
+multiple models may take several minutes depending on your connection speed.
+
+Common Models
+-------------
+
+The common models JAR (452 MB) contains shared dictionaries and resources needed for
+coreference resolution. It is required for the coreference annotator to work. This
+option is checked by default.
+
+Version
+-------
+
+This data manager downloads models for CoreNLP version 4.5.10.
+
+**Note:** Not all annotators are available for all languages. See the Stanford CoreNLP
+documentation for language-specific capabilities:
+https://stanfordnlp.github.io/CoreNLP/human-languages.html
+    ]]></help>
+    <citations>
+        <citation type="doi">10.3115/v1/P14-5010</citation>
+    </citations>
+</tool>
diff --git a/data_managers/data_manager_corenlp_models/tool-data/corenlp_models.loc.sample b/data_managers/data_manager_corenlp_models/tool-data/corenlp_models.loc.sample
new file mode 100644
index 00000000000..d080306ad77
--- /dev/null
+++ b/data_managers/data_manager_corenlp_models/tool-data/corenlp_models.loc.sample
@@ -0,0 +1,21 @@
+# This file defines the available Stanford CoreNLP language models.
+# Each line represents a language model with the following columns (tab-separated):
+#   <value>      <name>        <lang_code>  <models_path>
+#
+# - value: Unique identifier (typically the language code)
+# - name: Display name shown to users
+# - lang_code: Language code used by CoreNLP (ar, zh, en, fr, de, hu, it, es), or "common" for the common models JAR
+# - models_path: Absolute path to the language model JAR file
+#
+# Models are downloaded via the data manager and stored in Galaxy's tool-data directory.
+#
+# Example entries (populated automatically by data manager):
+#
+#en	English	en	/path/to/stanford-corenlp-4.5.10-models-english.jar
+#es	Spanish	es	/path/to/stanford-corenlp-4.5.10-models-spanish.jar
+#de	German	de	/path/to/stanford-corenlp-4.5.10-models-german.jar
+#fr	French	fr	/path/to/stanford-corenlp-4.5.10-models-french.jar
+#zh	Chinese	zh	/path/to/stanford-corenlp-4.5.10-models-chinese.jar
+#ar	Arabic	ar	/path/to/stanford-corenlp-4.5.10-models-arabic.jar
+#hu	Hungarian	hu	/path/to/stanford-corenlp-4.5.10-models-hungarian.jar
+#it	Italian	it	/path/to/stanford-corenlp-4.5.10-models-italian.jar
diff --git a/data_managers/data_manager_corenlp_models/tool_data_table_conf.xml.sample b/data_managers/data_manager_corenlp_models/tool_data_table_conf.xml.sample
new file mode 100644
index 00000000000..4615c4b2916
--- /dev/null
+++ b/data_managers/data_manager_corenlp_models/tool_data_table_conf.xml.sample
@@ -0,0 +1,6 @@
+<tables>
+    <table name="corenlp_models" comment_char="#">
+        <columns>value, name, lang_code, models_path</columns>
+        <file path="tool-data/corenlp_models.loc" />
+    </table>
+</tables>
diff --git a/tools/corenlp/.shed.yml b/tools/corenlp/.shed.yml
new file mode 100644
index 00000000000..34f69854321
--- /dev/null
+++ b/tools/corenlp/.shed.yml
@@ -0,0 +1,23 @@
+name: stanford_corenlp
+owner: iuc
+description: Stanford CoreNLP Annotators for multi-language natural language processing
+long_description: |
+  Galaxy wrappers for Stanford CoreNLP annotators supporting multiple languages
+  (Arabic, Chinese, English, French, German, Hungarian, Italian, Spanish). Provides
+  tokenization, POS tagging, lemmatization, NER, dependency parsing, coreference
+  resolution, and sentiment analysis.
+
+  Note: Tests require language models to be installed via the data manager.
+  Automated ToolShed tests are skipped due to large model file requirements (424MB).
+categories:
+  - Text Manipulation
+  - Natural Language Processing
+homepage_url: https://stanfordnlp.github.io/CoreNLP/
+remote_repository_url: https://github.com/ksuderman/galaxy_tools_corenlp
+type: unrestricted
+auto_tool_repositories:
+  name_template: "{{ tool_id }}"
+  description_template: "{{ tool_name }}"
+  include:
+    - stanford_corenlp.xml
+  test: false
diff --git a/tools/corenlp/README.md b/tools/corenlp/README.md
new file mode 100644
index 00000000000..3fdc0efcf9c
--- /dev/null
+++ b/tools/corenlp/README.md
@@ -0,0 +1,60 @@
+# Galaxy Wrapper for Stanford CoreNLP (Multi-language)
+
+This Galaxy tool wrapper provides access to Stanford CoreNLP annotation pipelines with support for multiple languages. Users can select from various annotators and output formats.
+
+See the [Stanford CoreNLP website](https://stanfordnlp.github.io/CoreNLP/) for more information about the underlying library.
+
+## Features
+
+- **Multi-language support**: Process text in Arabic, Chinese, English, French, German, Hungarian, Italian, and Spanish
+- **Multiple annotators**: Tokenization, POS tagging, NER, dependency parsing, coreference, and sentiment analysis
+- **Multiple output formats**: JSON, CoNLL, CoNLL-U, text, and XML
+- **Dockerized execution**: Uses Docker container for consistent environment
+- **Data manager integration**: Language models downloaded separately for flexibility
+
+## Requirements
+
+- **Data Manager**: Language models must be installed via the Stanford CoreNLP Language Models data manager
+- **Common Models**: Required for coreference resolution (installed via data manager)
+- **Docker**: The tool uses the `ksuderman/corenlp:4.5.10` Docker image
+
+## Annotators
+
+| Type | Description | Annotators Used |
+|---|---|---|
+| **Segmentation** | Sentences and tokens only | tokenize, ssplit |
+| **Part of speech** | POS tags and lemmas | tokenize, ssplit, pos, lemma |
+| **Named entity recognition** | Named entities (PERSON, ORG, etc.) | tokenize, ssplit, pos, lemma, ner |
+| **Dependency parse** | Syntactic dependencies | tokenize, ssplit, pos, lemma, ner, parse |
+| **Coreference** | Entity coreferences (requires common models) | tokenize, ssplit, pos, lemma, ner, parse, coref |
+| **Sentiment analysis** | Sentiment scores | tokenize, ssplit, pos, lemma, parse, sentiment |
+
+## Output Formats
+
+- **JSON**: Full annotations with hierarchical structure (recommended for most use cases)
+- **CoNLL**: Tab-separated format for NER and basic annotations
+- **CoNLL-U**: Universal Dependencies format
+- **Text**: Human-readable plain text output
+- **XML**: Structured XML format
+
+**Note**: Not all annotations are representable in all formats. JSON provides the most complete representation.
+
+## Installation
+
+1. Install the data manager: `data_manager_corenlp_models`
+2. Install this tool: `stanford_corenlp`
+3. Use the data manager to download language models:
+   - Go to **Admin → Local Data**
+   - Select "Stanford CoreNLP Language Models"
+   - Choose language(s) to install
+   - Check "Install common models" if you need coreference resolution
+4. The tool will automatically appear in the "Natural Language Processing" section
+
+## Version
+
+This wrapper is for Stanford CoreNLP version 4.5.10.
+
+
+## LICENSE
+
+This tool wrapper is provided under the MIT License.
\ No newline at end of file
diff --git a/tools/corenlp/macros.xml b/tools/corenlp/macros.xml
new file mode 100644
index 00000000000..456837caae9
--- /dev/null
+++ b/tools/corenlp/macros.xml
@@ -0,0 +1,4 @@
+<macros>
+    <token name="@TOOL_VERSION@">4.5.10</token>
+    <token name="@VERSION_SUFFIX@">4</token>
+</macros>
diff --git a/tools/corenlp/stanford_corenlp.xml b/tools/corenlp/stanford_corenlp.xml
new file mode 100644
index 00000000000..805cdc65c83
--- /dev/null
+++ b/tools/corenlp/stanford_corenlp.xml
@@ -0,0 +1,231 @@
+<tool id="stanford_corenlp" name="Stanford CoreNLP Annotators (Multi-language)" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5" profile="21.05">
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+		<container type="docker">ksuderman/corenlp:@TOOL_VERSION@</container>
+    </requirements>
+    <version_command><![CDATA[
+echo "@TOOL_VERSION@"
+    ]]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
+    ## Stage the model JAR(s) and classpath, write per-language properties, run CoreNLP, then move its output into place.
+    ln -s '${language_model.fields.models_path}' ./model.jar &&
+    #if $annotators == "coref"
+    ln -s "\$(dirname '${language_model.fields.models_path}')/stanford-corenlp-@TOOL_VERSION@-models.jar" ./common.jar &&
+    export CLASSPATH="./model.jar:./common.jar:/opt/corenlp/*:/opt/corenlp:\$CLASSPATH" &&
+    #else
+    export CLASSPATH="./model.jar:/opt/corenlp/*:/opt/corenlp:\$CLASSPATH" &&
+    #end if
+    #if $language_model.fields.lang_code == "en"
+    echo 'pos.model = edu/stanford/nlp/models/pos-tagger/english-bidirectional-distsim.tagger' > corenlp.properties &&
+    echo 'parse.model = edu/stanford/nlp/models/lexparser/englishPCFG.caseless.ser.gz' >> corenlp.properties &&
+    echo 'ner.model = edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz,edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz,edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz' >> corenlp.properties &&
+    echo 'ner.applyFineGrained = false' >> corenlp.properties &&
+    echo 'coref.algorithm = statistical' >> corenlp.properties &&
+    echo 'sentiment.model = edu/stanford/nlp/models/sentiment/sentiment.binary.ser.gz' >> corenlp.properties &&
+    #else if $language_model.fields.lang_code == "zh"
+    echo 'pos.model = edu/stanford/nlp/models/pos-tagger/chinese-distsim.tagger' > corenlp.properties &&
+    echo 'ner.model = edu/stanford/nlp/models/ner/chinese.misc.distsim.crf.ser.gz' >> corenlp.properties &&
+    echo 'ner.applyFineGrained = false' >> corenlp.properties &&
+    echo 'parse.model = edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz' >> corenlp.properties &&
+    echo 'coref.algorithm = statistical' >> corenlp.properties &&
+    #else if $language_model.fields.lang_code == "fr"
+    echo 'pos.model = edu/stanford/nlp/models/pos-tagger/french-ud.tagger' > corenlp.properties &&
+    echo 'ner.model = edu/stanford/nlp/models/ner/french.crf.gz' >> corenlp.properties &&
+    echo 'ner.applyFineGrained = false' >> corenlp.properties &&
+    echo 'parse.model = edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz' >> corenlp.properties &&
+    #else if $language_model.fields.lang_code == "de"
+    echo 'pos.model = edu/stanford/nlp/models/pos-tagger/german-ud.tagger' > corenlp.properties &&
+    echo 'ner.model = edu/stanford/nlp/models/ner/german.conll.hgc_175m_600.crf.ser.gz' >> corenlp.properties &&
+    echo 'ner.applyFineGrained = false' >> corenlp.properties &&
+    echo 'parse.model = edu/stanford/nlp/models/lexparser/germanFactored.ser.gz' >> corenlp.properties &&
+    #else if $language_model.fields.lang_code == "es"
+    echo 'pos.model = edu/stanford/nlp/models/pos-tagger/spanish-ud.tagger' > corenlp.properties &&
+    echo 'ner.model = edu/stanford/nlp/models/ner/spanish.ancora.distsim.s512.crf.ser.gz' >> corenlp.properties &&
+    echo 'ner.applyFineGrained = false' >> corenlp.properties &&
+    echo 'parse.model = edu/stanford/nlp/models/lexparser/spanishPCFG.ser.gz' >> corenlp.properties &&
+    #else if $language_model.fields.lang_code == "it"
+    echo 'pos.model = edu/stanford/nlp/models/pos-tagger/italian-ud.tagger' > corenlp.properties &&
+    echo 'ner.model = edu/stanford/nlp/models/ner/italian.crf.gz' >> corenlp.properties &&
+    echo 'ner.applyFineGrained = false' >> corenlp.properties &&
+    echo 'parse.model = edu/stanford/nlp/models/lexparser/italianFactored.ser.gz' >> corenlp.properties &&
+    #else
+    touch corenlp.properties &&
+    #end if
+    ## The output file is named after the input basename plus the format extension; the "text" format uses ".out".
+    java -Xmx4g -cp "\$CLASSPATH" edu.stanford.nlp.pipeline.StanfordCoreNLP
+    -props corenlp.properties
+    #if $annotators == "tokenize"
+    -annotators tokenize
+    #else if $annotators == "pos"
+    -annotators tokenize,pos,lemma
+    #else if $annotators == "ner"
+    -annotators tokenize,pos,lemma,ner
+    #else if $annotators == "parse"
+    -annotators tokenize,pos,lemma,ner,parse
+    #else if $annotators == "coref"
+    -annotators tokenize,pos,lemma,ner,parse,coref
+    #else if $annotators == "sentiment"
+    -annotators tokenize,pos,lemma,parse,sentiment
+    #end if
+    -outputFormat '${format}'
+    -outputDirectory .
+    -file '${input}' &&
+    #if $format == "text"
+    mv "\$(basename '${input}').out" '${outputFile}'
+    #else
+    mv "\$(basename '${input}').${format}" '${outputFile}'
+    #end if
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="txt" label="Text"/>
+        <param name="language_model" type="select" label="Language Model">
+            <options from_data_table="corenlp_models">
+                <column name="value" index="0"/>
+                <column name="name" index="1"/>
+                <column name="lang_code" index="2"/>
+                <column name="models_path" index="3"/>
+                <filter type="static_value" column="2" keep="false" value="common"/>
+                <filter type="sort_by" column="1"/>
+            </options>
+        </param>
+        <param name="annotators" type="select" label="Annotation types">
+            <option value="tokenize" selected="true">Segmentation (sentences and tokens) - All languages</option>
+            <option value="pos">Part of speech and lemmas - All languages</option>
+            <option value="ner">Named entity recognizer - English, Chinese, French, German, Italian, Spanish</option>
+            <option value="parse">Dependency/Constituency parser - English, Chinese, French, German, Italian, Spanish</option>
+            <option value="coref">Coreference - English and Chinese only</option>
+            <option value="sentiment">Sentiment analysis - English only</option>
+        </param>
+        <param name="format" type="select" label="Output format">
+            <option value="json" selected="true">JSON</option>
+            <option value="conll">CoNLL</option>
+            <option value="conllu">CoNLL-U</option>
+            <option value="text">Text</option>
+            <option value="xml">XML</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="outputFile" format="txt" label="${tool.name} (${annotators}) on ${on_string}">
+            <change_format>
+                <when input="format" value="json" format="json"/>
+                <when input="format" value="xml" format="xml"/>
+                <when input="format" value="conllu" format="tabular"/>
+                <when input="format" value="conll" format="tabular"/>
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="input.txt"/>
+            <param name="language_model" value="en"/>
+            <param name="annotators" value="tokenize"/>
+            <param name="format" value="json"/>
+            <output name="outputFile">
+                <assert_contents>
+                    <has_json_property_with_text property="word" text="John"/>
+                    <has_json_property_with_text property="word" text="Smith"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="input" value="input.txt"/>
+            <param name="language_model" value="en"/>
+            <param name="annotators" value="parse"/>
+            <param name="format" value="conll"/>
+            <output name="outputFile" file="input.txt.conll"/>
+        </test>
+        <test>
+            <param name="input" value="input.txt"/>
+            <param name="language_model" value="en"/>
+            <param name="annotators" value="parse"/>
+            <param name="format" value="conllu"/>
+            <output name="outputFile" file="input.txt.conllu"/>
+        </test>
+        <test>
+            <param name="input" value="2.txt"/>
+            <param name="language_model" value="en"/>
+            <param name="annotators" value="tokenize"/>
+            <param name="format" value="text"/>
+            <output name="outputFile">
+                <assert_contents>
+                    <has_text text=" (2 sentences, 12 tokens)"/>
+                    <has_text text="Sentence #1 (6 tokens):"/>
+                    <has_text text="Sentence #2 (6 tokens):"/>
+                    <has_n_lines n="23" delta="1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="input" value="2.txt"/>
+            <param name="language_model" value="en"/>
+            <param name="annotators" value="tokenize"/>
+            <param name="format" value="xml"/>
+            <output name="outputFile">
+                <assert_contents>
+                    <is_valid_xml/>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="input" value="sa-input.txt"/>
+            <param name="language_model" value="en"/>
+            <param name="annotators" value="sentiment"/>
+            <param name="format" value="json"/>
+            <output name="outputFile">
+                <assert_contents>
+                    <has_text text='"sentiment":'/>
+                    <has_text text='"sentimentValue":'/>
+                    <has_text text='"sentimentDistribution":'/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+Stanford CoreNLP
+================
+
+Galaxy wrappers for common annotators from the `Stanford CoreNLP <https://stanfordnlp.github.io/CoreNLP/index.html>`_ package.  The
+following annotators are provided:
+
+Segmentation
+	Sentences and tokens annotation only.
+
+Part of speech and lemmas
+	Includes the segmentation annotators and adds part-of-speech (POS) tags and lemmas to each token.
+
+Named entity recognition (NER)
+	Includes sentences and tokens with POS tags and lemmas, as well as named entities such as PERSON, ORGANIZATION, LOCATION, etc.
+
+Dependency parse
+	Includes all of the annotators for NER as well as a dependency parse to
+	find the dependencies between the words and phrases of a sentence in order
+	to determine its grammatical structure.  Some of the tree structures can
+	only be properly rendered in the JSON output format.
+
+Coreference
+	Find all expressions that refer to the same entity.  Includes all of the
+	annotators for dependency parsing as well. Requires the common models to be
+	installed via the data manager.
+
+Sentiment analysis
+    Perform sentiment analysis on the constituency parse tree and assign sentiment
+    labels (e.g. Positive, Neutral, Negative) and probabilities to sentences and
+    phrases in the text.
+
+    See: https://stanfordnlp.github.io/CoreNLP/sentiment.html
+
+**NOTE**
+
+Please note that not all annotation types can be represented in all of the output
+formats.  In particular, the `CoNLL` and `CoNLL-U` formats will not include any
+annotations from sentiment analysis and will include limited annotations from the
+dependency parse.
+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.3115/v1/P14-5010</citation>
+    </citations>
+</tool>
diff --git a/tools/corenlp/test-data/.download_models.sh b/tools/corenlp/test-data/.download_models.sh
new file mode 100644
index 00000000000..92ba979ab70
--- /dev/null
+++ b/tools/corenlp/test-data/.download_models.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# Download the CoreNLP English models JAR needed by the tool tests; does nothing if the JAR is already present.
+MODELS_VERSION="4.5.10"
+MODELS_FILE="stanford-corenlp-${MODELS_VERSION}-models-english.jar"
+MODELS_URL="https://repo1.maven.org/maven2/edu/stanford/nlp/stanford-corenlp/${MODELS_VERSION}/${MODELS_FILE}"
+# --fail stops HTTP error pages being saved as the JAR; the .part name keeps an interrupted download from looking complete.
+if [ ! -f "${MODELS_FILE}" ]; then
+    echo "Downloading CoreNLP English models..."
+    curl --fail -L -o "${MODELS_FILE}.part" "${MODELS_URL}" && mv "${MODELS_FILE}.part" "${MODELS_FILE}"
+fi
diff --git a/tools/corenlp/test-data/2.txt b/tools/corenlp/test-data/2.txt
new file mode 100644
index 00000000000..3005c279e00
--- /dev/null
+++ b/tools/corenlp/test-data/2.txt
@@ -0,0 +1,2 @@
+This is the first sentence. There are only two sentences.
+
diff --git a/tools/corenlp/test-data/README.md b/tools/corenlp/test-data/README.md
new file mode 100644
index 00000000000..0aa24989a54
--- /dev/null
+++ b/tools/corenlp/test-data/README.md
@@ -0,0 +1,42 @@
+# Test Data for Stanford CoreNLP
+
+## Running Tests Locally
+
+The tests require the Stanford CoreNLP English language models. Download them before running tests:
+
+```bash
+cd test-data
+curl -L -o stanford-corenlp-4.5.10-models-english.jar \
+  https://repo1.maven.org/maven2/edu/stanford/nlp/stanford-corenlp/4.5.10/stanford-corenlp-4.5.10-models-english.jar
+```
+
+Then run tests from the tool directory:
+
+```bash
+cd ..
+planemo test --docker
+```
+
+## ToolShed Submission
+
+The model JAR file (424MB) is too large to include in the repository. For ToolShed automated testing:
+
+1. The `tool_data_table_conf.xml.test` file points to `test-data/corenlp_models.loc`
+2. The `.loc` file references `stanford-corenlp-4.5.10-models-english.jar` using `${__HERE__}`
+3. Download the model JAR to `test-data/` before running `planemo shed_test`
+
+### Automated Testing on ToolShed
+
+When submitting to ToolShed, automated tests may not work due to the large model file. Consider:
+
+1. Including a download script in the repository documentation
+2. Using manual verification for tool releases
+3. Setting up a test data cache if available on the test infrastructure
+
+## Files
+
+- `corenlp_models.loc` - Test data table for language models
+- `input.txt` - Sample text for basic tests
+- `2.txt` - Multi-sentence sample
+- `sa-input.txt` - Sample for sentiment analysis
+- `*.conll`, `*.conllu`, `*.json` - Expected output files
diff --git a/tools/corenlp/test-data/corenlp_models.loc b/tools/corenlp/test-data/corenlp_models.loc
new file mode 100644
index 00000000000..69c054a9460
--- /dev/null
+++ b/tools/corenlp/test-data/corenlp_models.loc
@@ -0,0 +1,2 @@
+#value	name	lang_code	models_path
+en	English	en	${__HERE__}/stanford-corenlp-4.5.10-models-english.jar
diff --git a/tools/corenlp/test-data/input.txt b/tools/corenlp/test-data/input.txt
new file mode 100644
index 00000000000..7cea21fac4e
--- /dev/null
+++ b/tools/corenlp/test-data/input.txt
@@ -0,0 +1,2 @@
+John Smith went to Walmart on January 1, 1970 to buy IBM stock, then he went to the theater.
+
diff --git a/tools/corenlp/test-data/input.txt.conll b/tools/corenlp/test-data/input.txt.conll
new file mode 100644
index 00000000000..4f61231d2db
--- /dev/null
+++ b/tools/corenlp/test-data/input.txt.conll
@@ -0,0 +1,23 @@
+1	John	John	NNP	PERSON	2	compound
+2	Smith	Smith	NNP	PERSON	3	nsubj
+3	went	go	VBD	O	0	ROOT
+4	to	to	IN	O	5	case
+5	Walmart	Walmart	NNP	ORGANIZATION	3	obl
+6	on	on	IN	O	7	case
+7	January	January	NNP	DATE	3	obl
+8	1	1	CD	DATE	7	nummod
+9	,	,	,	DATE	7	punct
+10	1970	1970	CD	DATE	7	nummod
+11	to	to	TO	O	12	mark
+12	buy	buy	VB	O	3	xcomp
+13	IBM	IBM	NNP	ORGANIZATION	14	compound
+14	stock	stock	NN	O	12	obj
+15	,	,	,	O	3	punct
+16	then	then	RB	O	18	advmod
+17	he	he	PRP	O	18	nsubj
+18	went	go	VBD	O	3	parataxis
+19	to	to	IN	O	21	case
+20	the	the	DT	O	21	det
+21	theater	theater	NN	O	18	obl
+22	.	.	.	O	3	punct
+
diff --git a/tools/corenlp/test-data/input.txt.conllu b/tools/corenlp/test-data/input.txt.conllu
new file mode 100644
index 00000000000..2a848d67027
--- /dev/null
+++ b/tools/corenlp/test-data/input.txt.conllu
@@ -0,0 +1,23 @@
+1	John	John	_	NNP	_	2	compound	_	_
+2	Smith	Smith	_	NNP	_	3	nsubj	_	_
+3	went	go	_	VBD	_	0	root	_	_
+4	to	to	_	IN	_	5	case	_	_
+5	Walmart	Walmart	_	NNP	_	3	obl	_	_
+6	on	on	_	IN	_	7	case	_	_
+7	January	January	_	NNP	_	3	obl	_	_
+8	1	1	_	CD	_	7	nummod	_	SpaceAfter=No
+9	,	,	_	,	_	7	punct	_	_
+10	1970	1970	_	CD	_	7	nummod	_	_
+11	to	to	_	TO	_	12	mark	_	_
+12	buy	buy	_	VB	_	3	xcomp	_	_
+13	IBM	IBM	_	NNP	_	14	compound	_	_
+14	stock	stock	_	NN	_	12	obj	_	SpaceAfter=No
+15	,	,	_	,	_	3	punct	_	_
+16	then	then	_	RB	_	18	advmod	_	_
+17	he	he	_	PRP	_	18	nsubj	_	_
+18	went	go	_	VBD	_	3	parataxis	_	_
+19	to	to	_	IN	_	21	case	_	_
+20	the	the	_	DT	_	21	det	_	_
+21	theater	theater	_	NN	_	18	obl	_	SpaceAfter=No
+22	.	.	_	.	_	3	punct	_	SpacesAfter=\n\n
+
diff --git a/tools/corenlp/test-data/sa-input.txt b/tools/corenlp/test-data/sa-input.txt
new file mode 100644
index 00000000000..8195db1567b
--- /dev/null
+++ b/tools/corenlp/test-data/sa-input.txt
@@ -0,0 +1 @@
+That movie sucked, it was neither smart nor funny.
diff --git a/tools/corenlp/test-data/sa-input.txt.json b/tools/corenlp/test-data/sa-input.txt.json
new file mode 100644
index 00000000000..057b2692004
--- /dev/null
+++ b/tools/corenlp/test-data/sa-input.txt.json
@@ -0,0 +1,394 @@
+{
+  "docId": "sa-input.txt",
+  "sentences": [
+    {
+      "index": 0,
+      "parse": "(ROOT\n  (S\n    (S\n      (NP (DT That) (NN movie))\n      (VP (VBD sucked)))\n    (, ,)\n    (NP (PRP it))\n    (VP (VBD was)\n      (ADJP (CC neither)\n        (JJ smart)\n        (CC nor)\n        (JJ funny)))\n    (. .)))",
+      "binaryParse": "(ROOT\n  (S\n    (@S\n      (S\n        (NP (DT That) (NN movie))\n        (VP (VBD sucked)))\n      (@S (, ,)\n        (@S\n          (NP (PRP it))\n          (VP (VBD was)\n            (ADJP (CC neither)\n              (@ADJP\n                (@ADJP (JJ smart)\n                  (CC nor))\n                (JJ funny)))))))\n    (. .)))",
+      "basicDependencies": [
+        {
+          "dep": "ROOT",
+          "governor": 0,
+          "governorGloss": "ROOT",
+          "dependent": 8,
+          "dependentGloss": "smart"
+        },
+        {
+          "dep": "det",
+          "governor": 2,
+          "governorGloss": "movie",
+          "dependent": 1,
+          "dependentGloss": "That"
+        },
+        {
+          "dep": "nsubj",
+          "governor": 3,
+          "governorGloss": "sucked",
+          "dependent": 2,
+          "dependentGloss": "movie"
+        },
+        {
+          "dep": "ccomp",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 3,
+          "dependentGloss": "sucked"
+        },
+        {
+          "dep": "punct",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 4,
+          "dependentGloss": ","
+        },
+        {
+          "dep": "nsubj",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 5,
+          "dependentGloss": "it"
+        },
+        {
+          "dep": "cop",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 6,
+          "dependentGloss": "was"
+        },
+        {
+          "dep": "cc:preconj",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 7,
+          "dependentGloss": "neither"
+        },
+        {
+          "dep": "cc",
+          "governor": 10,
+          "governorGloss": "funny",
+          "dependent": 9,
+          "dependentGloss": "nor"
+        },
+        {
+          "dep": "conj",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 10,
+          "dependentGloss": "funny"
+        },
+        {
+          "dep": "punct",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 11,
+          "dependentGloss": "."
+        }
+      ],
+      "enhancedDependencies": [
+        {
+          "dep": "ROOT",
+          "governor": 0,
+          "governorGloss": "ROOT",
+          "dependent": 8,
+          "dependentGloss": "smart"
+        },
+        {
+          "dep": "det",
+          "governor": 2,
+          "governorGloss": "movie",
+          "dependent": 1,
+          "dependentGloss": "That"
+        },
+        {
+          "dep": "nsubj",
+          "governor": 3,
+          "governorGloss": "sucked",
+          "dependent": 2,
+          "dependentGloss": "movie"
+        },
+        {
+          "dep": "ccomp",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 3,
+          "dependentGloss": "sucked"
+        },
+        {
+          "dep": "punct",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 4,
+          "dependentGloss": ","
+        },
+        {
+          "dep": "nsubj",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 5,
+          "dependentGloss": "it"
+        },
+        {
+          "dep": "nsubj",
+          "governor": 10,
+          "governorGloss": "funny",
+          "dependent": 5,
+          "dependentGloss": "it"
+        },
+        {
+          "dep": "cop",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 6,
+          "dependentGloss": "was"
+        },
+        {
+          "dep": "cc:preconj",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 7,
+          "dependentGloss": "neither"
+        },
+        {
+          "dep": "cc",
+          "governor": 10,
+          "governorGloss": "funny",
+          "dependent": 9,
+          "dependentGloss": "nor"
+        },
+        {
+          "dep": "conj:nor",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 10,
+          "dependentGloss": "funny"
+        },
+        {
+          "dep": "punct",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 11,
+          "dependentGloss": "."
+        }
+      ],
+      "enhancedPlusPlusDependencies": [
+        {
+          "dep": "ROOT",
+          "governor": 0,
+          "governorGloss": "ROOT",
+          "dependent": 8,
+          "dependentGloss": "smart"
+        },
+        {
+          "dep": "det",
+          "governor": 2,
+          "governorGloss": "movie",
+          "dependent": 1,
+          "dependentGloss": "That"
+        },
+        {
+          "dep": "nsubj",
+          "governor": 3,
+          "governorGloss": "sucked",
+          "dependent": 2,
+          "dependentGloss": "movie"
+        },
+        {
+          "dep": "ccomp",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 3,
+          "dependentGloss": "sucked"
+        },
+        {
+          "dep": "punct",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 4,
+          "dependentGloss": ","
+        },
+        {
+          "dep": "nsubj",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 5,
+          "dependentGloss": "it"
+        },
+        {
+          "dep": "nsubj",
+          "governor": 10,
+          "governorGloss": "funny",
+          "dependent": 5,
+          "dependentGloss": "it"
+        },
+        {
+          "dep": "cop",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 6,
+          "dependentGloss": "was"
+        },
+        {
+          "dep": "cc:preconj",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 7,
+          "dependentGloss": "neither"
+        },
+        {
+          "dep": "cc",
+          "governor": 10,
+          "governorGloss": "funny",
+          "dependent": 9,
+          "dependentGloss": "nor"
+        },
+        {
+          "dep": "conj:nor",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 10,
+          "dependentGloss": "funny"
+        },
+        {
+          "dep": "punct",
+          "governor": 8,
+          "governorGloss": "smart",
+          "dependent": 11,
+          "dependentGloss": "."
+        }
+      ],
+      "sentimentValue": "1",
+      "sentiment": "Negative",
+      "sentimentDistribution": [
+        0.3569090455379,
+        0.3879238897535,
+        0.18202339851632,
+        0.04195134630713,
+        0.03119231988515
+      ],
+      "sentimentTree": "(ROOT|sentiment=1|prob=0.388\n  (@S|sentiment=1|prob=0.405\n    (S|sentiment=1|prob=0.593\n      (NP|sentiment=2|prob=1.000 (DT|sentiment=2|prob=0.998 That) (NN|sentiment=2|prob=0.996 movie))\n      (VP|sentiment=0|prob=0.504 sucked))\n    (@S|sentiment=2|prob=0.349 (,|sentiment=2|prob=1.000 ,)\n      (@S|sentiment=2|prob=0.345 (NP|sentiment=2|prob=1.000 it)\n        (VP|sentiment=2|prob=0.427 (VBD|sentiment=2|prob=0.997 was)\n          (ADJP|sentiment=3|prob=0.430 (CC|sentiment=2|prob=0.961 neither)\n            (@ADJP|sentiment=3|prob=0.659\n              (@ADJP|sentiment=2|prob=0.567 (JJ|sentiment=4|prob=0.971 smart)\n                (CC|sentiment=2|prob=0.995 nor))\n              (JJ|sentiment=3|prob=0.984 funny)))))))\n  (.|sentiment=2|prob=1.000 .))",
+      "tokens": [
+        {
+          "index": 1,
+          "word": "That",
+          "originalText": "That",
+          "lemma": "that",
+          "characterOffsetBegin": 0,
+          "characterOffsetEnd": 4,
+          "pos": "DT",
+          "before": "",
+          "after": " "
+        },
+        {
+          "index": 2,
+          "word": "movie",
+          "originalText": "movie",
+          "lemma": "movie",
+          "characterOffsetBegin": 5,
+          "characterOffsetEnd": 10,
+          "pos": "NN",
+          "before": " ",
+          "after": " "
+        },
+        {
+          "index": 3,
+          "word": "sucked",
+          "originalText": "sucked",
+          "lemma": "suck",
+          "characterOffsetBegin": 11,
+          "characterOffsetEnd": 17,
+          "pos": "VBD",
+          "before": " ",
+          "after": ""
+        },
+        {
+          "index": 4,
+          "word": ",",
+          "originalText": ",",
+          "lemma": ",",
+          "characterOffsetBegin": 17,
+          "characterOffsetEnd": 18,
+          "pos": ",",
+          "before": "",
+          "after": " "
+        },
+        {
+          "index": 5,
+          "word": "it",
+          "originalText": "it",
+          "lemma": "it",
+          "characterOffsetBegin": 19,
+          "characterOffsetEnd": 21,
+          "pos": "PRP",
+          "before": " ",
+          "after": " "
+        },
+        {
+          "index": 6,
+          "word": "was",
+          "originalText": "was",
+          "lemma": "be",
+          "characterOffsetBegin": 22,
+          "characterOffsetEnd": 25,
+          "pos": "VBD",
+          "before": " ",
+          "after": " "
+        },
+        {
+          "index": 7,
+          "word": "neither",
+          "originalText": "neither",
+          "lemma": "neither",
+          "characterOffsetBegin": 26,
+          "characterOffsetEnd": 33,
+          "pos": "CC",
+          "before": " ",
+          "after": " "
+        },
+        {
+          "index": 8,
+          "word": "smart",
+          "originalText": "smart",
+          "lemma": "smart",
+          "characterOffsetBegin": 34,
+          "characterOffsetEnd": 39,
+          "pos": "JJ",
+          "before": " ",
+          "after": " "
+        },
+        {
+          "index": 9,
+          "word": "nor",
+          "originalText": "nor",
+          "lemma": "nor",
+          "characterOffsetBegin": 40,
+          "characterOffsetEnd": 43,
+          "pos": "CC",
+          "before": " ",
+          "after": " "
+        },
+        {
+          "index": 10,
+          "word": "funny",
+          "originalText": "funny",
+          "lemma": "funny",
+          "characterOffsetBegin": 44,
+          "characterOffsetEnd": 49,
+          "pos": "JJ",
+          "before": " ",
+          "after": ""
+        },
+        {
+          "index": 11,
+          "word": ".",
+          "originalText": ".",
+          "lemma": ".",
+          "characterOffsetBegin": 49,
+          "characterOffsetEnd": 50,
+          "pos": ".",
+          "before": "",
+          "after": "\n"
+        }
+      ]
+    }
+  ]
+}
diff --git a/tools/corenlp/tool-data/corenlp_models.loc.sample b/tools/corenlp/tool-data/corenlp_models.loc.sample
new file mode 100644
index 00000000000..8660a8a7925
--- /dev/null
+++ b/tools/corenlp/tool-data/corenlp_models.loc.sample
@@ -0,0 +1,19 @@
+# This file defines the available Stanford CoreNLP language models.
+# Each line represents a language model with the following columns (tab-separated):
+#   <value>    <name>              <lang_code>  <models_path>
+#
+# - value: Unique identifier for the model (lowercase, no spaces)
+# - name: Display name shown to users
+# - lang_code: Language code (e.g. en, zh, fr, de, es, it) the tool uses to select per-language model properties
+# - models_path: Full path to the language model JAR file
+#
+# Example entries (uncomment and update paths after installing models):
+#
+#english	English	en	/path/to/stanford-corenlp-4.5.10-models-english.jar
+#spanish	Spanish	es	/path/to/stanford-corenlp-4.5.10-models-spanish.jar
+#french	French	fr	/path/to/stanford-corenlp-4.5.10-models-french.jar
+#german	German	de	/path/to/stanford-corenlp-4.5.10-models-german.jar
+#chinese	Chinese	zh	/path/to/stanford-corenlp-4.5.10-models-chinese.jar
+#arabic	Arabic	ar	/path/to/stanford-corenlp-4.5.10-models-arabic.jar
+#italian	Italian	it	/path/to/stanford-corenlp-4.5.10-models-italian.jar
+#hungarian	Hungarian	hu	/path/to/stanford-corenlp-4.5.10-models-hungarian.jar
diff --git a/tools/corenlp/tool_data_table_conf.xml.sample b/tools/corenlp/tool_data_table_conf.xml.sample
new file mode 100644
index 00000000000..4615c4b2916
--- /dev/null
+++ b/tools/corenlp/tool_data_table_conf.xml.sample
@@ -0,0 +1,6 @@
+<tables>
+    <table name="corenlp_models" comment_char="#">
+        <columns>value, name, lang_code, models_path</columns>
+        <file path="tool-data/corenlp_models.loc" />
+    </table>
+</tables>

From db3ba95ae9679f1bd96d1ca418eba422dc5a968e Mon Sep 17 00:00:00 2001
From: Keith Suderman <suderman@jhu.edu>
Date: Wed, 20 May 2026 13:10:07 -0400
Subject: [PATCH 2/2] Addressed review comments

Co-Authored-By: Claude Sonnet 4 <noreply@anthropic.com>
---
 data_managers/data_manager_corenlp_models/.shed.yml          | 2 +-
 .../data_manager_corenlp_models.py                           | 1 +
 .../data_manager_corenlp_models.xml                          | 2 +-
 tools/corenlp/.shed.yml                                      | 2 +-
 tools/corenlp/macros.xml                                     | 4 ----
 tools/corenlp/stanford_corenlp.xml                           | 5 +++--
 6 files changed, 7 insertions(+), 9 deletions(-)
 delete mode 100644 tools/corenlp/macros.xml

diff --git a/data_managers/data_manager_corenlp_models/.shed.yml b/data_managers/data_manager_corenlp_models/.shed.yml
index 784d43933b5..884e74da777 100644
--- a/data_managers/data_manager_corenlp_models/.shed.yml
+++ b/data_managers/data_manager_corenlp_models/.shed.yml
@@ -9,7 +9,7 @@ long_description: |
   Supported languages: Arabic, Chinese, English, French, German, Hungarian, Italian, Spanish.
   Also provides the common models JAR required for coreference resolution.
 homepage_url: https://stanfordnlp.github.io/CoreNLP/
-remote_repository_url: https://github.com/ksuderman/data-manager-corenlp
+remote_repository_url: https://github.com/galaxyproject/tools-iuc
 type: unrestricted
 categories:
   - Data Managers
diff --git a/data_managers/data_manager_corenlp_models/data_manager_corenlp_models.py b/data_managers/data_manager_corenlp_models/data_manager_corenlp_models.py
index 34009e00890..fe7bada38b2 100755
--- a/data_managers/data_manager_corenlp_models/data_manager_corenlp_models.py
+++ b/data_managers/data_manager_corenlp_models/data_manager_corenlp_models.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# Copyright 2006 The Galaxy Project. All rights reserved.
 """
 Data Manager for Stanford CoreNLP Language Models
 
diff --git a/data_managers/data_manager_corenlp_models/data_manager_corenlp_models.xml b/data_managers/data_manager_corenlp_models/data_manager_corenlp_models.xml
index 015de2770e8..2aecd694b53 100644
--- a/data_managers/data_manager_corenlp_models/data_manager_corenlp_models.xml
+++ b/data_managers/data_manager_corenlp_models/data_manager_corenlp_models.xml
@@ -1,4 +1,4 @@
-<tool id="data_manager_corenlp_models" name="Stanford CoreNLP Language Models" version="4.5.10.5" tool_type="manage_data" profile="21.05">
+<tool id="data_manager_corenlp_models" name="Stanford CoreNLP Language Models" version="4.5.10.5" tool_type="manage_data" profile="24.1">
     <description>Download and install CoreNLP language model JARs</description>
     <requirements>
         <requirement type="package" version="3.9">python</requirement>
diff --git a/tools/corenlp/.shed.yml b/tools/corenlp/.shed.yml
index 34f69854321..297388d0c9b 100644
--- a/tools/corenlp/.shed.yml
+++ b/tools/corenlp/.shed.yml
@@ -13,7 +13,7 @@ categories:
   - Text Manipulation
   - Natural Language Processing
 homepage_url: https://stanfordnlp.github.io/CoreNLP/
-remote_repository_url: https://github.com/ksuderman/galaxy_tools_corenlp
+remote_repository_url: https://github.com/galaxyproject/tools-iuc
 type: unrestricted
 auto_tool_repositories:
   name_template: "{{ tool_id }}"
diff --git a/tools/corenlp/macros.xml b/tools/corenlp/macros.xml
deleted file mode 100644
index 456837caae9..00000000000
--- a/tools/corenlp/macros.xml
+++ /dev/null
@@ -1,4 +0,0 @@
-<macros>
-    <token name="@TOOL_VERSION@">4.5.10</token>
-    <token name="@VERSION_SUFFIX@">4</token>
-</macros>
diff --git a/tools/corenlp/stanford_corenlp.xml b/tools/corenlp/stanford_corenlp.xml
index 805cdc65c83..7147dfd582d 100644
--- a/tools/corenlp/stanford_corenlp.xml
+++ b/tools/corenlp/stanford_corenlp.xml
@@ -1,6 +1,7 @@
-<tool id="stanford_corenlp" name="Stanford CoreNLP Annotators (Multi-language)" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5" profile="21.05">
+<tool id="stanford_corenlp" name="Stanford CoreNLP Annotators (Multi-language)" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5" profile="24.1">
     <macros>
-        <import>macros.xml</import>
+        <token name="@TOOL_VERSION@">4.5.10</token>
+        <token name="@VERSION_SUFFIX@">4</token>
     </macros>
     <requirements>
 		<container type="docker">ksuderman/corenlp:@TOOL_VERSION@</container>