[WIP] Phi3poc #2301

Open

wants to merge 26 commits into master from phi3poc

Changes from 23 commits

Commits (26):
f0c2b00
poc
JessicaXYWang Sep 12, 2024
603777a
poc
JessicaXYWang Oct 15, 2024
47ae241
Merge branch 'master' into phi3poc
JessicaXYWang Oct 15, 2024
23f8ca0
rename module
JessicaXYWang Oct 15, 2024
bb5b2b6
Merge branch 'phi3poc' of https://github.com/JessicaXYWang/SynapseML …
JessicaXYWang Oct 15, 2024
f235535
update dependency
JessicaXYWang Oct 17, 2024
f2ab308
Merge branch 'master' into phi3poc
JessicaXYWang Oct 17, 2024
3ee9168
add set device type
JessicaXYWang Oct 21, 2024
b30f168
add Downloader
JessicaXYWang Jan 2, 2025
d760733
remove import
JessicaXYWang Jan 2, 2025
6efa59c
Merge branch 'master' into phi3poc
JessicaXYWang Jan 2, 2025
c7397f3
update lm
JessicaXYWang Jan 10, 2025
e1105fd
Merge branch 'phi3poc' of https://github.com/JessicaXYWang/SynapseML …
JessicaXYWang Jan 10, 2025
e59a981
Merge branch 'master' into phi3poc
JessicaXYWang Jan 10, 2025
ff8ad7f
pyarrow version conflict
JessicaXYWang Jan 13, 2025
56e623d
Merge branch 'phi3poc' of https://github.com/JessicaXYWang/SynapseML …
JessicaXYWang Jan 13, 2025
efa6aa0
update transformers version
JessicaXYWang Jan 14, 2025
2f5338c
add dependency
JessicaXYWang Jan 14, 2025
ff89511
update transformers version
JessicaXYWang Jan 14, 2025
b3dc5da
add phi3 test
JessicaXYWang Jan 16, 2025
c0cd463
test missing transformers library
JessicaXYWang Jan 16, 2025
e3e331c
update databricks test
JessicaXYWang Jan 16, 2025
382a20e
update databricks test
JessicaXYWang Jan 16, 2025
0a0f80c
update db library
JessicaXYWang Jan 17, 2025
eac0293
update doc
JessicaXYWang Jan 23, 2025
7a3e315
format
JessicaXYWang Jan 23, 2025
3 changes: 2 additions & 1 deletion build.sbt
@@ -34,7 +34,8 @@ val extraDependencies = Seq(
     "com.jcraft" % "jsch" % "0.1.54",
     "org.apache.httpcomponents.client5" % "httpclient5" % "5.1.3",
     "org.apache.httpcomponents" % "httpmime" % "4.5.13",
-    "com.linkedin.isolation-forest" %% "isolation-forest_3.4.2" % "3.0.4"
+    "com.linkedin.isolation-forest" %% "isolation-forest_3.4.2" % "3.0.4",
+    "org.apache.hadoop" % "hadoop-client-api" % "3.3.4"
       exclude("com.google.protobuf", "protobuf-java") exclude("org.apache.spark", "spark-mllib_2.12")
       exclude("org.apache.spark", "spark-core_2.12") exclude("org.apache.spark", "spark-avro_2.12")
       exclude("org.apache.spark", "spark-sql_2.12"),
298 changes: 298 additions & 0 deletions core/src/main/python/synapse/ml/llm/HuggingFaceCausallmTransform.py
@@ -0,0 +1,298 @@
from pyspark.ml import Transformer
from pyspark.ml.param.shared import (
    HasInputCol,
    HasOutputCol,
    Param,
    Params,
    TypeConverters,
)
from pyspark.sql import Row
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType, StructType, StructField
from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable
from transformers import AutoTokenizer, AutoModelForCausalLM
from pyspark import keyword_only
import re
import os


class _PeekableIterator:
    def __init__(self, iterable):
        self._iterator = iter(iterable)
        self._cache = []

    def __iter__(self):
        return self

    def __next__(self):
        if self._cache:
            return self._cache.pop(0)
        else:
            return next(self._iterator)

    def peek(self, n=1):
        """Peek at the next n elements without consuming them."""
        while len(self._cache) < n:
            try:
                self._cache.append(next(self._iterator))
            except StopIteration:
                break
        if n == 1:
            return self._cache[0] if self._cache else None
        else:
            return self._cache[:n]
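# Illustrative usage (a sketch, not part of the PR's file): peek() returns None
# on an exhausted iterator rather than raising, so a caller can test a Spark
# partition for rows before doing expensive setup:
#
#     it = _PeekableIterator(rows)
#     if it.peek() is None:
#         ...  # empty partition: skip model loading
#     else:
#         for row in it:
#             ...  # each row is still yielded exactly once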


class _ModelParam:
    def __init__(self, **kwargs):
        self.param = {}
        self.param.update(kwargs)

    def get_param(self):
        return self.param


class _ModelConfig:
    def __init__(self, **kwargs):
        self.config = {}
        self.config.update(kwargs)

    def get_config(self):
        return self.config

    def set_config(self, **kwargs):
        self.config.update(kwargs)
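# Illustration (derived from how these wrappers are consumed below): they are
# thin kwarg holders, e.g. _ModelParam(max_new_tokens=1000).get_param() returns
# {"max_new_tokens": 1000}, later splatted into model.generate(**param), while
# _ModelConfig(trust_remote_code=True).get_config() is splatted into
# AutoModelForCausalLM.from_pretrained(..., **model_config).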


def camel_to_snake(text):
    return re.sub(r"(?<!^)(?=[A-Z])", "_", text).lower()

Review comment (Collaborator): there might already be one in a library to use.
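For reference, an illustrative call (the helper is not exercised elsewhere in this diff): camel_to_snake("maxNewTokens") returns "max_new_tokens".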


class HuggingFaceCausalLM(
    Transformer, HasInputCol, HasOutputCol, DefaultParamsReadable, DefaultParamsWritable
):

    modelName = Param(
        Params._dummy(),
        "modelName",
        "model name",
        typeConverter=TypeConverters.toString,
    )
    inputCol = Param(
        Params._dummy(),
        "inputCol",
        "input column",
        typeConverter=TypeConverters.toString,
    )
    outputCol = Param(
        Params._dummy(),
        "outputCol",
        "output column",
        typeConverter=TypeConverters.toString,
    )
    modelParam = Param(
        Params._dummy(), "modelParam", "Model parameters, e.g. max_new_tokens"
    )
    modelConfig = Param(
        Params._dummy(),
        "modelConfig",
        "Model configuration, e.g. local_files_only, trust_remote_code",
    )
    cachePath = Param(
        Params._dummy(),
        "cachePath",
        "Cache path for the model. Could be a Lakehouse path.",
        typeConverter=TypeConverters.toString,
    )
    deviceMap = Param(
        Params._dummy(),
        "deviceMap",
        "Specifies a model parameter for the device map. For GPU usage with models such as Phi 3, set it to 'cuda'.",
        typeConverter=TypeConverters.toString,
    )
    torchDtype = Param(
        Params._dummy(),
        "torchDtype",
        "Specifies a model parameter for the torch dtype. For GPU usage with models such as Phi 3, set it to 'auto'.",
        typeConverter=TypeConverters.toString,
    )

    @keyword_only
    def __init__(
        self,
        modelName=None,
        inputCol=None,
        outputCol=None,
        cachePath=None,
        deviceMap=None,
        torchDtype=None,
    ):
        super(HuggingFaceCausalLM, self).__init__()
        self._setDefault(
            modelName=modelName,
            inputCol=inputCol,
            outputCol=outputCol,
            modelParam=_ModelParam(),
            modelConfig=_ModelConfig(),
            cachePath=None,
            deviceMap=None,
            torchDtype=None,
        )
        kwargs = self._input_kwargs
        self.setParams(**kwargs)

    @keyword_only
    def setParams(self):
        kwargs = self._input_kwargs
        return self._set(**kwargs)

    def setModelName(self, value):
        return self._set(modelName=value)

    def getModelName(self):
        return self.getOrDefault(self.modelName)

    def setInputCol(self, value):
        return self._set(inputCol=value)

    def getInputCol(self):
        return self.getOrDefault(self.inputCol)

    def setOutputCol(self, value):
        return self._set(outputCol=value)

    def getOutputCol(self):
        return self.getOrDefault(self.outputCol)

    def setModelParam(self, **kwargs):
        param = _ModelParam(**kwargs)
        return self._set(modelParam=param)

    def getModelParam(self):
        return self.getOrDefault(self.modelParam)

    def setModelConfig(self, **kwargs):
        config = _ModelConfig(**kwargs)
        return self._set(modelConfig=config)

    def getModelConfig(self):
        return self.getOrDefault(self.modelConfig)

    def setCachePath(self, value):
        return self._set(cachePath=value)

    def getCachePath(self):
        return self.getOrDefault(self.cachePath)

    def setDeviceMap(self, value):
        return self._set(deviceMap=value)

    def getDeviceMap(self):
        return self.getOrDefault(self.deviceMap)

    def setTorchDtype(self, value):
        return self._set(torchDtype=value)

    def getTorchDtype(self):
        return self.getOrDefault(self.torchDtype)

    def load_model(self):
        """
        Loads the model and tokenizer, either from the cache path or from the HuggingFace Hub.
        """
        model_name = self.getModelName()
        model_config = self.getModelConfig().get_config()
        device_map = self.getDeviceMap()
        torch_dtype = self.getTorchDtype()

        if device_map:
            model_config["device_map"] = device_map
        if torch_dtype:
            model_config["torch_dtype"] = torch_dtype

        if self.getCachePath():
            hf_cache = self.getCachePath()
            if not os.path.isdir(hf_cache):
                raise NotADirectoryError(f"Directory does not exist: {hf_cache}")

            model = AutoModelForCausalLM.from_pretrained(
                hf_cache, local_files_only=True, **model_config
            )
            tokenizer = AutoTokenizer.from_pretrained(hf_cache, local_files_only=True)
        else:
            model = AutoModelForCausalLM.from_pretrained(model_name, **model_config)
            tokenizer = AutoTokenizer.from_pretrained(model_name)

        return model, tokenizer
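# Illustration (derived from the setters above, values per the Param docs):
# setDeviceMap("cuda") and setTorchDtype("auto") resolve to
# AutoModelForCausalLM.from_pretrained(..., device_map="cuda", torch_dtype="auto").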

    def _predict_single_complete(self, prompt, model, tokenizer):
        param = self.getModelParam().get_param()
        # Move the prompt tensor to the model's device so generation also works
        # when the model was loaded with device_map="cuda", mirroring the chat path.
        inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
        outputs = model.generate(inputs, **param)
        decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
        return decoded_output

    def _predict_single_chat(self, prompt, model, tokenizer):
        param = self.getModelParam().get_param()
        if isinstance(prompt, list):
            chat = prompt
        else:
            chat = [{"role": "user", "content": prompt}]
        formatted_chat = tokenizer.apply_chat_template(
            chat, tokenize=False, add_generation_prompt=True
        )
        tokenized_chat = tokenizer(
            formatted_chat, return_tensors="pt", add_special_tokens=False
        )
        inputs = {
            key: tensor.to(model.device) for key, tensor in tokenized_chat.items()
        }
        merged_inputs = {**inputs, **param}
        outputs = model.generate(**merged_inputs)
        # Decode only the newly generated tokens, skipping the prompt tokens.
        decoded_output = tokenizer.decode(
            outputs[0][inputs["input_ids"].size(1):], skip_special_tokens=True
        )
        return decoded_output
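# Illustration (example payload, not from the PR): the input column may hold
# either a plain string, which is wrapped as [{"role": "user", "content": prompt}],
# or a pre-built list of chat messages, e.g.
#     [{"role": "system", "content": "You are a translator."},
#      {"role": "user", "content": "hello"}]
# which is passed to apply_chat_template unchanged.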

    def _process_partition(self, iterator, task):
        """Process each partition of the data."""
        peekable_iterator = _PeekableIterator(iterator)
        # peek() returns None (rather than raising StopIteration) when the
        # partition is empty, letting us skip the expensive model load.
        if peekable_iterator.peek() is None:
            return

        model, tokenizer = self.load_model()

        for row in peekable_iterator:
            prompt = row[self.getInputCol()]
            if task == "chat":
                result = self._predict_single_chat(prompt, model, tokenizer)
            elif task == "complete":
                result = self._predict_single_complete(prompt, model, tokenizer)
            row_dict = row.asDict()
            row_dict[self.getOutputCol()] = result
            yield Row(**row_dict)

    def _transform(self, dataset):
        input_schema = dataset.schema
        output_schema = StructType(
            input_schema.fields + [StructField(self.getOutputCol(), StringType(), True)]
        )
        result_rdd = dataset.rdd.mapPartitions(
            lambda partition: self._process_partition(partition, "chat")
        )
        result_df = result_rdd.toDF(output_schema)
        return result_df

    def complete(self, dataset):
        input_schema = dataset.schema
        output_schema = StructType(
            input_schema.fields + [StructField(self.getOutputCol(), StringType(), True)]
        )
        result_rdd = dataset.rdd.mapPartitions(
            lambda partition: self._process_partition(partition, "complete")
        )
        result_df = result_rdd.toDF(output_schema)
        return result_df
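A minimal sketch of the completion entry point, which bypasses the chat template (assumptions: a live SparkSession named spark, workers that can download the model, and illustrative column names and prompt):

    from synapse.ml.llm.HuggingFaceCausallmTransform import HuggingFaceCausalLM

    df = spark.createDataFrame([(1, "The capital of France is")], ["id", "text"])
    llm = (
        HuggingFaceCausalLM()
        .setModelName("microsoft/Phi-3-mini-4k-instruct")
        .setInputCol("text")
        .setOutputCol("completion")
        .setModelParam(max_new_tokens=20)
    )
    llm.complete(df).show()  # routes each row through _predict_single_complete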
@@ -69,7 +69,7 @@ object DatabricksUtilities
     "interpret-community",
     "numpy==1.22.4",
     "unstructured==0.10.24",
-    "pytesseract"
+    "pytesseract",
   )

   def baseURL(apiVersion: String): String = s"https://$Region.azuredatabricks.net/api/$apiVersion/"
@@ -84,7 +84,7 @@ object DatabricksUtilities
     Map("maven" -> Map("coordinates" -> PackageMavenCoordinate, "repo" -> PackageRepository)),
     Map("pypi" -> Map("package" -> "pytorch-lightning==1.5.0")),
     Map("pypi" -> Map("package" -> "torchvision==0.14.1")),
-    Map("pypi" -> Map("package" -> "transformers==4.32.1")),
+    Map("pypi" -> Map("package" -> "transformers==4.48.0")),
     Map("pypi" -> Map("package" -> "petastorm==0.12.0")),
     Map("pypi" -> Map("package" -> "protobuf==3.20.3"))
   ).toJson.compactPrint
@@ -105,12 +105,15 @@ object DatabricksUtilities
   val CPUNotebooks: Seq[File] = ParallelizableNotebooks
     .filterNot(_.getAbsolutePath.contains("Fine-tune"))
     .filterNot(_.getAbsolutePath.contains("GPU"))
+    .filterNot(_.getAbsolutePath.contains("Language Model"))
     .filterNot(_.getAbsolutePath.contains("Multivariate Anomaly Detection")) // Deprecated
     .filterNot(_.getAbsolutePath.contains("Audiobooks")) // TODO Remove this by fixing auth
     .filterNot(_.getAbsolutePath.contains("Art")) // TODO Remove this by fixing performance
     .filterNot(_.getAbsolutePath.contains("Explanation Dashboard")) // TODO Remove this exclusion

-  val GPUNotebooks: Seq[File] = ParallelizableNotebooks.filter(_.getAbsolutePath.contains("Fine-tune"))
+  val GPUNotebooks: Seq[File] = ParallelizableNotebooks.filter { file =>
+    file.getAbsolutePath.contains("Fine-tune") || file.getAbsolutePath.contains("HuggingFace")
+  }

   val RapidsNotebooks: Seq[File] = ParallelizableNotebooks.filter(_.getAbsolutePath.contains("GPU"))
2 changes: 1 addition & 1 deletion deep-learning/src/main/python/horovod_installation.sh
@@ -8,7 +8,7 @@ set -eu
 # Install prerequisite libraries that horovod depends on
 pip install pytorch-lightning==1.5.0
 pip install torchvision==0.14.1
-pip install transformers==4.32.1
+pip install transformers==4.48.0
 pip install petastorm>=0.12.0
 pip install protobuf==3.20.3
@@ -11,12 +11,12 @@
 if _TRANSFORMERS_AVAILABLE:
     import transformers

-    _TRANSFORMERS_EQUAL_4_32_1 = transformers.__version__ == "4.32.1"
-    if _TRANSFORMERS_EQUAL_4_32_1:
+    _TRANSFORMERS_EQUAL_4_48_0 = transformers.__version__ == "4.48.0"
+    if _TRANSFORMERS_EQUAL_4_48_0:
         from transformers import AutoTokenizer
     else:
         raise RuntimeError(
-            "transformers should be == 4.32.1, found: {}".format(
+            "transformers should be == 4.48.0, found: {}".format(
                 transformers.__version__
             )
         )
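For illustration (hypothetical session): with transformers 4.32.1 still installed, importing this module now fails fast with RuntimeError: transformers should be == 4.48.0, found: 4.32.1.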
@@ -13,12 +13,12 @@
 if _TRANSFORMERS_AVAILABLE:
     import transformers

-    _TRANSFORMERS_EQUAL_4_32_1 = transformers.__version__ == "4.32.1"
-    if _TRANSFORMERS_EQUAL_4_32_1:
+    _TRANSFORMERS_EQUAL_4_48_0 = transformers.__version__ == "4.48.0"
+    if _TRANSFORMERS_EQUAL_4_48_0:
         from transformers import AutoModelForSequenceClassification
     else:
         raise RuntimeError(
-            "transformers should be == 4.32.1, found: {}".format(
+            "transformers should be == 4.48.0, found: {}".format(
                 transformers.__version__
             )
         )
@@ -0,0 +1 @@ (new notebook, stored as a single JSON line; rendered here as cells)

# Apply Phi3 model with HuggingFace Causal ML

![HuggingFace Logo](https://huggingface.co/front/assets/huggingface_logo-noborder.svg)

**HuggingFace** is a popular open-source platform that develops computation tools for building applications using machine learning. It is widely known for its Transformers library, which contains open-source implementations of transformer models for text, image, and audio tasks.

[**Phi 3**](https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/) is a family of AI models developed by Microsoft, designed to redefine what is possible with small language models (SLMs). Phi-3 models are the most capable and cost-effective SLMs, [outperforming models of the same size and even larger ones in language](https://news.microsoft.com/source/features/ai/the-phi-3-small-language-models-with-big-potential/?msockid=26355e446adb6dfa06484f956b686c27), reasoning, coding, and math benchmarks.

<img src="https://pub-66c8c8c5ae474e9a9161c92b21de2f08.r2.dev/2024/04/The-Phi-3-small-language-models-with-big-potential-1.jpg" alt="Phi 3 model performance" width="600">

To make it easier to scale up causal language model prediction on a large dataset, we have integrated [HuggingFace Causal LM](https://huggingface.co/docs/transformers/tasks/language_modeling) with SynapseML. This integration makes it easy to use the Apache Spark distributed computing framework to process large data on text generation tasks.

This tutorial shows how to apply the [Phi-3 model](https://huggingface.co/collections/microsoft/phi-3-6626e15e9585a200d2d761e3) at scale with no extra setup.

    chats = [
        (1, "fix grammar: helol mi friend"),
        (2, "What is SynapseML"),
        (3, "translate to Spanish: hello"),
    ]

    chat_df = spark.createDataFrame(chats, ["row_index", "content"])
    chat_df.show()

    +---------+--------------------+
    |row_index|             content|
    +---------+--------------------+
    |        1|fix grammar: helo...|
    |        2|   What is SynapseML|
    |        3|translate to Span...|
    +---------+--------------------+

## Define and Apply Phi3 model

    from synapse.ml.llm.HuggingFaceCausallmTransform import HuggingFaceCausalLM

    phi3_transformer = (
        HuggingFaceCausalLM()
        .setModelName("microsoft/Phi-3-mini-4k-instruct")
        .setInputCol("content")
        .setOutputCol("result")
        .setModelParam(max_new_tokens=1000)
        .setModelConfig(local_files_only=False, trust_remote_code=True)
    )
    result_df = phi3_transformer.transform(chat_df).collect()
    display(result_df)

## Use local cache

By caching the model, you can reduce initialization time. On Fabric, store the model in a Lakehouse and use setCachePath to load it.

    # %%sh
    # azcopy copy "https://mmlspark.blob.core.windows.net/huggingface/microsoft/Phi-3-mini-4k-instruct" "/lakehouse/default/Files/microsoft/" --recursive=true

    # phi3_transformer = (
    #     HuggingFaceCausalLM()
    #     .setCachePath("/lakehouse/default/Files/microsoft/Phi-3-mini-4k-instruct")
    #     .setInputCol("content")
    #     .setOutputCol("result")
    #     .setModelParam(max_new_tokens=1000)
    # )
    # result_df = phi3_transformer.transform(chat_df).collect()
    # display(result_df)
Review comment (Collaborator): before check-in, this style needs to be fixed by running "black ." in the top-level dir.