Skip to content

Web Search: Inline citations #1118

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 3 additions & 30 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ MONGODB_DB_NAME=chat-ui
MONGODB_DIRECT_CONNECTION=false

COOKIE_NAME=hf-chat
COOKIE_SAMESITE=
COOKIE_SECURE=
TRUSTED_EMAIL_HEADER= # only set this if you understand the implications

HF_TOKEN=#hf_<token> from https://huggingface.co/settings/token
HF_API_ROOT=https://api-inference.huggingface.co/models

Expand All @@ -18,7 +14,6 @@ ANTHROPIC_API_KEY=#your anthropic api key here
CLOUDFLARE_ACCOUNT_ID=#your cloudflare account id here
CLOUDFLARE_API_TOKEN=#your cloudflare api token here
COHERE_API_TOKEN=#your cohere api token here
GOOGLE_GENAI_API_KEY=#your google genai api token here

HF_ACCESS_TOKEN=#LEGACY! Use HF_TOKEN instead

Expand All @@ -27,16 +22,11 @@ YDC_API_KEY=#your docs.you.com api key here
SERPER_API_KEY=#your serper.dev api key here
SERPAPI_KEY=#your serpapi key here
SERPSTACK_API_KEY=#your serpstack api key here
SEARCHAPI_KEY=#your searchapi api key here
USE_LOCAL_WEBSEARCH=#set to true to parse google results yourself, overrides other API keys
SEARXNG_QUERY_URL=# where '<query>' will be replaced with query keywords see https://docs.searxng.org/dev/search_api.html eg https://searxng.yourdomain.com/search?q=<query>&engines=duckduckgo,google&format=json
BING_SUBSCRIPTION_KEY=#your key
PLAYWRIGHT_ADBLOCKER=true

WEBSEARCH_ALLOWLIST=`[]` # if it's defined, allow websites from only this list.
WEBSEARCH_BLOCKLIST=`[]` # if it's defined, block websites from this list.
WEBSEARCH_JAVASCRIPT=true # CPU usage reduces by 60% on average by disabling javascript. Enable to improve website compatibility
WEBSEARCH_TIMEOUT = 3500 # in milliseconds, determines how long to wait to load a page before timing out

# Parameters to enable open id login
OPENID_CONFIG=`{
Expand Down Expand Up @@ -134,7 +124,6 @@ PARQUET_EXPORT_SECRET=#DEPRECATED, use ADMIN_API_SECRET instead

RATE_LIMIT= # /!\ Legacy definition of messages per minute. Use USAGE_LIMITS.messagesPerMinute instead
MESSAGES_BEFORE_LOGIN=# how many messages a user can send in a conversation before having to login. set to 0 to force login right away
PUBLIC_APP_GUEST_MESSAGE=# a message to the guest user. If not set, a default message will be used

APP_BASE="" # base path of the app, e.g. /chat, left blank as default
PUBLIC_APP_NAME=ChatUI # name used as title throughout the app
Expand All @@ -144,10 +133,9 @@ PUBLIC_APP_DESCRIPTION=# description used throughout the app (if not set, a defa
PUBLIC_APP_DATA_SHARING=#set to 1 to enable options & text regarding data sharing
PUBLIC_APP_DISCLAIMER=#set to 1 to show a disclaimer on login page
PUBLIC_APP_DISCLAIMER_MESSAGE="Disclaimer: AI is an area of active research with known problems such as biased generation and misinformation. Do not use this application for high-stakes decisions or advice. Do not insert your personal data, especially sensitive, like health data."
LLM_SUMMARIZATION=true
LLM_SUMMERIZATION=true

EXPOSE_API=true
USE_HF_TOKEN_IN_API=false
# PUBLIC_APP_NAME=HuggingChat
# PUBLIC_APP_ASSETS=huggingchat
# PUBLIC_APP_COLOR=yellow
Expand All @@ -165,21 +153,6 @@ WEBHOOK_URL_REPORT_ASSISTANT=#provide webhook url to get notified when an assist
ALLOWED_USER_EMAILS=`[]` # if it's defined, only these emails will be allowed to use the app

USAGE_LIMITS=`{}`

ALLOW_IFRAME=true
ALLOW_INSECURE_COOKIES=false # recommended to keep this to false but set to true if you need to run over http without tls
METRICS_ENABLED=false
METRICS_PORT=5565
LOG_LEVEL=info


TOOLS=`[]`
BODY_SIZE_LIMIT=15728640

HF_ORG_ADMIN=
HF_ORG_EARLY_ACCESS=

PUBLIC_SMOOTH_UPDATES=false
COMMUNITY_TOOLS=false

PUBLIC_COMMIT_SHA=
METRICS_PORT=
LOG_LEVEL=info
298 changes: 298 additions & 0 deletions .env.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,298 @@
# template used in production for HuggingChat.

MODELS=`[
{
"name" : "CohereForAI/c4ai-command-r-plus",
"tokenizer": "Xenova/c4ai-command-r-v01-tokenizer",
"description": "Command R+ is Cohere's latest LLM and is the first open weight model to beat GPT4 in the Chatbot Arena!",
"modelUrl": "https://huggingface.co/CohereForAI/c4ai-command-r-plus",
"websiteUrl": "https://docs.cohere.com/docs/command-r-plus",
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/cohere-logo.png",
"parameters": {
"stop": ["<|END_OF_TURN_TOKEN|>"],
"truncate" : 28672,
"max_new_tokens" : 4096,
"temperature" : 0.3
},
"promptExamples" : [
{
"title": "Write an email from bullet list",
"prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
}, {
"title": "Code a snake game",
"prompt": "Code a basic snake game in python, give explanations for each step."
}, {
"title": "Assist in a task",
"prompt": "How do I make a delicious lemon cheesecake?"
}
]
},
{
"name" : "meta-llama/Meta-Llama-3-70B-Instruct",
"description": "Generation over generation, Meta Llama 3 demonstrates state-of-the-art performance on a wide range of industry benchmarks and offers new capabilities, including improved reasoning.",
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/meta-logo.png",
"modelUrl": "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct",
"websiteUrl": "https://llama.meta.com/llama3/",
"tokenizer" : "philschmid/meta-llama-3-tokenizer",
"promptExamples" : [
{
"title": "Write an email from bullet list",
"prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
}, {
"title": "Code a snake game",
"prompt": "Code a basic snake game in python, give explanations for each step."
}, {
"title": "Assist in a task",
"prompt": "How do I make a delicious lemon cheesecake?"
}
],
"parameters": {
"stop": ["<|eot_id|>"],
"truncate": 6144,
"max_new_tokens": 2047
}
},
{
"name" : "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
"tokenizer": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
"description": "Zephyr 141B-A35B is a fine-tuned version of Mistral 8x22B, trained using ORPO, a novel alignment algorithm.",
"modelUrl": "https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
"websiteUrl": "https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/zephyr-logo.png",
"parameters": {
"truncate" : 24576,
"max_new_tokens" : 8192,
},
"preprompt" : "You are Zephyr, an assistant developed by KAIST AI, Argilla, and Hugging Face. You should give concise responses to very simple questions, but provide thorough responses to more complex and open-ended questions. You are happy to help with writing, analysis, question answering, math, coding, and all sorts of other tasks.",
"promptExamples" : [
{
"title": "Write a poem",
"prompt": "Write a poem to help me remember the first 10 elements on the periodic table, giving each element its own line."
}, {
"title": "Code a snake game",
"prompt": "Code a basic snake game in python, give explanations for each step."
}, {
"title": "Assist in a task",
"prompt": "How do I make a delicious lemon cheesecake?"
}
]
},
{
"name" : "mistralai/Mixtral-8x7B-Instruct-v0.1",
"description" : "The latest MoE model from Mistral AI! 8x7B and outperforms Llama 2 70B in most benchmarks.",
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
"websiteUrl" : "https://mistral.ai/news/mixtral-of-experts/",
"modelUrl": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
"tokenizer": "mistralai/Mixtral-8x7B-Instruct-v0.1",
"preprompt" : "",
"chatPromptTemplate": "<s> {{#each messages}}{{#ifUser}}[INST]{{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}} {{content}} [/INST]{{/ifUser}}{{#ifAssistant}} {{content}}</s> {{/ifAssistant}}{{/each}}",
"parameters" : {
"temperature" : 0.6,
"top_p" : 0.95,
"repetition_penalty" : 1.2,
"top_k" : 50,
"truncate" : 24576,
"max_new_tokens" : 8192,
"stop" : ["</s>"]
},
"promptExamples" : [
{
"title": "Write an email from bullet list",
"prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
}, {
"title": "Code a snake game",
"prompt": "Code a basic snake game in python, give explanations for each step."
}, {
"title": "Assist in a task",
"prompt": "How do I make a delicious lemon cheesecake?"
}
]
},
{
"name" : "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
"description" : "Nous Hermes 2 Mixtral 8x7B DPO is the new flagship Nous Research model trained over the Mixtral 8x7B MoE LLM.",
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nous-logo.png",
"websiteUrl" : "https://nousresearch.com/",
"modelUrl": "https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
"tokenizer": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
"chatPromptTemplate" : "{{#if @root.preprompt}}<|im_start|>system\n{{@root.preprompt}}<|im_end|>\n{{/if}}{{#each messages}}{{#ifUser}}<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n{{/ifUser}}{{#ifAssistant}}{{content}}<|im_end|>\n{{/ifAssistant}}{{/each}}",
"promptExamples": [
{
"title": "Write an email from bullet list",
"prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
}, {
"title": "Code a snake game",
"prompt": "Code a basic snake game in python, give explanations for each step."
}, {
"title": "Assist in a task",
"prompt": "How do I make a delicious lemon cheesecake?"
}
],
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"repetition_penalty": 1,
"top_k": 50,
"truncate": 24576,
"max_new_tokens": 2048,
"stop": ["<|im_end|>"]
}
},
{
"name" : "google/gemma-1.1-7b-it",
"description": "Gemma 7B 1.1 is the latest release in the Gemma family of lightweight models built by Google, trained using a novel RLHF method.",
"websiteUrl" : "https://blog.google/technology/developers/gemma-open-models/",
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/google-logo.png",
"modelUrl": "https://huggingface.co/google/gemma-1.1-7b-it",
"preprompt": "",
"chatPromptTemplate" : "{{#each messages}}{{#ifUser}}<start_of_turn>user\n{{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}}<end_of_turn>\n<start_of_turn>model\n{{/ifUser}}{{#ifAssistant}}{{content}}<end_of_turn>\n{{/ifAssistant}}{{/each}}",
"promptExamples": [
{
"title": "Write an email from bullet list",
"prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
}, {
"title": "Code a snake game",
"prompt": "Code a basic snake game in python, give explanations for each step."
}, {
"title": "Assist in a task",
"prompt": "How do I make a delicious lemon cheesecake?"
}
],
"parameters": {
"do_sample": true,
"truncate": 7168,
"max_new_tokens": 1024,
"stop" : ["<end_of_turn>"]
}
},

{
"name": "mistralai/Mistral-7B-Instruct-v0.2",
"displayName": "mistralai/Mistral-7B-Instruct-v0.2",
"description": "Mistral 7B is a new Apache 2.0 model, released by Mistral AI that outperforms Llama2 13B in benchmarks.",
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
"websiteUrl": "https://mistral.ai/news/announcing-mistral-7b/",
"modelUrl": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
"tokenizer": "mistralai/Mistral-7B-Instruct-v0.2",
"preprompt": "",
"chatPromptTemplate" : "<s>{{#each messages}}{{#ifUser}}[INST] {{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}} [/INST]{{/ifUser}}{{#ifAssistant}}{{content}}</s>{{/ifAssistant}}{{/each}}",
"parameters": {
"temperature": 0.3,
"top_p": 0.95,
"repetition_penalty": 1.2,
"top_k": 50,
"truncate": 3072,
"max_new_tokens": 1024,
"stop": ["</s>"]
},
"promptExamples": [
{
"title": "Write an email from bullet list",
"prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
}, {
"title": "Code a snake game",
"prompt": "Code a basic snake game in python, give explanations for each step."
}, {
"title": "Assist in a task",
"prompt": "How do I make a delicious lemon cheesecake?"
}
]
},
{
"name": "microsoft/Phi-3-mini-4k-instruct",
"tokenizer": "microsoft/Phi-3-mini-4k-instruct",
"description" : "Phi-3 Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model built upon datasets used for Phi-2.",
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/microsoft-logo.png",
"modelUrl": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct",
"websiteUrl": "https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/",
"preprompt": "",
"chatPromptTemplate": "<s>{{preprompt}}{{#each messages}}{{#ifUser}}<|user|>\n{{content}}<|end|>\n<|assistant|>\n{{/ifUser}}{{#ifAssistant}}{{content}}<|end|>\n{{/ifAssistant}}{{/each}}",
"parameters": {
"stop": ["<|end|>", "<|endoftext|>", "<|assistant|>"],
"max_new_tokens": 1024,
"truncate": 3071
},
"promptExamples": [
{
"title": "Write an email from bullet list",
"prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
}, {
"title": "Code a snake game",
"prompt": "Code a basic snake game in python, give explanations for each step."
}, {
"title": "Assist in a task",
"prompt": "How do I make a delicious lemon cheesecake?"
}
]
},
{
"name": "meta-llama/Meta-Llama-3-8B-Instruct",
"tokenizer" : "philschmid/meta-llama-3-tokenizer",
"parameters": {
"temperature": 0.1,
"stop": ["<|eot_id|>"],
"truncate": 1024,
},
"unlisted": true
}
]`

OLD_MODELS=`[
{"name":"bigcode/starcoder"},
{"name":"OpenAssistant/oasst-sft-6-llama-30b-xor"},
{"name":"HuggingFaceH4/zephyr-7b-alpha"},
{"name":"openchat/openchat_3.5"},
{"name":"openchat/openchat-3.5-1210"},
{"name": "tiiuae/falcon-180B-chat"},
{"name": "codellama/CodeLlama-34b-Instruct-hf"},
{"name": "google/gemma-7b-it"},
{"name": "meta-llama/Llama-2-70b-chat-hf"},
{"name": "codellama/CodeLlama-70b-Instruct-hf"},
{"name": "openchat/openchat-3.5-0106"}
]`

TASK_MODEL='meta-llama/Meta-Llama-3-8B-Instruct'

TEXT_EMBEDDING_MODELS = `[
{
"name": "bge-base-en-v1-5-sxa",
"displayName": "bge-base-en-v1-5-sxa",
"chunkCharLength": 512,
"endpoints": [
{ "type": "tei",
"url" : "https://huggingchat-tei.hf.space/"
}
]
}
]`


APP_BASE="/chat"
PUBLIC_ORIGIN=https://huggingface.co
PUBLIC_SHARE_PREFIX=https://hf.co/chat
PUBLIC_ANNOUNCEMENT_BANNERS=`[]`

PUBLIC_APP_NAME=HuggingChat
PUBLIC_APP_ASSETS=huggingchat
PUBLIC_APP_COLOR=yellow
PUBLIC_APP_DESCRIPTION="Making the community's best AI chat models available to everyone."
PUBLIC_APP_DISCLAIMER_MESSAGE="Disclaimer: AI is an area of active research with known problems such as biased generation and misinformation. Do not use this application for high-stakes decisions or advice."
PUBLIC_APP_DATA_SHARING=0
PUBLIC_APP_DISCLAIMER=1

PUBLIC_PLAUSIBLE_SCRIPT_URL="/js/script.js"
PUBLIC_APPLE_APP_ID=6476778843
# Not part of the .env but set as other variables in the space
# ADDRESS_HEADER=X-Forwarded-For
# XFF_DEPTH=2

ENABLE_ASSISTANTS=true
ENABLE_ASSISTANTS_RAG=true
REQUIRE_FEATURED_ASSISTANTS=true
EXPOSE_API=true

ALTERNATIVE_REDIRECT_URLS=`[
huggingchat://login/callback
]`

WEBSEARCH_BLOCKLIST=`["youtube.com", "twitter.com"]`
2 changes: 1 addition & 1 deletion .eslintrc.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ module.exports = {
extraFileExtensions: [".svelte"],
},
rules: {
"require-yield": "off",
"no-shadow": ["error"],
"@typescript-eslint/no-explicit-any": "error",
"@typescript-eslint/no-non-null-assertion": "error",
"@typescript-eslint/no-unused-vars": [
Expand Down
Loading
Loading