Skip to content

Commit 99207d6

Browse files
image get ocr info (#3)
* functionality * debug debug * debug
1 parent 5ec5554 commit 99207d6

22 files changed

+683
-308
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,3 +154,7 @@ cython_debug/
154154
__pycache__/
155155

156156
output/
157+
158+
*.ipynb
159+
160+
outputTEMP_MPY_wvf_snd.mp3

ExampleVideoGen.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import os
2-
from tools.tools import getCurrentTimeAsFolder
2+
from tools.tools import current_time_as_folder
33
from openai import AzureOpenAI
44
from tools.openai_adapter import OpenaiAdapter
55
from tools.speech_adapter import SpeechServiceAdapter, DefaultMaleSpeaker
@@ -13,7 +13,7 @@
1313
bing_search_api=os.getenv('BING_SEARCH_ENDPOINT'),
1414
bing_search_key=os.getenv('BING_SEARCH_KEY')
1515
)
16-
newsList = bing.newsCategoryTrending(ChinaCategory.Sports.value, Market.China.value)
16+
newsList = bing.news_category_trending(ChinaCategory.Sports.value, Market.China.value)
1717
news = newsList[0]
1818

1919
oai = OpenaiAdapter(openai_client=AzureOpenAI(
@@ -23,6 +23,6 @@
2323
))
2424
speech = SpeechServiceAdapter(os.getenv('SPEECH_HOST'), os.getenv('SPEECH_REGION'), os.getenv('SPEECH_KEY'), DefaultMaleSpeaker)
2525

26-
director = AIDirector(oai, speech, bing, '/System/Library/Fonts/Supplemental/Arial Unicode.ttf')
26+
director = AIDirector(oai, speech, bing)
2727

28-
director.news2Video(news, folderPath=getCurrentTimeAsFolder())
28+
director.news2Video(news, folderPath=current_time_as_folder())

ExampleVideoGenWithAvatar.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import os
2-
from tools.tools import getCurrentTimeAsFolder
2+
from tools.tools import current_time_as_folder
33
from openai import AzureOpenAI
44
from tools.openai_adapter import OpenaiAdapter
55
from tools.speech_adapter import SpeechServiceAdapter, DefaultFemaleSpeaker
66
from tools.bing_search_adapter import BingSearchAdapter, ChinaCategory, Market
77
from workers.AIDirector import AIDirector
88
from dotenv import load_dotenv
9+
from configs.directorConfig import DirectorConfig
10+
911

1012
load_dotenv()
1113

@@ -17,10 +19,13 @@
1719
oai = OpenaiAdapter(openai_client=client)
1820
speech = SpeechServiceAdapter(os.getenv('SPEECH_HOST'), os.getenv('SPEECH_REGION'), os.getenv('SPEECH_KEY'), DefaultFemaleSpeaker)
1921
bing = BingSearchAdapter(bing_search_api=os.getenv('BING_SEARCH_ENDPOINT'), bing_search_key=os.getenv('BING_SEARCH_KEY'))
20-
director = AIDirector(oai, speech, bing, '/System/Library/Fonts/Supplemental/Arial Unicode.ttf')
22+
config = DirectorConfig({
23+
"use_avatar": True
24+
})
25+
director = AIDirector(oai, speech, bing, config=config)
2126

22-
folderPath = getCurrentTimeAsFolder()
23-
newsList = bing.newsCategoryTrending(ChinaCategory.Military.value, Market.China.value)
27+
folderPath = current_time_as_folder()
28+
newsList = bing.news_category_trending(ChinaCategory.Military.value, Market.China.value)
2429
director.news2Video(newsList[2], folderPath, with_avatar=True)
2530

2631

ExampleWepageVedioGen.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""Example script: turn an arbitrary web page into a video.

Builds the OpenAI, speech, Bing and OCR helpers from environment variables,
then asks ``AIDirector`` to render the page into a time-stamped folder with
OCR and in-page images switched on.
"""
import os

import easyocr
from dotenv import load_dotenv
from openai import AzureOpenAI

from configs.directorConfig import DirectorConfig
from tools.bing_search_adapter import BingSearchAdapter
from tools.openai_adapter import OpenaiAdapter
from tools.speech_adapter import SpeechServiceAdapter, DefaultFemaleSpeaker
from tools.tools import current_time_as_folder
from workers.AIDirector import AIDirector

# Pull credentials and endpoints from a local .env file into the environment.
load_dotenv()

# NOTE: the variable names are spelled "OPANAI"; they must match the names
# used in the environment / .env file, so they are kept as-is.
client = AzureOpenAI(
    api_version="2023-12-01-preview",
    azure_endpoint=os.getenv('OPANAI_API_ENDPOINT'),
    api_key=os.getenv('OPANAI_API_KEY'),
)
oai = OpenaiAdapter(openai_client=client)

speech = SpeechServiceAdapter(
    os.getenv('SPEECH_HOST'),
    os.getenv('SPEECH_REGION'),
    os.getenv('SPEECH_KEY'),
    DefaultFemaleSpeaker,
)

bing = BingSearchAdapter(
    bing_search_api=os.getenv('BING_SEARCH_ENDPOINT'),
    bing_search_key=os.getenv('BING_SEARCH_KEY'),
)

# OCR reader for Simplified Chinese and English text.
reader = easyocr.Reader(['ch_sim', 'en'])

config = DirectorConfig({
    "use_ocr": True,
    "use_image_in_webpage": True,
})
director = AIDirector(oai, speech, bing, reader, config=config)

folderPath = current_time_as_folder()
director.webpage2Video(
    "https://azure.microsoft.com/zh-cn/products/ai-services/?activetab=pivot:azureopenai%E6%9C%8D%E5%8A%A1tab",
    folderPath,
)
30+

README.md

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ director.news2Video(newsList[0], folderPath, with_avatar=True)
229229

230230
- [ ] webpage to script
231231
- [x] news webpage to script
232-
- [ ] any webpage to script
232+
- [x] any webpage to script
233233
- [ ] any topic to script
234234
- [x] Collect/Generate multimedia resource for a script
235235
- [x] Text to speech
@@ -245,11 +245,13 @@ director.news2Video(newsList[0], folderPath, with_avatar=True)
245245
- [ ] Add BGM
246246
- [ ] Fix Avatar background issue, Avatar position and size auto-adjust
247247
- [ ] Different length
248+
- [ ] Any size
248249
- [ ] [Currently ongoing] Go deeper into content
249-
- [ ] Download image/video in webpage
250-
- [ ] Add OCR when review image for news
250+
- [x] Download image/video in webpage
251+
- [x] Add OCR when review image for news
251252
- [ ] Search related information
252253
- [ ] Draw table / chart if needed
254+
- [ ] RAG on knowledge
253255
- [ ] UX
254256
- [ ] UI Design
255257
- [ ] GUI
@@ -260,7 +262,9 @@ director.news2Video(newsList[0], folderPath, with_avatar=True)
260262
- [ ] Async methods
261263
- [ ] More Comments
262264
- [ ] Error handling
263-
- [ ] Cost statistic
265+
- [ ] Cost analysis
266+
- [ ] Test
267+
- [ ] Name of variables
264268
- [ ] Integrate social media
265269
- [ ] Integrate Lang Chain
266270
- [ ] Onboard GPT store

configs/config.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import copy
2+
3+
class Config(object):
    """Read-only wrapper around a dictionary of configuration values.

    The dictionary is deep-copied on construction, so later changes to the
    caller's object (including nested containers) do not leak into this
    instance.
    """

    def __init__(self, conf: dict):
        """Store a private deep copy of *conf*.

        Args:
            conf: Mapping of property name to value. ``None`` is treated as
                an empty configuration.
        """
        # Deep copy so nested values are isolated from the caller's dict.
        self._config = copy.deepcopy(conf) if conf is not None else {}

    def get_property(self, property_name):
        """Return the value stored under *property_name*.

        Args:
            property_name: Key to look up.

        Returns:
            The stored value, or ``None`` when the key is absent (a missing
            key never raises ``KeyError``).
        """
        return self._config.get(property_name)

configs/directorConfig.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from configs.config import Config
2+
3+
class DirectorConfig(Config):
    """Named accessors, with defaults, for the settings passed to AIDirector.

    Each property reads one key through ``get_property`` and falls back to a
    default when the key is missing or its value is falsy.
    """

    def __init__(self, conf: dict):
        """Create the configuration from a dictionary of settings.

        Args:
            conf: Mapping of setting name to value; see the properties for
                the keys that are read.
        """
        super().__init__(conf)

    @property
    def path_to_font(self) -> str:
        """Configured ``path_to_font``, or the Arial Unicode default path.

        NOTE(review): the default looks like a macOS system font location;
        other platforms presumably need to set ``path_to_font`` explicitly.
        """
        return self.get_property("path_to_font") or '/System/Library/Fonts/Supplemental/Arial Unicode.ttf'

    @property
    def video_shape(self) -> tuple:
        """Configured ``video_shape``, or ``(720, 1280)`` when unset.

        The configured value is returned as given (it is not converted).
        NOTE(review): the axis order (width/height) is not visible here —
        confirm against the code that consumes it.
        """
        return self.get_property("video_shape") or (720, 1280)

    @property
    def use_avatar(self) -> bool:
        """Truthiness of the ``use_avatar`` setting; ``False`` when unset."""
        return bool(self.get_property("use_avatar"))

    @property
    def use_image_in_webpage(self) -> bool:
        """Truthiness of ``use_image_in_webpage``; ``False`` when unset."""
        return bool(self.get_property("use_image_in_webpage"))

    @property
    def use_ocr(self) -> bool:
        """Truthiness of the ``use_ocr`` setting; ``False`` when unset."""
        return bool(self.get_property("use_ocr"))

    @property
    def use_dalle(self) -> bool:
        """Truthiness of the ``use_dalle`` setting; ``False`` when unset."""
        return bool(self.get_property("use_dalle"))
30+

models/image.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from enum import Enum
2+
3+
class ImageInfo:
    """Metadata about one image: where it is stored and how it is described."""

    def __init__(self, path: str, raw_description: str = '', ai_description: str = '', provider: str = ''):
        """Record the image location and its optional descriptions.

        Args:
            path: Location of the image.
            raw_description: Description text that came with the image.
            ai_description: Generated text for the image; ``description``
                renders it under the label "ocr result".
            provider: Where the image came from.
        """
        self.path = path
        self.raw_description = raw_description
        self.ai_description = ai_description
        self.provider = provider

    def toJSON(self):
        """Return the four stored fields as a plain dictionary."""
        return {
            "path": self.path,
            "raw_description": self.raw_description,
            "ai_description": self.ai_description,
            "provider": self.provider
        }

    @property
    def description(self):
        """Return the available descriptions as one labelled string.

        Only non-empty parts are included, joined by a single space, so the
        result is ``""`` (falsy) when neither description is set rather than
        a whitespace-only string.
        """
        parts = []
        if self.raw_description:
            parts.append("description: `{}`".format(self.raw_description))
        if self.ai_description:
            parts.append("ocr result: `{}`".format(self.ai_description))
        return " ".join(parts)
24+
25+
class ImageEncodingFormatEnum(Enum):
    """Image encoding formats known to this module.

    NOTE(review): the values look like the subtype part of ``image/*`` MIME
    types (e.g. ``svg+xml``) — confirm against the code that compares them.
    """
    JPEG = 'jpeg'
    PNG = 'png'
    GIF = 'gif'
    SVG = 'svg+xml'
30+
31+
# Maps each encoding format's string value to the list of suffixes associated
# with it. NOTE(review): presumably file-name extensions without the leading
# dot — confirm against the code that reads this table.
ImageTypeSuffix = {
    encoding.value: suffixes
    for encoding, suffixes in (
        (ImageEncodingFormatEnum.JPEG, ['jpg', 'jpeg', 'jfif', 'pjpeg', 'pjp']),
        (ImageEncodingFormatEnum.PNG, ['png']),
        (ImageEncodingFormatEnum.GIF, ['gif']),
        (ImageEncodingFormatEnum.SVG, ['svg']),
    )
}

models/webpage.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from bs4 import BeautifulSoup
2+
from models.image import ImageInfo
3+
from typing import List
4+
5+
class WebpageInfo:
    """Title, text content and collected images of one parsed web page."""

    def __init__(self, soup:BeautifulSoup):
        """Extract the title and text from an already-parsed page.

        Args:
            soup: Parsed document. Its ``title`` element supplies the title
                (empty string when there is none) and its ``text`` supplies
                the content.
        """
        self.soup = soup
        title_tag = self.soup.title
        if title_tag:
            self.title_text = title_tag.text
        else:
            self.title_text = ''
        self.content = self.soup.text
        # Starts empty; nothing in this class fills it.
        self.images:List[ImageInfo] = []

    def toJSON(self):
        """Return title, content and the images' ``toJSON()`` output as a dict."""
        serialized_images = []
        for image in self.images:
            serialized_images.append(image.toJSON())
        return {
            "title": self.title_text,
            "content": self.content,
            "images": serialized_images
        }

prompts/newsWebsiteToScript.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[
22
{
33
"role": "system",
4-
"content": "你是著名的网络新闻主播。你不用介绍你自己或者当前频道,但需要说明消息源。你需要根据在线新闻网站提供的信息写200字左右新闻播报稿件,着重实时故事描述,避免评论。你懂得如何取悦观众,语言必须流畅,事实要清楚明了符合原新闻和常识。在开头就点明新闻爆点,吸引流量。巧妙利用谐音增加趣味性。观点一针见血直击要害。提出问题引发听众的思考。"
4+
"content": "你是著名的网络新闻主播。不用介绍你自己或者当前频道。根据在线新闻网站提供的信息写200字左右新闻播报稿件,着重实时故事描述,避免评论。你懂得如何取悦观众,语言必须流畅,事实要清楚明了符合原新闻和常识。在开头就点明新闻爆点,吸引流量。巧妙利用谐音增加趣味性。观点一针见血直击要害。提出问题引发听众的思考。"
55
}
66
]

0 commit comments

Comments
 (0)