diff --git a/docker-compose.yaml b/docker-compose.yaml index 33bcc58..313aa64 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,33 +1,10 @@ # Usage: -# docker compose --env-file env/ up +# docker compose --env-file env/ run news_fetch && docker-compose --env-file env/production down version: "3.9" services: - news_fetch: - build: news_fetch - image: news_fetch:latest - volumes: - - ${CONTAINER_DATA}:/app/containerdata - - ${CODE:-/dev/null}:/code # not set in prod, defaults to /dev/null - - ${XSOCK-/dev/null}:${XSOCK-/tmp/sock} - - ${XAUTHORITY-/dev/null}:/home/auto_news/.Xauthority - environment: - - DISPLAY=$DISPLAY - - TERM=xterm-256color # colored logs - - COLUMNS=150 # for wider logs - - - DEBUG=${DEBUG} - - CHECK=${CHECK} - - UPLOAD=${UPLOAD} - - HEADLESS=${HEADLESS} - - REDUCEDFETCH=${REDUCEDFETCH} - entrypoint: ${ENTRYPOINT:-python3 runner.py} # by default launch workers as defined in the Dockerfile - stdin_open: ${INTERACTIVE:-false} # docker run -i - tty: ${INTERACTIVE:-false} # docker run -t - - geckodriver: image: selenium/standalone-firefox:103.0 volumes: @@ -70,3 +47,29 @@ services: - nas22.ethz.ch/gess_coss_1/helbing_support/Files RM/Archiving/TEST # first command is the target mount path - lsyncd - /sync/nas_sync.config + + + news_fetch: + build: news_fetch + image: news_fetch:latest + + depends_on: # when using docker compose run news_fetch, the dependencies are started as well + - nas_sync + - geckodriver + + volumes: + - ${CONTAINER_DATA}:/app/containerdata # always set + - ${CODE:-/dev/null}:/code # not set in prod, defaults to /dev/null + - ${XSOCK-/dev/null}:${XSOCK-/tmp/sock} # x11 socket, needed for gui + # - ${XAUTHORITY-/dev/null}:/home/auto_news/.Xauthority # xauth needed for authenticating to x11 + environment: + - DISPLAY=$DISPLAY # needed to let x11 apps know where to connect to + + - DEBUG=${DEBUG} + - CHECK=${CHECK} + - UPLOAD=${UPLOAD} + - HEADLESS=${HEADLESS} + - REDUCEDFETCH=${REDUCEDFETCH} + entrypoint: ${ENTRYPOINT:-python3 runner.py} # by default launch workers as defined in the Dockerfile + stdin_open: ${INTERACTIVE:-false} # docker run -i + tty: ${INTERACTIVE:-false} # docker run -t diff --git a/news_fetch/app/utils_storage/models.py b/news_fetch/app/utils_storage/models.py index bfc1927..06739b0 100644 --- a/news_fetch/app/utils_storage/models.py +++ b/news_fetch/app/utils_storage/models.py @@ -45,7 +45,11 @@ class ArticleDownload(DownloadBaseModel): # ... are added through foreignkeys def __str__(self) -> str: - return f"ART [{self.title} -- {self.source_name}]" + if self.title != '' and self.source_name != '': + desc = f"{shorten_name(self.title)} -- {self.source_name}" + else: + desc = f"{self.article_url}" + return f"ART [{desc}]" ## Useful Properties @property @@ -255,7 +259,7 @@ class Message(ChatBaseModel): # reaction def __str__(self) -> str: - return "MSG [{}]".format(self.text[:min(len(self.text), 30)].replace('\n','/') + '...') + return "MSG [{}]".format(shorten_name(self.text).replace('\n','/')) @property def slack_ts(self): @@ -319,4 +323,9 @@ def clear_path_name(path): keepcharacters = (' ','.','_', '-') converted = "".join([c if (c.isalnum() or c in keepcharacters) else "_" for c in path]).rstrip() return converted - \ No newline at end of file + +def shorten_name(name, offset = 50): + if len(name) > offset: + return name[:offset] + "..." + else: + return name \ No newline at end of file diff --git a/news_fetch/app/utils_worker/download/browser.py b/news_fetch/app/utils_worker/download/browser.py index 4c47648..8693e39 100644 --- a/news_fetch/app/utils_worker/download/browser.py +++ b/news_fetch/app/utils_worker/download/browser.py @@ -155,11 +155,11 @@ class PDFDownloader: hrefs = [e.get_attribute("href") for e in self.driver.find_elements_by_xpath("//a[@href]")] except: hrefs = [] - len_old = len(hrefs) + # len_old = len(hrefs) hrefs = [h for h in hrefs \ if not sum([(domain in h) for domain in blacklisted]) # sum([True, False, False, False]) == 1 (esp. not 0) ] # filter a tiny bit at least - self.logger.info(f"Hrefs filtered (before: {len_old}, after: {len(hrefs)})") + # self.logger.info(f"Hrefs filtered (before: {len_old}, after: {len(hrefs)})") return hrefs diff --git a/testing.docker-compose.yaml b/testing.docker-compose.yaml deleted file mode 100644 index 313aa64..0000000 --- a/testing.docker-compose.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Usage: -# docker compose --env-file env/ run news_fetch && docker-compose --env-file env/production down - -version: "3.9" - -services: - - geckodriver: - image: selenium/standalone-firefox:103.0 - volumes: - - ${XSOCK-/dev/null}:${XSOCK-/tmp/sock} - - ${XAUTHORITY-/dev/null}:/home/auto_news/.Xauthority - environment: - - DISPLAY=$DISPLAY - - START_VNC=false - - START_XVFB=false - user: 1001:1001 - expose: # exposed to other docker-compose services only - - "4444" - - - vpn: - image: wazum/openconnect-proxy:latest - env_file: - - ${CONTAINER_DATA}/config/vpn.config - cap_add: - - NET_ADMIN - volumes: - - /dev/net/tun:/dev/net/tun - # alternative to cap_add & volumes: specify privileged: true - - - nas_sync: - depends_on: - - vpn # used to establish a connection to the SMB server - network_mode: "service:vpn" - build: nas_sync - image: nas_sync:latest - cap_add: # capabilities needed for mounting the SMB share - - SYS_ADMIN - - DAC_READ_SEARCH - volumes: - - ${CONTAINER_DATA}/files:/sync/local_files - - ${CONTAINER_DATA}/config/nas_sync.config:/sync/nas_sync.config - - ${CONTAINER_DATA}/config/nas_login.config:/sync/nas_login.config - command: - - nas22.ethz.ch/gess_coss_1/helbing_support/Files RM/Archiving/TEST # first command is the target mount path - - lsyncd - - /sync/nas_sync.config - - - news_fetch: - build: news_fetch - image: news_fetch:latest - - depends_on: # when using docker compose run news_fetch, the dependencies are started as well - - nas_sync - - geckodriver - - volumes: - - ${CONTAINER_DATA}:/app/containerdata # always set - - ${CODE:-/dev/null}:/code # not set in prod, defaults to /dev/null - - ${XSOCK-/dev/null}:${XSOCK-/tmp/sock} # x11 socket, needed for gui - # - ${XAUTHORITY-/dev/null}:/home/auto_news/.Xauthority # xauth needed for authenticating to x11 - environment: - - DISPLAY=$DISPLAY # needed to let x11 apps know where to connect to - - - DEBUG=${DEBUG} - - CHECK=${CHECK} - - UPLOAD=${UPLOAD} - - HEADLESS=${HEADLESS} - - REDUCEDFETCH=${REDUCEDFETCH} - entrypoint: ${ENTRYPOINT:-python3 runner.py} # by default launch workers as defined in the Dockerfile - stdin_open: ${INTERACTIVE:-false} # docker run -i - tty: ${INTERACTIVE:-false} # docker run -t