From 08557b99851c1b221b59b0f5bc70d7661c3772d3 Mon Sep 17 00:00:00 2001
From: Charlotte Hausman <chausman@nrao.edu>
Date: Fri, 12 Aug 2022 14:14:37 -0400
Subject: [PATCH] sanitize notes updates for invalid characters before
 persisting to database

---
 docker.properties                             |  2 +-
 .../capability/views/capability_version.py    |  6 ++-
 services/workflow/workflow/server.py          | 39 +++++++++++--------
 3 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/docker.properties b/docker.properties
index 52036c87e..ed55e0b80 100644
--- a/docker.properties
+++ b/docker.properties
@@ -25,7 +25,7 @@ edu.nrao.workspaces.CapabilitySettings.externalServiceUrl = http://capability:34
 edu.nrao.workspaces.ProcessingSettings.useCasa = false
 edu.nrao.workspaces.ProcessingSettings.rootDirectory =  /lustre/aoc/cluster/pipeline/docker/workspaces/spool
 edu.nrao.workspaces.ProcessingSettings.scriptLocation = /lustre/aoc/cluster/pipeline/docker/workspaces/sbin
-edu.nrao.workspaces.ProcessingSettings.ramInGb = 0.2G
+edu.nrao.workspaces.ProcessingSettings.ramInGb = 0.21G
 edu.nrao.workspaces.ProcessingSettings.autoGenerateStandardCals = False
 edu.nrao.workspaces.ProcessingSettings.CasaVersion.vlassSeci = /home/casa/packages/pipeline/casa-6.1.3-3-pipeline-2021.1.1.32
 
diff --git a/services/capability/capability/views/capability_version.py b/services/capability/capability/views/capability_version.py
index 441e24667..da6146b86 100644
--- a/services/capability/capability/views/capability_version.py
+++ b/services/capability/capability/views/capability_version.py
@@ -21,6 +21,7 @@ File containing definitions for the capability version routes of the Workspaces
 
 import copy
 import http
+import json
 import logging
 
 import requests
@@ -165,7 +166,10 @@ def update_da_notes(request: Request) -> Response:
     capability_request_id = request.matchdict["capability_request_id"]
     version_id = request.matchdict["version_id"]
     params = request.json_body
-    new_notes = params["edits"]
+    # Sanitize the input before persisting it: the NUL character (\u0000) is incompatible with postgres json columns.
+    # NOTE(review): replace() runs after json.loads(), so it strips the literal text \u0000, not a decoded NUL char - confirm
+    # Approach adapted from StackOverflow: https://stackoverflow.com/questions/63092267/how-to-handle-api-responsesjson-containing-x00-or-u0000-in-its-data-and-s
+    new_notes = json.loads(json.dumps(params["edits"])).replace("\\u0000", "")
 
     capability_request = request.capability_info.lookup_capability_request(capability_request_id)
     if capability_request:
diff --git a/services/workflow/workflow/server.py b/services/workflow/workflow/server.py
index 0ca47da15..a3c3e3712 100644
--- a/services/workflow/workflow/server.py
+++ b/services/workflow/workflow/server.py
@@ -17,6 +17,7 @@
 # along with Workspaces.  If not, see <https://www.gnu.org/licenses/>.
 # pylint: disable=E0401
 """ This is our workflow server API. """
+import glob
 import http
 import json
 import logging
@@ -183,34 +184,38 @@ class WorkflowWorkingDirRestService:
     def get_qa_notes(self):
         requested_workflow = self.request.info.lookup_workflow_request(self.request.matchdict["request_id"])
         results_path = requested_workflow.results_dir
-        paths = []
-        for root, dirs, _ in os.walk(results_path, followlinks=True):
-            for d in dirs:
-                if d.startswith("pipeline-"):
-                    paths.append(os.path.join(root, d))
 
-        qa_notes_path = paths[0] + "/html/qa_notes.html"
-        qa_notes_text = ""
+        # qa notes need the products directory weblog
+        path = glob.glob(results_path + "/products/pipeline-*/html")[0]
+        qa_notes_path = path + "/qa_notes.html"
 
-        with open(qa_notes_path, "r") as qa_notes:
-            qa_notes_text = qa_notes.read()
+        if Path(qa_notes_path).exists():
+            qa_notes_text = ""
+            with open(qa_notes_path, "r") as qa_notes:
+                qa_notes_text = qa_notes.read()
 
-        return Response(status_int=http.HTTPStatus.OK, json_body={"resp": f"{qa_notes_text}"})
+            return Response(status_int=http.HTTPStatus.OK, json_body={"resp": f"{qa_notes_text}"})
+
+        return Response(
+            status_int=http.HTTPStatus.NOT_FOUND,
+            json_body={"resp": "QA Notes file not found. Please close the window and try again."},
+        )
 
     @view_config(request_method="POST", route_name="get_qa_notes", renderer="json")
     def save_qa_notes(self):
         requested_workflow = self.request.info.lookup_workflow_request(self.request.matchdict["request_id"])
         results_path = requested_workflow.results_dir
-        paths = []
-        for root, dirs, _ in os.walk(results_path, followlinks=True):
-            for d in dirs:
-                if d.startswith("pipeline-"):
-                    paths.append(os.path.join(root, d))
 
-        qa_notes_path = paths[0] + "/html/qa_notes.html"
+        # qa notes need the products directory weblog
+        path = glob.glob(results_path + "/products/pipeline-*/html")[0]
+        qa_notes_path = path + "/qa_notes.html"
 
         with open(qa_notes_path, "w") as qa_notes:
-            qa_notes.write(self.request.json_body["edits"])
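+            # Sanitize the input before writing it to the QA notes file: the NUL character (\u0000) is incompatible with postgres json columns.
+            # NOTE(review): replace() runs after json.loads(), so it strips the literal text \u0000, not a decoded NUL char - confirm
+            # Approach adapted from StackOverflow: https://stackoverflow.com/questions/63092267/how-to-handle-api-responsesjson-containing-x00-or-u0000-in-its-data-and-s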
+            edits = json.loads(json.dumps(self.request.json_body["edits"])).replace("\\u0000", "")
+            qa_notes.write(edits)
 
         return Response(
             status_int=http.HTTPStatus.OK,
-- 
GitLab