Skip to content

Commit 6c2ff2b

Browse files
authored
Postgres Canary Test Env (#19)
* Mock data generator for local canary
* Add destination schema and user for transfer
* Update transfer library to use dates
* Fix bool col
* Fixing canary transfer
* Update date types for sql destinations
1 parent a780493 commit 6c2ff2b

File tree

10 files changed

+1253
-13
lines changed

10 files changed

+1253
-13
lines changed

data-transfer/pontoon/pontoon/base.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
import json
99
import time
1010
from uuid import UUID
11-
from datetime import datetime, timedelta, timezone
11+
from datetime import datetime, timedelta, timezone, date
1212
from decimal import Decimal
1313
from abc import ABC, abstractmethod
1414
from typing import List, Dict, Tuple, Generator, Any
@@ -50,7 +50,7 @@ class Stream:
5050
str: pa.string(),
5151
bool: pa.bool_(),
5252
bytes: pa.binary(),
53-
datetime.date: pa.date32(),
53+
date: pa.date32(),
5454
datetime.time: pa.time64('us'),
5555
datetime: pa.timestamp('us', tz='UTC'),
5656
type(None): pa.null() # NoneType corresponds to NULL
@@ -60,10 +60,12 @@ class Stream:
6060
PY_CONVERSION_MAP = {
6161
UUID: str,
6262
Decimal: float,
63+
dict: str, # JSONB and other dict-like types get converted to string
64+
date: date, # Date type maps to itself (already in PY_TO_PYARROW_MAP)
6365
'TIMESTAMP_NTZ': datetime,
6466
'TIMESTAMP_LTZ': datetime,
6567
'TIMESTAMP_TZ': datetime,
66-
'DATE': datetime.date,
68+
'DATE': date, # DATE string keys map to the Python date type
6769
'TIME': datetime.time
6870
}
6971

@@ -154,6 +156,8 @@ def convert(val):
154156
py_type = type(val)
155157
if py_type is datetime:
156158
return val.astimezone(timezone.utc)
159+
elif py_type is date:
160+
return val # date objects should be passed through as-is
157161
fn = type_map.get(py_type)
158162
return fn(val) if fn else val
159163

data-transfer/pontoon/pontoon/cache/sqlite_cache.py

Lines changed: 37 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import sqlite3
22
import pyarrow as pa
3-
from datetime import datetime
3+
from datetime import datetime, date
44
from typing import List, Dict, Tuple, Generator, Any
55
from pontoon.base import Cache, Namespace, Stream, Record
66

@@ -11,9 +11,17 @@ def adapt_datetime_iso(val):
1111
def convert_datetime(val):
1212
return datetime.fromisoformat(val.decode())
1313

14+
def adapt_date_iso(val):
15+
return val.isoformat()
16+
17+
def convert_date(val):
18+
return date.fromisoformat(val.decode())
19+
1420

1521
sqlite3.register_adapter(datetime, adapt_datetime_iso)
1622
sqlite3.register_converter("datetime", convert_datetime)
23+
sqlite3.register_adapter(date, adapt_date_iso)
24+
sqlite3.register_converter("date", convert_date)
1725

1826

1927
class SqliteCache(Cache):
@@ -48,7 +56,9 @@ def _arrow_to_sqlite_type(arrow_type):
4856
return "BLOB"
4957
elif pa.types.is_boolean(arrow_type):
5058
return "INTEGER"
51-
elif pa.types.is_date(arrow_type) or pa.types.is_timestamp(arrow_type):
59+
elif pa.types.is_date(arrow_type):
60+
return "date"
61+
elif pa.types.is_timestamp(arrow_type):
5262
return "datetime"
5363
elif pa.types.is_decimal(arrow_type):
5464
return "TEXT"
@@ -93,8 +103,31 @@ def _insert_rows_to_stream(self, stream:Stream, records:List[Record]):
93103

94104

95105
def _rows_to_records(self, stream:Stream, rows):
96-
# covert a sqlite row back into a record
97-
return [Record(list(row)) for row in rows]
106+
# convert a sqlite row back into a record with proper type conversion
107+
records = []
108+
for row in rows:
109+
converted_data = []
110+
for i, value in enumerate(row):
111+
# Get the expected type from the schema
112+
expected_type = stream.schema.types[i]
113+
114+
# Convert value based on expected type
115+
if pa.types.is_boolean(expected_type) and isinstance(value, int):
116+
# Convert SQLite integer (0/1) back to boolean
117+
converted_data.append(bool(value))
118+
elif pa.types.is_date(expected_type) and isinstance(value, str):
119+
# Convert date string back to date object
120+
converted_data.append(date.fromisoformat(value))
121+
elif pa.types.is_timestamp(expected_type) and isinstance(value, str):
122+
# Convert datetime string back to datetime object
123+
converted_data.append(datetime.fromisoformat(value))
124+
else:
125+
# Keep the value as-is for other types
126+
converted_data.append(value)
127+
128+
records.append(Record(converted_data))
129+
130+
return records
98131

99132

100133
def write(self, stream:Stream, records:List[Record]):

data-transfer/pontoon/pontoon/destination/postgres_destination.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -141,6 +141,12 @@ def write(self, ds:Dataset, progress_callback = None):
141141
if callable(progress_callback):
142142
progress.subscribe(progress_callback)
143143

144+
# Check if there are any records to process
145+
stream_size = ds.size(stream)
146+
if stream_size == 0:
147+
progress.message("No records to process for this stream")
148+
continue
149+
144150
with self._connect() as conn:
145151
# create target table for the stream if it doesn't exist
146152
table = SQLDestination.create_table_if_not_exists(conn, stream)

data-transfer/pontoon/pontoon/destination/sql_destination.py

Lines changed: 28 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,8 @@ class SQLDestination(Destination):
2424
pa.binary(): String,
2525
pa.bool_(): Boolean,
2626
pa.timestamp('us', tz='UTC'): DateTime(True),
27-
pa.date32(): DateTime,
28-
pa.date64(): DateTime
27+
pa.date32(): Date,
28+
pa.date64(): Date,
2929
}
3030

3131

@@ -39,7 +39,7 @@ class SQLDestination(Destination):
3939
SmallInteger: pa.int64(),
4040
Numeric: pa.float64(),
4141
Boolean: pa.bool_(),
42-
Date: pa.timestamp('us', tz='UTC'),
42+
Date: pa.date32(),
4343
Time: pa.timestamp('us', tz='UTC'),
4444
DateTime: pa.timestamp('us', tz='UTC'),
4545
}
@@ -102,6 +102,29 @@ def table_ddl_to_schema(cols) -> pa.Schema:
102102
return pa.schema(fields)
103103

104104

105+
@staticmethod
106+
def schemas_compatible(stream_schema: pa.Schema, existing_schema: pa.Schema) -> bool:
107+
"""
108+
Compare two schemas for compatibility, ignoring column order.
109+
Returns True if the schemas are compatible (same column names and types).
110+
"""
111+
# Convert schemas to dictionaries for easier comparison
112+
stream_fields = {field.name: field.type for field in stream_schema}
113+
existing_fields = {field.name: field.type for field in existing_schema}
114+
115+
# Check if all columns exist in both schemas with matching types
116+
if set(stream_fields.keys()) != set(existing_fields.keys()):
117+
return False
118+
119+
# Check if all column types match
120+
for col_name, stream_type in stream_fields.items():
121+
existing_type = existing_fields[col_name]
122+
if stream_type != existing_type:
123+
return False
124+
125+
return True
126+
127+
105128
@staticmethod
106129
def create_table_if_not_exists(conn, stream:Stream, override_name:str = None):
107130
# create a table for this stream if it doesn't already exist
@@ -118,8 +141,8 @@ def create_table_if_not_exists(conn, stream:Stream, override_name:str = None):
118141
table = Table(name, metadata_obj, schema=stream.schema_name, autoload_with=insp)
119142
existing_schema = SQLDestination.table_ddl_to_schema(table.columns)
120143

121-
# if not, we can't write to it
122-
if not existing_schema.equals(stream.schema):
144+
# Use flexible schema comparison that ignores column order
145+
if not SQLDestination.schemas_compatible(stream.schema, existing_schema):
123146
raise ValueError(f"Existing schema for stream {name} does not match.")
124147

125148
else:

0 commit comments

Comments
 (0)