Fix pagination issue

Peng Ren · Peng Ren · commit 2b5621501336 · 2025-12-23T10:09:19.000-05:00
diff --git a/pymongosql/cursor.py b/pymongosql/cursor.py
@@ -6,7 +6,7 @@
 
 from .common import BaseCursor, CursorIterator
 from .error import DatabaseError, OperationalError, ProgrammingError, SqlSyntaxError
-from .result_set import ResultSet
+from .result_set import DictResultSet, ResultSet
 from .sql.builder import ExecutionPlan
 from .sql.parser import SQLParser
 
@@ -132,7 +132,7 @@ def _execute_execution_plan(self, execution_plan: ExecutionPlan) -> None:
 
             # Create result set from command result
             self._result_set = self._result_set_class(
-                command_result=result, execution_plan=execution_plan, **self._kwargs
+                command_result=result, execution_plan=execution_plan, database=db, **self._kwargs
             )
 
             _logger.info(f"Query executed successfully on collection '{execution_plan.collection}'")
@@ -263,3 +263,12 @@ def __del__(self):
                 self.close()
             except Exception:
                 pass  # Ignore errors during cleanup
+
+
+class DictCursor(Cursor):
+    """Cursor that returns results as dictionaries instead of tuples/sequences"""
+
+    def __init__(self, connection: "Connection", **kwargs) -> None:
+        super().__init__(connection=connection, **kwargs)
+        # Override result set class to use DictResultSet
+        self._result_set_class = DictResultSet
diff --git a/pymongosql/result_set.py b/pymongosql/result_set.py
@@ -4,7 +4,6 @@
 from typing import Any, Dict, List, Optional, Sequence, Tuple
 
 import jmespath
-from pymongo.cursor import Cursor as MongoCursor
 from pymongo.errors import PyMongoError
 
 from .common import CursorIterator
@@ -20,28 +19,24 @@ class ResultSet(CursorIterator):
     def __init__(
         self,
         command_result: Optional[Dict[str, Any]] = None,
-        mongo_cursor: Optional[MongoCursor] = None,
         execution_plan: ExecutionPlan = None,
         arraysize: int = None,
+        database: Optional[Any] = None,
         **kwargs,
     ) -> None:
         super().__init__(arraysize=arraysize or self.DEFAULT_FETCH_SIZE, **kwargs)
 
-        # Handle both command results and legacy mongo cursor for backward compatibility
+        # Handle command results from db.command
         if command_result is not None:
             self._command_result = command_result
-            self._mongo_cursor = None
+            self._database = database
             # Extract cursor info from command result
             self._result_cursor = command_result.get("cursor", {})
+            self._cursor_id = self._result_cursor.get("id", 0)  # 0 means no more results
             self._raw_results = self._result_cursor.get("firstBatch", [])
             self._cached_results: List[Sequence[Any]] = []
-        elif mongo_cursor is not None:
-            self._mongo_cursor = mongo_cursor
-            self._command_result = None
-            self._raw_results = []
-            self._cached_results: List[Sequence[Any]] = []
         else:
-            raise ProgrammingError("Either command_result or mongo_cursor must be provided")
+            raise ProgrammingError("command_result must be provided")
 
         self._execution_plan = execution_plan
         self._is_closed = False
@@ -53,14 +48,22 @@ def __init__(
 
         # Process firstBatch immediately if available (after all attributes are set)
         if command_result is not None and self._raw_results:
-            processed_batch = [self._process_document(doc) for doc in self._raw_results]
-            # Convert dictionaries to sequences for DB API 2.0 compliance
-            sequence_batch = [self._dict_to_sequence(doc) for doc in processed_batch]
-            self._cached_results.extend(sequence_batch)
+            self._process_and_cache_batch(self._raw_results)
 
         # Build description from projection
         self._build_description()
 
+    def _process_and_cache_batch(self, batch: List[Dict[str, Any]]) -> None:
+        """Process and cache a batch of documents"""
+        if not batch:
+            return
+        # Process results through projection mapping
+        processed_batch = [self._process_document(doc) for doc in batch]
+        # Convert dictionaries to output format (sequence or dict)
+        formatted_batch = [self._format_result(doc) for doc in processed_batch]
+        self._cached_results.extend(formatted_batch)
+        self._total_fetched += len(batch)
+
     def _build_description(self) -> None:
         """Build column description from execution plan projection"""
         if not self._execution_plan.projection_stage:
@@ -85,37 +88,37 @@ def _ensure_results_available(self, count: int = 1) -> None:
         if self._cache_exhausted:
             return
 
-        if self._command_result is not None:
-            # For command results, we already have all data in firstBatch
-            # No additional fetching needed
-            self._cache_exhausted = True
-            return
+        # Fetch more results if needed and cursor has more data
+        while len(self._cached_results) < count and self._cursor_id != 0:
+            try:
+                # Use getMore to fetch next batch
+                if self._database and self._execution_plan.collection:
+                    getmore_cmd = {
+                        "getMore": self._cursor_id,
+                        "collection": self._execution_plan.collection,
+                    }
+                    result = self._database.command(getmore_cmd)
+
+                    # Extract and process next batch
+                    cursor_info = result.get("cursor", {})
+                    next_batch = cursor_info.get("nextBatch", [])
+                    self._process_and_cache_batch(next_batch)
+
+                    # Update cursor ID for next iteration
+                    self._cursor_id = cursor_info.get("id", 0)
+                else:
+                    # No database access, mark as exhausted
+                    self._cache_exhausted = True
+                    break
+
+            except PyMongoError as e:
+                self._errors.append({"error": str(e), "type": type(e).__name__})
+                self._cache_exhausted = True
+                raise DatabaseError(f"Error fetching more results: {e}")
 
-        elif self._mongo_cursor is not None:
-            # Fetch more results if needed (legacy mongo cursor support)
-            while len(self._cached_results) < count and not self._cache_exhausted:
-                try:
-                    # Iterate through cursor without calling limit() again
-                    batch = []
-                    for i, doc in enumerate(self._mongo_cursor):
-                        if i >= self.arraysize:
-                            break
-                        batch.append(doc)
-
-                    if not batch:
-                        self._cache_exhausted = True
-                        break
-
-                    # Process results through projection mapping
-                    processed_batch = [self._process_document(doc) for doc in batch]
-                    # Convert dictionaries to sequences for DB API 2.0 compliance
-                    sequence_batch = [self._dict_to_sequence(doc) for doc in processed_batch]
-                    self._cached_results.extend(sequence_batch)
-                    self._total_fetched += len(batch)
-
-                except PyMongoError as e:
-                    self._errors.append({"error": str(e), "type": type(e).__name__})
-                    raise DatabaseError(f"Error fetching results: {e}")
+        # Mark as exhausted if no more results available
+        if self._cursor_id == 0:
+            self._cache_exhausted = True
 
     def _process_document(self, doc: Dict[str, Any]) -> Dict[str, Any]:
         """Process a MongoDB document according to projection mapping"""
@@ -136,7 +139,10 @@ def _process_document(self, doc: Dict[str, Any]) -> Dict[str, Any]:
         return processed
 
     def _mongo_to_bracket_key(self, field_path: str) -> str:
-        """Convert Mongo dot-index notation to bracket notation for display keys.
+        """Convert Mongo dot-index notation to bracket notation.
+
+        Transforms numeric dot segments into bracket indices for both display keys
+        and JMESPath-compatible field paths.
 
         Examples:
             items.0 -> items[0]
@@ -147,15 +153,6 @@ def _mongo_to_bracket_key(self, field_path: str) -> str:
         # Replace .<number> with [<number>]
         return re.sub(r"\.(\d+)", r"[\1]", field_path)
 
-    def _mongo_to_jmespath(self, field_path: str) -> str:
-        """Convert Mongo-style field path to JMESPath-compatible path.
-
-        This mainly transforms numeric dot segments into bracket indices.
-        """
-        if not isinstance(field_path, str):
-            return field_path
-        return self._mongo_to_bracket_key(field_path)
-
     def _get_nested_value(self, doc: Dict[str, Any], field_path: str) -> Any:
         """Extract nested field value from document using JMESPath
 
@@ -170,16 +167,16 @@ def _get_nested_value(self, doc: Dict[str, Any], field_path: str) -> Any:
             if "." not in field_path and "[" not in field_path:
                 return doc.get(field_path)
 
-            # Convert normalized Mongo-style numeric segments to JMESPath bracket notation
-            jmes_field = self._mongo_to_jmespath(field_path)
+            # Convert normalized Mongo-style numeric segments to bracket notation
+            normalized_field = self._mongo_to_bracket_key(field_path)
             # Use jmespath for complex paths
-            return jmespath.search(jmes_field, doc)
+            return jmespath.search(normalized_field, doc)
         except Exception as e:
             _logger.debug(f"Error extracting field '{field_path}': {e}")
             return None
 
-    def _dict_to_sequence(self, doc: Dict[str, Any]) -> Tuple[Any, ...]:
-        """Convert document dictionary to sequence according to column order"""
+    def _format_result(self, doc: Dict[str, Any]) -> Tuple[Any, ...]:
+        """Format processed document to output format (tuple for DB API 2.0 compliance)"""
         if self._column_names is None:
             # First time - establish column order
             self._column_names = list(doc.keys())
@@ -259,33 +256,12 @@ def fetchall(self) -> List[Sequence[Any]]:
         all_results = []
 
         try:
-            if self._command_result is not None:
-                # Handle command result (db.command)
-                if not self._cache_exhausted:
-                    # Results are already processed in constructor, just extend
-                    all_results.extend(self._cached_results)
-                    self._total_fetched += len(self._cached_results)
-                    self._cache_exhausted = True
-
-            elif self._mongo_cursor is not None:
-                # Handle legacy mongo cursor (for backward compatibility)
-                # Add cached results
+            # Handle command result (db.command)
+            if not self._cache_exhausted:
+                # Results are already processed in constructor, just extend
                 all_results.extend(self._cached_results)
-                self._cached_results.clear()
-
-                # Fetch remaining from cursor
-                if not self._cache_exhausted:
-                    # Iterate through all remaining documents in the cursor
-                    remaining_docs = list(self._mongo_cursor)
-                    if remaining_docs:
-                        # Process results through projection mapping
-                        processed_docs = [self._process_document(doc) for doc in remaining_docs]
-                        # Convert dictionaries to sequences for DB API 2.0 compliance
-                        sequence_docs = [self._dict_to_sequence(doc) for doc in processed_docs]
-                        all_results.extend(sequence_docs)
-                        self._total_fetched += len(remaining_docs)
-
-                    self._cache_exhausted = True
+                self._total_fetched += len(self._cached_results)
+                self._cache_exhausted = True
 
         except PyMongoError as e:
             self._errors.append({"error": str(e), "type": type(e).__name__})
@@ -303,17 +279,10 @@ def is_closed(self) -> bool:
     def close(self) -> None:
         """Close the result set and free resources"""
         if not self._is_closed:
-            try:
-                if self._mongo_cursor:
-                    self._mongo_cursor.close()
-                # No special cleanup needed for command results
-            except Exception as e:
-                _logger.warning(f"Error closing MongoDB cursor: {e}")
-            finally:
-                self._is_closed = True
-                self._mongo_cursor = None
-                self._command_result = None
-                self._cached_results.clear()
+            self._is_closed = True
+            self._command_result = None
+            self._database = None
+            self._cached_results.clear()
 
     def __enter__(self):
         return self
@@ -322,5 +291,13 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         self.close()
 
 
+class DictResultSet(ResultSet):
+    """Result set that returns dictionaries instead of sequences"""
+
+    def _format_result(self, doc: Dict[str, Any]) -> Dict[str, Any]:
+        """Override to return dictionary directly instead of converting to sequence"""
+        return doc
+
+
 # For backward compatibility
 MongoResultSet = ResultSet
diff --git a/tests/test_dict_cursor.py b/tests/test_dict_cursor.py