@@ -17,20 +17,20 @@ class SalesforceCommitMessage(WriterCommitMessage):
 
 class SalesforceDataSource(DataSource):
     """
-    A Salesforce streaming sink for PySpark to write data to Salesforce objects.
+    A Salesforce streaming datasource for PySpark to write data to Salesforce objects.
 
-    This data sink enables writing streaming data from Spark to Salesforce using the
+    This datasource enables writing streaming data from Spark to Salesforce using the
     Salesforce REST API. It supports common Salesforce objects like Account, Contact,
     Opportunity, and custom objects.
 
-    Note: This is a write-only sink, not a full bidirectional data source.
+    Note: This is a write-only datasource, not a full bidirectional data source.
 
-    Name: `salesforce`
+    Name: `pyspark.datasource.salesforce`
 
     Notes
     -----
     - Requires the `simple-salesforce` library for Salesforce API integration
-    - **Write-only sink**: Only supports streaming write operations (no read operations)
+    - **Write-only datasource**: Only supports streaming write operations (no read operations)
     - Uses Salesforce username/password/security token authentication
     - Supports batch writing with Salesforce Composite Tree API for efficient processing
     - Implements exactly-once semantics through Spark's checkpoint mechanism
@@ -61,7 +61,7 @@ class SalesforceDataSource(DataSource):
 
     Examples
     --------
-    Register the Salesforce sink:
+    Register the Salesforce datasource:
 
     >>> from pyspark_datasources import SalesforceDataSource
     >>> spark.dataSource.register(SalesforceDataSource)
@@ -82,9 +82,9 @@ class SalesforceDataSource(DataSource):
     ... (col("value") * 100000).cast("double").alias("AnnualRevenue")
     ... )
     >>>
-    >>> # Write to Salesforce using the sink
+    >>> # Write to Salesforce using the datasource
     >>> query = account_data.writeStream \\
-    ... .format("salesforce") \\
+    ... .format("pyspark.datasource.salesforce") \\
     ... .option("username", "your-username@company.com") \\
     ... .option("password", "your-password") \\
     ... .option("security_token", "your-security-token") \\
@@ -102,7 +102,7 @@ class SalesforceDataSource(DataSource):
     ... )
     >>>
     >>> query = contact_data.writeStream \\
-    ... .format("salesforce") \\
+    ... .format("pyspark.datasource.salesforce") \\
     ... .option("username", "your-username@company.com") \\
     ... .option("password", "your-password") \\
     ... .option("security_token", "your-security-token") \\
@@ -118,7 +118,7 @@ class SalesforceDataSource(DataSource):
     ... )
     >>>
     >>> query = custom_data.writeStream \\
-    ... .format("salesforce") \\
+    ... .format("pyspark.datasource.salesforce") \\
     ... .option("username", "your-username@company.com") \\
     ... .option("password", "your-password") \\
     ... .option("security_token", "your-security-token") \\
@@ -132,7 +132,7 @@ class SalesforceDataSource(DataSource):
     >>> contact_schema = "FirstName STRING NOT NULL, LastName STRING NOT NULL, Email STRING, Phone STRING"
     >>>
     >>> query = contact_data.writeStream \\
-    ... .format("salesforce") \\
+    ... .format("pyspark.datasource.salesforce") \\
     ... .option("username", "your-username@company.com") \\
     ... .option("password", "your-password") \\
     ... .option("security_token", "your-security-token") \\
@@ -152,7 +152,7 @@ class SalesforceDataSource(DataSource):
     ... )
     >>>
     >>> query = opportunity_data.writeStream \\
-    ... .format("salesforce") \\
+    ... .format("pyspark.datasource.salesforce") \\
     ... .option("username", "your-username@company.com") \\
     ... .option("password", "your-password") \\
     ... .option("security_token", "your-security-token") \\
@@ -163,7 +163,7 @@ class SalesforceDataSource(DataSource):
 
     Key Features:
 
-    - **Write-only sink**: Designed specifically for writing data to Salesforce
+    - **Write-only datasource**: Designed specifically for writing data to Salesforce
     - **Batch processing**: Uses Salesforce Composite Tree API for efficient bulk writes
     - **Exactly-once semantics**: Integrates with Spark's checkpoint mechanism
     - **Error handling**: Graceful fallback to individual record creation if batch fails
@@ -172,7 +172,7 @@ class SalesforceDataSource(DataSource):
 
     @classmethod
     def name(cls) -> str:
-        """Return the short name for this Salesforce sink."""
+        """Return the short name for this Salesforce datasource."""
         return "pyspark.datasource.salesforce"
 
     def schema(self) -> str:
@@ -200,12 +200,12 @@ def schema(self) -> str:
         """
 
     def streamWriter(self, schema: StructType, overwrite: bool) -> "SalesforceStreamWriter":
-        """Create a stream writer for Salesforce sink integration."""
+        """Create a stream writer for Salesforce datasource integration."""
         return SalesforceStreamWriter(schema, self.options)
 
 
 class SalesforceStreamWriter(DataSourceStreamWriter):
-    """Stream writer implementation for Salesforce sink integration."""
+    """Stream writer implementation for Salesforce datasource integration."""
 
     def __init__(self, schema: StructType, options: Dict[str, str]):
         self.schema = schema
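
For reviewers who want to try the rename locally, here is a minimal sketch that exercises the new format string end to end. It is an illustration, not part of the patch: the "salesforce_object" and "checkpointLocation" option names are assumptions (the options section is elided by the hunk context above), and the credentials are placeholders.

    # Sketch only: exercises the renamed "pyspark.datasource.salesforce" format.
    # Assumed option names: "salesforce_object", "checkpointLocation".
    from pyspark.sql import SparkSession
    from pyspark.sql.functions import col, concat, lit

    from pyspark_datasources import SalesforceDataSource

    spark = SparkSession.builder.appName("salesforce-rename-check").getOrCreate()
    spark.dataSource.register(SalesforceDataSource)

    # Build a stream of Account-shaped rows from the built-in rate source.
    account_data = (
        spark.readStream.format("rate")
        .option("rowsPerSecond", 1)
        .load()
        .select(
            concat(lit("Account-"), col("value")).alias("Name"),
            (col("value") * 100000).cast("double").alias("AnnualRevenue"),
        )
    )

    # The format string must now match SalesforceDataSource.name():
    # "pyspark.datasource.salesforce", not the old short name "salesforce".
    query = (
        account_data.writeStream.format("pyspark.datasource.salesforce")
        .option("username", "your-username@company.com")
        .option("password", "your-password")
        .option("security_token", "your-security-token")
        .option("salesforce_object", "Account")  # assumed option name
        .option("checkpointLocation", "/tmp/salesforce-checkpoint")
        .start()
    )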