"""AWS Glue job: read a CSV from S3, clean it, and append it to a Redshift table.

Required job parameters:
    JOB_NAME, s3_input_path, redshift_jdbc_url, redshift_table

Optional job parameters:
    redshift_user, redshift_password -- when omitted, the placeholder values
    below are used, which matches the script's previous behavior.
"""
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from pyspark.sql import SQLContext
from pyspark.sql.functions import col

# Parameters that must always be supplied to the job.
_REQUIRED_PARAMS = ['JOB_NAME', 's3_input_path', 'redshift_jdbc_url', 'redshift_table']

# Optional credential parameters mapped to their fallback values. The fallbacks
# are the placeholders the script used before, so job definitions that pass
# only the required parameters keep working unchanged.
# NOTE(review): passing secrets as plain job arguments is still weak; consider
# a Glue connection or a secrets store -- confirm what the deployment allows.
_CREDENTIAL_DEFAULTS = {
    'redshift_user': "your_redshift_user",
    'redshift_password': "your_redshift_password",
}


def _param_supplied(name, argv):
    """Return True when ``--name`` or ``--name=value`` appears in *argv*."""
    flag = '--' + name
    return any(arg == flag or arg.startswith(flag + '=') for arg in argv)


# Initialize Spark and Glue contexts
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session

try:
    # getResolvedOptions treats every requested name as required, so only ask
    # for the optional credentials when they were actually passed to the job.
    supplied_optional = [
        name for name in _CREDENTIAL_DEFAULTS if _param_supplied(name, sys.argv)
    ]
    args = getResolvedOptions(sys.argv, _REQUIRED_PARAMS + supplied_optional)

    redshift_user = args.get('redshift_user', _CREDENTIAL_DEFAULTS['redshift_user'])
    redshift_password = args.get(
        'redshift_password', _CREDENTIAL_DEFAULTS['redshift_password']
    )

    # Read data from S3 (first row is the header; column types are inferred)
    data_frame = spark.read.csv(args['s3_input_path'], header=True, inferSchema=True)

    # Perform data cleaning: drop exact duplicate rows, then replace nulls
    # with 0. With an integer fill value only numeric columns are filled;
    # nulls in columns of other types are left as they are.
    cleaned_data_frame = data_frame.dropDuplicates().fillna(0)

    # Write cleaned data to Redshift, appending to the target table
    (
        cleaned_data_frame.write
        .format("jdbc")
        .option("url", args['redshift_jdbc_url'])
        .option("dbtable", args['redshift_table'])
        .option("user", redshift_user)
        .option("password", redshift_password)
        .option("driver", "com.amazon.redshift.jdbc42.Driver")
        .mode("append")
        .save()
    )
finally:
    # Stop Spark context even when parameter parsing, the read or the write fails
    sc.stop()
Python software and documentation are licensed under the PSF License Agreement.
Starting with Python 3.8.6, examples, recipes, and other code in the documentation are dual licensed under the PSF License Agreement and the Zero-Clause BSD license.
Some software incorporated into Python is under different licenses. The licenses are listed with code falling under that license. See Licenses and Acknowledgements for Incorporated Software for an incomplete list of these licenses.
Python and its documentation are:
Copyright © 2001-2022 Python Software Foundation. All rights reserved.
Copyright © 2000 BeOpen.com. All rights reserved.
Copyright © 1995-2000 Corporation for National Research Initiatives. All rights reserved.
Copyright © 1991-1995 Stichting Mathematisch Centrum. All rights reserved.
See History and License for complete license and permissions information:
https://docs.python.org/3/license.html#psf-license