Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: awslabs/aws-athena-query-federation
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: a4c3c227b857bd391d97ff63bc9e35d830b76e3f
Choose a base ref
..
head repository: awslabs/aws-athena-query-federation
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: c0d9c8e06eb36511ec5d58361d9d267b5c8ce75b
Choose a head ref
Showing with 3,598 additions and 34 deletions.
  1. +1 −1 .github/workflows/maven_push.yml
  2. +1 −1 .github/workflows/publish_to_maven_central.yml
  3. +4 −0 .github/workflows/run_release_tests.yml
  4. +81 −0 athena-aws-cmdb/athena-aws-cmdb-package.yaml
  5. +160 −0 athena-clickhouse/athena-clickhouse-package.yaml
  6. +109 −0 athena-cloudera-hive/athena-cloudera-hive-package.yaml
  7. +115 −0 athena-cloudera-impala/athena-cloudera-impala-package.yaml
  8. +75 −0 athena-cloudwatch-metrics/athena-cloudwatch-metrics-package.yaml
  9. +153 −0 athena-cloudwatch/athena-cloudwatch-package.yaml
  10. +115 −0 athena-datalakegen2/athena-datalakegen2-package.yaml
  11. +116 −0 athena-db2-as400/athena-db2-as400-package.yaml
  12. +116 −0 athena-db2/athena-db2-package.yaml
  13. +105 −0 athena-docdb/athena-docdb-package.yaml
  14. +152 −0 athena-elasticsearch/athena-elasticsearch-package.yaml
  15. +8 −11 ...ion-sdk/src/main/java/com/amazonaws/athena/connector/lambda/connection/EnvironmentProperties.java
  16. +129 −0 athena-hbase/athena-hbase-package.yaml
  17. +113 −0 athena-hortonworks-hive/athena-hortonworks-hive-package.yaml
  18. +177 −0 athena-kafka/athena-kafka-package.yaml
  19. +12 −0 athena-kafka/src/main/java/com/amazonaws/athena/connectors/kafka/KafkaRecordHandler.java
  20. +17 −11 athena-kafka/src/test/java/com/amazonaws/athena/connectors/kafka/KafkaRecordHandlerTest.java
  21. +173 −0 athena-msk/athena-msk-package.yaml
  22. +4 −0 athena-msk/src/main/java/com/amazonaws/athena/connectors/msk/consumer/BaseMskConsumer.java
  23. +16 −10 athena-msk/src/test/java/com/amazonaws/athena/connectors/msk/AmazonMskRecordHandlerTest.java
  24. +153 −0 athena-mysql/athena-mysql-package.yaml
  25. +134 −0 athena-neptune/athena-neptune-package.yaml
  26. +158 −0 athena-oracle/athena-oracle-package.yaml
  27. +165 −0 athena-postgresql/athena-postgresql-package.yaml
  28. +120 −0 athena-redis/athena-redis-package.yaml
  29. +167 −0 athena-redshift/athena-redshift-package.yaml
  30. +113 −0 athena-saphana/athena-saphana-package.yaml
  31. +158 −0 athena-sqlserver/athena-sqlserver-package.yaml
  32. +147 −0 athena-synapse/athena-synapse-package.yaml
  33. +118 −0 athena-teradata/athena-teradata-package.yaml
  34. +83 −0 athena-timestream/athena-timestream-package.yaml
  35. +73 −0 athena-tpcds/athena-tpcds-package.yaml
  36. +55 −0 athena-udfs/athena-udfs-package.yaml
  37. +2 −0 pom.xml
2 changes: 1 addition & 1 deletion .github/workflows/maven_push.yml
Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@ jobs:
env:
AWS_DEFAULT_REGION: us-east-1
AWS_REGION: us-east-1
run: mvn -B clean package -T 1C --file pom.xml -Dmaven.compiler.release=8 -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN --no-transfer-progress
run: mvn -B clean package -T 1C --file pom.xml -Dmaven.compiler.release=11 -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN --no-transfer-progress
# Identify if any files were modified as a result of running maven build.
- name: Identify any Maven Build changes
run: >
2 changes: 1 addition & 1 deletion .github/workflows/publish_to_maven_central.yml
Original file line number Diff line number Diff line change
@@ -26,7 +26,7 @@ jobs:
gpg-passphrase: MAVEN_GPG_PASSPHRASE

- name: Publish to Apache Maven Central
run: mvn clean deploy --batch-mode -Dmaven.compiler.release=8 -am -P release -pl ".,athena-federation-sdk,athena-dynamodb,athena-cloudwatch,athena-cloudwatch-metrics,athena-aws-cmdb,athena-jdbc,athena-mysql"
run: mvn clean deploy --batch-mode -Dmaven.compiler.release=11 -am -P release -pl ".,athena-federation-sdk,athena-dynamodb,athena-cloudwatch,athena-cloudwatch-metrics,athena-aws-cmdb,athena-jdbc,athena-mysql"
env:
MAVEN_USERNAME: ${{ secrets.MAVEN_USERNAME }}
MAVEN_PASSWORD: ${{ secrets.MAVEN_PASSWORD }}
4 changes: 4 additions & 0 deletions .github/workflows/run_release_tests.yml
Original file line number Diff line number Diff line change
@@ -51,3 +51,7 @@ jobs:
DATABASE_PASSWORD: ${{ secrets.DATABASE_PASSWORD }}
S3_DATA_PATH: ${{ secrets.S3_DATA_PATH }}
SPILL_BUCKET: ${{ secrets.SPILL_BUCKET }}
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
81 changes: 81 additions & 0 deletions athena-aws-cmdb/athena-aws-cmdb-package.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Template is processed by the AWS SAM transform.
Transform: AWS::Serverless-2016-10-31

# Publishing details for the AWS Serverless Application Repository listing.
Metadata:
  AWS::ServerlessRepo::Application:
    Name: AthenaAwsCmdbConnector
    Description: 'This connector enables Amazon Athena to communicate with various AWS Services, making your resource inventories accessible via SQL.'
    Author: default author
    SpdxLicenseId: Apache-2.0
    LicenseUrl: LICENSE.txt
    ReadmeUrl: README.md
    Labels:
      - athena-federation
    HomePageUrl: https://github.com/awslabs/aws-athena-query-federation
    SourceCodeUrl: https://github.com/awslabs/aws-athena-query-federation
    # Quoted so the version is always read as a string.
    SemanticVersion: '2022.47.1'
# Deployment-time inputs for the AWS CMDB connector.
Parameters:
  # Used as the Lambda FunctionName.
  AthenaCatalogName:
    Type: String
    AllowedPattern: '^[a-z0-9-_]{1,64}$'
    Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$'

  # Spill location; both values are exported to the function as environment variables.
  SpillBucket:
    Type: String
    Description: 'The name of the bucket where this function can spill data.'
  SpillPrefix:
    Type: String
    Default: athena-spill
    Description: 'The prefix within SpillBucket where this function can spill data.'

  # Lambda sizing.
  LambdaTimeout:
    Type: Number
    Default: 900
    Description: 'Maximum Lambda invocation runtime in seconds. (min 1 - 900 max)'
  LambdaMemory:
    Type: Number
    Default: 3008
    Description: 'Lambda memory in MB (min 128 - 3008 max).'

  # Kept as a string ('true' / 'false') because it is passed through as an environment variable.
  DisableSpillEncryption:
    Type: String
    Default: 'false'
    Description: "WARNING: If set to 'true' encryption for spilled data is disabled."

  # Empty string means "no permissions boundary" (see the HasPermissionsBoundary condition).
  PermissionsBoundaryARN:
    Type: String
    Default: ''
    Description: "(Optional) An IAM policy ARN to use as the PermissionsBoundary for the created Lambda function's execution role"
Conditions:
  # True when a non-empty PermissionsBoundaryARN was supplied.
  HasPermissionsBoundary:
    Fn::Not:
      - Fn::Equals:
          - Ref: PermissionsBoundaryARN
          - ''
Resources:
  # The connector Lambda function. SAM generates its execution role from the Policies list below.
  ConnectorConfig:
    Type: 'AWS::Serverless::Function'
    Properties:
      Environment:
        Variables:
          disable_spill_encryption: !Ref DisableSpillEncryption
          spill_bucket: !Ref SpillBucket
          spill_prefix: !Ref SpillPrefix
      FunctionName: !Ref AthenaCatalogName
      Handler: "com.amazonaws.athena.connectors.aws.cmdb.AwsCmdbCompositeHandler"
      CodeUri: "./target/athena-aws-cmdb-2022.47.1.jar"
      Description: "Enables Amazon Athena to communicate with various AWS Services, making your resource inventories accessible via SQL."
      Runtime: java11
      Timeout: !Ref LambdaTimeout
      MemorySize: !Ref LambdaMemory
      # Only set when a boundary ARN was provided; otherwise the property is omitted.
      PermissionsBoundary: !If [ HasPermissionsBoundary, !Ref PermissionsBoundaryARN, !Ref "AWS::NoValue" ]
      Policies:
        # Read-only describe/list access to the services exposed as tables, plus query-status lookup.
        # (athena:GetQueryExecution was previously listed twice; the duplicate entry is removed.)
        - Statement:
            - Action:
                - autoscaling:Describe*
                - elasticloadbalancing:Describe*
                - ec2:Describe*
                - elasticmapreduce:Describe*
                - elasticmapreduce:List*
                - rds:Describe*
                - rds:ListTagsForResource
                - athena:GetQueryExecution
                - s3:ListBucket
              Effect: Allow
              Resource: '*'
          Version: '2012-10-17'
        # S3CrudPolicy allows our connector to spill large responses to S3. You can optionally replace this pre-made policy
        # with one that is more restrictive and can only 'put' but not read, delete, or overwrite files.
        - S3CrudPolicy:
            BucketName: !Ref SpillBucket
160 changes: 160 additions & 0 deletions athena-clickhouse/athena-clickhouse-package.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@

# Template is processed by the AWS SAM transform.
Transform: AWS::Serverless-2016-10-31

# Publishing details for the AWS Serverless Application Repository listing.
Metadata:
  AWS::ServerlessRepo::Application:
    Name: AthenaClickHouseConnector
    Description: 'This connector enables Amazon Athena to communicate with your ClickHouse instance(s) using JDBC driver.'
    Author: default author
    SpdxLicenseId: Apache-2.0
    LicenseUrl: LICENSE.txt
    ReadmeUrl: README.md
    Labels:
      - athena-federation
    HomePageUrl: https://github.com/awslabs/aws-athena-query-federation
    SourceCodeUrl: https://github.com/awslabs/aws-athena-query-federation
    # Quoted so the version is always read as a string.
    SemanticVersion: '2022.47.1'
# Deployment-time inputs for the ClickHouse connector.
Parameters:
  # Used as the Lambda FunctionName and in the CloudWatch Logs ARN of the execution policy.
  LambdaFunctionName:
    Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$'
    Type: String
    AllowedPattern: '^[a-z0-9-_]{1,64}$'
  # Exported to the function as the "default" environment variable.
  DefaultConnectionString:
    Description: 'The default connection string is used when catalog is "lambda:${LambdaFunctionName}". Catalog specific Connection Strings can be added later. Format: ${DatabaseType}://${NativeJdbcConnectionString}.'
    Type: String
  # The example prefix is now spelled the same way everywhere in the description
  # (it previously mixed "AthenaClickHouseFederation", "AthenaJdbcFederation" and "AthenaClickhouseFederation").
  SecretNamePrefix:
    Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena ClickHouse Federation secret names can be prefixed with "AthenaClickHouseFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaClickHouseFederation*". Parameter value in this case should be "AthenaClickHouseFederation". If you do not have a prefix, you can manually update the IAM policy to allow any secret names.'
    Type: String
  SpillBucket:
    Description: 'The name of the bucket where this function can spill data.'
    Type: String
  SpillPrefix:
    Description: 'The prefix within SpillBucket where this function can spill data.'
    Type: String
    Default: athena-spill
  LambdaTimeout:
    Description: 'Maximum Lambda invocation runtime in seconds. (min 1 - 900 max)'
    Default: 900
    Type: Number
  LambdaMemory:
    Description: 'Lambda memory in MB (min 128 - 3008 max).'
    Default: 3008
    Type: Number
  # Empty string means "create a role for the function" (see the NotHasLambdaRole condition).
  LambdaRoleARN:
    Description: "(Optional) A custom role to be used by the Connector lambda"
    Type: String
    Default: ""
  # Kept as a string ('true' / 'false') because it is passed through as an environment variable.
  DisableSpillEncryption:
    Description: 'If set to ''false'' data spilled to S3 is encrypted with AES GCM'
    Default: 'false'
    Type: String
  SecurityGroupIds:
    Description: 'One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)'
    Type: 'List<AWS::EC2::SecurityGroup::Id>'
  SubnetIds:
    Description: 'One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access your data source. (e.g. subnet1,subnet2)'
    Type: 'List<AWS::EC2::Subnet::Id>'
  # Empty string means "no permissions boundary" (see the HasPermissionsBoundary condition).
  PermissionsBoundaryARN:
    Description: "(Optional) An IAM policy ARN to use as the PermissionsBoundary for the created Lambda function's execution role"
    Default: ''
    Type: String
Conditions:
  # True when a non-empty PermissionsBoundaryARN was supplied.
  HasPermissionsBoundary:
    Fn::Not:
      - Fn::Equals:
          - Ref: PermissionsBoundaryARN
          - ''
  # True when no custom role was supplied, i.e. the template must create the role itself.
  NotHasLambdaRole:
    Fn::Equals:
      - Ref: LambdaRoleARN
      - ''
Resources:
  # The connector Lambda function. Its role is either the caller-supplied LambdaRoleARN
  # or the FunctionRole defined below.
  JdbcConnectorConfig:
    Type: 'AWS::Serverless::Function'
    Properties:
      Environment:
        Variables:
          disable_spill_encryption: !Ref DisableSpillEncryption
          spill_bucket: !Ref SpillBucket
          spill_prefix: !Ref SpillPrefix
          default: !Ref DefaultConnectionString
      FunctionName: !Ref LambdaFunctionName
      Handler: "com.amazonaws.athena.connectors.clickhouse.ClickHouseMuxCompositeHandler"
      CodeUri: "./target/athena-clickhouse-2022.47.1.jar"
      Description: "Enables Amazon Athena to communicate with ClickHouse using JDBC"
      Runtime: java11
      Timeout: !Ref LambdaTimeout
      MemorySize: !Ref LambdaMemory
      # No PermissionsBoundary here: SAM only honours that property for a role it generates itself,
      # and this function always receives an explicit Role. The boundary is set on FunctionRole instead.
      Role: !If [NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleARN]
      VpcConfig:
        SecurityGroupIds: !Ref SecurityGroupIds
        SubnetIds: !Ref SubnetIds

  # Execution role, created only when no custom LambdaRoleARN was supplied.
  FunctionRole:
    Condition: NotHasLambdaRole
    Type: AWS::IAM::Role
    Properties:
      # Apply the optional permissions boundary to the role this template actually creates.
      PermissionsBoundary: !If [ HasPermissionsBoundary, !Ref PermissionsBoundaryARN, !Ref "AWS::NoValue" ]
      ManagedPolicyArns:
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole"
      AssumeRolePolicyDocument:
        # Quoted so YAML does not read the policy language version as a date.
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action:
              - "sts:AssumeRole"

  # Inline policy attached to FunctionRole; created under the same condition as the role.
  FunctionExecutionPolicy:
    Condition: NotHasLambdaRole
    Type: "AWS::IAM::Policy"
    Properties:
      Roles:
        - !Ref FunctionRole
      PolicyName: FunctionExecutionPolicy
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          # Secret access is limited to names starting with SecretNamePrefix, as that parameter's
          # description promises (previously every secret in the account/region was readable).
          - Action:
              - secretsmanager:DescribeSecret
              - secretsmanager:GetSecretValue
              - secretsmanager:GetResourcePolicy
              - secretsmanager:ListSecretVersionIds
            Effect: Allow
            Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretNamePrefix}*'
          - Action:
              - secretsmanager:ListSecrets
            Effect: Allow
            Resource: '*'
          - Action:
              - logs:CreateLogGroup
            Effect: Allow
            Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*'
          - Action:
              - logs:CreateLogStream
              - logs:PutLogEvents
            Effect: Allow
            Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*'
          - Action:
              - athena:GetQueryExecution
            Effect: Allow
            Resource: '*'
          - Action:
              - ec2:CreateNetworkInterface
              - ec2:DeleteNetworkInterface
              - ec2:DescribeNetworkInterfaces
              - ec2:DetachNetworkInterface
            Effect: Allow
            Resource: '*'
          # Spill bucket access: the bucket itself and every object in it.
          - Action:
              - s3:GetObject
              - s3:ListBucket
              - s3:GetBucketLocation
              - s3:GetObjectVersion
              - s3:PutObject
              - s3:PutObjectAcl
              - s3:GetLifecycleConfiguration
              - s3:PutLifecycleConfiguration
              - s3:DeleteObject
            Effect: Allow
            Resource:
              - !Sub 'arn:${AWS::Partition}:s3:::${SpillBucket}'
              - !Sub 'arn:${AWS::Partition}:s3:::${SpillBucket}/*'
109 changes: 109 additions & 0 deletions athena-cloudera-hive/athena-cloudera-hive-package.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Template is processed by the AWS SAM transform.
Transform: AWS::Serverless-2016-10-31

# Publishing details for the AWS Serverless Application Repository listing.
Metadata:
  AWS::ServerlessRepo::Application:
    Name: AthenaClouderaHiveConnector
    Description: 'This connector enables Amazon Athena to communicate with your Cloudera Hive instance(s) using JDBC driver.'
    Author: default author
    SpdxLicenseId: Apache-2.0
    LicenseUrl: LICENSE.txt
    ReadmeUrl: README.md
    Labels:
      - athena-federation
    HomePageUrl: https://github.com/awslabs/aws-athena-query-federation
    SourceCodeUrl: https://github.com/awslabs/aws-athena-query-federation
    # Quoted so the version is always read as a string.
    SemanticVersion: '2022.47.1'
# Deployment-time inputs for the Cloudera Hive connector.
Parameters:
  # Used as the Lambda FunctionName and in the CloudWatch Logs ARN of the function's policy.
  LambdaFunctionName:
    Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$'
    Type: String
    AllowedPattern: '^[a-z0-9-_]{1,64}$'
  # Exported to the function as the "default" environment variable.
  DefaultConnectionString:
    Description: 'The default connection string is used when catalog is "lambda:${LambdaFunctionName}". Catalog specific Connection Strings can be added later. Format: ${DatabaseType}://${NativeJdbcConnectionString}.'
    Type: String
  # Scopes the secretsmanager:GetSecretValue statement to secrets whose names start with this prefix.
  SecretNamePrefix:
    Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to allow any secret names.'
    Type: String
  SpillBucket:
    Description: 'The name of the bucket where this function can spill data.'
    Type: String
  SpillPrefix:
    Description: 'The prefix within SpillBucket where this function can spill data.'
    Type: String
    Default: athena-spill
  LambdaTimeout:
    Description: 'Maximum Lambda invocation runtime in seconds. (min 1 - 900 max)'
    Default: 900
    Type: Number
  LambdaMemory:
    Description: 'Lambda memory in MB (min 128 - 3008 max).'
    Default: 3008
    Type: Number
  # Kept as a string ('true' / 'false') because it is passed through as an environment variable.
  DisableSpillEncryption:
    Description: 'If set to ''false'' data spilled to S3 is encrypted with AES GCM'
    Default: 'false'
    Type: String
  SecurityGroupIds:
    Description: 'One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)'
    Type: 'List<AWS::EC2::SecurityGroup::Id>'
  SubnetIds:
    Description: 'One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access your data source. (e.g. subnet1,subnet2)'
    Type: 'List<AWS::EC2::Subnet::Id>'
  # Empty string means "no permissions boundary" (see the HasPermissionsBoundary condition).
  PermissionsBoundaryARN:
    Description: "(Optional) An IAM policy ARN to use as the PermissionsBoundary for the created Lambda function's execution role"
    Default: ''
    Type: String
Conditions:
  # True when a non-empty PermissionsBoundaryARN was supplied.
  HasPermissionsBoundary:
    Fn::Not:
      - Fn::Equals:
          - Ref: PermissionsBoundaryARN
          - ''
Resources:
  # The connector Lambda function. SAM generates its execution role from the Policies list below.
  JdbcConnectorConfig:
    Type: 'AWS::Serverless::Function'
    Properties:
      Environment:
        Variables:
          disable_spill_encryption: !Ref DisableSpillEncryption
          spill_bucket: !Ref SpillBucket
          spill_prefix: !Ref SpillPrefix
          default: !Ref DefaultConnectionString
      FunctionName: !Ref LambdaFunctionName
      Handler: "com.amazonaws.athena.connectors.cloudera.HiveMuxCompositeHandler"
      CodeUri: "./target/athena-cloudera-hive-2022.47.1.jar"
      # Spelling corrected from "Coludera".
      Description: "Enables Amazon Athena to communicate with Cloudera Hive using JDBC"
      Runtime: java11
      Timeout: !Ref LambdaTimeout
      MemorySize: !Ref LambdaMemory
      # Only set when a boundary ARN was provided; otherwise the property is omitted.
      PermissionsBoundary: !If [ HasPermissionsBoundary, !Ref PermissionsBoundaryARN, !Ref "AWS::NoValue" ]
      Policies:
        # Read only the secrets whose names start with SecretNamePrefix.
        - Statement:
            - Action:
                - secretsmanager:GetSecretValue
              Effect: Allow
              Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretNamePrefix}*'
          Version: '2012-10-17'
        - Statement:
            - Action:
                - logs:CreateLogGroup
              Effect: Allow
              Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*'
          Version: '2012-10-17'
        # Log stream writes are limited to this function's own log group.
        - Statement:
            - Action:
                - logs:CreateLogStream
                - logs:PutLogEvents
              Effect: Allow
              Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*'
          Version: '2012-10-17'
        - Statement:
            - Action:
                - athena:GetQueryExecution
              Effect: Allow
              Resource: '*'
          Version: '2012-10-17'
        # S3CrudPolicy allows our connector to spill large responses to S3. You can optionally replace this pre-made policy
        # with one that is more restrictive and can only 'put' but not read, delete, or overwrite files.
        - S3CrudPolicy:
            BucketName: !Ref SpillBucket
        # VPCAccessPolicy allows our connector to run in a VPC so that it can access your data source.
        - VPCAccessPolicy: {}
      VpcConfig:
        SecurityGroupIds: !Ref SecurityGroupIds
        SubnetIds: !Ref SubnetIds
Loading