Create a project directory and cd into it:
mkdir chameleon
cd chameleon
Create a directory to house the pg_chameleon configuration file, then create the file:
mkdir -p .pg_chameleon/configuration/
touch .pg_chameleon/configuration/default.yml
Populate the file with your settings. Reference for all settings: https://pgchameleon.org/documents/configuration_file.html
It's key that log_dest is set to stdout so pg_chameleon runs in the foreground. If it isn't, pg_chameleon will run as a daemon and the container will simply exit.
# example config file
---
# global settings
pid_dir: '~/.pg_chameleon/pid/'
log_dir: '~/.pg_chameleon/logs/'
log_dest: stdout # <- important to run pg_chameleon in foreground
log_level: info
log_days_keep: 10
rollbar_key: ''
rollbar_env: ''

# type_override allows the user to override the default type conversion
# into a different one.
type_override:
  "tinyint(1)":
    override_to: boolean
    override_tables:
      - "*"

# postgres destination connection
pg_conn:
  host: "localhost"
  port: "5432"
  user: "usr_replica"
  password: "never_commit_password"
  database: "db_replica"
  charset: "utf8"

sources:
  mysql:
    db_conn:
      host: "localhost"
      port: "3306"
      user: "usr_replica"
      password: "never_commit_passwords"
      charset: 'utf8'
      connect_timeout: 10
    schema_mappings:
      delphis_mediterranea: loxodonta_africana
    limit_tables:
      - delphis_mediterranea.foo
    skip_tables:
      - delphis_mediterranea.bar
    grant_select_to:
      - usr_readonly
    lock_timeout: "120s"
    my_server_id: 100
    replica_batch_size: 10000
    replay_max_rows: 10000
    batch_retention: '1 day'
    copy_max_memory: "300M"
    copy_mode: 'file'
    out_dir: /tmp
    sleep_loop: 1
    on_error_replay: continue
    on_error_read: continue
    auto_maintenance: "disabled"
    gtid_enable: false
    type: mysql
    skip_events:
      insert:
        - delphis_mediterranea.foo # skips inserts on delphis_mediterranea.foo
      delete:
        - delphis_mediterranea # skips deletes on schema delphis_mediterranea
      update:
    keep_existing_schema: No

  pgsql:
    db_conn:
      host: "localhost"
      port: "5432"
      user: "usr_replica"
      password: "never_commit_passwords"
      database: "db_replica"
      charset: 'utf8'
      connect_timeout: 10
    schema_mappings:
      loxodonta_africana: elephas_maximus
    limit_tables:
      - loxodonta_africana.foo
    skip_tables:
      - loxodonta_africana.bar
    copy_max_memory: "300M"
    grant_select_to:
      - usr_readonly
    lock_timeout: "10s"
    my_server_id: 100
    replica_batch_size: 3000
    replay_max_rows: 10000
    sleep_loop: 5
    batch_retention: '1 day'
    copy_mode: 'file'
    out_dir: /tmp
    type: pgsql
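For the Aurora example later in this post, where the grants are made on the sakila schema, the schema_mappings entry would map that schema to whatever PostgreSQL schema you want the data to land in, along these lines (sakila_replica is just a placeholder name):
schema_mappings:
  sakila: sakila_replica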
Create a Dockerfile for building an image that runs pg_chameleon:
FROM python:3.8
# keep pip current before installing pg_chameleon
RUN /usr/local/bin/python -m pip install --upgrade pip
WORKDIR /root
RUN pip install pg_chameleon
# copy in the configuration created above and the entrypoint script
COPY .pg_chameleon/ ./.pg_chameleon
COPY run-chameleon.sh ./
RUN chmod +x ./run-chameleon.sh
ENTRYPOINT ["./run-chameleon.sh"]
Create the shell script that will be the entrypoint of the Docker image:
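The file name needs to match what the Dockerfile above copies (run-chameleon.sh):
touch run-chameleon.sh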
Populate the shell script with:
#!/bin/sh
# init_replica: performs the initial bulk copy from MySQL to PostgreSQL
# enable_replica: marks the source as ready for replication
# start_replica: starts streaming changes from the MySQL binlog
chameleon init_replica --config default --source REPLACE_WITH_YOUR_SOURCE_NAME --debug && \
chameleon enable_replica --config default --source REPLACE_WITH_YOUR_SOURCE_NAME && \
chameleon start_replica --config default --source REPLACE_WITH_YOUR_SOURCE_NAME
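Keep in mind that init_replica performs the full initial copy each time it runs, so this entrypoint is really only meant for the container's first run. If you restart the container after the initial copy has already completed, a start-only entrypoint along these lines (same source-name placeholder as above) avoids re-copying everything:
#!/bin/sh
# resume streaming changes from the MySQL binlog without redoing the initial copy
chameleon start_replica --config default --source REPLACE_WITH_YOUR_SOURCE_NAME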
Build the chameleon docker image:
docker build . -t chameleon
Enable binary logging on Aurora MySQL source database (AWS Documentation on Enabling Binary Logging)
To enable binary logging for an Aurora DB cluster, follow these steps:
- Open the Amazon Relational Database Service (Amazon RDS) console.
- In the navigation pane, choose Parameter groups.
Note: If you're using the default Aurora DB cluster parameter group, create a new DB cluster parameter group. For Type, choose DB Cluster Parameter Group.
- Select the custom DB cluster parameter group, choose Parameter group actions, and then select Edit.
- Change the value of the binlog_format parameter to ROW. pg_chameleon requires row-based binary logging, so STATEMENT or MIXED will not work for this replication setup.
- Choose Save changes.
If you created a new DB cluster parameter group above, attach it to your DB cluster:
- Open the Amazon RDS console.
- In the navigation pane, select your cluster under Clusters, and then choose Modify.
- Update the DB Cluster Parameter Group to the new DB cluster parameter group, and then choose Apply immediately.
- Choose Continue, and choose Modify cluster.
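If you'd rather script this than click through the console, a rough AWS CLI equivalent is sketched below. The parameter group name my-aurora-binlog-params, the family aurora-mysql5.7, and the cluster identifier my-aurora-cluster are placeholders for your own values, and because binlog_format is a static parameter the writer instance needs a reboot before the change takes effect:
# create a custom cluster parameter group (skip if you already have one)
aws rds create-db-cluster-parameter-group \
  --db-cluster-parameter-group-name my-aurora-binlog-params \
  --db-parameter-group-family aurora-mysql5.7 \
  --description "Aurora cluster parameters with binary logging enabled"
# set binlog_format to ROW in that parameter group
aws rds modify-db-cluster-parameter-group \
  --db-cluster-parameter-group-name my-aurora-binlog-params \
  --parameters "ParameterName=binlog_format,ParameterValue=ROW,ApplyMethod=pending-reboot"
# attach the parameter group to the cluster
aws rds modify-db-cluster \
  --db-cluster-identifier my-aurora-cluster \
  --db-cluster-parameter-group-name my-aurora-binlog-params \
  --apply-immediately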
Set up users in the MySQL source database and the PostgreSQL destination database:
# In MySQL DB:
CREATE USER usr_replica ;
SET PASSWORD FOR usr_replica=PASSWORD('replica');
GRANT ALL ON sakila.* TO 'usr_replica';
GRANT RELOAD ON *.* to 'usr_replica';
GRANT REPLICATION CLIENT ON *.* to 'usr_replica';
GRANT REPLICATION SLAVE ON *.* to 'usr_replica';
FLUSH PRIVILEGES;
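Before moving on, it can be worth confirming from any MySQL client that row-based binary logging is active and the grants took effect:
SHOW VARIABLES LIKE 'binlog_format'; -- should report ROW
SHOW GRANTS FOR 'usr_replica';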
# In Postgresql DB:
CREATE USER usr_replica WITH PASSWORD 'replica';
CREATE DATABASE db_replica WITH OWNER usr_replica;
Launch the docker image, but with bash as the entrypoint. This opens an interactive bash shell inside a container using our newly built chameleon image:
docker run -it --entrypoint bash chameleon
The following commands only need to be run once. They create pg_chameleon's replica catalogue in the destination PostgreSQL database and register the MySQL source defined in the configuration:
chameleon create_replica_schema --debug && \
chameleon add_source --config default --source mysql --debug && \
exit
Now we're ready to push the docker image to your favorite cloud hosting provider (e.g., AWS, Google Cloud, or Azure), or simply run it locally on your machine.
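Running it locally is just a plain docker run of the image built earlier, and pushing it elsewhere follows the usual tag-and-push flow (the registry URL below is a placeholder):
# run locally in the background
docker run -d --name chameleon chameleon
# or tag and push to the registry of your choice
docker tag chameleon your-registry.example.com/chameleon:latest
docker push your-registry.example.com/chameleon:latest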
And you're done! pg_chameleon will perform an initial data transfer and then start to replicate data over.
If you'd like to check the status of the replication, launch another chameleon container with bash as the entrypoint and issue the command:
chameleon show_status --source YOUR_SOURCE_NAME
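Two other pg_chameleon subcommands that come in handy once replication is running: show_errors lists replay batches that failed on the PostgreSQL side, and stop_replica halts the replica process for a source:
chameleon show_errors --config default
chameleon stop_replica --config default --source YOUR_SOURCE_NAME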
If you have any questions, leave a comment below!