(2) 中的运维技术->ansible
(3) 中文档
(4) ansible playbooks官方指南
---
# Playbook: install JDK 8u201 on every host of the
# clickhouse_cluster_setup_beijing group.
#
# Steps: push the tarball from the control node, unpack it under /usr/local,
# point /usr/local/java at the unpacked tree, and add the Java environment
# variables to /etc/profile.
- hosts: clickhouse_cluster_setup_beijing  # a single host or an inventory group
  remote_user: root  # run as the root account
  tasks:
    - name: copy jdk remote hosts
      copy:
        src: /root/usr/jdk-8u201-linux-x64.tar.gz
        dest: /usr/local/
        backup: true

    # `creates` makes re-runs skip the extraction once the tree exists.
    - name: tar jdk
      unarchive:
        src: /usr/local/jdk-8u201-linux-x64.tar.gz
        dest: /usr/local/
        remote_src: true
        creates: /usr/local/jdk1.8.0_201

    # The 8u201 tarball unpacks to jdk1.8.0_201; the link gives a
    # version-independent JAVA_HOME.
    - name: create links
      file:
        src: /usr/local/jdk1.8.0_201
        dest: /usr/local/java
        state: link

    # lineinfile adds each line only when it is missing, so repeated runs do
    # not pile up duplicate exports. The `$NAME` references are written
    # literally and are expanded when /etc/profile is sourced.
    - name: java_profile config
      lineinfile:
        path: /etc/profile
        line: "{{ item }}"
      loop:
        - 'export JAVA_HOME=/usr/local/java'
        - 'export JRE_HOME=/usr/local/java/jre'
        - 'export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JRE_HOME/lib:$CLASSPATH'
        - 'export PATH=$JAVA_HOME/bin:$PATH'

    # Sourcing only affects the shell spawned for this task; later tasks and
    # sessions pick the variables up through new login shells. `source` is a
    # bash builtin, hence the explicit executable.
    - name: take effect
      shell: source /etc/profile
      args:
        executable: /bin/bash
      changed_when: false
- clickhouse_cluster_setup_beijing对应的有五台机器:
- <?xml version="1.0"?>
- <!--
- NOTE: User and query level settings are set up in "users.xml" file.
- -->
- <yandex>
- <logger>
- <!-- Possible levels: -->
- <level>trace</level>
- <log>/data/clickhouse/logs/server.log</log>
- <errorlog>/data/clickhouse/logs/error.log</errorlog>
- <size>1000M</size>
- <count></count>
- <!-- <console></console> --> <!-- Default behavior is autodetection (log to console if not daemon mode and is tty) -->
- </logger>
- <!--display_name>production</display_name--> <!-- It is the name that will be shown in the client -->
- <http_port></http_port>
- <tcp_port></tcp_port>
- <!-- For HTTPS and SSL over native protocol. -->
- <!--
- <https_port></https_port>
- <tcp_port_secure></tcp_port_secure>
- -->
- <!-- Used with https_port and tcp_port_secure. Full ssl options list: -->
- <openSSL>
- <server> <!-- Used for https server AND secure tcp port -->
- <!-- openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout /etc/clickhouse-server/server.key -out /etc/clickhouse-server/server.crt -->
- <certificateFile>/etc/clickhouse-server/server.crt</certificateFile>
- <privateKeyFile>/etc/clickhouse-server/server.key</privateKeyFile>
- <!-- openssl dhparam -out /etc/clickhouse-server/dhparam.pem 4096 -->
- <dhParamsFile>/etc/clickhouse-server/dhparam.pem</dhParamsFile>
- <verificationMode>none</verificationMode>
- <loadDefaultCAFile>true</loadDefaultCAFile>
- <cacheSessions>true</cacheSessions>
- <disableProtocols>sslv2,sslv3</disableProtocols>
- <preferServerCiphers>true</preferServerCiphers>
- </server>
- <client> <!-- Used for connecting to https dictionary source -->
- <loadDefaultCAFile>true</loadDefaultCAFile>
- <cacheSessions>true</cacheSessions>
- <disableProtocols>sslv2,sslv3</disableProtocols>
- <preferServerCiphers>true</preferServerCiphers>
- <!-- Use for self-signed: <verificationMode>none</verificationMode> -->
- <invalidCertificateHandler>
- <!-- Use for self-signed: <name>AcceptCertificateHandler</name> -->
- <name>RejectCertificateHandler</name>
- </invalidCertificateHandler>
- </client>
- </openSSL>
- <!-- Default root page on http[s] server. For example load UI from https://tabix.io/ when opening http://localhost:8123 -->
- <!--
- <http_server_default_response><![CDATA[<html ng-app="SMI2"><head><base href=""></head><body><div ui-view="" class="content-ui"></div><script src=""></script></body></html>]]></http_server_default_response>
- -->
- <!-- Port for communication between replicas. Used for data exchange. -->
- <interserver_http_port></interserver_http_port>
- <!-- Hostname that is used by other replicas to request this server.
- If not specified, then it is determined analogously to the 'hostname -f' command.
- This setting could be used to switch replication to another network interface.
- -->
- <!--
- <interserver_http_host></interserver_http_host>
- -->
- <!-- Listen specified host. use :: (wildcard IPv6 address), if you want to accept connections both with IPv4 and IPv6 from everywhere. -->
- <!-- <listen_host>::</listen_host> -->
- <!-- Same for hosts with disabled ipv6: -->
- <listen_host></listen_host>
- <!-- Default values - try listen localhost on ipv4 and ipv6: -->
- <!--
- <listen_host>::</listen_host>
- <listen_host></listen_host>
- -->
- <!-- Don't exit if ipv6 or ipv4 unavailable, but listen_host with this protocol specified -->
- <!-- <listen_try></listen_try> -->
- <!-- Allow listen on same address:port -->
- <!-- <listen_reuse_port></listen_reuse_port> -->
- <!-- <listen_backlog></listen_backlog> -->
- <max_connections></max_connections>
- <keep_alive_timeout></keep_alive_timeout>
- <!-- Maximum number of concurrent queries. -->
- <max_concurrent_queries></max_concurrent_queries>
- <!-- Set limit on number of open files (default: maximum). This setting makes sense on Mac OS X because getrlimit() fails to retrieve
- correct maximum value. -->
- <!-- <max_open_files></max_open_files> -->
- <!-- Size of cache of uncompressed blocks of data, used in tables of MergeTree family.
- In bytes. Cache is single for server. Memory is allocated only on demand.
- Cache is used when 'use_uncompressed_cache' user setting turned on (off by default).
- Uncompressed cache is advantageous only for very short queries and in rare cases.
- -->
- <uncompressed_cache_size></uncompressed_cache_size>
- <!-- Approximate size of mark cache, used in tables of MergeTree family.
- In bytes. Cache is single for server. Memory is allocated only on demand.
- You should not lower this value.
- -->
- <mark_cache_size></mark_cache_size>
- <!-- Path to data directory, with trailing slash. -->
- <path>/data/clickhouse/</path>
- <!-- Path to temporary data for processing hard queries. -->
- <tmp_path>/data/clickhouse/tmp/</tmp_path>
- <!-- Directory with user provided files that are accessible by 'file' table function. -->
- <user_files_path>/data/clickhouse/user_files/</user_files_path>
- <!-- Path to configuration file with users, access rights, profiles of settings, quotas. -->
- <users_config>users.xml</users_config>
- <!-- Default profile of settings. -->
- <default_profile>default</default_profile>
- <!-- System profile of settings. This settings are used by internal processes (Buffer storage, Distibuted DDL worker and so on). -->
- <!-- <system_profile>default</system_profile> -->
- <!-- Default database. -->
- <default_database>default</default_database>
- <!-- Server time zone could be set here.
- Time zone is used when converting between String and DateTime types,
- when printing DateTime in text formats and parsing DateTime from text,
- it is used in date and time related functions, if specific time zone was not passed as an argument.
- Time zone is specified as identifier from IANA time zone database, like UTC or Africa/Abidjan.
- If not specified, system time zone at server startup is used.
- Please note, that server could display time zone alias instead of specified name.
- Example: W-SU is an alias for Europe/Moscow and Zulu is an alias for UTC.
- -->
- <!-- <timezone>Europe/Moscow</timezone> -->
- <!-- You can specify umask here (see "man umask"). Server will apply it on startup.
- Number is always parsed as octal. Default umask is 027 (other users cannot read logs, data files, etc; group can only read).
- -->
- <!-- <umask></umask> -->
- <!-- Perform mlockall after startup to lower first queries latency
- and to prevent clickhouse executable from being paged out under high IO load.
- Enabling this option is recommended but will lead to increased startup time for up to a few seconds.
- -->
- <mlock_executable>false</mlock_executable>
- <!-- Configuration of clusters that could be used in Distributed tables.
- -->
- <remote_servers incl="clickhouse_remote_servers" />
- <!-- If element has 'incl' attribute, then for it's value will be used corresponding substitution from another file.
- By default, path to file with substitutions is /etc/metrika.xml. It could be changed in config in 'include_from' element.
- Values for substitutions are specified in /yandex/name_of_substitution elements in that file.
- -->
- <include_from>/etc/clickhouse-server/metrika.xml</include_from>
- <!-- ZooKeeper is used to store metadata about replicas, when using Replicated tables.
- Optional. If you don't use replicated tables, you could omit that.
- See
- -->
- <zookeeper incl="zookeeper-servers" optional="true" />
- <!-- Substitutions for parameters of replicated tables.
- Optional. If you don't use replicated tables, you could omit that.
- See
- -->
- <macros incl="macros" optional="true" />
- <!-- Reloading interval for embedded dictionaries, in seconds. Default: 3600. -->
- <builtin_dictionaries_reload_interval></builtin_dictionaries_reload_interval>
- <!-- Maximum session timeout, in seconds. Default: 3600. -->
- <max_session_timeout></max_session_timeout>
- <!-- Default session timeout, in seconds. Default: 60. -->
- <default_session_timeout></default_session_timeout>
- <!-- Sending data to Graphite for monitoring. Several sections can be defined. -->
- <!--
- interval - send every X second
- root_path - prefix for keys
- hostname_in_path - append hostname to root_path (default = true)
- metrics - send data from table system.metrics
- events - send data from table system.events
- asynchronous_metrics - send data from table system.asynchronous_metrics
- -->
- <!--
- <graphite>
- <host>localhost</host>
- <port></port>
- <timeout>0.1</timeout>
- <interval></interval>
- <root_path>one_min</root_path>
- <hostname_in_path>true</hostname_in_path>
- <metrics>true</metrics>
- <events>true</events>
- <asynchronous_metrics>true</asynchronous_metrics>
- </graphite>
- <graphite>
- <host>localhost</host>
- <port></port>
- <timeout>0.1</timeout>
- <interval></interval>
- <root_path>one_sec</root_path>
- <metrics>true</metrics>
- <events>true</events>
- <asynchronous_metrics>false</asynchronous_metrics>
- </graphite>
- -->
- <!-- Query log. Used only for queries with setting log_queries = 1. -->
- <query_log>
- <!-- What table to insert data. If table is not exist, it will be created.
- When query log structure is changed after system update,
- then old table will be renamed and new table will be created automatically.
- -->
- <database>system</database>
- <table>query_log</table>
- <!--
- Example:
- event_date
- toMonday(event_date)
- toYYYYMM(event_date)
- toStartOfHour(event_time)
- -->
- <partition_by>toYYYYMM(event_date)</partition_by>
- <!-- Interval of flushing data. -->
- <flush_interval_milliseconds></flush_interval_milliseconds>
- </query_log>
- <!-- Query thread log. Has information about all threads participated in query execution.
- Used only for queries with setting log_query_threads = 1. -->
- <query_thread_log>
- <database>system</database>
- <table>query_thread_log</table>
- <partition_by>toYYYYMM(event_date)</partition_by>
- <flush_interval_milliseconds></flush_interval_milliseconds>
- </query_thread_log>
- <!-- Uncomment if use part log.
- Part log contains information about all actions with parts in MergeTree tables (creation, deletion, merges, downloads).
- <part_log>
- <database>system</database>
- <table>part_log</table>
- <flush_interval_milliseconds></flush_interval_milliseconds>
- </part_log>
- -->
- <!-- Parameters for embedded dictionaries, used in Yandex.Metrica.
- See
- -->
- <!-- Path to file with region hierarchy. -->
- <!-- <path_to_regions_hierarchy_file>/opt/geo/regions_hierarchy.txt</path_to_regions_hierarchy_file> -->
- <!-- Path to directory with files containing names of regions -->
- <!-- <path_to_regions_names_files>/opt/geo/</path_to_regions_names_files> -->
- <!-- Configuration of external dictionaries. See:
- -->
- <dictionaries_config>*_dictionary.xml</dictionaries_config>
- <!-- Uncomment if you want data to be compressed 30-100% better.
- Don't do that if you just started using ClickHouse.
- -->
- <compression incl="clickhouse_compression">
- <!--
- <!- - Set of variants. Checked in order. Last matching case wins. If nothing matches, lz4 will be used. - ->
- <case>
- <!- - Conditions. All must be satisfied. Some conditions may be omitted. - ->
- <min_part_size></min_part_size> <!- - Min part size in bytes. - ->
- <min_part_size_ratio>0.01</min_part_size_ratio> <!- - Min size of part relative to whole table size. - ->
- <!- - What compression method to use. - ->
- <method>zstd</method>
- </case>
- -->
- </compression>
- <!-- Allow to execute distributed DDL queries (CREATE, DROP, ALTER, RENAME) on cluster.
- Works only if ZooKeeper is enabled. Comment it if such functionality isn't required. -->
- <distributed_ddl>
- <!-- Path in ZooKeeper to queue with DDL queries -->
- <path>/clickhouse/task_queue/ddl</path>
- <!-- Settings from this profile will be used to execute DDL queries -->
- <!-- <profile>default</profile> -->
- </distributed_ddl>
- <!-- Settings to fine tune MergeTree tables. See documentation in source code, in MergeTreeSettings.h -->
- <!--
- <merge_tree>
- <max_suspicious_broken_parts></max_suspicious_broken_parts>
- </merge_tree>
- -->
- <!-- Protection from accidental DROP.
- If size of a MergeTree table is greater than max_table_size_to_drop (in bytes) than table could not be dropped with any DROP query.
- If you want do delete one table and don't want to restart clickhouse-server, you could create special file <clickhouse-path>/flags/force_drop_table and make DROP once.
- By default max_table_size_to_drop is 50GB; max_table_size_to_drop=0 allows to DROP any tables.
- The same for max_partition_size_to_drop.
- Uncomment to disable protection.
- -->
- <!-- <max_table_size_to_drop></max_table_size_to_drop> -->
- <!-- <max_partition_size_to_drop></max_partition_size_to_drop> -->
- <!-- Example of parameters for GraphiteMergeTree table engine -->
- <graphite_rollup_example>
- <pattern>
- <regexp>click_cost</regexp>
- <function>any</function>
- <retention>
- <age></age>
- <precision></precision>
- </retention>
- <retention>
- <age></age>
- <precision></precision>
- </retention>
- </pattern>
- <default>
- <function>max</function>
- <retention>
- <age></age>
- <precision></precision>
- </retention>
- <retention>
- <age></age>
- <precision></precision>
- </retention>
- <retention>
- <age></age>
- <precision></precision>
- </retention>
- </default>
- </graphite_rollup_example>
- <!-- Directory in <clickhouse-path> containing schema files for various input formats.
- The directory will be created if it doesn't exist.
- -->
- <format_schema_path>/data/clickhouse/format_schemas/</format_schema_path>
- <!-- Uncomment to disable ClickHouse internal DNS caching. -->
- <!-- <disable_internal_dns_cache></disable_internal_dns_cache> -->
- </yandex>
- <?xml version="1.0"?>
- <yandex>
- <!-- Profiles of settings. -->
- <profiles>
- <!-- Default settings. -->
- <default>
- <!-- Maximum memory usage for processing single query, in bytes. -->
- <max_memory_usage></max_memory_usage>
- <!-- Use cache of uncompressed blocks of data. Meaningful only for processing many of very short queries. -->
- <use_uncompressed_cache></use_uncompressed_cache>
- <!-- How to choose between replicas during distributed query processing.
- random - choose random replica from set of replicas with minimum number of errors
- nearest_hostname - from set of replicas with minimum number of errors, choose replica
- with minimum number of different symbols between replica's hostname and local hostname
- (Hamming distance).
- in_order - first live replica is chosen in specified order.
- -->
- <load_balancing>random</load_balancing>
- <!-- log values for select queries -->
- <log_queries></log_queries>
- </default>
- <!-- Profile that allows only read queries. -->
- <readonly>
- <max_memory_usage></max_memory_usage>
- <use_uncompressed_cache></use_uncompressed_cache>
- <load_balancing>random</load_balancing>
- <readonly></readonly>
- </readonly>
- </profiles>
- <!-- Users and ACL. -->
- <users>
- <!-- If user name was not specified, 'default' user is used. -->
- <default>
- <!-- Password could be specified in plaintext or in SHA256 (in hex format).
- If you want to specify password in plaintext (not recommended), place it in 'password' element.
- Example: <password>qwerty</password>.
- Password could be empty.
- If you want to specify SHA256, place it in 'password_sha256_hex' element.
- Example: <password_sha256_hex>65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5</password_sha256_hex>
- How to generate decent password:
- Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d '-'
- In first line will be password and in second - corresponding SHA256.
- -->
- <password></password>
- <!-- List of networks with open access.
- To open access from everywhere, specify:
- <ip>::/0</ip>
- To open access only from localhost, specify:
- <ip>::1</ip>
- <ip>127.0.0.1</ip>
- Each element of list has one of the following forms:
- <ip> IP-address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0
- 2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::.
- <host> Hostname. Example: server01.yandex.ru.
- To check access, DNS query is performed, and all received addresses compared to peer address.
- <host_regexp> Regular expression for host names. Example, ^server\d\d-\d\d-\d\.yandex\.ru$
- To check access, DNS PTR query is performed for peer address and then regexp is applied.
- Then, for result of PTR query, another DNS query is performed and all received addresses compared to peer address.
- Strongly recommended that regexp is ends with $
- All results of DNS requests are cached till server restart.
- -->
- <networks incl="networks" replace="replace">
- <ip>::/</ip>
- </networks>
- <!-- Settings profile for user. -->
- <profile>default</profile>
- <!-- Quota for user. -->
- <quota>default</quota>
- </default>
- <!-- Example of user with readonly access. -->
- <readonly>
- <password></password>
- <networks incl="networks" replace="replace">
- <ip>::</ip>
- <ip></ip>
- </networks>
- <profile>readonly</profile>
- <quota>default</quota>
- </readonly>
- </users>
- <!-- Quotas. -->
- <quotas>
- <!-- Name of quota. -->
- <default>
- <!-- Limits for time interval. You could specify many intervals with different limits. -->
- <interval>
- <!-- Length of interval. -->
- <duration></duration>
- <!-- No limits. Just calculate resource usage for time interval. -->
- <queries></queries>
- <errors></errors>
- <result_rows></result_rows>
- <read_rows></read_rows>
- <execution_time></execution_time>
- </interval>
- </default>
- </quotas>
- </yandex>
- <yandex> <!-- Substitution file: its sections are pulled in through the 'incl' attributes in the server and users configs (clickhouse_remote_servers, zookeeper-servers, macros, clickhouse_compression, networks). -->
- <clickhouse_remote_servers> <!-- NOTE(review): the loop below has no range arguments and no right-hand operand in its comparison, so it cannot render as written; restore them before use. -->
- <cluster-shard{{shard_num}}replica{{replica_num}}> <!-- Cluster tag name is built from the playbook variables shard_num and replica_num. NOTE(review): both branches of the conditional below are textually identical; presumably they once differed (e.g. by a zero-padding digit in the host name) - confirm. -->
- {% for i in range(,,) %}
- {% if i< %}
- <shard>
- <internal_replication>true</internal_replication>
- <replica>
- <host>{{shard_host_pre}}{{i}}</host>
- <port>{{shard_port}}</port>
- <user>{{shard_user}}</user>
- </replica>
- </shard>
- {% else %}
- <shard>
- <internal_replication>true</internal_replication>
- <replica>
- <host>{{shard_host_pre}}{{i}}</host>
- <port>{{shard_port}}</port>
- <user>{{shard_user}}</user>
- </replica>
- </shard>
- {%endif%}
- {% endfor %}
- </cluster-shard{{shard_num}}replica{{replica_num}}>
- </clickhouse_remote_servers>
- <zookeeper-servers> <!-- One node per loop index; host is zk_host plus the index and the port comes from the variable zk_prot (presumably a misspelling of "zk_port", but it matches the playbook's vars, so rename both together). NOTE(review): the range arguments and the comparison operand are missing here as well. -->
- {% for i in range(,,) %}
- {% if i< %}
- <node index="{{i}}">
- <host>{{zk_host}}{{i}}</host>
- <port>{{zk_prot}}</port>
- </node>
- {% else %}
- <node index="{{i}}">
- <host>{{zk_host}}{{i}}</host>
- <port>{{zk_prot}}</port>
- </node>
- {%endif%}
- {% endfor %}
- </zookeeper-servers>
- <macros> <!-- Left empty: no replicated-table substitutions are defined; the server config marks this section optional. -->
- </macros>
- <clickhouse_compression> <!-- Single compression case using lz4. NOTE(review): min_part_size below is empty; its value appears to have been lost. -->
- <case>
- <min_part_size></min_part_size>
- <min_part_size_ratio>0.01</min_part_size_ratio>
- <method>lz4</method>
- </case>
- </clickhouse_compression>
- <networks> <!-- Included by the users config through incl="networks". NOTE(review): the prefix length after "::/" is missing. -->
- <ip>::/</ip>
- </networks>
- </yandex>
---
# Playbook: install and configure ClickHouse on every host of the
# clickhouse_cluster_setup_beijing group.
#
# Dry-run the host selection first:
#   ansible-playbook playbook.yml --list-hosts
#   ansible-playbook /etc/ansible/install_file/clickhouse_install.yml --list-hosts
#
# NOTE(review): several variable values below are empty and the repository
# script URL in the curl task is missing; they appear to have been lost
# and must be filled in before this playbook can run.
- hosts: clickhouse_cluster_setup_beijing  # a single host or an inventory group
  remote_user: root  # run as the root account
  vars:
    ck_version:  # ClickHouse rpm version to pin - TODO: restore value
    # Shard parameters consumed by metrika.xml.j2
    shard_port:  # TODO: restore value
    shard_user: default
    shard_host_pre: bjg-techcenter-appservice-appservice-push-push-clickhouse-
    shard_num:  # TODO: restore value
    replica_num:  # TODO: restore value
    # ZooKeeper parameters consumed by metrika.xml.j2
    # ("zk_prot" is the name the template references; keep both in sync)
    zk_prot:  # TODO: restore value
    zk_host: bje-data-platform-zookeeper-
  tasks:
    # Download and install curl on all machines.
    - name: download and install curl
      yum:
        name: curl
        state: present

    # Runs a repository bootstrap script piped into bash.
    # NOTE(review): the script URL is missing from the command, so this task
    # fails as written - restore it from the original source.
    - name: Download and execute the clickhouse installation script provided by on the replica, distributed, chproxy machine
      shell: curl -s | sudo bash

    # Install the pinned version of the server, client and compressor packages.
    - name: Install clickhouse-server, clickhouse-client on replica and distributed machines
      yum:
        name:
          - "clickhouse-server-{{ ck_version }}"
          - "clickhouse-client-{{ ck_version }}"
          - "clickhouse-compressor-{{ ck_version }}"
        state: present

    # Point the init script at the relocated log directory.
    - name: Batch modify startup scripts
      replace:
        path: /etc/init.d/clickhouse-server
        regexp: '/var/log/clickhouse-server'
        replace: '/data/clickhouse/logs'

    - name: write the metrika config file
      template:
        src: /etc/ansible/install_file/metrika.xml.j2
        dest: /etc/clickhouse-server/metrika.xml
        backup: true

    - name: write the config config file
      template:
        src: /etc/ansible/install_file/config.xml.j2
        dest: /etc/clickhouse-server/config.xml
        backup: true

    - name: write the user config file
      template:
        src: /etc/ansible/install_file/users.xml.j2
        dest: /etc/clickhouse-server/users.xml
        backup: true

    # Give the clickhouse user a login shell.
    - name: Synchronous configuration
      user:
        name: clickhouse
        shell: /bin/bash

    - name: Synchronous mkdir configuration
      file:
        path: /data/clickhouse/logs
        state: directory

    # Hand the whole /data/clickhouse/ tree to the clickhouse user.
    - name: Synchronous chown configuration
      file:
        path: /data/clickhouse/
        state: directory
        owner: clickhouse
        group: clickhouse
        recurse: true

    # Restart so the new configuration is loaded.
    - name: service clickhouse-server restart
      service:
        name: clickhouse-server
        state: restarted
---
# Playbook: drop old partitions from the listed ClickHouse tables.
#
# For every table in `tableName` the distinct day partitions are listed, and
# each partition whose date string sorts before "two days ago" is dropped.
# Dates are compared as YYYY-MM-DD strings, which order chronologically.
- hosts: delete_ck_host  # a single host or an inventory group
  remote_user: root  # run as the root account
  vars:
    # NOTE(review): the port value is empty; it appears to have been lost -
    # TODO: restore it.
    port:
    tableName:
      - ck_local_qukan_report_cmd_11001
      - ck_local_qukan_report_cmd_under8
  tasks:
    # Cut-off date; read-only, so never reported as a change.
    - name: echo date
      command: date -d "2 days ago" +%Y-%m-%d
      register: date_output
      changed_when: false

    # One result per table; each result keeps the table name in `.item` and
    # its partitions in `.stdout_lines`.
    - name: echo partition
      command: >-
        clickhouse-client --host {{ inventory_hostname }} --port {{ port }}
        --database default --multiquery
        -q "SELECT DISTINCT formatDateTime(log_timestamp, '%F') AS partition FROM {{ item }}"
      loop: "{{ tableName }}"
      register: partitions
      changed_when: false

    # subelements pairs every table's result with that table's own
    # partitions (item.0 = the table's result, item.1 = one partition), so a
    # table is only asked to drop partitions it actually reported.
    # `when` is already a Jinja expression, so it takes no {{ }} delimiters.
    - name: execute shell
      command: >-
        clickhouse-client --host {{ inventory_hostname }} --port {{ port }}
        --database default --multiquery
        -q "alter table {{ item.0.item }} drop partition '{{ item.1 }}'"
      when: item.1 < date_output.stdout
      loop: "{{ partitions.results | subelements('stdout_lines') }}"
      loop_control:
        label: "{{ item.0.item }} / {{ item.1 }}"
sh 文件
#!/bin/bash
# Wrapper that runs the partition-cleanup playbook and brackets its output
# with start/end markers.
#
# The playbook's exit status is captured and returned, so a failed run is
# visible to the caller instead of being masked by the final echo.
#
# Intended crontab entry (the schedule fields and the script name appear to
# have been lost - TODO: restore):
#   * * * /etc/ansible/install_file/task/

echo " task start---------------------------------------------"
ansible-playbook /etc/ansible/install_file/task/delete_ck.yml
status=$?
echo " task end--------------------------------------------"
exit "$status"
