https://blog.csdn.net/heizistudio/article/details/8638488

nrpe安装脚本

nagios-plugins-1.4.13.tar.gz
nrpe-2.8.tar.gz
源码包文件提前下载好

[root@dd]more install.sh
#!/bin/bash
tar -xvzf nagios-plugins-1.4.13.tar.gz
cd nagios-plugins-1.4.13
./configure
make
make install
groupadd nagios
useradd -g nagios -s /sbin/nologin nagios
echo !@NA#$gI%o^S | passwd nagios --stdin
cd ..
tar -xvzf nrpe-2.8.tar.gz
cd nrpe-2.8
./configure
make all
make install-plugin
make install-daemon
make install-daemon-config
echo "OK!"
[root@dd]

修改 nrpe文件

vim /usr/local/nagios/etc/nrpe.cfg

#nrpe_user=nagios
nrpe_user=oracle ---以oracle用户运行

allowed_hosts=127.0.0.1,192.169.169.60 --添加监控允许IP

修改check_oracle插件脚本

vim /usr/local/nagios/libexec/check_oracle

添加两行

#! /bin/sh
#
# latigid010@yahoo.com
# 01/06/2000
#
# This Nagios plugin was created to check Oracle status
#
ORACLE_HOME=/opt/oracle/product/10g
PATH=$ORACLE_HOME/bin:$PATH

vim +257 /usr/local/nagios/libexec/check_oracle

257 ON a.tablespace_name=b.tablespace_name WHERE a.tablespace_name=upper('${5}');---方便监控

修改 /usr/local/nagios/etc/nrpe.cfg 文件

。加入以下内容：

#Check Oracle

command[check_oracle_tns]=/usr/local/nagios/libexec/check_oracle –tns orcl

command[check_oracle_db]=/usr/local/nagios/libexec/check_oracle –db orcl

command[check_oracle_login]=/usr/local/nagios/libexec/check_oracle –login orcl c

ommand[check_oracle_cache]=/usr/local/nagios/libexec/check_oracle –cache orcl system lkjsdf 80 90

command[check_oracle_tablespace]=/usr/local/nagios/libexec/check_oracle –tablespace orcl system lkjsdf tab 90 80

具体参数写法请参考 check_oracle –help

二、配置 nrpe 服务
修改 /usr/local/nagios/etc/nrpe.cfg 文件。加入以下内容：
#Check Oracle
command[check_oracle_tns]=/usr/local/nagios/libexec/check_oracle –tns orcl
command[check_oracle_db]=/usr/local/nagios/libexec/check_oracle –db orcl
command[check_oracle_login]=/usr/local/nagios/libexec/check_oracle –login orcl
command[check_oracle_cache]=/usr/local/nagios/libexec/check_oracle –cache orcl system lkjsdf 80 90
command[check_oracle_tablespace]=/usr/local/nagios/libexec/check_oracle –tablespace orcl system lkjsdf tab 90 80
具体参数写法请参考 check_oracle –help。
配置完成后，重启 xinetd 服务。
三、配置 Nagios 服务端
1.安装 nrpe 脚本支持。—参考官方文档。
2.添加 nrpe 命令配置。修改 nagios/etc/objects/command.cfg 文件：
define command {
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
3.在 nagios/etc/objects 添加oracle 主机配置文件。oracle.cfg 。
define host {
use linux-server
host_name oracle
alias Oracle 10g
address 192.168.100.63
}
define service {
use generic-service
host_name oracle
service_description TNS Check
check_command check_nrpe!check_oracle_tns
}
define service {
use generic-service
host_name oracle
service_description DB Check
check_command check_nrpe!check_oracle_db
}
define service {
use generic-service
host_name oracle
service_description Login Check
check_command check_nrpe!check_oracle_login
}
define service {
use generic-service
host_name oracle
service_description Cache Check
check_command check_nrpe!check_oracle_cache
}
define service {
use generic-service
host_name oracle
service_description Tablespace Check
check_command check_nrpe!check_oracle_tablespace
}
效果图：

NRPE 以及 Nagios-Plugin 安装文件见附件包内。NRPE 官方文档也包含在附件包呢。
备注：
图中 Tablespace check 还没有细调，用了错误的 table 名。
通过修改脚本还可以监视临时表空间。

CRITICAL - ORA-28009: connection as SYS should be as SYSDBA or SYSOPER
[root@dd]/usr/local/nagios/libexec/check_oracle --tablespace duxiu sys oracle USERS 90 80-------勿用sys用户

---------------------------------------------------

监控效果如下

个人分类： linux管理 oracle

###参考如下文档做alert 日志监控

http://www.adp-gmbh.ch/ora/admin/scripts/read_alert_log.html

Script to read Oracle's alert log

The following script creates two tables: read_alert and read_alert_disk.

read_alert_disk is an external table and contains the content of the alert log.

read_alert will be empty after this script has been executed. It is used by the update_alert_log script, shown further below.

define alert_length="2000"
 
drop table alert_log;
 
create table alert_log (
  alert_date date,
  alert_text varchar2(&&alert_length)
)
storage (initial 512k next 512K pctincrease 0);
 
create index alert_log_idx on alert_log(alert_date)
storage (initial 512k next 512K pctincrease 0);
 
column db    new_value _DB    noprint;
column bdump new_value _bdump noprint;
 
select instance_name db from v$instance;
 
select value bdump from v$parameter
 where name ='background_dump_dest';
 
drop   directory BDUMP;
create directory BDUMP as '&&_bdump';
 
drop table alert_log_disk;
 
create table alert_log_disk ( text varchar2(&&alert_length) )
organization external (
  type oracle_loader
  default directory BDUMP
      access parameters (
          records delimited by newline nologfile nobadfile
          fields terminated by "&" ltrim
      )
  location('alert_&&_DB..log')
)
reject limit unlimited;

update_alert_log.sql

Now, after the two tables are created, the alert_log table can be filled with the following script. It only loads those records that are greater than the last time it loaded. And it loads the date/time on every line for convienance. It also helps when the alertlogs get rotated. You still keep the history within an Oracle table. Finally, it also strips out all the «crap» that is really not needed to see if you are looking for errors.

update_alert_log.sql

set serveroutput on 
 
declare
 
  isdate         number := 0;
  start_updating number := 0;
  rows_inserted  number := 0;
 
  alert_date     date;
  max_date       date;
 
  alert_text     alert_log_disk.text%type;
 
begin
 
  /* find a starting date */
  select max(alert_date) into max_date from alert_log;
 
  if (max_date is null) then
    max_date := to_date('01-jan-1980', 'dd-mon-yyyy');
  end if;
 
  for r in (
    select substr(text,1,180) text from alert_log_disk
     where text not like '%offlining%'
       and text not like 'ARC_:%'
       and text not like '%LOG_ARCHIVE_DEST_1%'
       and text not like '%Thread 1 advanced to log sequence%'
       and text not like '%Current log#%seq#%mem#%'
       and text not like '%Undo Segment%lined%'
       and text not like '%alter tablespace%back%'
       and text not like '%Log actively being archived by another process%'
       and text not like '%alter database backup controlfile to trace%'
       and text not like '%Created Undo Segment%'
       and text not like '%started with pid%'
       and text not like '%ORA-12012%'
       and text not like '%ORA-06512%'
       and text not like '%ORA-000060:%'
       and text not like '%coalesce%'
       and text not like '%Beginning log switch checkpoint up to RBA%'
       and text not like '%Completed checkpoint up to RBA%'
       and text not like '%specifies an obsolete parameter%'
       and text not like '%BEGIN BACKUP%'
       and text not like '%END BACKUP%'
  )
  loop
 
    isdate     := 0;
    alert_text := null;
 
    select count(*) into isdate
      from dual
     where substr(r.text, 21) in ('2003','2004','2005','2006','2007')
       and r.text not like '%cycle_run_year%';
 
    if (isdate = 1) then  
 
      select to_date(substr(r.text, 5),'Mon dd hh24:mi:ss rrrr')
        into alert_date
        from dual;
 
      if (alert_date > max_date) then
        start_updating := 1;
      end if;
 
    else
      alert_text := r.text;
    end if;
 
    if (alert_text is not null) and (start_updating = 1) then
 
      insert into alert_log values (alert_date, substr(alert_text, 1, 180));
      rows_inserted := rows_inserted + 1;
      commit;
 
    end if;
 
  end loop;
 
  sys.dbms_output.put_line('Inserting after date '||to_char(max_date, 'MM/DD/RR HH24:MI:SS'));
  sys.dbms_output.put_line('Rows Inserted: '||rows_inserted);
 
  commit;
 
end;
/

Let's execute the script:

SQL> @update_alert_log

Inserting after date 01/01/80 00:00:00
Rows Inserted: 17361
 
PL/SQL procedure successfully completed.

The alert_log table now contains the errors as recorded in the alert.log file:

select alert_date, substr(alert_text,1, 69)
  from alert_log;

12-JUL-06 Error stack returned to user:
12-JUL-06 ORA-02049: timeout: distributed transaction waiting for lock
16-JUL-06 Errors in file /usr/local/opt/oracle/admin/P2BL36A/udump/p2bl36a_ora_
18-JUL-06 ORACLE Instance P2BL36A - Can not allocate log, archival required
18-JUL-06 Thread 1 cannot allocate new log, sequence 265396
18-JUL-06 All online logs needed archiving
18-JUL-06 ORACLE Instance P2BL36A - Can not allocate log, archival required
18-JUL-06 Thread 1 cannot allocate new log, sequence 265397
18-JUL-06 All online logs needed archiving
18-JUL-06 ORACLE Instance P2BL36A - Can not allocate log, archival required
18-JUL-06 Thread 1 cannot allocate new log, sequence 265398
18-JUL-06 All online logs needed archiving
23-JUL-06 Errors in file /usr/local/opt/oracle/admin/P2BL36A/udump/p2bl36a_ora_
25-JUL-06 Errors in file /usr/local/opt/oracle/admin/P2BL36A/bdump/p2bl36a_s001
25-JUL-06 ORA-07445: exception encountered: core dump [kpnrls()+148] [SIGBUS] [
25-JUL-06 ORA-03113: end-of-file on communication channel
25-JUL-06 ORA-02063: preceding line from FRD01
25-JUL-06 found dead shared server 'S001', pid = (21, 1)
27-JUL-06 alter tablespace TBS01
27-JUL-06 add datafile '/ora106/ORACLE/P2BL36A/dbf/tbs01P2BL36A_32.dbf' size 20
27-JUL-06 ORA-1119 signalled during: alter tablespace TBS01
27-JUL-06 add datafile '/ora106/ORACL...
27-JUL-06 alter tablespace TBS01
27-JUL-06 add datafile '/ora106/ORACLE/P2BL36A/dbf/tbs01P2BL36A_32.dbf' size 20
27-JUL-06 Completed: alter tablespace TBS01
27-JUL-06 add datafile '/ora106/ORACL
28-JUL-06 ORA-01555 caused by SQL statement below (Query Duration=59968 sec, SC
28-JUL-06 SELECT /*+ ORDERED NO_EXPAND USE_NL(A2) INDEX(A2 "FC_REQUEST_HISTORY_

Thanks

The scripts above are provided by Shawn M Ellinger.

Links

####sample 1:

#########DB 端
step 1:备份数据库日志，并且清空当前日志

cp alert_<sid>.log alert_<sid>.log_2018
cat > alert_<sid>.log
##end

step 2:

##以下命令都在dbmgr 用户下使用，初始化 alert.log

####建立外部表，alert_log_disk（读取alert_sid.log），和并将数据同步到内部表alert_log
##The following script creates two tables: read_alert and read_alert_disk.
##read_alert_disk is an external table and contains the content of the alert log.
##read_alert will be empty after this script has been executed. It is used by the update_alert_log script, shown further below.

define alert_length="2000"

drop table alert_log;

create table alert_log (
alert_date date,
alert_text varchar2(&&alert_length)
)
storage (initial 512k next 512K pctincrease 0);

create index alert_log_idx on alert_log(alert_date)
storage (initial 512k next 512K pctincrease 0);

column db new_value _DB noprint;
column bdump new_value _bdump noprint;

select instance_name db from v$instance;

select value bdump from v$parameter
where name ='background_dump_dest';

drop directory BDUMP;
create directory BDUMP as '&&_bdump';

drop table alert_log_disk;

create table alert_log_disk ( text varchar2(&&alert_length) )
organization external (
type oracle_loader
default directory BDUMP
access parameters (
records delimited by newline nologfile nobadfile
fields terminated by "&" ltrim
)
location('alert_&&_DB..log')
)
reject limit unlimited;

step 3:
###new procedure insert_alert_log ,然后使用job 每天同步一次数据，从alert_log_disk到alert_log

3.1 create procedure

create or replace procedure insert_alert_log
as
isdate number := 0;
start_updating number := 0;
rows_inserted number := 0;

alert_date date;
max_date date;

alert_text alert_log_disk.text%type;

begin

/* find a starting date */
select max(alert_date) into max_date from alert_log;

if (max_date is null) then
max_date := to_date('01-jan-1980', 'dd-mon-yyyy');
end if;

for r in (
select substr(text,1,180) text from alert_log_disk
where text not like '%offlining%'
and text not like 'ARC_:%'
and text not like '%LOG_ARCHIVE_DEST_1%'
and text not like '%Thread 1 advanced to log sequence%'
and text not like '%Current log#%seq#%mem#%'
and text not like '%Undo Segment%lined%'
and text not like '%alter tablespace%back%'
and text not like '%Log actively being archived by another process%'
and text not like '%alter database backup controlfile to trace%'
and text not like '%Created Undo Segment%'
and text not like '%started with pid%'
and text not like '%ORA-12012%'
and text not like '%ORA-06512%'
and text not like '%ORA-000060:%'
and text not like '%coalesce%'
and text not like '%Beginning log switch checkpoint up to RBA%'
and text not like '%Completed checkpoint up to RBA%'
and text not like '%specifies an obsolete parameter%'
and text not like '%BEGIN BACKUP%'
and text not like '%END BACKUP%'
and text not like '%Incremental checkpoint%%'
)
loop

isdate := 0;
alert_text := null;

select count(*) into isdate
from dual
where substr(r.text, 21) in ('2018','2019','2020','2021','2022','2023','2024','2025')
and r.text not like '%cycle_run_year%';

if (isdate = 1) then

select to_date(substr(r.text, 5),'Mon dd hh24:mi:ss rrrr')
into alert_date
from dual;

if (alert_date > max_date) then
start_updating := 1;
end if;

else
alert_text := r.text;
end if;

if (alert_text is not null) and (start_updating = 1) then

insert into alert_log values (alert_date, substr(alert_text, 1, 180));
rows_inserted := rows_inserted + 1;
commit;

end if;

end loop;

sys.dbms_output.put_line('Inserting after date '||to_char(max_date, 'MM/DD/RR HH24:MI:SS'));
sys.dbms_output.put_line('Rows Inserted: '||rows_inserted);

commit;

end;
/

3.2 test the procedure

set serveroutput on
begin
insert_alert_log;
end;
/

select count(*) from alert_log;

####end

step 4
###授权
grant select on dbmgr.alert_log to dbmonopr;

step 5

##create job every day 22 hour，同步一次

SQL>variable job1 number;

SQL>
begin
dbms_job.submit(:job1,'INSERT_ALERT_LOG;','TRUNC(SYSDATE) + 22/24,''TRUNC(SYSDATE) + 22/24 + 1');
end;
/

SQL>print :job1

SQL>
begin
dbms_job.run(:job1);
end;
/

SQL>
select count(*) from dbmgr.alert_log;

###end

#########nagios 端,需要使用nagios 配置,都在nagios 用户下执行

step 1: /usr/local/ nagios /libexec 下的check_oracle,请在check_oracle 文件加入如下几行记录

##add before line "--db)"

--alert_log)
get_result=`sqlplus -silent /nolog<<EOF
conn dbmonopr/dbmonoprkms11@$2
set heading off feedback off pagesize 0 verify off echo off
select alert_date||' '||substr(alert_text,1, 69) from dbmgr.alert_log where alert_date > sysdate-2 and substr(alert_text,1, 69) like '%ORA-%';
exit;
EOF`
#echo $get_result
result=`echo "$get_result"`

if [ -z "$result" ] ; then
echo "OK - ${2} Database alert_log is ok."
exit 0
else
echo "CRITICAL - ${2} Database alert_log report ($result)."
exit 2
fi
;;

测试命令是否可以执行 <SID>是数据库名字。
/usr/local/nagios/libexec/check_oracle --alert_log <SID>
/usr/local/nagios/libexec/check_oracle --db <SID>

step 2. vim /usr/local/nagios/etc/nrpe.cfg add below command

command[check_db_alert_status]=/usr/local/nagios/libexec/check_oracle --alert_log <SID>

########也有如下方法监控：使用脚本创建

https://heliosguneserol.com/2010/06/11/oracle-alert-log-file-check-script/

http://www.dba-oracle.com/t_linux_shell_script_check_alert_log.htm

https://www.experts-exchange.com/questions/28406794/oracle-alert-log-script-to-check-for-errors.html

http://blog.itpub.net/29129946/viewspace-1487058/

http://blog.chinaunix.net/uid-25557346-id-3238831.html

aintaining Oracle Logs and Diagnostic Files

Prior to version 11g, Oracle databases stored most log and diagnostic information in the $ORACLE_BASE/admin directory. Now in 11g, there is a new directory at $ORACLE_BASE/diagwith a different organizational structure. In either version, it is important to understand what to find in these directories and how to maintain them.

The Admin Directory

In Oracle 10g and earlier, logs and diagnostic information can typically be found in the directory $ORACLE_BASE/admin, but sometimes this admin directory is found in $ORACLE_HOME or is set to another specific location chosen by the administrator. The admin directory typically contains a folder for each database on a system. Within this folder, there are the bdump, cdump and udump directories, among other things.

These directories are populated with diagnostic information by the database. Most of the contents are unique files created by database events, but some files like the alert log are written to on an ongoing basis and will continue to grow if left alone. In either case, these files should occasionally be cleaned up to prevent them from taking up too much disk space.

Checking the Alert Log

The check_alert_log.sh script shown below checks for any ORA- errors in the alert logof a given database. It will take a SID as an argument, or if no SID is specified, it will use the current one if set. In large environments, it may be more desirable to set up a GRID Controller and use that to monitor multiple databases. However, if only a handful of instances need to be monitored, it can be much easier to just set up monitoring scripts like this one.

In order to avoid reporting the same error twice, this script copies the contents out of the alert log, checks for errors and then saves them in a log file by the same name, but with .1 appended to it.

#!/bin/bash
# Add /usr/local/bin to the PATH variable so the oraenv command can be found
PATH=$PATH:/usr/local/bin; export PATH
# If a SID is provided as an argument it will be set and oraenv run
# otherwise we will use the current SID. If no SID is set or provided
# an error message is displayed and the script exits with a status of 1
if [ $1 ]
then
    ORACLE_SID=$1
    ORAENV_ASK=NO
    . oraenv
else
    if [ ! $ORACLE_SID ]
    then
           echo "Error: No ORACLE_SID set or provided as an argument"
           exit 1
    fi
fi
# Set the ORACLE_BASE variable
ORACLE_BASE=/u01/app/oracle; export ORACLE_BASE
cd $ORACLE_BASE/admin/$ORACLE_SID/bdump
# Copy the current alert log into a temporary file and empty the original
cp alert_$ORACLE_SID.log alert_$ORACLE_SID.log.temp
cp /dev/null alert_$ORACLE_SID.log
# Check the copy in the temporary file for ORA- errors
grep 'ORA-' alert_$ORACLE_SID.log.temp > /dev/null
# If found, email the Oracle user with the contents of the alert log
if [ $? = 0 ]
then
    mail -s "$ORACLE_SID database alert log error" oracle < \
           alert_$ORACLE_SID.log.temp
fi
# Move the contents of the temp file onto the permanent copy of the log
# and remove the temp file.
cat alert_$ORACLE_SID.log.temp >> alert_$ORACLE_SID.log.1
rm alert_$ORACLE_SID.log.temp

The block of code which starts with the first if statementis one that will be seen in many of the scripts in this book. If a SID is provided, the $1 variable exists and the oraenv command is executed. If not, the second if statement within the ELSE clause checks to see if a SID has already been set. If it has not, an error message is printed and the script exited with a result code of 1, indicating failure. If this script fails when executed from a crontab, the error message is automatically emailed to the crontab owner.

Since this will likely be run from a crontab entry, you typically provide a SID as an argument. The following crontab entry causes this script to be run every 15 minutes.

00,15,30,45 * * * * /u01/app/oracle/admin/common/check_alert_log.sh oss

####check_oracle from nagios

#! /bin/ksh
#
# latigid010@yahoo.com
# 01/06/2000
#
# This Nagios plugin was created to check Oracle status
#

PROGNAME=`basename $0`
PROGPATH=`echo $0 | sed -e 's,[\\/][^\\/][^\\/]*$,,'`
REVISION="1.4.15"

. $PROGPATH/utils.sh

print_usage() {
echo "Usage:"
echo " $PROGNAME --tns <Oracle Sid or Hostname/IP address>"
echo " $PROGNAME --db <ORACLE_SID>"
echo " $PROGNAME --status <ORACLE_SID>"
echo " $PROGNAME --asm <ORACLE_SID>"
echo " $PROGNAME --flash <ORACLE_SID>"
echo " $PROGNAME --login <ORACLE_SID>"
echo " $PROGNAME --cache <ORACLE_SID> <USER> <PASS> <CRITICAL> <WARNING>"
echo " $PROGNAME --ttbs <ORACLE_SID>"
echo " $PROGNAME --tablespace <ORACLE_SID> <USER> <PASS> <TABLESPACE> <CRITICAL> <WARNING>"
echo " $PROGNAME --oranames <Hostname>"
echo " $PROGNAME --help"
echo " $PROGNAME --version"
}

print_help() {
print_revision $PROGNAME $REVISION
echo ""
print_usage
echo ""
echo "Check Oracle status"
echo ""
echo "--tns SID/IP Address"
echo " Check remote TNS server"
echo "--db SID"
echo " Check local database (search /bin/ps for PMON process) and check"
echo " filesystem for sgadefORACLE_SID.dbf"
echo "--status SID"
echo "--asm SID"
echo " Check ASM"
echo "--login SID"
echo " Attempt a dummy login and alert if not ORA-01017: invalid username/password"
echo "--cache"
echo " Check local database for library and buffer cache hit ratios"
echo " ---> Requires Oracle user/password and SID specified."
echo " ---> Requires select on v_$sysstat and v_$librarycache"
echo "--tablespace"
echo " Check local database for tablespace capacity in ORACLE_SID"
echo " ---> Requires Oracle user/password specified."
echo " ---> Requires select on dba_data_files and dba_free_space"
echo "--oranames Hostname"
echo " Check remote Oracle Names server"
echo "--help"
echo " Print this help screen"
echo "--version"
echo " Print version and license information"
echo ""
echo "If the plugin doesn't work, check that the ORACLE_HOME environment"
echo "variable is set, that ORACLE_HOME/bin is in your PATH, and the"
echo "tnsnames.ora file is locatable and is properly configured."
echo ""
echo "When checking local database status your ORACLE_SID is case sensitive."
echo ""
echo "If you want to use a default Oracle home, add in your oratab file:"
echo "*:/opt/app/oracle/product/7.3.4:N"
echo ""
support
}

case "$1" in
1)
cmd='--tns'
;;
2)
cmd='--db'
;;
*)
cmd="$1"
;;
esac

# Information options
case "$cmd" in
--help)
print_help
exit $STATE_OK
;;
-h)
print_help
exit $STATE_OK
;;
--version)
print_revision $PROGNAME $REVISION
exit $STATE_OK
;;
-V)
print_revision $PROGNAME $REVISION
exit $STATE_OK
;;
esac

# Hunt down a reasonable ORACLE_HOME
if [ -z "$ORACLE_HOME" ] ; then
# Adjust to taste
for oratab in /var/opt/oracle/oratab /etc/oratab
do
[ ! -f $oratab ] && continue
ORACLE_HOME=`IFS=:
while read SID ORACLE_HOME junk;
do
if [ "$SID" = "$2" -o "$SID" = "*" ] ; then
echo $ORACLE_HOME;
exit;
fi;
done < $oratab`
[ -n "$ORACLE_HOME" ] && break
done
fi
# Last resort
[ -z "$ORACLE_HOME" -a -d $PROGPATH/oracle ] && ORACLE_HOME=$PROGPATH/oracle

if [ "$cmd" != "--db" ]; then
if [ -z "$ORACLE_HOME" -o ! -d "$ORACLE_HOME" ] ; then
echo "Cannot determine ORACLE_HOME for sid $2"
exit $STATE_UNKNOWN
fi
fi
PATH=$PATH:$ORACLE_HOME/bin
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ORACLE_HOME/lib
export ORACLE_HOME PATH LD_LIBRARY_PATH

case "$cmd" in
--tns)
tnschk=` tnsping $2`
tnschk2=` echo $tnschk | grep -c OK`
if [ ${tnschk2} -eq 1 ] ; then
tnschk3=` echo $tnschk | sed -e 's/.*(//' -e 's/).*//'`
echo "OK - reply time ${tnschk3} from $2"
exit $STATE_OK
else
echo "No TNS Listener on $2"
exit $STATE_CRITICAL
fi
;;
--oranames)
namesctl status $2 | awk '
/Server has been running for:/ {
msg = "OK: Up"
for (i = 6; i <= NF; i++) {
msg = msg " " $i
}
status = '$STATE_OK'
}
/error/ {
msg = "CRITICAL: " $0
status = '$STATE_CRITICAL'
}
END {
print msg
exit status
}'
;;
--db)
pmonchk=`ps -ef | grep -v grep | grep -c "ora_pmon_${2}$"`
if [ ${pmonchk} -ge 1 ] ; then
echo "${2} OK - ${pmonchk} PMON process(es) running"
exit $STATE_OK
#if [ -f $ORACLE_HOME/dbs/sga*${2}* ] ; then
#if [ ${pmonchk} -eq 1 ] ; then
#utime=`ls -la $ORACLE_HOME/dbs/sga*$2* | cut -c 43-55`
#echo "${2} OK - running since ${utime}"
#exit $STATE_OK
#fi
else
echo "${2} Database is DOWN"
exit $STATE_CRITICAL
fi
;;
--login)
loginchk=`sqlplus dummy/user@$2 < /dev/null`
loginchk2=` echo $loginchk | grep -c ORA-01017`
if [ ${loginchk2} -eq 1 ] ; then
echo "OK - dummy login connected"
exit $STATE_OK
else
loginchk3=` echo "$loginchk" | grep "ORA-" | head -1`
echo "CRITICAL - $loginchk3"
exit $STATE_CRITICAL
fi
;;
--status)
get_result=`sqlplus dbmonopr/dbmonoprkms11@$2<<EOF
spool off;
select 'status=' || count(0) from dual;
quit;
EOF`
#echo $get_result
status=`echo "$get_result"|grep status|cut -d "=" -f2`
#echo $status
if [ ${status} -eq 1 ] ; then
echo "OK - ${2} Database running"
exit 0
else
echo "CRITICAL - ${2} Database is DOWN"
exit 2
fi
;;
--flash)
get_result=`sqlplus dbmonopr/dbmonoprkms11@$2<<EOF
spool off;
select 'space_area=' || round(sum(percent_space_used)/1110,2) from v\\$flash_recovery_area_usage;
quit;
EOF`
#echo $get_result
space_area=`echo "$get_result"|grep space_area|cut -d "=" -f2`
#echo $space_area
if [ ${space_area} -gt 80 ] ; then
echo "CRITICAL - ${2} Database($space_area%) flash space used"
exit 2
fi
if [ ${space_area} -gt 60 ] ; then
echo "WARNING - ${2} Database($space_area%) flash space used"
exit 0
fi
if [ ${space_area} -lt 60 ] ; then
echo "OK - ${2} Database($space_area%) flash space used"
exit 0
fi
;;
--asm)
get_result=`sqlplus dbmonopr/dbmonoprkms11@$2<<EOF
spool off;
select 'dbinfo=' || name || '|' || total_mb || '|' || free_mb || '|' || round((free_mb/total_mb)*100,2) as use from v\\$asm_diskgroup;
quit;
EOF`
status=`echo "$get_result"|grep dbinfo`
#echo $status
archiveloginfo=`echo $status|cut -d " " -f1|cut -d "|" -f4`
coredatainfo=`echo $status|cut -d " " -f2|cut -d "|" -f4`
recoveryinfo=`echo $status|cut -d " " -f3|cut -d "|" -f4`
#echo $archiveloginfo
#echo $coredatainfo
#echo $recoveryinfo
flag=0
if [ ${archiveloginfo} -lt 30 ] ; then
flag=1
fi
if [ ${archiveloginfo} -lt 5 ] ; then
flag=2
fi
if [ ${coredatainfo} -lt 30 ] ; then
flag=1
fi
if [ ${coredatainfo} -lt 5 ] ; then
flag=2
fi
if [ ${recoveryinfo} -lt 30 ] ; then
flag=1
fi
if [ ${recoveryinfo} -lt 5 ] ; then
flag=2
fi
if [ ${flag} -eq 0 ] ; then
echo "OK - ${2} Database asm(free archivelog:${archiveloginfo}%,free core_data:${coredatainfo}%,free recovery:${recoveryinfo}%) is ok."
exit 0
fi
if [ ${flag} -eq 1 ] ; then
echo "WARNING - ${2} Database asm(free archivelog:${archiveloginfo}%,free core_data:${coredatainfo}%,free recovery:${recoveryinfo}%) less than 30%."
exit 1
fi
if [ ${flag} -eq 2 ] ; then
echo "CRITICAL - ${2} Database asm(free archivelog:${archiveloginfo}%,free core_data:${coredatainfo}%,free recovery:${recoveryinfo}%) less than 10%."
exit 2
fi
;;
--cache)
if [ ${5} -gt ${6} ] ; then
echo "UNKNOWN - Warning level is less then Crit"
exit $STATE_UNKNOWN
fi
result=`sqlplus -s ${3}/${4}@${2} << EOF
set pagesize 0
set numf '9999999.99'
select (1-(pr.value/(dbg.value+cg.value)))*100
from v\\$sysstat pr, v\\$sysstat dbg, v\\$sysstat cg
where pr.name='physical reads'
and dbg.name='db block gets'
and cg.name='consistent gets';
EOF`

if [ -n "`echo $result | grep ORA-`" ] ; then
error=` echo "$result" | grep "ORA-" | head -1`
echo "CRITICAL - $error"
exit $STATE_CRITICAL
fi

buf_hr=`echo "$result" | awk '/^[0-9\. \t]+$/ {print int($1)}'`
buf_hrx=`echo "$result" | awk '/^[0-9\. \t]+$/ {print $1}'`
result=`sqlplus -s ${3}/${4}@${2} << EOF
set pagesize 0
set numf '9999999.99'
select sum(lc.pins)/(sum(lc.pins)+sum(lc.reloads))*100
from v\\$librarycache lc;
EOF`

if [ -n "`echo $result | grep ORA-`" ] ; then
error=` echo "$result" | grep "ORA-" | head -1`
echo "CRITICAL - $error"
exit $STATE_CRITICAL
fi

lib_hr=`echo "$result" | awk '/^[0-9\. \t]+$/ {print int($1)}'`
lib_hrx=`echo "$result" | awk '/^[0-9\. \t]+$/ {print $1}'`

if [ $buf_hr -le ${5} -o $lib_hr -le ${5} ] ; then
echo "${2} CRITICAL - Cache Hit Rates: $lib_hrx% Lib -- $buf_hrx% Buff|lib=$lib_hrx%;${6};${5};0;100 buffer=$buf_hrx%;${6};${5};0;100"
exit $STATE_CRITICAL
fi
if [ $buf_hr -le ${6} -o $lib_hr -le ${6} ] ; then
echo "${2} WARNING - Cache Hit Rates: $lib_hrx% Lib -- $buf_hrx% Buff|lib=$lib_hrx%;${6};${5};0;100 buffer=$buf_hrx%;${6};${5};0;100"
exit $STATE_WARNING
fi
echo "${2} OK - Cache Hit Rates: $lib_hrx% Lib -- $buf_hrx% Buff|lib=$lib_hrx%;${6};${5};0;100 buffer=$buf_hrx%;${6};${5};0;100"

exit $STATE_OK
;;
--tbs)
get_result=`sqlplus -silent /nolog<<EOF
conn dbmonopr/dbmonoprkms11@$2
set heading off feedback off pagesize 0 verify off echo off
SELECT TRIM(DBMS_LOB.SUBSTR(WM_CONCAT(DATAVAL))) VALUE FROM (SELECT 'CNT=' || MAX(FLAG) AS DATAVAL FROM (SELECT '0' FLAG FROM (SELECT TABLESPACE_NAME, ROUND(SUM(BYTES) / 1024 / 1024, 2) SPACE, SUM(BLOCKS) BLOCKS FROM DBA_DATA_FILES GROUP BY TABLESPACE_NAME) D, (SELECT TABLESPACE_NAME, ROUND(SUM(BYTES) / 1024 / 1024, 2) FREE_SPACE FROM DBA_FREE_SPACE GROUP BY TABLESPACE_NAME) F WHERE D.TABLESPACE_NAME = F.TABLESPACE_NAME(+) AND D.TABLESPACE_NAME NOT IN ('UNDOTBS1', 'UNDOTBS2') AND ROUND((1 - NVL(FREE_SPACE, 0) / SPACE) * 100, 2) < 90 UNION ALL SELECT '1' FLAG FROM (SELECT TABLESPACE_NAME, ROUND(SUM(BYTES) / 1024 / 1024, 2) SPACE, SUM(BLOCKS) BLOCKS FROM DBA_DATA_FILES GROUP BY TABLESPACE_NAME) D, (SELECT TABLESPACE_NAME, ROUND(SUM(BYTES) / 1024 / 1024, 2) FREE_SPACE FROM DBA_FREE_SPACE GROUP BY TABLESPACE_NAME) F WHERE D.TABLESPACE_NAME = F.TABLESPACE_NAME(+) AND D.TABLESPACE_NAME NOT IN ('UNDOTBS1', 'UNDOTBS2') AND ROUND((1 - NVL(FREE_SPACE, 0) / SPACE) * 100, 2) >= 90 UNION ALL SELECT '2' FLAG FROM (SELECT TABLESPACE_NAME, ROUND(SUM(BYTES) / 1024 / 1024, 2) SPACE, SUM(BLOCKS) BLOCKS FROM DBA_DATA_FILES GROUP BY TABLESPACE_NAME) D, (SELECT TABLESPACE_NAME, ROUND(SUM(BYTES) / 1024 / 1024, 2) FREE_SPACE FROM DBA_FREE_SPACE GROUP BY TABLESPACE_NAME) F WHERE D.TABLESPACE_NAME = F.TABLESPACE_NAME(+) AND D.TABLESPACE_NAME NOT IN ('UNDOTBS1', 'UNDOTBS2') AND ROUND((1 - NVL(FREE_SPACE, 0) / SPACE) * 100, 2) >= 95) UNION ALL SELECT TRIM(DBMS_LOB.SUBSTR(WM_CONCAT(ABC))) DATAVAL FROM (SELECT 'NAME=' || TABLESPACE_NAME || ':SUM=' || SUM_SPACE || ':USED=' || USED_SPACE AS ABC FROM (SELECT D.TABLESPACE_NAME, SPACE SUM_SPACE, BLOCKS SUM_BLOCKS, SPACE - NVL(FREE_SPACE, 0) USED_SPACE FROM (SELECT TABLESPACE_NAME, ROUND(SUM(BYTES) / (1024 * 1024), 2) SPACE, SUM(BLOCKS) BLOCKS FROM DBA_DATA_FILES GROUP BY TABLESPACE_NAME) D, (SELECT TABLESPACE_NAME, ROUND(SUM(BYTES) / (1024 * 1024), 2) FREE_SPACE FROM DBA_FREE_SPACE GROUP BY TABLESPACE_NAME) F WHERE D.TABLESPACE_NAME = F.TABLESPACE_NAME(+)) A));
exit;
EOF`
#echo $get_result
result=`echo "$get_result"`

if [ $PERCNT -eq 0 ] ; then
echo "OK - ${2} Database tablespace($space_val) is ok."
exit 0
fi
if [ $PERCNT -eq 1 ] ; then
echo "WARNING - ${2} Database tablespace($space_val) large than 90%."
exit 1
fi
if [ $PERCNT -eq 2 ] ; then
echo "CRITICAL - ${2} Database tablespace($space_val) large than 95%."
exit 2
fi
;;
--tablespace)
if [ ${6} -lt ${7} ] ; then
echo "UNKNOWN - Warning level is more then Crit"
exit $STATE_UNKNOWN
fi
result=`sqlplus -s ${3}/${4}@${2} << EOF
set pagesize 0
set numf '9999999.99'
select NVL(b.free,0.0),a.total,100 - trunc(NVL(b.free,0.0)/a.total * 1000) / 10 prc
from (
select tablespace_name,sum(bytes)/1024/1024 total
from dba_data_files group by tablespace_name) A
LEFT OUTER JOIN
( select tablespace_name,sum(bytes)/1024/1024 free
from dba_free_space group by tablespace_name) B
ON a.tablespace_name=b.tablespace_name WHERE a.tablespace_name='${5}';
EOF`

if [ -n "`echo $result | grep ORA-`" ] ; then
error=` echo "$result" | grep "ORA-" | head -1`
echo "CRITICAL - $error"
exit $STATE_CRITICAL
fi

ts_free=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($1)}'`
ts_total=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($2)}'`
ts_pct=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($3)}'`
ts_pctx=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print $3}'`
if [ "$ts_free" -eq 0 -a "$ts_total" -eq 0 -a "$ts_pct" -eq 0 ] ; then
echo "No data returned by Oracle - tablespace $5 not found?"
exit $STATE_UNKNOWN
fi
if [ "$ts_pct" -ge ${6} ] ; then
echo "${2} : ${5} CRITICAL - $ts_pctx% used [ $ts_free / $ts_total MB available ]|${5}=$ts_pctx%;${7};${6};0;100"
exit $STATE_CRITICAL
fi
if [ "$ts_pct" -ge ${7} ] ; then
echo "${2} : ${5} WARNING - $ts_pctx% used [ $ts_free / $ts_total MB available ]|${5}=$ts_pctx%;${7};${6};0;100"
exit $STATE_WARNING
fi
echo "${2} : ${5} OK - $ts_pctx% used [ $ts_free / $ts_total MB available ]|${5}=$ts_pctx%;${7};${6};0;100"
exit $STATE_OK
;;
*)
print_usage
exit $STATE_UNKNOWN
esac

转 nagios监控oracle运行状况的更多相关文章

nagios监控oracle 表空间
oracle表空间满的危害以及处理方式见我的博客链接https://www.cnblogs.com/-abm/p/9764803.html 除此之外我们还需要对表空间实时监控,这样就可以及时了解表空间 ...
nagios监控oracle
本人最近在弄nagios,想用nagios监控oracle,看了网上的很多教程,步骤都是如下.1.由于 nagios 脚本需要读取 oracle 相关文件.所以运行 nagios 的用户需要定义为 o ...
Python监控网站运行状况
利用python便捷的类库,可以方便快速实现对网站运行状况的监控,主要包括对80端口(即网站运行端口),其它tcp服务等端口的监控就可以了解服务器大概的一个运行状况,使用的库主要为urllib2及so ...
使用Zabbix官方模板监控Redis运行状况
运行环境: OS:CentOS 6.8 / Python: 2.6.6 / Pip: 7.1.0 / Redis:3.0 / Zabbix:3.0.3 Zabbix官方提供的监控模板. 项目地址:ht ...
监控pbs运行状况
# 监控内存使用情况 job_id=163997workdir=/share_bio/echo "population_sizes" >> $workdir/pbs/p ...
linux 下监控系统运行状况命令 dstat
推荐读文:https://linux.cn/article-3215-1.html
nagios监控实用教程
nagios监控实用教程 Nagios作为开源网络监视工具,它不但可以有效的监控内存.流量.数据库使用情况.它还可以Windows.Linux主机状态.本专题收录了有关Nagios监控相关文章,供大家 ...
监控 SQL Server (2005/2008) 的运行状况
Microsoft SQL Server 2005 提供了一些工具来监控数据库.方法之一是动态管理视图.动态管理视图 (DMV) 和动态管理函数 (DMF) 返回的服务器状态信息可用于监控服务器实例的 ...
微软官方提供的用于监控MS SQL Server运行状况的工具及SQL语句
Microsoft SQL Server 2005 提供了一些工具来监控数据库.方法之一是动态管理视图.动态管理视图 (DMV) 和动态管理函数 (DMF) 返回的服务器状态信息可用于监控服务器实例的 ...

随机推荐

如何恢复，迁移，添加, 删除 Voting Disks
如何恢复,迁移,添加, 删除 Voting Disks恢复流程在11gR2 之前,我们可以直接直接使用dd命令对voting disk进行备份.DD示例备份votedisk盘:[root@raw1 ...
Cloudera运维
1. 增加一个节点 1. 拷贝cm的jar包到该节点 2. 设置hostname(hostnamectl set-hostname XXX),然后修改hosts文件 3. 所有的节点添加该hostna ...
Percona Xtrabackup 备份MySQL 实例(转)
老规矩,开场白,刚开始用mysqldump,备份100G+的数据库,再加上服务器繁忙,备份速度像蜗牛似的,于是寻找更高效的备份方法.网上都说用xtrabackup比较适合备份大的数据库,而且备份效率也 ...
idea-spark-sbt 打包jar
1.打开idea下的terminal窗口 2.只打包部分项目 sbt insight-import/clean insight-import/assembly 这表示只打包主目录下的insight- ...
一 Optional
从Java8 引入的一个很有趣的特性是Optional类.Optional类主要解决的问题是臭名昭著的空指针异常(NullPointerException). 一: 创建Optional对象: ...
Selenium VS Webdriver
Selenium 是 ThroughtWorks 一个强大的基于浏览器的开源自动化测试工具,它通常用来编写 Web 应用的自动化测试.随着 Selenium 团队发布 Selenium 2(又名 We ...
python 基础操作文件和目录
获得当前目录路径 :os.getcwd() 返回指定目录下的所有文件和目录名:os.listdir() 删除一个文件:os.remove(filename) 删除多个空目录 :os.removefir ...
Windchill 配置LOG文件,使开发中的代码能显示打印的信息
如开发代码的类HomeLogic.java, 包路径在pnt.report.home 需求:需监控此类的打印数据方法:配置D:\ptc\Windchill_10.1\Windchill\codeba ...
PullToRefresh------ListView的使用
第一步 :写出布局文件的设置 <com.handmark.pulltorefresh.library.PullToRefreshListView android:id="@+id/pu ...
strstr strchr strrchr strrstr
通过函数的定义来区分: 1.strstr: 返回子串出现的第一次位置 char *strstr(const char *haystack, const char *needle) 可见,strstr函 ...

转 nagios监控oracle运行状况

Script to read Oracle's alert log

update_alert_log.sql

Thanks

Links

aintaining Oracle Logs and Diagnostic Files

转 nagios监控oracle运行状况的更多相关文章

随机推荐

热门专题