# Source code for testcases.KernelLog

#!/usr/bin/env python3
# OpenPOWER Automated Test Project
#
# Contributors Listed Below - COPYRIGHT 2017
# [+] International Business Machines Corp.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.
#

'''
Kernel Log
----------

Check the Linux kernel log in skiroot and the OS for warnings and errors,
filtering for known benign problems (or problems that are just a Linux issue
rather than a firmware issue).

'''

import unittest
import re

import OpTestConfiguration
from common.OpTestSystem import OpSystemState
from common.OpTestConstants import OpTestConstants as BMC_CONST
from common.Exceptions import CommandFailed

import logging
import OpTestLogger
log = OpTestLogger.optest_logger_glob.get_logger(__name__)


class KernelLog():
    '''
    Shared kernel log check, meant to be mixed into a concrete test case.

    ``runTest`` calls ``self.setup_test()`` and then runs dmesg through
    ``self.c.run_command()``, so a subclass has to provide ``setup_test()``
    and make sure ``self.c`` is set by the time it returns. ``runTest`` also
    calls ``self.assertTrue()``, so the subclass needs ``unittest.TestCase``
    (or something with a compatible ``assertTrue``) among its bases.
    '''

    # Preferred dmesg invocations, tried in order until one succeeds.
    DMESG_COMMANDS = (
        "dmesg --color=never -T --level=alert,crit,err,warn",
        "dmesg -T --level=alert,crit,err,warn",
    )

    # Last resort when dmesg does not understand the options above.
    DMESG_FALLBACK_COMMAND = "dmesg -r|grep '<[4321]>'"

    # Regular expressions (used with re.search) for log lines that are known
    # to be benign, or that are a Linux issue rather than a firmware issue.
    # Raw strings keep regex escapes such as \d and \( out of Python's own
    # string-escape processing.
    FILTER_OUT = (
        r"Unable to open file.* /etc/keys/x509",
        r"OF: reserved mem: not enough space all defined regions.",
        r"nvidia: loading out-of-tree module taints kernel",
        r"nvidia: module license 'NVIDIA' taints kernel.",
        r"Disabling lock debugging due to kernel taint",
        r"NVRM: loading NVIDIA UNIX ppc64le Kernel Module",
        r"This architecture does not have kernel memory protection.",
        r"aacraid.* Comm Interface type3 enabled",
        r"mpt3sas_cm.* MSI-X vectors supported",
        r"i40e.*PCI-Express bandwidth available for this device may be insu",
        r"i40e.*Please move the device to a different PCI-e link with more",
        r"systemd.*Dependency failed for pNFS block layout mapping daemon.",
        r"NFSD.* Using .* as the NFSv4 state recovery directory",
        r"ipmi_si.* Unable to find any System Interface",
        r"mpt3sas.*invalid short VPD tag 00 at offset 1",
        r"synth uevent.*failed to send uevent",
        r"vio: uevent: failed to send synthetic uevent",
        r"pstore: decompression failed",
        r"NCQ Send/Recv Log not supported",
        r"output lines suppressed due to ratelimiting",
        # Nouveau not supporting our GPUs is expected, not OPAL bug.
        r"nouveau .* unknown chipset",
        r"nouveau: probe of .* failed with error -12",
        # The below xive message should go away when
        # https://github.com/open-power/skiboot/issues/171 is resolved
        r"xive: Interrupt.*type mismatch, Linux says Level, FW says Edge",
        # This is why we can't have nice things.
        r"systemd-journald.*File.*corrupted or uncleanly shut down, renaming and replacing.",
        # Not having memory on all NUMA nodes isn't *necessarily* fatal
        # or a problem
        r"Could not find start_pfn for node",
        # PNOR tests open a r/w window on a RO partition, currently fails
        # like this
        r"mtd.*opal_flash_async_op\(op=1\) failed \(rc -6\)",
        # New warning, but apparently harmless
        r"Cannot allocate SWIOTLB buffer",
        # Ignore a quirk that we hit on (at least some) Tuletas,
        r"TI XIO2000a quirk detected; secondary bus fast back-to-back transfers disabled",
        # SCSI is Fun, and for some reason likes being very severe about
        # discovering disks,
        r"sd .* \[sd.*\] Assuming drive cache: write through",
        # SCSI is fun. Progress as dots
        r" \.$",
        # SCSI is fun, of course this is critically important event
        r"s[dr] .* Power-on or device reset occurred",
        r".?ready$",
        # Mellanox!
        r"mlx4_en.* Port \d+: Using \d+ [TR]X rings",
        r"mlx4_en.* Port \d+: Initializing port",
        r"mlx4_core.*Old device ETS support detected",
        r"mlx4_core.*Consider upgrading device FW.",
        # Skiboot doesn't yet support secvars, but the Kernel looks for them
        r"secvar-sysfs: secvar: failed to retrieve secvar operations.",
    )

    # Extra patterns that only apply for a particular bmc_type.
    PLATFORM_FILTER_OUT = {
        'qemu': (
            # Qemu doesn't (yet) have pstate support, so ignore errors there.
            r'powernv-cpufreq: ibm,pstate-min node not found',
            r'nvram: Failed to find or create lnx,oops-log',
            r'nvram: Failed to initialize oops partition!',
            # some weird disk setups
            r'vdb.*start.*is beyond EOD',
            # urandom_read fun
            r'urandom_read: \d+ callbacks suppressed',
        ),
        'mambo': (
            # We have a couple of things showing up in Mambo runs.
            # We should probably fix this, but ignore for now.
            #
            # First, no pstates:
            r'powernv-cpufreq: ibm,pstate-min node not found',
            # Strange IMC failure
            r'IMC PMU nest_mcs01_imc Register failed',
            # urandom_read fun
            r'urandom_read: \d+ callbacks suppressed',
        ),
    }

    def setUp(self):
        '''
        Pull the host, IPMI and system objects and the BMC type out of the
        global op-test configuration.
        '''
        conf = OpTestConfiguration.conf
        self.cv_HOST = conf.host()
        self.cv_IPMI = conf.ipmi()
        self.cv_SYSTEM = conf.system()
        self.bmc_type = conf.args.bmc_type

    def _read_log_entries(self):
        '''
        Return the warning-and-worse kernel log lines from ``self.c``.

        Depending on where we're running, we may need to do all sorts of
        things to get a sane dmesg output, so several invocations are tried.

        Raises CommandFailed when even the last fallback fails for a reason
        other than grep simply matching nothing.
        '''
        for command in self.DMESG_COMMANDS:
            try:
                return self.c.run_command(command)
            except CommandFailed:
                # This dmesg doesn't accept these options; try the next form.
                continue

        try:
            return self.c.run_command(self.DMESG_FALLBACK_COMMAND)
        except CommandFailed as cf:
            # An exit code of 1 and no output can mean success, as it means
            # grep found nothing to report.
            if cf.exitcode == 1 and not any(cf.output):
                return []
            # Anything else means the log could not be read at all. Passing
            # silently here would report success without checking anything.
            raise

    def _filter_patterns(self):
        '''
        Return the compiled benign-message patterns for ``self.bmc_type``.
        '''
        sources = list(self.FILTER_OUT)
        sources.extend(self.PLATFORM_FILTER_OUT.get(self.bmc_type, ()))
        return [re.compile(source) for source in sources]

    def runTest(self):
        '''
        Fail if the kernel log has warnings or errors that are not known to
        be benign.
        '''
        self.setup_test()

        entries = self._read_log_entries()
        patterns = self._filter_patterns()

        # Blank lines carry no information, so they are dropped rather than
        # reported as a failure with nothing to show for it.
        offending = [
            entry for entry in entries
            if entry and not any(p.search(entry) for p in patterns)
        ]

        msg = '\n'.join(offending)
        self.assertTrue(not offending,
                        "Warnings/Errors in Kernel log:\n%s" % msg)


class Skiroot(KernelLog, unittest.TestCase):
    '''
    Kernel log check run against the Petitboot shell (skiroot).
    '''

    def setup_test(self):
        '''
        Bring the system to the Petitboot shell and use the system console
        for running commands.
        '''
        self.test = "skiroot"
        self.cv_SYSTEM.goto_state(OpSystemState.PETITBOOT_SHELL)
        self.c = self.cv_SYSTEM.console
class Host(KernelLog, unittest.TestCase):
    '''
    Kernel log check run against the host OS.
    '''

    def setup_test(self):
        '''
        Bring the system to the OS state and use an SSH connection to the
        host for running commands.
        '''
        self.test = "host"
        self.cv_SYSTEM.goto_state(OpSystemState.OS)
        self.c = self.cv_HOST.get_ssh_connection()