Source code for testcases.OpTestPrdDriver

#!/usr/bin/env python3
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# $Source: op-test-framework/testcases/OpTestPrdDriver.py $
#
# OpenPOWER Automated Test Project
#
# Contributors Listed Below - COPYRIGHT 2015
# [+] International Business Machines Corp.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.
#
# IBM_PROLOG_END_TAG

'''
OpTestPrdDriver
---------------

PRD driver package for OpenPower testing.

This class will test the functionality of following:

- PRD (Processor Runtime Diagnostic) enables the support for handling certain
  RAS events by the userspace application.
- For testing out this feature, we require the userspace xscom-utils, part of the 'skiboot' tree.
- The skiboot tree is cloned into the /tmp directory.
- Using the xscom utility, we need to inject errors through FIR (Fault Isolation Register)
  and observe them getting cleared if PRD handles them successfully.

  - 0x01020013 IPOLL mask register
  - 0x02010840 PBA Local Fault isolation register
  - 0x02010843 PBA Local fault isolation mask register
'''

import time
import subprocess
import re
import sys
import os
import random


from common.OpTestConstants import OpTestConstants as BMC_CONST
import unittest

import OpTestConfiguration
from common.OpTestError import OpTestError
from common.OpTestSystem import OpSystemState
from common.Exceptions import CommandFailed

import logging
import OpTestLogger
log = OpTestLogger.optest_logger_glob.get_logger(__name__)


class ErrorToInject(object):
    '''
    Plain record describing one fault to inject.

    :param desc: Human readable description; this is what ``str()`` returns.
    :param FIR: Stored unchanged as ``self.FIR``.
    :param FIMR: Stored unchanged as ``self.FIMR``.
    :param ERROR: Stored unchanged as ``self.ERROR``.
    '''

    def __init__(self, desc, FIR, FIMR, ERROR):
        # Keep every constructor argument as an attribute of the same name.
        self.desc, self.FIR = desc, FIR
        self.FIMR, self.ERROR = FIMR, ERROR

    def __str__(self):
        # The description doubles as the printable form of the record.
        return self.desc


class OpTestPrdDriver(unittest.TestCase):
    '''
    Inject errors into a Fault Isolation Register (FIR) with the xscom
    utilities (getscom/putscom) and check that opal-prd clears them.
    '''

    def setUp(self):
        '''
        Fetch the framework objects from the global configuration and move
        the system to the OS state.
        '''
        conf = OpTestConfiguration.conf
        self.cv_IPMI = conf.ipmi()
        self.cv_SYSTEM = conf.system()
        self.cv_HOST = conf.host()
        self.platform = conf.platform()
        self.bmc_type = conf.args.bmc_type
        self.cv_SYSTEM.goto_state(OpSystemState.OS)

    def prd_init(self):
        '''
        Common preparation step, called by ``runTest`` before any error is
        injected.

        1. Query the host OS level.
        2. Get the list of processor chips with ``getscom -l``, e.g. ::

            ['00000000', '00000001', '00000010']

        3. Pick one chip at random and store it in ``self.random_chip``.

        Fails the test when ``getscom -l`` lists no processor chip.
        '''
        # Get OS level
        self.cv_HOST.host_get_OS_Level(console=1)

        # Getting list of processor chip Id's(executing getscom -l to get chip id's)
        l_res = self.cv_HOST.host_run_command(
            "PATH=/usr/local/sbin:$PATH getscom -l", console=1)

        # Raw string: "\d" in a normal string literal is an invalid escape.
        chip_re = re.compile(r"(\d{8}).*processor")
        matches = (chip_re.search(line) for line in l_res)
        l_chips = [m.group(1) for m in matches if m]

        log.debug("chips list:%s list length: %s" % (l_chips, len(l_chips)))
        self.assertNotEqual(
            len(l_chips), 0, "Getscom failed to list processor chip id's")
        l_chips.sort()
        log.debug(l_chips)  # ['00000000', '00000001', '00000010']
        self.random_chip = random.choice(l_chips)

    def prd_test_core_fir(self, FIR, FIMR, ERROR):
        '''
        Inject a core FIR error on the randomly chosen chip and verify that
        opal-prd clears it.

        The error bits are first un-masked in the mask register, then written
        to the FIR. The FIR is polled until it reads back as
        ``BMC_CONST.FAULT_ISOLATION_REGISTER_CONTENT``, and finally the IPOLL
        mask register is compared with ``self.IPOLL_MASK_REGISTER_CONTENT``.

        Requires ``self.random_chip`` (set by ``prd_init``) and
        ``self.IPOLL_MASK_REGISTER`` / ``self.IPOLL_MASK_REGISTER_CONTENT``
        (set by ``runTest``).

        :param FIR: Local Fault Isolation register
        :type FIR: str
        :param FIMR: Local Fault Isolation mask register
        :type FIMR: str
        :param ERROR: Core FIR error, this error will be written to FIR.
        :type ERROR: int
        :returns: ``BMC_CONST.FW_SUCCESS``; a failed check fails the test
                  through the unittest assertions.
        '''
        console = self.cv_SYSTEM.cv_HOST.get_ssh_connection()
        chip_id = "0x" + self.random_chip
        log.debug(chip_id)
        log.debug("OPAL-PRD: Injecting error 0x%x on FIR: %s" % (ERROR, FIR))

        # The two read commands are reused several times below.
        read_fir = "PATH=/usr/local/sbin:$PATH getscom -c %s %s" % (chip_id, FIR)
        read_fimr = "PATH=/usr/local/sbin:$PATH getscom -c %s %s" % (chip_id, FIMR)

        # Read Local Fault Isolation register
        console.run_command(read_fir)

        # Reading Local Fault Isolation mask register
        l_res = console.run_command(read_fimr)
        log.debug(l_res)

        # Changing the FIMR value to un-masked value: clear the ERROR bits in
        # the 64-bit mask. zfill keeps an empty read-back parsing as zero.
        l_val = hex(int(l_res[-1].zfill(16), 16) & (ERROR ^ 0xffffffffffffffff))

        # Writing the same value to Local Fault Isolation mask register again
        l_cmd = "PATH=/usr/local/sbin:$PATH putscom -c %s %s %s" % (
            chip_id, FIMR, l_val)
        console.run_command(l_cmd)

        # Inject a core error on FIR
        l_cmd = "PATH=/usr/local/sbin:$PATH putscom -c %s %s %s" % (
            chip_id, FIR, hex(ERROR))
        console.run_command(l_cmd)

        time.sleep(5)
        tries = 30
        # Poll once per second, "tries" times in total.
        for i in range(1, tries + 1):
            time.sleep(1)
            # Read Local Fault Isolation register again
            l_res = console.run_command(read_fir)
            if l_res[-1] == BMC_CONST.FAULT_ISOLATION_REGISTER_CONTENT:
                log.debug("Opal-prd handled core hardware error")
                break
            log.debug("Opal-prd hardware error not cleared, waiting "
                      "(%d/%d)" % (i, tries))

        # Check FIR got cleared by opal-prd
        self.assertEqual(l_res[-1], BMC_CONST.FAULT_ISOLATION_REGISTER_CONTENT,
                         "Opal-prd not clearing hardware errors in runtime")

        # Reading the Local Fault Isolation Mask Register again
        l_res = console.run_command(read_fimr)
        log.debug(l_res)

        # check for IPOLL mask register value to see opal-prd cleared the value
        l_cmd = "PATH=/usr/local/sbin:$PATH getscom -c %s %s" % (
            chip_id, self.IPOLL_MASK_REGISTER)
        l_res = console.run_command(l_cmd)
        log.debug(l_res)
        self.assertEqual(l_res[-1], self.IPOLL_MASK_REGISTER_CONTENT,
                         "Opal-prd is not clearing the IPOLL MASK REGISTER after injecting core FIR error")
        log.debug("Opal-prd cleared the IPOLL MASK REGISTER")
        return BMC_CONST.FW_SUCCESS

    def runTest(self):
        '''
        This function performs below steps:

        1. Skip when opal-prd is not supported for this BMC type or the CPU
           type is unknown.
        2. Run ``prd_init()`` to pick a random processor chip.
        3. Check the IPOLL mask register value to see whether opal-prd is
           running; when it does not hold the expected content, start the
           opal-prd service and check again.
        4. Call ``prd_test_core_fir()`` for each error in the list of faults
           to inject. The list is only populated for POWER8/POWER8E.
        '''
        if not self.cv_HOST.host_prd_supported(self.bmc_type):
            self.skipTest("opal-prd NOT supported on this system, bmc_type={}".format(self.bmc_type))

        self.prd_init()
        # need console in case of crash or lockups
        l_con = self.cv_SYSTEM.console
        cpu = self.cv_HOST.host_get_proc_gen(console=1)

        faults_to_inject = []
        if cpu not in ["POWER8", "POWER8E", "POWER9", "POWER9P"]:
            self.skipTest("Unknown CPU type %s" % cpu)

        if cpu in ["POWER8", "POWER8E"]:
            self.IPOLL_MASK_REGISTER = "0x01020013"
            self.IPOLL_MASK_REGISTER_CONTENT = "0000000000000000"
            PBA_FAULT_ISOLATION_REGISTER = "0x02010840"
            PBA_FAULT_ISOLATION_MASK_REGISTER = "0x02010843"
            PBAFIR_OCI_APAR_ERR = 0x8000000000000000
            PBAFIR_PB_CE_FW = 0x0400000000000000
            PBAFIR_PB_RDDATATO_FW = 0x2000000000000000
            PBAFIR_PB_RDADRERR_FW = 0x6000000000000000
            faults_to_inject = [
                ErrorToInject("PRD: Test for PBAFIR_OCI_APAR_ERR-->OCI Address Parity Error",
                              PBA_FAULT_ISOLATION_REGISTER,
                              PBA_FAULT_ISOLATION_MASK_REGISTER,
                              PBAFIR_OCI_APAR_ERR),
                ErrorToInject("PRD: Test for PBAFIR_PB_CE_FW-->PB Read Data CE Error for Forwarded Request",
                              PBA_FAULT_ISOLATION_REGISTER,
                              PBA_FAULT_ISOLATION_MASK_REGISTER,
                              PBAFIR_PB_CE_FW),
                ErrorToInject("PRD: Test for PBAFIR_PB_RDDATATO_FW-->PB Read Data Timeout for Forwarded Request",
                              PBA_FAULT_ISOLATION_REGISTER,
                              PBA_FAULT_ISOLATION_MASK_REGISTER,
                              PBAFIR_PB_RDDATATO_FW),
                ErrorToInject("PRD: Test for PBAFIR_PB_RDADRERR_FW-->PB CRESP Addr Error Received for Forwarded Read Request",
                              PBA_FAULT_ISOLATION_REGISTER,
                              PBA_FAULT_ISOLATION_MASK_REGISTER,
                              PBAFIR_PB_RDADRERR_FW),
            ]

        if cpu in ["POWER9", "POWER9P"]:
            # TP.TPCHIP.PIB.PCBMS.COMP.INTR_COMP.HOST_MASK_REG
            self.IPOLL_MASK_REGISTER = "0xF0033"
            self.IPOLL_MASK_REGISTER_CONTENT = "a400000000000000"

        # NOTE(review): the extracted source lost its indentation; this block
        # is placed at method level (run for every CPU type) - confirm
        # against the upstream file.
        try:
            l_con.run_command("opal-prd --debug --stdio")
        except CommandFailed as cf:
            # Best effort: a failure here is only logged.
            log.debug("opal-prd failed to activate %s" % str(cf))

        # check for IPOLL mask register value to check opal-prd is running or not
        l_cmd = "PATH=/usr/local/sbin:$PATH getscom -c 0x0 %s" % self.IPOLL_MASK_REGISTER
        l_res = l_con.run_command(l_cmd)
        if l_res[-1] == self.IPOLL_MASK_REGISTER_CONTENT:
            log.debug("Opal-prd is running")
        else:
            # NOTE(review): the re-read and assertion are assumed to belong
            # to this branch - confirm against the upstream file.
            l_con.run_command("service opal-prd start")
            l_res = l_con.run_command(l_cmd)
            self.assertEqual(l_res[-1], self.IPOLL_MASK_REGISTER_CONTENT,
                             "IPOLL MASK REGISTER is not getting cleared by opal-prd")
            log.debug("Opal-prd is running")

        # Test for PBA FIR with different core errors
        for e in faults_to_inject:
            log.debug("PRD Test: %s" % str(e))
            self.prd_test_core_fir(e.FIR, e.FIMR, e.ERROR)