diff options
| author | SrujanaChalla <schalla@marvell.com> | 2020-03-13 17:17:07 +0530 |
|---|---|---|
| committer | Herbert Xu <herbert@gondor.apana.org.au> | 2020-03-20 14:36:52 +1100 |
| commit | 10b4f09491bfeb0b298cb2f49df585510ee6189a (patch) | |
| tree | 9b11e02fe5f9553fecd93939b09784f17e52f047 | |
| parent | d9110b0b01ff1cd02751cd5c2c94e938a8906083 (diff) | |
| download | linux-10b4f09491bfeb0b298cb2f49df585510ee6189a.tar.gz linux-10b4f09491bfeb0b298cb2f49df585510ee6189a.tar.bz2 linux-10b4f09491bfeb0b298cb2f49df585510ee6189a.zip | |
crypto: marvell - add the Virtual Function driver for CPT
Add support for the cryptographic accelerator unit virtual functions on
OcteonTX 83XX SoC.
Co-developed-by: Lukasz Bartosik <lbartosik@marvell.com>
Signed-off-by: Lukasz Bartosik <lbartosik@marvell.com>
Signed-off-by: SrujanaChalla <schalla@marvell.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
| -rw-r--r-- | drivers/crypto/marvell/octeontx/Makefile | 4 | ||||
| -rw-r--r-- | drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h | 577 | ||||
| -rw-r--r-- | drivers/crypto/marvell/octeontx/otx_cptvf.h | 104 | ||||
| -rw-r--r-- | drivers/crypto/marvell/octeontx/otx_cptvf_algs.c | 1744 | ||||
| -rw-r--r-- | drivers/crypto/marvell/octeontx/otx_cptvf_algs.h | 188 | ||||
| -rw-r--r-- | drivers/crypto/marvell/octeontx/otx_cptvf_main.c | 985 | ||||
| -rw-r--r-- | drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c | 247 | ||||
| -rw-r--r-- | drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c | 612 | ||||
| -rw-r--r-- | drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h | 227 |
9 files changed, 4686 insertions, 2 deletions
diff --git a/drivers/crypto/marvell/octeontx/Makefile b/drivers/crypto/marvell/octeontx/Makefile index 627d00eb9b3b..5e956fe1a85b 100644 --- a/drivers/crypto/marvell/octeontx/Makefile +++ b/drivers/crypto/marvell/octeontx/Makefile @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx-cpt.o +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx-cpt.o octeontx-cptvf.o octeontx-cpt-objs := otx_cptpf_main.o otx_cptpf_mbox.o otx_cptpf_ucode.o +octeontx-cptvf-objs := otx_cptvf_main.o otx_cptvf_mbox.o otx_cptvf_reqmgr.o \ + otx_cptvf_algs.o diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h index bec483fbfefd..b8bdb9f134f3 100644 --- a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h +++ b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h @@ -15,11 +15,19 @@ /* Device IDs */ #define OTX_CPT_PCI_PF_DEVICE_ID 0xa040 +#define OTX_CPT_PCI_VF_DEVICE_ID 0xa041 #define OTX_CPT_PCI_PF_SUBSYS_ID 0xa340 +#define OTX_CPT_PCI_VF_SUBSYS_ID 0xa341 /* Configuration and status registers are in BAR0 on OcteonTX platform */ #define OTX_CPT_PF_PCI_CFG_BAR 0 +#define OTX_CPT_VF_PCI_CFG_BAR 0 + +#define OTX_CPT_BAR_E_CPTX_VFX_BAR0_OFFSET(a, b) \ + (0x000020000000ll + 0x1000000000ll * (a) + 0x100000ll * (b)) +#define OTX_CPT_BAR_E_CPTX_VFX_BAR0_SIZE 0x400000 + /* Mailbox interrupts offset */ #define OTX_CPT_PF_MBOX_INT 3 #define OTX_CPT_PF_INT_VEC_E_MBOXX(x, a) ((x) + (a)) @@ -28,6 +36,19 @@ /* Maximum supported microcode groups */ #define OTX_CPT_MAX_ENGINE_GROUPS 8 +/* CPT instruction size in bytes */ +#define OTX_CPT_INST_SIZE 64 +/* CPT queue next chunk pointer size in bytes */ +#define OTX_CPT_NEXT_CHUNK_PTR_SIZE 8 + +/* OcteonTX CPT VF MSIX vectors and their offsets */ +#define OTX_CPT_VF_MSIX_VECTORS 2 +#define OTX_CPT_VF_INTR_MBOX_MASK BIT(0) +#define OTX_CPT_VF_INTR_DOVF_MASK BIT(1) +#define OTX_CPT_VF_INTR_IRDE_MASK BIT(2) +#define OTX_CPT_VF_INTR_NWRP_MASK BIT(3) +#define 
OTX_CPT_VF_INTR_SERR_MASK BIT(4) + /* OcteonTX CPT PF registers */ #define OTX_CPT_PF_CONSTANTS (0x0ll) #define OTX_CPT_PF_RESET (0x100ll) @@ -78,6 +99,190 @@ #define OTX_CPT_PF_VFX_MBOXX(b, c) (0x8001000ll | (u64)(b) << 20 | \ (u64)(c) << 8) +/* OcteonTX CPT VF registers */ +#define OTX_CPT_VQX_CTL(b) (0x100ll | (u64)(b) << 20) +#define OTX_CPT_VQX_SADDR(b) (0x200ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_WAIT(b) (0x400ll | (u64)(b) << 20) +#define OTX_CPT_VQX_INPROG(b) (0x410ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE(b) (0x420ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ACK(b) (0x440ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_INT_W1S(b) (0x460ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_INT_W1C(b) (0x468ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ENA_W1S(b) (0x470ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ENA_W1C(b) (0x478ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_INT(b) (0x500ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_INT_W1S(b) (0x508ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_ENA_W1S(b) (0x510ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_ENA_W1C(b) (0x518ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DOORBELL(b) (0x600ll | (u64)(b) << 20) +#define OTX_CPT_VFX_PF_MBOXX(b, c) (0x1000ll | ((b) << 20) | ((c) << 3)) + +/* + * Enumeration otx_cpt_ucode_error_code_e + * + * Enumerates ucode errors + */ +enum otx_cpt_ucode_error_code_e { + CPT_NO_UCODE_ERROR = 0x00, + ERR_OPCODE_UNSUPPORTED = 0x01, + + /* Scatter gather */ + ERR_SCATTER_GATHER_WRITE_LENGTH = 0x02, + ERR_SCATTER_GATHER_LIST = 0x03, + ERR_SCATTER_GATHER_NOT_SUPPORTED = 0x04, + +}; + +/* + * Enumeration otx_cpt_comp_e + * + * CPT OcteonTX Completion Enumeration + * Enumerates the values of CPT_RES_S[COMPCODE]. 
+ */ +enum otx_cpt_comp_e { + CPT_COMP_E_NOTDONE = 0x00, + CPT_COMP_E_GOOD = 0x01, + CPT_COMP_E_FAULT = 0x02, + CPT_COMP_E_SWERR = 0x03, + CPT_COMP_E_HWERR = 0x04, + CPT_COMP_E_LAST_ENTRY = 0x05 +}; + +/* + * Enumeration otx_cpt_vf_int_vec_e + * + * CPT OcteonTX VF MSI-X Vector Enumeration + * Enumerates the MSI-X interrupt vectors. + */ +enum otx_cpt_vf_int_vec_e { + CPT_VF_INT_VEC_E_MISC = 0x00, + CPT_VF_INT_VEC_E_DONE = 0x01 +}; + +/* + * Structure cpt_inst_s + * + * CPT Instruction Structure + * This structure specifies the instruction layout. Instructions are + * stored in memory as little-endian unless CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_inst_s_s + * Word 0 + * doneint:1 Done interrupt. + * 0 = No interrupts related to this instruction. + * 1 = When the instruction completes, CPT()_VQ()_DONE[DONE] will be + * incremented,and based on the rules described there an interrupt may + * occur. + * Word 1 + * res_addr [127: 64] Result IOVA. + * If nonzero, specifies where to write CPT_RES_S. + * If zero, no result structure will be written. + * Address must be 16-byte aligned. + * Bits <63:49> are ignored by hardware; software should use a + * sign-extended bit <48> for forward compatibility. + * Word 2 + * grp:10 [171:162] If [WQ_PTR] is nonzero, the SSO guest-group to use when + * CPT submits work SSO. + * For the SSO to not discard the add-work request, FPA_PF_MAP() must map + * [GRP] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * tt:2 [161:160] If [WQ_PTR] is nonzero, the SSO tag type to use when CPT + * submits work to SSO + * tag:32 [159:128] If [WQ_PTR] is nonzero, the SSO tag to use when CPT + * submits work to SSO. + * Word 3 + * wq_ptr [255:192] If [WQ_PTR] is nonzero, it is a pointer to a + * work-queue entry that CPT submits work to SSO after all context, + * output data, and result write operations are visible to other + * CNXXXX units and the cores. Bits <2:0> must be zero. 
+ * Bits <63:49> are ignored by hardware; software should + * use a sign-extended bit <48> for forward compatibility. + * Internal: + * Bits <63:49>, <2:0> are ignored by hardware, treated as always 0x0. + * Word 4 + * ei0; [319:256] Engine instruction word 0. Passed to the AE/SE. + * Word 5 + * ei1; [383:320] Engine instruction word 1. Passed to the AE/SE. + * Word 6 + * ei2; [447:384] Engine instruction word 2. Passed to the AE/SE. + * Word 7 + * ei3; [511:448] Engine instruction word 3. Passed to the AE/SE. + * + */ +union otx_cpt_inst_s { + u64 u[8]; + + struct { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_17_63:47; + u64 doneint:1; + u64 reserved_0_15:16; +#else /* Word 0 - Little Endian */ + u64 reserved_0_15:16; + u64 doneint:1; + u64 reserved_17_63:47; +#endif /* Word 0 - End */ + u64 res_addr; +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 2 - Big Endian */ + u64 reserved_172_191:20; + u64 grp:10; + u64 tt:2; + u64 tag:32; +#else /* Word 2 - Little Endian */ + u64 tag:32; + u64 tt:2; + u64 grp:10; + u64 reserved_172_191:20; +#endif /* Word 2 - End */ + u64 wq_ptr; + u64 ei0; + u64 ei1; + u64 ei2; + u64 ei3; + } s; +}; + +/* + * Structure cpt_res_s + * + * CPT Result Structure + * The CPT coprocessor writes the result structure after it completes a + * CPT_INST_S instruction. The result structure is exactly 16 bytes, and + * each instruction completion produces exactly one result structure. + * + * This structure is stored in memory as little-endian unless + * CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_res_s_s + * Word 0 + * doneint:1 [16:16] Done interrupt. This bit is copied from the + * corresponding instruction's CPT_INST_S[DONEINT]. + * compcode:8 [7:0] Indicates completion/error status of the CPT coprocessor + * for the associated instruction, as enumerated by CPT_COMP_E. 
+ * Core software may write the memory location containing [COMPCODE] to + * 0x0 before ringing the doorbell, and then poll for completion by + * checking for a nonzero value. + * Once the core observes a nonzero [COMPCODE] value in this case,the CPT + * coprocessor will have also completed L2/DRAM write operations. + * Word 1 + * reserved + * + */ +union otx_cpt_res_s { + u64 u[2]; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_17_63:47; + u64 doneint:1; + u64 reserved_8_15:8; + u64 compcode:8; +#else /* Word 0 - Little Endian */ + u64 compcode:8; + u64 reserved_8_15:8; + u64 doneint:1; + u64 reserved_17_63:47; +#endif /* Word 0 - End */ + u64 reserved_64_127; + } s; +}; + /* * Register (NCB) otx_cpt#_pf_bist_status * @@ -246,4 +451,374 @@ union otx_cptx_pf_qx_ctl { #endif /* Word 0 - End */ } s; }; -#endif /* __OTX_CPT_HW_TYPES_H */ + +/* + * Register (NCB) otx_cpt#_vq#_saddr + * + * CPT Queue Starting Buffer Address Registers + * These registers set the instruction buffer starting address. + * otx_cptx_vqx_saddr_s + * Word0 + * reserved_49_63:15 [63:49] Reserved. + * ptr:43 [48:6](R/W/H) Instruction buffer IOVA <48:6> (64-byte aligned). + * When written, it is the initial buffer starting address; when read, + * it is the next read pointer to be requested from L2C. The PTR field + * is overwritten with the next pointer each time that the command buffer + * segment is exhausted. New commands will then be read from the newly + * specified command buffer pointer. + * reserved_0_5:6 [5:0] Reserved. 
+ * + */ +union otx_cptx_vqx_saddr { + u64 u; + struct otx_cptx_vqx_saddr_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_49_63:15; + u64 ptr:43; + u64 reserved_0_5:6; +#else /* Word 0 - Little Endian */ + u64 reserved_0_5:6; + u64 ptr:43; + u64 reserved_49_63:15; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_misc_ena_w1s + * + * CPT Queue Misc Interrupt Enable Set Register + * This register sets interrupt enable bits. + * otx_cptx_vqx_misc_ena_w1s_s + * Word0 + * reserved_5_63:59 [63:5] Reserved. + * swerr:1 [4:4](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[SWERR]. + * nwrp:1 [3:3](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[NWRP]. + * irde:1 [2:2](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[IRDE]. + * dovf:1 [1:1](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[DOVF]. + * mbox:1 [0:0](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[MBOX]. + * + */ +union otx_cptx_vqx_misc_ena_w1s { + u64 u; + struct otx_cptx_vqx_misc_ena_w1s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_5_63:59; + u64 swerr:1; + u64 nwrp:1; + u64 irde:1; + u64 dovf:1; + u64 mbox:1; +#else /* Word 0 - Little Endian */ + u64 mbox:1; + u64 dovf:1; + u64 irde:1; + u64 nwrp:1; + u64 swerr:1; + u64 reserved_5_63:59; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_doorbell + * + * CPT Queue Doorbell Registers + * Doorbells for the CPT instruction queues. + * otx_cptx_vqx_doorbell_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * dbell_cnt:20 [19:0](R/W/H) Number of instruction queue 64-bit words to add + * to the CPT instruction doorbell count. Readback value is the the + * current number of pending doorbell requests. If counter overflows + * CPT()_VQ()_MISC_INT[DBELL_DOVF] is set. 
To reset the count back to + * zero, write one to clear CPT()_VQ()_MISC_INT_ENA_W1C[DBELL_DOVF], + * then write a value of 2^20 minus the read [DBELL_CNT], then write one + * to CPT()_VQ()_MISC_INT_W1C[DBELL_DOVF] and + * CPT()_VQ()_MISC_INT_ENA_W1S[DBELL_DOVF]. Must be a multiple of 8. + * All CPT instructions are 8 words and require a doorbell count of + * multiple of 8. + */ +union otx_cptx_vqx_doorbell { + u64 u; + struct otx_cptx_vqx_doorbell_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 dbell_cnt:20; +#else /* Word 0 - Little Endian */ + u64 dbell_cnt:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_inprog + * + * CPT Queue In Progress Count Registers + * These registers contain the per-queue instruction in flight registers. + * otx_cptx_vqx_inprog_s + * Word0 + * reserved_8_63:56 [63:8] Reserved. + * inflight:8 [7:0](RO/H) Inflight count. Counts the number of instructions + * for the VF for which CPT is fetching, executing or responding to + * instructions. However this does not include any interrupts that are + * awaiting software handling (CPT()_VQ()_DONE[DONE] != 0x0). + * A queue may not be reconfigured until: + * 1. CPT()_VQ()_CTL[ENA] is cleared by software. + * 2. [INFLIGHT] is polled until equals to zero. + */ +union otx_cptx_vqx_inprog { + u64 u; + struct otx_cptx_vqx_inprog_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_8_63:56; + u64 inflight:8; +#else /* Word 0 - Little Endian */ + u64 inflight:8; + u64 reserved_8_63:56; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_misc_int + * + * CPT Queue Misc Interrupt Register + * These registers contain the per-queue miscellaneous interrupts. + * otx_cptx_vqx_misc_int_s + * Word 0 + * reserved_5_63:59 [63:5] Reserved. + * swerr:1 [4:4](R/W1C/H) Software error from engines. + * nwrp:1 [3:3](R/W1C/H) NCB result write response error. 
+ * irde:1 [2:2](R/W1C/H) Instruction NCB read response error. + * dovf:1 [1:1](R/W1C/H) Doorbell overflow. + * mbox:1 [0:0](R/W1C/H) PF to VF mailbox interrupt. Set when + * CPT()_VF()_PF_MBOX(0) is written. + * + */ +union otx_cptx_vqx_misc_int { + u64 u; + struct otx_cptx_vqx_misc_int_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_5_63:59; + u64 swerr:1; + u64 nwrp:1; + u64 irde:1; + u64 dovf:1; + u64 mbox:1; +#else /* Word 0 - Little Endian */ + u64 mbox:1; + u64 dovf:1; + u64 irde:1; + u64 nwrp:1; + u64 swerr:1; + u64 reserved_5_63:59; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done_ack + * + * CPT Queue Done Count Ack Registers + * This register is written by software to acknowledge interrupts. + * otx_cptx_vqx_done_ack_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * done_ack:20 [19:0](R/W/H) Number of decrements to CPT()_VQ()_DONE[DONE]. + * Reads CPT()_VQ()_DONE[DONE]. Written by software to acknowledge + * interrupts. If CPT()_VQ()_DONE[DONE] is still nonzero the interrupt + * will be re-sent if the conditions described in CPT()_VQ()_DONE[DONE] + * are satisfied. + * + */ +union otx_cptx_vqx_done_ack { + u64 u; + struct otx_cptx_vqx_done_ack_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 done_ack:20; +#else /* Word 0 - Little Endian */ + u64 done_ack:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done + * + * CPT Queue Done Count Registers + * These registers contain the per-queue instruction done count. + * cptx_vqx_done_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * done:20 [19:0](R/W/H) Done count. When CPT_INST_S[DONEINT] set and that + * instruction completes, CPT()_VQ()_DONE[DONE] is incremented when the + * instruction finishes. 
Writes to this field are for diagnostic use only; + * instead software writes CPT()_VQ()_DONE_ACK with the number of + * decrements for this field. + * Interrupts are sent as follows: + * * When CPT()_VQ()_DONE[DONE] = 0, then no results are pending, the + * interrupt coalescing timer is held to zero, and an interrupt is not + * sent. + * * When CPT()_VQ()_DONE[DONE] != 0, then the interrupt coalescing timer + * counts. If the counter is >= CPT()_VQ()_DONE_WAIT[TIME_WAIT]*1024, or + * CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT], i.e. enough + * time has passed or enough results have arrived, then the interrupt is + * sent. + * * When CPT()_VQ()_DONE_ACK is written (or CPT()_VQ()_DONE is written + * but this is not typical), the interrupt coalescing timer restarts. + * Note after decrementing this interrupt equation is recomputed, + * for example if CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT] + * and because the timer is zero, the interrupt will be resent immediately. + * (This covers the race case between software acknowledging an interrupt + * and a result returning.) + * * When CPT()_VQ()_DONE_ENA_W1S[DONE] = 0, interrupts are not sent, + * but the counting described above still occurs. + * Since CPT instructions complete out-of-order, if software is using + * completion interrupts the suggested scheme is to request a DONEINT on + * each request, and when an interrupt arrives perform a "greedy" scan for + * completions; even if a later command is acknowledged first this will + * not result in missing a completion. + * Software is responsible for making sure [DONE] does not overflow; + * for example by ensuring there are not more than 2^20-1 instructions in + * flight that may request interrupts. 
+ * + */ +union otx_cptx_vqx_done { + u64 u; + struct otx_cptx_vqx_done_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 done:20; +#else /* Word 0 - Little Endian */ + u64 done:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done_wait + * + * CPT Queue Done Interrupt Coalescing Wait Registers + * Specifies the per queue interrupt coalescing settings. + * cptx_vqx_done_wait_s + * Word0 + * reserved_48_63:16 [63:48] Reserved. + * time_wait:16; [47:32](R/W) Time hold-off. When CPT()_VQ()_DONE[DONE] = 0 + * or CPT()_VQ()_DONE_ACK is written a timer is cleared. When the timer + * reaches [TIME_WAIT]*1024 then interrupt coalescing ends. + * see CPT()_VQ()_DONE[DONE]. If 0x0, time coalescing is disabled. + * reserved_20_31:12 [31:20] Reserved. + * num_wait:20 [19:0](R/W) Number of messages hold-off. + * When CPT()_VQ()_DONE[DONE] >= [NUM_WAIT] then interrupt coalescing ends + * see CPT()_VQ()_DONE[DONE]. If 0x0, same behavior as 0x1. + * + */ +union otx_cptx_vqx_done_wait { + u64 u; + struct otx_cptx_vqx_done_wait_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_48_63:16; + u64 time_wait:16; + u64 reserved_20_31:12; + u64 num_wait:20; +#else /* Word 0 - Little Endian */ + u64 num_wait:20; + u64 reserved_20_31:12; + u64 time_wait:16; + u64 reserved_48_63:16; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done_ena_w1s + * + * CPT Queue Done Interrupt Enable Set Registers + * Write 1 to these registers will enable the DONEINT interrupt for the queue. + * cptx_vqx_done_ena_w1s_s + * Word0 + * reserved_1_63:63 [63:1] Reserved. + * done:1 [0:0](R/W1S/H) Write 1 will enable DONEINT for this queue. + * Write 0 has no effect. Read will return the enable bit. 
+ */ +union otx_cptx_vqx_done_ena_w1s { + u64 u; + struct otx_cptx_vqx_done_ena_w1s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 done:1; +#else /* Word 0 - Little Endian */ + u64 done:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_ctl + * + * CPT VF Queue Control Registers + * This register configures queues. This register should be changed (other than + * clearing [ENA]) only when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]). + * cptx_vqx_ctl_s + * Word0 + * reserved_1_63:63 [63:1] Reserved. + * ena:1 [0:0](R/W/H) Enables the logical instruction queue. + * See also CPT()_PF_Q()_CTL[CONT_ERR] and CPT()_VQ()_INPROG[INFLIGHT]. + * 1 = Queue is enabled. + * 0 = Queue is disabled. + */ +union otx_cptx_vqx_ctl { + u64 u; + struct otx_cptx_vqx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 ena:1; +#else /* Word 0 - Little Endian */ + u64 ena:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Error Address/Error Codes + * + * In the event of a severe error, microcode writes an 8-byte Error Code + * value (ECODE) to host memory at the Rptr address specified by the host + * system (in the 64-byte request). + * + * Word0 + * [63:56](R) 8-bit completion code + * [55:48](R) Number of the core that reported the severe error + * [47:0] Lower 6 bytes of M-Inst word2. Used to assist in uniquely + * identifying which specific instruction caused the error. This assumes + * that each instruction has a unique result location (RPTR), at least + * for a given period of time. 
+ */ +union otx_cpt_error_code { + u64 u; + struct otx_cpt_error_code_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + uint64_t ccode:8; + uint64_t coreid:8; + uint64_t rptr6:48; +#else /* Word 0 - Little Endian */ + uint64_t rptr6:48; + uint64_t coreid:8; + uint64_t ccode:8; +#endif /* Word 0 - End */ + } s; +}; + +#endif /*__OTX_CPT_HW_TYPES_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf.h b/drivers/crypto/marvell/octeontx/otx_cptvf.h new file mode 100644 index 000000000000..dd02f21659af --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPTVF_H +#define __OTX_CPTVF_H + +#include <linux/list.h> +#include <linux/interrupt.h> +#include <linux/device.h> +#include "otx_cpt_common.h" +#include "otx_cptvf_reqmgr.h" + +/* Flags to indicate the features supported */ +#define OTX_CPT_FLAG_DEVICE_READY BIT(1) +#define otx_cpt_device_ready(cpt) ((cpt)->flags & OTX_CPT_FLAG_DEVICE_READY) +/* Default command queue length */ +#define OTX_CPT_CMD_QLEN (4*2046) +#define OTX_CPT_CMD_QCHUNK_SIZE 1023 +#define OTX_CPT_NUM_QS_PER_VF 1 + +struct otx_cpt_cmd_chunk { + u8 *head; + dma_addr_t dma_addr; + u32 size; /* Chunk size, max OTX_CPT_INST_CHUNK_MAX_SIZE */ + struct list_head nextchunk; +}; + +struct otx_cpt_cmd_queue { + u32 idx; /* Command queue host write idx */ + u32 num_chunks; /* Number of command chunks */ + struct otx_cpt_cmd_chunk *qhead;/* + * Command queue head, instructions + * are inserted here + */ + struct otx_cpt_cmd_chunk *base; + struct list_head chead; +}; + +struct otx_cpt_cmd_qinfo { + u32 qchunksize; /* Command queue chunk size */ + struct otx_cpt_cmd_queue 
queue[OTX_CPT_NUM_QS_PER_VF]; +}; + +struct otx_cpt_pending_qinfo { + u32 num_queues; /* Number of queues supported */ + struct otx_cpt_pending_queue queue[OTX_CPT_NUM_QS_PER_VF]; +}; + +#define for_each_pending_queue(qinfo, q, i) \ + for (i = 0, q = &qinfo->queue[i]; i < qinfo->num_queues; i++, \ + q = &qinfo->queue[i]) + +struct otx_cptvf_wqe { + struct tasklet_struct twork; + struct otx_cptvf *cptvf; +}; + +struct otx_cptvf_wqe_info { + struct otx_cptvf_wqe vq_wqe[OTX_CPT_NUM_QS_PER_VF]; +}; + +struct otx_cptvf { + u16 flags; /* Flags to hold device status bits */ + u8 vfid; /* Device Index 0...OTX_CPT_MAX_VF_NUM */ + u8 num_vfs; /* Number of enabled VFs */ + u8 vftype; /* VF type of SE_TYPE(2) or AE_TYPE(1) */ + u8 vfgrp; /* VF group (0 - 8) */ + u8 node; /* Operating node: Bits (46:44) in BAR0 address */ + u8 priority; /* + * VF priority ring: 1-High proirity round + * robin ring;0-Low priority round robin ring; + */ + struct pci_dev *pdev; /* Pci device handle */ + void __iomem *reg_base; /* Register start address */ + void *wqe_info; /* BH worker info */ + /* MSI-X */ + cpumask_var_t affinity_mask[OTX_CPT_VF_MSIX_VECTORS]; + /* Command and Pending queues */ + u32 qsize; + u32 num_queues; + struct otx_cpt_cmd_qinfo cqinfo; /* Command queue information */ + struct otx_cpt_pending_qinfo pqinfo; /* Pending queue information */ + /* VF-PF mailbox communication */ + bool pf_acked; + bool pf_nacked; +}; + +int otx_cptvf_send_vf_up(struct otx_cptvf *cptvf); +int otx_cptvf_send_vf_down(struct otx_cptvf *cptvf); +int otx_cptvf_send_vf_to_grp_msg(struct otx_cptvf *cptvf, int group); +int otx_cptvf_send_vf_priority_msg(struct otx_cptvf *cptvf); +int otx_cptvf_send_vq_size_msg(struct otx_cptvf *cptvf); +int otx_cptvf_check_pf_ready(struct otx_cptvf *cptvf); +void otx_cptvf_handle_mbox_intr(struct otx_cptvf *cptvf); +void otx_cptvf_write_vq_doorbell(struct otx_cptvf *cptvf, u32 val); + +#endif /* __OTX_CPTVF_H */ diff --git 
a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c new file mode 100644 index 000000000000..946fb62949b2 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c @@ -0,0 +1,1744 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <crypto/aes.h> +#include <crypto/authenc.h> +#include <crypto/cryptd.h> +#include <crypto/des.h> +#include <crypto/internal/aead.h> +#include <crypto/sha.h> +#include <crypto/xts.h> +#include <crypto/scatterwalk.h> +#include <linux/rtnetlink.h> +#include <linux/sort.h> +#include <linux/module.h> +#include "otx_cptvf.h" +#include "otx_cptvf_algs.h" +#include "otx_cptvf_reqmgr.h" + +#define CPT_MAX_VF_NUM 64 +/* Size of salt in AES GCM mode */ +#define AES_GCM_SALT_SIZE 4 +/* Size of IV in AES GCM mode */ +#define AES_GCM_IV_SIZE 8 +/* Size of ICV (Integrity Check Value) in AES GCM mode */ +#define AES_GCM_ICV_SIZE 16 +/* Offset of IV in AES GCM mode */ +#define AES_GCM_IV_OFFSET 8 +#define CONTROL_WORD_LEN 8 +#define KEY2_OFFSET 48 +#define DMA_MODE_FLAG(dma_mode) \ + (((dma_mode) == OTX_CPT_DMA_GATHER_SCATTER) ? 
(1 << 7) : 0) + +/* Truncated SHA digest size */ +#define SHA1_TRUNC_DIGEST_SIZE 12 +#define SHA256_TRUNC_DIGEST_SIZE 16 +#define SHA384_TRUNC_DIGEST_SIZE 24 +#define SHA512_TRUNC_DIGEST_SIZE 32 + +static DEFINE_MUTEX(mutex); +static int is_crypto_registered; + +struct cpt_device_desc { + enum otx_cptpf_type pf_type; + struct pci_dev *dev; + int num_queues; +}; + +struct cpt_device_table { + atomic_t count; + struct cpt_device_desc desc[CPT_MAX_VF_NUM]; +}; + +static struct cpt_device_table se_devices = { + .count = ATOMIC_INIT(0) +}; + +static struct cpt_device_table ae_devices = { + .count = ATOMIC_INIT(0) +}; + +static inline int get_se_device(struct pci_dev **pdev, int *cpu_num) +{ + int count, ret = 0; + + count = atomic_read(&se_devices.count); + if (count < 1) + return -ENODEV; + + *cpu_num = get_cpu(); + + if (se_devices.desc[0].pf_type == OTX_CPT_SE) { + /* + * On OcteonTX platform there is one CPT instruction queue bound + * to each VF. We get maximum performance if one CPT queue + * is available for each cpu otherwise CPT queues need to be + * shared between cpus. 
+ */ + if (*cpu_num >= count) + *cpu_num %= count; + *pdev = se_devices.desc[*cpu_num].dev; + } else { + pr_err("Unknown PF type %d\n", se_devices.desc[0].pf_type); + ret = -EINVAL; + } + put_cpu(); + + return ret; +} + +static inline int validate_hmac_cipher_null(struct otx_cpt_req_info *cpt_req) +{ + struct otx_cpt_req_ctx *rctx; + struct aead_request *req; + struct crypto_aead *tfm; + + req = container_of(cpt_req->areq, struct aead_request, base); + tfm = crypto_aead_reqtfm(req); + rctx = aead_request_ctx(req); + if (memcmp(rctx->fctx.hmac.s.hmac_calc, + rctx->fctx.hmac.s.hmac_recv, + crypto_aead_authsize(tfm)) != 0) + return -EBADMSG; + + return 0; +} + +static void otx_cpt_aead_callback(int status, void *arg1, void *arg2) +{ + struct otx_cpt_info_buffer *cpt_info = arg2; + struct crypto_async_request *areq = arg1; + struct otx_cpt_req_info *cpt_req; + struct pci_dev *pdev; + + cpt_req = cpt_info->req; + if (!status) { + /* + * When selected cipher is NULL we need to manually + * verify whether calculated hmac value matches + * received hmac value + */ + if (cpt_req->req_type == OTX_CPT_AEAD_ENC_DEC_NULL_REQ && + !cpt_req->is_enc) + status = validate_hmac_cipher_null(cpt_req); + } + if (cpt_info) { + pdev = cpt_info->pdev; + do_request_cleanup(pdev, cpt_info); + } + if (areq) + areq->complete(areq, status); +} + +static void output_iv_copyback(struct crypto_async_request *areq) +{ + struct otx_cpt_req_info *req_info; + struct skcipher_request *sreq; + struct crypto_skcipher *stfm; + struct otx_cpt_req_ctx *rctx; + struct otx_cpt_enc_ctx *ctx; + u32 start, ivsize; + + sreq = container_of(areq, struct skcipher_request, base); + stfm = crypto_skcipher_reqtfm(sreq); + ctx = crypto_skcipher_ctx(stfm); + if (ctx->cipher_type == OTX_CPT_AES_CBC || + ctx->cipher_type == OTX_CPT_DES3_CBC) { + rctx = skcipher_request_ctx(sreq); + req_info = &rctx->cpt_req; + ivsize = crypto_skcipher_ivsize(stfm); + start = sreq->cryptlen - ivsize; + + if (req_info->is_enc) { + 
scatterwalk_map_and_copy(sreq->iv, sreq->dst, start, + ivsize, 0); + } else { + if (sreq->src != sreq->dst) { + scatterwalk_map_and_copy(sreq->iv, sreq->src, + start, ivsize, 0); + } else { + memcpy(sreq->iv, req_info->iv_out, ivsize); + kfree(req_info->iv_out); + } + } + } +} + +static void otx_cpt_skcipher_callback(int status, void *arg1, void *arg2) +{ + struct otx_cpt_info_buffer *cpt_info = arg2; + struct crypto_async_request *areq = arg1; + struct pci_dev *pdev; + + if (areq) { + if (!status) + output_iv_copyback(areq); + if (cpt_info) { + pdev = cpt_info->pdev; + do_request_cleanup(pdev, cpt_info); + } + areq->complete(areq, status); + } +} + +static inline void update_input_data(struct otx_cpt_req_info *req_info, + struct scatterlist *inp_sg, + u32 nbytes, u32 *argcnt) +{ + req_info->req.dlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, inp_sg->length); + u8 *ptr = sg_virt(inp_sg); + + req_info->in[*argcnt].vptr = (void *)ptr; + req_info->in[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + inp_sg = sg_next(inp_sg); + } +} + +static inline void update_output_data(struct otx_cpt_req_info *req_info, + struct scatterlist *outp_sg, + u32 offset, u32 nbytes, u32 *argcnt) +{ + req_info->rlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, outp_sg->length - offset); + u8 *ptr = sg_virt(outp_sg); + + req_info->out[*argcnt].vptr = (void *) (ptr + offset); + req_info->out[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + offset = 0; + outp_sg = sg_next(outp_sg); + } +} + +static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc, + u32 *argcnt) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct crypto_tfm *tfm = crypto_skcipher_tfm(stfm); + struct otx_cpt_enc_ctx *ctx = crypto_tfm_ctx(tfm); + struct otx_cpt_fc_ctx *fctx = &rctx->fctx; + int ivsize = crypto_skcipher_ivsize(stfm); + u32 start 
= req->cryptlen - ivsize; + u64 *ctrl_flags = NULL; + gfp_t flags; + + flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; + |
