diff options
author | Greg Kroah-Hartman <gregkh@suse.de> | 2011-08-10 08:13:09 -0700 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2011-08-10 08:13:09 -0700 |
commit | f08d96e1d3f8d7f884fd31353fbaa0f129142ace (patch) | |
tree | cda23f5038c902dfd30320a63fc9387ee4e04d0e | |
parent | 2d2c590af8fdd1668146e9e7a44f53573df70cb0 (diff) | |
download | stable-queue-f08d96e1d3f8d7f884fd31353fbaa0f129142ace.tar.gz |
3.0 patches
16 files changed, 2499 insertions, 0 deletions
diff --git a/queue-3.0/mpt2sas-fixed-big-indian-issues-on-32-bit-ppc.patch b/queue-3.0/mpt2sas-fixed-big-indian-issues-on-32-bit-ppc.patch new file mode 100644 index 0000000000..c02df22771 --- /dev/null +++ b/queue-3.0/mpt2sas-fixed-big-indian-issues-on-32-bit-ppc.patch @@ -0,0 +1,448 @@ +From c97951ec46d4b076c2236b77db34eeed6dddb8eb Mon Sep 17 00:00:00 2001 +From: "Kashyap, Desai" <kashyap.desai@lsi.com> +Date: Tue, 14 Jun 2011 10:54:56 +0530 +Subject: [SCSI] mpt2sas: Fixed Big Indian Issues on 32 bit PPC + +From: "Kashyap, Desai" <kashyap.desai@lsi.com> + +commit c97951ec46d4b076c2236b77db34eeed6dddb8eb upstream. + +This patch addresses many endian issues solved by runing sparse with the +option __CHECK_ENDIAN__ turned on. + +Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> +Signed-off-by: James Bottomley <JBottomley@Parallels.com> +Cc: David Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/scsi/mpt2sas/mpt2sas_base.c | 65 +++++++++++-------------------- + drivers/scsi/mpt2sas/mpt2sas_base.h | 53 +++++++++++++++++++++++-- + drivers/scsi/mpt2sas/mpt2sas_ctl.c | 10 ++-- + drivers/scsi/mpt2sas/mpt2sas_debug.h | 2 + drivers/scsi/mpt2sas/mpt2sas_scsih.c | 12 ++--- + drivers/scsi/mpt2sas/mpt2sas_transport.c | 24 ++++------- + 6 files changed, 96 insertions(+), 70 deletions(-) + +--- a/drivers/scsi/mpt2sas/mpt2sas_base.c ++++ b/drivers/scsi/mpt2sas/mpt2sas_base.c +@@ -94,7 +94,7 @@ module_param(diag_buffer_enable, int, 0) + MODULE_PARM_DESC(diag_buffer_enable, " post diag buffers " + "(TRACE=1/SNAPSHOT=2/EXTENDED=4/default=0)"); + +-int mpt2sas_fwfault_debug; ++static int mpt2sas_fwfault_debug; + MODULE_PARM_DESC(mpt2sas_fwfault_debug, " enable detection of firmware fault " + "and halt firmware - (default=0)"); + +@@ -857,7 +857,7 @@ _base_interrupt(int irq, void *bus_id) + completed_cmds = 0; + cb_idx = 0xFF; + do { +- rd.word = rpf->Words; ++ rd.word = le64_to_cpu(rpf->Words); + if (rd.u.low == UINT_MAX || rd.u.high 
== UINT_MAX) + goto out; + reply = 0; +@@ -906,7 +906,7 @@ _base_interrupt(int irq, void *bus_id) + + next: + +- rpf->Words = ULLONG_MAX; ++ rpf->Words = cpu_to_le64(ULLONG_MAX); + ioc->reply_post_host_index = (ioc->reply_post_host_index == + (ioc->reply_post_queue_depth - 1)) ? 0 : + ioc->reply_post_host_index + 1; +@@ -1817,7 +1817,9 @@ _base_display_ioc_capabilities(struct MP + char desc[16]; + u8 revision; + u32 iounit_pg1_flags; ++ u32 bios_version; + ++ bios_version = le32_to_cpu(ioc->bios_pg3.BiosVersion); + pci_read_config_byte(ioc->pdev, PCI_CLASS_REVISION, &revision); + strncpy(desc, ioc->manu_pg0.ChipName, 16); + printk(MPT2SAS_INFO_FMT "%s: FWVersion(%02d.%02d.%02d.%02d), " +@@ -1828,10 +1830,10 @@ _base_display_ioc_capabilities(struct MP + (ioc->facts.FWVersion.Word & 0x0000FF00) >> 8, + ioc->facts.FWVersion.Word & 0x000000FF, + revision, +- (ioc->bios_pg3.BiosVersion & 0xFF000000) >> 24, +- (ioc->bios_pg3.BiosVersion & 0x00FF0000) >> 16, +- (ioc->bios_pg3.BiosVersion & 0x0000FF00) >> 8, +- ioc->bios_pg3.BiosVersion & 0x000000FF); ++ (bios_version & 0xFF000000) >> 24, ++ (bios_version & 0x00FF0000) >> 16, ++ (bios_version & 0x0000FF00) >> 8, ++ bios_version & 0x000000FF); + + _base_display_dell_branding(ioc); + _base_display_intel_branding(ioc); +@@ -2150,7 +2152,7 @@ _base_release_memory_pools(struct MPT2SA + static int + _base_allocate_memory_pools(struct MPT2SAS_ADAPTER *ioc, int sleep_flag) + { +- Mpi2IOCFactsReply_t *facts; ++ struct mpt2sas_facts *facts; + u32 queue_size, queue_diff; + u16 max_sge_elements; + u16 num_of_reply_frames; +@@ -2783,7 +2785,7 @@ _base_handshake_req_reply_wait(struct MP + int i; + u8 failed; + u16 dummy; +- u32 *mfp; ++ __le32 *mfp; + + /* make sure doorbell is not in use */ + if ((readl(&ioc->chip->Doorbell) & MPI2_DOORBELL_USED)) { +@@ -2871,7 +2873,7 @@ _base_handshake_req_reply_wait(struct MP + writel(0, &ioc->chip->HostInterruptStatus); + + if (ioc->logging_level & MPT_DEBUG_INIT) { +- mfp = (u32 *)reply; ++ mfp = 
(__le32 *)reply; + printk(KERN_INFO "\toffset:data\n"); + for (i = 0; i < reply_bytes/4; i++) + printk(KERN_INFO "\t[0x%02x]:%08x\n", i*4, +@@ -3097,7 +3099,8 @@ static int + _base_get_port_facts(struct MPT2SAS_ADAPTER *ioc, int port, int sleep_flag) + { + Mpi2PortFactsRequest_t mpi_request; +- Mpi2PortFactsReply_t mpi_reply, *pfacts; ++ Mpi2PortFactsReply_t mpi_reply; ++ struct mpt2sas_port_facts *pfacts; + int mpi_reply_sz, mpi_request_sz, r; + + dinitprintk(ioc, printk(MPT2SAS_INFO_FMT "%s\n", ioc->name, +@@ -3139,7 +3142,8 @@ static int + _base_get_ioc_facts(struct MPT2SAS_ADAPTER *ioc, int sleep_flag) + { + Mpi2IOCFactsRequest_t mpi_request; +- Mpi2IOCFactsReply_t mpi_reply, *facts; ++ Mpi2IOCFactsReply_t mpi_reply; ++ struct mpt2sas_facts *facts; + int mpi_reply_sz, mpi_request_sz, r; + + dinitprintk(ioc, printk(MPT2SAS_INFO_FMT "%s\n", ioc->name, +@@ -3225,17 +3229,6 @@ _base_send_ioc_init(struct MPT2SAS_ADAPT + mpi_request.MsgVersion = cpu_to_le16(MPI2_VERSION); + mpi_request.HeaderVersion = cpu_to_le16(MPI2_HEADER_VERSION); + +- /* In MPI Revision I (0xA), the SystemReplyFrameSize(offset 0x18) was +- * removed and made reserved. For those with older firmware will need +- * this fix. It was decided that the Reply and Request frame sizes are +- * the same. 
+- */ +- if ((ioc->facts.HeaderVersion >> 8) < 0xA) { +- mpi_request.Reserved7 = cpu_to_le16(ioc->reply_sz); +-/* mpi_request.SystemReplyFrameSize = +- * cpu_to_le16(ioc->reply_sz); +- */ +- } + + mpi_request.SystemRequestFrameSize = cpu_to_le16(ioc->request_sz/4); + mpi_request.ReplyDescriptorPostQueueDepth = +@@ -3243,25 +3236,17 @@ _base_send_ioc_init(struct MPT2SAS_ADAPT + mpi_request.ReplyFreeQueueDepth = + cpu_to_le16(ioc->reply_free_queue_depth); + +-#if BITS_PER_LONG > 32 + mpi_request.SenseBufferAddressHigh = +- cpu_to_le32(ioc->sense_dma >> 32); ++ cpu_to_le32((u64)ioc->sense_dma >> 32); + mpi_request.SystemReplyAddressHigh = +- cpu_to_le32(ioc->reply_dma >> 32); ++ cpu_to_le32((u64)ioc->reply_dma >> 32); + mpi_request.SystemRequestFrameBaseAddress = +- cpu_to_le64(ioc->request_dma); ++ cpu_to_le64((u64)ioc->request_dma); + mpi_request.ReplyFreeQueueAddress = +- cpu_to_le64(ioc->reply_free_dma); ++ cpu_to_le64((u64)ioc->reply_free_dma); + mpi_request.ReplyDescriptorPostQueueAddress = +- cpu_to_le64(ioc->reply_post_free_dma); +-#else +- mpi_request.SystemRequestFrameBaseAddress = +- cpu_to_le32(ioc->request_dma); +- mpi_request.ReplyFreeQueueAddress = +- cpu_to_le32(ioc->reply_free_dma); +- mpi_request.ReplyDescriptorPostQueueAddress = +- cpu_to_le32(ioc->reply_post_free_dma); +-#endif ++ cpu_to_le64((u64)ioc->reply_post_free_dma); ++ + + /* This time stamp specifies number of milliseconds + * since epoch ~ midnight January 1, 1970. 
+@@ -3271,10 +3256,10 @@ _base_send_ioc_init(struct MPT2SAS_ADAPT + (current_time.tv_usec / 1000)); + + if (ioc->logging_level & MPT_DEBUG_INIT) { +- u32 *mfp; ++ __le32 *mfp; + int i; + +- mfp = (u32 *)&mpi_request; ++ mfp = (__le32 *)&mpi_request; + printk(KERN_INFO "\toffset:data\n"); + for (i = 0; i < sizeof(Mpi2IOCInitRequest_t)/4; i++) + printk(KERN_INFO "\t[0x%02x]:%08x\n", i*4, +@@ -3759,7 +3744,7 @@ _base_make_ioc_operational(struct MPT2SA + + /* initialize Reply Post Free Queue */ + for (i = 0; i < ioc->reply_post_queue_depth; i++) +- ioc->reply_post_free[i].Words = ULLONG_MAX; ++ ioc->reply_post_free[i].Words = cpu_to_le64(ULLONG_MAX); + + r = _base_send_ioc_init(ioc, sleep_flag); + if (r) +--- a/drivers/scsi/mpt2sas/mpt2sas_base.h ++++ b/drivers/scsi/mpt2sas/mpt2sas_base.h +@@ -541,6 +541,53 @@ struct _tr_list { + + typedef void (*MPT_ADD_SGE)(void *paddr, u32 flags_length, dma_addr_t dma_addr); + ++/* IOC Facts and Port Facts converted from little endian to cpu */ ++union mpi2_version_union { ++ MPI2_VERSION_STRUCT Struct; ++ u32 Word; ++}; ++ ++struct mpt2sas_facts { ++ u16 MsgVersion; ++ u16 HeaderVersion; ++ u8 IOCNumber; ++ u8 VP_ID; ++ u8 VF_ID; ++ u16 IOCExceptions; ++ u16 IOCStatus; ++ u32 IOCLogInfo; ++ u8 MaxChainDepth; ++ u8 WhoInit; ++ u8 NumberOfPorts; ++ u8 MaxMSIxVectors; ++ u16 RequestCredit; ++ u16 ProductID; ++ u32 IOCCapabilities; ++ union mpi2_version_union FWVersion; ++ u16 IOCRequestFrameSize; ++ u16 Reserved3; ++ u16 MaxInitiators; ++ u16 MaxTargets; ++ u16 MaxSasExpanders; ++ u16 MaxEnclosures; ++ u16 ProtocolFlags; ++ u16 HighPriorityCredit; ++ u16 MaxReplyDescriptorPostQueueDepth; ++ u8 ReplyFrameSize; ++ u8 MaxVolumes; ++ u16 MaxDevHandle; ++ u16 MaxPersistentEntries; ++ u16 MinDevHandle; ++}; ++ ++struct mpt2sas_port_facts { ++ u8 PortNumber; ++ u8 VP_ID; ++ u8 VF_ID; ++ u8 PortType; ++ u16 MaxPostedCmdBuffers; ++}; ++ + /** + * struct MPT2SAS_ADAPTER - per adapter struct + * @list: ioc_list +@@ -749,8 +796,8 @@ struct 
MPT2SAS_ADAPTER { + u32 event_masks[MPI2_EVENT_NOTIFY_EVENTMASK_WORDS]; + + /* static config pages */ +- Mpi2IOCFactsReply_t facts; +- Mpi2PortFactsReply_t *pfacts; ++ struct mpt2sas_facts facts; ++ struct mpt2sas_port_facts *pfacts; + Mpi2ManufacturingPage0_t manu_pg0; + Mpi2BiosPage2_t bios_pg2; + Mpi2BiosPage3_t bios_pg3; +@@ -840,7 +887,7 @@ struct MPT2SAS_ADAPTER { + + /* reply free queue */ + u16 reply_free_queue_depth; +- u32 *reply_free; ++ __le32 *reply_free; + dma_addr_t reply_free_dma; + struct dma_pool *reply_free_dma_pool; + u32 reply_free_host_index; +--- a/drivers/scsi/mpt2sas/mpt2sas_ctl.c ++++ b/drivers/scsi/mpt2sas/mpt2sas_ctl.c +@@ -2706,13 +2706,13 @@ static DEVICE_ATTR(ioc_reset_count, S_IR + _ctl_ioc_reset_count_show, NULL); + + struct DIAG_BUFFER_START { +- u32 Size; +- u32 DiagVersion; ++ __le32 Size; ++ __le32 DiagVersion; + u8 BufferType; + u8 Reserved[3]; +- u32 Reserved1; +- u32 Reserved2; +- u32 Reserved3; ++ __le32 Reserved1; ++ __le32 Reserved2; ++ __le32 Reserved3; + }; + /** + * _ctl_host_trace_buffer_size_show - host buffer size (trace only) +--- a/drivers/scsi/mpt2sas/mpt2sas_debug.h ++++ b/drivers/scsi/mpt2sas/mpt2sas_debug.h +@@ -164,7 +164,7 @@ static inline void + _debug_dump_mf(void *mpi_request, int sz) + { + int i; +- u32 *mfp = (u32 *)mpi_request; ++ __le32 *mfp = (__le32 *)mpi_request; + + printk(KERN_INFO "mf:\n\t"); + for (i = 0; i < sz; i++) { +--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c ++++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c +@@ -1956,7 +1956,7 @@ _scsih_slave_configure(struct scsi_devic + case MPI2_RAID_VOL_TYPE_RAID1E: + qdepth = MPT2SAS_RAID_QUEUE_DEPTH; + if (ioc->manu_pg10.OEMIdentifier && +- (ioc->manu_pg10.GenericFlags0 & ++ (le32_to_cpu(ioc->manu_pg10.GenericFlags0) & + MFG10_GF0_R10_DISPLAY) && + !(raid_device->num_pds % 2)) + r_level = "RAID10"; +@@ -4598,7 +4598,7 @@ _scsih_expander_add(struct MPT2SAS_ADAPT + Mpi2SasEnclosurePage0_t enclosure_pg0; + u32 ioc_status; + u16 parent_handle; +- __le64 
sas_address, sas_address_parent = 0; ++ u64 sas_address, sas_address_parent = 0; + int i; + unsigned long flags; + struct _sas_port *mpt2sas_port = NULL; +@@ -5404,7 +5404,7 @@ _scsih_sas_device_status_change_event(st + { + struct MPT2SAS_TARGET *target_priv_data; + struct _sas_device *sas_device; +- __le64 sas_address; ++ u64 sas_address; + unsigned long flags; + Mpi2EventDataSasDeviceStatusChange_t *event_data = + fw_event->event_data; +@@ -6566,7 +6566,7 @@ _scsih_search_responding_expanders(struc + Mpi2ExpanderPage0_t expander_pg0; + Mpi2ConfigReply_t mpi_reply; + u16 ioc_status; +- __le64 sas_address; ++ u64 sas_address; + u16 handle; + + printk(MPT2SAS_INFO_FMT "%s\n", ioc->name, __func__); +@@ -7505,7 +7505,7 @@ _scsih_suspend(struct pci_dev *pdev, pm_ + { + struct Scsi_Host *shost = pci_get_drvdata(pdev); + struct MPT2SAS_ADAPTER *ioc = shost_priv(shost); +- u32 device_state; ++ pci_power_t device_state; + + mpt2sas_base_stop_watchdog(ioc); + scsi_block_requests(shost); +@@ -7532,7 +7532,7 @@ _scsih_resume(struct pci_dev *pdev) + { + struct Scsi_Host *shost = pci_get_drvdata(pdev); + struct MPT2SAS_ADAPTER *ioc = shost_priv(shost); +- u32 device_state = pdev->current_state; ++ pci_power_t device_state = pdev->current_state; + int r; + + printk(MPT2SAS_INFO_FMT "pdev=0x%p, slot=%s, previous " +--- a/drivers/scsi/mpt2sas/mpt2sas_transport.c ++++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c +@@ -299,7 +299,6 @@ _transport_expander_report_manufacture(s + void *data_out = NULL; + dma_addr_t data_out_dma; + u32 sz; +- u64 *sas_address_le; + u16 wait_state_count; + + if (ioc->shost_recovery || ioc->pci_error_recovery) { +@@ -372,8 +371,7 @@ _transport_expander_report_manufacture(s + mpi_request->PhysicalPort = 0xFF; + mpi_request->VF_ID = 0; /* TODO */ + mpi_request->VP_ID = 0; +- sas_address_le = (u64 *)&mpi_request->SASAddress; +- *sas_address_le = cpu_to_le64(sas_address); ++ mpi_request->SASAddress = cpu_to_le64(sas_address); + mpi_request->RequestDataLength = + 
cpu_to_le16(sizeof(struct rep_manu_request)); + psge = &mpi_request->SGL; +@@ -1049,14 +1047,14 @@ struct phy_error_log_reply{ + u8 function; /* 0x11 */ + u8 function_result; + u8 response_length; +- u16 expander_change_count; ++ __be16 expander_change_count; + u8 reserved_1[3]; + u8 phy_identifier; + u8 reserved_2[2]; +- u32 invalid_dword; +- u32 running_disparity_error; +- u32 loss_of_dword_sync; +- u32 phy_reset_problem; ++ __be32 invalid_dword; ++ __be32 running_disparity_error; ++ __be32 loss_of_dword_sync; ++ __be32 phy_reset_problem; + }; + + /** +@@ -1085,7 +1083,6 @@ _transport_get_expander_phy_error_log(st + void *data_out = NULL; + dma_addr_t data_out_dma; + u32 sz; +- u64 *sas_address_le; + u16 wait_state_count; + + if (ioc->shost_recovery || ioc->pci_error_recovery) { +@@ -1160,8 +1157,7 @@ _transport_get_expander_phy_error_log(st + mpi_request->PhysicalPort = 0xFF; + mpi_request->VF_ID = 0; /* TODO */ + mpi_request->VP_ID = 0; +- sas_address_le = (u64 *)&mpi_request->SASAddress; +- *sas_address_le = cpu_to_le64(phy->identify.sas_address); ++ mpi_request->SASAddress = cpu_to_le64(phy->identify.sas_address); + mpi_request->RequestDataLength = + cpu_to_le16(sizeof(struct phy_error_log_request)); + psge = &mpi_request->SGL; +@@ -1406,7 +1402,6 @@ _transport_expander_phy_control(struct M + void *data_out = NULL; + dma_addr_t data_out_dma; + u32 sz; +- u64 *sas_address_le; + u16 wait_state_count; + + if (ioc->shost_recovery) { +@@ -1486,8 +1481,7 @@ _transport_expander_phy_control(struct M + mpi_request->PhysicalPort = 0xFF; + mpi_request->VF_ID = 0; /* TODO */ + mpi_request->VP_ID = 0; +- sas_address_le = (u64 *)&mpi_request->SASAddress; +- *sas_address_le = cpu_to_le64(phy->identify.sas_address); ++ mpi_request->SASAddress = cpu_to_le64(phy->identify.sas_address); + mpi_request->RequestDataLength = + cpu_to_le16(sizeof(struct phy_error_log_request)); + psge = &mpi_request->SGL; +@@ -1914,7 +1908,7 @@ _transport_smp_handler(struct Scsi_Host + 
mpi_request->PhysicalPort = 0xFF; + mpi_request->VF_ID = 0; /* TODO */ + mpi_request->VP_ID = 0; +- *((u64 *)&mpi_request->SASAddress) = (rphy) ? ++ mpi_request->SASAddress = (rphy) ? + cpu_to_le64(rphy->identify.sas_address) : + cpu_to_le64(ioc->sas_hba.sas_address); + mpi_request->RequestDataLength = cpu_to_le16(blk_rq_bytes(req) - 4); diff --git a/queue-3.0/series b/queue-3.0/series index a8aa60c4f6..f129241a16 100644 --- a/queue-3.0/series +++ b/queue-3.0/series @@ -59,3 +59,18 @@ net-allow-netif_carrier-to-be-called-safely-from-irq.patch ipv4-use-rt_tos-after-some-rt_tos-conversions.patch gre-fix-improper-error-handling.patch iwlagn-5000-do-not-support-idle-mode.patch +mpt2sas-fixed-big-indian-issues-on-32-bit-ppc.patch +sparc-don-t-leave-sparc_pmu_type-null-on-sun4v.patch +sparc-add-t3-sun4v-cpu-type-and-hypervisor-group-defines.patch +sparc-don-t-do-expensive-hypervisor-pcr-write-unless-necessary.patch +sparc-detect-and-handle-ultrasparc-t3-cpu-types.patch +sparc-sanitize-cpu-feature-detection-and-reporting.patch +sparc-minor-tweaks-to-niagara-page-copy-clear.patch +sparc-use-popc-if-possible-for-hweight-routines.patch +sparc-use-hweight64-in-popc-emulation.patch +sparc-add-some-missing-hypervisor-api-groups.patch +sparc-set-reboot-cmd-using-reboot-data-hypervisor-call-if-available.patch +sparc-use-popc-when-possible-for-ffs-__ffs-ffz.patch +sparc-access-kernel-tsb-using-physical-addressing-when-possible.patch +sparc-size-mondo-queues-more-sanely.patch +sparc-fix-build-with-debug_pagealloc-enabled.patch diff --git a/queue-3.0/sparc-access-kernel-tsb-using-physical-addressing-when-possible.patch b/queue-3.0/sparc-access-kernel-tsb-using-physical-addressing-when-possible.patch new file mode 100644 index 0000000000..a957dd2004 --- /dev/null +++ b/queue-3.0/sparc-access-kernel-tsb-using-physical-addressing-when-possible.patch @@ -0,0 +1,265 @@ +From 417046ec3d4835e8c0a34677f3fb3ec215b03746 Mon Sep 17 00:00:00 2001 +From: "David S. 
Miller" <davem@davemloft.net> +Date: Fri, 5 Aug 2011 00:53:57 -0700 +Subject: sparc: Access kernel TSB using physical addressing when possible. + + +From: "David S. Miller" <davem@davemloft.net> + +[ Upstream commit 9076d0e7e02b98f7a65df10d1956326c8d8ba61a ] + +On sun4v this is basically required since we point the hypervisor and +the TSB walking hardware at these tables using physical addressing +too. + +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + arch/sparc/include/asm/tsb.h | 51 ++++++++++++++++++---------------------- + arch/sparc/kernel/ktlb.S | 24 +++++++++--------- + arch/sparc/kernel/vmlinux.lds.S | 10 +++++++ + arch/sparc/mm/init_64.c | 40 ++++++++++++++++++++++++++++++- + 4 files changed, 85 insertions(+), 40 deletions(-) + +--- a/arch/sparc/include/asm/tsb.h ++++ b/arch/sparc/include/asm/tsb.h +@@ -133,29 +133,6 @@ extern struct tsb_phys_patch_entry __tsb + sub TSB, 0x8, TSB; \ + TSB_STORE(TSB, TAG); + +-#define KTSB_LOAD_QUAD(TSB, REG) \ +- ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG; +- +-#define KTSB_STORE(ADDR, VAL) \ +- stxa VAL, [ADDR] ASI_N; +- +-#define KTSB_LOCK_TAG(TSB, REG1, REG2) \ +-99: lduwa [TSB] ASI_N, REG1; \ +- sethi %hi(TSB_TAG_LOCK_HIGH), REG2;\ +- andcc REG1, REG2, %g0; \ +- bne,pn %icc, 99b; \ +- nop; \ +- casa [TSB] ASI_N, REG1, REG2;\ +- cmp REG1, REG2; \ +- bne,pn %icc, 99b; \ +- nop; \ +- +-#define KTSB_WRITE(TSB, TTE, TAG) \ +- add TSB, 0x8, TSB; \ +- stxa TTE, [TSB] ASI_N; \ +- sub TSB, 0x8, TSB; \ +- stxa TAG, [TSB] ASI_N; +- + /* Do a kernel page table walk. Leaves physical PTE pointer in + * REG1. Jumps to FAIL_LABEL on early page table walk termination. + * VADDR will not be clobbered, but REG2 will. +@@ -239,6 +216,8 @@ extern struct tsb_phys_patch_entry __tsb + (KERNEL_TSB_SIZE_BYTES / 16) + #define KERNEL_TSB4M_NENTRIES 4096 + ++#define KTSB_PHYS_SHIFT 15 ++ + /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL + * on TSB hit. 
REG1, REG2, REG3, and REG4 are used as temporaries + * and the found TTE will be left in REG1. REG3 and REG4 must +@@ -247,13 +226,22 @@ extern struct tsb_phys_patch_entry __tsb + * VADDR and TAG will be preserved and not clobbered by this macro. + */ + #define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ +- sethi %hi(swapper_tsb), REG1; \ ++661: sethi %hi(swapper_tsb), REG1; \ + or REG1, %lo(swapper_tsb), REG1; \ ++ .section .swapper_tsb_phys_patch, "ax"; \ ++ .word 661b; \ ++ .previous; \ ++661: nop; \ ++ .section .tsb_ldquad_phys_patch, "ax"; \ ++ .word 661b; \ ++ sllx REG1, KTSB_PHYS_SHIFT, REG1; \ ++ sllx REG1, KTSB_PHYS_SHIFT, REG1; \ ++ .previous; \ + srlx VADDR, PAGE_SHIFT, REG2; \ + and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \ + sllx REG2, 4, REG2; \ + add REG1, REG2, REG2; \ +- KTSB_LOAD_QUAD(REG2, REG3); \ ++ TSB_LOAD_QUAD(REG2, REG3); \ + cmp REG3, TAG; \ + be,a,pt %xcc, OK_LABEL; \ + mov REG4, REG1; +@@ -263,12 +251,21 @@ extern struct tsb_phys_patch_entry __tsb + * we can make use of that for the index computation. 
+ */ + #define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ +- sethi %hi(swapper_4m_tsb), REG1; \ ++661: sethi %hi(swapper_4m_tsb), REG1; \ + or REG1, %lo(swapper_4m_tsb), REG1; \ ++ .section .swapper_4m_tsb_phys_patch, "ax"; \ ++ .word 661b; \ ++ .previous; \ ++661: nop; \ ++ .section .tsb_ldquad_phys_patch, "ax"; \ ++ .word 661b; \ ++ sllx REG1, KTSB_PHYS_SHIFT, REG1; \ ++ sllx REG1, KTSB_PHYS_SHIFT, REG1; \ ++ .previous; \ + and TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \ + sllx REG2, 4, REG2; \ + add REG1, REG2, REG2; \ +- KTSB_LOAD_QUAD(REG2, REG3); \ ++ TSB_LOAD_QUAD(REG2, REG3); \ + cmp REG3, TAG; \ + be,a,pt %xcc, OK_LABEL; \ + mov REG4, REG1; +--- a/arch/sparc/kernel/ktlb.S ++++ b/arch/sparc/kernel/ktlb.S +@@ -47,16 +47,16 @@ kvmap_itlb_tsb_miss: + kvmap_itlb_vmalloc_addr: + KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath) + +- KTSB_LOCK_TAG(%g1, %g2, %g7) ++ TSB_LOCK_TAG(%g1, %g2, %g7) + + /* Load and check PTE. */ + ldxa [%g5] ASI_PHYS_USE_EC, %g5 + mov 1, %g7 + sllx %g7, TSB_TAG_INVALID_BIT, %g7 + brgez,a,pn %g5, kvmap_itlb_longpath +- KTSB_STORE(%g1, %g7) ++ TSB_STORE(%g1, %g7) + +- KTSB_WRITE(%g1, %g5, %g6) ++ TSB_WRITE(%g1, %g5, %g6) + + /* fallthrough to TLB load */ + +@@ -102,9 +102,9 @@ kvmap_itlb_longpath: + kvmap_itlb_obp: + OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath) + +- KTSB_LOCK_TAG(%g1, %g2, %g7) ++ TSB_LOCK_TAG(%g1, %g2, %g7) + +- KTSB_WRITE(%g1, %g5, %g6) ++ TSB_WRITE(%g1, %g5, %g6) + + ba,pt %xcc, kvmap_itlb_load + nop +@@ -112,17 +112,17 @@ kvmap_itlb_obp: + kvmap_dtlb_obp: + OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath) + +- KTSB_LOCK_TAG(%g1, %g2, %g7) ++ TSB_LOCK_TAG(%g1, %g2, %g7) + +- KTSB_WRITE(%g1, %g5, %g6) ++ TSB_WRITE(%g1, %g5, %g6) + + ba,pt %xcc, kvmap_dtlb_load + nop + + .align 32 + kvmap_dtlb_tsb4m_load: +- KTSB_LOCK_TAG(%g1, %g2, %g7) +- KTSB_WRITE(%g1, %g5, %g6) ++ TSB_LOCK_TAG(%g1, %g2, %g7) ++ TSB_WRITE(%g1, %g5, %g6) + ba,pt %xcc, kvmap_dtlb_load + nop + +@@ -222,16 
+222,16 @@ kvmap_linear_patch: + kvmap_dtlb_vmalloc_addr: + KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) + +- KTSB_LOCK_TAG(%g1, %g2, %g7) ++ TSB_LOCK_TAG(%g1, %g2, %g7) + + /* Load and check PTE. */ + ldxa [%g5] ASI_PHYS_USE_EC, %g5 + mov 1, %g7 + sllx %g7, TSB_TAG_INVALID_BIT, %g7 + brgez,a,pn %g5, kvmap_dtlb_longpath +- KTSB_STORE(%g1, %g7) ++ TSB_STORE(%g1, %g7) + +- KTSB_WRITE(%g1, %g5, %g6) ++ TSB_WRITE(%g1, %g5, %g6) + + /* fallthrough to TLB load */ + +--- a/arch/sparc/kernel/vmlinux.lds.S ++++ b/arch/sparc/kernel/vmlinux.lds.S +@@ -107,6 +107,16 @@ SECTIONS + *(.sun4v_2insn_patch) + __sun4v_2insn_patch_end = .; + } ++ .swapper_tsb_phys_patch : { ++ __swapper_tsb_phys_patch = .; ++ *(.swapper_tsb_phys_patch) ++ __swapper_tsb_phys_patch_end = .; ++ } ++ .swapper_4m_tsb_phys_patch : { ++ __swapper_4m_tsb_phys_patch = .; ++ *(.swapper_4m_tsb_phys_patch) ++ __swapper_4m_tsb_phys_patch_end = .; ++ } + .popc_3insn_patch : { + __popc_3insn_patch = .; + *(.popc_3insn_patch) +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -1597,6 +1597,42 @@ static void __init tsb_phys_patch(void) + static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR]; + extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; + ++static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa) ++{ ++ pa >>= KTSB_PHYS_SHIFT; ++ ++ while (start < end) { ++ unsigned int *ia = (unsigned int *)(unsigned long)*start; ++ ++ ia[0] = (ia[0] & ~0x3fffff) | (pa >> 10); ++ __asm__ __volatile__("flush %0" : : "r" (ia)); ++ ++ ia[1] = (ia[1] & ~0x3ff) | (pa & 0x3ff); ++ __asm__ __volatile__("flush %0" : : "r" (ia + 1)); ++ ++ start++; ++ } ++} ++ ++static void ktsb_phys_patch(void) ++{ ++ extern unsigned int __swapper_tsb_phys_patch; ++ extern unsigned int __swapper_tsb_phys_patch_end; ++ extern unsigned int __swapper_4m_tsb_phys_patch; ++ extern unsigned int __swapper_4m_tsb_phys_patch_end; ++ unsigned long ktsb_pa; ++ ++ ktsb_pa = kern_base + ((unsigned 
long)&swapper_tsb[0] - KERNBASE); ++ patch_one_ktsb_phys(&__swapper_tsb_phys_patch, ++ &__swapper_tsb_phys_patch_end, ktsb_pa); ++#ifndef CONFIG_DEBUG_PAGEALLOC ++ ktsb_pa = (kern_base + ++ ((unsigned long)&swapper_4m_tsb[0] - KERNBASE)); ++ patch_one_ktsb_phys(&__swapper_4m_tsb_phys_patch, ++ &__swapper_4m_tsb_phys_patch_end, ktsb_pa); ++#endif ++} ++ + static void __init sun4v_ktsb_init(void) + { + unsigned long ktsb_pa; +@@ -1716,8 +1752,10 @@ void __init paging_init(void) + sun4u_pgprot_init(); + + if (tlb_type == cheetah_plus || +- tlb_type == hypervisor) ++ tlb_type == hypervisor) { + tsb_phys_patch(); ++ ktsb_phys_patch(); ++ } + + if (tlb_type == hypervisor) { + sun4v_patch_tlb_handlers(); diff --git a/queue-3.0/sparc-add-some-missing-hypervisor-api-groups.patch b/queue-3.0/sparc-add-some-missing-hypervisor-api-groups.patch new file mode 100644 index 0000000000..46588588a5 --- /dev/null +++ b/queue-3.0/sparc-add-some-missing-hypervisor-api-groups.patch @@ -0,0 +1,57 @@ +From afbae3010f12ace884298a2b51f3a270cd0a0060 Mon Sep 17 00:00:00 2001 +From: "David S. Miller" <davem@davemloft.net> +Date: Mon, 1 Aug 2011 22:45:18 -0700 +Subject: sparc: Add some missing hypervisor API groups. + + +From: "David S. Miller" <davem@davemloft.net> + +[ Upstream commit e2eb9f8158ead43a88c0f0b4d74257b1be938a18 ] + +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + arch/sparc/include/asm/hypervisor.h | 6 ++++++ + arch/sparc/kernel/hvapi.c | 6 ++++++ + 2 files changed, 12 insertions(+) + +--- a/arch/sparc/include/asm/hypervisor.h ++++ b/arch/sparc/include/asm/hypervisor.h +@@ -2940,11 +2940,17 @@ extern unsigned long sun4v_ncs_request(u + #define HV_GRP_CORE 0x0001 + #define HV_GRP_INTR 0x0002 + #define HV_GRP_SOFT_STATE 0x0003 ++#define HV_GRP_TM 0x0080 + #define HV_GRP_PCI 0x0100 + #define HV_GRP_LDOM 0x0101 + #define HV_GRP_SVC_CHAN 0x0102 + #define HV_GRP_NCS 0x0103 + #define HV_GRP_RNG 0x0104 ++#define HV_GRP_PBOOT 0x0105 ++#define HV_GRP_TPM 0x0107 ++#define HV_GRP_SDIO 0x0108 ++#define HV_GRP_SDIO_ERR 0x0109 ++#define HV_GRP_REBOOT_DATA 0x0110 + #define HV_GRP_NIAG_PERF 0x0200 + #define HV_GRP_FIRE_PERF 0x0201 + #define HV_GRP_N2_CPU 0x0202 +--- a/arch/sparc/kernel/hvapi.c ++++ b/arch/sparc/kernel/hvapi.c +@@ -28,11 +28,17 @@ static struct api_info api_table[] = { + { .group = HV_GRP_CORE, .flags = FLAG_PRE_API }, + { .group = HV_GRP_INTR, }, + { .group = HV_GRP_SOFT_STATE, }, ++ { .group = HV_GRP_TM, }, + { .group = HV_GRP_PCI, .flags = FLAG_PRE_API }, + { .group = HV_GRP_LDOM, }, + { .group = HV_GRP_SVC_CHAN, .flags = FLAG_PRE_API }, + { .group = HV_GRP_NCS, .flags = FLAG_PRE_API }, + { .group = HV_GRP_RNG, }, ++ { .group = HV_GRP_PBOOT, }, ++ { .group = HV_GRP_TPM, }, ++ { .group = HV_GRP_SDIO, }, ++ { .group = HV_GRP_SDIO_ERR, }, ++ { .group = HV_GRP_REBOOT_DATA, }, + { .group = HV_GRP_NIAG_PERF, .flags = FLAG_PRE_API }, + { .group = HV_GRP_FIRE_PERF, }, + { .group = HV_GRP_N2_CPU, }, diff --git a/queue-3.0/sparc-add-t3-sun4v-cpu-type-and-hypervisor-group-defines.patch b/queue-3.0/sparc-add-t3-sun4v-cpu-type-and-hypervisor-group-defines.patch new file mode 100644 index 0000000000..364d275e7b --- /dev/null +++ b/queue-3.0/sparc-add-t3-sun4v-cpu-type-and-hypervisor-group-defines.patch @@ -0,0 +1,48 @@ +From 
969148a89326685ee6776bb7bdb5d2d673ae1ca5 Mon Sep 17 00:00:00 2001 +From: "David S. Miller" <davem@davemloft.net> +Date: Wed, 27 Jul 2011 20:42:51 -0700 +Subject: sparc: Add T3 sun4v cpu type and hypervisor group defines. + + +From: "David S. Miller" <davem@davemloft.net> + +[ Upstream commit 15e3608d7c273947dbf2eadbcaa66e51143928fb ] + +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + arch/sparc/include/asm/hypervisor.h | 1 + + arch/sparc/include/asm/spitfire.h | 1 + + arch/sparc/kernel/hvapi.c | 1 + + 3 files changed, 3 insertions(+) + +--- a/arch/sparc/include/asm/hypervisor.h ++++ b/arch/sparc/include/asm/hypervisor.h +@@ -2950,6 +2950,7 @@ extern unsigned long sun4v_ncs_request(u + #define HV_GRP_N2_CPU 0x0202 + #define HV_GRP_NIU 0x0204 + #define HV_GRP_VF_CPU 0x0205 ++#define HV_GRP_KT_CPU 0x0209 + #define HV_GRP_DIAG 0x0300 + + #ifndef __ASSEMBLY__ +--- a/arch/sparc/include/asm/spitfire.h ++++ b/arch/sparc/include/asm/spitfire.h +@@ -42,6 +42,7 @@ + #define SUN4V_CHIP_INVALID 0x00 + #define SUN4V_CHIP_NIAGARA1 0x01 + #define SUN4V_CHIP_NIAGARA2 0x02 ++#define SUN4V_CHIP_NIAGARA3 0x03 + #define SUN4V_CHIP_UNKNOWN 0xff + + #ifndef __ASSEMBLY__ +--- a/arch/sparc/kernel/hvapi.c ++++ b/arch/sparc/kernel/hvapi.c +@@ -38,6 +38,7 @@ static struct api_info api_table[] = { + { .group = HV_GRP_N2_CPU, }, + { .group = HV_GRP_NIU, }, + { .group = HV_GRP_VF_CPU, }, ++ { .group = HV_GRP_KT_CPU, }, + { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, + }; + diff --git a/queue-3.0/sparc-detect-and-handle-ultrasparc-t3-cpu-types.patch b/queue-3.0/sparc-detect-and-handle-ultrasparc-t3-cpu-types.patch new file mode 100644 index 0000000000..3e1736d508 --- /dev/null +++ b/queue-3.0/sparc-detect-and-handle-ultrasparc-t3-cpu-types.patch @@ -0,0 +1,199 @@ +From 5d82d277c6102ec5055f56d8c533dca4748232bf Mon Sep 17 00:00:00 2001 +From: "David S. 
Miller" <davem@davemloft.net> +Date: Wed, 27 Jul 2011 21:06:16 -0700 +Subject: sparc: Detect and handle UltraSPARC-T3 cpu types. + + +From: "David S. Miller" <davem@davemloft.net> + +[ Upstream commit 4ba991d3eb379fbaa22049e7002341e97a673685 ] + +The cpu compatible string we look for is "SPARC-T3". + +As far as memset/memcpy optimizations go, we treat this chip the same +as Niagara-T2/T2+. Use cache initializing stores for memset, and use +prefetch, FPU block loads, cache initializing stores, and block stores +for copies. + +We use the Niagara-T2 perf support, since T3 is a close relative in +this regard. Later we'll add support for the new events T3 can +report, plus enable T3's new "sample" mode. + +For now I haven't added any new ELF hwcap flags. We probably need +to add a couple, for example: + +T2 and T3 both support the population count instruction in hardware. + +T3 supports VIS3 instructions, including support (finally) for +partitioned shift. One can also now move directly between float +and integer registers. + +T3 supports instructions meant to help with Galois Field and other HPC +calculations, such as XOR multiply. Also there are "OP and negate" +instructions, for example "fnmul" which is multiply-and-negate. + +T3 recognizes the transactional memory opcodes, however since +transactional memory isn't supported: 1) 'commit' behaves as a NOP and +2) 'chkpt' always branches 3) 'rdcps' returns all zeros and 4) 'wrcps' +behaves as a NOP. + +So we'll need about 3 new elf capability flags in the end to represent +all of these things. + +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + arch/sparc/include/asm/elf_64.h | 6 ++++-- + arch/sparc/include/asm/xor_64.h | 3 ++- + arch/sparc/kernel/cpu.c | 6 ++++++ + arch/sparc/kernel/cpumap.c | 1 + + arch/sparc/kernel/head_64.S | 31 +++++++++++++++++++++++++++++++ + arch/sparc/kernel/pcr.c | 4 ++++ + arch/sparc/kernel/perf_event.c | 3 ++- + 7 files changed, 50 insertions(+), 4 deletions(-) + +--- a/arch/sparc/include/asm/elf_64.h ++++ b/arch/sparc/include/asm/elf_64.h +@@ -177,9 +177,11 @@ static inline unsigned int sparc64_elf_h + cap |= HWCAP_SPARC_ULTRA3; + else if (tlb_type == hypervisor) { + if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || +- sun4v_chip_type == SUN4V_CHIP_NIAGARA2) ++ sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || ++ sun4v_chip_type == SUN4V_CHIP_NIAGARA3) + cap |= HWCAP_SPARC_BLKINIT; +- if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2) ++ if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || ++ sun4v_chip_type == SUN4V_CHIP_NIAGARA3) + cap |= HWCAP_SPARC_N2; + } + +--- a/arch/sparc/include/asm/xor_64.h ++++ b/arch/sparc/include/asm/xor_64.h +@@ -65,6 +65,7 @@ static struct xor_block_template xor_blo + #define XOR_SELECT_TEMPLATE(FASTEST) \ + ((tlb_type == hypervisor && \ + (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || \ +- sun4v_chip_type == SUN4V_CHIP_NIAGARA2)) ? \ ++ sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || \ ++ sun4v_chip_type == SUN4V_CHIP_NIAGARA3)) ? 
\ + &xor_block_niagara : \ + &xor_block_VIS) +--- a/arch/sparc/kernel/cpu.c ++++ b/arch/sparc/kernel/cpu.c +@@ -474,6 +474,12 @@ static void __init sun4v_cpu_probe(void) + sparc_pmu_type = "niagara2"; + break; + ++ case SUN4V_CHIP_NIAGARA3: ++ sparc_cpu_type = "UltraSparc T3 (Niagara3)"; ++ sparc_fpu_type = "UltraSparc T3 integrated FPU"; ++ sparc_pmu_type = "niagara3"; ++ break; ++ + default: + printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n", + prom_cpu_compatible); +--- a/arch/sparc/kernel/cpumap.c ++++ b/arch/sparc/kernel/cpumap.c +@@ -324,6 +324,7 @@ static int iterate_cpu(struct cpuinfo_tr + switch (sun4v_chip_type) { + case SUN4V_CHIP_NIAGARA1: + case SUN4V_CHIP_NIAGARA2: ++ case SUN4V_CHIP_NIAGARA3: + rover_inc_table = niagara_iterate_method; + break; + default: +--- a/arch/sparc/kernel/head_64.S ++++ b/arch/sparc/kernel/head_64.S +@@ -132,6 +132,8 @@ prom_sun4v_name: + .asciz "sun4v" + prom_niagara_prefix: + .asciz "SUNW,UltraSPARC-T" ++prom_sparc_prefix: ++ .asciz "SPARC-T" + .align 4 + prom_root_compatible: + .skip 64 +@@ -382,6 +384,22 @@ sun4v_chip_type: + 90: ldub [%g7], %g2 + ldub [%g1], %g4 + cmp %g2, %g4 ++ bne,pn %icc, 89f ++ add %g7, 1, %g7 ++ subcc %g3, 1, %g3 ++ bne,pt %xcc, 90b ++ add %g1, 1, %g1 ++ ba,pt %xcc, 91f ++ nop ++ ++89: sethi %hi(prom_cpu_compatible), %g1 ++ or %g1, %lo(prom_cpu_compatible), %g1 ++ sethi %hi(prom_sparc_prefix), %g7 ++ or %g7, %lo(prom_sparc_prefix), %g7 ++ mov 7, %g3 ++90: ldub [%g7], %g2 ++ ldub [%g1], %g4 ++ cmp %g2, %g4 + bne,pn %icc, 4f + add %g7, 1, %g7 + subcc %g3, 1, %g3 +@@ -390,6 +408,15 @@ sun4v_chip_type: + + sethi %hi(prom_cpu_compatible), %g1 + or %g1, %lo(prom_cpu_compatible), %g1 ++ ldub [%g1 + 7], %g2 ++ cmp %g2, '3' ++ be,pt %xcc, 5f ++ mov SUN4V_CHIP_NIAGARA3, %g4 ++ ba,pt %xcc, 4f ++ nop ++ ++91: sethi %hi(prom_cpu_compatible), %g1 ++ or %g1, %lo(prom_cpu_compatible), %g1 + ldub [%g1 + 17], %g2 + cmp %g2, '1' + be,pt %xcc, 5f +@@ -397,6 +424,7 @@ sun4v_chip_type: + cmp %g2, '2' + be,pt 
%xcc, 5f + mov SUN4V_CHIP_NIAGARA2, %g4 ++ + 4: + mov SUN4V_CHIP_UNKNOWN, %g4 + 5: sethi %hi(sun4v_chip_type), %g2 +@@ -514,6 +542,9 @@ niagara_tlb_fixup: + cmp %g1, SUN4V_CHIP_NIAGARA2 + be,pt %xcc, niagara2_patch + nop ++ cmp %g1, SUN4V_CHIP_NIAGARA3 ++ be,pt %xcc, niagara2_patch ++ nop + + call generic_patch_copyops + nop +--- a/arch/sparc/kernel/pcr.c ++++ b/arch/sparc/kernel/pcr.c +@@ -109,6 +109,10 @@ static int __init register_perf_hsvc(voi + perf_hsvc_group = HV_GRP_N2_CPU; + break; + ++ case SUN4V_CHIP_NIAGARA3: ++ perf_hsvc_group = HV_GRP_KT_CPU; ++ break; ++ + default: + return -ENODEV; + } +--- a/arch/sparc/kernel/perf_event.c ++++ b/arch/sparc/kernel/perf_event.c +@@ -1301,7 +1301,8 @@ static bool __init supported_pmu(void) + sparc_pmu = &niagara1_pmu; + return true; + } +- if (!strcmp(sparc_pmu_type, "niagara2")) { ++ if (!strcmp(sparc_pmu_type, "niagara2") || ++ !strcmp(sparc_pmu_type, "niagara3")) { + sparc_pmu = &niagara2_pmu; + return true; + } diff --git a/queue-3.0/sparc-don-t-do-expensive-hypervisor-pcr-write-unless-necessary.patch b/queue-3.0/sparc-don-t-do-expensive-hypervisor-pcr-write-unless-necessary.patch new file mode 100644 index 0000000000..bc827f0dbc --- /dev/null +++ b/queue-3.0/sparc-don-t-do-expensive-hypervisor-pcr-write-unless-necessary.patch @@ -0,0 +1,38 @@ +From 25d49bec974117ae1ddb5cd5bfaae549281abea9 Mon Sep 17 00:00:00 2001 +From: "David S. Miller" <davem@davemloft.net> +Date: Wed, 27 Jul 2011 20:46:25 -0700 +Subject: sparc: Don't do expensive hypervisor PCR write unless necessary. + + +From: "David S. Miller" <davem@davemloft.net> + +[ Upstream commit 314ff52727fe94dfbe07f3a9a489ab3ca8d8df5a ] + +The hypervisor call is only necessary if hypervisor events are +being requested. + +So if we're not tracking hypervisor events, simply do a direct +register write. + +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + arch/sparc/kernel/pcr.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/arch/sparc/kernel/pcr.c ++++ b/arch/sparc/kernel/pcr.c +@@ -80,8 +80,11 @@ static void n2_pcr_write(u64 val) + { + unsigned long ret; + +- ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); +- if (ret != HV_EOK) ++ if (val & PCR_N2_HTRACE) { ++ ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); ++ if (ret != HV_EOK) ++ write_pcr(val); ++ } else + write_pcr(val); + } + diff --git a/queue-3.0/sparc-don-t-leave-sparc_pmu_type-null-on-sun4v.patch b/queue-3.0/sparc-don-t-leave-sparc_pmu_type-null-on-sun4v.patch new file mode 100644 index 0000000000..6c330bc3e1 --- /dev/null +++ b/queue-3.0/sparc-don-t-leave-sparc_pmu_type-null-on-sun4v.patch @@ -0,0 +1,28 @@ +From 4c5463882ef8fc0c2d42714c0e247046a9db0b24 Mon Sep 17 00:00:00 2001 +From: "David S. Miller" <davem@davemloft.net> +Date: Wed, 27 Jul 2011 20:25:57 -0700 +Subject: sparc: Don't leave sparc_pmu_type NULL on sun4v. + + +From: "David S. Miller" <davem@davemloft.net> + +[ Upstream commit facfddef2c76110b8e321921f7e54518c3dd1579 ] + +Otherwise we'll crash in the sparc perf init code. + +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + arch/sparc/kernel/cpu.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/sparc/kernel/cpu.c ++++ b/arch/sparc/kernel/cpu.c +@@ -479,6 +479,7 @@ static void __init sun4v_cpu_probe(void) + prom_cpu_compatible); + sparc_cpu_type = "Unknown SUN4V CPU"; + sparc_fpu_type = "Unknown SUN4V FPU"; ++ sparc_pmu_type = "Unknown SUN4V PMU"; + break; + } + } diff --git a/queue-3.0/sparc-fix-build-with-debug_pagealloc-enabled.patch b/queue-3.0/sparc-fix-build-with-debug_pagealloc-enabled.patch new file mode 100644 index 0000000000..18546d6c4f --- /dev/null +++ b/queue-3.0/sparc-fix-build-with-debug_pagealloc-enabled.patch @@ -0,0 +1,44 @@ +From 34acb7348fb03338e9b619f52015890f6db9c6df Mon Sep 17 00:00:00 2001 +From: "David S. Miller" <davem@davemloft.net> +Date: Sat, 6 Aug 2011 05:26:35 -0700 +Subject: sparc: Fix build with DEBUG_PAGEALLOC enabled. + + +From: "David S. Miller" <davem@davemloft.net> + +[ Upstream commit 0785a8e87be0202744d8681363aecbd4ffbb5f5a ] + +arch/sparc/mm/init_64.c:1622:22: error: unused variable '__swapper_4m_tsb_phys_patch_end' [-Werror=unused-variable] +arch/sparc/mm/init_64.c:1621:22: error: unused variable '__swapper_4m_tsb_phys_patch' [-Werror=unused-variable] + +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + arch/sparc/mm/init_64.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -1618,18 +1618,20 @@ static void ktsb_phys_patch(void) + { + extern unsigned int __swapper_tsb_phys_patch; + extern unsigned int __swapper_tsb_phys_patch_end; +- extern unsigned int __swapper_4m_tsb_phys_patch; +- extern unsigned int __swapper_4m_tsb_phys_patch_end; + unsigned long ktsb_pa; + + ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE); + patch_one_ktsb_phys(&__swapper_tsb_phys_patch, + &__swapper_tsb_phys_patch_end, ktsb_pa); + #ifndef CONFIG_DEBUG_PAGEALLOC ++ { ++ extern unsigned int __swapper_4m_tsb_phys_patch; ++ extern unsigned int __swapper_4m_tsb_phys_patch_end; + ktsb_pa = (kern_base + + ((unsigned long)&swapper_4m_tsb[0] - KERNBASE)); + patch_one_ktsb_phys(&__swapper_4m_tsb_phys_patch, + &__swapper_4m_tsb_phys_patch_end, ktsb_pa); ++ } + #endif + } + diff --git a/queue-3.0/sparc-minor-tweaks-to-niagara-page-copy-clear.patch b/queue-3.0/sparc-minor-tweaks-to-niagara-page-copy-clear.patch new file mode 100644 index 0000000000..238c9084e6 --- /dev/null +++ b/queue-3.0/sparc-minor-tweaks-to-niagara-page-copy-clear.patch @@ -0,0 +1,245 @@ +From 6fa98bc470f338c16dd79d26e6104ddea48889d4 Mon Sep 17 00:00:00 2001 +From: "David S. Miller" <davem@davemloft.net> +Date: Mon, 1 Aug 2011 18:18:57 -0700 +Subject: sparc: Minor tweaks to Niagara page copy/clear. + + +From: "David S. Miller" <davem@davemloft.net> + +[ Upstream commit e95ade083939dcb4b0c51c1a2c8504ea9ef3d6ef ] + +Don't use floating point on Niagara2, use the traditional +plain Niagara code instead. + +Unroll Niagara loops to 128 bytes for copy, and 256 bytes +for clear. + +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + arch/sparc/kernel/head_64.S | 2 + arch/sparc/lib/Makefile | 2 + arch/sparc/lib/NG2page.S | 61 ----------------------- + arch/sparc/lib/NGpage.S | 114 ++++++++++++++++++++++++++++---------------- + 4 files changed, 77 insertions(+), 102 deletions(-) + delete mode 100644 arch/sparc/lib/NG2page.S + +--- a/arch/sparc/kernel/head_64.S ++++ b/arch/sparc/kernel/head_64.S +@@ -559,7 +559,7 @@ niagara2_patch: + nop + call niagara_patch_bzero + nop +- call niagara2_patch_pageops ++ call niagara_patch_pageops + nop + + ba,a,pt %xcc, 80f +--- a/arch/sparc/lib/Makefile ++++ b/arch/sparc/lib/Makefile +@@ -31,7 +31,7 @@ lib-$(CONFIG_SPARC64) += NGmemcpy.o NGco + lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o + + lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o +-lib-$(CONFIG_SPARC64) += NG2patch.o NG2page.o ++lib-$(CONFIG_SPARC64) += NG2patch.o + + lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o + lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o +--- a/arch/sparc/lib/NG2page.S ++++ /dev/null +@@ -1,61 +0,0 @@ +-/* NG2page.S: Niagara-2 optimized clear and copy page. +- * +- * Copyright (C) 2007 (davem@davemloft.net) +- */ +- +-#include <asm/asi.h> +-#include <asm/page.h> +-#include <asm/visasm.h> +- +- .text +- .align 32 +- +- /* This is heavily simplified from the sun4u variants +- * because Niagara-2 does not have any D-cache aliasing issues. 
+- */ +-NG2copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ +- prefetch [%o1 + 0x00], #one_read +- prefetch [%o1 + 0x40], #one_read +- VISEntryHalf +- set PAGE_SIZE, %g7 +- sub %o0, %o1, %g3 +-1: stxa %g0, [%o1 + %g3] ASI_BLK_INIT_QUAD_LDD_P +- subcc %g7, 64, %g7 +- ldda [%o1] ASI_BLK_P, %f0 +- stda %f0, [%o1 + %g3] ASI_BLK_P +- add %o1, 64, %o1 +- bne,pt %xcc, 1b +- prefetch [%o1 + 0x40], #one_read +- membar #Sync +- VISExitHalf +- retl +- nop +- +-#define BRANCH_ALWAYS 0x10680000 +-#define NOP 0x01000000 +-#define NG_DO_PATCH(OLD, NEW) \ +- sethi %hi(NEW), %g1; \ +- or %g1, %lo(NEW), %g1; \ +- sethi %hi(OLD), %g2; \ +- or %g2, %lo(OLD), %g2; \ +- sub %g1, %g2, %g1; \ +- sethi %hi(BRANCH_ALWAYS), %g3; \ +- sll %g1, 11, %g1; \ +- srl %g1, 11 + 2, %g1; \ +- or %g3, %lo(BRANCH_ALWAYS), %g3; \ +- or %g3, %g1, %g3; \ +- stw %g3, [%g2]; \ +- sethi %hi(NOP), %g3; \ +- or %g3, %lo(NOP), %g3; \ +- stw %g3, [%g2 + 0x4]; \ +- flush %g2; +- +- .globl niagara2_patch_pageops +- .type niagara2_patch_pageops,#function +-niagara2_patch_pageops: +- NG_DO_PATCH(copy_user_page, NG2copy_user_page) +- NG_DO_PATCH(_clear_page, NGclear_page) +- NG_DO_PATCH(clear_user_page, NGclear_user_page) +- retl +- nop +- .size niagara2_patch_pageops,.-niagara2_patch_pageops +--- a/arch/sparc/lib/NGpage.S ++++ b/arch/sparc/lib/NGpage.S +@@ -16,55 +16,91 @@ + */ + + NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ +- prefetch [%o1 + 0x00], #one_read +- mov 8, %g1 +- mov 16, %g2 +- mov 24, %g3 ++ save %sp, -192, %sp ++ rd %asi, %g3 ++ wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi + set PAGE_SIZE, %g7 ++ prefetch [%i1 + 0x00], #one_read ++ prefetch [%i1 + 0x40], #one_read + +-1: ldda [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2 +- ldda [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4 +- prefetch [%o1 + 0x40], #one_read +- add %o1, 32, %o1 +- stxa %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P +- stxa %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P +- ldda [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2 +- stxa %o4, [%o0 + %g2] 
ASI_BLK_INIT_QUAD_LDD_P +- stxa %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P +- ldda [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4 +- add %o1, 32, %o1 +- add %o0, 32, %o0 +- stxa %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P +- stxa %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P +- stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P +- stxa %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P +- subcc %g7, 64, %g7 ++1: prefetch [%i1 + 0x80], #one_read ++ prefetch [%i1 + 0xc0], #one_read ++ ldda [%i1 + 0x00] %asi, %o2 ++ ldda [%i1 + 0x10] %asi, %o4 ++ ldda [%i1 + 0x20] %asi, %l2 ++ ldda [%i1 + 0x30] %asi, %l4 ++ stxa %o2, [%i0 + 0x00] %asi ++ stxa %o3, [%i0 + 0x08] %asi ++ stxa %o4, [%i0 + 0x10] %asi ++ stxa %o5, [%i0 + 0x18] %asi ++ stxa %l2, [%i0 + 0x20] %asi ++ stxa %l3, [%i0 + 0x28] %asi ++ stxa %l4, [%i0 + 0x30] %asi ++ stxa %l5, [%i0 + 0x38] %asi ++ ldda [%i1 + 0x40] %asi, %o2 ++ ldda [%i1 + 0x50] %asi, %o4 ++ ldda [%i1 + 0x60] %asi, %l2 ++ ldda [%i1 + 0x70] %asi, %l4 ++ stxa %o2, [%i0 + 0x40] %asi ++ stxa %o3, [%i0 + 0x48] %asi ++ stxa %o4, [%i0 + 0x50] %asi ++ stxa %o5, [%i0 + 0x58] %asi ++ stxa %l2, [%i0 + 0x60] %asi ++ stxa %l3, [%i0 + 0x68] %asi ++ stxa %l4, [%i0 + 0x70] %asi ++ stxa %l5, [%i0 + 0x78] %asi ++ add %i1, 128, %i1 ++ subcc %g7, 128, %g7 + bne,pt %xcc, 1b +- add %o0, 32, %o0 ++ add %i0, 128, %i0 ++ wr %g3, 0x0, %asi + membar #Sync +- retl +- nop ++ ret ++ restore + +- .globl NGclear_page, NGclear_user_page ++ .align 32 + NGclear_page: /* %o0=dest */ + NGclear_user_page: /* %o0=dest, %o1=vaddr */ +- mov 8, %g1 +- mov 16, %g2 +- mov 24, %g3 ++ rd %asi, %g3 ++ wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi + set PAGE_SIZE, %g7 + +-1: stxa %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P +- stxa %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P +- stxa %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P +- stxa %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P +- add %o0, 32, %o0 +- stxa %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P +- stxa %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P +- stxa %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P +- 
stxa %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P +- subcc %g7, 64, %g7 ++1: stxa %g0, [%o0 + 0x00] %asi ++ stxa %g0, [%o0 + 0x08] %asi ++ stxa %g0, [%o0 + 0x10] %asi ++ stxa %g0, [%o0 + 0x18] %asi ++ stxa %g0, [%o0 + 0x20] %asi ++ stxa %g0, [%o0 + 0x28] %asi ++ stxa %g0, [%o0 + 0x30] %asi ++ stxa %g0, [%o0 + 0x38] %asi ++ stxa %g0, [%o0 + 0x40] %asi ++ stxa %g0, [%o0 + 0x48] %asi ++ stxa %g0, [%o0 + 0x50] %asi ++ stxa %g0, [%o0 + 0x58] %asi ++ stxa %g0, [%o0 + 0x60] %asi ++ stxa %g0, [%o0 + 0x68] %asi ++ stxa %g0, [%o0 + 0x70] %asi ++ stxa %g0, [%o0 + 0x78] %asi ++ stxa %g0, [%o0 + 0x80] %asi ++ stxa %g0, [%o0 + 0x88] %asi ++ stxa %g0, [%o0 + 0x90] %asi ++ stxa %g0, [%o0 + 0x98] %asi ++ stxa %g0, [%o0 + 0xa0] %asi ++ stxa %g0, [%o0 + 0xa8] %asi ++ stxa %g0, [%o0 + 0xb0] %asi ++ stxa %g0, [%o0 + 0xb8] %asi ++ stxa %g0, [%o0 + 0xc0] %asi ++ stxa %g0, [%o0 + 0xc8] %asi ++ stxa %g0, [%o0 + 0xd0] %asi ++ stxa %g0, [%o0 + 0xd8] %asi ++ stxa %g0, [%o0 + 0xe0] %asi ++ stxa %g0, [%o0 + 0xe8] %asi ++ stxa %g0, [%o0 + 0xf0] %asi ++ stxa %g0, [%o0 + 0xf8] %asi ++ subcc %g7, 256, %g7 + bne,pt %xcc, 1b +- add %o0, 32, %o0 ++ add %o0, 256, %o0 ++ wr %g3, 0x0, %asi + membar #Sync + retl + nop diff --git a/queue-3.0/sparc-sanitize-cpu-feature-detection-and-reporting.patch b/queue-3.0/sparc-sanitize-cpu-feature-detection-and-reporting.patch new file mode 100644 index 0000000000..2c7a598555 --- /dev/null +++ b/queue-3.0/sparc-sanitize-cpu-feature-detection-and-reporting.patch @@ -0,0 +1,316 @@ +From 9f9be9c235ebde040791c392dde618caa7e414ab Mon Sep 17 00:00:00 2001 +From: "David S. Miller" <davem@davemloft.net> +Date: Thu, 28 Jul 2011 23:31:26 -0700 +Subject: sparc: Sanitize cpu feature detection and reporting. + + +From: "David S. Miller" <davem@davemloft.net> + +[ Upstream commit ac85fe8b21248054851e05bfaa352562e5b06dd3 ] + +Instead of evaluating the cpu features for ELF_HWCAP every exec, +calculate it once at boot time. 
+ +Add AV_SPARC_* capability flag bits, compatible with what Solaris +reports to applications. + +Report these capabilities once in the kernel log, and also via +/proc/cpuinfo in a new "cpucaps" entry. + +If available, fetch the cpu features from the machine description +'hwcap-list' property of the 'cpu' node. + +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + arch/sparc/include/asm/elf_64.h | 65 +++++++---------- + arch/sparc/kernel/cpu.c | 1 + arch/sparc/kernel/kernel.h | 6 + + arch/sparc/kernel/setup_64.c | 149 ++++++++++++++++++++++++++++++++++++++++ + 4 files changed, 185 insertions(+), 36 deletions(-) + +--- a/arch/sparc/include/asm/elf_64.h ++++ b/arch/sparc/include/asm/elf_64.h +@@ -59,15 +59,33 @@ + #define R_SPARC_6 45 + + /* Bits present in AT_HWCAP, primarily for Sparc32. */ +- +-#define HWCAP_SPARC_FLUSH 1 /* CPU supports flush instruction. */ +-#define HWCAP_SPARC_STBAR 2 +-#define HWCAP_SPARC_SWAP 4 +-#define HWCAP_SPARC_MULDIV 8 +-#define HWCAP_SPARC_V9 16 +-#define HWCAP_SPARC_ULTRA3 32 +-#define HWCAP_SPARC_BLKINIT 64 +-#define HWCAP_SPARC_N2 128 ++#define HWCAP_SPARC_FLUSH 0x00000001 ++#define HWCAP_SPARC_STBAR 0x00000002 ++#define HWCAP_SPARC_SWAP 0x00000004 ++#define HWCAP_SPARC_MULDIV 0x00000008 ++#define HWCAP_SPARC_V9 0x00000010 ++#define HWCAP_SPARC_ULTRA3 0x00000020 ++#define HWCAP_SPARC_BLKINIT 0x00000040 ++#define HWCAP_SPARC_N2 0x00000080 ++ ++/* Solaris compatible AT_HWCAP bits. 
*/ ++#define AV_SPARC_MUL32 0x00000100 /* 32x32 multiply is efficient */ ++#define AV_SPARC_DIV32 0x00000200 /* 32x32 divide is efficient */ ++#define AV_SPARC_FSMULD 0x00000400 /* 'fsmuld' is efficient */ ++#define AV_SPARC_V8PLUS 0x00000800 /* v9 insn available to 32bit */ ++#define AV_SPARC_POPC 0x00001000 /* 'popc' is efficient */ ++#define AV_SPARC_VIS 0x00002000 /* VIS insns available */ ++#define AV_SPARC_VIS2 0x00004000 /* VIS2 insns available */ ++#define AV_SPARC_ASI_BLK_INIT 0x00008000 /* block init ASIs available */ ++#define AV_SPARC_FMAF 0x00010000 /* fused multiply-add */ ++#define AV_SPARC_VIS3 0x00020000 /* VIS3 insns available */ ++#define AV_SPARC_HPC 0x00040000 /* HPC insns available */ ++#define AV_SPARC_RANDOM 0x00080000 /* 'random' insn available */ ++#define AV_SPARC_TRANS 0x00100000 /* transaction insns available */ ++#define AV_SPARC_FJFMAU 0x00200000 /* unfused multiply-add */ ++#define AV_SPARC_IMA 0x00400000 /* integer multiply-add */ ++#define AV_SPARC_ASI_CACHE_SPARING \ ++ 0x00800000 /* cache sparing ASIs available */ + + #define CORE_DUMP_USE_REGSET + +@@ -162,33 +180,8 @@ typedef struct { + #define ELF_ET_DYN_BASE 0x0000010000000000UL + #define COMPAT_ELF_ET_DYN_BASE 0x0000000070000000UL + +- +-/* This yields a mask that user programs can use to figure out what +- instruction set this cpu supports. */ +- +-/* On Ultra, we support all of the v8 capabilities. 
*/ +-static inline unsigned int sparc64_elf_hwcap(void) +-{ +- unsigned int cap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | +- HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV | +- HWCAP_SPARC_V9); +- +- if (tlb_type == cheetah || tlb_type == cheetah_plus) +- cap |= HWCAP_SPARC_ULTRA3; +- else if (tlb_type == hypervisor) { +- if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || +- sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || +- sun4v_chip_type == SUN4V_CHIP_NIAGARA3) +- cap |= HWCAP_SPARC_BLKINIT; +- if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || +- sun4v_chip_type == SUN4V_CHIP_NIAGARA3) +- cap |= HWCAP_SPARC_N2; +- } +- +- return cap; +-} +- +-#define ELF_HWCAP sparc64_elf_hwcap(); ++extern unsigned long sparc64_elf_hwcap; ++#define ELF_HWCAP sparc64_elf_hwcap + + /* This yields a string that ld.so will use to load implementation + specific libraries for optimization. This is more specific in +--- a/arch/sparc/kernel/cpu.c ++++ b/arch/sparc/kernel/cpu.c +@@ -396,6 +396,7 @@ static int show_cpuinfo(struct seq_file + , cpu_data(0).clock_tick + #endif + ); ++ cpucap_info(m); + #ifdef CONFIG_SMP + smp_bogo(m); + #endif +--- a/arch/sparc/kernel/kernel.h ++++ b/arch/sparc/kernel/kernel.h +@@ -10,6 +10,12 @@ extern const char *sparc_pmu_type; + extern unsigned int fsr_storage; + extern int ncpus_probed; + ++#ifdef CONFIG_SPARC64 ++/* setup_64.c */ ++struct seq_file; ++extern void cpucap_info(struct seq_file *); ++#endif ++ + #ifdef CONFIG_SPARC32 + /* cpu.c */ + extern void cpu_probe(void); +--- a/arch/sparc/kernel/setup_64.c ++++ b/arch/sparc/kernel/setup_64.c +@@ -29,6 +29,7 @@ + #include <linux/interrupt.h> + #include <linux/cpu.h> + #include <linux/initrd.h> ++#include <linux/module.h> + + #include <asm/system.h> + #include <asm/io.h> +@@ -46,6 +47,8 @@ + #include <asm/mmu.h> + #include <asm/ns87303.h> + #include <asm/btext.h> ++#include <asm/elf.h> ++#include <asm/mdesc.h> + + #ifdef CONFIG_IP_PNP + #include <net/ipconfig.h> +@@ -278,6 +281,151 @@ void __init boot_cpu_id_too_large(int cp 
+ } + #endif + ++/* On Ultra, we support all of the v8 capabilities. */ ++unsigned long sparc64_elf_hwcap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | ++ HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV | ++ HWCAP_SPARC_V9); ++EXPORT_SYMBOL(sparc64_elf_hwcap); ++ ++static const char *hwcaps[] = { ++ "flush", "stbar", "swap", "muldiv", "v9", ++ "ultra3", "blkinit", "n2", ++ ++ /* These strings are as they appear in the machine description ++ * 'hwcap-list' property for cpu nodes. ++ */ ++ "mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2", ++ "ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau", ++ "ima", "cspare", ++}; ++ ++void cpucap_info(struct seq_file *m) ++{ ++ unsigned long caps = sparc64_elf_hwcap; ++ int i, printed = 0; ++ ++ seq_puts(m, "cpucaps\t\t: "); ++ for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { ++ unsigned long bit = 1UL << i; ++ if (caps & bit) { ++ seq_printf(m, "%s%s", ++ printed ? "," : "", hwcaps[i]); ++ printed++; ++ } ++ } ++ seq_putc(m, '\n'); ++} ++ ++static void __init report_hwcaps(unsigned long caps) ++{ ++ int i, printed = 0; ++ ++ printk(KERN_INFO "CPU CAPS: ["); ++ for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { ++ unsigned long bit = 1UL << i; ++ if (caps & bit) { ++ printk(KERN_CONT "%s%s", ++ printed ? 
"," : "", hwcaps[i]); ++ if (++printed == 8) { ++ printk(KERN_CONT "]\n"); ++ printk(KERN_INFO "CPU CAPS: ["); ++ printed = 0; ++ } ++ } ++ } ++ printk(KERN_CONT "]\n"); ++} ++ ++static unsigned long __init mdesc_cpu_hwcap_list(void) ++{ ++ struct mdesc_handle *hp; ++ unsigned long caps = 0; ++ const char *prop; ++ int len; ++ u64 pn; ++ ++ hp = mdesc_grab(); ++ if (!hp) ++ return 0; ++ ++ pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "cpu"); ++ if (pn == MDESC_NODE_NULL) ++ goto out; ++ ++ prop = mdesc_get_property(hp, pn, "hwcap-list", &len); ++ if (!prop) ++ goto out; ++ ++ while (len) { ++ int i, plen; ++ ++ for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { ++ unsigned long bit = 1UL << i; ++ ++ if (!strcmp(prop, hwcaps[i])) { ++ caps |= bit; ++ break; ++ } ++ } ++ ++ plen = strlen(prop) + 1; ++ prop += plen; ++ len -= plen; ++ } ++ ++out: ++ mdesc_release(hp); ++ return caps; ++} ++ ++/* This yields a mask that user programs can use to figure out what ++ * instruction set this cpu supports. 
++ */ ++static void __init init_sparc64_elf_hwcap(void) ++{ ++ unsigned long cap = sparc64_elf_hwcap; ++ unsigned long mdesc_caps; ++ ++ if (tlb_type == cheetah || tlb_type == cheetah_plus) ++ cap |= HWCAP_SPARC_ULTRA3; ++ else if (tlb_type == hypervisor) { ++ if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || ++ sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || ++ sun4v_chip_type == SUN4V_CHIP_NIAGARA3) ++ cap |= HWCAP_SPARC_BLKINIT; ++ if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || ++ sun4v_chip_type == SUN4V_CHIP_NIAGARA3) ++ cap |= HWCAP_SPARC_N2; ++ } ++ ++ cap |= (AV_SPARC_MUL32 | AV_SPARC_DIV32 | AV_SPARC_V8PLUS); ++ ++ mdesc_caps = mdesc_cpu_hwcap_list(); ++ if (!mdesc_caps) { ++ if (tlb_type == spitfire) ++ cap |= AV_SPARC_VIS; ++ if (tlb_type == cheetah || tlb_type == cheetah_plus) ++ cap |= AV_SPARC_VIS | AV_SPARC_VIS2; ++ if (tlb_type == cheetah_plus) ++ cap |= AV_SPARC_POPC; ++ if (tlb_type == hypervisor) { ++ if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1) ++ cap |= AV_SPARC_ASI_BLK_INIT; ++ if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || ++ sun4v_chip_type == SUN4V_CHIP_NIAGARA3) ++ cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 | ++ AV_SPARC_ASI_BLK_INIT | ++ AV_SPARC_POPC); ++ if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3) ++ cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC | ++ AV_SPARC_FMAF); ++ } ++ } ++ sparc64_elf_hwcap = cap | mdesc_caps; ++ ++ report_hwcaps(sparc64_elf_hwcap); ++} ++ + void __init setup_arch(char **cmdline_p) + { + /* Initialize PROM console and command line. 
*/ +@@ -337,6 +485,7 @@ void __init setup_arch(char **cmdline_p) + init_cur_cpu_trap(current_thread_info()); + + paging_init(); ++ init_sparc64_elf_hwcap(); + } + + extern int stop_a_enabled; diff --git a/queue-3.0/sparc-set-reboot-cmd-using-reboot-data-hypervisor-call-if-available.patch b/queue-3.0/sparc-set-reboot-cmd-using-reboot-data-hypervisor-call-if-available.patch new file mode 100644 index 0000000000..bbb079375a --- /dev/null +++ b/queue-3.0/sparc-set-reboot-cmd-using-reboot-data-hypervisor-call-if-available.patch @@ -0,0 +1,162 @@ +From 3551005cbfebbdaaccf92eda706bab61bc210caa Mon Sep 17 00:00:00 2001 +From: "David S. Miller" <davem@davemloft.net> +Date: Mon, 1 Aug 2011 23:27:17 -0700 +Subject: sparc: Set reboot-cmd using reboot data hypervisor call if available. + + +From: "David S. Miller" <davem@davemloft.net> + +[ Upstream commit ea5e7447ea9d555558e0f13798f5143dd51a915a ] + +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + arch/sparc/include/asm/hypervisor.h | 7 +++++++ + arch/sparc/kernel/ds.c | 30 ++++++++++++++++++++++++++++-- + arch/sparc/kernel/hvcalls.S | 7 +++++++ + arch/sparc/kernel/kernel.h | 9 +++++++++ + arch/sparc/kernel/sstate.c | 9 ++------- + 5 files changed, 53 insertions(+), 9 deletions(-) + +--- a/arch/sparc/include/asm/hypervisor.h ++++ b/arch/sparc/include/asm/hypervisor.h +@@ -2927,6 +2927,13 @@ extern unsigned long sun4v_ncs_request(u + #define HV_FAST_FIRE_GET_PERFREG 0x120 + #define HV_FAST_FIRE_SET_PERFREG 0x121 + ++#define HV_FAST_REBOOT_DATA_SET 0x172 ++ ++#ifndef __ASSEMBLY__ ++extern unsigned long sun4v_reboot_data_set(unsigned long ra, ++ unsigned long len); ++#endif ++ + /* Function numbers for HV_CORE_TRAP. 
*/ + #define HV_CORE_SET_VER 0x00 + #define HV_CORE_PUTCHAR 0x01 +--- a/arch/sparc/kernel/ds.c ++++ b/arch/sparc/kernel/ds.c +@@ -15,12 +15,15 @@ + #include <linux/reboot.h> + #include <linux/cpu.h> + ++#include <asm/hypervisor.h> + #include <asm/ldc.h> + #include <asm/vio.h> + #include <asm/mdesc.h> + #include <asm/head.h> + #include <asm/irq.h> + ++#include "kernel.h" ++ + #define DRV_MODULE_NAME "ds" + #define PFX DRV_MODULE_NAME ": " + #define DRV_MODULE_VERSION "1.0" +@@ -828,18 +831,32 @@ void ldom_set_var(const char *var, const + } + } + ++static char full_boot_str[256] __attribute__((aligned(32))); ++static int reboot_data_supported; ++ + void ldom_reboot(const char *boot_command) + { + /* Don't bother with any of this if the boot_command + * is empty. + */ + if (boot_command && strlen(boot_command)) { +- char full_boot_str[256]; ++ unsigned long len; + + strcpy(full_boot_str, "boot "); + strcpy(full_boot_str + strlen("boot "), boot_command); ++ len = strlen(full_boot_str); + +- ldom_set_var("reboot-command", full_boot_str); ++ if (reboot_data_supported) { ++ unsigned long ra = kimage_addr_to_ra(full_boot_str); ++ unsigned long hv_ret; ++ ++ hv_ret = sun4v_reboot_data_set(ra, len); ++ if (hv_ret != HV_EOK) ++ pr_err("SUN4V: Unable to set reboot data " ++ "hv_ret=%lu\n", hv_ret); ++ } else { ++ ldom_set_var("reboot-command", full_boot_str); ++ } + } + sun4v_mach_sir(); + } +@@ -1237,6 +1254,15 @@ static struct vio_driver ds_driver = { + + static int __init ds_init(void) + { ++ unsigned long hv_ret, major, minor; ++ ++ hv_ret = sun4v_get_version(HV_GRP_REBOOT_DATA, &major, &minor); ++ if (hv_ret == HV_EOK) { ++ pr_info("SUN4V: Reboot data supported (maj=%lu,min=%lu).\n", ++ major, minor); ++ reboot_data_supported = 1; ++ } ++ + kthread_run(ds_thread, NULL, "kldomd"); + + return vio_register_driver(&ds_driver); +--- a/arch/sparc/kernel/hvcalls.S ++++ b/arch/sparc/kernel/hvcalls.S +@@ -798,3 +798,10 @@ ENTRY(sun4v_niagara2_setperf) + retl + nop + 
ENDPROC(sun4v_niagara2_setperf) ++ ++ENTRY(sun4v_reboot_data_set) ++ mov HV_FAST_REBOOT_DATA_SET, %o5 ++ ta HV_FAST_TRAP ++ retl ++ nop ++ENDPROC(sun4v_reboot_data_set) +--- a/arch/sparc/kernel/kernel.h ++++ b/arch/sparc/kernel/kernel.h +@@ -4,6 +4,8 @@ + #include <linux/interrupt.h> + + #include <asm/traps.h> ++#include <asm/head.h> ++#include <asm/io.h> + + /* cpu.c */ + extern const char *sparc_pmu_type; +@@ -14,6 +16,13 @@ extern int ncpus_probed; + /* setup_64.c */ + struct seq_file; + extern void cpucap_info(struct seq_file *); ++ ++static inline unsigned long kimage_addr_to_ra(const char *p) ++{ ++ unsigned long val = (unsigned long) p; ++ ++ return kern_base + (val - KERNBASE); ++} + #endif + + #ifdef CONFIG_SPARC32 +--- a/arch/sparc/kernel/sstate.c ++++ b/arch/sparc/kernel/sstate.c +@@ -14,14 +14,9 @@ + #include <asm/head.h> + #include <asm/io.h> + +-static int hv_supports_soft_state; +- +-static unsigned long kimage_addr_to_ra(const char *p) +-{ +- unsigned long val = (unsigned long) p; ++#include "kernel.h" + +- return kern_base + (val - KERNBASE); +-} ++static int hv_supports_soft_state; + + static void do_set_sstate(unsigned long state, const char *msg) + { diff --git a/queue-3.0/sparc-size-mondo-queues-more-sanely.patch b/queue-3.0/sparc-size-mondo-queues-more-sanely.patch new file mode 100644 index 0000000000..73f2be509d --- /dev/null +++ b/queue-3.0/sparc-size-mondo-queues-more-sanely.patch @@ -0,0 +1,103 @@ +From d28c07bef1fdcd4c3ecd825ff3a1c1d8b40c0245 Mon Sep 17 00:00:00 2001 +From: "David S. Miller" <davem@davemloft.net> +Date: Fri, 5 Aug 2011 02:38:27 -0700 +Subject: sparc: Size mondo queues more sanely. + + +From: "David S. Miller" <davem@davemloft.net> + +[ Upstream commit 961f65fc41cdc1f9099a6075258816c0db98e390 ] + +There is currently no upper limit on the mondo queue sizes we'll use, +which guarantees that we'll eventually hit page allocation limits, and +thus allocation failures, due to MAX_ORDER. 
+ +Cap the sizes sanely, current limits are: + +CPU MONDO 2 * max_possible_cpus +DEV MONDO 256 (basically NR_IRQS) +RES MONDO 128 +NRES MONDO 4 + +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + arch/sparc/kernel/mdesc.c | 30 +++++++++++++++++++++++------- + 1 file changed, 23 insertions(+), 7 deletions(-) + +--- a/arch/sparc/kernel/mdesc.c ++++ b/arch/sparc/kernel/mdesc.c +@@ -508,6 +508,8 @@ const char *mdesc_node_name(struct mdesc + } + EXPORT_SYMBOL(mdesc_node_name); + ++static u64 max_cpus = 64; ++ + static void __init report_platform_properties(void) + { + struct mdesc_handle *hp = mdesc_grab(); +@@ -543,8 +545,10 @@ static void __init report_platform_prope + if (v) + printk("PLATFORM: watchdog-max-timeout [%llu ms]\n", *v); + v = mdesc_get_property(hp, pn, "max-cpus", NULL); +- if (v) +- printk("PLATFORM: max-cpus [%llu]\n", *v); ++ if (v) { ++ max_cpus = *v; ++ printk("PLATFORM: max-cpus [%llu]\n", max_cpus); ++ } + + #ifdef CONFIG_SMP + { +@@ -715,7 +719,7 @@ static void __cpuinit set_proc_ids(struc + } + + static void __cpuinit get_one_mondo_bits(const u64 *p, unsigned int *mask, +- unsigned char def) ++ unsigned long def, unsigned long max) + { + u64 val; + +@@ -726,6 +730,9 @@ static void __cpuinit get_one_mondo_bits + if (!val || val >= 64) + goto use_default; + ++ if (val > max) ++ val = max; ++ + *mask = ((1U << val) * 64U) - 1U; + return; + +@@ -736,19 +743,28 @@ use_default: + static void __cpuinit get_mondo_data(struct mdesc_handle *hp, u64 mp, + struct trap_per_cpu *tb) + { ++ static int printed; + const u64 *val; + + val = mdesc_get_property(hp, mp, "q-cpu-mondo-#bits", NULL); +- get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7); ++ get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7, ilog2(max_cpus * 2)); + + val = mdesc_get_property(hp, mp, "q-dev-mondo-#bits", NULL); +- get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7); ++ get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7, 8); + + val = 
mdesc_get_property(hp, mp, "q-resumable-#bits", NULL); +- get_one_mondo_bits(val, &tb->resum_qmask, 6); ++ get_one_mondo_bits(val, &tb->resum_qmask, 6, 7); + + val = mdesc_get_property(hp, mp, "q-nonresumable-#bits", NULL); +- get_one_mondo_bits(val, &tb->nonresum_qmask, 2); ++ get_one_mondo_bits(val, &tb->nonresum_qmask, 2, 2); ++ if (!printed++) { ++ pr_info("SUN4V: Mondo queue sizes " ++ "[cpu(%u) dev(%u) r(%u) nr(%u)]\n", ++ tb->cpu_mondo_qmask + 1, ++ tb->dev_mondo_qmask + 1, ++ tb->resum_qmask + 1, ++ tb->nonresum_qmask + 1); ++ } + } + + static void * __cpuinit mdesc_iterate_over_cpus(void *(*func)(struct mdesc_handle *, u64, int, void *), void *arg, cpumask_t *mask) diff --git a/queue-3.0/sparc-use-hweight64-in-popc-emulation.patch b/queue-3.0/sparc-use-hweight64-in-popc-emulation.patch new file mode 100644 index 0000000000..6406664887 --- /dev/null +++ b/queue-3.0/sparc-use-hweight64-in-popc-emulation.patch @@ -0,0 +1,57 @@ +From 54bc60d0d137f801e593a81ffdd3aa2ed43cf271 Mon Sep 17 00:00:00 2001 +From: "David S. Miller" <davem@davemloft.net> +Date: Mon, 1 Aug 2011 19:41:12 -0700 +Subject: sparc: Use hweight64() in popc emulation. + + +From: "David S. Miller" <davem@davemloft.net> + +[ Upstream commit d600cbed0fe8fceec04500824f638dfe4996c653 ] + +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + arch/sparc/kernel/unaligned_64.c | 15 ++++----------- + 1 file changed, 4 insertions(+), 11 deletions(-) + +--- a/arch/sparc/kernel/unaligned_64.c ++++ b/arch/sparc/kernel/unaligned_64.c +@@ -22,6 +22,7 @@ + #include <linux/bitops.h> + #include <linux/perf_event.h> + #include <linux/ratelimit.h> ++#include <linux/bitops.h> + #include <asm/fpumacro.h> + + enum direction { +@@ -373,16 +374,11 @@ asmlinkage void kernel_unaligned_trap(st + } + } + +-static char popc_helper[] = { +-0, 1, 1, 2, 1, 2, 2, 3, +-1, 2, 2, 3, 2, 3, 3, 4, +-}; +- + int handle_popc(u32 insn, struct pt_regs *regs) + { +- u64 value; +- int ret, i, rd = ((insn >> 25) & 0x1f); + int from_kernel = (regs->tstate & TSTATE_PRIV) != 0; ++ int ret, rd = ((insn >> 25) & 0x1f); ++ u64 value; + + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); + if (insn & 0x2000) { +@@ -392,10 +388,7 @@ int handle_popc(u32 insn, struct pt_regs + maybe_flush_windows(0, insn & 0x1f, rd, from_kernel); + value = fetch_reg(insn & 0x1f, regs); + } +- for (ret = 0, i = 0; i < 16; i++) { +- ret += popc_helper[value & 0xf]; +- value >>= 4; +- } ++ ret = hweight64(value); + if (rd < 16) { + if (rd) + regs->u_regs[rd] = ret; diff --git a/queue-3.0/sparc-use-popc-if-possible-for-hweight-routines.patch b/queue-3.0/sparc-use-popc-if-possible-for-hweight-routines.patch new file mode 100644 index 0000000000..a069230274 --- /dev/null +++ b/queue-3.0/sparc-use-popc-if-possible-for-hweight-routines.patch @@ -0,0 +1,238 @@ +From 4599e05015ea32b05c3052c8b109e8f1144c8711 Mon Sep 17 00:00:00 2001 +From: "David S. Miller" <davem@davemloft.net> +Date: Fri, 29 Jul 2011 09:42:07 -0700 +Subject: sparc: Use popc if possible for hweight routines. + + +From: "David S. Miller" <davem@davemloft.net> + +[ Upstream commit ef7c4d4675d2a9206f913f26ca1a5cd41bff9d41 ] + +Just like powerpc, we code patch at boot time. + +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + arch/sparc/include/asm/bitops_64.h | 42 ++---------------------------- + arch/sparc/kernel/entry.h | 7 +++++ + arch/sparc/kernel/setup_64.c | 27 +++++++++++++++++++ + arch/sparc/kernel/sparc_ksyms_64.c | 7 +++++ + arch/sparc/kernel/vmlinux.lds.S | 6 +++- + arch/sparc/lib/Makefile | 2 - + arch/sparc/lib/hweight.S | 51 +++++++++++++++++++++++++++++++++++++ + 7 files changed, 102 insertions(+), 40 deletions(-) + create mode 100644 arch/sparc/lib/hweight.S + +--- a/arch/sparc/include/asm/bitops_64.h ++++ b/arch/sparc/include/asm/bitops_64.h +@@ -42,45 +42,11 @@ extern void change_bit(unsigned long nr, + * of bits set) of a N-bit word + */ + +-#ifdef ULTRA_HAS_POPULATION_COUNT ++extern unsigned long __arch_hweight64(__u64 w); ++extern unsigned int __arch_hweight32(unsigned int w); ++extern unsigned int __arch_hweight16(unsigned int w); ++extern unsigned int __arch_hweight8(unsigned int w); + +-static inline unsigned int __arch_hweight64(unsigned long w) +-{ +- unsigned int res; +- +- __asm__ ("popc %1,%0" : "=r" (res) : "r" (w)); +- return res; +-} +- +-static inline unsigned int __arch_hweight32(unsigned int w) +-{ +- unsigned int res; +- +- __asm__ ("popc %1,%0" : "=r" (res) : "r" (w & 0xffffffff)); +- return res; +-} +- +-static inline unsigned int __arch_hweight16(unsigned int w) +-{ +- unsigned int res; +- +- __asm__ ("popc %1,%0" : "=r" (res) : "r" (w & 0xffff)); +- return res; +-} +- +-static inline unsigned int __arch_hweight8(unsigned int w) +-{ +- unsigned int res; +- +- __asm__ ("popc %1,%0" : "=r" (res) : "r" (w & 0xff)); +- return res; +-} +- +-#else +- +-#include <asm-generic/bitops/arch_hweight.h> +- +-#endif + #include <asm-generic/bitops/const_hweight.h> + #include <asm-generic/bitops/lock.h> + #endif /* __KERNEL__ */ +--- a/arch/sparc/kernel/entry.h ++++ b/arch/sparc/kernel/entry.h +@@ -42,6 +42,13 @@ extern void fpsave(unsigned long *fpregs + extern void 
fpload(unsigned long *fpregs, unsigned long *fsr); + + #else /* CONFIG_SPARC32 */ ++struct popc_3insn_patch_entry { ++ unsigned int addr; ++ unsigned int insns[3]; ++}; ++extern struct popc_3insn_patch_entry __popc_3insn_patch, ++ __popc_3insn_patch_end; ++ + extern void __init per_cpu_patch(void); + extern void __init sun4v_patch(void); + extern void __init boot_cpu_id_too_large(int cpu); +--- a/arch/sparc/kernel/setup_64.c ++++ b/arch/sparc/kernel/setup_64.c +@@ -272,6 +272,30 @@ void __init sun4v_patch(void) + sun4v_hvapi_init(); + } + ++static void __init popc_patch(void) ++{ ++ struct popc_3insn_patch_entry *p3; ++ ++ p3 = &__popc_3insn_patch; ++ while (p3 < &__popc_3insn_patch_end) { ++ unsigned long addr = p3->addr; ++ ++ *(unsigned int *) (addr + 0) = p3->insns[0]; ++ wmb(); ++ __asm__ __volatile__("flush %0" : : "r" (addr + 0)); ++ ++ *(unsigned int *) (addr + 4) = p3->insns[1]; ++ wmb(); ++ __asm__ __volatile__("flush %0" : : "r" (addr + 4)); ++ ++ *(unsigned int *) (addr + 8) = p3->insns[2]; ++ wmb(); ++ __asm__ __volatile__("flush %0" : : "r" (addr + 4)); ++ ++ p3++; ++ } ++} ++ + #ifdef CONFIG_SMP + void __init boot_cpu_id_too_large(int cpu) + { +@@ -424,6 +448,9 @@ static void __init init_sparc64_elf_hwca + sparc64_elf_hwcap = cap | mdesc_caps; + + report_hwcaps(sparc64_elf_hwcap); ++ ++ if (sparc64_elf_hwcap & AV_SPARC_POPC) ++ popc_patch(); + } + + void __init setup_arch(char **cmdline_p) +--- a/arch/sparc/kernel/sparc_ksyms_64.c ++++ b/arch/sparc/kernel/sparc_ksyms_64.c +@@ -8,6 +8,7 @@ + #include <linux/module.h> + #include <linux/pci.h> + #include <linux/init.h> ++#include <linux/bitops.h> + + #include <asm/system.h> + #include <asm/cpudata.h> +@@ -38,5 +39,11 @@ EXPORT_SYMBOL(sun4v_niagara_setperf); + EXPORT_SYMBOL(sun4v_niagara2_getperf); + EXPORT_SYMBOL(sun4v_niagara2_setperf); + ++/* from hweight.S */ ++EXPORT_SYMBOL(__arch_hweight8); ++EXPORT_SYMBOL(__arch_hweight16); ++EXPORT_SYMBOL(__arch_hweight32); ++EXPORT_SYMBOL(__arch_hweight64); ++ + 
/* Exporting a symbol from /init/main.c */ + EXPORT_SYMBOL(saved_command_line); +--- a/arch/sparc/kernel/vmlinux.lds.S ++++ b/arch/sparc/kernel/vmlinux.lds.S +@@ -107,7 +107,11 @@ SECTIONS + *(.sun4v_2insn_patch) + __sun4v_2insn_patch_end = .; + } +- ++ .popc_3insn_patch : { ++ __popc_3insn_patch = .; ++ *(.popc_3insn_patch) ++ __popc_3insn_patch_end = .; ++ } + PERCPU_SECTION(SMP_CACHE_BYTES) + + . = ALIGN(PAGE_SIZE); +--- a/arch/sparc/lib/Makefile ++++ b/arch/sparc/lib/Makefile +@@ -37,7 +37,7 @@ lib-$(CONFIG_SPARC64) += GENmemcpy.o GEN + lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o + + lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o +-lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o ++lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o + + obj-y += iomap.o + obj-$(CONFIG_SPARC32) += atomic32.o +--- /dev/null ++++ b/arch/sparc/lib/hweight.S +@@ -0,0 +1,51 @@ ++#include <linux/linkage.h> ++ ++ .text ++ .align 32 ++ENTRY(__arch_hweight8) ++ ba,pt %xcc, __sw_hweight8 ++ nop ++ nop ++ENDPROC(__arch_hweight8) ++ .section .popc_3insn_patch, "ax" ++ .word __arch_hweight8 ++ sllx %o0, 64-8, %g1 ++ retl ++ popc %g1, %o0 ++ .previous ++ ++ENTRY(__arch_hweight16) ++ ba,pt %xcc, __sw_hweight16 ++ nop ++ nop ++ENDPROC(__arch_hweight16) ++ .section .popc_3insn_patch, "ax" ++ .word __arch_hweight16 ++ sllx %o0, 64-16, %g1 ++ retl ++ popc %g1, %o0 ++ .previous ++ ++ENTRY(__arch_hweight32) ++ ba,pt %xcc, __sw_hweight32 ++ nop ++ nop ++ENDPROC(__arch_hweight32) ++ .section .popc_3insn_patch, "ax" ++ .word __arch_hweight32 ++ sllx %o0, 64-32, %g1 ++ retl ++ popc %g1, %o0 ++ .previous ++ ++ENTRY(__arch_hweight64) ++ ba,pt %xcc, __sw_hweight64 ++ nop ++ nop ++ENDPROC(__arch_hweight64) ++ .section .popc_3insn_patch, "ax" ++ .word __arch_hweight64 ++ retl ++ popc %o0, %o0 ++ nop ++ .previous diff --git a/queue-3.0/sparc-use-popc-when-possible-for-ffs-__ffs-ffz.patch b/queue-3.0/sparc-use-popc-when-possible-for-ffs-__ffs-ffz.patch new file mode 100644 
index 0000000000..c64294b07c --- /dev/null +++ b/queue-3.0/sparc-use-popc-when-possible-for-ffs-__ffs-ffz.patch @@ -0,0 +1,236 @@ +From d4503069f07ab8d88927ef4e10557ea95ebbd17b Mon Sep 17 00:00:00 2001 +From: "David S. Miller" <davem@davemloft.net> +Date: Tue, 2 Aug 2011 20:23:34 -0700 +Subject: sparc: Use popc when possible for ffs/__ffs/ffz. + + +From: "David S. Miller" <davem@davemloft.net> + +[ Upstream commit 56d205cc5c0a3032a605121d4253e111193bf923 ] + +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + arch/sparc/include/asm/bitops_64.h | 7 +-- + arch/sparc/kernel/entry.h | 7 +++ + arch/sparc/kernel/setup_64.c | 34 +++++++++----- + arch/sparc/kernel/sparc_ksyms_64.c | 4 + + arch/sparc/kernel/vmlinux.lds.S | 5 ++ + arch/sparc/lib/Makefile | 2 + arch/sparc/lib/ffs.S | 84 +++++++++++++++++++++++++++++++++++++ + 7 files changed, 127 insertions(+), 16 deletions(-) + create mode 100644 arch/sparc/lib/ffs.S + +--- a/arch/sparc/include/asm/bitops_64.h ++++ b/arch/sparc/include/asm/bitops_64.h +@@ -26,16 +26,17 @@ extern void change_bit(unsigned long nr, + #define smp_mb__before_clear_bit() barrier() + #define smp_mb__after_clear_bit() barrier() + +-#include <asm-generic/bitops/ffz.h> +-#include <asm-generic/bitops/__ffs.h> + #include <asm-generic/bitops/fls.h> + #include <asm-generic/bitops/__fls.h> + #include <asm-generic/bitops/fls64.h> + + #ifdef __KERNEL__ + ++extern int ffs(int x); ++extern unsigned long __ffs(unsigned long); ++ ++#include <asm-generic/bitops/ffz.h> + #include <asm-generic/bitops/sched.h> +-#include <asm-generic/bitops/ffs.h> + + /* + * hweightN: returns the hamming weight (i.e. 
the number +--- a/arch/sparc/kernel/entry.h ++++ b/arch/sparc/kernel/entry.h +@@ -49,6 +49,13 @@ struct popc_3insn_patch_entry { + extern struct popc_3insn_patch_entry __popc_3insn_patch, + __popc_3insn_patch_end; + ++struct popc_6insn_patch_entry { ++ unsigned int addr; ++ unsigned int insns[6]; ++}; ++extern struct popc_6insn_patch_entry __popc_6insn_patch, ++ __popc_6insn_patch_end; ++ + extern void __init per_cpu_patch(void); + extern void __init sun4v_patch(void); + extern void __init boot_cpu_id_too_large(int cpu); +--- a/arch/sparc/kernel/setup_64.c ++++ b/arch/sparc/kernel/setup_64.c +@@ -275,25 +275,35 @@ void __init sun4v_patch(void) + static void __init popc_patch(void) + { + struct popc_3insn_patch_entry *p3; ++ struct popc_6insn_patch_entry *p6; + + p3 = &__popc_3insn_patch; + while (p3 < &__popc_3insn_patch_end) { +- unsigned long addr = p3->addr; ++ unsigned long i, addr = p3->addr; + +- *(unsigned int *) (addr + 0) = p3->insns[0]; +- wmb(); +- __asm__ __volatile__("flush %0" : : "r" (addr + 0)); +- +- *(unsigned int *) (addr + 4) = p3->insns[1]; +- wmb(); +- __asm__ __volatile__("flush %0" : : "r" (addr + 4)); +- +- *(unsigned int *) (addr + 8) = p3->insns[2]; +- wmb(); +- __asm__ __volatile__("flush %0" : : "r" (addr + 4)); ++ for (i = 0; i < 3; i++) { ++ *(unsigned int *) (addr + (i * 4)) = p3->insns[i]; ++ wmb(); ++ __asm__ __volatile__("flush %0" ++ : : "r" (addr + (i * 4))); ++ } + + p3++; + } ++ ++ p6 = &__popc_6insn_patch; ++ while (p6 < &__popc_6insn_patch_end) { ++ unsigned long i, addr = p6->addr; ++ ++ for (i = 0; i < 6; i++) { ++ *(unsigned int *) (addr + (i * 4)) = p6->insns[i]; ++ wmb(); ++ __asm__ __volatile__("flush %0" ++ : : "r" (addr + (i * 4))); ++ } ++ ++ p6++; ++ } + } + + #ifdef CONFIG_SMP +--- a/arch/sparc/kernel/sparc_ksyms_64.c ++++ b/arch/sparc/kernel/sparc_ksyms_64.c +@@ -45,5 +45,9 @@ EXPORT_SYMBOL(__arch_hweight16); + EXPORT_SYMBOL(__arch_hweight32); + EXPORT_SYMBOL(__arch_hweight64); + ++/* from ffs_ffz.S */ 
++EXPORT_SYMBOL(ffs); ++EXPORT_SYMBOL(__ffs); ++ + /* Exporting a symbol from /init/main.c */ + EXPORT_SYMBOL(saved_command_line); +--- a/arch/sparc/kernel/vmlinux.lds.S ++++ b/arch/sparc/kernel/vmlinux.lds.S +@@ -112,6 +112,11 @@ SECTIONS + *(.popc_3insn_patch) + __popc_3insn_patch_end = .; + } ++ .popc_6insn_patch : { ++ __popc_6insn_patch = .; ++ *(.popc_6insn_patch) ++ __popc_6insn_patch_end = .; ++ } + PERCPU_SECTION(SMP_CACHE_BYTES) + + . = ALIGN(PAGE_SIZE); +--- a/arch/sparc/lib/Makefile ++++ b/arch/sparc/lib/Makefile +@@ -37,7 +37,7 @@ lib-$(CONFIG_SPARC64) += GENmemcpy.o GEN + lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o + + lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o +-lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ++lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o + + obj-y += iomap.o + obj-$(CONFIG_SPARC32) += atomic32.o +--- /dev/null ++++ b/arch/sparc/lib/ffs.S +@@ -0,0 +1,84 @@ ++#include <linux/linkage.h> ++ ++ .register %g2,#scratch ++ ++ .text ++ .align 32 ++ ++ENTRY(ffs) ++ brnz,pt %o0, 1f ++ mov 1, %o1 ++ retl ++ clr %o0 ++ nop ++ nop ++ENTRY(__ffs) ++ sllx %o0, 32, %g1 /* 1 */ ++ srlx %o0, 32, %g2 ++ ++ clr %o1 /* 2 */ ++ movrz %g1, %g2, %o0 ++ ++ movrz %g1, 32, %o1 /* 3 */ ++1: clr %o2 ++ ++ sllx %o0, (64 - 16), %g1 /* 4 */ ++ srlx %o0, 16, %g2 ++ ++ movrz %g1, %g2, %o0 /* 5 */ ++ clr %o3 ++ ++ movrz %g1, 16, %o2 /* 6 */ ++ clr %o4 ++ ++ and %o0, 0xff, %g1 /* 7 */ ++ srlx %o0, 8, %g2 ++ ++ movrz %g1, %g2, %o0 /* 8 */ ++ clr %o5 ++ ++ movrz %g1, 8, %o3 /* 9 */ ++ add %o2, %o1, %o2 ++ ++ and %o0, 0xf, %g1 /* 10 */ ++ srlx %o0, 4, %g2 ++ ++ movrz %g1, %g2, %o0 /* 11 */ ++ add %o2, %o3, %o2 ++ ++ movrz %g1, 4, %o4 /* 12 */ ++ ++ and %o0, 0x3, %g1 /* 13 */ ++ srlx %o0, 2, %g2 ++ ++ movrz %g1, %g2, %o0 /* 14 */ ++ add %o2, %o4, %o2 ++ ++ movrz %g1, 2, %o5 /* 15 */ ++ ++ and %o0, 0x1, %g1 /* 16 */ ++ ++ add %o2, %o5, %o2 /* 17 */ ++ xor %g1, 0x1, %g1 ++ ++ retl /* 18 */ ++ add %o2, %g1, %o0 
++ENDPROC(ffs) ++ENDPROC(__ffs) ++ ++ .section .popc_6insn_patch, "ax" ++ .word ffs ++ brz,pn %o0, 98f ++ neg %o0, %g1 ++ xnor %o0, %g1, %o1 ++ popc %o1, %o0 ++98: retl ++ nop ++ .word __ffs ++ neg %o0, %g1 ++ xnor %o0, %g1, %o1 ++ popc %o1, %o0 ++ retl ++ sub %o0, 1, %o0 ++ nop ++ .previous |