diff -Naur NVIDIA_kernel-1.0-3123/nv-linux.h NVIDIA_kernel-1.0-3123-pa/nv-linux.h
--- NVIDIA_kernel-1.0-3123/nv-linux.h	2002-09-16 14:08:14.000000000 -0500
+++ NVIDIA_kernel-1.0-3123-pa/nv-linux.h	2002-09-16 14:37:31.000000000 -0500
@@ -127,6 +127,7 @@
 # define MEM_MAP_INC_COUNT(map_nr) (atomic_inc(&(map_nr)->count))
 # define MEM_MAP_DEC_COUNT(map_nr) (atomic_dec(&(map_nr)->count))
 # define GET_EVENT_QUEUE(nv) ((struct __wait_queue_head *) ((nv)->event_queue))
+# define VMA_PRIVATE(vma) ((vma)->vm_private_data)
 #else
 # define in_irq() (local_irq_count[smp_processor_id()])
 # define LINUX_VMA_OFFS(vma) ((vma)->vm_offset)
@@ -137,6 +138,7 @@
 # define MEM_MAP_INC_COUNT(map_nr) (atomic_inc(&mem_map[map_nr].count))
 # define MEM_MAP_DEC_COUNT(map_nr) (atomic_dec(&mem_map[map_nr].count))
 # define GET_EVENT_QUEUE(nv) ((struct wait_queue **) &((nv)->event_queue))
+# define VMA_PRIVATE(vma) ((void*)((vma)->vm_pte))
 #endif
 
 #define NV_PAGE_ALIGN(addr) ( ((addr) + PAGE_SIZE - 1) / PAGE_SIZE)
@@ -173,6 +175,7 @@
 typedef struct nv_alloc_s {
     struct nv_alloc_s *next;
     struct vm_area_struct *vma;
+    unsigned int usage_count;
     unsigned int process_id;
     unsigned int thread_gid;
     unsigned int num_pages;
diff -Naur NVIDIA_kernel-1.0-3123/nv.c NVIDIA_kernel-1.0-3123-pa/nv.c
--- NVIDIA_kernel-1.0-3123/nv.c	2002-09-16 14:08:12.000000000 -0500
+++ NVIDIA_kernel-1.0-3123-pa/nv.c	2002-09-16 14:37:34.000000000 -0500
@@ -85,11 +85,25 @@
 static int nvos_is_nv_device(struct pci_dev *dev);
 static int nvos_set_primary_card(nv_ioctl_primary_card_t *info);
 static int nvos_probe_devices(void);
-static void * nvos_malloc(unsigned long);
-static void nvos_free(void **);
-
 static void nvos_proc_create(void);
 static void nvos_proc_remove(void);
 
+static void * nvos_malloc_pages(unsigned long);
+static void nvos_unlock_pages(void **);
+static void nvos_free_pages(void **);
+
+/* wrapped in do/while (0) so the macro expands to a single statement
+ * and stays safe with a trailing semicolon inside if/else bodies */
+#define nvos_unlock_and_free_pages(count, page_list) \
+    do {                                             \
+        if (page_list) {                             \
+            if ((count) == 0)                        \
+                nvos_unlock_pages(page_list);        \
+            nvos_free_pages(page_list);              \
+        }                                            \
+    } while (0)
+
+static nv_alloc_t *nvos_create_alloc(void);
+static int nvos_free_alloc(nv_alloc_t *);
 
 /* nvl_ functions.. take a linux state device pointer */
 static nv_alloc_t *nvl_find_alloc(nv_linux_state_t *, unsigned long, nv_alloc_t **);
@@ -375,18 +385,43 @@
  * memory on systems with high memory support enabled.
  */
 
-static void *nvos_malloc(unsigned long size)
+/* Note that there's a subtle kernel interaction with regard to the
+ * bookkeeping on these pages. As long as the pages are marked reserved,
+ * the kernel won't touch them (i.e. won't alter their usage counts).
+ * This leads to a subtle problem with mmap. Normally, allocating the
+ * pages would set their counts to 1, and mmapping them would bump the
+ * counts up to 2. The order of unmapping and freeing then wouldn't
+ * matter, as the pages wouldn't really be considered free by the kernel
+ * until their counts dropped back to 0. Since the kernel won't touch
+ * the counts while the pages are reserved, we need to be careful about
+ * the order in which we unreserve them. If we unreserve the pages while
+ * freeing them and the munmap comes later, the munmap code path will
+ * attempt a second free of the same pages. We also don't have much
+ * control over which comes first: sometimes we're called to free the
+ * pages first, sometimes to munmap them first. Oh, and we get vma
+ * open/close calls every time the process is cloned and then execv'd,
+ * and munmap == vma close.
+ *
+ * Sooo, we keep our own count of the allocation's usage and don't
+ * unreserve the pages until our count drops to 0. This currently
+ * happens in either vma_release or nvos_free, both of which are
+ * followed by a kernel attempt to free the pages. Since the pages
+ * will finally be unreserved at that point, the kernel will drop
+ * their counts to 0 and successfully free them for us, exactly once.
+ * Sigh... you have to love s&%*^y interfaces that force you to *know*
+ * too much about kernel internals.
+ */
+
+static void *nvos_malloc_pages(unsigned long pages_needed)
 {
     unsigned long *page_list = NULL;
     unsigned long *page_ptr = NULL;
-    unsigned int pages_needed;
     unsigned int page_list_size;
 
     /*
      * allocate a pointer for each physical page and an
     * integer to hold the number of pages allocated
      */
-    pages_needed = (size >> PAGE_SHIFT);
     page_list_size = (pages_needed + 1) * sizeof(unsigned long *);
 
     page_list = vmalloc(page_list_size);
@@ -435,11 +466,15 @@
     return NULL;
 }
 
-static void nvos_free(void **page_list)
+// Unlock the pages we've locked down for DMA purposes.
+static void nvos_unlock_pages(void **page_list)
 {
     unsigned long *page_ptr;
     unsigned int pages_left;
 
+    if (page_list == NULL)
+        return;
+
     page_ptr = (unsigned long *) page_list;
 
     /* retrieve the number of pages allocated */
@@ -447,11 +482,70 @@
     pages_left = *(unsigned int *) (page_list - 1);
 
     while (pages_left) {
-        mem_map_unreserve(GET_MAP_NR(*page_ptr));
+        mem_map_unreserve(GET_MAP_NR(*page_ptr++));
+        pages_left--;
+    }
+}
+
+static void nvos_free_pages(void **page_list)
+{
+    unsigned long *page_ptr;
+    unsigned int pages_left;
+
+    if (page_list == NULL)
+        return;
+
+    page_ptr = (unsigned long *) page_list;
+
+    /* retrieve the number of pages allocated */
+    pages_left = *(unsigned int *) (page_list - 1);
+
+    while (pages_left) {
         free_page((unsigned long) phys_to_virt(*page_ptr++));
         pages_left--;
     }
+}
 
-    vfree(page_list);
+static
+nv_alloc_t *nvos_create_alloc(void)
+{
+    nv_alloc_t *at;
+
+    NV_KMALLOC(at, sizeof(nv_alloc_t));
+    if (at == NULL)
+        return NULL;
+
+    memset(at, 0, sizeof(nv_alloc_t));
+
+    at->process_id = current->pid;
+#if !defined (KERNEL_2_2)
+    at->thread_gid = current->tgid;
+#else
+    at->thread_gid = -1;
+#endif
+
+    return at;
+}
+
+static
+int nvos_free_alloc(
+    nv_alloc_t *at
+)
+{
+    if (at == NULL)
+        return -1;
+
+    if (at->usage_count)
+        return 1;
+
+    // We keep the page_table around after freeing the pages for
+    // bookkeeping reasons; free the page_table here and assume the
+    // underlying pages have already been unlocked and freed.
+    if (at->page_table)
+        vfree(at->page_table - 1);
+
+    NV_KFREE(at);
+
+    return 0;
 }
 
 static u8 nvos_find_agp_capability(struct pci_dev *dev)
@@ -998,6 +1092,12 @@
 void
 nv_kern_vma_open(struct vm_area_struct *vma)
 {
+    if (VMA_PRIVATE(vma))
+    {
+        nv_alloc_t *at = (nv_alloc_t *) VMA_PRIVATE(vma);
+        at->usage_count++;
+    }
+
     MOD_INC_USE_COUNT;
 }
@@ -1005,6 +1105,25 @@
 void
 nv_kern_vma_release(struct vm_area_struct *vma)
 {
+    if (VMA_PRIVATE(vma))
+    {
+        nv_alloc_t *at = (nv_alloc_t *) VMA_PRIVATE(vma);
+
+        at->usage_count--;
+
+        // If usage_count drops to 0, the virtual mapping was freed but
+        // the underlying physical pages were not, because the reserved
+        // bit is still set. Clear the reserved bit here; munmap will
+        // then zap the pages and actually free the physical pages.
+        if (at->usage_count == 0)
+        {
+            if (at->page_table)
+                nvos_unlock_pages(at->page_table);
+            nvos_free_alloc(at);
+            VMA_PRIVATE(vma) = NULL;
+        }
+    }
+
     MOD_DEC_USE_COUNT;
 }
@@ -1345,6 +1464,8 @@
     }
 
     at->vma = vma;
+    VMA_PRIVATE(vma) = at;
+    at->usage_count++;
 
     start = vma->vm_start;
     while (pages--)
@@ -1377,6 +1498,8 @@
     }
 
     at->vma = vma;
+    VMA_PRIVATE(vma) = at;
+    at->usage_count++;
 
     if (NV_OSAGP_ENABLED(nv))
     {
@@ -2224,24 +2347,14 @@
     int rm_status = 0;
     nv_linux_state_t *nvl = (nv_linux_state_t *) nv;
 
-    NV_KMALLOC(at, sizeof(nv_alloc_t));
+    at = nvos_create_alloc();
     if (at == NULL)
         return RM_ERROR;
 
-    memset(at, 0, sizeof(nv_alloc_t));
-
     page_count = RM_PAGES_TO_OS_PAGES(page_count);
     at->num_pages = page_count;
-
-    at->process_id = current->pid;
-#if !defined (KERNEL_2_2)
-    at->thread_gid = current->tgid;
-#else
-    at->thread_gid = -1;
-#endif
-
     at->class = class;
-    at->vma = NULL;
+    at->usage_count++;
 
     if (at->class == NV01_ROOT)
     {
@@ -2287,7 +2400,7 @@
         NV_ADD_AT(nvl, at);
     } else {
         /* use nvidia's nvagp support */
-        at->page_table = nvos_malloc(page_count << PAGE_SHIFT);
+        at->page_table = nvos_malloc_pages(page_count);
         if (at->page_table == NULL)
             goto failed;
@@ -2311,7 +2424,7 @@
         nv->agp_buffers++;
     } else {
         /* allocate general system memory */
-        at->page_table = nvos_malloc(page_count << PAGE_SHIFT);
+        at->page_table = nvos_malloc_pages(page_count);
         if (at->page_table == NULL)
             goto failed;
@@ -2324,10 +2437,11 @@
 failed:
     /* free any pages we may have allocated */
+    at->usage_count--;
+
     if (at->page_table)
-        nvos_free(at->page_table);
+        nvos_unlock_and_free_pages(at->usage_count, at->page_table);
 
-    /* free it */
-    NV_KFREE(at);
+    nvos_free_alloc(at);
 
     return -1;
 }
@@ -2365,17 +2478,19 @@
         NV_REMOVE_AT_FROM_LIST(nvl, at, prev);
         nv_unlock_at(nv);
 
+        at->usage_count--;
+
         if (NV_OSAGP_ENABLED(nv))
         {
             rmStatus = KernFreeAGPPages(pAddress, priv_data);
         } else {
             rmStatus = rm_free_agp_pages(nv, pAddress, priv_data);
-            if (rmStatus == 0x0)
-                nvos_free(at->page_table);
+            if (rmStatus == RM_OK)
+                nvos_unlock_and_free_pages(at->usage_count, at->page_table);
         }
 
         /* we may hold off on disabling agp until all buffers are freed */
-        if (rmStatus == 0x0)
+        if (rmStatus == RM_OK)
         {
             nv->agp_buffers--;
             if (!nv->agp_buffers && nv->agp_teardown)
@@ -2390,6 +2505,8 @@
     NV_REMOVE_AT_FROM_LIST(nvl, at, prev);
     nv_unlock_at(nv);
 
+    at->usage_count--;
+
     if (at->class == NV01_ROOT)
     {
         int order, i;
@@ -2407,11 +2524,13 @@
         }
     }
     else
     {
-        nvos_free(at->page_table);
+        nvos_unlock_and_free_pages(at->usage_count, at->page_table);
     }
 
-    NV_KFREE(at);
+    if (at->usage_count == 0)
+        nvos_free_alloc(at);
+
     return rmStatus;
 }
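A note on the design: the patch makes teardown order-independent. Whichever of rm_free_pages or nv_kern_vma_release runs last sees usage_count hit 0, unreserves the pages, and lets the kernel's own free path succeed exactly once. Below is a minimal userspace sketch of that discipline, illustrative only, not driver code: the type and function names are invented for the example, and the reserved/freed flags merely stand in for the kernel's PG_reserved bit and page freeing.

#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-in for nv_alloc_t: one allocation with a reserved
 * "page" and a usage count shared by the rm-free and munmap paths. */
typedef struct {
    unsigned int usage_count;
    int          reserved;    /* mimics the kernel's reserved bit      */
    int          freed;       /* set when the "page" is really freed   */
} alloc_t;

/* Final-drop logic: only when the last user lets go do we unreserve
 * and free, so the other teardown path can't trigger a double free. */
static void drop_ref(alloc_t *at, const char *who)
{
    at->usage_count--;
    printf("%s: usage_count now %u\n", who, at->usage_count);
    if (at->usage_count == 0) {
        at->reserved = 0;             /* nvos_unlock_pages() analogue   */
        at->freed = 1;                /* kernel free now succeeds, once */
        printf("%s: unreserved and freed\n", who);
    }
}

int main(void)
{
    alloc_t at = { 0, 1, 0 };

    at.usage_count++;   /* rm_alloc_pages() takes a reference */
    at.usage_count++;   /* mmap (vma_open) takes another      */

    /* the two paths may run in either order; only the second one frees */
    drop_ref(&at, "rm_free_pages");       /* or vma_release first...    */
    drop_ref(&at, "nv_kern_vma_release"); /* ...the result is the same  */

    return at.freed ? EXIT_SUCCESS : EXIT_FAILURE;
}

Running it prints a single "unreserved and freed" line no matter which drop_ref() call comes first, which is exactly the invariant the usage_count bookkeeping in the patch is meant to guarantee.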