Skip to content Skip to sidebar Skip to footer

Scraping Pricing Off A Search Bar - Site Link Changed

With the help of some experts here I was able to build a scraper that works fine. The essential line of code is really: data = {'partOptionFilter': {'PartNumber': PN.iloc[i, 0], 'A

Solution 1:

After using DevTools in Firefox/Chrome I created this code.

The page uses a different URL, sends different data, and returns results with different keys.

You would have to use DevTools to observe more requests from browser to server to recognize how to use more params in data

import requests

# Search term typed into the site's search bar.
query = "mobile"

# Payload observed with browser DevTools (Network tab) for the site's
# internal search endpoint.  To filter by manufacturer, populate "facets"
# and/or "urlParams" like:
#     "facets": [{"name": "OEM", "value": "GE%20Healthcare"}],
#     "urlParams": [{"name": "OEM", "value": "GE Healthcare"}],
data = {
    "facets": [],
    "facilityId": 38451,
    "id_ins": "a2a3d332-73a7-4194-ad87-fe7412388916",
    "limit": 15,
    "query": query,
    "referer": "/catalog/Service",
    "start": 0,
    "urlParams": [],
}

# The endpoint expects a JSON body (json=data sets the header and encodes it).
r = requests.post(
    'https://prodasf-vip.partsfinder.com/Orion/CatalogService/api/v1/search',
    json=data,
)
data = r.json()

# Intermediate structures, useful while exploring the response:
# print(data['products'])
# print(data['products'][0])
# print(data['products'][0]['options'])
# print(data['products'][0]['options'][0])

print(data['products'][0]['options'][0]['price'])

EDIT (2020.09.01)

If you have many queries, then use a for-loop to run the same code many times, each time with a different query. And when you get the data for one query, use a for-loop to read all the prices from data['products'].

EDIT (2020.09.06)

I added the variables start and limit to get_data(), and later run it in a loop — for start in range(0, limit*10, limit) — to get 10 pages (each with 100 elements).

import requests
# import pprint  # to format data on screen: pprint.pprint(...)

# --- functions ---

def get_data(query, start=0, limit=15):  # <-- new (2020.09.06)
    """Get one page of search results from the server.

    query: search term sent to the site's search endpoint.
    start: index of the first result (used for pagination).
    limit: number of results per page.
    Returns the decoded JSON response (a dict).
    """
    payload = {
        # To filter by manufacturer, populate "facets"/"urlParams" like:
        #     "facets": [{"name": "OEM", "value": "GE%20Healthcare"}],
        #     "urlParams": [{"name": "OEM", "value": "GE Healthcare"}],
        "facets": [],
        "facilityId": 38451,
        "id_ins": "a2a3d332-73a7-4194-ad87-fe7412388916",
        "limit": limit,   # <-- new (2020.09.06)
        "query": query,
        "referer": "/catalog/Service",
        "start": start,   # <-- new (2020.09.06)
        "urlParams": [],
    }

    r = requests.post(
        'https://prodasf-vip.partsfinder.com/Orion/CatalogService/api/v1/search',
        json=payload,
    )
    return r.json()

def show_data(data):
    """Print title and price(s) for every product in a search response."""
    # Exploration leftovers — uncomment while inspecting the raw response.
    # NOTE(review): in the scraped original the last line may have been live;
    # kept commented here because products[0]['options'] can be empty.
    # print(data['products'])
    # print(data['products'][0])
    # print(data['products'][0]['options'])
    # print(data['products'][0]['options'][0])
    # print(data['products'][0]['options'][0]['price'])

    for item in data['products']:
        # pprint.pprint(item)
        print('title:', item['title'])

        if not item['options']:
            # No options means the site exposes no price for this product.
            print('price: unknown')
        else:
            for option in item['options']:
                print('price:', option['price'], '| vendor item number:', option['vendorItemNumber'])

        print('---')
    
def filter_data(data):
    """Return a list of price lists, one entry per product.

    A product with no options yields an empty list ([] = price unknown);
    otherwise the entry holds every option's price.
    """
    filtered = []

    for item in data['products']:
        if not item['options']:
            filtered.append([])  # unknown
        else:
            all_prices = [option['price'] for option in item['options']]
            filtered.append(all_prices)

    return filtered
    
# --- main ---

all_queries = ["mobile", 'GE Healthcare']

limit = 100  # <-- new (2020.09.06)

for query in all_queries:

    # pagination: fetch 10 pages of `limit` results each
    for start in range(0, limit * 10, limit):  # <-- new (2020.09.06)
        print('\n--- QUERY:', query, 'start:', start, '---\n')

        data = get_data(query, start, limit)
        # show_data(data)

        filtered = filter_data(data)
        print(filtered)

Post a Comment for "Scraping Pricing Off A Search Bar - Site Link Changed"