I currently have a Scrapy spider that crawls data from websites, and I want to send each item it scrapes to a connected client over a socket.
import datetime
import json
import socket
import threading
from threading import Thread

clients = set()
clients_lock = threading.Lock()

host = socket.gethostbyname("")
port = 10010

def listener(client, address, item):
    print("Accepted connection from:", address)
    with clients_lock:
        clients.add(client)
    try:
        # Reply with a timestamp when the client sends "0";
        # the scraped item itself is never sent here.
        data = client.recv(1024)
        if data == b"0":
            timestamp = datetime.datetime.now().strftime("%I:%M:%S %p")
            client.send(timestamp.encode())
    finally:
        with clients_lock:
            clients.remove(client)
        client.close()

def send_to_socket(item):
    # A new server socket is created, bound, and closed for every item.
    s = socket.socket()
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    s.bind((host, port))
    s.listen(3)
    th = []
    client, address = s.accept()
    th.append(Thread(target=listener, args=(client, address, item)).start())
    s.close()
The code given below is the Scrapy code that calls the function that starts the socket. The current issue is that I am not able to retrieve data in the client code in real time, because the current code closes the client connection every time it sends data. Is there any way to continuously send data without closing the client connection? If I remove clients.remove(client), the code gets stuck and never runs _build_link_item. (A rough sketch of what I am aiming for is after the code below.)
_build_link_item runs continuously, generating data from different URLs, and I want to send the data it generates to the client.
def _build_link_item(response):
    """Builds a LinkItem object for the given response"""
    parser = HTMLParser(response)
    item = LinkItem(
        url=response.url,
        status=response.status,
        content_type=response.headers.get("Content-Type", b"").decode("utf-8") or "",
    )
    print(json.dumps(item.__dict__), flush=True)
    send_to_socket(json.dumps(item.__dict__))
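For reference, this is roughly the persistent setup I think I need: bind and listen once, keep accepted clients in the clients set, and push each scraped item to all of them as one JSON line. start_server and broadcast_item are names I made up for this sketch, and newline-delimited JSON is just an assumed framing:

def start_server():
    # Bind and listen once, instead of once per item.
    s = socket.socket()
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    s.bind((host, port))
    s.listen(5)
    while True:
        client, address = s.accept()
        print("Accepted connection from:", address)
        with clients_lock:
            clients.add(client)

def broadcast_item(item_json):
    # Send one JSON line to every connected client; drop dead sockets.
    dead = []
    with clients_lock:
        for client in clients:
            try:
                client.sendall((item_json + "\n").encode())
            except OSError:
                dead.append(client)
        for client in dead:
            clients.remove(client)
            client.close()

# Started once (e.g. when the spider opens); _build_link_item would then
# call broadcast_item(json.dumps(item.__dict__)) instead of send_to_socket(...).
Thread(target=start_server, daemon=True).start()

And on the client side, something like this should keep receiving items without the connection ever being closed (the host and port here are assumptions):

import socket

conn = socket.socket()
conn.connect(("127.0.0.1", 10010))
buf = b""
while True:
    chunk = conn.recv(4096)
    if not chunk:
        break
    buf += chunk
    # Split the stream back into one JSON line per item.
    while b"\n" in buf:
        line, buf = buf.split(b"\n", 1)
        print("received item:", line.decode())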