如果需要通过搜索引擎获取搜索结果,又不想支付额外费用,同时要求返回结果为 JSON 格式,则可以借助 duckduckgo_search 库使用 DuckDuckGo 搜索。
代码如下:
import argparse
import json
import time
from itertools import islice
from typing import Union
from duckduckgo_search import DDGS
# Upper bound on how many times web_search queries DuckDuckGo while the
# result list keeps coming back empty.
DUCKDUCKGO_MAX_ATTEMPTS = 3
def _strip_unencodable(value):
    """Recursively drop characters that cannot be encoded as UTF-8 from strings."""
    if isinstance(value, str):
        # Lone surrogates are the typical offenders; "ignore" silently drops them.
        return value.encode("utf-8", "ignore").decode("utf-8")
    if isinstance(value, dict):
        return {
            _strip_unencodable(key): _strip_unencodable(item)
            for key, item in value.items()
        }
    if isinstance(value, (list, tuple)):
        return [_strip_unencodable(item) for item in value]
    # Numbers, booleans, None, ...: nothing to sanitise.
    return value


def safe_duckduckdo_results(results: Union[str, list]) -> str:
    """Return DuckDuckGo search results as a string that is safe to encode as UTF-8.

    Args:
        results (Union[str, list]): Either an already-serialised result string,
            or a list of results. List items may be plain strings or nested
            dicts/lists of strings; non-string leaves are left untouched.

    Returns:
        str: For a list, the JSON serialisation of the sanitised items.
            For a string, the same text with unencodable characters removed.
    """
    if isinstance(results, list):
        # Sanitise per item so dict-shaped results no longer fail on `.encode`.
        return json.dumps([_strip_unencodable(item) for item in results])
    # Any non-list input is expected to be a string, exactly as before.
    return results.encode("utf-8", "ignore").decode("utf-8")
def web_search(query: str, num_results: int = 8) -> str:
    """Return the results of a DuckDuckGo search as a JSON string.

    The search is retried up to DUCKDUCKGO_MAX_ATTEMPTS times, pausing one
    second between attempts, for as long as it yields no results.

    Args:
        query (str): The search query. An empty query returns "[]" without
            contacting the search service.
        num_results (int): The maximum number of results to return. Zero
            returns "[]" without contacting the search service.

    Returns:
        str: A JSON array (indented by 4 spaces, non-ASCII characters kept
            as-is) holding at most ``num_results`` results, or "[]" when
            nothing was found.

    Raises:
        ValueError: If ``num_results`` is negative (raised by itertools.islice).
    """
    # Nothing to ask for: answer immediately instead of entering the retry loop.
    if not query or num_results == 0:
        return json.dumps([])

    search_results = []
    for attempt in range(DUCKDUCKGO_MAX_ATTEMPTS):
        results = DDGS().text(query)
        # islice caps the result count whether text() returns a list or a generator.
        search_results = list(islice(results, num_results))
        if search_results:
            break
        # Back off only when another attempt follows; sleeping after the
        # final attempt would merely delay the return.
        if attempt < DUCKDUCKGO_MAX_ATTEMPTS - 1:
            time.sleep(1)

    serialized = json.dumps(search_results, ensure_ascii=False, indent=4)
    # ensure_ascii=False may leave unencodable characters in the text; strip them.
    return safe_duckduckdo_results(serialized)
if __name__ == "__main__":
    # Command-line entry point: run one search and print the decoded results.
    cli = argparse.ArgumentParser(description='Web search using DuckDuckGo')
    cli.add_argument('query', type=str, help='The search query')
    cli.add_argument('--num_results', type=int, default=8, help='The number of results to return')
    options = cli.parse_args()

    # web_search returns a JSON string; decode it back into Python objects
    # before printing.
    raw_json = web_search(options.query, options.num_results)
    print(json.loads(raw_json))