Gửi tệp bằng POST từ tập lệnh Python

139

Có cách nào để gửi tệp bằng POST từ tập lệnh Python không?

— Chỉ đọc
nguồn

214

Từ: https://requests.readthedocs.io/en/latest/user/quickstart/#post-a-multipart-encoding-file

Thư viện Requests giúp việc tải lên các tệp được mã hóa multipart trở nên rất đơn giản:

# Open in binary mode so the bytes are uploaded unmodified; the context
# manager closes the file once the request has been sent.
with open('report.xls', 'rb') as report_file:
    r = requests.post('http://httpbin.org/post',
                      files={'report.xls': report_file})

Chỉ vậy thôi. Tôi không nói đùa - chỉ cần một dòng mã. Tệp đã được gửi. Hãy kiểm tra:

>>> r.text
{
  "origin": "179.13.100.4",
  "files": {
    "report.xls": "<censored...binary...data>"
  },
  "form": {},
  "url": "http://httpbin.org/post",
  "args": {},
  "headers": {
    "Content-Length": "3196",
    "Accept-Encoding": "identity, deflate, compress, gzip",
    "Accept": "*/*",
    "User-Agent": "python-requests/0.8.0",
    "Host": "httpbin.org:80",
    "Content-Type": "multipart/form-data; boundary=127.0.0.1.502.21746.1321131593.786.1"
  },
  "data": ""
}

— Piotr Dobrogost
nguồn

2

Tôi đang thử điều tương tự và nó hoạt động tốt nếu kích thước tệp nhỏ hơn ~1,5 MB. Nếu không thì nó báo lỗi... xin vui lòng xem tại đây.

— Niks Jain

1

những gì tôi đang cố gắng làm là đăng nhập vào một số trang web bằng cách sử dụng yêu cầu mà tôi đã thực hiện thành công nhưng bây giờ tôi muốn tải lên video sau khi đăng nhập và biểu mẫu có một trường khác phải được điền trước khi gửi. Vậy làm cách nào để vượt qua các giá trị đó như mô tả video, tiêu đề video, v.v.

— TaraGurung 31/05/2015

15

Có lẽ bạn nên dùng `with open('report.xls', 'rb') as f: r = requests.post('http://httpbin.org/post', files={'report.xls': f})` thay vào đó, để tệp được đóng lại sau khi dùng xong.

— Hjulle

3

Hả? Từ khi nào việc gửi yêu cầu lại đơn giản đến vậy?

— palsch

1

Câu trả lời này cần được cập nhật để bao gồm đề xuất của Hjulle về việc sử dụng trình quản lý bối cảnh để đảm bảo tệp được đóng.

— bmoran

28

Đúng. Bạn sẽ sử dụng mô-đun urllib2 và mã hóa bằng loại nội dung multipart/form-data. Đây là một số mã mẫu để giúp bạn bắt đầu - nó làm nhiều hơn một chút so với việc chỉ tải lên tệp, nhưng bạn sẽ có thể đọc qua nó và xem cách nó hoạt động:

# Label identifying this uploader.
# NOTE(review): not referenced by any code visible in this snippet — confirm
# whether it was meant to be sent as a User-Agent header.
user_agent = "image uploader"
# Fallback comment text used when no message is supplied; the $current and
# $total placeholders are filled in through string.Template.safe_substitute.
default_message = "Image $current of $total"

import logging
import os
from os.path import abspath, isabs, isdir, isfile, join
import random
import string
import sys
import mimetypes
import urllib2
import httplib
import time
import re

def random_string(length):
    """Return a random string of exactly ``length`` ASCII letters.

    The result is used as a multipart boundary and as a throw-away password,
    so it must contain only characters that are safe in an HTTP header.

    Args:
        length: number of characters to generate; zero or a negative value
            yields an empty string.

    Returns:
        A str made of characters drawn from ``string.ascii_letters``.
    """
    # string.letters is locale-dependent (and absent on Python 3): under some
    # locales it contains non-ASCII characters that corrupt the boundary.
    # The previous range(length + 1) also produced one character too many.
    return ''.join(random.choice(string.ascii_letters) for _ in range(length))

def encode_multipart_data(data, files):
    """Encode form fields and files as a multipart/form-data payload.

    Args:
        data: mapping of form field name -> value; every value is converted
            with str() before being written into the body.
        files: mapping of form field name -> path of the file to attach.
            Each file is read completely into memory.

    Returns:
        A ``(body, headers)`` tuple. ``body`` is the CRLF-joined payload and
        ``headers`` is a dict holding the matching 'content-type' (including
        the boundary) and 'content-length' values.

    Raises:
        IOError: if one of the files cannot be opened or read.
    """
    boundary = random_string(30)

    def get_content_type(filename):
        # Fall back to a generic binary type when the extension is unknown.
        return mimetypes.guess_type(filename)[0] or 'application/octet-stream'

    def encode_field(field_name):
        return ('--' + boundary,
                'Content-Disposition: form-data; name="%s"' % field_name,
                '', str(data[field_name]))

    def encode_file(field_name):
        filename = files[field_name]
        # Read inside a context manager so the handle is released right away
        # instead of lingering until garbage collection.
        with open(filename, 'rb') as upload:
            contents = upload.read()
        return ('--' + boundary,
                'Content-Disposition: form-data; name="%s"; filename="%s"' % (field_name, filename),
                'Content-Type: %s' % get_content_type(filename),
                '', contents)

    lines = []
    for name in data:
        lines.extend(encode_field(name))
    for name in files:
        lines.extend(encode_file(name))
    # Closing delimiter, followed by an empty part so the body ends in CRLF.
    lines.extend(('--%s--' % boundary, ''))
    body = '\r\n'.join(lines)

    headers = {'content-type': 'multipart/form-data; boundary=' + boundary,
               'content-length': str(len(body))}

    return body, headers

def send_post(url, data, files):
    """POST ``data`` and ``files`` to ``url`` as multipart/form-data.

    The response body, status and reason are written to the debug log;
    nothing is returned.

    Args:
        url: target URL. The request always goes through
            httplib.HTTPConnection, whatever the URL scheme is.
        data: mapping of form field name -> value.
        files: mapping of form field name -> path of the file to attach.
    """
    req = urllib2.Request(url)
    body, headers = encode_multipart_data(data, files)
    connection = httplib.HTTPConnection(req.get_host())
    try:
        connection.request('POST', req.get_selector(), body, headers)
        response = connection.getresponse()
        logging.debug('response = %s', response.read())
        logging.debug('Code: %s %s', response.status, response.reason)
    finally:
        # Release the socket even when the request or the read fails;
        # previously the connection was never closed.
        connection.close()

def make_upload_file(server, thread, delay = 15, message = None,
                     username = None, email = None, password = None):
    """Build an ``upload_file(path, current, total)`` callable for one thread.

    The returned closure posts a single file to ``server`` and then sleeps
    for a random interval before returning.

    Args:
        server: URL the form is posted to.
        thread: value sent in the 'resto' form field.
        delay: minimum pause between uploads in seconds; values below 15
            (including falsy ones) are raised to 15.
        message: comment template with optional $current / $total
            placeholders; ``default_message`` is used when falsy.
        username: value for the 'name' field (empty string when falsy).
        email: value for the 'email' field (empty string when falsy).
        password: value for the 'pwd' field; a random string is generated
            per upload when falsy.

    Returns:
        The ``upload_file`` closure.
    """
    # A falsy delay counts as zero; the pause never drops below 15 seconds.
    wait_floor = max(int(delay or '0'), 15)

    def upload_file(path, current, total):
        assert isabs(path)
        assert isfile(path)

        logging.debug('Uploading %r to %r', path, server)
        template = string.Template(message or default_message)

        form_fields = {
            'MAX_FILE_SIZE': '3145728',
            'sub': '',
            'mode': 'regist',
            'com': template.safe_substitute(current = current, total = total),
            'resto': thread,
            'name': username or '',
            'email': email or '',
            'pwd': password or random_string(20),
        }
        attachments = {'upfile': path}

        send_post(server, form_fields, attachments)
        logging.info('Uploaded %r', path)

        # Pause for a random time between the floor and five seconds above it.
        pause = random.randint(wait_floor, wait_floor + 5)
        logging.debug('Sleeping for %.2f seconds------------------------------\n\n', pause)
        time.sleep(pause)

    return upload_file

def upload_directory(path, upload_file):
    """Upload every image found under ``path``, recursing into sub-folders.

    Files whose names end in .jpg, .jpeg, .gif or .png (case-insensitive)
    are collected first, so each upload can be told its position and the
    total number of images.

    Args:
        path: absolute path of an existing directory (checked with assert).
        upload_file: callable invoked as ``upload_file(file_path, index,
            total)`` with a 1-based index.
    """
    assert isabs(path)
    assert isdir(path)

    file_matcher = re.compile(r'\.(?:jpe?g|gif|png)$', re.IGNORECASE)
    matching_filenames = []

    for dirpath, _dirnames, filenames in os.walk(path):
        for name in filenames:
            file_path = join(dirpath, name)
            logging.debug('Testing file_path %r', file_path)
            if file_matcher.search(file_path):
                matching_filenames.append(file_path)
            else:
                # Report the rejected file itself; the old message logged the
                # directory being walked, which made the log useless.
                logging.info('Ignoring non-image file %r', file_path)

    total_count = len(matching_filenames)
    for index, file_path in enumerate(matching_filenames, 1):
        upload_file(file_path, index, total_count)

def run_upload(options, paths):
    """Upload every file or directory listed in ``paths``.

    Args:
        options: keyword arguments forwarded to ``make_upload_file``.
        paths: iterable of file or directory paths; each one is made
            absolute before use. Paths that are neither a file nor a
            directory are logged as errors and skipped.
    """
    upload_file = make_upload_file(**options)

    for arg in paths:
        path = abspath(arg)
        if isdir(path):
            upload_directory(path, upload_file)
        elif isfile(path):
            # The uploader needs (path, current, total); a lone file is
            # item 1 of 1. Calling it with only the path raised TypeError.
            upload_file(path, 1, 1)
        else:
            logging.error('No such path: %r', path)

    logging.info('Done!')

— John Millikin
nguồn

1

Trên Python 2.6.6 tôi đã gặp lỗi khi phân tích cú pháp ranh giới multipart trong khi sử dụng mã này trên Windows. Tôi đã phải đổi string.letters thành string.ascii_letters, như được thảo luận tại stackoverflow.com/questions/2823316/, để mã này hoạt động. Yêu cầu về ranh giới được thảo luận ở đây: stackoverflow.com/questions/147451/

— amit

Gọi run_upload({'server': '', 'thread': ''}, paths=['/path/to/file.txt']) gây ra lỗi ở dòng upload_file(path), vì upload_file yêu cầu 3 tham số; vì vậy tôi thay thế nó bằng dòng upload_file(path, 1, 1).

— Radian

4

Điều duy nhất ngăn bạn sử dụng urlopen trực tiếp trên một đối tượng tệp là thực tế rằng đối tượng tệp dựng sẵn thiếu định nghĩa __len__. Một cách đơn giản là tạo một lớp con để cung cấp cho urlopen độ dài chính xác của tệp. Tôi cũng đã sửa đổi tiêu đề Content-Type trong đoạn mã bên dưới.

import os
import urllib2
class EnhancedFile(file):
    """File object that also reports its size through ``len()``.

    urllib2 needs ``len(data)`` to fill in Content-Length, which the plain
    built-in file type does not provide.
    """

    def __init__(self, *args, **keyws):
        # Same constructor arguments as the built-in file type.
        super(EnhancedFile, self).__init__(*args, **keyws)

    def __len__(self):
        # Size in bytes as reported by the OS for the open descriptor.
        descriptor_info = os.fstat(self.fileno())
        return int(descriptor_info.st_size)

theUrl = "http://example.com/abcde"
theHeaders = {'Content-Type': 'text/xml'}

# Binary mode keeps the bytes read equal to the size reported by
# EnhancedFile.__len__; in text mode on Windows newline translation makes the
# body shorter than the advertised Content-Length.
theFile = EnhancedFile('a.xml', 'rb')
try:
    theRequest = urllib2.Request(theUrl, theFile, theHeaders)
    response = urllib2.urlopen(theRequest)
finally:
    # Close the upload source even when the request fails.
    theFile.close()

for line in response:
    print(line)

— ilmarinen
nguồn

@robert Tôi đã kiểm tra mã của bạn trong Python 2.7 nhưng nó không hoạt động. urlopen(Request(theUrl, theFile, ...)) chỉ gửi nội dung của tệp như một yêu cầu POST bình thường, nhưng không thể chỉ định đúng trường biểu mẫu. Tôi thậm chí đã thử biến thể urlopen(theUrl, urlencode({'serverside_field_name': EnhancedFile('my_file.txt')})); nó tải lên một tệp nhưng (dĩ nhiên!) với nội dung không chính xác là <open file 'my_file.txt', mode 'r' at 0x00D6B718>. Tôi đã bỏ lỡ điều gì chăng?

— RayLuo

Cảm ơn câu trả lời. Bằng cách sử dụng đoạn mã trên, tôi đã chuyển tệp hình ảnh thô 2,2 GB bằng cách sử dụng yêu cầu PUT vào máy chủ web.

— Akshay Patil

4

Có vẻ như thư viện requests của Python không xử lý được các tệp multipart cực lớn.

Các tài liệu khuyên bạn nên xem xét requests-toolbelt.

Đây là trang thích hợp từ tài liệu của họ.

— lúa mạch đen
nguồn

2

Thư viện poster của Chris Atlee hoạt động thực sự tốt cho việc này (đặc biệt là chức năng tiện lợi poster.encode.multipart_encode()). Như một phần thưởng, nó hỗ trợ truyền phát các tệp lớn mà không tải toàn bộ tệp vào bộ nhớ. Xem thêm vấn đề Python 3244 .

— gotgenes
nguồn

2

Tôi đang cố gắng thử nghiệm django rest api và nó hoạt động cho tôi:

def test_upload_file(self):
        """Post a CSV file as multipart form data and expect HTTP 200."""
        filename = "/Users/Ranvijay/tests/test_price_matrix.csv"
        client = APIClient()
        # client.credentials(HTTP_AUTHORIZATION='Token ' + token.key)

        # Keep the file open only for the duration of the request so the
        # handle is not leaked when the test finishes or fails.
        with open(filename, 'rb') as csv_file:
            data = {'file': csv_file}
            response = client.post(reverse('price-matrix-csv'), data, format='multipart')

        print(response)
        self.assertEqual(response.status_code, status.HTTP_200_OK)

— Ranvijay Sachan
nguồn

1

Mã này gây rò rỉ bộ nhớ - bạn đã quên gọi close() cho tệp.

— Chánh văn phòng

0

Bạn cũng có thể muốn xem qua httplib2, kèm theo các ví dụ. Tôi thấy việc sử dụng httplib2 ngắn gọn hơn so với sử dụng các mô-đun HTTP tích hợp.

— pdc
nguồn

2

Không có ví dụ nào cho thấy cách xử lý tải lên tệp.

— dland

Liên kết đã lỗi thời + không có ví dụ nội tuyến.

— jlr

3

Nó đã được chuyển đến github.com/httplib2/httplib2. Mặt khác, ngày nay có lẽ tôi sẽ giới thiệu requests thay thế.

— pdc

0

def visit_v2(device_code, camera_code):
    """POST a multipart form with two attached files and return the JSON reply.

    The form carries ``device_code``, a fixed ``position`` of 3, ``person_data``
    and two files read from hard-coded paths, both under the field name
    "files". The target is ``/adopen/device/visit_v2`` on ``server_ip``.

    ``person_data``, ``server_ip`` and ``server_port`` are not parameters; they
    are looked up outside this function. ``camera_code`` is accepted but never
    used in the body.

    Returns:
        The decoded JSON response on success. When urllib2.HTTPError is
        raised, the status and body are printed and the function falls off
        the end, so the caller receives None.
    """
    image1 = MultipartParam.from_file("files", "/home/yuzx/1.txt")
    image2 = MultipartParam.from_file("files", "/home/yuzx/2.txt")
    datagen, headers = multipart_encode([('device_code', device_code), ('position', 3), ('person_data', person_data), image1, image2])
    # NOTE(review): joining datagen here iterates it before it is handed to
    # urllib2.Request below. If datagen is a one-shot iterator the request
    # body may be empty — confirm against the multipart_encode implementation.
    print "".join(datagen)
    # Omit the port from the URL when it is the HTTP default.
    if server_port == 80:
        port_str = ""
    else:
        port_str = ":%s" % (server_port,)
    url_str = "http://" + server_ip + port_str + "/adopen/device/visit_v2"
    # NOTE(review): this adds a literal 'nothing' header; purpose is not
    # evident from this snippet — looks like leftover debugging.
    headers['nothing'] = 'nothing'
    request = urllib2.Request(url_str, datagen, headers)
    try:
        response = urllib2.urlopen(request)
        resp = response.read()
        print "http_status =", response.code
        result = json.loads(resp)
        print resp
        return result
    except urllib2.HTTPError, e:
        # Error responses are only printed; nothing is returned or re-raised.
        print "http_status =", e.code
        print e.read()

— người dùng6081103
nguồn