I'd like to identify a method to attain the Worksheet ID within the URL for each of the worksheets within a Google Spreadsheet Workbook. For example, the worksheet id for 'sheet2' of this workbook is '1244369280' , since it's url is https://docs.google.com/spreadsheets/d/1yd8qTYjRns4_OT8PbsZzH0zajvzguKS79dq6j--hnTs/edit#gid=1244369280
One method I've found is to pull the XML of a Google Spreadsheet, since according to this question, the only way to get the Worksheet ID is to stream down the XML of a worksheet, but the example is in Javascript and I need to do this in Python
This is the Javascript Code that I'd like to execute in Python:
Dim worksheetFeed As WorksheetFeed Dim query As WorksheetQuery Dim worksheet As WorksheetEntry Dim output As New MemoryStream Dim xml As String Dim gid As String = String.Empty Try _service = New Spreadsheets.SpreadsheetsService("ServiceName") _service.setUserCredentials(UserId, Password) query = New WorksheetQuery(feedUrl) worksheetFeed = _service.Query(query) worksheet = worksheetFeed.Entries(0) ' Save worksheet feed to memory stream so we can ' get the xml returned from the feed url and look for ' the gid. Gid allows us to download the specific worksheet tab Using output worksheet.SaveToXml(output) End Using xml = Encoding.ASCII.GetString(output.ToArray())
It seems that the best way to get the XML from a Google Spreadsheet is using Gdata, so I've downloaded GData and tried the Google Spreadsheet example with my credentials.
See below
#!/usr/bin/python # # Copyright (C) 2007 Google Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. __author__ = 'api.laurabeth@gmail.com (Laura Beth Lincoln)' try: from xml.etree import ElementTree except ImportError: from elementtree import ElementTree import gdata.spreadsheet.service import gdata.service import atom.service import gdata.spreadsheet import atom import getopt import sys import string class SimpleCRUD: def __init__(self, email, password): self.gd_client = gdata.spreadsheet.service.SpreadsheetsService() self.gd_client.email = 'chris@curalate.com' self.gd_client.password = 'jkjkdioerzumawya' self.gd_client.source = 'Spreadsheets GData Sample' self.gd_client.ProgrammaticLogin() self.curr_key = '' self.curr_wksht_id = '' self.list_feed = None def _PromptForSpreadsheet(self): # Get the list of spreadsheets feed = self.gd_client.GetSpreadsheetsFeed() self._PrintFeed(feed) input = raw_input('\nSelection: ') id_parts = feed.entry[string.atoi(input)].id.text.split('/') self.curr_key = id_parts[len(id_parts) - 1] def _PromptForWorksheet(self): # Get the list of worksheets feed = self.gd_client.GetWorksheetsFeed(self.curr_key) self._PrintFeed(feed) input = raw_input('\nSelection: ') id_parts = feed.entry[string.atoi(input)].id.text.split('/') self.curr_wksht_id = id_parts[len(id_parts) - 1] def _PromptForCellsAction(self): print ('dump\n' 'update {row} {col} {input_value}\n' '\n') input = raw_input('Command: ') command = input.split(' ', 1) if command[0] == 'dump': self._CellsGetAction() elif command[0] == 'update': parsed = command[1].split(' ', 2) if len(parsed) == 3: self._CellsUpdateAction(parsed[0], parsed[1], parsed[2]) else: self._CellsUpdateAction(parsed[0], parsed[1], '') else: self._InvalidCommandError(input) def _PromptForListAction(self): print ('dump\n' 'insert {row_data} (example: insert label=content)\n' 'update {row_index} {row_data}\n' 'delete {row_index}\n' 'Note: No uppercase letters in column names!\n' '\n') input = raw_input('Command: ') command = input.split(' ' , 1) if command[0] == 'dump': self._ListGetAction() elif command[0] == 'insert': self._ListInsertAction(command[1]) elif command[0] == 'update': parsed = command[1].split(' ', 1) self._ListUpdateAction(parsed[0], parsed[1]) elif command[0] == 'delete': self._ListDeleteAction(command[1]) else: self._InvalidCommandError(input) def _CellsGetAction(self): # Get the feed of cells feed = self.gd_client.GetCellsFeed(self.curr_key, self.curr_wksht_id) self._PrintFeed(feed) def _CellsUpdateAction(self, row, col, inputValue): entry = self.gd_client.UpdateCell(row=row, col=col, inputValue=inputValue, key=self.curr_key, wksht_id=self.curr_wksht_id) if isinstance(entry, gdata.spreadsheet.SpreadsheetsCell): print 'Updated!' def _ListGetAction(self): # Get the list feed self.list_feed = self.gd_client.GetListFeed(self.curr_key, self.curr_wksht_id) self._PrintFeed(self.list_feed) def _ListInsertAction(self, row_data): entry = self.gd_client.InsertRow(self._StringToDictionary(row_data), self.curr_key, self.curr_wksht_id) if isinstance(entry, gdata.spreadsheet.SpreadsheetsList): print 'Inserted!' def _ListUpdateAction(self, index, row_data): self.list_feed = self.gd_client.GetListFeed(self.curr_key, self.curr_wksht_id) entry = self.gd_client.UpdateRow( self.list_feed.entry[string.atoi(index)], self._StringToDictionary(row_data)) if isinstance(entry, gdata.spreadsheet.SpreadsheetsList): print 'Updated!' def _ListDeleteAction(self, index): self.list_feed = self.gd_client.GetListFeed(self.curr_key, self.curr_wksht_id) self.gd_client.DeleteRow(self.list_feed.entry[string.atoi(index)]) print 'Deleted!' def _StringToDictionary(self, row_data): dict = {} for param in row_data.split(): temp = param.split('=') dict[temp[0]] = temp[1] return dict def _PrintFeed(self, feed): for i, entry in enumerate(feed.entry): if isinstance(feed, gdata.spreadsheet.SpreadsheetsCellsFeed): print '%s %s\n' % (entry.title.text, entry.content.text) elif isinstance(feed, gdata.spreadsheet.SpreadsheetsListFeed): print '%s %s %s' % (i, entry.title.text, entry.content.text) # Print this row's value for each column (the custom dictionary is # built using the gsx: elements in the entry.) print 'Contents:' for key in entry.custom: print ' %s: %s' % (key, entry.custom[key].text) print '\n', else: print '%s %s\n' % (i, entry.title.text) def _InvalidCommandError(self, input): print 'Invalid input: %s\n' % (input) def Run(self): self._PromptForSpreadsheet() self._PromptForWorksheet() input = raw_input('cells or list? ') if input == 'cells': while True: self._PromptForCellsAction() elif input == 'list': while True: self._PromptForListAction() def main(): # parse command line options try: opts, args = getopt.getopt(sys.argv[1:], "", ["user=", "pw="]) except getopt.error, msg: print 'python spreadsheetExample.py --user [username] --pw [password] ' sys.exit(2) user = 'fake@gmail.com' pw = 'fakepassword' key = '' # Process options for o, a in opts: if o == "--user": user = a elif o == "--pw": pw = a if user == '' or pw == '': print 'python spreadsheetExample.py --user [username] --pw [password] ' sys.exit(2) sample = SimpleCRUD(user, pw) sample.Run() if __name__ == '__main__': main()
However this returns the following error:
Traceback (most recent call last): File "/Users/Chris/Desktop/gdata_test.py", line 200, in <module> main() File "/Users/Chris/Desktop/gdata_test.py", line 196, in main sample.Run() File "/Users/Chris/Desktop/gdata_test.py", line 162, in Run self._PromptForSpreadsheet() File "/Users/Chris/Desktop/gdata_test.py", line 49, in _PromptForSpreadsheet feed = self.gd_client.GetSpreadsheetsFeed() File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/gdata/spreadsheet/service.py", line 99, in GetSpreadsheetsFeed converter=gdata.spreadsheet.SpreadsheetsSpreadsheetsFeedFromString) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/gdata/service.py", line 1074, in Get return converter(result_body) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/gdata/spreadsheet/__init__.py", line 395, in SpreadsheetsSpreadsheetsFeedFromString xml_string) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/atom/__init__.py", line 93, in optional_warn_function return f(*args, **kwargs) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/atom/__init__.py", line 127, in CreateClassFromXMLString tree = ElementTree.fromstring(xml_string) File "<string>", line 125, in XML cElementTree.ParseError: no element found: line 1, column 0 [Finished in 0.3s with exit code 1] [shell_cmd: python -u "/Users/Chris/Desktop/gdata_test.py"] [dir: /Users/Chris/Desktop] [path: /usr/bin:/bin:/usr/sbin:/sbin]
I should also mention that I've been using Gspread as a method to interact with Google Spreadsheets, but when I run the below code, I get the gid, but I need to have the worksheet id.
gc = gspread.authorize(credentials) sh = gc.open_by_url('google_spreadsheet_url') sh.get_id_fields() >> {'spreadsheet_id': '1BgCEn-3Nor7UxOEPwD-qv8qXe7CaveJBrn9_Lcpo4W4','worksheet_id': 'oqitk0d'}
1 Answers
Answers 1
See the self.gd_client.ProgrammaticLogin()
call - this is causing the major problem since it uses the "ClientLogin" authorization method which was first deprecated and later removed on April 20, 2015.
I would actually look into the more fresh and actively developed gspread
module instead.
Here is a, somewhat insane, example demonstrating how to extract the actual "gid" value for a given spreadsheet and worksheet name. Note that you would first need to generate the JSON file with the OAuth credentials (I'm assuming you've already done that).
The code (added comments that would hopefully help to understand it):
import urlparse import xml.etree.ElementTree as ET import gspread from oauth2client.service_account import ServiceAccountCredentials SPREADSHEET_NAME = 'My Test Spreadsheet' WORKSHEET_NAME = "Sheet2" PATH_TO_JSON_KEYFILE = '/path/to/json/key/file.json' NAMESPACES = {'ns0': 'http://www.w3.org/2005/Atom'} SCOPES = ['https://spreadsheets.google.com/feeds'] # log in credentials = ServiceAccountCredentials.from_json_keyfile_name(PATH_TO_JSON_KEYFILE, SCOPES) gss_client = gspread.authorize(credentials) # open spreadsheet gss = gss_client.open(SPREADSHEET_NAME) # extract the full feed url root = gss._feed_entry full_feed_url = next(elm.attrib["href"] for elm in root.findall("ns0:link", namespaces=NAMESPACES) if "full" in elm.attrib["href"]) # get the feed and extract the gid value for a given sheet name response = gss_client.session.get(full_feed_url) root = ET.fromstring(response.content) sheet_entry = next(elm for elm in root.findall("ns0:entry", namespaces=NAMESPACES) if elm.find("ns0:title", namespaces=NAMESPACES).text == WORKSHEET_NAME) link = next(elm.attrib["href"] for elm in sheet_entry.findall("ns0:link", namespaces=NAMESPACES) if "gid=" in elm.attrib["href"]) # extract "gid" from URL gid = urlparse.parse_qs(urlparse.urlparse(link).query)["gid"][0] print(gid)
It also looks like there is a way to convert the worksheet ID to a gid value, see:
0 comments:
Post a Comment