#include #include #include #include void DumpTableRows(IHTMLTable* pTable, std::wofstream& file) { IHTMLElementCollection* pRows = NULL; HRESULT hr = pTable->get_rows(&pRows); long numRows = 0; pRows->get_length(&numRows); VARIANT idNum; V_VT(&idNum) = VT_I4; for(long i = 0; i < numRows; ++i) { V_I4(&idNum) = i; LPDISPATCH pRowDisp = NULL; hr = pRows->item(idNum, idNum, &pRowDisp); IHTMLTableRow* pRow = NULL; hr = pRowDisp->QueryInterface(&pRow); pRowDisp->Release(); IHTMLElementCollection* pCellsCollection = NULL; hr = pRow->get_cells(&pCellsCollection); long numCells = 0; hr = pCellsCollection->get_length(&numCells); VARIANT cellNum = {VT_I4}; for(long j = 0; j < numCells; ++j) { V_I4(&cellNum) = j; LPDISPATCH pCellDisp = NULL; hr = pCellsCollection->item(cellNum, cellNum, &pCellDisp); IHTMLElement* pCellElem = NULL; hr = pCellDisp->QueryInterface(&pCellElem); pCellDisp->Release(); BSTR cellText = NULL; hr = pCellElem->get_innerText(&cellText); std::wstring cellTextStr; if(SUCCEEDED(hr) && cellText) { cellTextStr = cellText; cellTextStr.push_back(L'"'); cellTextStr.insert(0, j == 0 ? L"\"" : L", \""); SysFreeString(cellText); pCellElem->Release(); } file << cellTextStr; } pCellsCollection->Release(); file << L'\n'; pRow->Release(); } } void DumpTable42InnerHTMLToFile(IHTMLElementCollection* pColl, std::wofstream& file) { long numTables = 0; HRESULT hr = pColl->get_length(&numTables); if(numTables > 41) { VARIANT idNum; V_VT(&idNum) = VT_I4; V_I4(&idNum) = 41; LPDISPATCH pDispItem = NULL; hr = pColl->item(idNum, idNum, &pDispItem); IHTMLElement* pElement = NULL; hr = pDispItem->QueryInterface(&pElement); IHTMLTable* pTable = NULL; hr = pDispItem->QueryInterface(&pTable); pDispItem->Release(); DumpTableRows(pTable, file); BSTR html; hr = pElement->get_innerHTML(&html); pElement->Release(); pTable->Release(); SysFreeString(html); } } class PropSink : public IPropertyNotifySink { public: PropSink(IHTMLDocument2* pDoc, HANDLE hReadyEvent) : m_pMSHTML(pDoc), hReadyEvent(hReadyEvent), m_dwRef(1), m_hrConnected(CONNECT_E_CANNOTCONNECT), m_dwCookie(0), m_pCP(NULL), m_szURL(NULL), m_nScheme(INTERNET_SCHEME_UNKNOWN), m_lReadyState(READYSTATE_UNINITIALIZED) {} ~PropSink() { } HRESULT Passivate() { HRESULT hr = NOERROR; // Disconnect from property change notifications if (m_pCP) { if (m_dwCookie) { hr = m_pCP->Unadvise(m_dwCookie); m_dwCookie = 0; } // Release the connection point m_pCP->Release(); m_pCP = NULL; } if (m_pMSHTML) { m_pMSHTML->Release(); m_pMSHTML = NULL; } return NOERROR; } // IUnknown methods STDMETHOD(QueryInterface)(REFIID riid, LPVOID* ppv); STDMETHOD_(ULONG, AddRef)(); STDMETHOD_(ULONG, Release)(); // IPropertyNotifySink methods STDMETHOD(OnChanged)(DISPID dispID); STDMETHOD(OnRequestEdit)(DISPID dispID); protected: // Persistence helpers HRESULT LoadURLFromFile(); HRESULT LoadURLFromMoniker(); IHTMLDocument2* m_pMSHTML; HANDLE hReadyEvent; DWORD m_dwRef; DWORD m_dwCookie; LPCONNECTIONPOINT m_pCP; HRESULT m_hrConnected; LPTSTR m_szURL; INTERNET_SCHEME m_nScheme; READYSTATE m_lReadyState; }; STDMETHODIMP PropSink::QueryInterface(REFIID riid, LPVOID* ppv) { *ppv = NULL; if (IID_IUnknown == riid || IID_IPropertyNotifySink == riid) { *ppv = (LPUNKNOWN)(IPropertyNotifySink*)this; AddRef(); return NOERROR; } else { return E_NOTIMPL; } } STDMETHODIMP_(ULONG) PropSink::AddRef() { return 1; } STDMETHODIMP_(ULONG) PropSink::Release() { return 1; } // Fired on change of the value of a 'bindable' property STDMETHODIMP PropSink::OnChanged(DISPID dispID) { HRESULT hr; TCHAR szBuff[255]; if (DISPID_READYSTATE == dispID) { // check the value of the readystate property assert(m_pMSHTML); VARIANT vResult = {0}; EXCEPINFO excepInfo; UINT uArgErr; DISPPARAMS dp = {NULL, NULL, 0, 0}; if (SUCCEEDED(hr = m_pMSHTML->Invoke(DISPID_READYSTATE, IID_NULL, LOCALE_SYSTEM_DEFAULT, DISPATCH_PROPERTYGET, &dp, &vResult, &excepInfo, &uArgErr))) { // CODE REVIEW: Recode to replace banned lstrcpy API in several cases below. #define RSLENGTH 20 TCHAR szReadyState[RSLENGTH]; assert(VT_I4 == V_VT(&vResult)); m_lReadyState = (READYSTATE)V_I4(&vResult); switch (m_lReadyState) { case READYSTATE_UNINITIALIZED: //= 0, // CODE REVIEW: Replace banned lstrcpy API. // lstrcpy(szReadyState, TEXT("Uninitialized")); StringCchCopy(szReadyState, RSLENGTH, TEXT("Uninitialized")); break; case READYSTATE_LOADING: // = 1, // CODE REVIEW: Replace banned lstrcpy API. // lstrcpy(szReadyState, TEXT("Loading")); StringCchCopy(szReadyState, RSLENGTH, TEXT("Loading")); break; case READYSTATE_LOADED: // = 2, // CODE REVIEW: Replace banned lstrcpy API. // lstrcpy(szReadyState, TEXT("Loaded")); StringCchCopy(szReadyState, RSLENGTH, TEXT("Loaded")); break; case READYSTATE_INTERACTIVE: // = 3, // CODE REVIEW: Replace banned lstrcpy API. // lstrcpy(szReadyState, TEXT("Interactive")); StringCchCopy(szReadyState, RSLENGTH, TEXT("Interactive")); break; case READYSTATE_COMPLETE: // = 4 // CODE REVIEW: Replace banned lstrcpy API. // lstrcpy(szReadyState, TEXT("Complete")); StringCchCopy(szReadyState, RSLENGTH, TEXT("Complete")); SetEvent(hReadyEvent); break; } // CODE REVIEW: Replace Banned API. // wsprintf(szBuff, TEXT("OnChanged: readyState = %s\n"), szReadyState); StringCchPrintf(szBuff, 255, TEXT("OnChanged: readyState = %s\n"), szReadyState); VariantClear(&vResult); } else { // CODE REVIEW: Replace banned lstrcpy API. // lstrcpy(szBuff, TEXT("Unable to obtain readyState value\n")); StringCchCopy(szBuff, 255, TEXT("Unable to obtain readyState value\n")); } } else { // CODE REVIEW: Replace Banned API. // wsprintf(szBuff, TEXT("OnChanged: %d\n"), dispID); StringCchPrintf(szBuff, 255, TEXT("OnChanged: %d\n"), dispID); } _putws(szBuff); return NOERROR; } STDMETHODIMP PropSink::OnRequestEdit(DISPID dispID) { return NOERROR; } int __cdecl wmain(int argc, wchar_t** argv) { if(argc < 3) { _putws(L"Usage: StripTable "); return 1; } CoInitializeEx(NULL, COINIT_APARTMENTTHREADED | COINIT_DISABLE_OLE1DDE); { IHTMLDocument2* pDocument = NULL; HRESULT hr = CoCreateInstance(CLSID_HTMLDocument, NULL, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&pDocument)); HANDLE hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); PropSink props(pDocument, hEvent); IConnectionPointContainer* pCont = NULL; hr = pDocument->QueryInterface(&pCont); IConnectionPoint* pConPoint = NULL; hr = pCont->FindConnectionPoint(IID_IPropertyNotifySink, &pConPoint); pCont->Release(); DWORD cookie = 0; hr = pConPoint->Advise(&props, &cookie); IPersistFile* pFile = NULL; hr = pDocument->QueryInterface(&pFile); WCHAR absPath[MAX_PATH] = {0}; GetFullPathName(argv[1], MAX_PATH, absPath, NULL); hr = pFile->Load(absPath, 0); while(MsgWaitForMultipleObjects(1, &hEvent, FALSE, INFINITE, QS_ALLEVENTS) != WAIT_OBJECT_0) { MSG msg = {0}; while(PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) { TranslateMessage(&msg); DispatchMessage(&msg); } } CloseHandle(hEvent);; hr = pConPoint->Unadvise(cookie); pConPoint->Release(); pFile->Release(); IHTMLElementCollection* pColl = NULL, *pTables = NULL; hr = pDocument->get_all(&pColl); long numElems = 0; hr = pColl->get_length(&numElems); BSTR html; pDocument->get_fileSize(&html); SysFreeString(html); VARIANT tableName; V_VT(&tableName) = VT_BSTR; V_BSTR(&tableName) = L"table"; LPDISPATCH pTableElems = NULL; hr = pColl->tags(tableName, &pTableElems); hr = pTableElems->QueryInterface(&pTables); std::wofstream outFile(argv[2]); DumpTable42InnerHTMLToFile(pTables, outFile); pTables->Release(); pColl->Release(); pTableElems->Release(); pDocument->Release(); OleUninitialize(); } return 0; }